lunr.js 83 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977
  1. /**
  2. * lunr - http://lunrjs.com - A bit like Solr, but much smaller and not as bright - 2.1.5
  3. * Copyright (C) 2017 Oliver Nightingale
  4. * @license MIT
  5. */
  6. ;(function(){
  /**
   * Convenience entry point that configures a builder and returns whatever
   * that builder builds.
   *
   * A new lunr.Builder is created. Its indexing pipeline receives the trimmer,
   * the stop word filter and the stemmer (in that order), and its search
   * pipeline receives the stemmer.
   *
   * The builder is then yielded to the configuration function, both as `this`
   * and as the first argument, so the list of fields and other builder
   * parameters can be customised.
   *
   * All documents _must_ be added within the passed config function.
   *
   * @example
   * var idx = lunr(function () {
   *   this.field('title')
   *   this.field('body')
   *   this.ref('id')
   *
   *   documents.forEach(function (doc) {
   *     this.add(doc)
   *   }, this)
   * })
   *
   * @see {@link lunr.Builder}
   * @see {@link lunr.Pipeline}
   * @see {@link lunr.trimmer}
   * @see {@link lunr.stopWordFilter}
   * @see {@link lunr.stemmer}
   * @namespace {function} lunr
   * @param {function} config - Called with the builder as both `this` and first argument.
   * @returns {*} The result of calling `build()` on the configured builder.
   */
  var lunr = function (config) {
    var builder = new lunr.Builder(),
        indexingSteps = [lunr.trimmer, lunr.stopWordFilter, lunr.stemmer]

    // Documents are trimmed, stop-word filtered and stemmed; queries are only stemmed.
    builder.pipeline.add.apply(builder.pipeline, indexingSteps)
    builder.searchPipeline.add(lunr.stemmer)

    config.call(builder, builder)

    return builder.build()
  }

  lunr.version = "2.1.5"
  52. /*!
  53. * lunr.utils
  54. * Copyright (C) 2017 Oliver Nightingale
  55. */
  /**
   * A namespace containing utils for the rest of the lunr library
   */
  lunr.utils = {}

  /**
   * Print a warning message to the console.
   *
   * Does nothing when no `console` binding exists or it has no `warn`
   * method. The console is probed with `typeof` rather than through a
   * captured global object, so the function also works where the enclosing
   * `this` is `undefined` (strict mode, ES modules).
   *
   * Only the first argument is forwarded to `console.warn`.
   *
   * @param {String} message The message to be printed.
   * @memberOf Utils
   */
  lunr.utils.warn = function (message) {
    /* eslint-disable no-console */
    if (typeof console !== 'undefined' && console.warn) {
      console.warn(message)
    }
    /* eslint-enable no-console */
  }

  /**
   * Convert an object to a string.
   *
   * In the case of `null` and `undefined` the function returns
   * the empty string, in all other cases the result of calling
   * `toString` on the passed object is returned.
   *
   * @param {Any} obj The object to convert to a string.
   * @return {String} string representation of the passed object.
   * @memberOf Utils
   */
  lunr.utils.asString = function (obj) {
    if (obj === void 0 || obj === null) {
      return ""
    }

    return obj.toString()
  }
  /**
   * A reference to a single field of a single document.
   *
   * The string form is `fieldName + lunr.FieldRef.joiner + docRef`.
   *
   * @constructor
   * @param {*} docRef - The reference of the document.
   * @param {string} fieldName - The name of the field within that document.
   * @param {string} [stringValue] - A pre-computed string form; when omitted
   * it is built on the first call to `toString` and then cached.
   */
  lunr.FieldRef = function (docRef, fieldName, stringValue) {
    this.docRef = docRef
    this.fieldName = fieldName
    this._stringValue = stringValue
  }

  /**
   * The text placed between the field name and the document ref in the
   * string form of a field ref.
   *
   * @static
   */
  lunr.FieldRef.joiner = "/"

  /**
   * Parses the string form of a field ref.
   *
   * The string is split at the FIRST occurrence of the joiner: everything
   * before it is the field name, everything after it is the document ref
   * (which may itself contain the joiner).
   *
   * @static
   * @param {string} s - A string previously produced by `toString`.
   * @returns {lunr.FieldRef}
   * @throws {string} "malformed field ref string" when the joiner is absent.
   */
  lunr.FieldRef.fromString = function (s) {
    var joiner = lunr.FieldRef.joiner,
        n = s.indexOf(joiner)

    if (n === -1) {
      throw "malformed field ref string"
    }

    // Skip the whole joiner, not just one character, so that a joiner longer
    // than one character round-trips through toString/fromString.
    var fieldName = s.slice(0, n),
        docRef = s.slice(n + joiner.length)

    return new lunr.FieldRef (docRef, fieldName, s)
  }

  /**
   * Returns the string form of this field ref, building and caching it on
   * first use.
   *
   * @returns {string}
   */
  lunr.FieldRef.prototype.toString = function () {
    if (this._stringValue == undefined) {
      this._stringValue = this.fieldName + lunr.FieldRef.joiner + this.docRef
    }

    return this._stringValue
  }
  /**
   * Calculates the inverse document frequency for a posting.
   *
   * The number of documents containing the term is the total number of keys
   * found under every enumerable property of the posting other than `_index`.
   * The result is `ln(1 + |(N - n + 0.5) / (n + 0.5)|)` where `N` is
   * `documentCount` and `n` is that number of documents.
   *
   * @private
   * @param {object} posting - The posting for a given term
   * @param {number} documentCount - The total number of documents.
   * @returns {number}
   */
  lunr.idf = function (posting, documentCount) {
    var documentsWithTerm = 0,
        fieldName

    for (fieldName in posting) {
      // `_index` is the term index stored alongside the fields, not a field.
      if (fieldName != '_index') {
        documentsWithTerm += Object.keys(posting[fieldName]).length
      }
    }

    var ratio = (documentCount - documentsWithTerm + 0.5) / (documentsWithTerm + 0.5)

    return Math.log(1 + Math.abs(ratio))
  }
  /**
   * Wraps the string form of a token, together with its metadata, while it
   * travels through the text processing pipeline.
   *
   * @constructor
   * @param {string} [str=''] - The string token being wrapped.
   * @param {object} [metadata={}] - Metadata associated with this token.
   */
  lunr.Token = function (str, metadata) {
    this.str = str ? str : ""
    this.metadata = metadata ? metadata : {}
  }

  /**
   * Returns the token string that is being wrapped by this object.
   *
   * @returns {string}
   */
  lunr.Token.prototype.toString = function () {
    return this.str
  }

  /**
   * A token update function is used when updating or optionally
   * when cloning a token.
   *
   * @callback lunr.Token~updateFunction
   * @param {string} str - The string representation of the token.
   * @param {Object} metadata - All metadata associated with this token.
   */

  /**
   * Replaces the wrapped string with the result of applying the given
   * function to it, and returns this same token.
   *
   * @example
   * token.update(function (str, metadata) {
   *   return str.toUpperCase()
   * })
   *
   * @param {lunr.Token~updateFunction} fn - A function to apply to the token string.
   * @returns {lunr.Token}
   */
  lunr.Token.prototype.update = function (fn) {
    var updated = fn(this.str, this.metadata)

    this.str = updated

    return this
  }

  /**
   * Creates a new token from this one. When a function is given, the new
   * token's string is the result of applying it to this token's string.
   *
   * The metadata object is passed to the new token as-is; it is not copied.
   *
   * @param {lunr.Token~updateFunction} [fn] - An optional function to apply to the cloned token.
   * @returns {lunr.Token}
   */
  lunr.Token.prototype.clone = function (fn) {
    var str = fn ? fn(this.str, this.metadata) : this.str

    return new lunr.Token (str, this.metadata)
  }
  185. /*!
  186. * lunr.tokenizer
  187. * Copyright (C) 2017 Oliver Nightingale
  188. */
  /**
   * A function for splitting a string into tokens ready to be inserted into
   * the search index. Uses `lunr.tokenizer.separator` to split strings, change
   * the value of this property to change how strings are split into tokens.
   *
   * `null` and `undefined` produce no tokens. Arrays have each element
   * converted to a lower-cased string and wrapped in a lunr.Token (without
   * position metadata). Anything else is converted with `toString`,
   * lower-cased, trimmed, and split wherever a character matches
   * `lunr.tokenizer.separator`.
   *
   * Tokens produced from a string carry `position: [start, length]` and
   * `index` metadata. `start` is an offset into the lower-cased, UNTRIMMED
   * text, so it still lines up with the input when the input begins with
   * whitespace.
   *
   * @static
   * @param {?(string|object|object[])} obj - The object to convert into tokens
   * @returns {lunr.Token[]}
   */
  lunr.tokenizer = function (obj) {
    if (obj == null) {
      return []
    }

    if (Array.isArray(obj)) {
      return obj.map(function (t) {
        return new lunr.Token(lunr.utils.asString(t).toLowerCase())
      })
    }

    var raw = obj.toString().toLowerCase(),
        str = raw.trim(),
        // Number of characters removed from the front by trim(). The trimmed
        // text starts with a non-whitespace character, so its first
        // occurrence in `raw` is exactly where the leading whitespace ends.
        leading = raw.indexOf(str),
        len = str.length,
        tokens = []

    for (var sliceEnd = 0, sliceStart = 0; sliceEnd <= len; sliceEnd++) {
      var char = str.charAt(sliceEnd),
          sliceLength = sliceEnd - sliceStart

      // The end of the string closes the final token like a separator would.
      if ((char.match(lunr.tokenizer.separator) || sliceEnd == len)) {

        if (sliceLength > 0) {
          tokens.push(
            new lunr.Token (str.slice(sliceStart, sliceEnd), {
              position: [leading + sliceStart, sliceLength],
              index: tokens.length
            })
          )
        }

        sliceStart = sliceEnd + 1
      }
    }

    return tokens
  }

  /**
   * The separator used to split a string into tokens. Override this property to change the
   * behaviour of `lunr.tokenizer` when tokenizing strings. By default this splits on whitespace
   * and hyphens.
   *
   * @static
   * @see lunr.tokenizer
   */
  lunr.tokenizer.separator = /[\s\-]+/
  239. /*!
  240. * lunr.Pipeline
  241. * Copyright (C) 2017 Oliver Nightingale
  242. */
  /**
   * lunr.Pipelines maintain an ordered list of functions to be applied to all
   * tokens in documents entering the search index and queries being run against
   * the index.
   *
   * An instance of lunr.Index created with the lunr shortcut will contain a
   * pipeline with a stop word filter and an English language stemmer. Extra
   * functions can be added before or after either of these functions or these
   * default functions can be removed.
   *
   * When run the pipeline will call each function in turn, passing a token, the
   * index of that token in the current list of tokens and finally the list of
   * tokens itself.
   *
   * The output of functions in the pipeline will be passed to the next function
   * in the pipeline. To exclude a token from entering the index the function
   * should return null or undefined (an empty string is dropped as well), the
   * rest of the pipeline will not be called with this token.
   *
   * For serialisation of pipelines to work, all functions used in an instance of
   * a pipeline should be registered with lunr.Pipeline. Registered functions can
   * then be loaded. If trying to load a serialised pipeline that uses functions
   * that are not registered an error will be thrown.
   *
   * If not planning on serialising the pipeline then registering pipeline functions
   * is not necessary.
   *
   * @constructor
   */
  lunr.Pipeline = function () {
    this._stack = []
  }

  // Prototype-less dictionary, so a label can never collide with a property
  // inherited from Object.prototype.
  lunr.Pipeline.registeredFunctions = Object.create(null)

  /**
   * A pipeline function maps lunr.Token to lunr.Token. A lunr.Token contains the token
   * string as well as all known metadata. A pipeline function can mutate the token string
   * or mutate (or add) metadata for a given token.
   *
   * A pipeline function can indicate that the passed token should be discarded by returning
   * null. This token will not be passed to any downstream pipeline functions and will not be
   * added to the index.
   *
   * Multiple tokens can be returned by returning an array of tokens. Each token will be passed
   * to any downstream pipeline functions and all returned tokens will be added to the index.
   *
   * Any number of pipeline functions may be chained together using a lunr.Pipeline.
   *
   * @interface lunr.PipelineFunction
   * @param {lunr.Token} token - A token from the document being processed.
   * @param {number} i - The index of this token in the complete list of tokens for this document/field.
   * @param {lunr.Token[]} tokens - All tokens for this document/field.
   * @returns {(?lunr.Token|lunr.Token[])}
   */

  /**
   * Register a function with the pipeline.
   *
   * Functions that are used in the pipeline should be registered if the pipeline
   * needs to be serialised, or a serialised pipeline needs to be loaded.
   *
   * Registering a function does not add it to a pipeline, functions must still be
   * added to instances of the pipeline for them to be used when running a pipeline.
   *
   * The label is also stored on the function itself as `fn.label`. A warning is
   * logged when the label is already taken; the new function replaces the old.
   *
   * @param {lunr.PipelineFunction} fn - The function to register.
   * @param {String} label - The label to register this function with
   */
  lunr.Pipeline.registerFunction = function (fn, label) {
    // Always go through lunr.Pipeline (not `this`) so reads and writes hit the
    // same registry even when this function is called detached.
    var registered = lunr.Pipeline.registeredFunctions

    if (label in registered) {
      lunr.utils.warn('Overwriting existing registered function: ' + label)
    }

    fn.label = label
    registered[fn.label] = fn
  }

  /**
   * Warns if the function is not registered as a Pipeline function.
   *
   * @param {lunr.PipelineFunction} fn - The function to check for.
   * @private
   */
  lunr.Pipeline.warnIfFunctionNotRegistered = function (fn) {
    var isRegistered = fn.label && (fn.label in lunr.Pipeline.registeredFunctions)

    if (!isRegistered) {
      lunr.utils.warn('Function is not registered with pipeline. This may cause problems when serialising the index.\n', fn)
    }
  }

  /**
   * Loads a previously serialised pipeline.
   *
   * All functions to be loaded must already be registered with lunr.Pipeline.
   * If any function from the serialised data has not been registered then an
   * error will be thrown.
   *
   * @param {Object} serialised - The serialised pipeline to load (a list of labels).
   * @returns {lunr.Pipeline}
   * @throws {Error} When a label has no registered function.
   */
  lunr.Pipeline.load = function (serialised) {
    var pipeline = new lunr.Pipeline

    serialised.forEach(function (fnName) {
      var fn = lunr.Pipeline.registeredFunctions[fnName]

      if (fn) {
        pipeline.add(fn)
      } else {
        throw new Error('Cannot load unregistered function: ' + fnName)
      }
    })

    return pipeline
  }

  /**
   * Adds new functions to the end of the pipeline.
   *
   * Logs a warning if the function has not been registered.
   *
   * @param {lunr.PipelineFunction[]} functions - Any number of functions to add to the pipeline.
   */
  lunr.Pipeline.prototype.add = function () {
    var fns = Array.prototype.slice.call(arguments)

    fns.forEach(function (fn) {
      lunr.Pipeline.warnIfFunctionNotRegistered(fn)
      this._stack.push(fn)
    }, this)
  }

  /**
   * Adds a single function after a function that already exists in the
   * pipeline.
   *
   * Logs a warning if the function has not been registered.
   *
   * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
   * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
   * @throws {Error} When existingFn is not in the pipeline.
   */
  lunr.Pipeline.prototype.after = function (existingFn, newFn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

    var pos = this._stack.indexOf(existingFn)
    if (pos == -1) {
      throw new Error('Cannot find existingFn')
    }

    this._stack.splice(pos + 1, 0, newFn)
  }

  /**
   * Adds a single function before a function that already exists in the
   * pipeline.
   *
   * Logs a warning if the function has not been registered.
   *
   * @param {lunr.PipelineFunction} existingFn - A function that already exists in the pipeline.
   * @param {lunr.PipelineFunction} newFn - The new function to add to the pipeline.
   * @throws {Error} When existingFn is not in the pipeline.
   */
  lunr.Pipeline.prototype.before = function (existingFn, newFn) {
    lunr.Pipeline.warnIfFunctionNotRegistered(newFn)

    var pos = this._stack.indexOf(existingFn)
    if (pos == -1) {
      throw new Error('Cannot find existingFn')
    }

    this._stack.splice(pos, 0, newFn)
  }

  /**
   * Removes a function from the pipeline. Does nothing when the function is
   * not in the pipeline.
   *
   * @param {lunr.PipelineFunction} fn The function to remove from the pipeline.
   */
  lunr.Pipeline.prototype.remove = function (fn) {
    var pos = this._stack.indexOf(fn)
    if (pos == -1) {
      return
    }

    this._stack.splice(pos, 1)
  }

  /**
   * Runs the current list of functions that make up the pipeline against the
   * passed tokens.
   *
   * Each function is applied to every token produced by the previous function.
   * A result of `null`, `undefined` or `''` drops the token; an array result
   * contributes each of its elements; any other result is kept as one token.
   *
   * @param {Array} tokens The tokens to run through the pipeline.
   * @returns {Array}
   */
  lunr.Pipeline.prototype.run = function (tokens) {
    var stackLength = this._stack.length

    for (var i = 0; i < stackLength; i++) {
      var fn = this._stack[i],
          tokenCount = tokens.length,
          memo = []

      for (var j = 0; j < tokenCount; j++) {
        var result = fn(tokens[j], j, tokens)

        // null is the documented "discard this token" signal; undefined and
        // the empty string are dropped too.
        if (result === null || result === void 0 || result === '') continue

        // Push in place instead of concat-ing a fresh array per token, which
        // made a run over n tokens copy O(n^2) elements.
        if (Array.isArray(result)) {
          for (var k = 0; k < result.length; k++) {
            memo.push(result[k])
          }
        } else {
          memo.push(result)
        }
      }

      tokens = memo
    }

    return tokens
  }

  /**
   * Convenience method for passing a string through a pipeline and getting
   * strings out. This method takes care of wrapping the passed string in a
   * token and mapping the resulting tokens back to strings.
   *
   * @param {string} str - The string to pass through the pipeline.
   * @returns {string[]}
   */
  lunr.Pipeline.prototype.runString = function (str) {
    var token = new lunr.Token (str)

    return this.run([token]).map(function (t) {
      return t.toString()
    })
  }

  /**
   * Resets the pipeline by removing any existing processors.
   *
   */
  lunr.Pipeline.prototype.reset = function () {
    this._stack = []
  }

  /**
   * Returns a representation of the pipeline ready for serialisation: the
   * label of each function, in pipeline order.
   *
   * Logs a warning if the function has not been registered.
   *
   * @returns {Array}
   */
  lunr.Pipeline.prototype.toJSON = function () {
    return this._stack.map(function (fn) {
      lunr.Pipeline.warnIfFunctionNotRegistered(fn)

      return fn.label
    })
  }
  463. /*!
  464. * lunr.Vector
  465. * Copyright (C) 2017 Oliver Nightingale
  466. */
  /**
   * A vector is used to construct the vector space of documents and queries. These
   * vectors support operations to determine the similarity between two documents or
   * a document and a query.
   *
   * No parameters are needed to create an empty vector. When loading a previously
   * dumped vector, its raw elements can be handed to the constructor; the array is
   * used as-is, not copied.
   *
   * For performance reasons vectors are implemented with a flat array, where an element's
   * index is immediately followed by its value. E.g. [index, value, index, value]. This
   * allows the underlying array to be as sparse as possible and still offer decent
   * performance when being used for vector calculations.
   *
   * @constructor
   * @param {Number[]} [elements] - The flat list of element index and element value pairs.
   */
  lunr.Vector = function (elements) {
    // 0 doubles as "magnitude not computed yet".
    this._magnitude = 0
    this.elements = elements ? elements : []
  }
  /**
   * Calculates the position within the flat elements array at which the
   * [index, value] pair for a given index belongs.
   *
   * This is used internally by upsert. The elements are searched by repeated
   * halving, which relies on the pairs being ordered by index. If an entry
   * for the index already exists, the position of that entry is returned; it
   * is the caller's responsibility to check whether there is a duplicate at
   * that position.
   *
   * @param {Number} index - The index at which the element should be inserted.
   * @returns {Number}
   */
  lunr.Vector.prototype.positionForIndex = function (index) {
    // For an empty vector the tuple can be inserted at the beginning
    if (this.elements.length == 0) {
      return 0
    }

    // lo/hi/mid count [index, value] pairs; multiply by 2 for array offsets.
    var lo = 0,
        hi = this.elements.length / 2,
        span = hi - lo,
        mid = Math.floor(span / 2),
        midIndex = this.elements[mid * 2]

    while (span > 1 && midIndex != index) {
      if (midIndex < index) {
        lo = mid
      } else if (midIndex > index) {
        hi = mid
      }

      span = hi - lo
      mid = lo + Math.floor(span / 2)
      midIndex = this.elements[mid * 2]
    }

    // An exact match, or a larger neighbour, means the pair belongs at mid.
    if (midIndex == index || midIndex > index) {
      return mid * 2
    }

    // A smaller neighbour means the pair belongs directly after mid.
    if (midIndex < index) {
      return (mid + 1) * 2
    }
  }
  /**
   * Inserts an element at an index within the vector.
   *
   * Does not allow duplicates: implemented as an upsert whose update callback
   * throws, so an existing entry for the index results in an error.
   *
   * @param {Number} insertIdx - The index at which the element should be inserted.
   * @param {Number} val - The value to be inserted into the vector.
   * @throws {string} "duplicate index" when the index already has an entry.
   */
  lunr.Vector.prototype.insert = function (insertIdx, val) {
    var rejectDuplicate = function () {
      throw "duplicate index"
    }

    this.upsert(insertIdx, val, rejectDuplicate)
  }
  /**
   * Inserts a new entry, or updates the existing entry, for an index within
   * the vector. The cached magnitude is cleared either way.
   *
   * @param {Number} insertIdx - The index at which the element should be inserted.
   * @param {Number} val - The value to be inserted into the vector.
   * @param {function} fn - A function that is called for updates, the existing value and the
   * requested value are passed as arguments; its return value becomes the new value.
   */
  lunr.Vector.prototype.upsert = function (insertIdx, val, fn) {
    this._magnitude = 0

    var position = this.positionForIndex(insertIdx)

    // No entry for this index yet: splice in a fresh [index, value] pair.
    if (this.elements[position] != insertIdx) {
      this.elements.splice(position, 0, insertIdx, val)
      return
    }

    this.elements[position + 1] = fn(this.elements[position + 1], val)
  }
  /**
   * Calculates the magnitude of this vector: the square root of the sum of
   * the squared values. A non-zero result is cached and returned directly on
   * later calls.
   *
   * @returns {Number}
   */
  lunr.Vector.prototype.magnitude = function () {
    if (!this._magnitude) {
      var sumOfSquares = 0,
          elementsLength = this.elements.length,
          i = 1 // values sit at the odd offsets of the flat array

      while (i < elementsLength) {
        var val = this.elements[i]
        sumOfSquares += val * val
        i += 2
      }

      this._magnitude = Math.sqrt(sumOfSquares)
    }

    return this._magnitude
  }
  /**
   * Calculates the dot product of this vector and another vector.
   *
   * Walks both pair arrays in step, multiplying values only where the two
   * vectors share an index.
   * NOTE(review): this merge relies on both element arrays being ordered by
   * ascending index - confirm against positionForIndex.
   *
   * @param {lunr.Vector} otherVector - The vector to compute the dot product with.
   * @returns {Number}
   */
  lunr.Vector.prototype.dot = function (otherVector) {
    var mine = this.elements,
        theirs = otherVector.elements,
        mineLength = mine.length,
        theirsLength = theirs.length,
        p = 0,
        q = 0,
        total = 0

    while (p < mineLength && q < theirsLength) {
      var mineIdx = mine[p],
          theirsIdx = theirs[q]

      if (mineIdx < theirsIdx) {
        // index only present on our side, skip it
        p += 2
      } else if (mineIdx > theirsIdx) {
        // index only present on their side, skip it
        q += 2
      } else if (mineIdx == theirsIdx) {
        total += mine[p + 1] * theirs[q + 1]
        p += 2
        q += 2
      }
    }

    return total
  }
  /**
   * Calculates the cosine similarity between this vector and another
   * vector.
   *
   * When either vector has a magnitude of zero (no elements, or only zero
   * values) the similarity is defined as 0 rather than the NaN that a
   * 0 / 0 division would produce.
   *
   * @param {lunr.Vector} otherVector - The other vector to calculate the
   * similarity with.
   * @returns {Number}
   */
  lunr.Vector.prototype.similarity = function (otherVector) {
    var scale = this.magnitude() * otherVector.magnitude()

    // a zero-length vector has no direction, so nothing can be similar to it
    if (scale === 0) {
      return 0
    }

    return this.dot(otherVector) / scale
  }
  /**
   * Converts the vector to an array of the values stored within it.
   *
   * Only the values are returned; the indexes they are stored under are
   * dropped.
   *
   * @returns {Number[]}
   */
  lunr.Vector.prototype.toArray = function () {
    var count = this.elements.length,
        values = new Array (count / 2)

    // the value of pair n sits at position 2n + 1 of the elements array
    for (var slot = 0; slot * 2 + 1 < count; slot++) {
      values[slot] = this.elements[slot * 2 + 1]
    }

    return values
  }
  /**
   * A JSON serializable representation of the vector.
   *
   * This is the vector's own elements array (index/value pairs), not a copy.
   *
   * @returns {Number[]}
   */
  lunr.Vector.prototype.toJSON = function () {
    var serializable = this.elements
    return serializable
  }
  634. /* eslint-disable */
  635. /*!
  636. * lunr.stemmer
  637. * Copyright (C) 2017 Oliver Nightingale
  638. * Includes code from - http://tartarus.org/~martin/PorterStemmer/js.txt
  639. */
  /**
   * lunr.stemmer is an english language stemmer, this is a JavaScript
   * implementation of the PorterStemmer taken from http://tartarus.org/~martin
   *
   * The stemming function is handed to the token's `update` method and the
   * result of that call is returned.
   *
   * @static
   * @implements {lunr.PipelineFunction}
   * @param {lunr.Token} token - The string to stem
   * @returns {lunr.Token}
   * @see {@link lunr.Pipeline}
   */
  lunr.stemmer = (function () {
    // suffix replacements applied in step 2
    var step2list = {
      "ational" : "ate",
      "tional" : "tion",
      "enci" : "ence",
      "anci" : "ance",
      "izer" : "ize",
      "bli" : "ble",
      "alli" : "al",
      "entli" : "ent",
      "eli" : "e",
      "ousli" : "ous",
      "ization" : "ize",
      "ation" : "ate",
      "ator" : "ate",
      "alism" : "al",
      "iveness" : "ive",
      "fulness" : "ful",
      "ousness" : "ous",
      "aliti" : "al",
      "iviti" : "ive",
      "biliti" : "ble",
      "logi" : "log"
    };

    // suffix replacements applied in step 3
    var step3list = {
      "icate" : "ic",
      "ative" : "",
      "alize" : "al",
      "iciti" : "ic",
      "ical" : "ic",
      "ful" : "",
      "ness" : ""
    };

    var c = "[^aeiou]",              // consonant
        v = "[aeiouy]",              // vowel
        C = c + "[^aeiouy]*",        // consonant sequence
        V = v + "[aeiou]*",          // vowel sequence
        optionalC = "^(" + C + ")?"; // optional leading consonant sequence

    var re_mgr0 = new RegExp(optionalC + V + C);                  // [C]VC... is m>0
    var re_meq1 = new RegExp(optionalC + V + C + "(" + V + ")?$"); // [C]VC[V] is m=1
    var re_mgr1 = new RegExp(optionalC + V + C + V + C);          // [C]VCVC... is m>1
    var re_s_v = new RegExp(optionalC + v);                       // vowel in stem

    var re_1a = /^(.+?)(ss|i)es$/;
    var re2_1a = /^(.+?)([^s])s$/;
    var re_1b = /^(.+?)eed$/;
    var re2_1b = /^(.+?)(ed|ing)$/;
    var re_lastChar = /.$/;
    var re2_1b_2 = /(at|bl|iz)$/;
    var re3_1b_2 = new RegExp("([^aeiouylsz])\\1$");
    // consonant-vowel-consonant ending, used by both step 1b and step 5
    var re_cvc = new RegExp("^" + C + v + "[^aeiouwxy]$");
    var re_1c = /^(.+?[^aeiou])y$/;
    var re_2 = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
    var re_3 = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
    var re_4 = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
    var re2_4 = /^(.+?)(s|t)(ion)$/;
    var re_5 = /^(.+?)e$/;
    var re_5_1 = /ll$/;

    var porterStemmer = function porterStemmer(w) {
      // words shorter than three characters are returned untouched
      if (w.length < 3) { return w; }

      var firstch = w.substr(0, 1),
          startsWithY = firstch == "y",
          match,
          stem;

      // an initial y is upper-cased so the vowel classes do not match it
      if (startsWithY) {
        w = firstch.toUpperCase() + w.substr(1);
      }

      // Step 1a
      if (re_1a.test(w)) {
        w = w.replace(re_1a, "$1$2");
      } else if (re2_1a.test(w)) {
        w = w.replace(re2_1a, "$1$2");
      }

      // Step 1b
      match = re_1b.exec(w);
      if (match) {
        if (re_mgr0.test(match[1])) {
          w = w.replace(re_lastChar, "");
        }
      } else {
        match = re2_1b.exec(w);
        if (match) {
          stem = match[1];
          if (re_s_v.test(stem)) {
            w = stem;
            if (re2_1b_2.test(w)) {
              w = w + "e";
            } else if (re3_1b_2.test(w)) {
              w = w.replace(re_lastChar, "");
            } else if (re_cvc.test(w)) {
              w = w + "e";
            }
          }
        }
      }

      // Step 1c - replace suffix y or Y by i if preceded by a non-vowel which is not the first letter of the word (so cry -> cri, by -> by, say -> say)
      match = re_1c.exec(w);
      if (match) {
        w = match[1] + "i";
      }

      // Step 2
      match = re_2.exec(w);
      if (match && re_mgr0.test(match[1])) {
        w = match[1] + step2list[match[2]];
      }

      // Step 3
      match = re_3.exec(w);
      if (match && re_mgr0.test(match[1])) {
        w = match[1] + step3list[match[2]];
      }

      // Step 4
      match = re_4.exec(w);
      if (match) {
        if (re_mgr1.test(match[1])) {
          w = match[1];
        }
      } else {
        match = re2_4.exec(w);
        if (match) {
          stem = match[1] + match[2];
          if (re_mgr1.test(stem)) {
            w = stem;
          }
        }
      }

      // Step 5
      match = re_5.exec(w);
      if (match) {
        stem = match[1];
        if (re_mgr1.test(stem) || (re_meq1.test(stem) && !re_cvc.test(stem))) {
          w = stem;
        }
      }

      if (re_5_1.test(w) && re_mgr1.test(w)) {
        w = w.replace(re_lastChar, "");
      }

      // and turn initial Y back to y
      if (startsWithY) {
        w = firstch.toLowerCase() + w.substr(1);
      }

      return w;
    };

    return function (token) {
      return token.update(porterStemmer);
    }
  })();
  // Register the stemmer with lunr.Pipeline under the label 'stemmer'.
  lunr.Pipeline.registerFunction(lunr.stemmer, 'stemmer')
  829. /*!
  830. * lunr.stopWordFilter
  831. * Copyright (C) 2017 Oliver Nightingale
  832. */
  /**
   * lunr.generateStopWordFilter builds a stopWordFilter function from the provided
   * list of stop words.
   *
   * The built in lunr.stopWordFilter is built using this generator and can be used
   * to generate custom stopWordFilters for applications or non English languages.
   *
   * The returned function yields the token unchanged when its string form is
   * not a stop word, and undefined when it is a stop word or the token is falsy.
   *
   * @param {Array} stopWords - The list of words the generated filter should drop.
   * @returns {lunr.PipelineFunction}
   * @see lunr.Pipeline
   * @see lunr.stopWordFilter
   */
  lunr.generateStopWordFilter = function (stopWords) {
    var lookup = {}

    // each word maps to itself so that inherited object properties
    // (e.g. "constructor") can never be mistaken for a stop word
    stopWords.forEach(function (stopWord) {
      lookup[stopWord] = stopWord
    })

    return function (token) {
      if (!token) return

      var text = token.toString()
      if (lookup[text] !== text) return token
    }
  }
  /**
   * lunr.stopWordFilter is an English language stop word list filter, any words
   * contained in the list will not be passed through the filter.
   *
   * This is intended to be used in the Pipeline. If the token does not pass the
   * filter then undefined will be returned.
   *
   * It is built with lunr.generateStopWordFilter and registered with
   * lunr.Pipeline under the label 'stopWordFilter'.
   *
   * @implements {lunr.PipelineFunction}
   * @param {lunr.Token} token - A token to check for being a stop word.
   * @returns {lunr.Token}
   * @see {@link lunr.Pipeline}
   */
  lunr.stopWordFilter = lunr.generateStopWordFilter([
    'a', 'able', 'about', 'across', 'after', 'all', 'almost', 'also', 'am',
    'among', 'an', 'and', 'any', 'are', 'as', 'at',
    'be', 'because', 'been', 'but', 'by',
    'can', 'cannot', 'could',
    'dear', 'did', 'do', 'does',
    'either', 'else', 'ever', 'every',
    'for', 'from',
    'get', 'got',
    'had', 'has', 'have', 'he', 'her', 'hers', 'him', 'his', 'how', 'however',
    'i', 'if', 'in', 'into', 'is', 'it', 'its',
    'just',
    'least', 'let', 'like', 'likely',
    'may', 'me', 'might', 'most', 'must', 'my',
    'neither', 'no', 'nor', 'not',
    'of', 'off', 'often', 'on', 'only', 'or', 'other', 'our', 'own',
    'rather',
    'said', 'say', 'says', 'she', 'should', 'since', 'so', 'some',
    'than', 'that', 'the', 'their', 'them', 'then', 'there', 'these', 'they',
    'this', 'tis', 'to', 'too', 'twas',
    'us',
    'wants', 'was', 'we', 'were', 'what', 'when', 'where', 'which', 'while',
    'who', 'whom', 'why', 'will', 'with', 'would',
    'yet', 'you', 'your'
  ])

  lunr.Pipeline.registerFunction(lunr.stopWordFilter, 'stopWordFilter')
  988. /*!
  989. * lunr.trimmer
  990. * Copyright (C) 2017 Oliver Nightingale
  991. */
  /**
   * lunr.trimmer is a pipeline function for trimming non word
   * characters from the beginning and end of tokens before they
   * enter the index.
   *
   * "Non word" means anything matched by the regular expression class \W,
   * so this implementation may not work correctly for non latin
   * characters and should either be removed or adapted for use
   * with languages with non-latin characters.
   *
   * @static
   * @implements {lunr.PipelineFunction}
   * @param {lunr.Token} token The token to pass through the filter
   * @returns {lunr.Token}
   * @see lunr.Pipeline
   */
  lunr.trimmer = function (token) {
    var stripNonWordEdges = function (s) {
      var withoutLeading = s.replace(/^\W+/, '')
      return withoutLeading.replace(/\W+$/, '')
    }

    return token.update(stripNonWordEdges)
  }
  // Register the trimmer with lunr.Pipeline under the label 'trimmer'.
  lunr.Pipeline.registerFunction(lunr.trimmer, 'trimmer')
  1013. /*!
  1014. * lunr.TokenSet
  1015. * Copyright (C) 2017 Oliver Nightingale
  1016. */
  /**
   * A token set is used to store the unique list of all tokens
   * within an index. Token sets are also used to represent an
   * incoming query to the index, this query token set and index
   * token set are then intersected to find which tokens to look
   * up in the inverted index.
   *
   * A token set can hold multiple tokens, as in the case of the
   * index token set, or it can hold a single token as in the
   * case of a simple query token set.
   *
   * Additionally token sets are used to perform wildcard matching.
   * Leading, contained and trailing wildcards are supported, and
   * from this edit distance matching can also be provided.
   *
   * Token sets are implemented as a minimal finite state automata,
   * where both common prefixes and suffixes are shared between tokens.
   * This helps to reduce the space used for storing the token set.
   *
   * Each instance is a single node of that automaton: `final` marks the
   * end of a token, `edges` maps a character to the next node and `id`
   * is a unique, auto incremented identifier.
   *
   * @constructor
   */
  lunr.TokenSet = function () {
    this.final = false
    this.edges = {}
    this.id = lunr.TokenSet._nextId
    lunr.TokenSet._nextId += 1
  }

  /**
   * Keeps track of the next, auto increment, identifier to assign
   * to a new tokenSet.
   *
   * TokenSets require a unique identifier to be correctly minimised.
   *
   * @private
   */
  lunr.TokenSet._nextId = 1

  /**
   * Creates a TokenSet instance from the given sorted array of words.
   *
   * @param {String[]} arr - A sorted array of strings to create the set from.
   * @returns {lunr.TokenSet}
   * @throws Will throw an error if the input array is not sorted.
   */
  lunr.TokenSet.fromArray = function (arr) {
    var builder = new lunr.TokenSet.Builder

    for (var i = 0, len = arr.length; i < len; i++) {
      builder.insert(arr[i])
    }

    builder.finish()
    return builder.root
  }

  /**
   * Creates a token set from a query clause.
   *
   * A clause that has an `editDistance` property (whatever its value)
   * produces a fuzzy token set, any other clause a plain one.
   *
   * @private
   * @param {Object} clause - A single clause from lunr.Query.
   * @param {string} clause.term - The query clause term.
   * @param {number} [clause.editDistance] - The optional edit distance for the term.
   * @returns {lunr.TokenSet}
   */
  lunr.TokenSet.fromClause = function (clause) {
    if ('editDistance' in clause) {
      return lunr.TokenSet.fromFuzzyString(clause.term, clause.editDistance)
    } else {
      return lunr.TokenSet.fromString(clause.term)
    }
  }

  /**
   * Creates a token set representing a single string with a specified
   * edit distance.
   *
   * Insertions, deletions, substitutions and transpositions are each
   * treated as an edit distance of 1. Edits are represented with "*"
   * edges, which match any single character when intersecting.
   *
   * Increasing the allowed edit distance will have a dramatic impact
   * on the performance of both creating and intersecting these TokenSets.
   * It is advised to keep the edit distance less than 3.
   *
   * @param {string} str - The string to create the token set from.
   * @param {number} editDistance - The allowed edit distance to match.
   * @returns {lunr.TokenSet}
   */
  lunr.TokenSet.fromFuzzyString = function (str, editDistance) {
    var root = new lunr.TokenSet

    // Returns the node reached from `node` over `label`, creating it on demand
    var follow = function (node, label) {
      if (!(label in node.edges)) {
        node.edges[label] = new lunr.TokenSet
      }

      return node.edges[label]
    }

    // Each frame is a position in the automaton together with the part of
    // the string still to be consumed and the number of edits still allowed.
    var stack = [{
      node: root,
      editsRemaining: editDistance,
      str: str
    }]

    while (stack.length) {
      var frame = stack.pop(),
          node = frame.node,
          rest = frame.str,
          edits = frame.editsRemaining

      // no edit
      if (rest.length > 0) {
        var noEditNode = follow(node, rest.charAt(0))

        if (rest.length == 1) {
          noEditNode.final = true
        }

        // always continue, even with nothing left of the string, so that
        // any remaining edits can be spent on insertions after the last
        // character
        stack.push({
          node: noEditNode,
          editsRemaining: edits,
          str: rest.slice(1)
        })
      }

      // every remaining branch spends an edit
      if (!(edits > 0)) {
        continue
      }

      // deletion
      // skip the first character and consume the second one instead, this
      // requires at least two characters to be left in the string
      if (rest.length > 1) {
        var deletionNode = follow(node, rest.charAt(1))

        if (rest.length == 2) {
          deletionNode.final = true
        }

        stack.push({
          node: deletionNode,
          editsRemaining: edits - 1,
          str: rest.slice(2)
        })
      }

      // deletion
      // just removing the last character from the str
      if (rest.length == 1) {
        node.final = true
      }

      // substitution
      // can only be done if there are characters left to substitute
      if (rest.length >= 1) {
        var substitutionNode = follow(node, "*")

        if (rest.length == 1) {
          substitutionNode.final = true
        }

        stack.push({
          node: substitutionNode,
          editsRemaining: edits - 1,
          str: rest.slice(1)
        })
      }

      // insertion
      // an extra character may be inserted at any position, including
      // after the end of the string
      var insertionNode = follow(node, "*")

      if (rest.length == 0) {
        insertionNode.final = true
      }

      stack.push({
        node: insertionNode,
        editsRemaining: edits - 1,
        str: rest
      })

      // transposition
      // needs at least two characters to swap; the remaining string is
      // never empty here, so finality is handled by the pushed frame
      if (rest.length > 1) {
        var transposeNode = follow(node, rest.charAt(1))

        stack.push({
          node: transposeNode,
          editsRemaining: edits - 1,
          str: rest.charAt(0) + rest.slice(2)
        })
      }
    }

    return root
  }

  /**
   * Creates a TokenSet from a string.
   *
   * The string may contain one or more wildcard characters (*)
   * that will allow wildcard matching when intersecting with
   * another TokenSet.
   *
   * @param {string} str - The string to create a TokenSet from.
   * @returns {lunr.TokenSet}
   */
  lunr.TokenSet.fromString = function (str) {
    var node = new lunr.TokenSet,
        root = node,
        wildcardFound = false

    /*
     * Iterates through all characters within the passed string
     * appending a node for each character.
     *
     * As soon as a wildcard character is found then a self
     * referencing edge is introduced to continually match
     * any number of any characters.
     */
    for (var i = 0, len = str.length; i < len; i++) {
      var char = str[i],
          final = (i == len - 1)

      if (char == "*") {
        wildcardFound = true
        node.edges[char] = node
        node.final = final

      } else {
        var next = new lunr.TokenSet
        next.final = final

        node.edges[char] = next
        node = next

        // TODO: is this needed anymore?
        if (wildcardFound) {
          node.edges["*"] = root
        }
      }
    }

    return root
  }

  /**
   * Converts this TokenSet into an array of strings
   * contained within the TokenSet.
   *
   * The order of the returned strings follows the traversal of the
   * automaton and should not be relied upon.
   *
   * @returns {string[]}
   */
  lunr.TokenSet.prototype.toArray = function () {
    var words = []

    var stack = [{
      prefix: "",
      node: this
    }]

    while (stack.length) {
      var frame = stack.pop(),
          edges = Object.keys(frame.node.edges),
          len = edges.length

      if (frame.node.final) {
        words.push(frame.prefix)
      }

      for (var i = 0; i < len; i++) {
        var edge = edges[i]

        stack.push({
          prefix: frame.prefix.concat(edge),
          node: frame.node.edges[edge]
        })
      }
    }

    return words
  }

  /**
   * Generates a string representation of a TokenSet.
   *
   * This is intended to allow TokenSets to be used as keys
   * in objects, largely to aid the construction and minimisation
   * of a TokenSet. As such it is not designed to be a human
   * friendly representation of the TokenSet.
   *
   * The string is the finality flag ('1' or '0') followed by each edge
   * label, in sorted order, and the id of the node that edge leads to.
   *
   * @returns {string}
   */
  lunr.TokenSet.prototype.toString = function () {
    // NOTE: Using Object.keys here as this.edges is very likely
    // to enter 'hash-mode' with many keys being added
    //
    // avoiding a for-in loop here as it leads to the function
    // being de-optimised (at least in V8). From some simple
    // benchmarks the performance is comparable, but allowing
    // V8 to optimize may mean easy performance wins in the future.

    // a cached key is only ever set by the builder during minimisation
    if (this._str) {
      return this._str
    }

    var str = this.final ? '1' : '0',
        labels = Object.keys(this.edges).sort(),
        len = labels.length

    for (var i = 0; i < len; i++) {
      var label = labels[i],
          node = this.edges[label]

      str = str + label + node.id
    }

    return str
  }

  /**
   * Returns a new TokenSet that is the intersection of
   * this TokenSet and the passed TokenSet.
   *
   * This intersection will take into account any wildcards
   * contained within the passed TokenSet: a "*" edge in `b` matches
   * every edge of this TokenSet.
   *
   * @param {lunr.TokenSet} b - An other TokenSet to intersect with.
   * @returns {lunr.TokenSet}
   */
  lunr.TokenSet.prototype.intersect = function (b) {
    var output = new lunr.TokenSet,
        frame = undefined

    var stack = [{
      qNode: b,
      output: output,
      node: this
    }]

    while (stack.length) {
      frame = stack.pop()

      // NOTE: As with the #toString method, we are using
      // Object.keys and a for loop instead of a for-in loop
      // as both of these objects enter 'hash' mode, causing
      // the function to be de-optimised in V8
      var qEdges = Object.keys(frame.qNode.edges),
          qLen = qEdges.length,
          nEdges = Object.keys(frame.node.edges),
          nLen = nEdges.length

      for (var q = 0; q < qLen; q++) {
        var qEdge = qEdges[q]

        for (var n = 0; n < nLen; n++) {
          var nEdge = nEdges[n]

          if (nEdge == qEdge || qEdge == '*') {
            var node = frame.node.edges[nEdge],
                qNode = frame.qNode.edges[qEdge],
                final = node.final && qNode.final,
                next = undefined

            if (nEdge in frame.output.edges) {
              // an edge already exists for this character
              // no need to create a new node, just set the finality
              // bit unless this node is already final
              next = frame.output.edges[nEdge]
              next.final = next.final || final

            } else {
              // no edge exists yet, must create one
              // set the finality bit and insert it
              // into the output
              next = new lunr.TokenSet
              next.final = final
              frame.output.edges[nEdge] = next
            }

            stack.push({
              qNode: qNode,
              output: next,
              node: node
            })
          }
        }
      }
    }

    return output
  }

  /**
   * Incrementally builds a minimised TokenSet from words inserted in
   * sorted order. The finished set is available as `root` once `finish`
   * has been called.
   *
   * @constructor
   */
  lunr.TokenSet.Builder = function () {
    this.previousWord = ""
    this.root = new lunr.TokenSet
    // nodes of the most recently inserted word that may still gain edges
    this.uncheckedNodes = []
    // nodes that are complete, keyed by their string representation
    this.minimizedNodes = {}
  }

  /**
   * Adds a word to the set being built.
   *
   * @param {string} word - The word to add, must not sort before the
   * previously inserted word.
   * @throws {Error} If the word sorts before the previously inserted word.
   */
  lunr.TokenSet.Builder.prototype.insert = function (word) {
    var node,
        commonPrefix = 0

    if (word < this.previousWord) {
      throw new Error ("Out of order word insertion")
    }

    for (var i = 0; i < word.length && i < this.previousWord.length; i++) {
      if (word[i] != this.previousWord[i]) break
      commonPrefix++
    }

    // everything beyond the shared prefix of the previous word is complete
    this.minimize(commonPrefix)

    if (this.uncheckedNodes.length == 0) {
      node = this.root
    } else {
      node = this.uncheckedNodes[this.uncheckedNodes.length - 1].child
    }

    for (var i = commonPrefix; i < word.length; i++) {
      var nextNode = new lunr.TokenSet,
          char = word[i]

      node.edges[char] = nextNode

      this.uncheckedNodes.push({
        parent: node,
        char: char,
        child: nextNode
      })

      node = nextNode
    }

    node.final = true
    this.previousWord = word
  }

  /**
   * Completes the build by minimising all nodes that are still unchecked.
   */
  lunr.TokenSet.Builder.prototype.finish = function () {
    this.minimize(0)
  }

  /**
   * Minimises the unchecked nodes from the end of the list down to, and
   * including, position `downTo`, replacing each node for which an
   * equivalent node has already been seen with that existing node.
   *
   * @param {number} downTo - The position in the unchecked nodes at which
   * to stop.
   */
  lunr.TokenSet.Builder.prototype.minimize = function (downTo) {
    for (var i = this.uncheckedNodes.length - 1; i >= downTo; i--) {
      var node = this.uncheckedNodes[i],
          childKey = node.child.toString()

      if (childKey in this.minimizedNodes) {
        node.parent.edges[node.char] = this.minimizedNodes[childKey]
      } else {
        // Cache the key for this node since
        // we know it can't change anymore
        node.child._str = childKey

        this.minimizedNodes[childKey] = node.child
      }

      this.uncheckedNodes.pop()
    }
  }
  1438. /*!
  1439. * lunr.Index
  1440. * Copyright (C) 2017 Oliver Nightingale
  1441. */
  /**
   * An index contains the built index of all documents and provides a query interface
   * to the index.
   *
   * Usually instances of lunr.Index will not be created using this constructor, instead
   * lunr.Builder should be used to construct new indexes, or lunr.Index.load should be
   * used to load previously built and serialized indexes.
   *
   * The constructor only stores the passed attributes on the instance, it does not
   * copy or validate them.
   *
   * @constructor
   * @param {Object} attrs - The attributes of the built search index.
   * @param {Object} attrs.invertedIndex - An index of term/field to document reference.
   * @param {Object<string, lunr.Vector>} attrs.fieldVectors - The vectors used for scoring;
   * NOTE(review): presumably keyed by field reference - confirm against lunr.Builder.
   * @param {lunr.TokenSet} attrs.tokenSet - A set of all corpus tokens.
   * @param {string[]} attrs.fields - The names of indexed document fields.
   * @param {lunr.Pipeline} attrs.pipeline - The pipeline to use for search terms.
   */
  lunr.Index = function (attrs) {
    this.invertedIndex = attrs.invertedIndex
    this.fieldVectors = attrs.fieldVectors
    this.tokenSet = attrs.tokenSet
    this.fields = attrs.fields
    this.pipeline = attrs.pipeline
  }
  1465. /**
  1466. * A result contains details of a document matching a search query.
  1467. * @typedef {Object} lunr.Index~Result
  1468. * @property {string} ref - The reference of the document this result represents.
  1469. * @property {number} score - A number between 0 and 1 representing how similar this document is to the query.
  1470. * @property {lunr.MatchData} matchData - Contains metadata about this match including which term(s) caused the match.
  1471. */
  1472. /**
  1473. * Although lunr provides the ability to create queries using lunr.Query, it also provides a simple
  1474. * query language which itself is parsed into an instance of lunr.Query.
  1475. *
  1476. * For programmatically building queries it is advised to directly use lunr.Query, the query language
  1477. * is best used for human entered text rather than program generated text.
  1478. *
  1479. * At its simplest queries can just be a single term, e.g. `hello`, multiple terms are also supported
  1480. * and will be combined with OR, e.g `hello world` will match documents that contain either 'hello'
  1481. * or 'world', though those that contain both will rank higher in the results.
  1482. *
  1483. * Wildcards can be included in terms to match one or more unspecified characters, these wildcards can
  1484. * be inserted anywhere within the term, and more than one wildcard can exist in a single term. Adding
  1485. * wildcards will increase the number of documents that will be found but can also have a negative
  1486. * impact on query performance, especially with wildcards at the beginning of a term.
  1487. *
  1488. * Terms can be restricted to specific fields, e.g. `title:hello`, only documents with the term
  1489. * hello in the title field will match this query. Using a field not present in the index will lead
  1490. * to an error being thrown.
  1491. *
  1492. * Modifiers can also be added to terms, lunr supports edit distance and boost modifiers on terms. A term
  1493. * boost will make documents matching that term score higher, e.g. `foo^5`. Edit distance is also supported
  1494. * to provide fuzzy matching, e.g. 'hello~2' will match documents with hello with an edit distance of 2.
  1495. * Avoid large values for edit distance to improve query performance.
  1496. *
   * To escape special characters the backslash character '\' can be used, this allows searches to include
   * characters that would normally be considered modifiers, e.g. `foo\~2` will search for a term "foo~2" instead
   * of attempting to apply an edit distance of 2 to the search term "foo".
  1500. *
  1501. * @typedef {string} lunr.Index~QueryString
  1502. * @example <caption>Simple single term query</caption>
  1503. * hello
  1504. * @example <caption>Multiple term query</caption>
  1505. * hello world
  1506. * @example <caption>term scoped to a field</caption>
  1507. * title:hello
  1508. * @example <caption>term with a boost of 10</caption>
  1509. * hello^10
  1510. * @example <caption>term with an edit distance of 2</caption>
  1511. * hello~2
  1512. */
  /**
   * Performs a search against the index using lunr query syntax.
   *
   * The query string is parsed with lunr.QueryParser into the query object
   * supplied by lunr.Index#query, whose results are returned as they are.
   *
   * Results will be returned sorted by their score, the most relevant results
   * will be returned first.
   *
   * For more programmatic querying use lunr.Index#query.
   *
   * @param {lunr.Index~QueryString} queryString - A string containing a lunr query.
   * @throws {lunr.QueryParseError} If the passed query string cannot be parsed.
   * @returns {lunr.Index~Result[]}
   */
  lunr.Index.prototype.search = function (queryString) {
    var buildFromQueryString = function (query) {
      new lunr.QueryParser(queryString, query).parse()
    }

    return this.query(buildFromQueryString)
  }
  1531. /**
  1532. * A query builder callback provides a query object to be used to express
  1533. * the query to perform on the index.
  1534. *
  1535. * @callback lunr.Index~queryBuilder
  1536. * @param {lunr.Query} query - The query object to build up.
  1537. * @this lunr.Query
  1538. */
  /**
   * Performs a query against the index using the yielded lunr.Query object.
   *
   * Prefer this method over lunr.Index#search for programmatic queries, it
   * skips the query string parsing step.
   *
   * The supplied function is called synchronously, with a new lunr.Query as
   * both its context and its only argument, and is expected to add clauses to
   * it. This is _not_ an asynchronous operation despite the callback.
   *
   * For every clause the method then:
   *   - runs the term through the search pipeline (unless disabled),
   *   - expands it against the index token set,
   *   - collects matching document fields and their metadata,
   *   - builds a per-field query vector,
   * and finally scores each matching field and folds the scores into one
   * result per document.
   *
   * @param {lunr.Index~queryBuilder} fn - A function that is used to build the query.
   * @returns {lunr.Index~Result[]} Results sorted by score, highest first.
   */
  lunr.Index.prototype.query = function (fn) {
    var query = new lunr.Query(this.fields),
        fieldMatches = Object.create(null),   // field ref string -> lunr.MatchData
        vectorsByField = Object.create(null), // field name -> query lunr.Vector
        seenTermFields = Object.create(null), // "term/field" -> true once collected
        addWeights = function (a, b) { return a + b }

    fn.call(query, query)

    for (var c = 0; c < query.clauses.length; c++) {
      /*
       * A pipeline may expand one query term into several, so a single
       * clause can lead to multiple index lookups. Clauses that disabled
       * the pipeline use their term untouched.
       */
      var clause = query.clauses[c],
          terms = clause.usePipeline ? this.pipeline.runString(clause.term) : [clause.term]

      for (var t = 0; t < terms.length; t++) {
        /*
         * Every processed term shares the clause's other settings (boost,
         * edit distance, ...), so the clause object is re-used with its
         * term property overwritten.
         */
        clause.term = terms[t]

        /*
         * Intersecting the clause's token set with the index's token set
         * yields the concrete terms to look up in the inverted index.
         */
        var clauseTokenSet = lunr.TokenSet.fromClause(clause),
            expandedTerms = this.tokenSet.intersect(clauseTokenSet).toArray()

        for (var e = 0; e < expandedTerms.length; e++) {
          // The posting's _index locates the term within the vectors.
          var expandedTerm = expandedTerms[e],
              posting = this.invertedIndex[expandedTerm],
              termIndex = posting._index

          for (var f = 0; f < clause.fields.length; f++) {
            /*
             * For each field the clause is scoped to, fetch the refs of
             * all documents containing the term in that field.
             */
            var field = clause.fields[f],
                fieldPosting = posting[field],
                refsInField = Object.keys(fieldPosting),
                termFieldKey = expandedTerm + "/" + field

            /*
             * One query vector per field, created lazily. upsert adds to
             * any weight already recorded for this term.
             */
            if (vectorsByField[field] === undefined) {
              vectorsByField[field] = new lunr.Vector
            }

            vectorsByField[field].upsert(termIndex, 1 * clause.boost, addWeights)

            /*
             * Documents and metadata for a term/field pair only need to be
             * collected the first time the pair is encountered.
             */
            if (!seenTermFields[termFieldKey]) {
              for (var d = 0; d < refsInField.length; d++) {
                /*
                 * Gather the metadata for this term/field/document triple
                 * into a lunr.MatchData, creating or extending the entry
                 * for the document field.
                 */
                var matchedRef = refsInField[d],
                    matchedFieldRef = new lunr.FieldRef (matchedRef, field),
                    metadata = fieldPosting[matchedRef],
                    existingMatch = fieldMatches[matchedFieldRef]

                if (existingMatch === undefined) {
                  fieldMatches[matchedFieldRef] = new lunr.MatchData (expandedTerm, field, metadata)
                } else {
                  existingMatch.add(expandedTerm, field, metadata)
                }
              }

              seenTermFields[termFieldKey] = true
            }
          }
        }
      }
    }

    var matchedKeys = Object.keys(fieldMatches),
        results = [],
        resultsByDoc = Object.create(null)

    for (var r = 0; r < matchedKeys.length; r++) {
      /*
       * Matches are per document field but results are per document.
       * Each field is scored against that field's query vector; scores of
       * fields belonging to the same document are added together and
       * their match data combined.
       */
      var fieldRef = lunr.FieldRef.fromString(matchedKeys[r]),
          docRef = fieldRef.docRef,
          fieldScore = vectorsByField[fieldRef.fieldName].similarity(this.fieldVectors[fieldRef]),
          docResult = resultsByDoc[docRef]

      if (docResult === undefined) {
        docResult = {
          ref: docRef,
          score: fieldScore,
          matchData: fieldMatches[fieldRef]
        }
        resultsByDoc[docRef] = docResult
        results.push(docResult)
      } else {
        docResult.score += fieldScore
        docResult.matchData.combine(fieldMatches[fieldRef])
      }
    }

    // Highest score first.
    return results.sort(function (left, right) {
      return right.score - left.score
    })
  }
  /**
   * Prepares the index for JSON serialization.
   *
   * The inverted index is emitted as [term, posting] pairs ordered by term,
   * the field vectors as [fieldRef, serialized vector] pairs. The lunr
   * version, the field list and the serialized pipeline are included too.
   *
   * The schema for this JSON blob will be described in a
   * separate JSON schema file.
   *
   * @returns {Object}
   */
  lunr.Index.prototype.toJSON = function () {
    var self = this,
        sortedTerms = Object.keys(self.invertedIndex).sort(),
        postingPairs = [],
        vectorPairs = []

    for (var i = 0; i < sortedTerms.length; i++) {
      postingPairs.push([sortedTerms[i], self.invertedIndex[sortedTerms[i]]])
    }

    Object.keys(self.fieldVectors).forEach(function (ref) {
      vectorPairs.push([ref, self.fieldVectors[ref].toJSON()])
    })

    return {
      version: lunr.version,
      fields: self.fields,
      fieldVectors: vectorPairs,
      invertedIndex: postingPairs,
      pipeline: self.pipeline.toJSON()
    }
  }
  /**
   * Loads a previously serialized lunr.Index.
   *
   * Field vectors and the inverted index are rebuilt from their serialized
   * [key, value] pairs, the token set is rebuilt by inserting every term of
   * the inverted index in serialized order, and the search pipeline is
   * restored with lunr.Pipeline.load.
   *
   * A warning is logged via lunr.utils.warn when the serialized version
   * differs from the running lunr version; loading still proceeds.
   *
   * @param {Object} serializedIndex - A previously serialized lunr.Index
   * @returns {lunr.Index}
   */
  lunr.Index.load = function (serializedIndex) {
    var attrs = {},
        fieldVectors = {},
        serializedVectors = serializedIndex.fieldVectors,
        // The inverted index is keyed by arbitrary terms from the indexed
        // documents. A prototype-less object keeps terms such as "__proto__"
        // as plain own keys, matching what lunr.Builder creates.
        invertedIndex = Object.create(null),
        serializedInvertedIndex = serializedIndex.invertedIndex,
        tokenSetBuilder = new lunr.TokenSet.Builder,
        pipeline = lunr.Pipeline.load(serializedIndex.pipeline)

    if (serializedIndex.version != lunr.version) {
      lunr.utils.warn("Version mismatch when loading serialised index. Current version of lunr '" + lunr.version + "' does not match serialized index '" + serializedIndex.version + "'")
    }

    for (var i = 0; i < serializedVectors.length; i++) {
      var vectorTuple = serializedVectors[i],
          ref = vectorTuple[0],
          elements = vectorTuple[1]

      fieldVectors[ref] = new lunr.Vector(elements)
    }

    for (var j = 0; j < serializedInvertedIndex.length; j++) {
      var postingTuple = serializedInvertedIndex[j],
          term = postingTuple[0],
          posting = postingTuple[1]

      tokenSetBuilder.insert(term)
      invertedIndex[term] = posting
    }

    tokenSetBuilder.finish()

    attrs.fields = serializedIndex.fields
    attrs.fieldVectors = fieldVectors
    attrs.invertedIndex = invertedIndex
    attrs.tokenSet = tokenSetBuilder.root
    attrs.pipeline = pipeline

    return new lunr.Index(attrs)
  }
  1767. /*!
  1768. * lunr.Builder
  1769. * Copyright (C) 2017 Oliver Nightingale
  1770. */
  /**
   * lunr.Builder performs indexing on a set of documents and
   * returns instances of lunr.Index ready for querying.
   *
   * Everything configurable about an index lives on the builder: the
   * document reference field, the fields to index, the text processing
   * pipelines and the scoring parameters. Configure these before adding
   * documents.
   *
   * @constructor
   * @property {string} _ref - Internal reference to the document reference field.
   * @property {string[]} _fields - Internal reference to the document fields to index.
   * @property {object} invertedIndex - The inverted index maps terms to document fields.
   * @property {object} fieldTermFrequencies - Keeps track of term frequencies per document field.
   * @property {object} fieldLengths - Keeps track of the length of each document field added to the index.
   * @property {lunr.tokenizer} tokenizer - Function for splitting strings into tokens for indexing.
   * @property {lunr.Pipeline} pipeline - The pipeline performs text processing on tokens before indexing.
   * @property {lunr.Pipeline} searchPipeline - A pipeline for processing search terms before querying the index.
   * @property {number} documentCount - Keeps track of the total number of documents indexed.
   * @property {number} _b - A parameter to control field length normalization, setting this to 0 disables normalization, 1 fully normalizes field lengths, the default value is 0.75.
   * @property {number} _k1 - A parameter to control how quickly an increase in term frequency results in term frequency saturation, the default value is 1.2.
   * @property {number} termIndex - A counter incremented for each unique term, used to identify a term's position in the vector space.
   * @property {array} metadataWhitelist - A list of metadata keys that have been whitelisted for entry in the index.
   */
  lunr.Builder = function () {
    // document schema
    this._ref = "id"
    this._fields = []

    // index state accumulated while documents are added
    this.invertedIndex = Object.create(null)
    this.fieldTermFrequencies = {}
    this.fieldLengths = {}

    // text processing
    this.tokenizer = lunr.tokenizer
    this.pipeline = new lunr.Pipeline()
    this.searchPipeline = new lunr.Pipeline()

    // counters and scoring parameters
    this.documentCount = 0
    this._b = 0.75
    this._k1 = 1.2
    this.termIndex = 0
    this.metadataWhitelist = []
  }
  /**
   * Chooses which document field identifies a document. The default is 'id'
   * and every document must carry the chosen field. Its value is used as an
   * object key while indexing, so non-string values end up as strings.
   *
   * Set the ref before adding any document; changing it part way through
   * indexing can lead to inconsistent results.
   *
   * @param {string} ref - The name of the reference field in the document.
   */
  lunr.Builder.prototype.ref = function (ref) {
    this._ref = ref
  }
  /**
   * Registers another document field for indexing by appending it to the
   * builder's field list. Documents should have every registered field; a
   * null value does not cause an error but makes the document less likely
   * to be retrieved by searches.
   *
   * Register all fields before adding documents: a field added later has
   * no effect on documents that were already indexed.
   *
   * @param {string} field - The name of a field to index in all documents.
   */
  lunr.Builder.prototype.field = function (field) {
    var fields = this._fields

    fields.push(field)
  }
  /**
   * Sets the amount of field length normalisation applied when calculating
   * relevance scores: 0 disables normalisation, 1 normalises field lengths
   * fully. The default is 0.75. Values below 0 are stored as 0 and values
   * above 1 are stored as 1.
   *
   * @param {number} number - The value to set for this tuning parameter.
   */
  lunr.Builder.prototype.b = function (number) {
    if (number < 0) {
      this._b = 0
      return
    }

    if (number > 1) {
      this._b = 1
      return
    }

    this._b = number
  }
  /**
   * Sets how quickly a rising term frequency reaches saturation. Higher
   * values saturate more slowly, lower values more quickly. The default is
   * 1.2. The value is stored as given, without validation.
   *
   * @param {number} number - The value to set for this tuning parameter.
   */
  lunr.Builder.prototype.k1 = function (number) {
    this._k1 = number
  }
  /**
   * Adds a document to the index.
   *
   * The builder should be fully configured first: the document ref and all
   * fields to index must already be specified.
   *
   * The document must contain the ref field (by default 'id') and should
   * define every indexed field, though null or undefined values will not
   * cause errors.
   *
   * For each configured field the value is tokenized and run through the
   * indexing pipeline. The builder then records the field's length and term
   * frequencies, and adds an inverted index entry (with any whitelisted
   * token metadata) per term.
   *
   * @param {object} doc - The document to add to the index.
   */
  lunr.Builder.prototype.add = function (doc) {
    var docRef = doc[this._ref]

    this.documentCount += 1

    for (var f = 0; f < this._fields.length; f++) {
      var fieldName = this._fields[f],
          tokens = this.tokenizer(doc[fieldName]),
          terms = this.pipeline.run(tokens),
          fieldRef = new lunr.FieldRef (docRef, fieldName),
          termCounts = Object.create(null)

      this.fieldTermFrequencies[fieldRef] = termCounts

      // the field's length is the number of terms left after the pipeline
      this.fieldLengths[fieldRef] = terms.length

      for (var t = 0; t < terms.length; t++) {
        var term = terms[t]

        // term frequency within this document field
        termCounts[term] = (termCounts[term] == undefined ? 0 : termCounts[term]) + 1

        // first sighting of the term anywhere: give it the next term index
        // and an empty bucket for every configured field
        var posting = this.invertedIndex[term]

        if (posting == undefined) {
          posting = Object.create(null)
          posting["_index"] = this.termIndex
          this.termIndex += 1

          for (var k = 0; k < this._fields.length; k++) {
            posting[this._fields[k]] = Object.create(null)
          }

          this.invertedIndex[term] = posting
        }

        // entry for this term/fieldName/docRef triple
        var docsInField = posting[fieldName]

        if (docsInField[docRef] == undefined) {
          docsInField[docRef] = Object.create(null)
        }

        // append every whitelisted piece of token metadata to the entry
        var entry = docsInField[docRef]

        for (var m = 0; m < this.metadataWhitelist.length; m++) {
          var metadataKey = this.metadataWhitelist[m],
              metadataValue = term.metadata[metadataKey]

          if (entry[metadataKey] == undefined) {
            entry[metadataKey] = []
          }

          entry[metadataKey].push(metadataValue)
        }
      }
    }
  }
  /**
   * Calculates the average length of every indexed field and stores the
   * result, keyed by field name, on this.averageFieldLength.
   *
   * Lengths recorded in this.fieldLengths are summed per field and divided
   * by the number of documents that had a length recorded for that field.
   * A configured field with no recorded documents gets an average of 0.
   *
   * @private
   */
  lunr.Builder.prototype.calculateAverageFieldLengths = function () {
    // Both maps are keyed by user supplied field names. Prototype-less
    // objects make sure names such as "constructor" or "toString" start out
    // unset instead of resolving to inherited Object.prototype members.
    var fieldRefs = Object.keys(this.fieldLengths),
        numberOfFields = fieldRefs.length,
        accumulator = Object.create(null),
        documentsWithField = Object.create(null)

    for (var i = 0; i < numberOfFields; i++) {
      var fieldRef = lunr.FieldRef.fromString(fieldRefs[i]),
          field = fieldRef.fieldName

      documentsWithField[field] = (documentsWithField[field] || 0) + 1
      accumulator[field] = (accumulator[field] || 0) + this.fieldLengths[fieldRef]
    }

    for (var j = 0; j < this._fields.length; j++) {
      var fieldName = this._fields[j],
          documentTotal = documentsWithField[fieldName]

      // avoid undefined / undefined (NaN) for fields no document recorded
      accumulator[fieldName] = documentTotal ? accumulator[fieldName] / documentTotal : 0
    }

    this.averageFieldLength = accumulator
  }
  /**
   * Builds a lunr.Vector for every indexed document field and stores them,
   * keyed by field ref, on this.fieldVectors.
   *
   * Each term of a field contributes one vector element at the term's index.
   * The element's value combines the term's idf, its frequency in the field,
   * the field length relative to the average length of that field, and the
   * _k1 and _b tuning parameters.
   *
   * Relies on this.averageFieldLength having been calculated beforehand.
   *
   * @private
   */
  lunr.Builder.prototype.createFieldVectors = function () {
    var vectors = {},
        idfByTerm = Object.create(null),
        refKeys = Object.keys(this.fieldTermFrequencies),
        k1 = this._k1,
        b = this._b

    for (var r = 0; r < refKeys.length; r++) {
      var fieldRef = lunr.FieldRef.fromString(refKeys[r]),
          field = fieldRef.fieldName,
          fieldLength = this.fieldLengths[fieldRef],
          vector = new lunr.Vector,
          frequencies = this.fieldTermFrequencies[fieldRef],
          fieldTerms = Object.keys(frequencies)

      for (var t = 0; t < fieldTerms.length; t++) {
        var term = fieldTerms[t],
            tf = frequencies[term],
            termIndex = this.invertedIndex[term]._index,
            idf = idfByTerm[term]

        // idf depends only on the term, so compute it once per term
        if (idf === undefined) {
          idf = lunr.idf(this.invertedIndex[term], this.documentCount)
          idfByTerm[term] = idf
        }

        var numerator = idf * ((k1 + 1) * tf),
            denominator = k1 * (1 - b + b * (fieldLength / this.averageFieldLength[field])) + tf,
            score = numerator / denominator

        // Rounds to three decimal places, e.g. 1.23456789 becomes 1.235.
        // The reduced precision makes serialised vectors smaller, and
        // rounding at build time keeps vectors identical before and after
        // serialisation.
        var roundedScore = Math.round(score * 1000) / 1000

        vector.insert(termIndex, roundedScore)
      }

      vectors[fieldRef] = vector
    }

    this.fieldVectors = vectors
  }
  /**
   * Builds a lunr.TokenSet from the sorted terms of the inverted index and
   * stores it on this.tokenSet.
   *
   * @private
   */
  lunr.Builder.prototype.createTokenSet = function () {
    var sortedTerms = Object.keys(this.invertedIndex)

    sortedTerms.sort()

    this.tokenSet = lunr.TokenSet.fromArray(sortedTerms)
  }
  /**
   * Builds the index, creating an instance of lunr.Index.
   *
   * Average field lengths, field vectors and the token set are derived from
   * the documents added so far, in that order, and handed to a new
   * lunr.Index together with the inverted index, the field list and the
   * search pipeline. Call this only after all documents have been added.
   *
   * @returns {lunr.Index}
   */
  lunr.Builder.prototype.build = function () {
    this.calculateAverageFieldLengths()
    this.createFieldVectors()
    this.createTokenSet()

    var indexAttributes = {
      invertedIndex: this.invertedIndex,
      fieldVectors: this.fieldVectors,
      tokenSet: this.tokenSet,
      fields: this._fields,
      pipeline: this.searchPipeline
    }

    return new lunr.Index(indexAttributes)
  }
  /**
   * Applies a plugin to the index builder.
   *
   * A plugin is just a function that encapsulates some customisation or
   * extension of the index building behaviour. It is invoked with the
   * builder as its context and also as its first argument; any further
   * arguments given to use are passed along after the builder.
   *
   * @param {Function} fn - The plugin to apply.
   */
  lunr.Builder.prototype.use = function (fn) {
    var pluginArgs = [this]

    // forward everything after the plugin function itself
    for (var i = 1; i < arguments.length; i++) {
      pluginArgs.push(arguments[i])
    }

    fn.apply(this, pluginArgs)
  }
  /**
   * Contains and collects metadata about a matching document.
   * A single instance of lunr.MatchData is returned as part of every
   * lunr.Index~Result.
   *
   * The instance starts out holding one entry: metadata[term][field], a copy
   * of the metadata passed in.
   *
   * @constructor
   * @param {string} term - The term this match data is associated with
   * @param {string} field - The field in which the term was found
   * @param {object} metadata - The metadata recorded about this term in this field
   * @property {object} metadata - A cloned collection of metadata associated with this document.
   * @see {@link lunr.Index~Result}
   */
  lunr.MatchData = function (term, field, metadata) {
    // Copy the metadata so that combining match data later cannot touch
    // the original. Each metadata value is copied with slice().
    var metadataCopy = Object.create(null)

    Object.keys(metadata).forEach(function (key) {
      metadataCopy[key] = metadata[key].slice()
    })

    var fieldsForTerm = Object.create(null)
    fieldsForTerm[field] = metadataCopy

    this.metadata = Object.create(null)
    this.metadata[term] = fieldsForTerm
  }
  /**
   * Merges the metadata of another lunr.MatchData into this one.
   *
   * A lunr.MatchData is created per matching document field, but a
   * lunr.Index~Result carries only one, so the others are folded into it.
   * Term and field entries missing here are created; for a metadata key that
   * exists on both sides the other instance's values are concatenated after
   * this instance's values.
   *
   * @param {lunr.MatchData} otherMatchData - Another instance of match data to merge with this one.
   * @see {@link lunr.Index~Result}
   */
  lunr.MatchData.prototype.combine = function (otherMatchData) {
    var own = this.metadata,
        incoming = otherMatchData.metadata,
        incomingTerms = Object.keys(incoming)

    for (var t = 0; t < incomingTerms.length; t++) {
      var term = incomingTerms[t],
          incomingFields = Object.keys(incoming[term])

      if (own[term] == undefined) {
        own[term] = Object.create(null)
      }

      for (var f = 0; f < incomingFields.length; f++) {
        var field = incomingFields[f],
            incomingKeys = Object.keys(incoming[term][field])

        if (own[term][field] == undefined) {
          own[term][field] = Object.create(null)
        }

        for (var k = 0; k < incomingKeys.length; k++) {
          var key = incomingKeys[k],
              incomingValues = incoming[term][field][key],
              ownValues = own[term][field][key]

          own[term][field][key] = ownValues == undefined ? incomingValues : ownValues.concat(incomingValues)
        }
      }
    }
  }
  /**
   * Add metadata for a term/field pair to this instance of match data.
   *
   * When the term/field pair is new, a copy of the metadata is stored. When
   * the pair already exists, the values of each metadata key are appended to
   * the values already held, and keys not yet present are added.
   *
   * The passed metadata object is never stored directly and never modified.
   * Array values are copied before being kept, so later merges into this
   * match data cannot alter the object the caller passed in.
   *
   * @param {string} term - The term this match data is associated with
   * @param {string} field - The field in which the term was found
   * @param {object} metadata - The metadata recorded about this term in this field
   */
  lunr.MatchData.prototype.add = function (term, field, metadata) {
    var metadataKeys = Object.keys(metadata),
        copyValue = function (value) {
          return Array.isArray(value) ? value.slice() : value
        }

    if (!(term in this.metadata)) {
      this.metadata[term] = Object.create(null)
    }

    var fieldsForTerm = this.metadata[term]

    if (!(field in fieldsForTerm)) {
      // first metadata for this term/field pair: keep a private copy
      var copied = Object.create(null)

      for (var i = 0; i < metadataKeys.length; i++) {
        copied[metadataKeys[i]] = copyValue(metadata[metadataKeys[i]])
      }

      fieldsForTerm[field] = copied
      return
    }

    var existing = fieldsForTerm[field]

    for (var j = 0; j < metadataKeys.length; j++) {
      var key = metadataKeys[j]

      if (key in existing) {
        // concat returns a new array, neither input is modified
        existing[key] = existing[key].concat(metadata[key])
      } else {
        existing[key] = copyValue(metadata[key])
      }
    }
  }
  /**
   * A lunr.Query provides a programmatic way of defining queries to be performed
   * against a {@link lunr.Index}.
   *
   * A query starts with no clauses. Prefer obtaining one through
   * {@link lunr.Index#query}, which passes the index's own fields as allFields.
   *
   * @constructor
   * @param {string[]} allFields - The fields available in the index being queried.
   * @property {lunr.Query~Clause[]} clauses - An array of query clauses.
   * @property {string[]} allFields - An array of all available fields in a lunr.Index.
   */
  lunr.Query = function (allFields) {
    this.clauses = []
    this.allFields = allFields
  }
  /**
   * The wildcard character, together with constants selecting which automatic
   * wildcard insertion is used when constructing a query clause.
   *
   * With these constants a wildcard can be put at the start and/or end of a
   * term without doing the string concatenation by hand. LEADING and TRAILING
   * are distinct bits and may be combined with bitwise OR to request both.
   *
   * The value is a String object (rather than a primitive) so that the
   * constants can be attached to it as properties.
   *
   * @constant
   * @default
   * @property {number} wildcard.NONE - The term will have no wildcards inserted, this is the default behaviour
   * @property {number} wildcard.LEADING - Prepend the term with a wildcard, unless a leading wildcard already exists
   * @property {number} wildcard.TRAILING - Append a wildcard to the term, unless a trailing wildcard already exists
   * @see lunr.Query~Clause
   * @see lunr.Query#clause
   * @see lunr.Query#term
   * @example <caption>query term with trailing wildcard</caption>
   * query.term('foo', { wildcard: lunr.Query.wildcard.TRAILING })
   * @example <caption>query term with leading and trailing wildcard</caption>
   * query.term('foo', {
   *   wildcard: lunr.Query.wildcard.LEADING | lunr.Query.wildcard.TRAILING
   * })
   */
  lunr.Query.wildcard = (function () {
    var wildcard = new String ("*")

    wildcard.NONE = 0
    wildcard.LEADING = 1
    wildcard.TRAILING = 2

    return wildcard
  })()
  2173. /**
  2174. * A single clause in a {@link lunr.Query} contains a term and details on how to
  2175. * match that term against a {@link lunr.Index}.
  2176. *
  2177. * @typedef {Object} lunr.Query~Clause
  2178. * @property {string[]} fields - The fields in an index this clause should be matched against.
  2179. * @property {number} [boost=1] - Any boost that should be applied when matching this clause.
  2180. * @property {number} [editDistance] - Whether the term should have fuzzy matching applied, and how fuzzy the match should be.
  2181. * @property {boolean} [usePipeline] - Whether the term should be passed through the search pipeline.
  2182. * @property {number} [wildcard=0] - Whether the term should have wildcards appended or prepended.
  2183. */
  /**
   * Adds a {@link lunr.Query~Clause} to this query.
   *
   * Properties missing from the clause are filled in on the passed object:
   * fields defaults to all fields of the query, boost to 1, usePipeline to
   * true and wildcard to lunr.Query.wildcard.NONE. If the wildcard flags ask
   * for a leading or trailing wildcard, one is added to the term unless the
   * term already starts or ends with one.
   *
   * @param {lunr.Query~Clause} clause - The clause to add to this query.
   * @see lunr.Query~Clause
   * @returns {lunr.Query}
   */
  lunr.Query.prototype.clause = function (clause) {
    var defaults = [
      ['fields', this.allFields],
      ['boost', 1],
      ['usePipeline', true],
      ['wildcard', lunr.Query.wildcard.NONE]
    ]

    for (var i = 0; i < defaults.length; i++) {
      var property = defaults[i][0]

      if (!(property in clause)) {
        clause[property] = defaults[i][1]
      }
    }

    // lunr.Query.wildcard is a String object, the loose comparisons below
    // coerce it to "*" when checked against the term's first or last character
    if (clause.wildcard & lunr.Query.wildcard.LEADING) {
      if (clause.term.charAt(0) != lunr.Query.wildcard) {
        clause.term = "*" + clause.term
      }
    }

    if (clause.wildcard & lunr.Query.wildcard.TRAILING) {
      if (clause.term.slice(-1) != lunr.Query.wildcard) {
        clause.term = "" + clause.term + "*"
      }
    }

    this.clauses.push(clause)

    return this
  }
  /**
   * Adds a term to the current query, under the covers this will create a {@link lunr.Query~Clause}
   * and add it to the list of clauses that make up this query.
   *
   * The options are copied into a new clause object, the passed options
   * object itself is left untouched. This makes it safe to share one options
   * object between several calls; each call gets its own clause.
   *
   * @param {string} term - The term to add to the query.
   * @param {Object} [options] - Any additional properties to add to the query clause.
   * @returns {lunr.Query}
   * @see lunr.Query#clause
   * @see lunr.Query~Clause
   * @example <caption>adding a single term to a query</caption>
   * query.term("foo")
   * @example <caption>adding a single term to a query and specifying search fields, term boost and automatic trailing wildcard</caption>
   * query.term("foo", {
   *   fields: ["title"],
   *   boost: 10,
   *   wildcard: lunr.Query.wildcard.TRAILING
   * })
   */
  lunr.Query.prototype.term = function (term, options) {
    var clause = {}

    // for-in is a no-op for missing options. Inherited enumerable properties
    // are copied as well, because lunr.Query#clause detects supplied
    // properties with the `in` operator.
    for (var property in options) {
      clause[property] = options[property]
    }

    clause.term = term

    this.clause(clause)

    return this
  }
  /**
   * Error carrying a message plus the start and end values supplied by the
   * code that raises it (presumably positions within the query string,
   * confirm against the parser). Instances inherit from Error.
   *
   * @constructor
   * @param {string} message - Description of the problem.
   * @param {number} start - Start value associated with the error.
   * @param {number} end - End value associated with the error.
   */
  lunr.QueryParseError = function (message, start, end) {
    this.name = "QueryParseError"
    this.message = message
    this.start = start
    this.end = end
  }

  // inherit from Error so instanceof Error checks succeed
  lunr.QueryParseError.prototype = new Error()
  /**
   * Splits a query string into lexemes.
   *
   * The lexer keeps a window over the string: start marks the beginning of
   * the lexeme currently being read and pos the next character to read.
   * Positions of escape characters inside the window are remembered so they
   * can be left out of the emitted lexeme text.
   *
   * @constructor
   * @param {string} str - The query string to lex.
   */
  lunr.QueryLexer = function (str) {
    this.lexemes = []             // lexemes emitted so far
    this.str = str
    this.length = str.length
    this.pos = 0                  // index of the next character to read
    this.start = 0                // index where the current lexeme begins
    this.escapeCharPositions = [] // escape characters in the current lexeme
  }
  /**
   * Runs the lexer as a state machine. Each state is a function that takes
   * the lexer and returns the next state; lexing starts in
   * lunr.QueryLexer.lexText and stops once a state returns a falsy value.
   */
  lunr.QueryLexer.prototype.run = function () {
    for (var state = lunr.QueryLexer.lexText; state; state = state(this)) {
      // all the work happens inside the state functions
    }
  }
  /**
   * Returns the text between start and pos with the characters at the
   * recorded escape positions removed, then forgets those positions.
   *
   * @returns {string}
   */
  lunr.QueryLexer.prototype.sliceString = function () {
    var text = '',
        from = this.start

    // copy the text piece by piece, skipping each escape character
    for (var i = 0; i < this.escapeCharPositions.length; i++) {
      var escapeAt = this.escapeCharPositions[i]

      text += this.str.slice(from, escapeAt)
      from = escapeAt + 1
    }

    text += this.str.slice(from, this.pos)

    this.escapeCharPositions.length = 0

    return text
  }
  /**
   * Records the text between start and pos as a lexeme of the given type,
   * then moves start up to pos so the next lexeme begins there.
   *
   * @param {string} type - The lexeme type, e.g. lunr.QueryLexer.TERM.
   */
  lunr.QueryLexer.prototype.emit = function (type) {
    var lexeme = {
      type: type,
      str: this.sliceString(),
      start: this.start,
      end: this.pos
    }

    this.lexemes.push(lexeme)
    this.start = this.pos
  }
  /**
   * Remembers the character just before pos as an escape character and
   * advances pos by one, stepping over the character that follows it.
   */
  lunr.QueryLexer.prototype.escapeCharacter = function () {
    var escapeIndex = this.pos - 1

    this.escapeCharPositions.push(escapeIndex)
    this.pos += 1
  }
  2287. lunr.QueryLexer.prototype.next = function () {
  2288. if (this.pos >= this.length) {
  2289. return lunr.QueryLexer.EOS
  2290. }
  2291. var char = this.str.charAt(this.pos)
  2292. this.pos += 1
  2293. return char
  2294. }
  2295. lunr.QueryLexer.prototype.width = function () {
  2296. return this.pos - this.start
  2297. }
  2298. lunr.QueryLexer.prototype.ignore = function () {
  2299. if (this.start == this.pos) {
  2300. this.pos += 1
  2301. }
  2302. this.start = this.pos
  2303. }
  2304. lunr.QueryLexer.prototype.backup = function () {
  2305. this.pos -= 1
  2306. }
  2307. lunr.QueryLexer.prototype.acceptDigitRun = function () {
  2308. var char, charCode
  2309. do {
  2310. char = this.next()
  2311. charCode = char.charCodeAt(0)
  2312. } while (charCode > 47 && charCode < 58)
  2313. if (char != lunr.QueryLexer.EOS) {
  2314. this.backup()
  2315. }
  2316. }
  2317. lunr.QueryLexer.prototype.more = function () {
  2318. return this.pos < this.length
  2319. }
  2320. lunr.QueryLexer.EOS = 'EOS'
  2321. lunr.QueryLexer.FIELD = 'FIELD'
  2322. lunr.QueryLexer.TERM = 'TERM'
  2323. lunr.QueryLexer.EDIT_DISTANCE = 'EDIT_DISTANCE'
  2324. lunr.QueryLexer.BOOST = 'BOOST'
  2325. lunr.QueryLexer.lexField = function (lexer) {
  2326. lexer.backup()
  2327. lexer.emit(lunr.QueryLexer.FIELD)
  2328. lexer.ignore()
  2329. return lunr.QueryLexer.lexText
  2330. }
  2331. lunr.QueryLexer.lexTerm = function (lexer) {
  2332. if (lexer.width() > 1) {
  2333. lexer.backup()
  2334. lexer.emit(lunr.QueryLexer.TERM)
  2335. }
  2336. lexer.ignore()
  2337. if (lexer.more()) {
  2338. return lunr.QueryLexer.lexText
  2339. }
  2340. }
  2341. lunr.QueryLexer.lexEditDistance = function (lexer) {
  2342. lexer.ignore()
  2343. lexer.acceptDigitRun()
  2344. lexer.emit(lunr.QueryLexer.EDIT_DISTANCE)
  2345. return lunr.QueryLexer.lexText
  2346. }
  2347. lunr.QueryLexer.lexBoost = function (lexer) {
  2348. lexer.ignore()
  2349. lexer.acceptDigitRun()
  2350. lexer.emit(lunr.QueryLexer.BOOST)
  2351. return lunr.QueryLexer.lexText
  2352. }
  2353. lunr.QueryLexer.lexEOS = function (lexer) {
  2354. if (lexer.width() > 0) {
  2355. lexer.emit(lunr.QueryLexer.TERM)
  2356. }
  2357. }
  2358. // This matches the separator used when tokenising fields
  2359. // within a document. These should match otherwise it is
  2360. // not possible to search for some tokens within a document.
  2361. //
  2362. // It is possible for the user to change the separator on the
  2363. // tokenizer so it _might_ clash with any other of the special
  2364. // characters already used within the search string, e.g. :.
  2365. //
  2366. // This means that it is possible to change the separator in
  2367. // such a way that makes some words unsearchable using a search
  2368. // string.
  2369. lunr.QueryLexer.termSeparator = lunr.tokenizer.separator
  2370. lunr.QueryLexer.lexText = function (lexer) {
  2371. while (true) {
  2372. var char = lexer.next()
  2373. if (char == lunr.QueryLexer.EOS) {
  2374. return lunr.QueryLexer.lexEOS
  2375. }
  2376. // Escape character is '\'
  2377. if (char.charCodeAt(0) == 92) {
  2378. lexer.escapeCharacter()
  2379. continue
  2380. }
  2381. if (char == ":") {
  2382. return lunr.QueryLexer.lexField
  2383. }
  2384. if (char == "~") {
  2385. lexer.backup()
  2386. if (lexer.width() > 0) {
  2387. lexer.emit(lunr.QueryLexer.TERM)
  2388. }
  2389. return lunr.QueryLexer.lexEditDistance
  2390. }
  2391. if (char == "^") {
  2392. lexer.backup()
  2393. if (lexer.width() > 0) {
  2394. lexer.emit(lunr.QueryLexer.TERM)
  2395. }
  2396. return lunr.QueryLexer.lexBoost
  2397. }
  2398. if (char.match(lunr.QueryLexer.termSeparator)) {
  2399. return lunr.QueryLexer.lexTerm
  2400. }
  2401. }
  2402. }
  2403. lunr.QueryParser = function (str, query) {
  2404. this.lexer = new lunr.QueryLexer (str)
  2405. this.query = query
  2406. this.currentClause = {}
  2407. this.lexemeIdx = 0
  2408. }
  2409. lunr.QueryParser.prototype.parse = function () {
  2410. this.lexer.run()
  2411. this.lexemes = this.lexer.lexemes
  2412. var state = lunr.QueryParser.parseFieldOrTerm
  2413. while (state) {
  2414. state = state(this)
  2415. }
  2416. return this.query
  2417. }
  2418. lunr.QueryParser.prototype.peekLexeme = function () {
  2419. return this.lexemes[this.lexemeIdx]
  2420. }
  2421. lunr.QueryParser.prototype.consumeLexeme = function () {
  2422. var lexeme = this.peekLexeme()
  2423. this.lexemeIdx += 1
  2424. return lexeme
  2425. }
  2426. lunr.QueryParser.prototype.nextClause = function () {
  2427. var completedClause = this.currentClause
  2428. this.query.clause(completedClause)
  2429. this.currentClause = {}
  2430. }
  2431. lunr.QueryParser.parseFieldOrTerm = function (parser) {
  2432. var lexeme = parser.peekLexeme()
  2433. if (lexeme == undefined) {
  2434. return
  2435. }
  2436. switch (lexeme.type) {
  2437. case lunr.QueryLexer.FIELD:
  2438. return lunr.QueryParser.parseField
  2439. case lunr.QueryLexer.TERM:
  2440. return lunr.QueryParser.parseTerm
  2441. default:
  2442. var errorMessage = "expected either a field or a term, found " + lexeme.type
  2443. if (lexeme.str.length >= 1) {
  2444. errorMessage += " with value '" + lexeme.str + "'"
  2445. }
  2446. throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  2447. }
  2448. }
  2449. lunr.QueryParser.parseField = function (parser) {
  2450. var lexeme = parser.consumeLexeme()
  2451. if (lexeme == undefined) {
  2452. return
  2453. }
  2454. if (parser.query.allFields.indexOf(lexeme.str) == -1) {
  2455. var possibleFields = parser.query.allFields.map(function (f) { return "'" + f + "'" }).join(', '),
  2456. errorMessage = "unrecognised field '" + lexeme.str + "', possible fields: " + possibleFields
  2457. throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  2458. }
  2459. parser.currentClause.fields = [lexeme.str]
  2460. var nextLexeme = parser.peekLexeme()
  2461. if (nextLexeme == undefined) {
  2462. var errorMessage = "expecting term, found nothing"
  2463. throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  2464. }
  2465. switch (nextLexeme.type) {
  2466. case lunr.QueryLexer.TERM:
  2467. return lunr.QueryParser.parseTerm
  2468. default:
  2469. var errorMessage = "expecting term, found '" + nextLexeme.type + "'"
  2470. throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
  2471. }
  2472. }
  2473. lunr.QueryParser.parseTerm = function (parser) {
  2474. var lexeme = parser.consumeLexeme()
  2475. if (lexeme == undefined) {
  2476. return
  2477. }
  2478. parser.currentClause.term = lexeme.str.toLowerCase()
  2479. if (lexeme.str.indexOf("*") != -1) {
  2480. parser.currentClause.usePipeline = false
  2481. }
  2482. var nextLexeme = parser.peekLexeme()
  2483. if (nextLexeme == undefined) {
  2484. parser.nextClause()
  2485. return
  2486. }
  2487. switch (nextLexeme.type) {
  2488. case lunr.QueryLexer.TERM:
  2489. parser.nextClause()
  2490. return lunr.QueryParser.parseTerm
  2491. case lunr.QueryLexer.FIELD:
  2492. parser.nextClause()
  2493. return lunr.QueryParser.parseField
  2494. case lunr.QueryLexer.EDIT_DISTANCE:
  2495. return lunr.QueryParser.parseEditDistance
  2496. case lunr.QueryLexer.BOOST:
  2497. return lunr.QueryParser.parseBoost
  2498. default:
  2499. var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
  2500. throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
  2501. }
  2502. }
  2503. lunr.QueryParser.parseEditDistance = function (parser) {
  2504. var lexeme = parser.consumeLexeme()
  2505. if (lexeme == undefined) {
  2506. return
  2507. }
  2508. var editDistance = parseInt(lexeme.str, 10)
  2509. if (isNaN(editDistance)) {
  2510. var errorMessage = "edit distance must be numeric"
  2511. throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  2512. }
  2513. parser.currentClause.editDistance = editDistance
  2514. var nextLexeme = parser.peekLexeme()
  2515. if (nextLexeme == undefined) {
  2516. parser.nextClause()
  2517. return
  2518. }
  2519. switch (nextLexeme.type) {
  2520. case lunr.QueryLexer.TERM:
  2521. parser.nextClause()
  2522. return lunr.QueryParser.parseTerm
  2523. case lunr.QueryLexer.FIELD:
  2524. parser.nextClause()
  2525. return lunr.QueryParser.parseField
  2526. case lunr.QueryLexer.EDIT_DISTANCE:
  2527. return lunr.QueryParser.parseEditDistance
  2528. case lunr.QueryLexer.BOOST:
  2529. return lunr.QueryParser.parseBoost
  2530. default:
  2531. var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
  2532. throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
  2533. }
  2534. }
  2535. lunr.QueryParser.parseBoost = function (parser) {
  2536. var lexeme = parser.consumeLexeme()
  2537. if (lexeme == undefined) {
  2538. return
  2539. }
  2540. var boost = parseInt(lexeme.str, 10)
  2541. if (isNaN(boost)) {
  2542. var errorMessage = "boost must be numeric"
  2543. throw new lunr.QueryParseError (errorMessage, lexeme.start, lexeme.end)
  2544. }
  2545. parser.currentClause.boost = boost
  2546. var nextLexeme = parser.peekLexeme()
  2547. if (nextLexeme == undefined) {
  2548. parser.nextClause()
  2549. return
  2550. }
  2551. switch (nextLexeme.type) {
  2552. case lunr.QueryLexer.TERM:
  2553. parser.nextClause()
  2554. return lunr.QueryParser.parseTerm
  2555. case lunr.QueryLexer.FIELD:
  2556. parser.nextClause()
  2557. return lunr.QueryParser.parseField
  2558. case lunr.QueryLexer.EDIT_DISTANCE:
  2559. return lunr.QueryParser.parseEditDistance
  2560. case lunr.QueryLexer.BOOST:
  2561. return lunr.QueryParser.parseBoost
  2562. default:
  2563. var errorMessage = "Unexpected lexeme type '" + nextLexeme.type + "'"
  2564. throw new lunr.QueryParseError (errorMessage, nextLexeme.start, nextLexeme.end)
  2565. }
  2566. }
  2567. /**
  2568. * export the module via AMD, CommonJS or as a browser global
  2569. * Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
  2570. */
  2571. ;(function (root, factory) {
  2572. if (typeof define === 'function' && define.amd) {
  2573. // AMD. Register as an anonymous module.
  2574. define(factory)
  2575. } else if (typeof exports === 'object') {
  2576. /**
  2577. * Node. Does not work with strict CommonJS, but
  2578. * only CommonJS-like enviroments that support module.exports,
  2579. * like Node.
  2580. */
  2581. module.exports = factory()
  2582. } else {
  2583. // Browser globals (root is window)
  2584. root.lunr = factory()
  2585. }
  2586. }(this, function () {
  2587. /**
  2588. * Just return a value to define the module export.
  2589. * This example returns an object, but the module
  2590. * can return a function as the exported value.
  2591. */
  2592. return lunr
  2593. }))
  2594. })();