huu-ontocord commited on
Commit
e12990a
·
verified ·
1 Parent(s): 4e96485

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +31 -0
README.md CHANGED
@@ -34,3 +34,34 @@ textbook_model = fasttext.load_model("model_textbook_quality.bin")
34
  ```
35
 
36
  See the files here: https://huggingface.co/ontocord/riverbed/tree/main
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ```
35
 
36
  See the files here: https://huggingface.co/ontocord/riverbed/tree/main
37
+
38
+
39
+ This includes a small Whoosh search index of Wikidata, useful as background knowledge for LLMs.
40
+
41
+ Installation:
42
+ ```python
+ import os
43
+
44
+ if not os.path.exists("./wikidata_bm25_whoosh"):
45
+ !git clone https://huggingface.co/ontocord/riverbed
46
+ !pip install -q whoosh
47
+ import whoosh.index as whoosh_index
48
+ from whoosh.qparser import QueryParser
49
+ from whoosh.analysis import StemmingAnalyzer, Filter
50
+ class MyFilter(Filter):
51
+ def __call__(self, tokens):
52
+
53
+ for t in tokens:
54
+ t.text = t.text.lower()
55
+ if len(t.text) > 5:
56
+ yield t
57
+ t.text = t.text[:5]
58
+ yield t
59
+
60
+ try:
61
+ if qp is None: assert False
62
+ except:
63
+ bm25_dir = "./riverbed"
64
+ index = whoosh_index.open_dir(bm25_dir)
65
+ searcher = index.searcher()
66
+ qp = QueryParser("content", schema=index.schema)
67
+ ```