Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -71,6 +71,16 @@ from tqdm.autonotebook import tqdm
|
|
| 71 |
import numpy as np
|
| 72 |
import re
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
# We lower case our text and remove stop-words from indexing
|
| 76 |
def bm25_tokenizer(text):
|
|
@@ -88,6 +98,9 @@ for passage in tqdm(passages):
|
|
| 88 |
|
| 89 |
bm25 = BM25Okapi(tokenized_corpus)
|
| 90 |
|
|
|
|
|
|
|
|
|
|
| 91 |
# This function will search all wikipedia articles for passages that
|
| 92 |
# answer the query
|
| 93 |
def search(query):
|
|
@@ -163,7 +176,24 @@ def search(query):
|
|
| 163 |
#st.write(rs_final.strip())
|
| 164 |
res.append(rs_final.strip())
|
| 165 |
|
| 166 |
-
st.write(res[0:maxtags_sidebar])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
|
| 168 |
st.write("## Results:")
|
| 169 |
if st.button('Generated Expansion'):
|
|
|
|
| 71 |
import numpy as np
import re

import yake

# --- YAKE keyword-extractor configuration -------------------------------
# Extract up to 3 keywords per passage, allowing phrases of up to 3 words.
language = "en"
max_ngram_size = 3
deduplication_threshold = 0.9
deduplication_algo = 'seqm'
windowSize = 3
numOfKeywords = 3

# NOTE: `windowsSize` (with the extra "s") is yake's actual parameter name.
custom_kw_extractor = yake.KeywordExtractor(
    lan=language,
    n=max_ngram_size,
    dedupLim=deduplication_threshold,
    dedupFunc=deduplication_algo,
    windowsSize=windowSize,
    top=numOfKeywords,
    features=None,
)
|
| 84 |
|
| 85 |
# We lower case our text and remove stop-words from indexing
|
| 86 |
def bm25_tokenizer(text):
|
|
|
|
| 98 |
|
| 99 |
bm25 = BM25Okapi(tokenized_corpus)
|
| 100 |
|
| 101 |
+
def word_len(s):
    """Return the number of whitespace-separated words in *s*.

    The original split on a literal space and filtered out empty strings;
    `str.split()` with no argument does exactly that filtering and also
    treats tabs/newlines/consecutive whitespace as separators.
    """
    return len(s.split())
|
| 103 |
+
|
| 104 |
# This function will search all wikipedia articles for passages that
|
| 105 |
# answer the query
|
| 106 |
def search(query):
|
|
|
|
| 176 |
#st.write(rs_final.strip())
|
| 177 |
res.append(rs_final.strip())
|
| 178 |
|
| 179 |
+
#st.write(res[0:maxtags_sidebar])
|
| 180 |
+
|
| 181 |
+
res_clean = []
|
| 182 |
+
for out in result:
|
| 183 |
+
if len(out) > 20:
|
| 184 |
+
keywords = custom_kw_extractor.extract_keywords(out)
|
| 185 |
+
for key in keywords:
|
| 186 |
+
res_clean.append(key[0])
|
| 187 |
+
else:
|
| 188 |
+
res_clean.append(out)
|
| 189 |
+
|
| 190 |
+
show_out = []
|
| 191 |
+
for i in res_clean:
|
| 192 |
+
num = word_len(i)
|
| 193 |
+
if num > 1:
|
| 194 |
+
show_out.append(i)
|
| 195 |
+
|
| 196 |
+
st.write(show_out[0:maxtags_sidebar])
|
| 197 |
|
| 198 |
st.write("## Results:")
|
| 199 |
if st.button('Generated Expansion'):
|