Spaces:
Runtime error
Runtime error
Sheshera Mysore committed on
Commit ·
7afe16d
1
Parent(s): 8f2fefc
Use mlconf candidate data; add urls to the title.
Browse files
app.py
CHANGED
|
@@ -11,6 +11,8 @@ Build an editable user profile based recommender.
|
|
| 11 |
import copy
|
| 12 |
import json
|
| 13 |
import pickle
|
|
|
|
|
|
|
| 14 |
import joblib
|
| 15 |
import os
|
| 16 |
import collections
|
|
@@ -79,8 +81,8 @@ def first_stage_ranked_docs(user_doc_queries, per_doc_to_rank, total_to_rank=200
|
|
| 79 |
"""
|
| 80 |
if 'first_stage_ret_pids' not in st.session_state:
|
| 81 |
# read the document vectors
|
| 82 |
-
doc_vectors = np.load(os.path.join(in_path, 'cands', 'embeds-
|
| 83 |
-
with open(os.path.join(in_path, 'cands', 'pid2idx-
|
| 84 |
pid2idx_cands = pickle.load(fp)
|
| 85 |
idx2pid_cands = dict([(v, k) for k, v in pid2idx_cands.items()])
|
| 86 |
# index the vectors into a nearest neighbors structure
|
|
@@ -138,10 +140,10 @@ def read_candidates(in_path):
|
|
| 138 |
:return:
|
| 139 |
"""
|
| 140 |
if 'pid2abstract' not in st.session_state:
|
| 141 |
-
with open(os.path.join(in_path, 'cands', '
|
| 142 |
pid2abstract = pickle.load(fp)
|
| 143 |
# read the sentence vectors
|
| 144 |
-
pid2sent_vectors = joblib.load(os.path.join(in_path, 'cands', f'embeds-sent-
|
| 145 |
st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
|
| 146 |
st.session_state['pid2abstract'] = pid2abstract
|
| 147 |
return pid2abstract, pid2sent_vectors
|
|
@@ -253,7 +255,9 @@ def second_stage_ranked_docs(selected_query_kps, first_stage_pids, pid2abstract,
|
|
| 253 |
retrieved_papers[pid2abstract[pid]['title']] = {
|
| 254 |
'title': pid2abstract[pid]['title'],
|
| 255 |
'kp_explanations': pid2kp_expls[pid],
|
| 256 |
-
'abstract': pid2abstract[pid]['abstract']
|
|
|
|
|
|
|
| 257 |
}
|
| 258 |
if len(retrieved_papers) == to_rank:
|
| 259 |
break
|
|
@@ -322,9 +326,18 @@ def format_abstract(paperd, to_display=3, markdown=True):
|
|
| 322 |
kp_expl = ', '.join(paperd['kp_explanations'])
|
| 323 |
except KeyError:
|
| 324 |
kp_expl = ''
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
if markdown:
|
| 326 |
-
|
| 327 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
else:
|
| 329 |
par = 'Title: {:s}; Abstract: {:s}'.format(paper['title'], sents)
|
| 330 |
return par
|
|
|
|
| 11 |
import copy
|
| 12 |
import json
|
| 13 |
import pickle
|
| 14 |
+
import re
|
| 15 |
+
|
| 16 |
import joblib
|
| 17 |
import os
|
| 18 |
import collections
|
|
|
|
| 81 |
"""
|
| 82 |
if 'first_stage_ret_pids' not in st.session_state:
|
| 83 |
# read the document vectors
|
| 84 |
+
doc_vectors = np.load(os.path.join(in_path, 'cands', 'embeds-mlconfs-18_23.npy'))
|
| 85 |
+
with open(os.path.join(in_path, 'cands', 'pid2idx-mlconfs-18_23.pickle'), 'rb') as fp:
|
| 86 |
pid2idx_cands = pickle.load(fp)
|
| 87 |
idx2pid_cands = dict([(v, k) for k, v in pid2idx_cands.items()])
|
| 88 |
# index the vectors into a nearest neighbors structure
|
|
|
|
| 140 |
:return:
|
| 141 |
"""
|
| 142 |
if 'pid2abstract' not in st.session_state:
|
| 143 |
+
with open(os.path.join(in_path, 'cands', 'abstract-mlconfs-18_23.pickle'), 'rb') as fp:
|
| 144 |
pid2abstract = pickle.load(fp)
|
| 145 |
# read the sentence vectors
|
| 146 |
+
pid2sent_vectors = joblib.load(os.path.join(in_path, 'cands', f'embeds-sent-mlconfs-18_23.pickle'))
|
| 147 |
st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
|
| 148 |
st.session_state['pid2abstract'] = pid2abstract
|
| 149 |
return pid2abstract, pid2sent_vectors
|
|
|
|
| 255 |
retrieved_papers[pid2abstract[pid]['title']] = {
|
| 256 |
'title': pid2abstract[pid]['title'],
|
| 257 |
'kp_explanations': pid2kp_expls[pid],
|
| 258 |
+
'abstract': pid2abstract[pid]['abstract'],
|
| 259 |
+
'author_names': pid2abstract[pid]['author_names'],
|
| 260 |
+
'url': pid2abstract[pid]['url'],
|
| 261 |
}
|
| 262 |
if len(retrieved_papers) == to_rank:
|
| 263 |
break
|
|
|
|
| 326 |
kp_expl = ', '.join(paperd['kp_explanations'])
|
| 327 |
except KeyError:
|
| 328 |
kp_expl = ''
|
| 329 |
+
title = re.sub('\{', '', paper['title'])
|
| 330 |
+
title = re.sub('\}', '', title)
|
| 331 |
+
sents = re.sub('\{', '', sents)
|
| 332 |
+
sents = re.sub('\}', '', sents)
|
| 333 |
if markdown:
|
| 334 |
+
try:
|
| 335 |
+
url = paperd['url']
|
| 336 |
+
par = '<p><b>Title</b>: <i><a href="{:s}">{:s}</a></i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'. \
|
| 337 |
+
format(url, title, sents, kp_expl)
|
| 338 |
+
except KeyError:
|
| 339 |
+
par = '<p><b>Title</b>: <i>{:s}</i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'. \
|
| 340 |
+
format(paper['title'], sents, kp_expl)
|
| 341 |
else:
|
| 342 |
par = 'Title: {:s}; Abstract: {:s}'.format(paper['title'], sents)
|
| 343 |
return par
|
data/cands/abstract-mlconfs-18_23.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:306dacf92c0abca2557fab1d5ac22b9a8b470f4e1c5cafb18f902f7257bbc7eb
|
| 3 |
+
size 71414390
|
data/cands/embeds-mlconfs-18_23.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa24ae6c04e33a80f853b8b097ac5eefd4a84bf3b9eb350202bd150004c75e37
|
| 3 |
+
size 271798400
|
data/cands/embeds-sent-mlconfs-18_23.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:786bc3ffc0846d07a36395a339575b910564b38291ea526a4d62f23b98e412a4
|
| 3 |
+
size 942861038
|
data/cands/pid2idx-mlconfs-18_23.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82065231dc98bfee763999573d67b0c2e24015fe198aa1427cd00077f022e5b9
|
| 3 |
+
size 3405401
|