Sheshera Mysore committed on
Commit
7afe16d
·
1 Parent(s): 8f2fefc

Use mlconf candidate data; add URLs to the title.

Browse files
app.py CHANGED
@@ -11,6 +11,8 @@ Build an editable user profile based recommender.
11
  import copy
12
  import json
13
  import pickle
 
 
14
  import joblib
15
  import os
16
  import collections
@@ -79,8 +81,8 @@ def first_stage_ranked_docs(user_doc_queries, per_doc_to_rank, total_to_rank=200
79
  """
80
  if 'first_stage_ret_pids' not in st.session_state:
81
  # read the document vectors
82
- doc_vectors = np.load(os.path.join(in_path, 'cands', 'embeds-s2orccompsci-100k.npy'))
83
- with open(os.path.join(in_path, 'cands', 'pid2idx-s2orccompsci-100k.pickle'), 'rb') as fp:
84
  pid2idx_cands = pickle.load(fp)
85
  idx2pid_cands = dict([(v, k) for k, v in pid2idx_cands.items()])
86
  # index the vectors into a nearest neighbors structure
@@ -138,10 +140,10 @@ def read_candidates(in_path):
138
  :return:
139
  """
140
  if 'pid2abstract' not in st.session_state:
141
- with open(os.path.join(in_path, 'cands', 'abstracts-s2orccompsci-100k.pickle'), 'rb') as fp:
142
  pid2abstract = pickle.load(fp)
143
  # read the sentence vectors
144
- pid2sent_vectors = joblib.load(os.path.join(in_path, 'cands', f'embeds-sent-s2orccompsci-100k.pickle'))
145
  st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
146
  st.session_state['pid2abstract'] = pid2abstract
147
  return pid2abstract, pid2sent_vectors
@@ -253,7 +255,9 @@ def second_stage_ranked_docs(selected_query_kps, first_stage_pids, pid2abstract,
253
  retrieved_papers[pid2abstract[pid]['title']] = {
254
  'title': pid2abstract[pid]['title'],
255
  'kp_explanations': pid2kp_expls[pid],
256
- 'abstract': pid2abstract[pid]['abstract']
 
 
257
  }
258
  if len(retrieved_papers) == to_rank:
259
  break
@@ -322,9 +326,18 @@ def format_abstract(paperd, to_display=3, markdown=True):
322
  kp_expl = ', '.join(paperd['kp_explanations'])
323
  except KeyError:
324
  kp_expl = ''
 
 
 
 
325
  if markdown:
326
- par = '<p><b>Title</b>: <i>{:s}</i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'.\
327
- format(paper['title'], sents, kp_expl)
 
 
 
 
 
328
  else:
329
  par = 'Title: {:s}; Abstract: {:s}'.format(paper['title'], sents)
330
  return par
 
11
  import copy
12
  import json
13
  import pickle
14
+ import re
15
+
16
  import joblib
17
  import os
18
  import collections
 
81
  """
82
  if 'first_stage_ret_pids' not in st.session_state:
83
  # read the document vectors
84
+ doc_vectors = np.load(os.path.join(in_path, 'cands', 'embeds-mlconfs-18_23.npy'))
85
+ with open(os.path.join(in_path, 'cands', 'pid2idx-mlconfs-18_23.pickle'), 'rb') as fp:
86
  pid2idx_cands = pickle.load(fp)
87
  idx2pid_cands = dict([(v, k) for k, v in pid2idx_cands.items()])
88
  # index the vectors into a nearest neighbors structure
 
140
  :return:
141
  """
142
  if 'pid2abstract' not in st.session_state:
143
+ with open(os.path.join(in_path, 'cands', 'abstract-mlconfs-18_23.pickle'), 'rb') as fp:
144
  pid2abstract = pickle.load(fp)
145
  # read the sentence vectors
146
+ pid2sent_vectors = joblib.load(os.path.join(in_path, 'cands', f'embeds-sent-mlconfs-18_23.pickle'))
147
  st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
148
  st.session_state['pid2abstract'] = pid2abstract
149
  return pid2abstract, pid2sent_vectors
 
255
  retrieved_papers[pid2abstract[pid]['title']] = {
256
  'title': pid2abstract[pid]['title'],
257
  'kp_explanations': pid2kp_expls[pid],
258
+ 'abstract': pid2abstract[pid]['abstract'],
259
+ 'author_names': pid2abstract[pid]['author_names'],
260
+ 'url': pid2abstract[pid]['url'],
261
  }
262
  if len(retrieved_papers) == to_rank:
263
  break
 
326
  kp_expl = ', '.join(paperd['kp_explanations'])
327
  except KeyError:
328
  kp_expl = ''
329
+ title = re.sub('\{', '', paper['title'])
330
+ title = re.sub('\}', '', title)
331
+ sents = re.sub('\{', '', sents)
332
+ sents = re.sub('\}', '', sents)
333
  if markdown:
334
+ try:
335
+ url = paperd['url']
336
+ par = '<p><b>Title</b>: <i><a href="{:s}">{:s}</a></i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'. \
337
+ format(url, title, sents, kp_expl)
338
+ except KeyError:
339
+ par = '<p><b>Title</b>: <i>{:s}</i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'. \
340
+ format(paper['title'], sents, kp_expl)
341
  else:
342
  par = 'Title: {:s}; Abstract: {:s}'.format(paper['title'], sents)
343
  return par
data/cands/abstract-mlconfs-18_23.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:306dacf92c0abca2557fab1d5ac22b9a8b470f4e1c5cafb18f902f7257bbc7eb
3
+ size 71414390
data/cands/embeds-mlconfs-18_23.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa24ae6c04e33a80f853b8b097ac5eefd4a84bf3b9eb350202bd150004c75e37
3
+ size 271798400
data/cands/embeds-sent-mlconfs-18_23.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:786bc3ffc0846d07a36395a339575b910564b38291ea526a4d62f23b98e412a4
3
+ size 942861038
data/cands/pid2idx-mlconfs-18_23.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82065231dc98bfee763999573d67b0c2e24015fe198aa1427cd00077f022e5b9
3
+ size 3405401