Spaces:

Sheshera
/

maple-paper-recommender

Runtime error

App Files Community

Sheshera Mysore committed on Feb 20, 2024

Commit

7afe16d

1 Parent(s): 8f2fefc

Use mlconf candidate data; add URLs to the title.

Browse files

Files changed (5) hide show

app.py +20 -7
data/cands/abstract-mlconfs-18_23.pickle +3 -0
data/cands/embeds-mlconfs-18_23.npy +3 -0
data/cands/embeds-sent-mlconfs-18_23.pickle +3 -0
data/cands/pid2idx-mlconfs-18_23.pickle +3 -0

app.py CHANGED Viewed

@@ -11,6 +11,8 @@ Build an editable user profile based recommender.
 import copy
 import json
 import pickle
 import joblib
 import os
 import collections
@@ -79,8 +81,8 @@ def first_stage_ranked_docs(user_doc_queries, per_doc_to_rank, total_to_rank=200
     """
     if 'first_stage_ret_pids' not in st.session_state:
         # read the document vectors
-        doc_vectors = np.load(os.path.join(in_path, 'cands', 'embeds-s2orccompsci-100k.npy'))
-        with open(os.path.join(in_path, 'cands', 'pid2idx-s2orccompsci-100k.pickle'), 'rb') as fp:
             pid2idx_cands = pickle.load(fp)
             idx2pid_cands = dict([(v, k) for k, v in pid2idx_cands.items()])
         # index the vectors into a nearest neighbors structure
@@ -138,10 +140,10 @@ def read_candidates(in_path):
     :return:
     """
     if 'pid2abstract' not in st.session_state:
-        with open(os.path.join(in_path, 'cands', 'abstracts-s2orccompsci-100k.pickle'), 'rb') as fp:
             pid2abstract = pickle.load(fp)
         # read the sentence vectors
-        pid2sent_vectors = joblib.load(os.path.join(in_path, 'cands', f'embeds-sent-s2orccompsci-100k.pickle'))
         st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
         st.session_state['pid2abstract'] = pid2abstract
         return pid2abstract, pid2sent_vectors
@@ -253,7 +255,9 @@ def second_stage_ranked_docs(selected_query_kps, first_stage_pids, pid2abstract,
         retrieved_papers[pid2abstract[pid]['title']] = {
             'title': pid2abstract[pid]['title'],
             'kp_explanations': pid2kp_expls[pid],
-            'abstract': pid2abstract[pid]['abstract']
         }
         if len(retrieved_papers) == to_rank:
             break
@@ -322,9 +326,18 @@ def format_abstract(paperd, to_display=3, markdown=True):
         kp_expl = ', '.join(paperd['kp_explanations'])
     except KeyError:
         kp_expl = ''
     if markdown:
-        par = '<p><b>Title</b>: <i>{:s}</i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'.\
-            format(paper['title'], sents, kp_expl)
     else:
         par = 'Title: {:s}; Abstract: {:s}'.format(paper['title'], sents)
     return par

 import copy
 import json
 import pickle
+import re
 import joblib
 import os
 import collections
     """
     if 'first_stage_ret_pids' not in st.session_state:
         # read the document vectors
+        doc_vectors = np.load(os.path.join(in_path, 'cands', 'embeds-mlconfs-18_23.npy'))
+        with open(os.path.join(in_path, 'cands', 'pid2idx-mlconfs-18_23.pickle'), 'rb') as fp:
             pid2idx_cands = pickle.load(fp)
             idx2pid_cands = dict([(v, k) for k, v in pid2idx_cands.items()])
         # index the vectors into a nearest neighbors structure
     :return:
     """
     if 'pid2abstract' not in st.session_state:
+        with open(os.path.join(in_path, 'cands', 'abstract-mlconfs-18_23.pickle'), 'rb') as fp:
             pid2abstract = pickle.load(fp)
         # read the sentence vectors
+        pid2sent_vectors = joblib.load(os.path.join(in_path, 'cands', f'embeds-sent-mlconfs-18_23.pickle'))
         st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
         st.session_state['pid2abstract'] = pid2abstract
         return pid2abstract, pid2sent_vectors
         retrieved_papers[pid2abstract[pid]['title']] = {
             'title': pid2abstract[pid]['title'],
             'kp_explanations': pid2kp_expls[pid],
+            'abstract': pid2abstract[pid]['abstract'],
+            'author_names': pid2abstract[pid]['author_names'],
+            'url': pid2abstract[pid]['url'],
         }
         if len(retrieved_papers) == to_rank:
             break
         kp_expl = ', '.join(paperd['kp_explanations'])
     except KeyError:
         kp_expl = ''
+    title = re.sub('\{', '', paper['title'])
+    title = re.sub('\}', '', title)
+    sents = re.sub('\{', '', sents)
+    sents = re.sub('\}', '', sents)
     if markdown:
+        try:
+            url = paperd['url']
+            par = '<p><b>Title</b>: <i><a href="{:s}">{:s}</a></i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'. \
+                format(url, title, sents, kp_expl)
+        except KeyError:
+            par = '<p><b>Title</b>: <i>{:s}</i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'. \
+                format(paper['title'], sents, kp_expl)
     else:
         par = 'Title: {:s}; Abstract: {:s}'.format(paper['title'], sents)
     return par

data/cands/abstract-mlconfs-18_23.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:306dacf92c0abca2557fab1d5ac22b9a8b470f4e1c5cafb18f902f7257bbc7eb
+size 71414390

data/cands/embeds-mlconfs-18_23.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aa24ae6c04e33a80f853b8b097ac5eefd4a84bf3b9eb350202bd150004c75e37
+size 271798400

data/cands/embeds-sent-mlconfs-18_23.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:786bc3ffc0846d07a36395a339575b910564b38291ea526a4d62f23b98e412a4
+size 942861038

data/cands/pid2idx-mlconfs-18_23.pickle ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:82065231dc98bfee763999573d67b0c2e24015fe198aa1427cd00077f022e5b9
+size 3405401