Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| import torch | |
def get_matches1(query):
    """Query ``vecdb1`` for ``k=60`` scored similarity matches to ``query``."""
    return vecdb1.similarity_search_with_score(query, k=60)
def get_matches2(query):
    """Query ``vecdb2`` for ``k=60`` scored similarity matches to ``query``."""
    return vecdb2.similarity_search_with_score(query, k=60)
def get_matches3(query):
    """Query ``vecdb3`` for ``k=60`` scored similarity matches to ``query``."""
    return vecdb3.similarity_search_with_score(query, k=60)
def inference(query, method=1):
    """Search one of the indexes for ``query`` and build the three result tables.

    Args:
        query: Text to search for (the UI passes the abstract textbox value).
        method: Index selector. ``1`` searches via ``get_matches1``, ``2`` via
            ``get_matches2``; any other value falls through to
            ``get_matches3``.

    Returns:
        A list ``[a_output, j_output, n_output]`` of Gradio update payloads
        for the abstracts, journals and authors tables, in that order.
        An empty match list yields three empty (but visible) tables.
    """
    if method == 1:
        matches = get_matches1(query)
    elif method == 2:
        matches = get_matches2(query)
    else:
        matches = get_matches3(query)

    # Each match is indexed as (document, score); scores expose ``.item()``.
    # Round once here so the normaliser and the loop share the same values.
    raw_scores = [round(match[1].item(), 3) for match in matches]
    # ``default`` keeps an empty result set from raising ValueError.
    min_score = min(raw_scores, default=0.0)
    max_score = max(raw_scores, default=0.0)

    def normaliser(x):
        # NOTE(review): this divides by max_score rather than
        # (max_score - min_score), so it is not a strict min-max scaling;
        # kept as-is to leave displayed scores unchanged.
        if max_score == 0:
            # Nothing to scale by; avoid ZeroDivisionError.
            return 1.0
        return round(1 - (x - min_score) / max_score, 3)

    auth_counts = {}   # author -> number of rows already in the authors table
    j_bucket = {}      # journal -> summed normalised score
    n_table = []       # authors table rows
    a_table = []       # abstracts table rows

    for rank, (match, raw) in enumerate(zip(matches, raw_scores), start=1):
        metadata = match[0].metadata
        score = normaliser(raw)
        title = metadata['title']
        # Only the first listed author is shown, title-cased.
        author = metadata['authors'][0].title()
        date = metadata.get('date', 'None')
        link = metadata.get('link', 'None')
        submitter = metadata.get('submitter', 'None')
        # A missing, None or blank journal is bucketed under 'None'.
        journal = (metadata.get('journal') or '').strip() or 'None'

        # For journals: accumulate the score per journal name.
        j_bucket[journal] = j_bucket.get(journal, 0) + score

        # For authors: keep at most two rows per author.
        seen = auth_counts.get(author, 0)
        if seen < 2:
            n_table.append([rank, score, author, title, link, date])
            auth_counts[author] = seen + 1

        # For abstracts: every match gets a row.
        a_table.append(
            [rank, title, author, submitter, journal, date, link, score]
        )

    # Drop the placeholder bucket; ``pop`` with a default does not raise when
    # every match had a real journal name.
    j_bucket.pop('None', None)

    ranked_journals = sorted(
        ([name, round(total, 3)] for name, total in j_bucket.items()),
        key=lambda row: row[1],
        reverse=True,
    )
    j_table = [
        [position, name, total]
        for position, (name, total) in enumerate(ranked_journals, start=1)
    ]

    # ``gr.update`` is the component-agnostic update payload.
    # NOTE(review): the per-component ``gr.Dataframe.update`` classmethod is
    # reportedly gone in newer Gradio releases -- confirm against the pinned
    # Gradio version.
    a_output = gr.update(value=a_table, visible=True)
    j_output = gr.update(value=j_table, visible=True)
    n_output = gr.update(value=n_table, visible=True)
    return [a_output, j_output, n_output]
def inference1(query):
    """Run ``inference`` on ``query`` with ``method=1``."""
    return inference(query, method=1)
def inference2(query):
    """Run ``inference`` on ``query`` with ``method=2``."""
    return inference(query, method=2)
def inference3(query):
    """Run ``inference`` on ``query`` with ``method=3``."""
    return inference(query, method=3)
# Hugging Face model identifiers, one per search index.
model1_name = "biodatlab/MIReAD-Neuro-Large"
model2_name = "biodatlab/MIReAD-Neuro-Contrastive"
model3_name = "biodatlab/SciBERT-Neuro-Contrastive"

# Settings shared by all three embedders (the same dict objects are passed
# to each): CPU device, embeddings not normalised.
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

# Build the embedders in order 1, 2, 3.
faiss_embedder1, faiss_embedder2, faiss_embedder3 = [
    HuggingFaceEmbeddings(
        model_name=name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    for name in (model1_name, model2_name, model3_name)
]

# Load each saved FAISS index with its matching embedder, again in order.
vecdb1, vecdb2, vecdb3 = [
    FAISS.load_local(folder, embedder)
    for folder, embedder in (
        ("nbdt_index", faiss_embedder1),
        ("indexes", faiss_embedder2),
        ("scibert_contr", faiss_embedder3),
    )
]
# Build the UI: a description, one abstract textbox, one button per model,
# and three initially hidden result tables (authors / abstracts / journals).
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# NBDT Recommendation Engine for Editors")
    # Adjacent string literals (instead of backslash continuations) keep the
    # sentence spacing independent of source indentation.
    gr.Markdown(
        "NBDT Recommendation Engine for Editors is a tool for neuroscience "
        "authors/abstracts/journals recommendation built for NBDT journal editors. "
        "It aims to help an editor to find similar reviewers, abstracts, and "
        "journals to a given submitted abstract. "
        "To find a recommendation, paste a `title[SEP]abstract` or `abstract` "
        'in the text box below and click "Find Matches". '
        "Then, you can hover to authors/abstracts/journals tab to find a "
        "suggested list. "
        "The data in our current demo includes authors associated with the "
        "NBDT Journal. We will update the data monthly for an up-to-date "
        "publications."
    )

    abst = gr.Textbox(label="Abstract", lines=10)
    action_btn = gr.Button(value="Find Matches with Normal Model")
    action2_btn = gr.Button(value="Find Matches with MIReAD Contrastive Model")
    action3_btn = gr.Button(value="Find Matches with SciBERT Contrastive Model")

    # Result tables start hidden; ``inference`` returns updates that set
    # their values and make them visible.
    with gr.Tab("Authors"):
        n_output = gr.Dataframe(
            headers=['No.', 'Score', 'Name', 'Title', 'Link', 'Date'],
            datatype=['number', 'number', 'str', 'str', 'str', 'str'],
            col_count=(6, "fixed"),
            wrap=True,
            visible=False,
        )
    with gr.Tab("Abstracts"):
        a_output = gr.Dataframe(
            headers=['No.', 'Title', 'Author', 'Corresponding Author',
                     'Journal', 'Date', 'Link', 'Score'],
            datatype=['number', 'str', 'str', 'str',
                      'str', 'str', 'str', 'number'],
            col_count=(8, "fixed"),
            wrap=True,
            visible=False,
        )
    with gr.Tab("Journals"):
        j_output = gr.Dataframe(
            headers=['No.', 'Name', 'Score'],
            datatype=['number', 'str', 'number'],
            col_count=(3, "fixed"),
            wrap=True,
            visible=False,
        )

    # One handler per model; outputs are ordered to match the list returned
    # by ``inference``. Each endpoint gets its own explicit api_name instead
    # of reusing "neurojane" three times.
    # NOTE(review): the "_1"/"_2" suffixes are meant to mirror the names
    # Gradio assigns when an api_name is reused -- confirm against the pinned
    # Gradio version if external clients call these endpoints.
    action_btn.click(
        fn=inference1,
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane",
    )
    action2_btn.click(
        fn=inference2,
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane_1",
    )
    action3_btn.click(
        fn=inference3,
        inputs=[abst],
        outputs=[a_output, j_output, n_output],
        api_name="neurojane_2",
    )

# Launched at import time, as in the original script.
demo.launch(debug=True)