Spaces:

stratpoint-archive
/

LLM_Research_Helper

Runtime error

App Files Files

paolosandejas-stratpoint committed on May 2, 2024

Commit

9c78fbf

verified ·

1 Parent(s): 836ffb8

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +3 -9
app.py +230 -0
requirements.txt +175 -0

README.md CHANGED Viewed

@@ -1,12 +1,6 @@
 ---
-title: LLM Research Helper
-emoji: 🏆
-colorFrom: indigo
-colorTo: indigo
-sdk: gradio
-sdk_version: 4.28.3
 app_file: app.py
-pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: LLM_Research_Helper
 app_file: app.py
+sdk: gradio
+sdk_version: 4.26.0
 ---

app.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import textwrap
+import os
+import re
+import argparse
+import requests
+import google.generativeai as genai
+from IPython.display import Markdown
+import gradio as gr
+# # Used to securely store your API key
+# from google.colab import userdata
+# Gemini key is read from the environment; '-1' is a placeholder value used when the variable is unset
+gemini_api_key = os.environ.get('GEMINI_API_KEY', '-1')
+genai.configure(api_key=gemini_api_key)
+# Semantic Scholar key, sent as the X-API-KEY header (None when the variable is unset)
+S2_API_KEY = os.getenv('S2_API_KEY')
+# Number of search hits requested from Semantic Scholar before filtering
+initial_result_limit = 10
+# Intended size of the final, ranked list of papers
+final_result_limit = 5
+# Select relevant fields to pull (comma-separated; each field listed once)
+fields = 'title,url,abstract,citationCount,journal,isOpenAccess,fieldsOfStudy,year'
+def raw_to_markdown(text):
+    """
+    Wrap plain text in an IPython Markdown object rendered as a blockquote.
+    Args:
+        text: The raw text; '•' bullets are rewritten as markdown '*' bullets.
+    Returns:
+        A Markdown display object whose every line (blank ones included) is prefixed with '> '.
+    """
+    bulleted = text.replace('•', '  *')
+    # The always-true predicate makes indent() prefix empty lines as well
+    quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
+    return Markdown(quoted)
+def markdown_to_raw(markdown_text):
+    """
+    This function converts basic markdown text to raw text.
+    Header markers, bold/italic markers and unordered-list bullets are dropped,
+    backtick-delimited code is removed together with its content, and
+    leading/trailing whitespace is trimmed from the result.
+    Args:
+        markdown_text: The markdown text string to be converted.
+    Returns:
+        A string containing the raw text equivalent of the markdown text.
+    """
+    # Remove headers
+    text = re.sub(r'#+ ?', '', markdown_text)
+    # Remove bold and italics (can be adjusted based on needs)
+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)  # Bold
+    # Italics: only underscores on word boundaries count as emphasis markers,
+    # so identifiers and URLs such as my_var_name keep their underscores
+    text = re.sub(r'(?<!\w)_(.+?)_(?!\w)', r'\1', text)
+    # Remove code blocks
+    text = re.sub(r'`(.*?)`', '', text, flags=re.DOTALL)
+    # Remove lists
+    text = re.sub(r'\*+ (.*?)$', r'\1', text, flags=re.MULTILINE)  # Unordered lists
+    # str.strip() returns a new string, so its result has to be the return value
+    return text.strip()
+def find_basis_papers(query):
+    """
+    Search Semantic Scholar for a query and return the best-ranked usable papers.
+    Only papers accepted by isValidPaper are kept; they are ordered newest first
+    and, within the same year, by descending citation count.
+    Args:
+        query: Free-text search string.
+    Returns:
+        A list of at most final_result_limit paper dicts, or None when the
+        query is empty or the search reports no matches.
+    Raises:
+        requests.HTTPError: If the API answers with an error status.
+        requests.Timeout: If the API does not answer within the timeout.
+    """
+    if not query:
+        print('No query given')
+        return None
+    # The timeout keeps an unresponsive API from hanging the chat handler forever
+    rsp = requests.get('https://api.semanticscholar.org/graph/v1/paper/search',
+                       headers={'X-API-KEY': S2_API_KEY},
+                       params={'query': query, 'limit': initial_result_limit, 'fields': fields},
+                       timeout=30)
+    rsp.raise_for_status()
+    results = rsp.json()
+    total = results.get('total')
+    if not total:
+        print('No matches found. Please try another query.')
+        return None
+    print(f'Found {total} initial results. Showing up to {initial_result_limit}.')
+    papers = results.get('data') or []
+    # Filter paper results
+    filtered_papers = [paper for paper in papers if isValidPaper(paper)]
+    # rank paper results; a missing/null year or citation count sorts as 0
+    # because None cannot be compared with an int
+    ranked_papers = sorted(
+        filtered_papers,
+        key=lambda x: (x.get('year') or 0, x.get('citationCount') or 0),
+        reverse=True,
+    )
+    # return the best papers, capped by the module-level limit
+    return ranked_papers[:final_result_limit]
+# def print_papers(papers):
+#     for idx, paper in enumerate(papers):
+#         print(f"PAPER {idx}")
+#         for key, value in paper.items():
+#             if key != 'abstract':
+#                 print(f"\t{key}: '{value}'")
+def isValidPaper(paper):
+    """
+    Tell whether a paper record is usable for the literature review.
+    Args:
+        paper: A dict with 'isOpenAccess' and 'abstract' keys.
+    Returns:
+        True when the paper is open access and has a non-empty abstract, otherwise False.
+    """
+    return bool(paper['isOpenAccess'] and paper['abstract'])
+# def filter_papers(papers):
+#     filtered_papers = []
+#     for paper in papers:
+#         if paper['isOpenAccess'] and paper['abstract']:
+#             # paper is acceptable
+#             filtered_papers.append(paper)
+#     return filtered_papers
+def GEMINI_optimize_query(initial_query: str):
+    """
+    Ask Gemini to rewrite a search query so that it finds related academic papers.
+    Args:
+        initial_query: The search query as typed by the user.
+    Returns:
+        The model's reply with markdown formatting removed by markdown_to_raw.
+    """
+    instruction = f"""Given a search query, return an optimized version of the query to find related academic papers
+    QUERY: {initial_query}.
+    Only return the optimized query"""
+    # A fresh chat session with empty history is opened for every call
+    session = genai.GenerativeModel('gemini-pro').start_chat(history=[])
+    reply = session.send_message(instruction)
+    return markdown_to_raw(reply.text)
+def GEMINI_summarize_abstracts(initial_query: str, papers: str):
+    """
+    Ask Gemini for a review of related literature built from the given papers.
+    Args:
+        initial_query: The search query the review should address.
+        papers: The papers to review; interpolated into the prompt as text.
+    Returns:
+        The model's reply with markdown formatting removed by markdown_to_raw.
+    """
+    instruction = f"""Given the following academic papers,
+    return a review of related literature for the search query: {initial_query}.
+    Ignore papers without abstracts.
+    Here are the papers {papers}
+    """
+    # A fresh chat session with empty history is opened for every call
+    session = genai.GenerativeModel('gemini-pro').start_chat(history=[])
+    reply = session.send_message(instruction)
+    return markdown_to_raw(reply.text)
+def create_gemini_model():
+    """
+    Create a 'gemini-pro' model together with a chat session that starts with empty history.
+    Returns:
+        A (model, chat) tuple.
+    """
+    gemini = genai.GenerativeModel('gemini-pro')
+    return gemini, gemini.start_chat(history=[])
+# instantiate models
+# Two separate module-level chat sessions, created once when the module is loaded:
+# predict() uses summarizer_chat for the literature review and follow-up messages,
+# and query_optimizer_chat for rewriting the search query.
+summarizer_model, summarizer_chat = create_gemini_model()
+query_optimizer_model, query_optimizer_chat = create_gemini_model()
+# def get_paper_links(papers):
+#     urls = []
+#     for paper in papers:
+#         urls = paper['url']
+#     return urls
+def predict(message, history):
+    """
+    Chat callback used by the Gradio interface.
+    On the first message of a conversation the message is treated as a research
+    topic: it is optionally rewritten by the query optimizer, used to search
+    Semantic Scholar, and the papers found are summarized into a review of
+    related literature. Later messages are forwarded to the same summarizer
+    chat session as follow-up questions.
+    Args:
+        message: The text the user just sent.
+        history: Previous turns of the conversation; empty on the first turn.
+    Returns:
+        The reply text with markdown formatting removed by markdown_to_raw.
+    """
+    if not history:
+        query = message
+        print(f"INITIAL QUERY: {query}")
+        # optimize_query is only assigned by the command-line entry point, so
+        # default to False when this module is imported rather than run as a script
+        if globals().get('optimize_query', False):
+            optimizer_prompt = f"""Given a search query, return an optimized
+            version of the query to find related academic papers
+            QUERY: {query}.
+            Only return the optimized query"""
+            response = query_optimizer_chat.send_message(optimizer_prompt)
+            query = markdown_to_raw(response.text)
+            print(f"OPTIMIZED QUERY: {query}")
+        # optimized query used to search semantic scholar
+        papers = find_basis_papers(query)
+        if not papers:
+            # Without papers the model would be asked to review "None" and would invent sources
+            return ("No suitable open-access papers were found for this query. "
+                    "Please clear the chat and try a different query.")
+        summarizer_prompt = f"""Given the following academic papers,
+        return a review of related literature for the search query: {query}.
+        Focus on data/key factors and methodologies considered.
+        Here are the papers {papers}
+        Include the paper urls at the end of the review of related literature.
+        """
+        response = summarizer_chat.send_message(summarizer_prompt)
+        abstract_summary = markdown_to_raw(response.text)
+        return abstract_summary
+    # Follow-up turn: continue the existing summarizer conversation
+    response = summarizer_chat.send_message(message)
+    response_text = markdown_to_raw(response.text)
+    return response_text
+def main():
+    """Build the Gradio chat interface around predict and launch it."""
+    chat_ui = gr.ChatInterface(
+        predict,
+        title="LLM Research Helper",
+        description="""Start by inputing a brief description/title
+        of your research and our assistant will return a review of
+        related literature
+        ex. Finding optimal site locations for solar farms"""
+    )
+    chat_ui.launch()
+if __name__ == '__main__':
+    # Command-line entry point: read the options, then start the chat UI
+    parser = argparse.ArgumentParser(description="Literature review chatbot")
+    parser.add_argument("-o", "--optimize_query", help="Use query optimization (True, False)", default=False)
+    args = parser.parse_args()
+    # argparse hands the option over as a string (e.g. "True"), which never equals
+    # the booleans True/False, so interpret its text instead; anything else means False
+    optimize_query = str(args.optimize_query).strip().lower() in ('true', '1', 'yes')
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,175 @@

+aiofiles==23.2.1
+aiohttp==3.9.4
+aiosignal==1.3.1
+altair==5.3.0
+annotated-types==0.6.0
+anyio==4.3.0
+appnope==0.1.4
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+async-lru==2.0.4
+attrs==23.2.0
+Babel==2.14.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+cachetools==5.3.3
+certifi==2024.2.2
+cffi==1.16.0
+charset-normalizer==3.3.2
+click==8.1.7
+comm==0.2.2
+contourpy==1.2.1
+cycler==0.12.1
+datasets==2.18.0
+debugpy==1.8.1
+decorator==5.1.1
+defusedxml==0.7.1
+dill==0.3.8
+distro==1.9.0
+evaluate==0.4.1
+executing==2.0.1
+fastapi==0.110.1
+fastjsonschema==2.19.1
+ffmpy==0.3.2
+filelock==3.13.4
+fonttools==4.51.0
+fqdn==1.5.1
+frozenlist==1.4.1
+fsspec==2024.2.0
+google-ai-generativelanguage==0.6.1
+google-api-core==2.18.0
+google-api-python-client==2.125.0
+google-auth==2.29.0
+google-auth-httplib2==0.2.0
+google-generativeai==0.5.0
+googleapis-common-protos==1.63.0
+gradio==4.26.0
+gradio_client==0.15.1
+grpcio==1.62.1
+grpcio-status==1.62.1
+h11==0.14.0
+httpcore==1.0.5
+httplib2==0.22.0
+httpx==0.27.0
+huggingface-hub==0.22.2
+idna==3.7
+importlib_resources==6.4.0
+ipykernel==6.29.4
+ipython==8.23.0
+ipywidgets==8.1.2
+isoduration==20.11.0
+jedi==0.19.1
+Jinja2==3.1.3
+jiwer==3.0.3
+json5==0.9.25
+jsonpointer==2.4
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.1
+jupyter_core==5.7.2
+jupyter_server==2.14.0
+jupyter_server_terminals==0.5.3
+jupyterlab==4.1.6
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.26.0
+jupyterlab_widgets==3.0.10
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.4
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+mistune==3.0.2
+multidict==6.0.5
+multiprocess==0.70.16
+nbclient==0.10.0
+nbconvert==7.16.3
+nbformat==5.10.4
+nest-asyncio==1.6.0
+notebook==7.1.2
+notebook_shim==0.2.4
+numpy==1.26.4
+openai==1.17.1
+orjson==3.10.0
+overrides==7.7.0
+packaging==24.0
+pandas==2.2.2
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.3.0
+platformdirs==4.2.0
+prometheus_client==0.20.0
+prompt-toolkit==3.0.43
+proto-plus==1.23.0
+protobuf==4.25.3
+psutil==5.9.8
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==15.0.2
+pyarrow-hotfix==0.6
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pycparser==2.22
+pydantic==2.7.0
+pydantic_core==2.18.1
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-json-logger==2.0.7
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+pyzmq==25.1.2
+qtconsole==5.5.1
+QtPy==2.4.1
+rapidfuzz==3.8.1
+referencing==0.34.0
+requests==2.31.0
+responses==0.18.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.7.1
+rpds-py==0.18.0
+rsa==4.9
+ruff==0.3.7
+semantic-version==2.10.0
+Send2Trash==1.8.3
+setuptools==68.2.2
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+soupsieve==2.5
+stack-data==0.6.3
+starlette==0.37.2
+terminado==0.18.1
+tinycss2==1.2.1
+tomlkit==0.12.0
+toolz==0.12.1
+tornado==6.4
+tqdm==4.66.2
+traitlets==5.14.2
+typer==0.12.3
+types-python-dateutil==2.9.0.20240316
+typing_extensions==4.11.0
+tzdata==2024.1
+uri-template==1.3.0
+uritemplate==4.1.1
+urllib3==2.2.1
+uvicorn==0.29.0
+wcwidth==0.2.13
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.7.0
+websockets==11.0.3
+wheel==0.41.2
+widgetsnbextension==4.0.10
+xxhash==3.4.1
+yarl==1.9.4