Spaces:

jinysun
/

TeLLAgent

Running

App Files Files Community

jinysun committed on Jun 23, 2025

Commit

f51746a

verified ·

1 Parent(s): 503b822

Update tool/search.py

Browse files

Files changed (1) hide show

tool/search.py +31 -25

tool/search.py CHANGED Viewed

@@ -6,8 +6,12 @@ subprocess.check_call(["pip", "install", "--no-deps", "paper-scraper @ git+https
 subprocess.check_call(["pip", "install", "--no-deps", "google-search-results"])
 import langchain
 import paperqa
 import paperscraper
 from langchain_community.utilities import SerpAPIWrapper
@@ -17,7 +21,9 @@ from langchain_openai import OpenAIEmbeddings
 from pypdf.errors import PdfReadError
 from rdkit import Chem, DataStructs
 from rdkit.Chem import AllChem
 def is_smiles(text):
     try:
         m = Chem.MolFromSmiles(text, sanitize=False)
@@ -38,7 +44,7 @@ def is_multiple_smiles(text):
 def split_smiles(text):
     return text.split(".")
-def paper_scrap(search: str, pdir: str = "query", semantic_scholar_api_key: str = None) -> dict:
     try:
         return paperscraper.search_papers(
             search,
@@ -63,27 +69,26 @@ def paper_search(llm, query, semantic_scholar_api_key=None):
     query_chain = langchain.chains.llm.LLMChain(llm=llm, prompt=prompt)
     if not os.path.isdir("./query"):  # todo: move to ckpt
         os.mkdir("query/")
-    search = query_chain.run(query)
     print("\nSearch:", search)
-    papers = paper_scrap(search, pdir=f"query/{re.sub(' ', '', search)}", semantic_scholar_api_key=semantic_scholar_api_key)
     return papers
-def scholar2result_llm(llm, query, k=5, max_sources=2, openai_api_key=None, semantic_scholar_api_key=None):
     """Useful to answer questions that require
     technical knowledge. Ask a specific question."""
     papers = paper_search(llm, query, semantic_scholar_api_key=semantic_scholar_api_key)
     if len(papers) == 0:
         return "Not enough papers found"
-    docs = paperqa.Docs(
-        llm=llm,
-        summary_llm=llm,
-        embeddings=OpenAIEmbeddings(openai_api_key=openai_api_key),
-    )
     not_loaded = 0
     for path, data in papers.items():
         try:
-            docs.add(path, data["citation"])
         except (ValueError, FileNotFoundError, PdfReadError):
             not_loaded += 1
@@ -92,12 +97,13 @@ def scholar2result_llm(llm, query, k=5, max_sources=2, openai_api_key=None, sema
     else:
         print(f"\nFound {len(papers.items())} papers and loaded all of them.")
-    answer = docs.query(query, k=k, max_sources=max_sources).formatted_answer
-    return answer
-class Scholar2ResultLLM(BaseTool):
-    name : str = "LiteratureSearch"
     description: str = (
         "Useful to answer questions that require technical "
         "knowledge. Ask a specific question."
@@ -109,28 +115,30 @@ class Scholar2ResultLLM(BaseTool):
     def __init__(self, llm, openai_api_key, semantic_scholar_api_key):
         super().__init__()
-        self.llm = llm
         # api keys
         self.openai_api_key = openai_api_key
         self.semantic_scholar_api_key = semantic_scholar_api_key
     def _run(self, query) -> str:
-        return scholar2result_llm(
             self.llm,
             query,
             openai_api_key=self.openai_api_key,
             semantic_scholar_api_key=self.semantic_scholar_api_key
-        )
     async def _arun(self, query) -> str:
         """Use the tool asynchronously."""
         raise NotImplementedError("this tool does not support async")
 def web_search(keywords, search_engine="google"):
     """Run a SerpAPI web search and return the result text.

     The SerpAPI key is read from the ``SERP_API_KEY`` environment variable
     instead of being embedded in the source, so no credential is committed.

     Args:
         keywords: Query string passed to ``SerpAPIWrapper.run``.
         search_engine: Forwarded as ``search_engine``; defaults to "google".

     Returns:
         The value returned by ``SerpAPIWrapper.run``, or the fixed string
         "No results, try another search" if the lookup raises.
     """
     # Local import: the module-level import list is not fully visible here.
     import os

     try:
         return SerpAPIWrapper(
             serpapi_api_key=os.getenv("SERP_API_KEY"), search_engine=search_engine
         ).run(keywords)
     except Exception:
         # Best-effort tool: any search failure becomes a fallback message,
         # but KeyboardInterrupt/SystemExit are no longer swallowed.
         return "No results, try another search"
@@ -156,6 +164,4 @@ class WebSearch(BaseTool):
         return web_search(query)
     async def _arun(self, query: str) -> str:
-        raise NotImplementedError("Async not implemented")

 subprocess.check_call(["pip", "install", "--no-deps", "google-search-results"])
+import os
+import re
 import langchain
+from paperqa import Docs, Settings
+import asyncio
 import paperqa
 import paperscraper
 from langchain_community.utilities import SerpAPIWrapper
 from pypdf.errors import PdfReadError
 from rdkit import Chem, DataStructs
 from rdkit.Chem import AllChem
+import nest_asyncio
+from langchain_openai import ChatOpenAI
+nest_asyncio.apply()
 def is_smiles(text):
     try:
         m = Chem.MolFromSmiles(text, sanitize=False)
 def split_smiles(text):
     """Split ``text`` on every '.' and return the resulting list of pieces."""
     pieces = str.split(text, ".")
     return pieces
+def paper_scraper(search: str, pdir: str = "query", semantic_scholar_api_key: str = None) -> dict:
     try:
         return paperscraper.search_papers(
             search,
     query_chain = langchain.chains.llm.LLMChain(llm=llm, prompt=prompt)
     if not os.path.isdir("./query"):  # todo: move to ckpt
         os.mkdir("query/")
+    search = query_chain.invoke(query)
     print("\nSearch:", search)
+    papers = paper_scraper(search['text'],   semantic_scholar_api_key=semantic_scholar_api_key)
     return papers
+async def scholar2result_llm(llm, query, k=5, max_sources=2, openai_api_key=None, semantic_scholar_api_key=None):
     """Useful to answer questions that require
     technical knowledge. Ask a specific question."""
     papers = paper_search(llm, query, semantic_scholar_api_key=semantic_scholar_api_key)
     if len(papers) == 0:
         return "Not enough papers found"
+    docs = Docs()
+    settings = Settings()
+    settings.llm = llm
     not_loaded = 0
     for path, data in papers.items():
         try:
+            await docs.aadd(path)
         except (ValueError, FileNotFoundError, PdfReadError):
             not_loaded += 1
     else:
         print(f"\nFound {len(papers.items())} papers and loaded all of them.")
+    answer =  await docs.aquery(query)
+    return answer.answer
+class LiteratureSearch(BaseTool):
+    name: str = "LiteratureSearch"
     description: str = (
         "Useful to answer questions that require technical "
         "knowledge. Ask a specific question."
     def __init__(self, llm, openai_api_key, semantic_scholar_api_key):
         """Store the API keys and build the chat model used by this tool.

         Note: the ``llm`` argument is accepted but not used here; ``self.llm``
         is always a ``ChatOpenAI`` for model "gpt-4o-2024-11-20".

         Args:
             llm: Unused by this constructor.
             openai_api_key: Key stored on the instance and given to ChatOpenAI.
             semantic_scholar_api_key: Key stored on the instance.
         """
         super().__init__()
         # api keys
         self.openai_api_key = openai_api_key
         self.semantic_scholar_api_key = semantic_scholar_api_key
         # Endpoint comes from the environment; os.getenv yields None when unset.
         api_base = os.getenv("OPENAI_API_BASE")
         self.llm = ChatOpenAI(
             model="gpt-4o-2024-11-20",
             openai_api_key=self.openai_api_key,
             base_url=api_base,
         )
     def _run(self, query) -> str:
         """Answer ``query`` by running ``scholar2result_llm`` to completion.

         Args:
             query: Question forwarded to ``scholar2result_llm``.

         Returns:
             The value produced by the ``scholar2result_llm`` coroutine.
         """
         # Export the key to the process environment, presumably for libraries
         # that read it from there (confirm against paperqa). os.environ only
         # accepts strings, so a missing key is skipped instead of raising
         # TypeError.
         if self.openai_api_key is not None:
             os.environ["OPENAI_API_KEY"] = self.openai_api_key
         # The previous self-assignment of OPENAI_API_BASE was dropped: it was
         # a no-op when the variable was set and raised TypeError when unset.
         return asyncio.run(
             scholar2result_llm(
                 self.llm,
                 query,
                 openai_api_key=self.openai_api_key,
                 semantic_scholar_api_key=self.semantic_scholar_api_key,
             )
         )
     async def _arun(self, query) -> str:
         """Use the tool asynchronously."""
         raise NotImplementedError("this tool does not support async")
 def web_search(keywords, search_engine="google"):
     """Run a SerpAPI web search and return the result text.

     Args:
         keywords: Query string passed to ``SerpAPIWrapper.run``.
         search_engine: Forwarded as ``search_engine``; defaults to "google".

     Returns:
         The value returned by ``SerpAPIWrapper.run``, or the fixed string
         "No results, try another search" if the lookup raises.
     """
     try:
         # The API key is taken from the SERP_API_KEY environment variable.
         return SerpAPIWrapper(
             serpapi_api_key=os.getenv("SERP_API_KEY"), search_engine=search_engine
         ).run(keywords)
     except Exception:
         # Best-effort tool: any search failure becomes a fallback message,
         # but KeyboardInterrupt/SystemExit are no longer swallowed.
         return "No results, try another search"
         return web_search(query)
     async def _arun(self, query: str) -> str:
+        raise NotImplementedError("Async not implemented")