# TeLLAgent / tool/search.py
# (Scraped Hugging Face file-page header: author jinysun, commit 0d546a3 "Update tool/search.py".)
import os
import re
import langchain
from paperqa import Docs, Settings
import asyncio
#import paperqa
#import paperscraper
from langchain_community.utilities import SerpAPIWrapper
from langchain.base_language import BaseLanguageModel
from langchain.tools import BaseTool
from langchain_openai import OpenAIEmbeddings
from pypdf.errors import PdfReadError
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from langchain_openai import ChatOpenAI
def is_smiles(text):
    """Return True if *text* parses as a SMILES string, else False.

    RDKit is called with ``sanitize=False``, so only gross syntax errors
    are rejected; chemically invalid but well-formed strings still pass.
    """
    try:
        # MolFromSmiles returns None (rather than raising) on most parse
        # failures; the except additionally guards against non-string input.
        return Chem.MolFromSmiles(text, sanitize=False) is not None
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed.
        return False
def is_multiple_smiles(text):
    """Return True when *text* is valid SMILES containing several fragments."""
    # In SMILES notation, "." joins disconnected fragments; a valid string
    # containing a dot therefore encodes more than one component.
    return is_smiles(text) and "." in text
def split_smiles(text):
    """Split a multi-component SMILES string on "." into its fragments."""
    fragment_separator = "."
    return text.split(fragment_separator)
import os
import re
import langchain
from paperqa import Docs, Settings
import asyncio
# import paperqa
# import paperscraper
from langchain_community.utilities import SerpAPIWrapper
from langchain.base_language import BaseLanguageModel
from langchain.tools import BaseTool
from langchain_openai import OpenAIEmbeddings
from pypdf.errors import PdfReadError
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
import nest_asyncio
from langchain_openai import ChatOpenAI
# Patch the current event loop so asyncio.run()/loop.run_until_complete()
# can be called even while a loop is already running (per nest_asyncio docs).
nest_asyncio.apply()
def is_smiles(text):
    """Check whether *text* is syntactically valid SMILES.

    Sanitization is disabled, so chemistry-level validation is skipped;
    only parse errors cause rejection.
    """
    try:
        mol = Chem.MolFromSmiles(text, sanitize=False)
    except Exception:
        # Narrowed from a bare `except:` — don't mask SystemExit or
        # KeyboardInterrupt; RDKit signals most failures by returning None.
        return False
    return mol is not None
def is_multiple_smiles(text):
    """True for a valid SMILES string encoding more than one fragment."""
    if not is_smiles(text):
        return False
    # Disconnected fragments are separated by "." in SMILES.
    return "." in text
def split_smiles(text):
    """Return the "."-separated fragments of a (possibly multi-part) SMILES."""
    fragments = text.split(".")
    return fragments
def paper_scraper(search: str, pdir: str = "query", semantic_scholar_api_key: str = None) -> dict:
    """Search for papers matching *search* and download them into *pdir*.

    Args:
        search: Free-text search query.
        pdir: Directory that downloaded papers are stored under.
        semantic_scholar_api_key: Optional Semantic Scholar API key.

    Returns:
        A dict of results from ``paperscraper.search_papers`` (presumably
        local path -> metadata — confirm against paperscraper docs), or an
        empty dict when the search fails or paperscraper is unavailable.
    """
    try:
        # Imported lazily: the module-level import is commented out above,
        # so the original code raised NameError on every call. Treat a
        # missing optional dependency as "no results" instead.
        import paperscraper
        return paperscraper.search_papers(
            search,
            pdir=pdir,
            semantic_scholar_api_key=semantic_scholar_api_key,
        )
    except (ImportError, KeyError):
        return {}
# def paper_search(llm, query, semantic_scholar_api_key=None):
# prompt = langchain.prompts.PromptTemplate(
# input_variables=["question"],
# template="""
# I would like to find scholarly papers to answer
# this question: {question}. Your response must be at
# most 10 words long.
# 'A search query that would bring up papers that can answer
# this question would be: '""",
# )
# query_chain = langchain.chains.llm.LLMChain(llm=llm, prompt=prompt)
# if not os.path.isdir("./query"): # todo: move to ckpt
# os.mkdir("query/")
# search = query_chain.invoke(query)
# print("\nSearch:", search)
# papers = paper_scraper(search['text'], semantic_scholar_api_key=semantic_scholar_api_key)
# return papers
# async def scholar2result_llm(llm, query, k=5, max_sources=2, openai_api_key=None, semantic_scholar_api_key=None):
# """Useful to answer questions that require
# technical knowledge. Ask a specific question."""
# papers = paper_search(llm, query, semantic_scholar_api_key=semantic_scholar_api_key)
# if len(papers) == 0:
# return "Not enough papers found"
# docs = Docs()
# settings = Settings()
# settings.llm = llm
# not_loaded = 0
# for path, data in papers.items():
# try:
# await docs.aadd(path)
# except (ValueError, FileNotFoundError, PdfReadError):
# not_loaded += 1
# if not_loaded > 0:
# print(f"\nFound {len(papers.items())} papers but couldn't load {not_loaded}.")
# else:
# print(f"\nFound {len(papers.items())} papers and loaded all of them.")
# answer = await docs.aquery(query)
# return answer.answer
# class LiteratureSearch(BaseTool):
# name: str = "LiteratureSearch"
# description: str = (
# "Useful to answer questions that require technical "
# "knowledge. Ask a specific question."
# )
# llm: BaseLanguageModel = None
# openai_api_key: str = None
# semantic_scholar_api_key: str = None
# def __init__(self, llm, openai_api_key, semantic_scholar_api_key):
# super().__init__()
# # api keys
# self.openai_api_key = openai_api_key
# self.semantic_scholar_api_key = semantic_scholar_api_key
# self.llm = ChatOpenAI(model="gpt-4o-2024-11-20",openai_api_key=self.openai_api_key,
# base_url=os.getenv("OPENAI_API_BASE"))
# def _run(self, query) -> str:
# os.environ["OPENAI_API_KEY"] = self.openai_api_key
# os.environ["OPENAI_API_BASE"] = os.getenv("OPENAI_API_BASE")
# return asyncio.run(scholar2result_llm(
# self.llm,
# query,
# openai_api_key=self.openai_api_key,
# semantic_scholar_api_key=self.semantic_scholar_api_key
# ))
# async def _arun(self, query) -> str:
# """Use the tool asynchronously."""
# raise NotImplementedError("this tool does not support async")
def web_search(keywords, search_engine="google", serp_api_key=None):
    """Run *keywords* through SerpAPI and return the textual answer.

    Args:
        keywords: The search query string.
        search_engine: SerpAPI engine name (default ``"google"``).
        serp_api_key: Explicit API key; when None, falls back to the
            ``SERP_API_KEY`` environment variable (the original behavior,
            kept for backward compatibility).

    Returns:
        The SerpAPI result string, or a fallback message on any failure.
    """
    try:
        wrapper = SerpAPIWrapper(
            serpapi_api_key=serp_api_key or os.getenv("SERP_API_KEY"),
            search_engine=search_engine,
        )
        return wrapper.run(keywords)
    except Exception:
        # Narrowed from a bare `except:`; still best-effort so a missing
        # key or a network error degrades to a friendly message.
        return "No results, try another search"
class WebSearch(BaseTool):
    """LangChain tool that answers a specific question via SerpAPI web search."""

    name: str = "WebSearch"
    description: str = (
        "Input a specific question, returns an answer from web search. "
        "Give more detailed information and use more general features to formulate your questions."
    )
    serp_api_key: str = None

    def __init__(self, serp_api_key: str = None):
        super().__init__()
        self.serp_api_key = serp_api_key

    def _run(self, query: str) -> str:
        # NOTE(review): the stored key only gates execution; web_search()
        # itself reads SERP_API_KEY from the environment — confirm intended.
        if self.serp_api_key:
            return web_search(query)
        return (
            "No SerpAPI key found. This tool may not be used without a SerpAPI key."
        )

    async def _arun(self, query: str) -> str:
        """Async execution is not supported."""
        raise NotImplementedError("Async not implemented")
def web_search(keywords, search_engine="google", serp_api_key=None):
    """Answer *keywords* using SerpAPI; best-effort, never raises.

    Args:
        keywords: Search query.
        search_engine: SerpAPI engine name (default ``"google"``).
        serp_api_key: Optional explicit key; None preserves the original
            behavior of reading the ``SERP_API_KEY`` environment variable.

    Returns:
        SerpAPI's answer string, or a fallback message when anything fails.
    """
    try:
        key = serp_api_key if serp_api_key else os.getenv("SERP_API_KEY")
        return SerpAPIWrapper(
            serpapi_api_key=key, search_engine=search_engine
        ).run(keywords)
    except Exception:
        # Narrowed from a bare `except:` so interrupts propagate; all other
        # failures (bad key, network, parse) degrade to the fallback text.
        return "No results, try another search"
class WebSearch(BaseTool):
    """Tool wrapper exposing SerpAPI web search to a LangChain agent."""

    name: str = "WebSearch"
    description: str = (
        "Input a specific question, returns an answer from web search. "
        "Give more detailed information and use more general features to formulate your questions."
    )
    serp_api_key: str = None

    def __init__(self, serp_api_key: str = None):
        super().__init__()
        self.serp_api_key = serp_api_key

    def _run(self, query: str) -> str:
        missing_key = not self.serp_api_key
        if missing_key:
            # NOTE(review): the key is only checked here; web_search() reads
            # SERP_API_KEY from the environment — verify this is intentional.
            return (
                "No SerpAPI key found. This tool may not be used without a SerpAPI key."
            )
        return web_search(query)

    async def _arun(self, query: str) -> str:
        """Asynchronous use is not implemented for this tool."""
        raise NotImplementedError("Async not implemented")