File size: 6,998 Bytes
64e9ead
 
 
f51746a
 
0d546a3
 
64e9ead
 
 
 
 
 
 
f51746a
0a07b5d
64e9ead
 
 
 
 
 
 
 
 
 
 
0d546a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64e9ead
 
 
 
 
 
 
 
 
f51746a
64e9ead
 
 
 
 
 
 
 
 
 
0d546a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f51746a
0d546a3
 
 
 
 
 
 
 
 
 
 
64e9ead
f51746a
0d546a3
 
 
 
 
 
 
 
 
 
 
 
 
64e9ead
 
0d546a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64e9ead
0d546a3
 
64e9ead
0d546a3
64e9ead
0d546a3
64e9ead
0d546a3
 
 
 
 
 
 
 
 
 
 
 
64e9ead
 
 
 
f51746a
64e9ead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f51746a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
import os
import re
import langchain
from paperqa import Docs, Settings
import asyncio
#import paperqa
#import paperscraper
from langchain_community.utilities import SerpAPIWrapper
from langchain.base_language import BaseLanguageModel
from langchain.tools import BaseTool
from langchain_openai import OpenAIEmbeddings
from pypdf.errors import PdfReadError
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
from langchain_openai import ChatOpenAI

# NOTE(review): this function is defined a second time further down in this
# module; the later definition is the one in effect after import.
def is_smiles(text):
    """Report whether RDKit can parse *text* as a SMILES string.

    The parse is requested with ``sanitize=False``.

    Args:
        text: Candidate SMILES string.

    Returns:
        ``True`` when ``Chem.MolFromSmiles`` returns a molecule object,
        ``False`` when it returns ``None`` or raises an exception.
    """
    try:
        mol = Chem.MolFromSmiles(text, sanitize=False)
    except Exception:
        # Any parser failure (e.g. an argument of the wrong type) means
        # "not a SMILES". A bare ``except:`` would also swallow
        # KeyboardInterrupt and SystemExit, which must propagate.
        return False
    return mol is not None



# NOTE(review): this function is defined a second time further down in this
# module; the later definition is the one in effect after import.
def is_multiple_smiles(text):
    """Tell whether *text* is a SMILES string that contains a ``"."``.

    Args:
        text: Candidate SMILES string.

    Returns:
        ``True`` only when ``is_smiles(text)`` holds and *text* contains a
        dot; ``False`` otherwise.
    """
    return "." in text if is_smiles(text) else False


# NOTE(review): this function is defined a second time further down in this
# module; the later definition is the one in effect after import.
def split_smiles(text):
    """Split *text* on every ``"."`` character.

    Args:
        text: String to split.

    Returns:
        The list of substrings between dots. No validation is performed, so
        empty fragments are kept.
    """
    fragments = text.split(".")
    return fragments
import os
import re

import langchain
from paperqa import Docs, Settings
import asyncio
# import paperqa
# import paperscraper
from langchain_community.utilities import SerpAPIWrapper
from langchain.base_language import BaseLanguageModel
from langchain.tools import BaseTool
from langchain_openai import OpenAIEmbeddings
from pypdf.errors import PdfReadError
from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem
import nest_asyncio
from langchain_openai import ChatOpenAI
# Module-level side effect: runs once when this module is imported.
# NOTE(review): presumably applied so that asyncio.run() can be used while an
# event loop is already running — confirm it is still needed, since the only
# asyncio.run() call in this file is inside commented-out code.
nest_asyncio.apply() 
def is_smiles(text):
    """Check whether *text* is accepted by RDKit's SMILES parser.

    ``Chem.MolFromSmiles`` is called with ``sanitize=False``.

    Args:
        text: Candidate SMILES string.

    Returns:
        ``False`` if the parser raises or returns ``None``; ``True``
        otherwise.
    """
    try:
        parsed = Chem.MolFromSmiles(text, sanitize=False)
    except Exception:
        # Treat every ordinary parser error as "not a SMILES", but do not
        # intercept KeyboardInterrupt / SystemExit the way a bare
        # ``except:`` would.
        return False
    if parsed is None:
        return False
    return True



def is_multiple_smiles(text):
    """Return whether *text* is a dot-containing SMILES string.

    Args:
        text: Candidate SMILES string.

    Returns:
        ``False`` when ``is_smiles(text)`` is false; otherwise whether
        ``"."`` occurs in *text*.
    """
    if not is_smiles(text):
        return False
    has_separator = "." in text
    return has_separator


def split_smiles(text):
    """Break *text* into the pieces separated by ``"."``.

    Args:
        text: String to split.

    Returns:
        List of the dot-separated substrings, in order. Empty pieces are
        kept and the input is not validated.
    """
    return text.split(sep=".")

def paper_scraper(search: str, pdir: str = "query", semantic_scholar_api_key: str = None) -> dict:
    """Run a paper search through the ``paperscraper`` package.

    Args:
        search: Query string forwarded to ``paperscraper.search_papers``.
        pdir: Forwarded as the ``pdir`` keyword (default ``"query"``).
        semantic_scholar_api_key: Forwarded unchanged as the keyword of the
            same name; ``None`` by default.

    Returns:
        Whatever ``paperscraper.search_papers`` returns, or an empty dict
        when that call raises ``KeyError``.

    Raises:
        ImportError: If the ``paperscraper`` package cannot be imported.
    """
    # The module-level ``import paperscraper`` is commented out in this file,
    # so the name was unresolved here and every call failed with NameError.
    # Importing lazily keeps the module importable without the package.
    try:
        import paperscraper
    except ImportError as err:
        raise ImportError(
            "paper_scraper requires the 'paperscraper' package to be installed"
        ) from err

    try:
        return paperscraper.search_papers(
            search,
            pdir=pdir,
            semantic_scholar_api_key=semantic_scholar_api_key,
        )
    except KeyError:
        return {}


# def paper_search(llm, query, semantic_scholar_api_key=None):
#     prompt = langchain.prompts.PromptTemplate(
#         input_variables=["question"],
#         template="""
#         I would like to find scholarly papers to answer
#         this question: {question}. Your response must be at
#         most 10 words long.
#         'A search query that would bring up papers that can answer
#         this question would be: '""",
#     )

#     query_chain = langchain.chains.llm.LLMChain(llm=llm, prompt=prompt)
#     if not os.path.isdir("./query"):  # todo: move to ckpt
#         os.mkdir("query/")
#     search = query_chain.invoke(query)
#     print("\nSearch:", search)
#     papers = paper_scraper(search['text'],   semantic_scholar_api_key=semantic_scholar_api_key)
#     return papers


# async def scholar2result_llm(llm, query, k=5, max_sources=2, openai_api_key=None, semantic_scholar_api_key=None):
#     """Useful to answer questions that require
#     technical knowledge. Ask a specific question."""
#     papers = paper_search(llm, query, semantic_scholar_api_key=semantic_scholar_api_key)
#     if len(papers) == 0:
#         return "Not enough papers found"
#     docs = Docs()
#     settings = Settings()
#     settings.llm = llm
    
#     not_loaded = 0
#     for path, data in papers.items():
#         try:
#             await docs.aadd(path)
#         except (ValueError, FileNotFoundError, PdfReadError):
#             not_loaded += 1

#     if not_loaded > 0:
#         print(f"\nFound {len(papers.items())} papers but couldn't load {not_loaded}.")
#     else:
#         print(f"\nFound {len(papers.items())} papers and loaded all of them.")

      
#     answer =  await docs.aquery(query)
#     return answer.answer


# class LiteratureSearch(BaseTool):
#     name: str = "LiteratureSearch"
#     description: str = (
#         "Useful to answer questions that require technical "
#         "knowledge. Ask a specific question."
#     )
#     llm: BaseLanguageModel = None
#     openai_api_key: str = None 
#     semantic_scholar_api_key: str = None


#     def __init__(self, llm, openai_api_key, semantic_scholar_api_key):
#         super().__init__()
        
#         # api keys
#         self.openai_api_key = openai_api_key
#         self.semantic_scholar_api_key = semantic_scholar_api_key
#         self.llm = ChatOpenAI(model="gpt-4o-2024-11-20",openai_api_key=self.openai_api_key,
#              base_url=os.getenv("OPENAI_API_BASE"))
#     def _run(self, query) -> str:
#         os.environ["OPENAI_API_KEY"] = self.openai_api_key
#         os.environ["OPENAI_API_BASE"] = os.getenv("OPENAI_API_BASE")
#         return asyncio.run(scholar2result_llm(
#             self.llm,
#             query,
#             openai_api_key=self.openai_api_key,
#             semantic_scholar_api_key=self.semantic_scholar_api_key
#         ))

#     async def _arun(self, query) -> str:
#         """Use the tool asynchronously."""
#         raise NotImplementedError("this tool does not support async")

# NOTE(review): this function is defined a second time further down in this
# module; the later definition is the one in effect after import.
def web_search(keywords, search_engine="google", serp_api_key=None):
    """Run *keywords* through ``SerpAPIWrapper`` and return its answer.

    Args:
        keywords: Query passed to ``SerpAPIWrapper.run``.
        search_engine: Forwarded as ``search_engine`` (default ``"google"``).
        serp_api_key: Optional API key. When omitted or empty, the
            ``SERP_API_KEY`` environment variable is used, as before.

    Returns:
        The value returned by ``SerpAPIWrapper(...).run(keywords)``, or the
        string ``"No results, try another search"`` if building or running
        the wrapper raises.
    """
    import logging  # function-scope import; keeps the module header untouched

    api_key = serp_api_key or os.getenv("SERP_API_KEY")
    try:
        return SerpAPIWrapper(
            serpapi_api_key=api_key, search_engine=search_engine
        ).run(keywords)
    except Exception as err:
        # Best-effort search: keep the fallback string, but stop swallowing
        # KeyboardInterrupt/SystemExit and leave a trace of the failure.
        # Only the exception type is logged so no key or URL can leak.
        logging.getLogger(__name__).warning(
            "Web search failed: %s", type(err).__name__
        )
        return "No results, try another search"


# NOTE(review): this class is defined a second time further down in this
# module; the later definition is the one in effect after import.
class WebSearch(BaseTool):
    # Tool (subclass of langchain's BaseTool) that delegates a query to
    # ``web_search`` once a SerpAPI key has been supplied.
    name: str = "WebSearch"
    description: str = (
        "Input a specific question, returns an answer from web search. "
        "Give more detailed information and use more general features to formulate your questions."
    )
    # Key supplied at construction time; only tested for truthiness in _run.
    serp_api_key: str = None

    def __init__(self, serp_api_key: str = None):
        """Create the tool and remember *serp_api_key* on the instance."""
        super().__init__()
        self.serp_api_key = serp_api_key

    def _run(self, query: str) -> str:
        """Return the ``web_search`` result for *query*.

        When no key was supplied, a fixed explanatory message is returned
        and no search is attempted.
        """
        if self.serp_api_key:
            return web_search(query)
        return "No SerpAPI key found. This tool may not be used without a SerpAPI key."

    async def _arun(self, query: str) -> str:
        """Asynchronous execution is unsupported; always raises."""
        raise NotImplementedError("Async not implemented")


def web_search(keywords, search_engine="google", serp_api_key=None):
    """Run *keywords* through ``SerpAPIWrapper`` and return its answer.

    Args:
        keywords: Query passed to ``SerpAPIWrapper.run``.
        search_engine: Forwarded as ``search_engine`` (default ``"google"``).
        serp_api_key: Optional API key. When omitted or empty, the
            ``SERP_API_KEY`` environment variable is used, as before.

    Returns:
        The value returned by ``SerpAPIWrapper(...).run(keywords)``, or the
        string ``"No results, try another search"`` if building or running
        the wrapper raises.
    """
    import logging  # function-scope import; keeps the module header untouched

    api_key = serp_api_key or os.getenv("SERP_API_KEY")
    try:
        return SerpAPIWrapper(
            serpapi_api_key=api_key, search_engine=search_engine
        ).run(keywords)
    except Exception as err:
        # Best-effort search: keep the fallback string, but stop swallowing
        # KeyboardInterrupt/SystemExit and leave a trace of the failure.
        # Only the exception type is logged so no key or URL can leak.
        logging.getLogger(__name__).warning(
            "Web search failed: %s", type(err).__name__
        )
        return "No results, try another search"


class WebSearch(BaseTool):
    # Tool (subclass of langchain's BaseTool) that answers a query through
    # ``web_search`` using the SerpAPI key given at construction time.
    name: str = "WebSearch"
    description: str = (
        "Input a specific question, returns an answer from web search. "
        "Give more detailed information and use more general features to formulate your questions."
    )
    # Key supplied at construction time; _run refuses to search without it.
    serp_api_key: str = None

    def __init__(self, serp_api_key: str = None):
        """Create the tool and remember *serp_api_key* on the instance."""
        super().__init__()
        self.serp_api_key = serp_api_key

    def _run(self, query: str) -> str:
        """Search the web for *query* with the configured key.

        Returns a fixed explanatory message, without searching, when no key
        was supplied.
        """
        if not self.serp_api_key:
            return (
                "No SerpAPI key found. This tool may not be used without a SerpAPI key."
            )
        # Pass the configured key through. Previously the search only read
        # the SERP_API_KEY environment variable, so a key handed to the
        # constructor passed the check above but was never actually used.
        return web_search(query, serp_api_key=self.serp_api_key)

    async def _arun(self, query: str) -> str:
        """Asynchronous execution is unsupported; always raises."""
        raise NotImplementedError("Async not implemented")