Spaces:
Build error
Build error
Commit
·
f274d93
1
Parent(s):
5216067
First commit
Browse files
agent.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from mapi_tools import MAPI_class_tools, MAPI_reg_tools
|
| 2 |
+
from utils import common_tools
|
| 3 |
+
from langchain import OpenAI
|
| 4 |
+
from gpt_index import GPTListIndex, GPTIndexMemory
|
| 5 |
+
from langchain import agents
|
| 6 |
+
from langchain.agents import initialize_agent
|
| 7 |
+
|
| 8 |
+
# Classification tool sets. Arguments are:
# (Materials Project summary field, display name, label when true, label when false).
stability = MAPI_class_tools(
    "is_stable", "stable", "Stable", "Unstable"
)
magnetism = MAPI_class_tools(
    "is_magnetic", "magnetic", "Magnetic", "Not magnetic"
)
metal = MAPI_class_tools(
    "is_metal", "metallic", "Metal", "Not metal"
)
gap_direct = MAPI_class_tools(
    "is_gap_direct", "gap direct", "Gap direct", "Gap indirect"
)

# Regression tool sets. Arguments are:
# (Materials Project summary field, display name).
# The display name is interpolated into tool names, tool descriptions and
# prompts, so it must describe the property accurately.
band_gap = MAPI_reg_tools(
    "band_gap", "band gap"
)
# The display names below previously ended in a stray "gap" copied from the
# band-gap entry.
energy_per_atom = MAPI_reg_tools(
    "energy_per_atom", "energy per atom"
)
formation_energy_per_atom = MAPI_reg_tools(
    "formation_energy_per_atom", "formation energy per atom"
)
volume = MAPI_reg_tools(
    "volume", "volume"
)
density = MAPI_reg_tools(
    "density", "density"
)
atomic_density = MAPI_reg_tools(
    "density_atomic", "atomic density"
)
electronic_energy = MAPI_reg_tools(
    "e_electronic", "electronic energy"
)
# The summary field is named `e_ionic` (the previous `e_ion` is not a field of
# the summary documents), and the display name now matches the variable name.
ionic_energy = MAPI_reg_tools(
    "e_ionic", "ionic energy"
)
total_energy = MAPI_reg_tools(
    "e_total", "total energy"
)


# Conversation memory handed to the agent, stored under the "chat_history" key.
memory = GPTIndexMemory(index=GPTListIndex([]), memory_key="chat_history", query_kwargs={"response_mode": "compact"})
llm = OpenAI(temperature=0.7)

# Full tool list given to the agent: every property tool set, LangChain's
# built-in math and Python REPL tools, and the shared chemistry/web tools.
# NOTE(review): each get_tools() call contributes tools with the same fixed
# names (the "Get atoms in material" tool and the LLM prediction tool), so this
# list contains duplicate tool names -- confirm how the agent resolves them.
tools = (
    stability.get_tools() +
    magnetism.get_tools() +
    gap_direct.get_tools() +
    metal.get_tools() +
    band_gap.get_tools() +
    volume.get_tools() +
    density.get_tools() +
    atomic_density.get_tools() +
    formation_energy_per_atom.get_tools() +
    energy_per_atom.get_tools() +
    electronic_energy.get_tools() +
    ionic_energy.get_tools() +
    total_energy.get_tools() +
    agents.load_tools(["llm-math", "python_repl"], llm=llm) +
    common_tools
)
# Module-level agent; app.py calls `agent.agent_chain.run(...)`.
agent_chain = initialize_agent(tools, llm, agent="zero-shot-react-description", verbose=True, memory=memory)
|
app.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import agent
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
css_style = """
|
| 7 |
+
.gradio-container {
|
| 8 |
+
font-family: "IBM Plex Mono";
|
| 9 |
+
}
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
def agent_run(q, openai_api_key, mapi_api_key):
    """Run the agent chain on a question and return its answer as text.

    The two API keys are exported as the ``OPENAI_API_KEY`` and
    ``MAPI_API_KEY`` environment variables before the agent runs.
    ``os.environ`` is process-wide, so the most recently submitted keys are
    the ones every later call sees.

    Args:
        q: The question typed by the user.
        openai_api_key: OpenAI API key entered in the UI.
        mapi_api_key: Materials Project API key entered in the UI.

    Returns:
        The agent's answer, or a generic error message if the run failed.
    """
    import logging

    os.environ["OPENAI_API_KEY"] = openai_api_key
    os.environ["MAPI_API_KEY"] = mapi_api_key
    try:
        return agent.agent_chain.run(input=q)
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still propagate, and log the traceback
        # so the cause is not silently lost.
        logging.getLogger(__name__).exception("Agent run failed")
        return "Something went wrong, please try again"
|
| 20 |
+
|
| 21 |
+
# Build the Gradio interface: an introduction, a collapsible list of supported
# properties, two password fields for the API keys, and a query tab.
with gr.Blocks(css=css_style) as demo:
    # Introductory text shown at the top of the page.
    gr.Markdown(f'''
# A LLM application developed during the LLM March *MADNESS* Hackathon
- Developed by: Mayk Caldas ([@maykcaldas](https://github.com/maykcaldas)) and Sam Cox ([@SamCox822](https://github.com/SamCox822))

## What is this?
- This is a demo of a LLM agent that can answer questions about materials science using the [LangChain🦜️🔗](https://github.com/hwchase17/langchain/) and the [Materials Project API](https://materialsproject.org/).
- Its behave is based on Large Language Models (LLM) and aim to be a tool to help scientists with quick predictions of a nunerous of properties of materials.
It is a work in progress, so please be patient with it.


### Some keys are needed in order to use it:
1. An openAI API key ( [Check it here](https://platform.openai.com/account/api-keys) )
2. A material project's API key ( [Check it here](https://materialsproject.org/api#api-key) )
''')
    # Collapsed by default (open=False).
    with gr.Accordion("List of properties we developed tools for", open=False):
        gr.Markdown(f"""
Classification tasks: Stability, magnetism, gap_direct, metal,
regression tasks: band_gap, volume, density, atomic_density, formation energy per atom, energy per atom, electronic energy, ionic energy, total energy
""")
    # Both keys use type="password" textboxes.
    openai_api_key = gr.Textbox(
        label="OpenAI API Key", placeholder="sk-...", type="password")
    mapi_api_key = gr.Textbox(
        label="Material Project API Key", placeholder="...", type="password")
    with gr.Tab("MAPI Query"):
        text_input = gr.Textbox(label="", placeholder="Enter question here...")
        text_output = gr.Textbox()
        text_button = gr.Button("Query!")

    # Clicking the button calls agent_run(question, openai key, mapi key) and
    # writes its return value into text_output.
    text_button.click(agent_run, inputs=[text_input, openai_api_key, mapi_api_key], outputs=text_output)

# Start the Gradio app when this module is executed.
demo.launch()
|
mapi_tools.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from mp_api.client import MPRester
|
| 2 |
+
from emmet.core.summary import HasProps
|
| 3 |
+
import openai
|
| 4 |
+
import langchain
|
| 5 |
+
from langchain import OpenAI
|
| 6 |
+
from langchain import agents
|
| 7 |
+
from langchain.agents import initialize_agent
|
| 8 |
+
from langchain.agents import Tool, tool
|
| 9 |
+
from langchain import LLMMathChain, SerpAPIWrapper
|
| 10 |
+
from gpt_index import GPTListIndex, GPTIndexMemory
|
| 11 |
+
from langchain import SerpAPIWrapper
|
| 12 |
+
from langchain.prompts.few_shot import FewShotPromptTemplate
|
| 13 |
+
from langchain.prompts.prompt import PromptTemplate
|
| 14 |
+
from langchain.vectorstores import FAISS, Chroma
|
| 15 |
+
from langchain.embeddings import OpenAIEmbeddings
|
| 16 |
+
from langchain.prompts.example_selector import (MaxMarginalRelevanceExampleSelector,
|
| 17 |
+
SemanticSimilarityExampleSelector)
|
| 18 |
+
import requests
|
| 19 |
+
from rdkit import Chem
|
| 20 |
+
import pandas as pd
|
| 21 |
+
import os
|
| 22 |
+
|
| 23 |
+
class MAPITools:
    """Base class for a set of LangChain tools built around one Materials Project property.

    Several methods read ``self.prop`` (the summary field requested from the
    API) and ``self.prop_name`` (the name interpolated into tool names and
    descriptions). This base class does not set them; the subclasses in this
    file assign both in their ``__init__``. ``check_prop_by_formula`` and
    ``create_context_prompt`` must be overridden by subclasses.
    """

    def __init__(self):
        # Completion model used by LLM_predict.
        self.model = 'text-ada-001'  # maybe change to gpt-4 when ready
        # Number of examples the subclasses request from their example selector.
        self.k = 10

    def get_material_atoms(self, formula):
        '''Receives a material formula and returns the atoms symbols present in it separated by comma.

        Each symbol is listed once, in order of first appearance, e.g.
        "Fe2O3" -> "Fe,O" and "CH3COOH" -> "C,H,O". An empty formula gives
        an empty string.
        '''
        import re

        # A symbol is an uppercase letter followed by any lowercase letters;
        # stoichiometric counts are simply not matched.
        symbols = re.findall(r"[A-Z][a-z]*", formula)
        # dict.fromkeys removes repeated symbols while keeping their order.
        return ",".join(dict.fromkeys(symbols))

    def check_prop_by_formula(self, formula):
        """Look up the property for ``formula``; implemented by subclasses."""
        raise NotImplementedError('Should be implemented in children classes')

    def search_similars_by_atom(self, atoms):
        '''This function receives a string with the atoms separated by comma as input and returns a list of similar materials'''
        # Spaces are removed so "Fe, O" and "Fe,O" give the same element list.
        atoms = atoms.replace(" ", "")
        # The API key is read from the MAPI_API_KEY environment variable.
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(elements=atoms.split(','), fields=["formula_pretty", self.prop])
        return docs

    def create_context_prompt(self, formula):
        """Build a few-shot prompt for ``formula``; implemented by subclasses."""
        raise NotImplementedError('Should be implemented in children classes')

    def LLM_predict(self, prompt):
        ''' This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion'''
        llm = OpenAI(
            model_name=self.model,
            temperature=0.7,
            n=1,
            best_of=5,
            top_p=1.0,
            stop=["\n\n", "###", "#", "##"],
            # model_kwargs=kwargs,
        )
        # One prompt is sent, so the text of the first generation of the
        # first (only) prompt is returned.
        return llm.generate([prompt]).generations[0][0].text

    def get_tools(self):
        """Return the LangChain ``Tool`` objects exposing this instance's methods.

        Requires ``self.prop_name`` to be set (done by the subclasses).
        """
        return [
            Tool(
                name = "Get atoms in material",
                func = self.get_material_atoms,
                description = (
                    "Receives a material formula and returns the atoms symbols present in it separated by comma."
                )
            ),
            Tool(
                name = f"Checks if material is {self.prop_name} by formula",
                func = self.check_prop_by_formula,
                description = (
                    f"This functions searches in the material project's API for the formula and returns if it is {self.prop_name} or not."
                )
            ),
            # Tool(
            #     name = "Search similar materials by atom",
            #     func = self.search_similars_by_atom,
            #     description = (
            #         "This function receives a string with the atoms separated by comma as input and returns a list of similar materials."
            #     )
            # ),
            Tool(
                name = f"Create {self.prop_name} context to LLM search",
                func = self.create_context_prompt,
                # The wording differs for classification and regression tool sets.
                description = (
                    f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict if the material is {self.prop_name}."
                    if isinstance(self, MAPI_class_tools) else
                    f"This function received a material formula as input and create a prompt to be inputed in the LLM_predict tool to predict the {self.prop_name} of a material."
                )
            ),
            # The name was previously misspelled "LLM predictiom".
            Tool(name = "LLM prediction",
                func = self.LLM_predict,
                description = (
                    "This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion"
                )
            )
        ]
|
| 105 |
+
|
| 106 |
+
class MAPI_class_tools(MAPITools):
    """Tool set for a boolean (classification) Materials Project property.

    Args:
        prop: Summary field requested from the API (e.g. ``"is_stable"``).
        prop_name: Property name interpolated into tool names and prompts.
        p_label: Label used when the property value is truthy.
        n_label: Label used when the property value is falsy.
    """

    def __init__(self, prop, prop_name, p_label, n_label):
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name
        self.p_label = p_label
        self.n_label = n_label

    def check_prop_by_formula(self, formula):
        """Search the Materials Project for ``formula`` and return its label.

        Only the first returned document is inspected, and only when its
        ``formula_pretty`` equals ``formula`` exactly.

        Returns:
            ``self.p_label`` or ``self.n_label`` according to the truthiness
            of the property value, or a "Could not find any material"
            message when there is no matching first document.
        """
        # NOTE: this used to be an f-string, which Python does not treat as a
        # docstring; it is now a plain docstring.
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
            if docs and docs[0].formula_pretty == formula:
                return self.p_label if docs[0].dict()[self.prop] else self.n_label
            return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """Build a few-shot prompt asking whether ``formula`` has this property.

        Materials containing the same elements are fetched from the
        Materials Project, labelled with ``p_label``/``n_label``, and offered
        to an example selector that picks ``self.k`` of them for the prompt.

        Returns:
            The formatted prompt string, ending with the question about
            ``formula``.
        """
        elements = self.get_material_atoms(formula)
        similars = self.search_similars_by_atom(elements)
        similars = [
            {'formula': ex.formula_pretty,
             'prop': self.p_label if ex.dict()[self.prop] else self.n_label
            } for ex in similars
        ]
        # Remove repeated (formula, label) pairs before building the selector.
        examples = pd.DataFrame(similars).drop_duplicates().to_dict(orient="records")
        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            examples,
            OpenAIEmbeddings(),
            FAISS,
            k=self.k,
        )

        prefix=(
            f'You are a bot who can predict if a material is {self.prop_name}.\n'
            f'Given this list of known materials and the information if they are {self.p_label} or {self.n_label}, \n'
            f'you need to answer the question if the last material is {self.prop_name}:'
        )
        # Doubled braces keep {formula}/{prop} as template placeholders, while
        # the property name is filled in immediately by the f-string.
        prompt_template=PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"Is {{formula}} a {self.prop_name} material?@@@\n{{prop}}###",
        )
        # The suffix repeats the question without an answer, for the model to complete.
        suffix = f"Is {{formula}} a {self.prop_name} material?@@@\n"
        prompt = FewShotPromptTemplate(
            # examples=examples,
            example_prompt=prompt_template,
            example_selector=example_selector,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"])

        return prompt.format(formula=formula)
|
| 159 |
+
|
| 160 |
+
class MAPI_reg_tools(MAPITools):
    """Tool set for a numeric (regression) Materials Project property.

    Args:
        prop: Summary field requested from the API (e.g. ``"band_gap"``).
        prop_name: Property name interpolated into tool names and prompts.
    """
    # TODO: deal with units

    def __init__(self, prop, prop_name):
        super().__init__()
        self.prop = prop
        self.prop_name = prop_name

    def check_prop_by_formula(self, formula):
        """Search the Materials Project for ``formula`` and return the property value.

        Only the first returned document is inspected, and only when its
        ``formula_pretty`` equals ``formula`` exactly.

        Returns:
            The stored value; a "There is no record" message when the
            matching document holds no value; or a "Could not find any
            material" message when there is no matching first document.
        """
        with MPRester(os.getenv("MAPI_API_KEY")) as mpr:
            docs = mpr.summary.search(formula=formula, fields=["formula_pretty", self.prop])
            if docs and docs[0].formula_pretty == formula:
                value = docs[0].dict()[self.prop]
                # The missing-value check belongs to the matching document.
                # It used to hang off the mismatch branch, so a matched
                # material without a value returned None.
                if value is None:
                    return f"There is no record of {self.prop_name} for {formula}"
                return value
            return f"Could not find any material while searching {formula}"

    def create_context_prompt(self, formula):
        """Build a few-shot prompt asking for this property's value for ``formula``.

        Materials containing the same elements are fetched from the
        Materials Project; those with a recorded value are offered to an
        example selector that picks ``self.k`` of them for the prompt.

        Returns:
            The formatted prompt string, ending with the question about
            ``formula``.
        """
        elements = self.get_material_atoms(formula)
        similars = self.search_similars_by_atom(elements)
        # NOTE(review): ":2f" means minimum width 2 with the default six
        # decimals; ".2f" may have been intended -- confirm before changing,
        # since it alters the prompt text.
        similars = [
            {'formula': ex.formula_pretty,
             'prop': f"{ex.dict()[self.prop]:2f}" if ex.dict()[self.prop] is not None else None
            } for ex in similars
        ]
        # Drop repeated rows and rows without a recorded value.
        examples = pd.DataFrame(similars).drop_duplicates().dropna().to_dict(orient="records")

        example_selector = MaxMarginalRelevanceExampleSelector.from_examples(
            examples,
            OpenAIEmbeddings(),
            FAISS,
            k=self.k,
        )

        prefix=(
            f'You are a bot who can predict the {self.prop_name} of a material .\n'
            f'Given this list of known materials and the measurement of their {self.prop_name}, \n'
            f'you need to answer the what is the {self.prop_name} of the material:'
            'The answer should be numeric and finish with ###'
        )
        # Doubled braces keep {formula}/{prop} as template placeholders, while
        # the property name is filled in immediately by the f-string.
        prompt_template=PromptTemplate(
            input_variables=["formula", "prop"],
            template=f"What is the {self.prop_name} for {{formula}}?@@@\n{{prop}}###",
        )
        # The suffix repeats the question without an answer, for the model to complete.
        suffix = f"What is the {self.prop_name} for {{formula}}?@@@\n"
        prompt = FewShotPromptTemplate(
            # examples=examples,
            example_prompt=prompt_template,
            example_selector=example_selector,
            prefix=prefix,
            suffix=suffix,
            input_variables=["formula"])

        return prompt.format(formula=formula)
|
utils.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain.agents import Tool, tool
|
| 2 |
+
import requests
|
| 3 |
+
from langchain import OpenAI
|
| 4 |
+
from langchain import LLMMathChain, SerpAPIWrapper
|
| 5 |
+
from rdkit import Chem
|
| 6 |
+
|
| 7 |
+
@tool
def query2smiles(text):
    '''This function queries the one given molecule name and returns a SMILES string from the record'''
    # Returns the IsomericSMILES of the first PubChem record found for the
    # name, or an explanatory message when the lookup fails.
    try:
        # Query the PubChem PUG REST service by compound name.
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/' + text + '/property/IsomericSMILES/JSON')
        # Convert the response to a JSON object.
        data = r.json()
        # Take the SMILES string of the first property record.
        smi = data['PropertyTable']['Properties'][0]['IsomericSMILES']
        return smi
    except Exception:
        # The message used to be built but never returned, so failures gave
        # None; it also mentioned the IUPAC name instead of the SMILES.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return f"Could not find the SMILES for {text}"
|
| 20 |
+
|
| 21 |
+
@tool
def smiles2IUPAC(text):
    '''This function queries the one given smiles name and returns a IUPAC name from the record'''
    # Returns the IUPACName of the first PubChem record found for the SMILES,
    # or an explanatory message when the lookup fails.
    try:
        # Query the PubChem PUG REST service by SMILES.
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/' + text + '/property/IUPACName/JSON')
        data = r.json()
        iupac_name = data["PropertyTable"]["Properties"][0]["IUPACName"]
        return iupac_name
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return f"Could not find the IUPAC name for {text}"
|
| 32 |
+
|
| 33 |
+
@tool
def formula2IUPAC(text):
    '''This function queries the one given chemical formula and returns a material name from the record.'''
    # Returns the IUPACName of the first PubChem record found for the formula,
    # or an explanatory message when the lookup fails.
    try:
        # Query the PubChem PUG REST service by chemical formula.
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/formula/' + text + '/property/IUPACName/JSON')
        data = r.json()
        # Debug output of the raw response.
        print(data)
        iupac_name = data["PropertyTable"]["Properties"][0]["IUPACName"]
        return iupac_name
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return f"Could not find the IUPAC name for {text}"
|
| 44 |
+
|
| 45 |
+
@tool
def name2formula(text):
    '''This function queries the one given material name and returns a chemical formula from the record.'''
    # Returns the MolecularFormula of the first PubChem record found for the
    # name, or an explanatory message when the lookup fails.
    try:
        # Query the PubChem PUG REST service by compound name.
        r = requests.get('https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/' + text + '/property/MolecularFormula/JSON')
        data = r.json()
        # Debug output of the raw response.
        print(data)
        molecular_formula = data["PropertyTable"]["Properties"][0]["MolecularFormula"]
        return molecular_formula
    except Exception:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return f"Could not find the molecular formula for {text}"
|
| 56 |
+
|
| 57 |
+
@tool
def canonicalizeSMILES(smiles):
    '''Given a smiles representation, this function returns a canonicalized version of the same smiles.
    It's better to search for molecules in its canonicalized form'''
    mol = Chem.MolFromSmiles(smiles)
    # RDKit returns None (rather than raising) when the SMILES cannot be
    # parsed; passing that None on to MolToSmiles would raise instead of
    # giving the agent a usable answer.
    if mol is None:
        return f"Could not parse the SMILES {smiles}"
    return Chem.MolToSmiles(mol)
|
| 62 |
+
|
| 63 |
+
@tool
def web_search(keywords, search_engine="google"):
    '''Useful to do a simple google search.
    Use this tool to find general information from websites.
    Use keywords for your search.
    '''
    # `os` is not imported at the top of this module, so it is imported here;
    # without it the os.getenv call below raises NameError.
    import os

    # The SerpAPI key is read from the SERP_API_KEY environment variable.
    return SerpAPIWrapper(
        serpapi_api_key=os.getenv("SERP_API_KEY"),
        search_engine=search_engine
    ).run(keywords)
|
| 73 |
+
|
| 74 |
+
@tool
def LLM_predict(prompt):
    ''' This function receives a prompt generate with context by the create_context_prompt tool and request a completion to a language model. Then returns the completion'''
    # Completion settings, gathered in one place before building the client.
    completion_settings = dict(
        model_name='text-ada-001',  # TODO: Maybe change to gpt-4 when ready
        temperature=0.7,
        n=1,
        best_of=5,
        top_p=1.0,
        stop=["\n\n", "###", "#", "##"],
        # model_kwargs=kwargs,
    )
    client = OpenAI(**completion_settings)
    # A single prompt is submitted; return the text of its first generation.
    result = client.generate([prompt])
    first_generation = result.generations[0][0]
    return first_generation.text
|
| 87 |
+
|
| 88 |
+
# Tools shared by every agent; agent.py appends this list to the
# property-specific tools. The two commented-out entries are defined above
# but not exposed.
common_tools = [
    query2smiles,
    smiles2IUPAC,
    # formula2IUPAC,
    # name2formula,
    canonicalizeSMILES,
    web_search,
    LLM_predict
]
|