import os import molbloom import pandas as pd import requests from langchain.tools import BaseTool from utils import is_smiles class ChemSpace: def __init__(self, chemspace_api_key=None): self.chemspace_api_key = chemspace_api_key self._renew_token() # Create token def _renew_token(self): self.chemspace_token = requests.get( url="https://api.chem-space.com/auth/token", headers={ "Accept": "application/json", "Authorization": f"Bearer {self.chemspace_api_key}", }, ).json()["access_token"] def _make_api_request( self, query, request_type, count, categories, ): """ Make a generic request to chem-space API. Categories request. CSCS: Custom Request: Could be useful for requesting whole synthesis CSMB: Make-On-Demand Building Blocks CSSB: In-Stock Building Blocks CSSS: In-stock Screening Compounds CSMS: Make-On-Demand Screening Compounds """ def _do_request(): data = requests.request( "POST", url=f"https://api.chem-space.com/v3/search/{request_type}?count={count}&page=1&categories={categories}", headers={ "Accept": "application/json; version=3.1", "Authorization": f"Bearer {self.chemspace_token}", }, data={"SMILES": f"{query}"}, ).json() return data data = _do_request() # renew token if token is invalid if "message" in data.keys(): if data["message"] == "Your request was made with invalid credentials.": self._renew_token() data = _do_request() return data def _convert_single(self, query, search_type: str): """Do query for a single molecule""" data = self._make_api_request(query, "exact", 1, "CSCS,CSMB,CSSB") if data["count"] > 0: return data["items"][0][search_type] else: return "No data was found for this compound." def convert_mol_rep(self, query, search_type: str = "smiles"): if ", " in query: query_list = query.split(", ") else: query_list = [query] smi = "" try: for q in query_list: smi += f"{query}'s {search_type} is: {str(self._convert_single(q, search_type))}" return smi except Exception: return "The input provided is wrong. Input either a single molecule, or multiple molecules separated by a ', '" def buy_mol( self, smiles, request_type="exact", count=1, ): """ Get data about purchasing compounds. smiles: smiles string of the molecule you want to buy request_type: one of "exact", "sim" (search by similarity), "sub" (search by substructure). count: retrieve data for this many substances max. """ def purchasable_check( s, ): if not is_smiles(s): try: s = self.convert_mol_rep(s, "smiles") except: return "Invalid SMILES string." """Checks if molecule is available for purchase (ZINC20)""" try: r = molbloom.buy(s, canonicalize=True) except: print("invalid smiles") return False if r: return True else: return False purchasable = purchasable_check(smiles) if request_type == "exact": categories = "CSMB,CSSB" elif request_type in ["sim", "sub"]: categories = "CSSS,CSMS" data = self._make_api_request(smiles, request_type, count, categories) try: if data["count"] == 0: if purchasable: return "Compound is purchasable, but price is unknown." else: return "Compound is not purchasable." except KeyError: return "Invalid query, try something else. " print(f"Obtaining data for {data['count']} substances.") dfs = [] # Convert this data into df for item in data["items"]: dfs_tmp = [] smiles = item["smiles"] offers = item["offers"] for off in offers: df_tmp = pd.DataFrame(off["prices"]) df_tmp["vendorName"] = off["vendorName"] df_tmp["time"] = off["shipsWithin"] df_tmp["purity"] = off["purity"] dfs_tmp.append(df_tmp) df_this = pd.concat(dfs_tmp) df_this["smiles"] = smiles dfs.append(df_this) df = pd.concat(dfs).reset_index(drop=True) df["quantity"] = df["pack"].astype(str) + df["uom"] df["time"] = df["time"].astype(str) + " days" df = df.drop(columns=["pack", "uom"]) # Remove all entries that are not numbers df = df[df["priceUsd"].astype(str).str.isnumeric()] cheapest = df.iloc[df["priceUsd"].astype(float).idxmin()] return f"{cheapest['quantity']} of this molecule cost {cheapest['priceUsd']} USD and can be purchased at {cheapest['vendorName']}." class GetMoleculePrice(BaseTool): name :str = "GetMoleculePrice" description :str = "Get the cheapest available price of a molecule." chemspace_api_key: str = None url: str = None def __init__(self, chemspace_api_key: str = None): super().__init__() self.chemspace_api_key = chemspace_api_key self.url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}" def _run(self, query: str) -> str: if not self.chemspace_api_key: return "No Chemspace API key found. This tool may not be used without a Chemspace API key." try: chemspace = ChemSpace(self.chemspace_api_key) price = chemspace.buy_mol(query) return price except Exception as e: return str(e) async def _arun(self, query: str) -> str: """Use the tool asynchronously.""" raise NotImplementedError()