File size: 6,397 Bytes
64e9ead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import os

import molbloom
import pandas as pd
import requests
from langchain.tools import BaseTool

from utils import is_smiles


class ChemSpace:
    """Small client for the Chemspace REST API (token handling, search, pricing).

    An access token is requested when the object is created and requested
    again whenever the API answers a search with its "invalid credentials"
    message.
    """

    # Message the API returns when the bearer token is no longer accepted.
    _INVALID_CREDENTIALS = "Your request was made with invalid credentials."
    # Sentinel returned by `_convert_single` when the search has no hits.
    _NO_DATA = "No data was found for this compound."
    _PRICE_UNKNOWN = "Compound is purchasable, but price is unknown."

    def __init__(self, chemspace_api_key=None):
        """Store the API key and fetch an initial access token."""
        self.chemspace_api_key = chemspace_api_key
        self._renew_token()  # Create token

    def _renew_token(self):
        """Exchange the API key for a fresh access token.

        Raises whatever `requests` raises on transport errors, and KeyError
        when the response carries no "access_token" field.
        """
        self.chemspace_token = requests.get(
            url="https://api.chem-space.com/auth/token",
            headers={
                "Accept": "application/json",
                "Authorization": f"Bearer {self.chemspace_api_key}",
            },
        ).json()["access_token"]

    def _post_search(self, query, request_type, count, categories):
        """Send one search request and return the decoded JSON body."""
        return requests.request(
            "POST",
            url=f"https://api.chem-space.com/v3/search/{request_type}?count={count}&page=1&categories={categories}",
            headers={
                "Accept": "application/json; version=3.1",
                "Authorization": f"Bearer {self.chemspace_token}",
            },
            data={"SMILES": f"{query}"},
        ).json()

    def _make_api_request(
        self,
        query,
        request_type,
        count,
        categories,
    ):
        """
        Make a generic request to chem-space API.

        Categories request.
            CSCS: Custom Request: Could be useful for requesting whole synthesis
            CSMB: Make-On-Demand Building Blocks
            CSSB: In-Stock Building Blocks
            CSSS: In-stock Screening Compounds
            CSMS: Make-On-Demand Screening Compounds

        The request is sent once; it is repeated (after renewing the token)
        only when the API reports invalid credentials.
        """
        data = self._post_search(query, request_type, count, categories)

        # Renew the token and retry only if the token was rejected.
        if (
            isinstance(data, dict)
            and data.get("message") == self._INVALID_CREDENTIALS
        ):
            self._renew_token()
            data = self._post_search(query, request_type, count, categories)

        return data

    def _convert_single(self, query, search_type: str):
        """Do query for a single molecule"""
        data = self._make_api_request(query, "exact", 1, "CSCS,CSMB,CSSB")
        if data["count"] > 0:
            return data["items"][0][search_type]
        return self._NO_DATA

    def convert_mol_rep(self, query, search_type: str = "smiles"):
        """Look up `search_type` for one molecule or a ', '-separated list.

        Returns one "<molecule>'s <search_type> is: <value>" entry per
        molecule, joined with newlines, or an explanatory message when the
        lookup fails for any reason.
        """
        try:
            entries = [
                f"{q}'s {search_type} is: {str(self._convert_single(q, search_type))}"
                for q in query.split(", ")
            ]
        except Exception:
            return "The input provided is wrong. Input either a single molecule, or multiple molecules separated by a ', '"
        return "\n".join(entries)

    def _is_purchasable(self, smiles):
        """Checks if molecule is available for purchase (ZINC20).

        Inputs that are not SMILES are first resolved through the exact
        search. Always returns a bool; any failure counts as not purchasable.
        """
        if not is_smiles(smiles):
            try:
                smiles = self._convert_single(smiles, "smiles")
            except Exception:
                return False
            if smiles == self._NO_DATA:
                return False

        try:
            return bool(molbloom.buy(smiles, canonicalize=True))
        except Exception:
            print("invalid smiles")
            return False

    @staticmethod
    def _cheapest_offer_summary(items):
        """Describe the cheapest numerically priced pack among `items`.

        `items` is the "items" list of a search response. Returns None when
        no offer carries a numeric "priceUsd".
        """
        frames = []
        for item in items:
            for offer in item.get("offers") or []:
                prices = pd.DataFrame(offer.get("prices") or [])
                if prices.empty:
                    continue
                prices["vendorName"] = offer["vendorName"]
                frames.append(prices)

        if not frames:
            return None

        # A fresh 0..n-1 index keeps the label from idxmin valid for .at below.
        df = pd.concat(frames, ignore_index=True)
        if "priceUsd" not in df.columns:
            return None

        # Non-numeric prices become NaN; decimal prices are kept.
        usd = pd.to_numeric(df["priceUsd"], errors="coerce")
        if not usd.notna().any():
            return None

        best = usd.idxmin()
        quantity = f"{df.at[best, 'pack']}{df.at[best, 'uom']}"
        return (
            f"{quantity} of this molecule cost {df.at[best, 'priceUsd']} USD "
            f"and can be purchased at {df.at[best, 'vendorName']}."
        )

    def buy_mol(
        self,
        smiles,
        request_type="exact",
        count=1,
    ):
        """
        Get data about purchasing compounds.

        smiles: smiles string of the molecule you want to buy
        request_type: one of "exact", "sim" (search by similarity), "sub" (search by substructure).
        count: retrieve data for this many substances max.

        Returns a human-readable sentence. Raises ValueError for an unknown
        request_type.
        """
        if request_type == "exact":
            categories = "CSMB,CSSB"
        elif request_type in ("sim", "sub"):
            categories = "CSSS,CSMS"
        else:
            raise ValueError(
                f"Unknown request_type {request_type!r}; expected 'exact', 'sim' or 'sub'."
            )

        data = self._make_api_request(smiles, request_type, count, categories)

        try:
            n_found = data["count"]
            items = data["items"] if n_found else []
        except (KeyError, TypeError):
            return "Invalid query, try something else. "

        if n_found == 0:
            # The purchasability check is only needed when the API has no hit.
            if self._is_purchasable(smiles):
                return self._PRICE_UNKNOWN
            return "Compound is not purchasable."

        print(f"Obtaining data for {n_found} substances.")

        summary = self._cheapest_offer_summary(items)
        if summary is None:
            # The API lists the compound but gives no usable price.
            return self._PRICE_UNKNOWN
        return summary


class GetMoleculePrice(BaseTool):
    # Tool that asks Chemspace for the cheapest offer of a molecule.
    # `url` is assigned in __init__ but not read anywhere in this class.
    name: str = "GetMoleculePrice"
    description: str = "Get the cheapest available price of a molecule."
    chemspace_api_key: str = None
    url: str = None

    def __init__(self, chemspace_api_key: str = None):
        super().__init__()
        self.url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{}/{}"
        self.chemspace_api_key = chemspace_api_key

    def _run(self, query: str) -> str:
        # With a key configured, build a client per call and return its
        # answer; any exception is reported back as its message text.
        if self.chemspace_api_key:
            try:
                return ChemSpace(self.chemspace_api_key).buy_mol(query)
            except Exception as err:
                return str(err)
        return "No Chemspace API key found. This tool may not be used without a Chemspace API key."

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError()