ivanm151 committed on
Commit
6796365
·
1 Parent(s): ab560f6
.env_template ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ HF_TOKEN=token
2
+ MODELS_DIR=models
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
5
+ <option name="ignoredIdentifiers">
6
+ <list>
7
+ <option value="dict.*" />
8
+ </list>
9
+ </option>
10
+ </inspection_tool>
11
+ </profile>
12
+ </component>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (clickerapp)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/molecular.iml" filepath="$PROJECT_DIR$/.idea/molecular.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/molecular.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="inheritedJdk" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ RUN useradd -m -u 1000 user
7
+ USER user
8
+ ENV PATH="/home/user/.local/bin:$PATH"
9
+
10
+ WORKDIR /app
11
+
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ COPY --chown=user . /app
16
+ CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,10 @@
1
  ---
2
- title: Molecular
3
- emoji: 🔥
4
- colorFrom: gray
5
- colorTo: purple
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
- short_description: ML module for "MoleMap" molecular prediction project
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Molecular Features Prediction
3
+ emoji: 😻
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ load_dotenv()
3
+
4
+ from fastapi import FastAPI # noqa: E402
5
+ from app.routers import predict # noqa: E402
6
+ from app.routers import descriptors # noqa: E402
7
+ from app.routers import name # noqa: E402
8
+
9
+
10
# ASGI application object (served by uvicorn as "app.app:app").
app = FastAPI(title="Molecular Prediction API")

# All feature routers share the "/api" URL prefix.
app.include_router(predict.router, prefix="/api")
app.include_router(descriptors.router, prefix="/api")
app.include_router(name.router, prefix="/api")
15
+
16
+
17
@app.get("/")
async def root():
    # Landing endpoint: answers with a short usage hint.
    hint = {"message": "Molecular Prediction API. Use POST /api/predict"}
    return hint
app/routers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from . import predict # noqa: F401
app/routers/descriptors.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .schemas import MoleculeDescriptors, InputData
2
+ from fastapi import APIRouter, HTTPException
3
+ from molecule import get_molecule_properties
4
+
5
+ router = APIRouter()
6
+
7
+
8
@router.post("/descriptor", response_model=MoleculeDescriptors)
async def descriptor(payload: InputData):
    # Compute the descriptor report for the submitted SMILES and validate it
    # against the response schema. A ValueError (e.g. unparsable SMILES) is
    # reported as HTTP 400, any other failure as HTTP 500.
    try:
        return MoleculeDescriptors(**get_molecule_properties(payload.smiles))
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Ошибка сервера: {e}")
app/routers/name.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from .schemas import InputData, NameResponse
3
+ from molecule import get_name
4
+
5
+ router = APIRouter()
6
+
7
+
8
@router.post("/get_name", response_model=NameResponse)
async def get_name_function(payload: InputData):
    # Resolve a display name for the submitted SMILES and wrap it in the
    # response schema.
    resolved = get_name(payload.smiles)
    return NameResponse(name=resolved)
app/routers/predict.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from .schemas import InputData, PredictionResponse, PredictionShapResponse
3
+ from molecule import predict as predict_properties
4
+
5
+ router = APIRouter()
6
+
7
+
8
@router.post("/predict", response_model=PredictionResponse)
async def predict(payload: InputData):
    # Run every property predictor on the submitted SMILES and return the
    # scalar predictions.
    #
    # Raises HTTPException 400 when prediction or response validation raises,
    # and HTTPException 500 when the prediction layer reports failure.
    try:
        props = predict_properties(payload.smiles, shap=False)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    # The prediction layer signals an internal failure by returning None.
    # Unpacking None with ** would surface as a confusing TypeError message,
    # so report it explicitly as a server-side error instead.
    if props is None:
        raise HTTPException(status_code=500, detail="Prediction failed")

    try:
        return PredictionResponse(**props)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
17
+
18
@router.post("/predict_shap", response_model=PredictionShapResponse)
async def predict_shap(payload: InputData):
    # Run every property predictor with SHAP attribution on the submitted
    # SMILES and return, per property, the prediction plus per-atom scores.
    #
    # Raises HTTPException 400 when prediction or response validation raises,
    # and HTTPException 500 when the prediction layer reports failure.
    try:
        props = predict_properties(payload.smiles, shap=True)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e

    # The prediction layer signals an internal failure by returning None.
    # Unpacking None with ** would surface as a confusing TypeError message,
    # so report it explicitly as a server-side error instead.
    if props is None:
        raise HTTPException(status_code=500, detail="Prediction failed")

    try:
        return PredictionShapResponse(**props)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
app/routers/schemas.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional
2
+ from pydantic import BaseModel, ConfigDict
3
+
4
+
5
# Structural summary of a molecule ("base" section of MoleculeDescriptors).
class BaseProperties(BaseModel):
    n_atoms: int  # number of atoms
    n_bonds: int  # number of bonds
    canonical_smiles: str  # canonical SMILES string
    aromatic_rings: int  # aromatic ring count
    aliphatic_rings: int  # aliphatic ring count
    heterocycles: int  # heterocycle count
12
+
13
+
14
# Physico-chemical descriptors ("physchem" section of MoleculeDescriptors).
class PhysChemProperties(BaseModel):
    mol_weight: float  # molecular weight
    logp: float  # logP estimate
    tpsa: float  # topological polar surface area
    h_donors: int  # hydrogen-bond donor count
    h_acceptors: int  # hydrogen-bond acceptor count
    rotatable_bonds: int  # rotatable bond count
    fraction_csp3: float  # fraction of sp3 carbons
22
+
23
+
24
# Summary statistics over per-atom partial charges.
class ChargeStats(BaseModel):
    mean: float
    max: float
    min: float
28
+
29
+
30
# Pharmacophore feature summary.
class PharmacophoreProperties(BaseModel):
    n_features: int  # total number of detected features
    types: List[str]  # feature family names
33
+
34
+
35
# Lipinski rule-of-five check: one flag per criterion plus the overall verdict.
class LipinskiProperties(BaseModel):
    mw_ok: bool  # molecular-weight criterion satisfied
    logp_ok: bool  # logP criterion satisfied
    h_donors_ok: bool  # H-bond donor criterion satisfied
    h_acceptors_ok: bool  # H-bond acceptor criterion satisfied
    # Overall verdict. The trailing underscore avoids the Python keyword
    # `pass`; the field is populated and serialised under the name "pass_".
    #
    # No inner `class Config: fields = {...}` here: pydantic v2 (which this
    # module targets via ConfigDict) removed the `fields` config key, so such a
    # declaration only emits warnings and never installs the "pass" alias.
    # If a "pass" wire name is wanted, declare an explicit field alias and
    # enable population by field name.
    pass_: bool
44
+
45
+
46
# Fingerprint summary: only sizes are exposed, not the bit vector itself.
class FingerprintInfo(BaseModel):
    n_bits: int  # fingerprint length
    bits_on: int  # number of set bits
49
+
50
+
51
# Additional descriptors as two parallel lists.
class ExtraDescriptors(BaseModel):
    names: List[str]  # descriptor names
    values: List[float]  # descriptor values, same order as `names`
54
+
55
+
56
+ # ========================================================================
57
+
58
+
59
# Full descriptor report for one molecule; response body of the
# "/descriptor" endpoint.
class MoleculeDescriptors(BaseModel):
    smiles: str  # SMILES string the report was computed for
    base: BaseProperties
    physchem: PhysChemProperties
    charges: Optional[ChargeStats]  # may be None
    pharmacophore: PharmacophoreProperties
    qed: Optional[float]  # may be None
    lipinski: LipinskiProperties
    fingerprint: FingerprintInfo
    extra_descriptors: ExtraDescriptors
69
+
70
# Request body shared by the API endpoints: a single SMILES string.
class InputData(BaseModel):
    smiles: str
72
+
73
# Response body carrying a molecule's display name.
class NameResponse(BaseModel):
    name: str
75
+
76
# Scalar predictions, one value per predicted property.
class PredictionResponse(BaseModel):
    solubility: float
    logp: float
    clintox: float
    fdaapprov: float
    cardiotoxicity: float

    # Model configuration: extra keys are allowed, and the example below is
    # attached to the generated JSON schema.
    model_config = ConfigDict(
        extra='allow',
        json_schema_extra={
            'examples': [
                {
                    'solubility': -0.16123154,
                    'logp': -1.2341234,
                    'clintox': 1,
                    'fdaapprov': 0,
                    'cardiotoxicity': 0.12341234
                }
            ]
        }
    )
98
+
99
+
100
# One property's prediction together with its per-atom SHAP attribution.
class ShapResponse(BaseModel):
    pred: float  # predicted value
    atom_shap: List[float]  # attribution scores, one entry per atom
103
+
104
+
105
# Predictions with SHAP attribution, one ShapResponse per predicted property.
class PredictionShapResponse(BaseModel):
    solubility: ShapResponse
    logp: ShapResponse
    clintox: ShapResponse
    fdaapprov: ShapResponse
    cardiotoxicity: ShapResponse

    # Model configuration: extra keys are allowed, and the example below is
    # attached to the generated JSON schema.
    model_config = ConfigDict(
        extra='allow',
        json_schema_extra={
            'examples': [
                {
                    'solubility': {'pred': -0.16123154, 'atom_shap': [0.0, 0.1, -0.05]},
                    'logp': {'pred': -1.2341234, 'atom_shap': [0.0, 0.1, -0.05]},
                    'clintox': {'pred': 1, 'atom_shap': [0.0, 0.1, -0.05]},
                    'fdaapprov': {'pred': 0, 'atom_shap': [0.0, 0.1, -0.05]},
                    'cardiotoxicity': {'pred': 0.1234567, 'atom_shap': [0.0, 0.1, -0.05]}
                }
            ]
        }
    )
models/cardiotoxicity.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55edf9b904ad80fb9710975a4136be567106f2664b0a54a45c011714b25d263e
3
+ size 5592855
models/clintox.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f001f049256112b96742554f0d57df7fa791ba76b2458d7865ccf86082281d5
3
+ size 646573
models/fdaapprov.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab63da51d4b38a5724ed7b45de991b6f56494c04ca433f8edf02722a154379f
3
+ size 646713
models/logp.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97c37285ea763d0fe981bacc9a565e7417b1741b21f8f95c8a07bfd6ecd1af78
3
+ size 4387995
models/solubility.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e54651bb859ee8b497884624bce9dc05a6c03ee6dbbda0240cf86e187132f3b
3
+ size 1134271
molecule/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .descriptors import get_molecule_properties # noqa: F401
2
+ from .predict import predict # noqa: F401
3
+ from .name import get_name # noqa: F401
molecule/descriptors.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from rdkit import Chem
2
+ from rdkit.Chem import (
3
+ Descriptors, rdMolDescriptors, Crippen, Lipinski, QED, AllChem,
4
+ ChemicalFeatures
5
+ )
6
+ from rdkit.ML.Descriptors import MoleculeDescriptors
7
+ from rdkit.Chem import rdMolDescriptors as rdmd
8
+ import numpy as np
9
+ import os
10
+
11
+
12
def load_feature_factory():
    """Build a feature factory from RDKit's bundled ``BaseFeatures.fdef``."""
    from rdkit import RDConfig

    return ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
    )
18
+
19
+
20
def compute_gasteiger_stats(mol):
    """Return mean/max/min of the Gasteiger partial charges, or None.

    Args:
        mol: RDKit molecule; charges are computed on it in place.

    Returns:
        A dict with float ``mean``, ``max`` and ``min`` entries, or ``None``
        when the charges cannot be computed, the molecule has no atoms, or any
        charge is NaN/infinite.
    """
    try:
        AllChem.ComputeGasteigerCharges(mol)
        charges = np.array(
            [a.GetDoubleProp("_GasteigerCharge") for a in mol.GetAtoms()],
            dtype=float,
        )
    except Exception:
        return None

    # Non-finite values cannot be encoded as strict JSON and would break the
    # API response, so treat them like a failed computation.
    if charges.size == 0 or not np.isfinite(charges).all():
        return None

    return {
        "mean": float(charges.mean()),
        "max": float(charges.max()),
        "min": float(charges.min()),
    }
32
+
33
+
34
def compute_morgan_fp(mol, radius=2, n_bits=2048):
    """Summarise the Morgan bit fingerprint of ``mol``.

    Returns a dict with the fingerprint length (``n_bits``) and the number of
    set bits (``bits_on``).
    """
    bit_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=n_bits)
    on_count = bit_vect.GetNumOnBits()
    return {"n_bits": n_bits, "bits_on": on_count}
41
+
42
+
43
def compute_extra_descriptors(mol, n_show=10):
    """Compute RDKit's descriptor list for ``mol`` and keep the first ``n_show``.

    Returns a dict with parallel ``names`` and ``values`` sequences.
    """
    descriptor_names = [entry_name for entry_name, _ in Descriptors._descList]
    calculator = MoleculeDescriptors.MolecularDescriptorCalculator(descriptor_names)
    all_names = calculator.GetDescriptorNames()
    all_values = calculator.CalcDescriptors(mol)
    return {"names": all_names[:n_show], "values": all_values[:n_show]}
53
+
54
+
55
def get_molecule_properties(smiles):
    """Compute descriptors and properties of the molecule given as SMILES.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        A dict with the sections ``smiles``, ``base``, ``physchem``,
        ``charges``, ``pharmacophore``, ``qed``, ``lipinski``, ``fingerprint``
        and ``extra_descriptors``.

    Raises:
        ValueError: if RDKit cannot parse ``smiles``.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        raise ValueError("Не удалось создать молекулу из SMILES.")

    # All descriptors below are computed on the molecule with explicit hydrogens.
    mol = Chem.AddHs(parsed)

    # --- Basic structure ---
    base = dict(
        n_atoms=mol.GetNumAtoms(),
        n_bonds=mol.GetNumBonds(),
        canonical_smiles=Chem.MolToSmiles(Chem.RemoveHs(mol), canonical=True),
        aromatic_rings=rdmd.CalcNumAromaticRings(mol),
        aliphatic_rings=rdmd.CalcNumAliphaticRings(mol),
        heterocycles=rdmd.CalcNumHeterocycles(mol),
    )

    # --- Physico-chemical descriptors ---
    physchem = dict(
        mol_weight=Descriptors.MolWt(mol),
        logp=Crippen.MolLogP(mol),
        tpsa=Descriptors.TPSA(mol),
        h_donors=Lipinski.NumHDonors(mol),
        h_acceptors=Lipinski.NumHAcceptors(mol),
        rotatable_bonds=Descriptors.NumRotatableBonds(mol),
        fraction_csp3=rdMolDescriptors.CalcFractionCSP3(mol),
    )

    charges = compute_gasteiger_stats(mol)

    # --- Pharmacophore features ---
    features = load_feature_factory().GetFeaturesForMol(mol)
    pharm = {
        "n_features": len(features),
        "types": sorted({feature.GetFamily() for feature in features}),
    }

    # QED is best-effort: any failure is reported as None.
    try:
        qed_val = QED.qed(mol)
    except Exception:
        qed_val = None

    # --- Lipinski rule of five: evaluate each criterion once ---
    mw_ok = physchem["mol_weight"] <= 500
    logp_ok = physchem["logp"] <= 5
    donors_ok = physchem["h_donors"] <= 5
    acceptors_ok = physchem["h_acceptors"] <= 10
    lipinski = {
        "mw_ok": mw_ok,
        "logp_ok": logp_ok,
        "h_donors_ok": donors_ok,
        "h_acceptors_ok": acceptors_ok,
        "pass_": mw_ok and logp_ok and donors_ok and acceptors_ok,
    }

    fingerprint = compute_morgan_fp(mol)
    extra = compute_extra_descriptors(mol)

    return {
        "smiles": smiles,
        "base": base,
        "physchem": physchem,
        "charges": charges,
        "pharmacophore": pharm,
        "qed": qed_val,
        "lipinski": lipinski,
        "fingerprint": fingerprint,
        "extra_descriptors": extra,
    }
127
+
128
# Example output
if __name__ == "__main__":
    report = get_molecule_properties("CC(=O)OC1=CC=CC=C1C(=O)O")
    for title in report:
        print(f"\n=== {title.upper()} ===")
        print(report[title])
molecule/model.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+ import torch
4
+ from rdkit import Chem
5
+ from rdkit.Chem import AllChem, DataStructs
6
+
7
def smiles_to_ecfp(smiles, radius=2, n_bits=1024):
    """Convert a SMILES string into a Morgan (ECFP) bit array.

    Returns an all-zero array of length ``n_bits`` when the SMILES cannot be
    parsed.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is not None:
        bit_vect = AllChem.GetMorganFingerprintAsBitVect(molecule, radius, nBits=n_bits)
        bits = np.zeros(n_bits, dtype=int)
        DataStructs.ConvertToNumpyArray(bit_vect, bits)
        return bits
    return np.zeros(n_bits)
15
+
16
class ModelWrapper:
    """Load a serialized model from the models directory and run it on SMILES.

    Attributes:
        model: the object returned by ``torch.load``, or ``None`` when no
            model name was given, the file is missing, or loading failed.
        featurizer: callable turning a SMILES string into a feature vector.
    """

    def __init__(self, model_name: str = None):
        """Try to load ``model_name`` from the directory named by MODELS_DIR.

        Args:
            model_name: file name of the model inside the models directory.
                With ``None`` (or an empty name) nothing is loaded.
        """
        self.model = None
        self.featurizer = smiles_to_ecfp

        if not model_name:
            return

        # Fall back to the relative "models" directory when MODELS_DIR is not
        # set, so a missing environment variable does not crash
        # os.path.join with a None component.
        models_dir = os.environ.get("MODELS_DIR") or "models"
        model_path = os.path.join(models_dir, model_name)

        if not os.path.exists(model_path):
            print(f"Model file not found: {model_path}")
            return

        try:
            # NOTE(security): weights_only=False unpickles arbitrary Python
            # objects; only load model files from a trusted source.
            self.model = torch.load(model_path, map_location="cpu", weights_only=False)
        except Exception as e:
            # Best-effort load: report the problem and leave the model unset.
            print(e)
            self.model = None

    def predict(self, X):
        """Featurize the SMILES string ``X`` and return the model output.

        Returns:
            The single output value as a Python scalar.

        Raises:
            RuntimeError: if no model is loaded.
        """
        if self.model is None:
            raise RuntimeError("Model is not loaded")

        features = np.asarray(self.featurizer(X), dtype=float)

        with torch.no_grad():
            t = torch.tensor(features, dtype=torch.float32)
            out = self.model(t)
        return out.cpu().numpy().item()
molecule/name.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from rdkit import Chem
3
+
4
def get_pubchem_name(smiles):
    """Look up the first PubChem synonym for a SMILES string.

    Two PUG REST requests are made: the first resolves the SMILES to a CID,
    the second fetches the synonyms of that CID.

    Args:
        smiles: SMILES string of the compound.

    Returns:
        The first synonym listed by PubChem, or ``None`` when the compound is
        not found, a request fails or times out, or the response does not
        have the expected shape.
    """
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound"
    timeout_s = 10  # never let a slow PubChem response hang the caller

    try:
        # SMILES may contain '/', '#', '=' and similar characters that break a
        # URL path, so it is sent as a URL argument and encoded by requests.
        r = requests.get(
            f"{base_url}/smiles/property/IUPACName/JSON",
            params={"smiles": smiles},
            timeout=timeout_s,
        )
        if r.status_code != 200:
            return None
        cid = r.json()['PropertyTable']['Properties'][0]['CID']

        r = requests.get(f"{base_url}/cid/{cid}/synonyms/JSON", timeout=timeout_s)
        if r.status_code != 200:
            return None
        return r.json()['InformationList']['Information'][0]['Synonym'][0]
    except (KeyError, IndexError, TypeError, ValueError, requests.RequestException):
        # Network failure, invalid JSON, or an unexpected response layout.
        return None
20
+
21
def generate_readable_name(smiles):
    """Concatenate the atom symbols of the molecule, e.g. C-C-O -> "CCO".

    Returns "UnknownMolecule" when the SMILES cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        return "UnknownMolecule"
    return "".join(atom.GetSymbol() for atom in parsed.GetAtoms())
28
+
29
def generate_short_signature(smiles, n=6):
    """Build "<first n atom symbols>-<atom count>" for the molecule.

    Returns "MolX" when the SMILES cannot be parsed.
    """
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is None:
        return "MolX"
    symbols = [atom.GetSymbol() for atom in parsed.GetAtoms()]
    prefix = "".join(symbols[:n])
    return "{}-{}".format(prefix, len(symbols))
36
+
37
+
38
def get_name(smiles):
    """Return a display name for the molecule given as SMILES.

    The PubChem name is preferred. When PubChem has no answer (or the lookup
    fails), the name generated from the atom symbols is used instead.

    Args:
        smiles: SMILES string of the molecule.

    Returns:
        The resolved name, or "UnknownMolecule" when no name can be produced.
    """
    try:
        name = get_pubchem_name(smiles=smiles)
    except Exception:
        # The lookup is best-effort; a failed request must not fail the caller.
        name = None

    # Return the PubChem name when one was found instead of discarding it.
    if name:
        return name

    try:
        return generate_readable_name(smiles=smiles)
    except Exception:
        return "UnknownMolecule"
50
+
51
+
52
+
53
if __name__ == '__main__':
    # Manual smoke test: print the result of every naming helper in turn.
    smiles = "CCC1:C:C:C(CCOC2:C:C:C(CC3SC(=O)NC3=O):C:C:2):N:C:1"
    for helper in (get_name, generate_readable_name, generate_short_signature, get_pubchem_name):
        print(helper(smiles))
molecule/predict.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .model import ModelWrapper
2
+ import numpy as np
3
+ from rdkit import Chem
4
+ from rdkit.Chem import AllChem, DataStructs
5
+ import shap
6
+
7
def smiles_to_ecfp(smiles, radius=2, n_bits=1024):
    """Convert a SMILES string into a Morgan (ECFP) bit array.

    Returns an all-zero array of length ``n_bits`` when the SMILES cannot be
    parsed.
    """
    molecule = Chem.MolFromSmiles(smiles)
    if molecule is not None:
        bit_vect = AllChem.GetMorganFingerprintAsBitVect(molecule, radius, nBits=n_bits)
        bits = np.zeros(n_bits, dtype=int)
        DataStructs.ConvertToNumpyArray(bit_vect, bits)
        return bits
    return np.zeros(n_bits)
15
+
16
# One wrapper per predicted property, created once at import time.
# The order is significant: the predictor functions in this module pick
# their model by position in this list.
models = [
    ModelWrapper("solubility.pth"),      # index 0
    ModelWrapper("logp.pth"),            # index 1
    ModelWrapper("clintox.pth"),         # index 2
    ModelWrapper("fdaapprov.pth"),       # index 3
    ModelWrapper("cardiotoxicity.pth"),  # index 4
]
23
+
24
def _predict_scalar(model_index, X):
    """Run ``models[model_index]`` on the fingerprint of the SMILES ``X``.

    Returns the prediction as a Python scalar. Any failure is printed and
    reported as ``0``.
    """
    try:
        features = np.asarray(smiles_to_ecfp(X), dtype=float)
        return models[model_index].model.predict([features]).item()
    except Exception as e:
        print(e)
        return 0


def solubility(X):
    """Predict solubility for the SMILES string ``X`` (0 on failure)."""
    return _predict_scalar(0, X)


def logp(X):
    """Predict logP for the SMILES string ``X`` (0 on failure)."""
    return _predict_scalar(1, X)


def clintox(X):
    """Predict the clintox value for the SMILES string ``X`` (0 on failure)."""
    return _predict_scalar(2, X)


def fdaapprov(X):
    """Predict the fdaapprov value for the SMILES string ``X`` (0 on failure)."""
    return _predict_scalar(3, X)


def cardiotoxicity(X):
    """Predict cardiotoxicity for the SMILES string ``X`` (0 on failure)."""
    return _predict_scalar(4, X)
69
+
70
def _predict_with_shap(X, model_wrapper):
    """Predict one property for SMILES ``X`` and attribute it to atoms.

    Args:
        X: SMILES string of the molecule.
        model_wrapper: wrapper whose ``model`` attribute makes the prediction;
            a SHAP explainer is created on first use and stored on the wrapper.

    Returns:
        ``{"pred": <prediction>, "atom_shap": [<score per atom>, ...]}``.
        When the SMILES cannot be parsed, ``pred`` is 0, ``atom_shap`` is
        empty, and empty ``fp``, ``bitInfo`` and ``shap_values_bits`` entries
        are included as well. Any other failure is printed and reported as
        ``{"pred": 0, "atom_shap": []}``.
    """
    try:
        # 1. Morgan fingerprint plus bitInfo (bit -> (atom, radius) pairs)
        mol = Chem.MolFromSmiles(X)
        if mol is None:
            return {"pred": 0, "atom_shap": [], "fp": [], "bitInfo": {}, "shap_values_bits": []}

        bitInfo = {}
        fp_vect = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=1024, bitInfo=bitInfo)
        fp = np.zeros(1024, dtype=int)
        DataStructs.ConvertToNumpyArray(fp_vect, fp)

        # 2. Model prediction on a single-row input
        X_input = np.asarray(fp, dtype=float).reshape(1, -1)
        pred = model_wrapper.model.predict(X_input).item()

        # 3. SHAP values per fingerprint bit; the explainer is built only once
        if not hasattr(model_wrapper, "shap_explainer"):
            model_wrapper.shap_explainer = shap.TreeExplainer(model_wrapper.model)
        shap_vals_bits = model_wrapper.shap_explainer.shap_values(X_input)[0]

        # 4. Map bit-level SHAP values onto atoms: each bit's value is added to
        #    every atom recorded for that bit in bitInfo.
        atom_scores = np.zeros(mol.GetNumAtoms(), dtype=float)
        for bit, val in enumerate(shap_vals_bits):
            for atom_idx, _radius in bitInfo.get(bit, ()):
                atom_scores[atom_idx] += val

        return {
            "pred": pred,
            "atom_shap": atom_scores.tolist()
        }

    except Exception as e:
        print(e)
        return {"pred": 0, "atom_shap": []}


def solubility_shap(X, model_wrapper=models[0]):
    """Predict solubility for SMILES ``X`` with per-atom SHAP scores."""
    return _predict_with_shap(X, model_wrapper)


def logp_shap(X, model_wrapper=models[1]):
    """Predict logP for SMILES ``X`` with per-atom SHAP scores."""
    return _predict_with_shap(X, model_wrapper)


def clintox_shap(X, model_wrapper=models[2]):
    """Predict the clintox value for SMILES ``X`` with per-atom SHAP scores."""
    return _predict_with_shap(X, model_wrapper)


def fdaapprov_shap(X, model_wrapper=models[3]):
    """Predict the fdaapprov value for SMILES ``X`` with per-atom SHAP scores."""
    return _predict_with_shap(X, model_wrapper)


def cardiotoxicity_shap(X, model_wrapper=models[4]):
    """Predict cardiotoxicity for SMILES ``X`` with per-atom SHAP scores."""
    return _predict_with_shap(X, model_wrapper)
285
+
286
+
287
# Property name -> predictor returning a plain scalar.
property_predictors = {
    "solubility": solubility,
    "logp": logp,
    "clintox": clintox,
    "fdaapprov": fdaapprov,
    "cardiotoxicity": cardiotoxicity,
}

# Property name -> predictor returning a dict with the prediction and
# per-atom SHAP scores. Uses the same keys as property_predictors.
property_predictors_shap = {
    "solubility": solubility_shap,
    "logp": logp_shap,
    "clintox": clintox_shap,
    "fdaapprov": fdaapprov_shap,
    "cardiotoxicity": cardiotoxicity_shap,
}
302
+
303
+
304
def predict(X, shap=False):
    """Run every property predictor on the SMILES string ``X``.

    Args:
        X: SMILES string of the molecule.
        shap: when true, use the SHAP-enabled predictors.

    Returns:
        A dict mapping each property name to its predictor's result, or
        ``None`` if collecting the predictions raised (the error is printed).
    """
    try:
        predictors = property_predictors_shap if shap else property_predictors
        return {prop_name: predictor(X) for prop_name, predictor in predictors.items()}
    except Exception as e:
        print(e)
        return None
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ numpy
4
+ joblib
5
+ torch
6
+ rdkit
7
+ xgboost
8
+ scikit-learn
9
+ shap
10
+ requests