"""
This file includes a predict function for Tox21.
As input it takes a list of SMILES strings and it outputs a nested dictionary
with SMILES and target names as keys.
"""
# ---------------------------------------------------------------------------------------
# Dependencies
import json
import copy
from collections import defaultdict
import joblib
import numpy as np
from tqdm import tqdm
from src.model import Tox21RFClassifier
from src.preprocess import create_descriptors, FeaturePreprocessor
from src.utils import TASKS, normalize_config
# ---------------------------------------------------------------------------------------
# Path of the JSON configuration file that predict() opens and parses; the
# leading "./" makes it relative to the current working directory.
CONFIG_FILE = "./config/config.json"
def predict(
    smiles_list: list[str], default_prediction: float = 0.5
) -> dict[str, dict[str, float]]:
    """Apply the classifier to a list of SMILES strings.

    Molecules that could not be cleaned during descriptor creation are never
    passed to the model; they receive ``default_prediction`` for every target.

    Args:
        smiles_list (list[str]): list of SMILES strings
        default_prediction (float): value reported for every molecule that
            could not be cleaned. Defaults to 0.5.

    Returns:
        dict: nested prediction dictionary, following
            {'<smiles>': {'<target>': <pred>}}. If ``config["debug"]`` is
            truthy, only the first target in ``TASKS`` is predicted.
    """
    print(f"Received {len(smiles_list)} SMILES strings")

    with open(CONFIG_FILE, "r", encoding="utf-8") as f:
        config = normalize_config(json.load(f))

    features, is_clean = create_descriptors(
        smiles_list, config["descriptors"], **config["ecfp"]
    )
    # Normalise the mask to a boolean ndarray so that `~is_clean` and the
    # mask-based indexing below behave as intended.
    is_clean = np.asarray(is_clean, dtype=bool)
    n_clean = int(is_clean.sum())
    print(f"Created descriptors for {n_clean} molecules.")
    print(f"{len(is_clean) - n_clean} molecules removed during cleaning")

    # setup model
    model = Tox21RFClassifier()
    preprocessor = FeaturePreprocessor(
        feature_selection_config=config["feature_selection"],
        feature_quantilization_config=config["feature_quantilization"],
        descriptors=config["descriptors"],
        max_samples=config["max_samples"],
        scaler=config["scaler"],
    )

    model.load(config["ckpt_path"])
    print(f"Loaded model from {config['ckpt_path']}")

    state = joblib.load(config["preprocessor_path"])
    preprocessor.set_state(state)
    print(f"Loaded preprocessor from {config['preprocessor_path']}")

    # make predictions
    predictions = defaultdict(dict)
    print("Create predictions:")

    # The model input does not depend on the target, so it is built once
    # instead of once per task. Boolean-mask indexing produces new arrays
    # (assuming the feature values are NumPy arrays), so `features` itself is
    # left untouched without needing a deep copy.
    # NOTE(review): assumes preprocessor.transform is deterministic and that
    # model.predict does not modify its input in place - confirm.
    if n_clean:
        X = preprocessor.transform(
            {descr: array[is_clean] for descr, array in features.items()}
        )
    else:
        # Nothing survived cleaning: skip the preprocessor and the model and
        # report the default for every molecule.
        X = None

    for target in tqdm(TASKS):
        # Start from the default everywhere, then overwrite the entries of
        # the molecules that were actually scored.
        preds = np.full(is_clean.shape, default_prediction, dtype=np.float64)
        if X is not None:
            preds[is_clean] = model.predict(target, X)

        for smiles, pred in zip(smiles_list, preds):
            predictions[smiles][target] = float(pred)

        if config["debug"]:
            break

    return predictions
|