Spaces:
Running
Running
| import numpy as np | |
| import torch | |
| from pathlib import Path | |
| from .model import Tox21SNN | |
# Column layout of the raw feature matrix, expressed as cumulative end offsets.
# FoldPredictor._select_features slices the matrix at these boundaries.
# NOTE(review): the group names suggest ECFP fingerprint bits, MACCS keys,
# RDKit descriptors and toxicophore features -- confirm against the featurizer.
_ECFP_WIDTH = 8192
_MACCS_WIDTH = 167
_RDKIT_WIDTH = 208
_TOX_WIDTH = 1868

ECFP_END = _ECFP_WIDTH
MACCS_END = ECFP_END + _MACCS_WIDTH
RDKIT_END = MACCS_END + _RDKIT_WIDTH
TOX_END = RDKIT_END + _TOX_WIDTH
class FoldPredictor:
    """Inference wrapper for one entry of a checkpoint's ``"folds"`` list.

    Holds the fold's feature-selection indices, the statistics of its two
    scaling stages and a ``Tox21SNN`` model loaded with the fold's weights,
    and turns a raw feature matrix into sigmoid outputs.
    """

    def __init__(self, fold_data, device):
        """Restore the fold's preprocessing state and model.

        Args:
            fold_data: Mapping with the keys ``"ecfp_indices"``,
                ``"tox_indices"``, ``"in_features"``, ``"model_state"`` and
                ``"scaler_state"``. The scaler state itself holds
                ``"scaler1_mean"``, ``"scaler1_scale"``, ``"scaler2_mean"``
                and ``"scaler2_scale"``.
            device: Torch device the model is moved to and on which the
                input tensors are created.
        """
        self.device = device
        self.ecfp_indices = fold_data["ecfp_indices"]
        self.tox_indices = fold_data["tox_indices"]
        self.in_features = fold_data["in_features"]

        # Scaler statistics are stored as float32 arrays so they broadcast
        # against the selected feature matrix.
        scaler = fold_data["scaler_state"]
        self.s1_mean = np.array(scaler["scaler1_mean"], dtype=np.float32)
        self.s1_scale = np.array(scaler["scaler1_scale"], dtype=np.float32)
        self.s2_mean = np.array(scaler["scaler2_mean"], dtype=np.float32)
        self.s2_scale = np.array(scaler["scaler2_scale"], dtype=np.float32)

        # Dropout is set to 0.0 and the model is put in eval mode: this
        # object is only used for inference.
        self.model = Tox21SNN(in_features=self.in_features, dropout=0.0)
        self.model.load_state_dict(fold_data["model_state"])
        self.model.to(device)
        self.model.eval()

    def _select_features(self, X):
        """Pick this fold's columns out of the raw feature matrix.

        The matrix is cut at ``ECFP_END``, ``MACCS_END``, ``RDKIT_END`` and
        ``TOX_END``. The first and fourth groups are reduced to
        ``self.ecfp_indices`` / ``self.tox_indices`` (indices relative to the
        start of their group); the second, third and trailing groups are kept
        whole. The pieces are concatenated in their original order.

        Args:
            X: 2-D array-like of raw features (rows are samples).

        Returns:
            A 2-D ``numpy.ndarray`` with the selected columns.
        """
        # Accept nested lists and other array-likes, not only ndarrays.
        X = np.asarray(X)
        return np.concatenate([
            X[:, :ECFP_END][:, self.ecfp_indices],
            X[:, ECFP_END:MACCS_END],
            X[:, MACCS_END:RDKIT_END],
            X[:, RDKIT_END:TOX_END][:, self.tox_indices],
            X[:, TOX_END:],
        ], axis=1)

    def _scale(self, X):
        """Apply the two-stage scaling: standardize, tanh, standardize.

        Non-finite values are replaced by 0 before and after the first
        standardization. Each scale vector is clipped to at least ``1e-10``
        so a zero scale cannot cause a division by zero.

        Args:
            X: Selected feature matrix.

        Returns:
            The scaled feature matrix.
        """
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
        X = (X - self.s1_mean) / np.clip(self.s1_scale, 1e-10, None)
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
        X = np.tanh(X)
        X = (X - self.s2_mean) / np.clip(self.s2_scale, 1e-10, None)
        return X

    def predict(self, X_raw):
        """Return the model's sigmoid outputs for a raw feature matrix.

        Args:
            X_raw: 2-D array-like of raw, unselected and unscaled features.

        Returns:
            ``numpy.ndarray`` with the element-wise sigmoid of the model
            output, one row per input row.
        """
        X = self._select_features(X_raw)
        X = self._scale(X)
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
        tensor = torch.tensor(X, dtype=torch.float32, device=self.device)
        # Inference only: without no_grad the output would require grad and
        # ``.numpy()`` would raise, besides building a useless autograd graph.
        with torch.no_grad():
            probabilities = torch.sigmoid(self.model(tensor))
        return probabilities.cpu().numpy()
class Tox21Ensemble:
    """Average the outputs of every ``FoldPredictor`` stored in a checkpoint."""

    def __init__(self, checkpoint_path, device=None):
        """Load a checkpoint and build one predictor per stored fold.

        Args:
            checkpoint_path: Location of the checkpoint, passed to
                ``torch.load``. The checkpoint must provide the keys
                ``"n_folds"``, ``"mean_auc"`` and ``"folds"``.
            device: Device handed to every predictor. When falsy, CUDA is
                used if available, otherwise the CPU.
        """
        if not device:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device

        # NOTE(review): weights_only=False unpickles arbitrary objects, so
        # only checkpoints from a trusted source should be loaded here.
        checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)
        self.n_folds = checkpoint["n_folds"]
        self.mean_auc = checkpoint["mean_auc"]
        self.predictors = [
            FoldPredictor(fold_state, self.device)
            for fold_state in checkpoint["folds"]
        ]

    def predict(self, X_raw):
        """Return the element-wise mean of all predictors' outputs.

        Args:
            X_raw: Raw feature matrix, forwarded unchanged to each
                predictor's ``predict``.

        Returns:
            The mean over predictors (``axis=0``) of their outputs.
        """
        per_fold = [member.predict(X_raw) for member in self.predictors]
        return np.mean(per_fold, axis=0)