from pathlib import Path
from typing import Optional, Union

import numpy as np
import torch

from .model import Tox21SNN

# Column boundaries of the raw feature matrix. The block names (ECFP, MACCS,
# RDKit, Tox) are taken from the constant names only; what each block actually
# contains is defined by the featurization code, which is not visible here.
ECFP_END = 8192
MACCS_END = ECFP_END + 167
RDKIT_END = MACCS_END + 208
TOX_END = RDKIT_END + 1868

# Lower bound applied to the stored scale vectors before dividing by them, so
# that a zero (or tiny) scale cannot produce inf/NaN.
_MIN_SCALE = 1e-10


class FoldPredictor:
    """Run inference for a single fold of the ensemble.

    Bundles one ``Tox21SNN`` model with the column selection and the two-stage
    scaling parameters stored alongside it in ``fold_data``.
    """

    def __init__(self, fold_data: dict, device) -> None:
        """Build the predictor from one fold entry of a checkpoint.

        Args:
            fold_data: Mapping providing ``"ecfp_indices"``, ``"tox_indices"``,
                ``"in_features"``, ``"model_state"`` and ``"scaler_state"``
                (itself a mapping with ``"scaler1_mean"``, ``"scaler1_scale"``,
                ``"scaler2_mean"`` and ``"scaler2_scale"``).
            device: Device the model is moved to and on which input tensors
                are created.
        """
        self.device = device
        self.ecfp_indices = fold_data["ecfp_indices"]
        self.tox_indices = fold_data["tox_indices"]
        self.in_features = fold_data["in_features"]

        scaler = fold_data["scaler_state"]
        self.s1_mean = np.array(scaler["scaler1_mean"], dtype=np.float32)
        self.s1_scale = np.array(scaler["scaler1_scale"], dtype=np.float32)
        self.s2_mean = np.array(scaler["scaler2_mean"], dtype=np.float32)
        self.s2_scale = np.array(scaler["scaler2_scale"], dtype=np.float32)

        # The clipped divisors never change after construction, so compute
        # them once here instead of on every _scale() call.
        self._s1_denom = np.clip(self.s1_scale, _MIN_SCALE, None)
        self._s2_denom = np.clip(self.s2_scale, _MIN_SCALE, None)

        # dropout=0.0 plus eval(): this object is used for inference only.
        self.model = Tox21SNN(in_features=self.in_features, dropout=0.0)
        self.model.load_state_dict(fold_data["model_state"])
        self.model.to(device)
        self.model.eval()

    def _select_features(self, X: np.ndarray) -> np.ndarray:
        """Return the columns of ``X`` used by this fold.

        The first and fourth column blocks are reduced to ``ecfp_indices`` and
        ``tox_indices`` respectively (indices are relative to the start of
        each block); the second and third blocks and every column from
        ``TOX_END`` onwards are kept unchanged.

        Args:
            X: 2-D array whose columns follow the module-level offsets.

        Returns:
            A new 2-D array with the selected columns concatenated in the
            original block order.
        """
        return np.concatenate(
            [
                X[:, :ECFP_END][:, self.ecfp_indices],
                X[:, ECFP_END:MACCS_END],
                X[:, MACCS_END:RDKIT_END],
                X[:, RDKIT_END:TOX_END][:, self.tox_indices],
                X[:, TOX_END:],
            ],
            axis=1,
        )

    def _scale(self, X: np.ndarray) -> np.ndarray:
        """Apply the stored two-stage scaling to already-selected features.

        Steps: replace non-finite values with 0, apply ``(x - mean1) / scale1``,
        replace non-finite values again, squash with ``tanh``, then apply
        ``(x - mean2) / scale2``. The result of the last step is not cleaned
        here; ``predict`` does that.
        """
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
        X = (X - self.s1_mean) / self._s1_denom
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
        X = np.tanh(X)
        X = (X - self.s2_mean) / self._s2_denom
        return X

    @torch.no_grad()
    def predict(self, X_raw: np.ndarray) -> np.ndarray:
        """Return this fold's sigmoid outputs for ``X_raw``.

        Args:
            X_raw: 2-D array of raw (unselected, unscaled) features.

        Returns:
            NumPy array holding ``sigmoid(model(x))`` for the prepared input.
        """
        X = self._select_features(X_raw)
        X = self._scale(X)
        # Final clean-up: the second scaling stage may still yield NaN/inf.
        X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)
        tensor = torch.tensor(X, dtype=torch.float32, device=self.device)
        logits = self.model(tensor)
        return torch.sigmoid(logits).cpu().numpy()


class Tox21Ensemble:
    """Average the predictions of every fold stored in a checkpoint."""

    def __init__(
        self,
        checkpoint_path: Union[str, Path],
        device: Optional[Union[str, torch.device]] = None,
    ) -> None:
        """Load a checkpoint and build one ``FoldPredictor`` per fold.

        Args:
            checkpoint_path: Location of a checkpoint readable by
                ``torch.load`` that provides the keys ``"n_folds"``,
                ``"mean_auc"`` and ``"folds"``.
            device: Device to run on. When omitted (or falsy), CUDA is used
                if available, otherwise the CPU.
        """
        self.device = device or torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )

        # NOTE(security): weights_only=False performs full pickle
        # deserialization, which can execute arbitrary code. Only load
        # checkpoints from a trusted source. It is presumably required because
        # the checkpoint stores more than plain tensors (index arrays, scaler
        # state) — confirm before switching to weights_only=True.
        checkpoint = torch.load(
            checkpoint_path, map_location="cpu", weights_only=False
        )
        self.n_folds = checkpoint["n_folds"]
        self.mean_auc = checkpoint["mean_auc"]
        self.predictors = [
            FoldPredictor(fold_data, self.device)
            for fold_data in checkpoint["folds"]
        ]

    @torch.no_grad()
    def predict(self, X_raw: np.ndarray) -> np.ndarray:
        """Return the element-wise mean of all fold predictions.

        Args:
            X_raw: 2-D array of raw features, passed unchanged to every fold.

        Returns:
            NumPy array with the per-fold outputs averaged over the folds.

        Raises:
            ValueError: If the checkpoint contained no folds. Averaging an
                empty list would otherwise silently produce NaN.
        """
        if not self.predictors:
            raise ValueError(
                "Tox21Ensemble has no fold predictors; cannot predict"
            )
        fold_outputs = [
            predictor.predict(X_raw) for predictor in self.predictors
        ]
        return np.mean(fold_outputs, axis=0)