rasayan-tox21 / src /ensemble.py
root
Initial commit: Rasayan Tox21 SNN Ensemble
0024d0e
import numpy as np
import torch
from pathlib import Path
from .model import Tox21SNN
# Widths of the four leading column groups of the raw feature matrix.  The
# group names mirror the *_END constants below; what each group actually holds
# (presumably ECFP bits, MACCS keys, RDKit descriptors and toxicity-related
# features) should be confirmed against the featurisation code.
_ECFP_WIDTH = 8192
_MACCS_WIDTH = 167
_RDKIT_WIDTH = 208
_TOX_WIDTH = 1868

# Exclusive end column of each group: a group spans the half-open range from
# the previous group's END up to its own END.
ECFP_END = _ECFP_WIDTH
MACCS_END = ECFP_END + _MACCS_WIDTH
RDKIT_END = MACCS_END + _RDKIT_WIDTH
TOX_END = RDKIT_END + _TOX_WIDTH
class FoldPredictor:
    """Inference wrapper for a single fold: feature selection, scaling, model.

    All state is read from ``fold_data``, a mapping with the keys
    ``"ecfp_indices"``, ``"tox_indices"``, ``"in_features"``,
    ``"scaler_state"`` and ``"model_state"``.
    """

    def __init__(self, fold_data, device):
        """Restore the fold's preprocessing statistics and network weights.

        Args:
            fold_data: Mapping holding this fold's saved state (see the class
                docstring for the keys that are read).
            device: Device the model is moved to and on which input tensors
                are created in :meth:`predict`.
        """

        def to_float32(values):
            return np.array(values, dtype=np.float32)

        self.device = device
        self.ecfp_indices = fold_data["ecfp_indices"]
        self.tox_indices = fold_data["tox_indices"]
        self.in_features = fold_data["in_features"]

        # Mean/scale pairs for the two standardisation passes in _scale().
        stats = fold_data["scaler_state"]
        self.s1_mean = to_float32(stats["scaler1_mean"])
        self.s1_scale = to_float32(stats["scaler1_scale"])
        self.s2_mean = to_float32(stats["scaler2_mean"])
        self.s2_scale = to_float32(stats["scaler2_scale"])

        # Dropout is set to 0.0 and the network is put in eval mode because
        # this object is only used for inference.
        network = Tox21SNN(in_features=self.in_features, dropout=0.0)
        network.load_state_dict(fold_data["model_state"])
        network.to(device)
        network.eval()
        self.model = network

    @staticmethod
    def _zero_non_finite(values):
        """Return ``values`` with NaN, +inf and -inf all replaced by 0.0."""
        return np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)

    def _select_features(self, X):
        """Reduce the raw feature columns to the subset kept for this fold.

        The first column group (up to ``ECFP_END``) and the fourth group
        (``RDKIT_END`` to ``TOX_END``) are filtered with ``ecfp_indices`` and
        ``tox_indices`` respectively; those indices are relative to the start
        of their own group.  The second and third groups, and every column
        from ``TOX_END`` onwards, are passed through unchanged.

        Args:
            X: 2-D array indexed as ``X[row, column]``.

        Returns:
            The kept columns concatenated along axis 1, in group order.
        """
        ecfp_block = X[:, :ECFP_END]
        maccs_block = X[:, ECFP_END:MACCS_END]
        rdkit_block = X[:, MACCS_END:RDKIT_END]
        tox_block = X[:, RDKIT_END:TOX_END]
        remainder = X[:, TOX_END:]

        pieces = [
            ecfp_block[:, self.ecfp_indices],
            maccs_block,
            rdkit_block,
            tox_block[:, self.tox_indices],
            remainder,
        ]
        return np.concatenate(pieces, axis=1)

    def _scale(self, X):
        """Apply the fold's two-stage scaling to already-selected features.

        Steps: zero out non-finite values, standardise with the first
        mean/scale pair, zero out non-finite values again, squash with
        ``tanh``, then standardise with the second mean/scale pair.  The
        result of the final division is returned as-is (not re-sanitised).
        """
        # Scales are floored at 1e-10 so the divisions never hit zero.
        first_divisor = np.clip(self.s1_scale, 1e-10, None)
        second_divisor = np.clip(self.s2_scale, 1e-10, None)

        standardised = (self._zero_non_finite(X) - self.s1_mean) / first_divisor
        squashed = np.tanh(self._zero_non_finite(standardised))
        return (squashed - self.s2_mean) / second_divisor

    def predict(self, X_raw):
        """Run the full pipeline on raw features and return probabilities.

        Args:
            X_raw: 2-D array of raw features laid out as expected by
                :meth:`_select_features`.

        Returns:
            NumPy array holding the element-wise sigmoid of the model output
            (presumably one row per input row; the column count depends on
            the model, which is defined elsewhere).
        """
        selected = self._select_features(X_raw)
        # Final clean-up pass: _scale() can still emit non-finite values.
        features = self._zero_non_finite(self._scale(selected))

        with torch.no_grad():
            batch = torch.tensor(features, dtype=torch.float32, device=self.device)
            probabilities = torch.sigmoid(self.model(batch))
            return probabilities.cpu().numpy()
class Tox21Ensemble:
    """Average the predictions of every fold model stored in one checkpoint.

    The checkpoint is expected to be a mapping with the keys ``"n_folds"``,
    ``"mean_auc"`` and ``"folds"``; each entry of ``"folds"`` is handed to
    :class:`FoldPredictor`.
    """

    def __init__(self, checkpoint_path, device=None):
        """Load the checkpoint and build one predictor per stored fold.

        Args:
            checkpoint_path: Path of the checkpoint file passed to
                ``torch.load``.
            device: Device for the fold models.  When falsy, CUDA is used if
                available, otherwise the CPU.
        """
        self.device = device or torch.device(
            "cuda" if torch.cuda.is_available() else "cpu"
        )

        # NOTE(security): weights_only=False makes torch.load unpickle
        # arbitrary Python objects, which can execute code.  Only load
        # checkpoints that come from a trusted source.
        checkpoint = torch.load(
            checkpoint_path, map_location="cpu", weights_only=False
        )
        self.n_folds = checkpoint["n_folds"]
        self.mean_auc = checkpoint["mean_auc"]
        self.predictors = [
            FoldPredictor(fold_data, self.device)
            for fold_data in checkpoint["folds"]
        ]

    @torch.no_grad()
    def predict(self, X_raw):
        """Return the element-wise mean of all fold predictions.

        Args:
            X_raw: Raw feature array forwarded unchanged to each fold
                predictor's ``predict`` method.

        Returns:
            NumPy array with the same shape as a single fold's prediction.

        Raises:
            RuntimeError: If the ensemble holds no fold predictors.  Without
                this check ``np.mean`` over an empty list would return NaN
                with only a warning.
        """
        if not self.predictors:
            raise RuntimeError(
                "Tox21Ensemble has no fold predictors; "
                "the checkpoint contained no folds."
            )

        fold_predictions = [
            predictor.predict(X_raw) for predictor in self.predictors
        ]
        return np.mean(fold_predictions, axis=0)