Commit 20eb53e
Parent(s): 797907e

add test models and the train/test files

Files changed:
- TabM_NEO_training_0.pth +3 -0
- TabM_NEO_training_1.pth +3 -0
- TabM_NEO_training_2.pth +3 -0
- TabM_NEO_training_3.pth +3 -0
- data/tabm_test.tsv +3 -0
- data/tabm_train.tsv +3 -0
- run_tabm_hyperopt.sh +48 -0
- src/tabm_eval.py +382 -0
- src/tabm_train.py +487 -0
TabM_NEO_training_0.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd1a0a6368c7837eab7a6ecc41bcf96b9245b7ce1380f738b360acafa2da388a
+size 289388
TabM_NEO_training_1.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66dc7122b51b67db5eaca4745514d494f70b1e359dcc1bf2aa2ebf11c765a2e9
+size 5572025
TabM_NEO_training_2.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b4d7648d06b2e5795bd0032f0cba34386e86ee5a1fadd4d05ed9ad0eca9fffe
+size 288915
TabM_NEO_training_3.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4695839cb1fb6ed3d5acb1f516085eaef3e45e90c16638ffcceb60e61f97f14b
+size 637113
data/tabm_test.tsv CHANGED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a30b0d4d859c7a2844539d28ef98c9ac23add6df054cefb242d23b117ea47dc
+size 3686362
data/tabm_train.tsv CHANGED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb0e395c5f8f3af544f9a4896626d5e51212564777a2e751bab6c4296634176a
+size 13137157
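The six entries above are Git LFS pointer files: the diff records only the LFS spec version, the SHA-256 object id, and the byte size of each payload, not the model weights or TSV contents themselves. As a minimal sketch (a hypothetical helper, not part of this commit), a locally pulled payload can be checked against the oid/size fields of its pointer:

import hashlib
import os

def check_lfs_pointer(pointer_path: str, payload_path: str) -> bool:
    # Parse "key value" lines of the pointer (version / oid sha256:<hex> / size <bytes>).
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])

    # Hash the actual payload in 1 MiB chunks and compare oid and size.
    sha = hashlib.sha256()
    with open(payload_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid and os.path.getsize(payload_path) == expected_size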
run_tabm_hyperopt.sh ADDED
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# TabM Hyperparameter Search (sum_exp_rank) + Final Training + Evaluation
+set -e
+
+START_TS="$(date '+%F %T')"
+START_EPOCH="$(date +%s)"
+echo "[Start] ${START_TS}"
+
+OUT_DIR="tabm_results_hyperopt_parallel"
+mkdir -p "$OUT_DIR"
+
+echo "[Hyperopt] Search for TabM hyperparameters (sum_exp_rank) and train the final model..."
+python src/tabm_train.py \
+    --data_file data/tabm_train.tsv \
+    --model_out "$OUT_DIR/tabm_hyperopt_best.pth" \
+    --max_evals 30 \
+    --cv_folds 5 \
+    --epochs 20 \
+    --final_epochs 40 \
+    --batch_size 128 \
+    --alpha 0.005 \
+    --tune_k \
+    --device auto \
+    --nr_hyperopt_rep 4
+
+MODEL_GLOB="$OUT_DIR/tabm_hyperopt_best_rep*.pth"
+
+echo "Start evaluating (weighted average of multiple models)..."
+
+python src/tabm_eval.py \
+    --model_glob "$MODEL_GLOB" \
+    --data_file achieve_features_test.tsv \
+    --output_file "$OUT_DIR/TabM_NEO_test.txt" \
+    --output_xlsx "$OUT_DIR/TabM_NEO_test.xlsx" \
+    --tesla_file "$OUT_DIR/TabM_NEO_test_tesla.txt" \
+    --tesla_xlsx "$OUT_DIR/TabM_NEO_test_tesla.xlsx" \
+    --device auto --batch_size 1024 --skip_no_cd8
+
+echo "Evaluation completed!"
+
+END_TS="$(date '+%F %T')"
+END_EPOCH="$(date +%s)"
+ELAPSED=$((END_EPOCH - START_EPOCH))
+H=$((ELAPSED/3600))
+M=$(((ELAPSED%3600)/60))
+S=$((ELAPSED%60))
+printf "[End] %s | Total elapsed: %02d:%02d:%02d\n" "$END_TS" "$H" "$M" "$S"
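The "sum_exp_rank" objective named in the script is the ranking score that both Python files compute: candidates are sorted by predicted probability, and a patient's score is the sum of exp(-alpha * rank) over the true CD8 hits, with zero-based ranks and alpha = 0.005, so hits near the top of the list contribute close to 1 and hits far down contribute little. A small worked example mirroring sum_rank_correct_numpy from src/tabm_train.py further below (the rank values here are made up for illustration):

import numpy as np

alpha = 0.005
# Suppose a patient's three immunogenic peptides land at zero-based ranks 0, 19 and 250
# after sorting all of that patient's peptides by predicted probability.
ranks = np.array([0, 19, 250])
score = np.sum(np.exp(-alpha * ranks))
print(score)  # exp(0) + exp(-0.095) + exp(-1.25) = 1.0 + 0.909 + 0.287, roughly 2.196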
src/tabm_eval.py ADDED
@@ -0,0 +1,382 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import os
+import numpy as np
+import pandas as pd
+import torch
+import tabm
+from sklearn.metrics import precision_recall_curve, auc
+
+def normalize_rt(s: pd.Series) -> pd.Series:
+    return s.astype(str).str.strip().str.upper()
+
+def compute_patient_metrics(df_p: pd.DataFrame, y_prob: np.ndarray) -> tuple:
+    X_r = df_p.copy()
+    X_r['ML_pred'] = y_prob
+    X_r['response'] = (normalize_rt(X_r['response_type']) == 'CD8').astype(int)
+
+    X_r = X_r.sort_values(by=['ML_pred'], ascending=False).reset_index(drop=True)
+
+    idx_pos = np.where(X_r['response'].to_numpy() == 1)[0]
+    idx_tested = np.where(normalize_rt(X_r['response_type']) == 'NEGATIVE')[0]
+
+    def topk_counts(k: int):
+        k_eff = min(k, len(X_r))
+        nr_correct = int(np.sum(idx_pos < k_eff))
+        nr_tested = nr_correct + int(np.sum(idx_tested < k_eff))
+        return nr_correct, nr_tested
+
+    nr_correct20, nr_tested20 = topk_counts(20)
+    nr_correct50, nr_tested50 = topk_counts(50)
+    nr_correct100, nr_tested100 = topk_counts(100)
+
+    nr_immuno = int(np.sum(X_r['response'] == 1))
+    y_true = X_r['response'].to_numpy()
+    y_pred = X_r['ML_pred'].to_numpy()
+
+    alpha = 0.005
+    score = float(np.sum(np.exp(-alpha * idx_pos)))
+
+    if nr_immuno > 0:
+        sort_idx = np.argsort(idx_pos)
+        ranks_str = ",".join([f"{int(r+1)}" for r in idx_pos[sort_idx]])
+        mut_seqs = X_r.loc[X_r['response'] == 1, 'mutant_seq'].to_numpy()
+        mut_seqs_str = ",".join([str(s) for s in mut_seqs[sort_idx]])
+        genes = X_r.loc[X_r['response'] == 1, 'gene'].to_numpy()
+        genes_str = ",".join([str(g) for g in genes[sort_idx]])
+    else:
+        ranks_str = ""
+        mut_seqs_str = ""
+        genes_str = ""
+
+    return (X_r['ML_pred'].to_numpy(), X_r,
+            nr_correct20, nr_tested20,
+            nr_correct50, nr_tested50,
+            nr_correct100, nr_tested100,
+            nr_immuno, idx_pos, score,
+            ranks_str, mut_seqs_str, genes_str)
+
+
+def predict_in_batches(model, X_all, device, batch_size=1024):
+    model.eval()
+    y_prob_all = []
+
+    with torch.inference_mode():
+        for i in range(0, len(X_all), batch_size):
+            batch_end = min(i + batch_size, len(X_all))
+            batch_X = X_all[i:batch_end].to(device)
+
+            batch_pred = model(batch_X).mean(1)
+            batch_pred = torch.softmax(batch_pred, dim=1)[:, 1]
+
+            y_prob_all.append(batch_pred.cpu())
+
+            del batch_X, batch_pred
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+    return torch.cat(y_prob_all, dim=0).numpy()
+
+def main():
+
+    ap = argparse.ArgumentParser(description="TabM model evaluation, output format consistent with TestVotingClassifier")
+    ap.add_argument("--model_file", type=str, required=False, help="TabM model file, e.g. tabm_results/tabm_model.pth (mutually exclusive with --model_files/--model_glob, choose one of the three)")
+    ap.add_argument("--model_files", type=str, nargs='*', default=None, help="Multiple model files for equal-weighted average prediction")
+    ap.add_argument("--model_glob", type=str, default=None, help="Use wildcards to match multiple model files (e.g. tabm_results/tabm_hyperopt_best_rep*.pth)")
+    ap.add_argument("--data_file", type=str, required=True, help="Input TSV: TestVoting_selection_neopep.tsv")
+    ap.add_argument("--output_file", type=str, required=True, help="Main result output file (header consistent with original)")
+    ap.add_argument("--tesla_file", type=str, default=None, help="TESLA score output file (for neopep task)")
+    ap.add_argument("--output_xlsx", type=str, default=None, help="Main result Excel output path (optional)")
+    ap.add_argument("--tesla_xlsx", type=str, default=None, help="TESLA result Excel output path (optional)")
+    ap.add_argument("--dataset_name", type=str, default=None, help="If no dataset column exists, use this value as the Dataset column in the TESLA output")
+    ap.add_argument("--skip_no_cd8", action="store_true", help="Skip patients without CD8 responses")
+    ap.add_argument("--device", type=str, default="auto", choices=["auto", "cuda", "cpu"],
+                    help="Device selection: auto/cuda/cpu")
+    ap.add_argument("--batch_size", type=int, default=1024,
+                    help="Batch size to avoid GPU memory overflow (default 1024)")
+    args = ap.parse_args()
+
+    # device selection
+    if args.device == "auto":
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+            print(f"Auto-selected GPU: {torch.cuda.get_device_name(0)}")
+            print(f"  GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
+        else:
+            device = torch.device('cpu')
+            print("No GPU detected, using CPU")
+    elif args.device == "cuda":
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+            print(f"Force using GPU: {torch.cuda.get_device_name(0)}")
+        else:
+            raise RuntimeError("CUDA specified but no GPU detected")
+    else:
+        device = torch.device('cpu')
+        print("Using CPU")
+
+    print(f"  Batch size: {args.batch_size}")
+
+    # Read data
+    df = pd.read_csv(args.data_file, sep="\t", header=0, low_memory=False)
+    print(f"Data shape: {df.shape}")
+
+    # Required columns check
+    required_cols = ["patient", "response_type", "gene", "mutant_seq"]
+    for c in required_cols:
+        if c not in df.columns:
+            raise KeyError(f"Missing required column: {c}")
+
+    # Feature columns = all columns except metadata columns
+    feature_cols = [c for c in df.columns if c not in required_cols]
+    # Dynamically read numeric features (no fixed column count processing)
+    X_all = df[feature_cols].apply(pd.to_numeric, errors='coerce').fillna(0.0).to_numpy()
+    print(f"  Number of features: {X_all.shape[1]}")
+
+    # model files parsing
+    import glob as _glob
+    model_paths: list[str] = []
+    if args.model_files:
+        model_paths.extend(list(args.model_files))
+    if args.model_glob:
+        model_paths.extend(sorted(_glob.glob(args.model_glob)))
+    if not model_paths and args.model_file:
+        model_paths = [args.model_file]
+    if not model_paths:
+        raise FileNotFoundError("No model files found, please check!")
+
+    first_ckpt = torch.load(model_paths[0], map_location='cpu', weights_only=False)
+    model_args = first_ckpt['args']
+
+    def _predict_with_model(model_path: str, X_all_np: np.ndarray) -> np.ndarray:
+        if not os.path.exists(model_path):
+            raise FileNotFoundError(f"Model file does not exist: {model_path}")
+        ckpt = torch.load(model_path, map_location='cpu', weights_only=False)
+        m_args = ckpt['args']
+        X_np = X_all_np
+        if ckpt.get("used_feature_idx") is not None:
+            try:
+                ufi = ckpt["used_feature_idx"]
+                import numpy as _np
+                ufi_arr = _np.array(ufi, dtype=int)
+                max_idx = X_np.shape[1] - 1
+                ufi_arr = ufi_arr[(ufi_arr >= 0) & (ufi_arr <= max_idx)]
+                if len(ufi_arr) > 0:
+                    X_np = X_np[:, ufi_arr]
+            except Exception:
+                pass
+        X_tensor_cpu = torch.as_tensor(X_np, dtype=torch.float32)
+        num_embeddings = None
+        if getattr(m_args, 'use_embeddings', False):
+            if m_args.embedding_type == 'linear':
+                import rtdl_num_embeddings
+                num_embeddings = rtdl_num_embeddings.LinearReLUEmbeddings(X_tensor_cpu.shape[1])
+            elif m_args.embedding_type == 'periodic':
+                import rtdl_num_embeddings
+                num_embeddings = rtdl_num_embeddings.PeriodicEmbeddings(X_tensor_cpu.shape[1], lite=False)
+            elif m_args.embedding_type == 'piecewise':
+                import rtdl_num_embeddings
+                num_embeddings = rtdl_num_embeddings.PiecewiseLinearEmbeddings(
+                    rtdl_num_embeddings.compute_bins(X_tensor_cpu, n_bins=48),
+                    d_embedding=16,
+                    activation=False,
+                    version='B',
+                )
+        model = tabm.TabM.make(
+            n_num_features=X_tensor_cpu.shape[1],
+            cat_cardinalities=[],
+            d_out=2,
+            k=m_args.k,
+            n_blocks=m_args.n_blocks,
+            d_block=m_args.d_block,
+            num_embeddings=num_embeddings,
+            arch_type=getattr(m_args, 'arch_type', 'tabm'),
+        )
+        model.load_state_dict(ckpt['model_state_dict'])
+        model.to(device)
+        model.eval()
+        bs = max(256, args.batch_size)
+        probs_list = []
+        n = len(X_tensor_cpu)
+        with torch.inference_mode():
+            for i in range(0, n, bs):
+                j = min(i + bs, n)
+                xb = X_tensor_cpu[i:j].to(device)
+                logits = model(xb).mean(1)
+                probs = torch.softmax(logits, dim=1)[:, 1].detach().cpu().numpy()
+                probs_list.append(probs)
+                del xb, logits
+                if torch.cuda.is_available() and device.type == 'cuda':
+                    torch.cuda.empty_cache()
+                if (i // bs) % 50 == 0:
+                    print(f"  batch {i//bs}/{(n+bs-1)//bs}")
+        return np.concatenate(probs_list, axis=0)
+
+    def _stringify(v):
+        try:
+            return repr(v)
+        except Exception:
+            try:
+                return str(v)
+            except Exception:
+                return "<unprintable>"
+
+    print("===== Saved Hyperparameters from checkpoint['args'] =====")
+    if hasattr(model_args, "__dict__"):
+        hp_items = sorted(vars(model_args).items())
+    elif isinstance(model_args, dict):
+        hp_items = sorted(model_args.items())
+    else:
+        try:
+            hp_items = sorted(model_args.__dict__.items())
+        except Exception:
+            hp_items = []
+            print("Unable to enumerate contents of model_args")
+    for key, val in hp_items:
+        print(f"- {key}: {_stringify(val)}")
+    print("=========================================================")
+
+    def _p_dict(title, d):
+        try:
+            print(title)
+            for k in sorted(d.keys()):
+                try:
+                    print(f"- {k}: {repr(d[k])}")
+                except Exception:
+                    print(f"- {k}: <unprintable>")
+            print("=" * len(title))
+        except Exception:
+            pass
+
+    if isinstance(first_ckpt.get("training_args"), dict):
+        _p_dict("===== checkpoint['training_args'] =====", first_ckpt["training_args"])
+
+    if isinstance(first_ckpt.get("best_params"), dict):
+        _p_dict("===== checkpoint['best_params'] =====", first_ckpt["best_params"])
+
+    if isinstance(first_ckpt.get("full_args"), dict):
+        _p_dict("===== checkpoint['full_args'] =====", first_ckpt["full_args"])
+
+    if first_ckpt.get("used_feature_idx") is not None:
+        try:
+            ufi = first_ckpt["used_feature_idx"]
+            print("===== used_feature_idx =====")
+            print(f"- length: {len(ufi)}")
+            print(f"- head: {list(ufi[:10])}")
+            print("=" * 25)
+        except Exception:
+            print("===== used_feature_idx =====\n<unprintable>\n============================")
+
+    try:
+        print("===== Environment =====")
+        print(f"- torch: {torch.__version__}")
+        print(f"- cuda available: {torch.cuda.is_available()}")
+        if torch.cuda.is_available():
+            print(f"- device: {torch.cuda.get_device_name(0)}")
+            print(f"- cuda version: {torch.version.cuda}")
+        import tabm as _tabm_mod
+        print(f"- tabm: {getattr(_tabm_mod, '__version__', 'unknown')}")
+        print("========================")
+    except Exception:
+        pass
+
+    n_models = len(model_paths)
+    print(f"Loading {n_models} models for equal-weighted average prediction...")
+    y_prob_all = None
+    for mp in model_paths:
+        print(f"  -> {mp}")
+        probs = _predict_with_model(mp, X_all)
+        if y_prob_all is None:
+            y_prob_all = probs.astype(np.float64)
+        else:
+            y_prob_all += probs
+    y_prob_all = (y_prob_all / float(n_models)).astype(np.float64)
+
+    print(f"Prediction completed, total {len(y_prob_all)} samples; number of models={n_models}")
+
+    rows_main = []
+    rows_tesla = []
+
+    need_header = (not os.path.exists(args.output_file)) or (os.path.getsize(args.output_file) == 0)
+    with open(args.output_file, "a", encoding="utf-8") as f:
+        if need_header:
+            f.write("Patient\tNr_correct_top20\tNr_tested_top20\tNr_correct_top50\tNr_tested_top50\t"
+                    "Nr_correct_top100\tNr_tested_top100\tNr_immunogenic\tNr_peptides\tClf_score\t"
+                    "CD8_ranks\tCD8_mut_seqs\tCD8_genes\n")
+
+        for patient, df_p in df.groupby("patient", sort=False):
+            has_cd8 = (normalize_rt(df_p["response_type"]) == "CD8").any()
+            if args.skip_no_cd8 and not has_cd8:
+                continue
+
+            idx = df_p.index.to_numpy()
+            y_prob = y_prob_all[idx]
+
+            (y_pred_sorted, X_sorted,
+             nr_correct20, nr_tested20,
+             nr_correct50, nr_tested50,
+             nr_correct100, nr_tested100,
+             nr_immuno, r, score,
+             ranks_str, mut_seqs_str, genes_str) = compute_patient_metrics(df_p, y_prob)
+
+            f.write(f"{patient}\t{nr_correct20}\t{nr_tested20}\t{nr_correct50}\t{nr_tested50}\t"
+                    f"{nr_correct100}\t{nr_tested100}\t{nr_immuno}\t{len(df_p)}\t{score:.6f}\t"
+                    f"{ranks_str}\t{mut_seqs_str}\t{genes_str}\n")
+
+            rows_main.append({
+                "Patient": patient,
+                "Nr_correct_top20": nr_correct20,
+                "Nr_tested_top20": nr_tested20,
+                "Nr_correct_top50": nr_correct50,
+                "Nr_tested_top50": nr_tested50,
+                "Nr_correct_top100": nr_correct100,
+                "Nr_tested_top100": nr_tested100,
+                "Nr_immunogenic": nr_immuno,
+                "Nr_peptides": len(df_p),
+                "Clf_score": score,
+                "CD8_ranks": ranks_str,
+                "CD8_mut_seqs": mut_seqs_str,
+                "CD8_genes": genes_str,
+            })
+
+            if args.tesla_file or args.tesla_xlsx:
+                if "dataset" in df_p.columns:
+                    dataset_val = str(df_p["dataset"].iloc[0])
+                else:
+                    dataset_val = args.dataset_name if args.dataset_name is not None else ""
+                idx_nt = X_sorted['response_type'].astype(str) != 'not_tested'
+                y_pred_tesla = pd.Series(y_pred_sorted)[idx_nt].to_numpy()
+                y_tesla = X_sorted.loc[idx_nt, 'response'].to_numpy()
+                ttif = (nr_correct20 / nr_tested20) if nr_tested20 > 0 else 0.0
+                fr = (nr_correct100 / nr_immuno) if nr_immuno > 0 else 0.0
+                precision, recall, _ = precision_recall_curve(y_tesla, y_pred_tesla)
+                auprc = auc(recall, precision)
+
+                if args.tesla_file:
+                    new_tesla = (not os.path.exists(args.tesla_file)) or (os.path.getsize(args.tesla_file) == 0)
+                    with open(args.tesla_file, "a", encoding="utf-8") as tf:
+                        if new_tesla:
+                            tf.write("Dataset\tPatient\tTTIF\tFR\tAUPRC\n")
+                        tf.write(f"{dataset_val}\t{patient}\t{ttif:.3f}\t{fr:.3f}\t{auprc:.3f}\n")
+
+                rows_tesla.append({
+                    "Dataset": dataset_val,
+                    "Patient": patient,
+                    "TTIF": ttif,
+                    "FR": fr,
+                    "AUPRC": auprc,
+                })
+
+    if args.output_xlsx and rows_main:
+        os.makedirs(os.path.dirname(args.output_xlsx) or '.', exist_ok=True)
+        pd.DataFrame(rows_main).to_excel(args.output_xlsx, index=False)
+    if args.tesla_xlsx and rows_tesla:
+        os.makedirs(os.path.dirname(args.tesla_xlsx) or '.', exist_ok=True)
+        pd.DataFrame(rows_tesla).to_excel(args.tesla_xlsx, index=False)
+
+    print(f"Evaluation completed! Processed {len(rows_main)} patients")
+
+if __name__ == "__main__":
+    main()
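The prediction path in _predict_with_model relies on TabM producing one logit vector per ensemble member, i.e. an output of shape (batch, k, 2), which is averaged over the k members before the softmax. A minimal standalone sketch of that convention (assuming the tabm and torch packages are installed; the feature count, k and layer sizes are arbitrary toy values, not the commit's hyperparameters):

import torch
import tabm

# Build a small TabM the same way the scripts do (numeric features only, binary output).
model = tabm.TabM.make(
    n_num_features=8,
    cat_cardinalities=[],
    d_out=2,
    k=4,            # number of ensemble members
    n_blocks=2,
    d_block=64,
    num_embeddings=None,
    arch_type="tabm",
)
model.eval()

x = torch.randn(5, 8)  # 5 samples, 8 numeric features
with torch.inference_mode():
    logits = model(x)                                    # shape (5, k, 2): one prediction per member
    probs = torch.softmax(logits.mean(1), dim=1)[:, 1]   # average members, then P(class 1)
print(probs.shape)  # torch.Size([5])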
src/tabm_train.py ADDED
@@ -0,0 +1,487 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import os
+import random
+from copy import deepcopy
+from typing import Any, Dict
+
+import numpy as np
+import pandas as pd
+from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
+from hyperopt.pyll.base import scope
+from sklearn.model_selection import StratifiedKFold
+
+import torch
+import torch.nn as nn
+import torch.optim
+from torch import Tensor
+
+import tabm
+import rtdl_num_embeddings
+
+def set_seed(seed: int) -> None:
+    random.seed(seed)
+    np.random.seed(seed + 1)
+    torch.manual_seed(seed + 2)
+
+def _dump_model_info_sidecar(model_path: str) -> None:
+    try:
+        if not os.path.exists(model_path):
+            return
+        ckpt = torch.load(model_path, map_location='cpu', weights_only=False)
+        sidecar = os.path.splitext(model_path)[0] + ".info.txt"
+        with open(sidecar, "w", encoding="utf-8") as f:
+            def _p(title: str, d):
+                try:
+                    f.write(title + "\n")
+                    if hasattr(d, "__dict__"):
+                        items = sorted(vars(d).items())
+                    elif isinstance(d, dict):
+                        items = sorted(d.items())
+                    else:
+                        try:
+                            items = sorted(d.__dict__.items())
+                        except Exception:
+                            items = []
+                    for k, v in items:
+                        try:
+                            f.write(f"- {k}: {repr(v)}\n")
+                        except Exception:
+                            f.write(f"- {k}: <unprintable>\n")
+                    f.write("=" * len(title) + "\n")
+                except Exception:
+                    pass
+
+            _p("===== checkpoint['args'] =====", ckpt.get('args'))
+            _p("===== checkpoint['training_args'] =====", ckpt.get('training_args', {}))
+            _p("===== checkpoint['best_params'] =====", ckpt.get('best_params', {}))
+            _p("===== checkpoint['full_args'] =====", ckpt.get('full_args', {}))
+
+            if ckpt.get("used_feature_idx") is not None:
+                ufi = ckpt["used_feature_idx"]
+                f.write("===== used_feature_idx =====\n")
+                try:
+                    f.write(f"- length: {len(ufi)}\n")
+                    f.write(f"- head: {list(ufi[:10])}\n")
+                except Exception:
+                    f.write("<unprintable>\n")
+                f.write("=" * 25 + "\n")
+
+            # Environment info
+            try:
+                f.write("===== Environment =====\n")
+                f.write(f"- torch: {torch.__version__}\n")
+                f.write(f"- cuda available: {torch.cuda.is_available()}\n")
+                if torch.cuda.is_available():
+                    f.write(f"- device: {torch.cuda.get_device_name(0)}\n")
+                    f.write(f"- cuda version: {torch.version.cuda}\n")
+                import tabm as _tabm_mod
+                f.write(f"- tabm: {getattr(_tabm_mod, '__version__', 'unknown')}\n")
+                f.write("========================\n")
+            except Exception:
+                pass
+    except Exception:
+        pass
+def load_training_data(data_file: str) -> tuple[np.ndarray, np.ndarray]:
+    # Read training data: label column plus numerical feature columns (adaptive number of columns).
+    # Use pandas for more robust parsing and to avoid 1D-array errors caused by empty data.
+    df = pd.read_csv(
+        data_file,
+        sep='\t',
+        header=0,
+        dtype=str,
+        keep_default_na=False,
+        na_filter=False,
+        engine='python',
+    )
+
+    if df.shape[0] == 0 or df.shape[1] < 2:
+        raise ValueError(
+            f"Incorrect training data format: {data_file}, requires at least 1 label column + 1 feature column, actual shape={df.shape}"
+        )
+
+    # Determine label column (prefer a column named 'label', otherwise use the first column)
+    label_col = 'label' if 'label' in df.columns else df.columns[0]
+
+    # Parse labels as integers (non-numeric values are set to 0)
+    y = pd.to_numeric(df[label_col], errors='coerce').fillna(0).astype(np.int64).to_numpy()
+
+    # Parse features as float32
+    feature_cols = [c for c in df.columns if c != label_col]
+    if len(feature_cols) == 0:
+        raise ValueError("No feature columns found")
+
+    X_df = df[feature_cols].apply(pd.to_numeric, errors='coerce').fillna(0.0)
+    X = X_df.to_numpy(dtype=np.float32)
+
+    return X, y
+
+def build_num_embeddings(embedding_type: str, X_fold: np.ndarray) -> tuple[Any, np.ndarray]:
+    used_idx = np.arange(X_fold.shape[1])
+    if embedding_type == 'piecewise':
+        var = X_fold.var(axis=0)
+        used_idx = np.where(var > 0.0)[0]
+        X_fold = X_fold[:, used_idx]
+        if len(used_idx) < 1:
+            return None, used_idx
+        try:
+            X_tensor = torch.as_tensor(X_fold, dtype=torch.float32)
+            num_embeddings = rtdl_num_embeddings.PiecewiseLinearEmbeddings(
+                rtdl_num_embeddings.compute_bins(X_tensor, n_bins=48),
+                d_embedding=16,
+                activation=False,
+                version='B',
+            )
+            return num_embeddings, used_idx
+        except Exception:
+            return None, used_idx
+    elif embedding_type == 'linear':
+        return rtdl_num_embeddings.LinearReLUEmbeddings(X_fold.shape[1]), used_idx
+    elif embedding_type == 'periodic':
+        return rtdl_num_embeddings.PeriodicEmbeddings(X_fold.shape[1], lite=False), used_idx
+    else:
+        return None, used_idx
+
+def make_model(n_features: int,
+               k: int,
+               n_blocks: int,
+               d_block: int,
+               num_embeddings: Any,
+               arch_type: str = 'tabm') -> nn.Module:
+    return tabm.TabM.make(
+        n_num_features=n_features,
+        cat_cardinalities=[],
+        d_out=2,
+        k=k,
+        n_blocks=n_blocks,
+        d_block=d_block,
+        num_embeddings=num_embeddings,
+        arch_type=arch_type,
+    )
+
+def train_one_epoch(model: nn.Module,
+                    X: torch.Tensor,
+                    y: torch.Tensor,
+                    optimizer: torch.optim.Optimizer,
+                    batch_size: int,
+                    device: torch.device) -> float:
+    model.train()
+    indices = torch.randperm(len(X), device=device)
+    batches = indices.split(batch_size)
+    total_loss = 0.0
+    share_training_batches = True
+
+    def loss_fn(y_pred: Tensor, y_true: Tensor) -> Tensor:
+        # (B, k, 2) -> (B*k, 2)
+        y_pred = y_pred.flatten(0, 1)
+        if share_training_batches:
+            y_true = y_true.repeat_interleave(model.backbone.k)
+        else:
+            y_true = y_true.flatten(0, 1)
+        return nn.functional.cross_entropy(y_pred, y_true)
+
+    for batch_idx in batches:
+        optimizer.zero_grad()
+        logits = model(X[batch_idx])
+        loss = loss_fn(logits, y[batch_idx])
+        loss.backward()
+        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
+        optimizer.step()
+        total_loss += float(loss.detach().cpu())
+    return total_loss / max(1, len(batches))
+
+def sum_rank_correct_numpy(y_true: np.ndarray, y_prob: np.ndarray, alpha: float = 0.005) -> float:
+    idx = np.argsort(-y_prob)
+    y_sorted = y_true[idx]
+    r = np.where(y_sorted == 1)[0]
+    return float(np.sum(np.exp(-alpha * r)))
+
+@torch.inference_mode()
+def evaluate_sum_exp_rank(model: nn.Module, X: torch.Tensor, y: torch.Tensor, device: torch.device, alpha: float = 0.005) -> float:
+    model.eval()
+    eval_bs = 8096
+    logits = torch.cat([
+        model(X[idx]).mean(1)
+        for idx in torch.arange(len(X), device=device).split(eval_bs)
+    ])
+    probs_pos = torch.softmax(logits, dim=1)[:, 1].cpu().numpy()
+    y_true = y.cpu().numpy()
+    return sum_rank_correct_numpy(y_true, probs_pos, alpha)
+
+
+def objective(params: Dict[str, Any],
+              X: np.ndarray,
+              y: np.ndarray,
+              device: torch.device,
+              seed: int,
+              cv_folds: int,
+              epochs: int,
+              batch_size: int,
+              alpha: float = 0.005) -> Dict[str, Any]:
+
+    k = int(params.get('k', 32))
+    n_blocks = int(params['n_blocks'])
+    d_block = int(params['d_block'])
+    lr = float(params['lr'])
+    wd_choice = params['weight_decay_choice']  # 0 or sampled
+    weight_decay = 0.0 if wd_choice == 0 else float(params['weight_decay_val'])
+    embedding_type = params['embedding_type']  # 'none'/'linear'/'periodic'/'piecewise'
+    arch_type = params['arch_type']  # 'tabm'/'tabm-mini'
+
+    cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=seed)
+    ap_scores: list[float] = []
+
+    for train_idx, val_idx in cv.split(X, y):
+        X_tr = X[train_idx]
+        y_tr = y[train_idx]
+        X_va = X[val_idx]
+        y_va = y[val_idx]
+
+        num_embeddings, used_idx = build_num_embeddings(embedding_type, X_tr)
+        X_tr_used = X_tr[:, used_idx] if len(used_idx) != X_tr.shape[1] else (X_tr if embedding_type != 'piecewise' else X_tr[:, used_idx])
+        X_va_used = X_va[:, used_idx] if embedding_type == 'piecewise' else X_va
+
+        n_features = X_tr_used.shape[1]
+        model = make_model(n_features, k, n_blocks, d_block, num_embeddings, arch_type).to(device)
+        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
+
+        X_tr_t = torch.as_tensor(X_tr_used, device=device)
+        y_tr_t = torch.as_tensor(y_tr, device=device)
+        X_va_t = torch.as_tensor(X_va_used, device=device)
+        y_va_t = torch.as_tensor(y_va, device=device)
+
+        for _ in range(epochs):
+            train_one_epoch(model, X_tr_t, y_tr_t, optimizer, batch_size, device)
+
+        score = evaluate_sum_exp_rank(model, X_va_t, y_va_t, device, alpha)
+        ap_scores.append(score)
+
+    mean_score = float(np.mean(ap_scores))
+    return {"loss": -mean_score, "status": STATUS_OK, "score": mean_score}
+
+def train_final(X: np.ndarray,
+                y: np.ndarray,
+                best_params: Dict[str, Any],
+                device: torch.device,
+                final_epochs: int,
+                batch_size: int,
+                output_path: str,
+                seed: int,
+                alpha: float = 0.005) -> None:
+    k = int(best_params.get('k', 32))
+    n_blocks = int(best_params['n_blocks'])
+    d_block = int(best_params['d_block'])
+    lr = float(best_params['lr'])
+    wd_choice = best_params['weight_decay_choice']
+    weight_decay = 0.0 if wd_choice == 0 else float(best_params['weight_decay_val'])
+    embedding_type = best_params['embedding_type']
+    arch_type = best_params['arch_type']
+
+    num_embeddings, used_idx = build_num_embeddings(embedding_type, X)
+    X_used = X[:, used_idx] if embedding_type == 'piecewise' else X
+    n_features = X_used.shape[1]
+
+    model = make_model(n_features, k, n_blocks, d_block, num_embeddings, arch_type).to(device)
+    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
+
+    X_t = torch.as_tensor(X_used, device=device)
+    y_t = torch.as_tensor(y, device=device)
+
+    for _ in range(final_epochs):
+        train_one_epoch(model, X_t, y_t, optimizer, batch_size, device)
+
+    os.makedirs(os.path.dirname(output_path) or '.', exist_ok=True)
+    torch.save({
+        "model_state_dict": model.state_dict(),
+        "args": argparse.Namespace(
+            k=k,
+            n_blocks=n_blocks,
+            d_block=d_block,
+            use_embeddings=True if embedding_type in ("linear", "periodic", "piecewise") else False,
+            embedding_type=embedding_type,
+            arch_type=arch_type,
+        ),
+        "best_params": deepcopy(best_params),
+        "training_args": {
+            "lr": lr,
+            "weight_decay_choice": wd_choice,
+            "weight_decay_val": weight_decay,
+            "batch_size": batch_size,
+            "final_epochs": final_epochs,
+            "seed": seed,
+            "alpha": alpha,
+            "device": str(device),
+        },
+        "used_feature_idx": used_idx,
+        "full_args": dict(
+            best_params=deepcopy(best_params),
+            final_epochs=final_epochs, batch_size=batch_size,
+            seed=seed, alpha=alpha, device=str(device),
+        ),
+        "search_space": "hyperopt space v1",
+    }, output_path)
+    print(f"Final model saved to: {output_path}")
+    _dump_model_info_sidecar(output_path)
+
+def hyperopt_search(X: np.ndarray,
+                    y: np.ndarray,
+                    device: torch.device,
+                    seed: int,
+                    cv_folds: int,
+                    epochs: int,
+                    batch_size: int,
+                    alpha: float,
+                    tune_k: bool,
+                    max_evals: int) -> tuple[dict, float]:
+    space = {
+        "n_blocks": scope.int(hp.quniform("n_blocks", 2, 5, 1)),
+        "d_block": scope.int(hp.quniform("d_block", 64, 1024, 16)),
+        "lr": hp.loguniform("lr", np.log(1e-4), np.log(5e-3)),
+        "weight_decay_choice": hp.choice("weight_decay_choice", [0, 1]),
+        "weight_decay_val": hp.loguniform("weight_decay_val", np.log(1e-4), np.log(1e-1)),
+        "embedding_type": hp.choice("embedding_type", ["none", "linear", "periodic", "piecewise"]),
+        "arch_type": hp.choice("arch_type", ["tabm", "tabm-mini"]),
+    }
+    if tune_k:
+        space["k"] = scope.int(hp.quniform("k", 16, 32, 8))
+    else:
+        space["k"] = 32
+
+    def obj_fn(hparams):
+        return objective(hparams, X, y, device, seed, cv_folds, epochs, batch_size, alpha)
+
+    trials = Trials()
+    best = fmin(fn=obj_fn, space=space, algo=tpe.suggest, max_evals=max_evals, trials=trials)
+    best_trial = min(trials.trials, key=lambda t: t["result"]["loss"])
+    best_ap = -best_trial["result"]["loss"]
+    best_params = best_trial["misc"]["vals"].copy()
+
+    emb_choices = ["none", "linear", "periodic", "piecewise"]
+    best_params["embedding_type"] = emb_choices[int(best_params["embedding_type"][0])] if isinstance(best_params["embedding_type"], list) else best_params["embedding_type"]
+    arch_choices = ["tabm", "tabm-mini"]
+    best_params["arch_type"] = arch_choices[int(best_params["arch_type"][0])] if isinstance(best_params["arch_type"], list) else best_params["arch_type"]
+    if isinstance(best_params.get("k", 32), list):
+        best_params["k"] = int(best_params["k"][0])
+    for k_ in ["n_blocks", "d_block", "weight_decay_choice"]:
+        if isinstance(best_params[k_], list):
+            best_params[k_] = int(best_params[k_][0])
+    for k_ in ["lr", "weight_decay_val"]:
+        if isinstance(best_params[k_], list):
+            best_params[k_] = float(best_params[k_][0])
+
+    return best_params, float(best_ap)
+
+def run_one_pipeline(rep_idx: int,
+                     X: np.ndarray,
+                     y: np.ndarray,
+                     device_str: str,
+                     args_dict: dict,
+                     out_dir: str,
+                     base: str,
+                     ext: str) -> str:
+    device = torch.device(device_str)
+    rep_seed = int(args_dict["seed"]) + 997 * int(rep_idx)
+    set_seed(rep_seed)
+
+    print(f"[rep {rep_idx}] Starting hyperparameter search (max_evals={args_dict['max_evals']}) ...")
+    best_params, best_ap = hyperopt_search(
+        X, y, device,
+        seed=rep_seed,
+        cv_folds=args_dict["cv_folds"],
+        epochs=args_dict["epochs"],
+        batch_size=args_dict["batch_size"],
+        alpha=args_dict["alpha"],
+        tune_k=args_dict["tune_k"],
+        max_evals=args_dict["max_evals"],
+    )
+    print(f"[rep {rep_idx}] Best sum_exp_rank={best_ap:.6f}")
+    print(f"[rep {rep_idx}] Best parameters={best_params}")
+
+    out_path = os.path.join(out_dir, f"{base}_rep{rep_idx}{ext}")
+    print(f"[rep {rep_idx}] Starting final training, saving to: {out_path}")
+    train_final(
+        X, y, best_params, device,
+        final_epochs=args_dict["final_epochs"],
+        batch_size=args_dict["batch_size"],
+        output_path=out_path,
+        seed=rep_seed,
+        alpha=args_dict["alpha"],
+    )
+    return out_path
+
+def main():
+
+    ap = argparse.ArgumentParser(description="TabM hyperparameter search (Hyperopt) with internal cross-validation, target=sum_exp_rank; training set only, no external validation/test")
+    ap.add_argument("--data_file", type=str, default="Neopep_ml_with_labels.txt", help="Training data TSV")
+    ap.add_argument("--model_out", type=str, default="tabm_results/tabm_hyperopt_best.pth", help="Final model save path (or base name within directory)")
+    ap.add_argument("--max_evals", type=int, default=30, help="Number of Hyperopt evaluations per parallel repetition")
+    ap.add_argument("--cv_folds", type=int, default=5, help="Number of cross-validation folds")
+    ap.add_argument("--epochs", type=int, default=40, help="Training epochs per fold")
+    ap.add_argument("--final_epochs", type=int, default=120, help="Final model training epochs")
+    ap.add_argument("--batch_size", type=int, default=256, help="Batch size")
+    ap.add_argument("--seed", type=int, default=42, help="Random seed (each repetition is offset when running in parallel)")
+    ap.add_argument("--alpha", type=float, default=0.005, help="Alpha for sum_exp_rank")
+    ap.add_argument("--tune_k", action="store_true", help="Whether to also search over k (default fixed at 32)")
+    ap.add_argument("--device", type=str, default="auto", help="Device selection: auto/cuda/cpu")
+    ap.add_argument("--nr_hyperopt_rep", type=int, default=1, help="Parallel repetition count: each runs an independent hyperparameter search + final training")
+    args = ap.parse_args()
+
+    set_seed(args.seed)
+
+    # Device selection
+    if args.device == "auto":
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+            print(f"Detected GPU: {torch.cuda.get_device_name(0)}")
+            print(f"  GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
+            print(f"  CUDA Version: {torch.version.cuda}")
+        else:
+            device = torch.device('cpu')
+            print("No GPU detected, using CPU")
+    elif args.device == "cuda":
+        if torch.cuda.is_available():
+            device = torch.device('cuda:0')
+            print(f"Forcing GPU usage: {torch.cuda.get_device_name(0)}")
+        else:
+            raise RuntimeError("CUDA specified but no GPU detected")
+    else:
+        device = torch.device('cpu')
+        print("Using CPU")
+
+    X, y = load_training_data(args.data_file)
+    print(f"Training data: {X.shape}, positive sample ratio: {np.mean(y):.5f}")
+
+    out_dir = os.path.dirname(args.model_out) or '.'
+    os.makedirs(out_dir, exist_ok=True)
+    base = os.path.splitext(os.path.basename(args.model_out))[0]
+    ext = os.path.splitext(args.model_out)[1] or '.pth'
+
+    args_dict = {
+        "seed": int(args.seed),
+        "cv_folds": int(args.cv_folds),
+        "epochs": int(args.epochs),
+        "final_epochs": int(args.final_epochs),
+        "batch_size": int(args.batch_size),
+        "alpha": float(args.alpha),
+        "tune_k": bool(args.tune_k),
+        "max_evals": int(args.max_evals),
+    }
+
+    from multiprocessing import get_context
+    ctx = get_context('spawn')
+    repeats = int(args.nr_hyperopt_rep)
+    print(f"Parallel repetitions: {repeats} (each runs an independent hyperparameter search + final training)")
+
+    with ctx.Pool(processes=repeats) as pool:
+        paths = pool.starmap(
+            run_one_pipeline,
+            [(i, X, y, str(device), args_dict, out_dir, base, ext) for i in range(repeats)]
+        )
+    print("Saved model files:")
+    for p in sorted(paths):
+        print("-", p)
+
+if __name__ == "__main__":
+    main()
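One subtlety in hyperopt_search above: both fmin's return value and trial["misc"]["vals"] record hp.choice parameters as indices into the choice list (and the trials object wraps every value in a single-element list), which is why the function re-maps embedding_type and arch_type through emb_choices/arch_choices and unwraps the numeric entries. A small standalone illustration (not part of the commit; the toy space and objective are made up) using hyperopt's space_eval helper, which performs the same index-to-value mapping:

from hyperopt import fmin, tpe, hp, space_eval

# Toy space with the same structure as hyperopt_search(): one hp.choice and one continuous parameter.
space = {
    "arch_type": hp.choice("arch_type", ["tabm", "tabm-mini"]),
    "lr": hp.loguniform("lr", -9, -5),
}

# Trivial objective: just minimize the sampled learning rate.
best = fmin(fn=lambda p: p["lr"], space=space, algo=tpe.suggest, max_evals=5)
print(best)                     # e.g. {'arch_type': 1, 'lr': 0.00031}  -- the choice is stored as an index
print(space_eval(space, best))  # e.g. {'arch_type': 'tabm-mini', 'lr': 0.00031}  -- index mapped back to its value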