File size: 3,522 Bytes
9270c9a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | {
"cells": [
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"========================================================================\n",
"🏆 PATHOGENICITY LEADERBOARD — Bottom 100k ClinVar\n",
"========================================================================\n",
"Model | ROC-AUC | PR-AUC \n",
"------------------------------------------------------------------------\n",
"ClinPred | 0.9602 | 0.5921\n",
"REVEL | 0.9554 | 0.5828\n",
"AlphaMissense | 0.9526 | 0.5764\n",
"ESM1b | 0.9440 | 0.5492\n",
"BayesDel_addAF | 0.9355 | 0.6880\n",
"PrimateAI | 0.9298 | 0.4834\n",
"MPC | 0.9210 | 0.4960\n",
"EVE | 0.9188 | 0.5107\n",
"PathoPreter | 0.9123 | 0.6204\n",
"CADD_raw | 0.8980 | 0.6566\n",
"========================================================================\n"
]
}
],
"source": [
"import warnings\n",
"\n",
"import numpy as np\n",
"from sklearn.metrics import roc_auc_score, average_precision_score\n",
"\n",
"# Load the saved PathoPreter probabilities and the ground-truth labels.\n",
"# `df` is not defined in this cell; it must already exist in the kernel.\n",
"probs = np.load(\"100k_probs.npy\")\n",
"y_true = df[\"labels\"].values\n",
"\n",
"# Fail early with a readable message when scores and labels are misaligned,\n",
"# instead of relying on the metric calls to reject the inputs.\n",
"if len(probs) != len(y_true):\n",
"    raise ValueError(\n",
"        f\"100k_probs.npy holds {len(probs)} scores but df has {len(y_true)} labels\"\n",
"    )\n",
"\n",
"bench_cols = [\n",
"    \"CADD_raw_rankscore\",\"REVEL_rankscore\",\"AlphaMissense_rankscore\",\n",
"    \"ClinPred_rankscore\",\"PrimateAI_rankscore\",\"MPC_rankscore\",\n",
"    \"BayesDel_addAF_rankscore\",\"EVE_rankscore\",\"ESM1b_rankscore\"\n",
"]\n",
"\n",
"# Each entry is a (model name, ROC-AUC, PR-AUC) tuple.\n",
"bench_results = []\n",
"\n",
"# PathoPreter first\n",
"bench_results.append((\n",
"    \"PathoPreter\",\n",
"    roc_auc_score(y_true, probs),\n",
"    average_precision_score(y_true, probs)\n",
"))\n",
"\n",
"# Report benchmark columns that are absent instead of dropping them silently.\n",
"missing_cols = [c for c in bench_cols if c not in df.columns]\n",
"if missing_cols:\n",
"    warnings.warn(f\"Benchmark columns not found in df, skipped: {missing_cols}\")\n",
"\n",
"# DBNSFP models\n",
"for c in bench_cols:\n",
"    if c not in df.columns:\n",
"        continue\n",
"    # NOTE(review): NaN scores become 0.0, so variants a predictor did not score\n",
"    # are still evaluated with score 0.0 rather than excluded -- confirm this\n",
"    # is the intended handling of missing predictions.\n",
"    scores = np.nan_to_num(df[c].to_numpy(), nan=0.0)\n",
"    try:\n",
"        auc = roc_auc_score(y_true, scores)\n",
"        pr = average_precision_score(y_true, scores)\n",
"    except Exception as err:\n",
"        # Stay best-effort (the model is still listed, with NaN metrics),\n",
"        # but say which column failed and why instead of hiding the error.\n",
"        warnings.warn(f\"{c}: metric computation failed ({err!r}); reporting NaN\")\n",
"        auc, pr = np.nan, np.nan\n",
"    bench_results.append((c.replace(\"_rankscore\",\"\"), auc, pr))\n",
"\n",
"# Sort by ROC-AUC, best first. `x[1] == x[1]` is False only for NaN, so failed\n",
"# models get key 0.0 and end up at the bottom of the table.\n",
"bench_results = sorted(\n",
"    bench_results,\n",
"    key=lambda x: float(x[1]) if x[1] == x[1] else 0.0,\n",
"    reverse=True\n",
")\n",
"\n",
"# Print leaderboard\n",
"print(\"\\n\" + \"=\"*72)\n",
"print(\"🏆 PATHOGENICITY LEADERBOARD — Bottom 100k ClinVar\")\n",
"print(\"=\"*72)\n",
"print(f\"{'Model':20s} | {'ROC-AUC':8s} | {'PR-AUC':8s}\")\n",
"print(\"-\"*72)\n",
"for name, auc, pr in bench_results:\n",
"    print(f\"{name:20s} | {auc:8.4f} | {pr:8.4f}\")\n",
"print(\"=\"*72)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|