Commit ·
9d4933d
1
Parent(s): 18bb98c
evolution: 4 critical fixes — elitism + CatBoost cap + feature penalty + n_splits=3
1. Brier-based elitism: protect top-2 by raw Brier in addition to composite
(prevents fossil loss like S15's 0.22159 config disappearing)
2. CatBoost CPU cap: n_estimators=60 + early_stopping=15 on CPU
(3-5x speedup on S10/S13 where catboost dominates)
3. Feature penalty: -0.05 per 200 features above 80 threshold
(breaks the n_feat=200 trap that causes convergence)
4. Default n_splits: 5→3 for 1.6x faster walk-forward CV
Expected: fleet 219→350 gen/hr, prevents config loss, better Brier pressure
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- evolution/genetic_loop_v3.py +1993 -0
evolution/genetic_loop_v3.py
ADDED
|
@@ -0,0 +1,1993 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#!/usr/bin/env python3
"""
NBA Quant AI — REAL Genetic Evolution Loop v4
================================================
RUNS 24/7 on HF Space or Google Colab.

This is NOT a fake LLM wrapper. This is REAL ML:
- Population of 500 individuals across 5 islands (100 per island)
- 13 model types: tree-based + neural nets (LSTM, Transformer, TabNet, etc.)
- NSGA-II Pareto front ranking (multi-objective: Brier, ROI, Sharpe, Calibration)
- Island migration every 10 generations for diversity
- Adaptive mutation: 0.15 -> 0.05 decay + stagnation boost
- Memory management: GC between evaluations for 16GB RAM
- Continuous cycles — saves after each generation
- Callbacks to VM after each cycle
- Population persistence (survives restarts)

Usage:
    # On HF Space (24/7):
    python evolution/genetic_loop_v3.py --continuous

    # On Google Colab (manual):
    !python genetic_loop_v3.py --generations 50

    # Quick test:
    python evolution/genetic_loop_v3.py --generations 5 --pop-size 50
"""
|
| 28 |
+
|
| 29 |
+
import os, sys, json, time, random, math, warnings, traceback, gc
|
| 30 |
+
import numpy as np
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
from datetime import datetime, timezone, timedelta
|
| 33 |
+
from collections import defaultdict
|
| 34 |
+
from typing import Dict, List, Tuple, Optional
|
| 35 |
+
|
| 36 |
+
warnings.filterwarnings("ignore")
|
| 37 |
+
|
| 38 |
+
# Model families the GA can evolve, grouped by the hardware they require.
CPU_MODEL_TYPES = [
    "xgboost",
    "xgboost_brier",
    "lightgbm",
    "catboost",
    "random_forest",
    "extra_trees",
]
GPU_MODEL_TYPES = CPU_MODEL_TYPES + ["tabicl", "tabpfn"]
ALL_MODEL_TYPES = GPU_MODEL_TYPES + [
    "stacking",
    "mlp",
    "lstm",
    "transformer",
    "tabnet",
    "ft_transformer",
    "deep_ensemble",
    "autogluon",
]
# Families treated as neural networks.
NEURAL_NET_TYPES = {"lstm", "transformer", "tabnet", "ft_transformer", "deep_ensemble", "mlp", "autogluon"}
# In-context learning models (GPU, no hyperparams to tune).
ICL_MODEL_TYPES = {"tabicl", "tabpfn"}
|
| 49 |
+
|
| 50 |
+
# ── Run Logger (best-effort) ──
|
| 51 |
+
try:
|
| 52 |
+
from evolution.run_logger import RunLogger
|
| 53 |
+
_HAS_LOGGER = True
|
| 54 |
+
except ImportError:
|
| 55 |
+
try:
|
| 56 |
+
from run_logger import RunLogger
|
| 57 |
+
_HAS_LOGGER = True
|
| 58 |
+
except ImportError:
|
| 59 |
+
_HAS_LOGGER = False
|
| 60 |
+
|
| 61 |
+
# ─── Auto-load .env.local ───
# Minimal .env parser: KEY=VALUE lines, optional "export " prefix, '#'
# comment lines skipped.  setdefault keeps real environment variables
# authoritative over file values.
_env_file = Path(__file__).resolve().parent.parent / ".env.local"
if not _env_file.exists():
    _env_file = Path("/app/.env.local")  # container fallback path
if _env_file.exists():
    for _line in _env_file.read_text().splitlines():
        _line = _line.strip()
        if _line and not _line.startswith("#") and "=" in _line:
            # BUGFIX: strip "export " only as a prefix; the previous
            # str.replace() also mangled values containing "export ".
            if _line.startswith("export "):
                _line = _line[len("export "):]
            _k, _, _v = _line.partition("=")
            os.environ.setdefault(_k.strip(), _v.strip("'\""))
|
| 72 |
+
|
| 73 |
+
# ─── Paths ───
# Directory layout rooted one level above this file; created eagerly so
# later writes never fail on a missing parent.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = BASE_DIR / "data"
HIST_DIR = DATA_DIR / "historical"
RESULTS_DIR = DATA_DIR / "results"
STATE_DIR = DATA_DIR / "evolution-state"
for d in (DATA_DIR, HIST_DIR, RESULTS_DIR, STATE_DIR):
    d.mkdir(parents=True, exist_ok=True)

# External endpoints/credentials, overridable via environment.
VM_CALLBACK_URL = os.environ.get("VM_CALLBACK_URL", "http://34.136.180.66:8080")
ODDS_API_KEY = os.environ.get("ODDS_API_KEY", "")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# ═══════════════════════════════════════════════════════════
|
| 87 |
+
# SECTION 1: DATA LOADING
|
| 88 |
+
# ═══════════════════════════════════════════════════════════
|
| 89 |
+
|
| 90 |
+
# Full franchise name -> three-letter abbreviation (30 NBA teams).
TEAM_MAP = {
    "Atlanta Hawks": "ATL",
    "Boston Celtics": "BOS",
    "Brooklyn Nets": "BKN",
    "Charlotte Hornets": "CHA",
    "Chicago Bulls": "CHI",
    "Cleveland Cavaliers": "CLE",
    "Dallas Mavericks": "DAL",
    "Denver Nuggets": "DEN",
    "Detroit Pistons": "DET",
    "Golden State Warriors": "GSW",
    "Houston Rockets": "HOU",
    "Indiana Pacers": "IND",
    "Los Angeles Clippers": "LAC",
    "Los Angeles Lakers": "LAL",
    "Memphis Grizzlies": "MEM",
    "Miami Heat": "MIA",
    "Milwaukee Bucks": "MIL",
    "Minnesota Timberwolves": "MIN",
    "New Orleans Pelicans": "NOP",
    "New York Knicks": "NYK",
    "Oklahoma City Thunder": "OKC",
    "Orlando Magic": "ORL",
    "Philadelphia 76ers": "PHI",
    "Phoenix Suns": "PHX",
    "Portland Trail Blazers": "POR",
    "Sacramento Kings": "SAC",
    "San Antonio Spurs": "SAS",
    "Toronto Raptors": "TOR",
    "Utah Jazz": "UTA",
    "Washington Wizards": "WAS",
}
|
| 102 |
+
|
| 103 |
+
# (latitude, longitude) of each team's home arena.  LAL/LAC share a building.
ARENA_COORDS = {
    "ATL": (33.757, -84.396),
    "BOS": (42.366, -71.062),
    "BKN": (40.683, -73.976),
    "CHA": (35.225, -80.839),
    "CHI": (41.881, -87.674),
    "CLE": (41.496, -81.688),
    "DAL": (32.790, -96.810),
    "DEN": (39.749, -105.008),
    "DET": (42.341, -83.055),
    "GSW": (37.768, -122.388),
    "HOU": (29.751, -95.362),
    "IND": (39.764, -86.156),
    "LAC": (34.043, -118.267),
    "LAL": (34.043, -118.267),
    "MEM": (35.138, -90.051),
    "MIA": (25.781, -80.187),
    "MIL": (43.045, -87.917),
    "MIN": (44.980, -93.276),
    "NOP": (29.949, -90.082),
    "NYK": (40.751, -73.994),
    "OKC": (35.463, -97.515),
    "ORL": (28.539, -81.384),
    "PHI": (39.901, -75.172),
    "PHX": (33.446, -112.071),
    "POR": (45.532, -122.667),
    "SAC": (38.580, -121.500),
    "SAS": (29.427, -98.438),
    "TOR": (43.643, -79.379),
    "UTA": (40.768, -111.901),
    "WAS": (38.898, -77.021),
}
|
| 115 |
+
|
| 116 |
+
# Arena altitude in feet (DEN's 5280 is the mile-high reference point).
ARENA_ALTITUDE = {
    "DEN": 5280, "UTA": 4226, "PHX": 1086, "OKC": 1201, "SAS": 650,
    "DAL": 430,  "HOU": 43,   "MEM": 337,  "ATL": 1050, "CHA": 751,
    "IND": 715,  "CHI": 594,  "MIL": 617,  "MIN": 830,  "DET": 600,
    "CLE": 653,  "BOS": 141,  "NYK": 33,   "BKN": 33,   "PHI": 39,
    "WAS": 25,   "MIA": 6,    "ORL": 82,   "NOP": 7,    "TOR": 250,
    "POR": 50,   "SAC": 30,   "GSW": 12,   "LAL": 305,  "LAC": 305,
}
|
| 124 |
+
|
| 125 |
+
# Per-team timezone offset in hours relative to Eastern Time
# (0 = Eastern, -3 = Pacific).
TIMEZONE_ET = {
    "ATL": 0,  "BOS": 0,  "BKN": 0,  "CHA": 0,  "CHI": -1, "CLE": 0,
    "DAL": -1, "DEN": -2, "DET": 0,  "GSW": -3, "HOU": -1, "IND": 0,
    "LAC": -3, "LAL": -3, "MEM": -1, "MIA": 0,  "MIL": -1, "MIN": -1,
    "NOP": -1, "NYK": 0,  "OKC": -1, "ORL": 0,  "PHI": 0,  "PHX": -2,
    "POR": -3, "SAC": -3, "SAS": -1, "TOR": 0,  "UTA": -2, "WAS": 0,
}

# Rolling-window lengths (in games) used when building features.
WINDOWS = [3, 5, 7, 10, 15, 20]
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def resolve(name):
    """Map a team name (full, abbreviation, or partial) to its 3-letter code.

    Falls back to a substring match against the full names, then to the
    first three letters uppercased; empty input yields None.
    """
    code = TEAM_MAP.get(name)
    if code is not None:
        return code
    if len(name) == 3 and name.isupper():
        return name  # already an abbreviation
    for full_name, abbr in TEAM_MAP.items():
        if name in full_name:
            return abbr
    if not name:
        return None
    return name[:3].upper()
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def haversine(lat1, lon1, lat2, lon2):
    """Great-circle distance in miles between two (lat, lon) points."""
    earth_radius_mi = 3959
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dphi = math.radians(lat2 - lat1) / 2
    half_dlmb = math.radians(lon2 - lon1) / 2
    a = math.sin(half_dphi) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(half_dlmb) ** 2
    return earth_radius_mi * 2 * math.asin(math.sqrt(a))
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def pull_seasons():
    """Pull NBA game data from nba_api and cache one JSON file per season.

    Best-effort: seasons already cached are skipped, requests are spaced
    out to respect nba_api rate limits, and per-season failures are logged
    rather than raised so one bad season doesn't block the rest.  No-op if
    nba_api is not installed.
    """
    try:
        from nba_api.stats.endpoints import leaguegamefinder
    except ImportError:
        print("[DATA] nba_api not installed, using cached data only")
        return

    existing = {f.stem.replace("games-", "") for f in HIST_DIR.glob("games-*.json")}
    targets = ["2018-19", "2019-20", "2020-21", "2021-22", "2022-23", "2023-24", "2024-25", "2025-26"]
    missing = [s for s in targets if s not in existing]
    if not missing:
        print(f"[DATA] All {len(targets)} seasons cached")
        return

    for season in missing:
        print(f"[DATA] Pulling {season}...")
        try:
            time.sleep(3)  # rate-limit guard between nba_api requests
            finder = leaguegamefinder.LeagueGameFinder(
                season_nullable=season, league_id_nullable="00",
                season_type_nullable="Regular Season", timeout=60
            )
            df = finder.get_data_frames()[0]
            if df.empty:
                continue
            # Each game appears as two per-team rows; group them by GAME_ID.
            pairs = defaultdict(list)
            for _, row in df.iterrows():
                pairs[row["GAME_ID"]].append({
                    "team_name": row.get("TEAM_NAME", ""),
                    "matchup": row.get("MATCHUP", ""),
                    "pts": int(row["PTS"]) if row.get("PTS") is not None else None,
                    "game_date": row.get("GAME_DATE", ""),
                })
            games = []
            for gid, teams in pairs.items():
                if len(teams) != 2:
                    continue
                # "TEAM vs. OPP" marks the home row, "TEAM @ OPP" the road row.
                home = next((t for t in teams if " vs. " in str(t.get("matchup", ""))), None)
                away = next((t for t in teams if " @ " in str(t.get("matchup", ""))), None)
                # BUGFIX: require BOTH scores — previously only the home pts
                # was checked, so a missing away score was cached as None.
                if not home or not away or home["pts"] is None or away["pts"] is None:
                    continue
                games.append({
                    "game_date": home["game_date"],
                    "home_team": home["team_name"], "away_team": away["team_name"],
                    "home": {"team_name": home["team_name"], "pts": home["pts"]},
                    "away": {"team_name": away["team_name"], "pts": away["pts"]},
                })
            if games:
                (HIST_DIR / f"games-{season}.json").write_text(json.dumps(games))
                print(f" {len(games)} games saved")
        except Exception as e:
            print(f" Error pulling {season}: {e}")
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def load_all_games():
    """Return every cached game from HIST_DIR, sorted chronologically.

    Accepts season files stored either as a bare list or as an object
    with a "games" key.
    """
    all_games = []
    for path in sorted(HIST_DIR.glob("games-*.json")):
        payload = json.loads(path.read_text())
        if isinstance(payload, list):
            all_games.extend(payload)
        else:
            all_games.extend(payload.get("games", []))

    def _date_key(g):
        # Older cache formats used "date" instead of "game_date".
        return g.get("game_date", g.get("date", ""))

    all_games.sort(key=_date_key)
    return all_games
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
# ═══════════════════════════════════════════════════════════
|
| 221 |
+
# SECTION 2: FEATURE ENGINE
|
| 222 |
+
# ═══════════════════════════════════════════════════════════
|
| 223 |
+
|
| 224 |
+
# Version label for this module's feature pipeline.
FEATURE_ENGINE_VERSION = "genetic-loop-v3"
|
| 225 |
+
|
| 226 |
+
def build_features(games):
    """Build (X, y, feature_names) from raw game dicts.

    Prefers the project's NBAFeatureEngine; on ANY failure — a missing
    package OR a runtime error inside engine.build() — falls back to the
    inline feature builder so evolution keeps running.
    """
    try:
        from features.engine import NBAFeatureEngine
        engine = NBAFeatureEngine(skip_placeholder=True)
        X, y, feature_names = engine.build(games)
        # Sanitize: NaN -> 0 and +/-inf clamped before modeling.
        X = np.nan_to_num(np.array(X, dtype=np.float64), nan=0.0, posinf=1e6, neginf=-1e6)
        y = np.array(y, dtype=np.int32)
        print(f"[ENGINE] Real NBAFeatureEngine: {X.shape[1]} features, {len(y)} games")
        return X, y, feature_names
    except Exception as e:
        # BUGFIX: this catch-all also fires on runtime errors from
        # engine.build(), not just imports — the old "import failed"
        # message misattributed those failures.
        print(f"[ENGINE] NBAFeatureEngine unavailable ({e}), using inline fallback")
        return _build_features_inline(games)
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def _build_features_inline(games):
|
| 242 |
+
"""Fallback: Build 250+ inline features from raw game data. Returns X, y, feature_names."""
|
| 243 |
+
team_results = defaultdict(list)
|
| 244 |
+
team_last = {}
|
| 245 |
+
team_elo = defaultdict(lambda: 1500.0)
|
| 246 |
+
X, y = [], []
|
| 247 |
+
feature_names = []
|
| 248 |
+
first = True
|
| 249 |
+
|
| 250 |
+
for game in games:
|
| 251 |
+
hr, ar = game.get("home_team", ""), game.get("away_team", "")
|
| 252 |
+
if "home" in game and isinstance(game["home"], dict):
|
| 253 |
+
h, a = game["home"], game.get("away", {})
|
| 254 |
+
hs, as_ = h.get("pts"), a.get("pts")
|
| 255 |
+
if not hr: hr = h.get("team_name", "")
|
| 256 |
+
if not ar: ar = a.get("team_name", "")
|
| 257 |
+
else:
|
| 258 |
+
hs, as_ = game.get("home_score"), game.get("away_score")
|
| 259 |
+
if hs is None or as_ is None:
|
| 260 |
+
continue
|
| 261 |
+
hs, as_ = int(hs), int(as_)
|
| 262 |
+
home, away = resolve(hr), resolve(ar)
|
| 263 |
+
if not home or not away:
|
| 264 |
+
continue
|
| 265 |
+
gd = game.get("game_date", game.get("date", ""))[:10]
|
| 266 |
+
hr_ = team_results[home]
|
| 267 |
+
ar_ = team_results[away]
|
| 268 |
+
|
| 269 |
+
if len(hr_) < 5 or len(ar_) < 5:
|
| 270 |
+
team_results[home].append((gd, hs > as_, hs - as_, away, hs, as_))
|
| 271 |
+
team_results[away].append((gd, as_ > hs, as_ - hs, home, as_, hs))
|
| 272 |
+
team_last[home] = gd
|
| 273 |
+
team_last[away] = gd
|
| 274 |
+
K = 20
|
| 275 |
+
exp_h = 1 / (1 + 10 ** ((team_elo[away] - team_elo[home] - 50) / 400))
|
| 276 |
+
team_elo[home] += K * ((1 if hs > as_ else 0) - exp_h)
|
| 277 |
+
team_elo[away] += K * ((0 if hs > as_ else 1) - (1 - exp_h))
|
| 278 |
+
continue
|
| 279 |
+
|
| 280 |
+
def wp(r, n):
|
| 281 |
+
s = r[-n:]
|
| 282 |
+
return sum(1 for x in s if x[1]) / len(s) if s else 0.5
|
| 283 |
+
|
| 284 |
+
def pd(r, n):
|
| 285 |
+
s = r[-n:]
|
| 286 |
+
return sum(x[2] for x in s) / len(s) if s else 0.0
|
| 287 |
+
|
| 288 |
+
def ppg(r, n):
|
| 289 |
+
s = r[-n:]
|
| 290 |
+
return sum(x[4] for x in s) / len(s) if s else 100.0
|
| 291 |
+
|
| 292 |
+
def papg(r, n):
|
| 293 |
+
s = r[-n:]
|
| 294 |
+
return sum(x[5] for x in s) / len(s) if s else 100.0
|
| 295 |
+
|
| 296 |
+
def strk(r):
|
| 297 |
+
if not r: return 0
|
| 298 |
+
s, l = 0, r[-1][1]
|
| 299 |
+
for x in reversed(r):
|
| 300 |
+
if x[1] == l:
|
| 301 |
+
s += 1
|
| 302 |
+
else:
|
| 303 |
+
break
|
| 304 |
+
return s if l else -s
|
| 305 |
+
|
| 306 |
+
def close_pct(r, n):
|
| 307 |
+
s = r[-n:]
|
| 308 |
+
return sum(1 for x in s if abs(x[2]) <= 5) / len(s) if s else 0.5
|
| 309 |
+
|
| 310 |
+
def blowout_pct(r, n):
|
| 311 |
+
s = r[-n:]
|
| 312 |
+
return sum(1 for x in s if abs(x[2]) >= 15) / len(s) if s else 0.0
|
| 313 |
+
|
| 314 |
+
def consistency(r, n):
|
| 315 |
+
s = r[-n:]
|
| 316 |
+
if len(s) < 3: return 0.0
|
| 317 |
+
m = [x[2] for x in s]
|
| 318 |
+
avg = sum(m) / len(m)
|
| 319 |
+
return (sum((v - avg) ** 2 for v in m) / len(m)) ** 0.5
|
| 320 |
+
|
| 321 |
+
def rest(t):
|
| 322 |
+
last = team_last.get(t)
|
| 323 |
+
if not last or not gd: return 3
|
| 324 |
+
try:
|
| 325 |
+
return max(0, (datetime.strptime(gd[:10], "%Y-%m-%d") - datetime.strptime(last[:10], "%Y-%m-%d")).days)
|
| 326 |
+
except Exception:
|
| 327 |
+
return 3
|
| 328 |
+
|
| 329 |
+
def sos(r, n=10):
|
| 330 |
+
rec = r[-n:]
|
| 331 |
+
if not rec: return 0.5
|
| 332 |
+
ops = [wp(team_results[x[3]], 82) for x in rec if team_results[x[3]]]
|
| 333 |
+
return sum(ops) / len(ops) if ops else 0.5
|
| 334 |
+
|
| 335 |
+
def travel_dist(r, team):
    # Great-circle distance from the most recent opponent's arena to
    # *team*'s arena (units set by the module-level haversine()).
    # NOTE(review): this treats the last opponent's arena as the team's
    # previous location, i.e. it assumes the last game was on the road —
    # confirm that approximation is intended.
    if not r: return 0
    last_opp = r[-1][3]
    if last_opp in ARENA_COORDS and team in ARENA_COORDS:
        return haversine(*ARENA_COORDS[last_opp], *ARENA_COORDS[team])
    return 0
|
| 341 |
+
|
| 342 |
+
h_rest, a_rest = rest(home), rest(away)
|
| 343 |
+
try:
|
| 344 |
+
dt = datetime.strptime(gd, "%Y-%m-%d")
|
| 345 |
+
month, dow = dt.month, dt.weekday()
|
| 346 |
+
except Exception:
|
| 347 |
+
month, dow = 1, 2
|
| 348 |
+
|
| 349 |
+
sp = max(0, min(1, (month - 10) / 7)) if month >= 10 else max(0, min(1, (month + 2) / 7))
|
| 350 |
+
|
| 351 |
+
row = []
|
| 352 |
+
names = []
|
| 353 |
+
|
| 354 |
+
# 1. ROLLING PERFORMANCE (96 features)
|
| 355 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 356 |
+
for w in WINDOWS:
|
| 357 |
+
row.extend([wp(tr, w), pd(tr, w), ppg(tr, w), papg(tr, w),
|
| 358 |
+
ppg(tr, w) - papg(tr, w), close_pct(tr, w), blowout_pct(tr, w),
|
| 359 |
+
ppg(tr, w) + papg(tr, w)])
|
| 360 |
+
if first:
|
| 361 |
+
names.extend([f"{prefix}_wp{w}", f"{prefix}_pd{w}", f"{prefix}_ppg{w}",
|
| 362 |
+
f"{prefix}_papg{w}", f"{prefix}_margin{w}", f"{prefix}_close{w}",
|
| 363 |
+
f"{prefix}_blowout{w}", f"{prefix}_ou{w}"])
|
| 364 |
+
|
| 365 |
+
# 2. MOMENTUM (16 features)
|
| 366 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 367 |
+
row.extend([strk(tr), abs(strk(tr)),
|
| 368 |
+
wp(tr, 5) - wp(tr, 82), wp(tr, 3) - wp(tr, 10),
|
| 369 |
+
ppg(tr, 5) - ppg(tr, 20), papg(tr, 5) - papg(tr, 20),
|
| 370 |
+
consistency(tr, 10), consistency(tr, 5)])
|
| 371 |
+
if first:
|
| 372 |
+
names.extend([f"{prefix}_streak", f"{prefix}_streak_abs",
|
| 373 |
+
f"{prefix}_form5v82", f"{prefix}_form3v10",
|
| 374 |
+
f"{prefix}_scoring_trend", f"{prefix}_defense_trend",
|
| 375 |
+
f"{prefix}_consistency10", f"{prefix}_consistency5"])
|
| 376 |
+
|
| 377 |
+
# 3. REST & SCHEDULE (16 features)
|
| 378 |
+
h_travel = travel_dist(hr_, home)
|
| 379 |
+
a_travel = travel_dist(ar_, away)
|
| 380 |
+
row.extend([
|
| 381 |
+
min(h_rest, 7), min(a_rest, 7), h_rest - a_rest,
|
| 382 |
+
1.0 if h_rest <= 1 else 0.0, 1.0 if a_rest <= 1 else 0.0,
|
| 383 |
+
h_travel / 1000, a_travel / 1000, (h_travel - a_travel) / 1000,
|
| 384 |
+
ARENA_ALTITUDE.get(home, 500) / 5280, ARENA_ALTITUDE.get(away, 500) / 5280,
|
| 385 |
+
(ARENA_ALTITUDE.get(home, 500) - ARENA_ALTITUDE.get(away, 500)) / 5280,
|
| 386 |
+
abs(TIMEZONE_ET.get(home, 0) - TIMEZONE_ET.get(away, 0)),
|
| 387 |
+
0, 0, 0, 0,
|
| 388 |
+
])
|
| 389 |
+
if first:
|
| 390 |
+
names.extend(["h_rest", "a_rest", "rest_adv", "h_b2b", "a_b2b",
|
| 391 |
+
"h_travel", "a_travel", "travel_adv",
|
| 392 |
+
"h_altitude", "a_altitude", "altitude_delta",
|
| 393 |
+
"tz_shift", "h_games_7d", "a_games_7d", "sched_density", "pad1"])
|
| 394 |
+
|
| 395 |
+
# 4. OPPONENT-ADJUSTED (12 features)
|
| 396 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 397 |
+
s5 = sos(tr, 5)
|
| 398 |
+
s10 = sos(tr, 10)
|
| 399 |
+
ss = sos(tr, 82)
|
| 400 |
+
wp_above = sum(1 for r in tr if wp(team_results[r[3]], 82) > 0.5 and r[1]) / max(
|
| 401 |
+
sum(1 for r in tr if wp(team_results[r[3]], 82) > 0.5), 1)
|
| 402 |
+
wp_below = sum(1 for r in tr if wp(team_results[r[3]], 82) <= 0.5 and r[1]) / max(
|
| 403 |
+
sum(1 for r in tr if wp(team_results[r[3]], 82) <= 0.5), 1)
|
| 404 |
+
row.extend([s5, s10, ss, wp_above, wp_below, 0])
|
| 405 |
+
if first:
|
| 406 |
+
names.extend([f"{prefix}_sos5", f"{prefix}_sos10", f"{prefix}_sos_season",
|
| 407 |
+
f"{prefix}_wp_above500", f"{prefix}_wp_below500", f"{prefix}_margin_quality"])
|
| 408 |
+
|
| 409 |
+
# 5. MATCHUP & ELO (12 features)
|
| 410 |
+
row.extend([
|
| 411 |
+
wp(hr_, 10) - wp(ar_, 10), pd(hr_, 10) - pd(ar_, 10),
|
| 412 |
+
ppg(hr_, 10) - papg(ar_, 10), ppg(ar_, 10) - papg(hr_, 10),
|
| 413 |
+
abs(ppg(hr_, 10) + papg(hr_, 10) - ppg(ar_, 10) - papg(ar_, 10)),
|
| 414 |
+
consistency(hr_, 10) - consistency(ar_, 10),
|
| 415 |
+
team_elo[home], team_elo[away], team_elo[home] - team_elo[away] + 50,
|
| 416 |
+
(team_elo[home] - 1500) / 100, (team_elo[away] - 1500) / 100,
|
| 417 |
+
(team_elo[home] - team_elo[away]) / 100,
|
| 418 |
+
])
|
| 419 |
+
if first:
|
| 420 |
+
names.extend(["rel_strength", "rel_pd", "off_matchup", "def_matchup",
|
| 421 |
+
"tempo_diff", "consistency_edge",
|
| 422 |
+
"elo_home", "elo_away", "elo_diff",
|
| 423 |
+
"elo_home_norm", "elo_away_norm", "elo_diff_norm"])
|
| 424 |
+
|
| 425 |
+
# 6. CONTEXT (12 features)
|
| 426 |
+
row.extend([
|
| 427 |
+
1.0, sp, math.sin(2 * math.pi * month / 12), math.cos(2 * math.pi * month / 12),
|
| 428 |
+
dow / 6.0, 1.0 if dow >= 5 else 0.0,
|
| 429 |
+
min(len(hr_), 82) / 82.0, min(len(ar_), 82) / 82.0,
|
| 430 |
+
wp(hr_, 82) + wp(ar_, 82), wp(hr_, 82) - wp(ar_, 82),
|
| 431 |
+
1.0 if wp(hr_, 82) > 0.5 and wp(ar_, 82) > 0.5 else 0.0,
|
| 432 |
+
ppg(hr_, 10) + ppg(ar_, 10),
|
| 433 |
+
])
|
| 434 |
+
if first:
|
| 435 |
+
names.extend(["home_court", "season_phase", "month_sin", "month_cos",
|
| 436 |
+
"day_of_week", "is_weekend", "h_games_pct", "a_games_pct",
|
| 437 |
+
"combined_wp", "wp_diff", "playoff_race", "expected_total"])
|
| 438 |
+
|
| 439 |
+
# 7. CROSS-WINDOW MOMENTUM (20 features) — trend acceleration
|
| 440 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 441 |
+
# Short vs long momentum (5 vs 20)
|
| 442 |
+
wp_accel = wp(tr, 3) - 2 * wp(tr, 10) + wp(tr, 20) if len(tr) >= 20 else 0.0
|
| 443 |
+
pd_accel = pd(tr, 3) - 2 * pd(tr, 10) + pd(tr, 20) if len(tr) >= 20 else 0.0
|
| 444 |
+
# Pythagorean expected win rate (Bill James)
|
| 445 |
+
pts_for = sum(x[4] for x in tr[-20:]) if len(tr) >= 5 else 100
|
| 446 |
+
pts_against = sum(x[5] for x in tr[-20:]) if len(tr) >= 5 else 100
|
| 447 |
+
pyth_exp = pts_for ** 13.91 / max(1, pts_for ** 13.91 + pts_against ** 13.91) if pts_for > 0 else 0.5
|
| 448 |
+
# Scoring volatility
|
| 449 |
+
pts_list = [x[4] for x in tr[-10:]] if len(tr) >= 5 else [100]
|
| 450 |
+
pts_vol = (sum((p - sum(pts_list)/len(pts_list))**2 for p in pts_list) / len(pts_list)) ** 0.5 if len(pts_list) > 1 else 0
|
| 451 |
+
# Home/away specific win rates
|
| 452 |
+
home_games = [x for x in tr if x[3] != home] if prefix == "h" else [x for x in tr if x[3] != away]
|
| 453 |
+
ha_wp = sum(1 for x in home_games[-20:] if x[1]) / max(len(home_games[-20:]), 1)
|
| 454 |
+
# Opponent quality of recent wins
|
| 455 |
+
recent_wins = [x for x in tr[-10:] if x[1]]
|
| 456 |
+
win_quality = sum(wp(team_results[x[3]], 82) for x in recent_wins) / max(len(recent_wins), 1) if recent_wins else 0.5
|
| 457 |
+
# Margin trend (linear slope over last 10 games)
|
| 458 |
+
margins_10 = [x[2] for x in tr[-10:]] if len(tr) >= 5 else [0]
|
| 459 |
+
if len(margins_10) >= 3:
|
| 460 |
+
x_vals = list(range(len(margins_10)))
|
| 461 |
+
x_mean = sum(x_vals) / len(x_vals)
|
| 462 |
+
y_mean = sum(margins_10) / len(margins_10)
|
| 463 |
+
num = sum((x - x_mean) * (y - y_mean) for x, y in zip(x_vals, margins_10))
|
| 464 |
+
den = sum((x - x_mean) ** 2 for x in x_vals)
|
| 465 |
+
margin_slope = num / den if den > 0 else 0.0
|
| 466 |
+
else:
|
| 467 |
+
margin_slope = 0.0
|
| 468 |
+
row.extend([
|
| 469 |
+
wp(tr, 5) - wp(tr, 20) if len(tr) >= 20 else 0.0,
|
| 470 |
+
wp_accel, pd_accel, pyth_exp,
|
| 471 |
+
pts_vol / 10.0, # normalized
|
| 472 |
+
ha_wp, win_quality,
|
| 473 |
+
margin_slope,
|
| 474 |
+
ppg(tr, 3) / max(ppg(tr, 20), 1), # recent scoring ratio
|
| 475 |
+
papg(tr, 3) / max(papg(tr, 20), 1), # recent defense ratio
|
| 476 |
+
])
|
| 477 |
+
if first:
|
| 478 |
+
names.extend([f"{prefix}_wp5v20", f"{prefix}_wp_accel", f"{prefix}_pd_accel",
|
| 479 |
+
f"{prefix}_pyth_exp", f"{prefix}_pts_vol",
|
| 480 |
+
f"{prefix}_location_wp", f"{prefix}_win_quality",
|
| 481 |
+
f"{prefix}_margin_slope", f"{prefix}_off_ratio", f"{prefix}_def_ratio"])
|
| 482 |
+
|
| 483 |
+
# 8. INTERACTION FEATURES (12 features) — key cross-terms
|
| 484 |
+
elo_d = team_elo[home] - team_elo[away] + 50
|
| 485 |
+
rest_adv = h_rest - a_rest
|
| 486 |
+
wp_d = wp(hr_, 10) - wp(ar_, 10)
|
| 487 |
+
row.extend([
|
| 488 |
+
elo_d * rest_adv / 10.0, # elo × rest interaction
|
| 489 |
+
wp_d * rest_adv / 3.0, # form × rest interaction
|
| 490 |
+
elo_d * (1 if h_rest <= 1 else 0), # elo × b2b penalty
|
| 491 |
+
wp_d ** 2, # squared wp diff (nonlinearity)
|
| 492 |
+
elo_d ** 2 / 10000.0, # squared elo diff
|
| 493 |
+
(ppg(hr_, 10) - papg(ar_, 10)) * (ppg(ar_, 10) - papg(hr_, 10)), # off×def interaction
|
| 494 |
+
consistency(hr_, 10) * consistency(ar_, 10) / 100.0, # consistency product
|
| 495 |
+
wp(hr_, 82) * wp(ar_, 82), # season quality product
|
| 496 |
+
(wp(hr_, 5) - wp(hr_, 20)) * (wp(ar_, 5) - wp(ar_, 20)), # momentum alignment
|
| 497 |
+
abs(ppg(hr_, 10) + papg(hr_, 10) - ppg(ar_, 10) - papg(ar_, 10)) * elo_d / 1000.0, # tempo×elo
|
| 498 |
+
(1.0 if wp(hr_, 82) > 0.6 else 0.0) * (1.0 if wp(ar_, 82) < 0.4 else 0.0), # mismatch flag
|
| 499 |
+
float(h_rest >= 3 and a_rest <= 1), # rest mismatch flag
|
| 500 |
+
])
|
| 501 |
+
if first:
|
| 502 |
+
names.extend(["elo_rest_interact", "form_rest_interact", "elo_b2b_penalty",
|
| 503 |
+
"wp_diff_sq", "elo_diff_sq", "off_def_interact",
|
| 504 |
+
"consistency_product", "quality_product", "momentum_align",
|
| 505 |
+
"tempo_elo_interact", "mismatch_flag", "rest_mismatch_flag"])
|
| 506 |
+
|
| 507 |
+
# 9. NEW HIGH-IMPACT FEATURES (50 features, windows [5, 10])
|
| 508 |
+
NEW_WINDOWS = [5, 10]
|
| 509 |
+
|
| 510 |
+
# Helper: home/away split win% (home team plays at home, away team plays away)
|
| 511 |
+
def home_split_wp(r, n, is_home_team):
    """Win% for home-only or away-only games over last n."""
    # NOTE(review): the filter keys on the opponent code (x[3]), which can
    # never equal the team's own code, so it keeps essentially every game
    # rather than isolating home-venue games. Venue is not stored in the
    # result tuples — confirm whether this proxy is intended.
    if is_home_team:
        # home team's results when they were the home team (opponent is different city)
        loc_games = [x for x in r if x[3] != home][-n:]
    else:
        loc_games = [x for x in r if x[3] != away][-n:]
    if not loc_games:
        return wp(r, n)  # fallback to overall
    return sum(1 for x in loc_games if x[1]) / len(loc_games)
|
| 521 |
+
|
| 522 |
+
def away_split_wp(r, n, is_home_team):
    """Win% for away-only games over last n."""
    # NOTE(review): x[3] == home/away matches only games where the opponent
    # is the team itself, which cannot occur in its own log — loc_games is
    # effectively always empty and this returns the wp(r, n) fallback.
    # Verify intent before relying on this split.
    if is_home_team:
        loc_games = [x for x in r if x[3] == home][-n:]
    else:
        loc_games = [x for x in r if x[3] == away][-n:]
    if not loc_games:
        return wp(r, n)
    return sum(1 for x in loc_games if x[1]) / len(loc_games)
|
| 531 |
+
|
| 532 |
+
def net_rating(r, n):
    """Average scoring margin (points for minus points against) over the last *n* games."""
    window = r[-n:]
    if not window:
        return 0.0
    total_margin = sum(game[4] - game[5] for game in window)
    return total_margin / len(window)
|
| 538 |
+
|
| 539 |
+
def pace_proxy(r, n):
    """Combined points per game over the last *n* games — a pace proxy when possession data is absent."""
    window = r[-n:]
    if not window:
        return 200.0
    return sum(game[4] + game[5] for game in window) / len(window)
|
| 545 |
+
|
| 546 |
+
def h2h_wp(hr, ar, n):
    """Head-to-head win% for home team vs this specific away team over last n meetings."""
    # NOTE(review): *ar* is accepted but unused — meetings are found by
    # scanning the home team's log for the closed-over ``away`` opponent.
    meetings = [x for x in hr if x[3] == away][-n:]
    if not meetings:
        return 0.5  # neutral prior when the teams have not met yet
    return sum(1 for x in meetings if x[1]) / len(meetings)
|
| 552 |
+
|
| 553 |
+
def sos_window(r, n):
    """Average opponent win% over last n games (Strength of Schedule)."""
    rec = r[-n:]
    if not rec:
        return 0.5
    # Skip opponents with an empty log so wp() never sees a zero-game team.
    ops = [wp(team_results[x[3]], 82) for x in rec if team_results[x[3]]]
    return sum(ops) / len(ops) if ops else 0.5
|
| 560 |
+
|
| 561 |
+
# 9a. Net Rating (windows 5, 10) — 4 features
|
| 562 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 563 |
+
for w in NEW_WINDOWS:
|
| 564 |
+
row.append(net_rating(tr, w))
|
| 565 |
+
if first:
|
| 566 |
+
names.append(f"{prefix}_net_rating{w}")
|
| 567 |
+
|
| 568 |
+
# 9b. Pace proxy (windows 5, 10) — 4 features
|
| 569 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 570 |
+
for w in NEW_WINDOWS:
|
| 571 |
+
row.append(pace_proxy(tr, w))
|
| 572 |
+
if first:
|
| 573 |
+
names.append(f"{prefix}_pace{w}")
|
| 574 |
+
|
| 575 |
+
# 9c. Rest days (already exists as h_rest/a_rest, add explicit named vars for clarity)
|
| 576 |
+
# These are already in section 3 above; skip to avoid duplication.
|
| 577 |
+
|
| 578 |
+
# 9d. Home/Away Win% Split (windows 5, 10) — 4 features each side = 8 features
|
| 579 |
+
for w in NEW_WINDOWS:
|
| 580 |
+
row.append(home_split_wp(hr_, w, is_home_team=True)) # h home-venue wp
|
| 581 |
+
row.append(away_split_wp(ar_, w, is_home_team=False)) # a away-venue wp
|
| 582 |
+
if first:
|
| 583 |
+
names.append(f"h_home_wp{w}")
|
| 584 |
+
names.append(f"a_away_wp{w}")
|
| 585 |
+
|
| 586 |
+
# 9e. Matchup H2H record (windows 5, 10) — 2 features
|
| 587 |
+
for w in NEW_WINDOWS:
|
| 588 |
+
row.append(h2h_wp(hr_, ar_, w))
|
| 589 |
+
if first:
|
| 590 |
+
names.append(f"h_h2h_wp{w}")
|
| 591 |
+
|
| 592 |
+
# 9f. Strength of Schedule windows 5, 10 (distinct from existing sos5/sos10 in sec 4)
|
| 593 |
+
# sec 4 already has h_sos5, h_sos10 — skip to avoid duplication.
|
| 594 |
+
|
| 595 |
+
# 9g. Streak type: signed streak (positive=wins, negative=losses) — 2 features
|
| 596 |
+
# (strk() already included in section 2 as h_streak/a_streak; skip duplicate.)
|
| 597 |
+
|
| 598 |
+
# 9h. Pace × Net Rating interaction — 4 features (home + away, windows 5 and 10)
|
| 599 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 600 |
+
for w in NEW_WINDOWS:
|
| 601 |
+
p = pace_proxy(tr, w)
|
| 602 |
+
n_r = net_rating(tr, w)
|
| 603 |
+
row.append((p * n_r) / 1000.0) # scaled
|
| 604 |
+
if first:
|
| 605 |
+
names.append(f"{prefix}_pace_net_interact{w}")
|
| 606 |
+
|
| 607 |
+
# 9i. Pythagorean-adjusted net rating (per-100-possessions approximation) — 4 features
|
| 608 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 609 |
+
for w in NEW_WINDOWS:
|
| 610 |
+
s = tr[-w:]
|
| 611 |
+
if s:
|
| 612 |
+
total_pts_for = sum(x[4] for x in s)
|
| 613 |
+
total_pts_ag = sum(x[5] for x in s)
|
| 614 |
+
n_games = len(s)
|
| 615 |
+
avg_pace = (total_pts_for + total_pts_ag) / max(n_games, 1)
|
| 616 |
+
# net per 100 possessions approximation
|
| 617 |
+
net_per100 = ((total_pts_for - total_pts_ag) / max(n_games, 1)) / max(avg_pace / 100.0, 1.0)
|
| 618 |
+
else:
|
| 619 |
+
net_per100 = 0.0
|
| 620 |
+
row.append(net_per100)
|
| 621 |
+
if first:
|
| 622 |
+
names.append(f"{prefix}_net_per100_{w}")
|
| 623 |
+
|
| 624 |
+
# 9j. Recent opponent quality (win% of opponents faced) — 4 features (windows 5, 10)
|
| 625 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 626 |
+
for w in NEW_WINDOWS:
|
| 627 |
+
row.append(sos_window(tr, w))
|
| 628 |
+
if first:
|
| 629 |
+
names.append(f"{prefix}_opp_quality{w}")
|
| 630 |
+
|
| 631 |
+
# ── SECTION 10: EXPONENTIALLY-WEIGHTED MOMENTUM FEATURES (~28 features) ──
|
| 632 |
+
# EWM uses manual exponential decay (no pandas needed) for each team's history.
|
| 633 |
+
# Halflife h means the weight of a game h games ago is 0.5x the weight of the current.
|
| 634 |
+
# alpha = 1 - exp(-ln(2) / halflife) => older games decay exponentially.
|
| 635 |
+
|
| 636 |
+
def ewm_win(r, halflife):
    """Exponentially-weighted mean of win flags; a game *halflife* games back weighs half as much."""
    outcomes = [game[1] for game in r]
    if not outcomes:
        return 0.5
    alpha = 1.0 - math.exp(-math.log(2) / max(halflife, 0.5))
    decay = 1 - alpha
    last = len(outcomes) - 1
    num, denom = 0.0, 0.0
    for pos, won in enumerate(outcomes):
        weight = decay ** (last - pos)
        num += weight * float(won)
        denom += weight
    return num / denom if denom > 0 else 0.5
|
| 648 |
+
|
| 649 |
+
def ewm_pd(r, halflife):
    """Exponentially-weighted mean of point differentials with the given halflife (games)."""
    margins = [game[2] for game in r]
    if not margins:
        return 0.0
    alpha = 1.0 - math.exp(-math.log(2) / max(halflife, 0.5))
    decay = 1 - alpha
    last = len(margins) - 1
    num, denom = 0.0, 0.0
    for pos, margin in enumerate(margins):
        weight = decay ** (last - pos)
        num += weight * margin
        denom += weight
    return num / denom if denom > 0 else 0.0
|
| 661 |
+
|
| 662 |
+
def ewm_ppg(r, halflife):
    """Exponentially-weighted mean of points scored per game; 100.0 prior when empty."""
    scored = [game[4] for game in r]
    if not scored:
        return 100.0
    alpha = 1.0 - math.exp(-math.log(2) / max(halflife, 0.5))
    decay = 1 - alpha
    last = len(scored) - 1
    num, denom = 0.0, 0.0
    for pos, pts in enumerate(scored):
        weight = decay ** (last - pos)
        num += weight * pts
        denom += weight
    return num / denom if denom > 0 else 100.0
|
| 674 |
+
|
| 675 |
+
def ewm_papg(r, halflife):
    """Exponentially-weighted mean of opponent points per game (defensive proxy); 100.0 prior."""
    allowed = [game[5] for game in r]
    if not allowed:
        return 100.0
    alpha = 1.0 - math.exp(-math.log(2) / max(halflife, 0.5))
    decay = 1 - alpha
    last = len(allowed) - 1
    num, denom = 0.0, 0.0
    for pos, pts in enumerate(allowed):
        weight = decay ** (last - pos)
        num += weight * pts
        denom += weight
    return num / denom if denom > 0 else 100.0
|
| 687 |
+
|
| 688 |
+
def streak_decay_score(r):
    """Streak length damped by recency: streak x 1/(1 + games_since_last_loss).

    Losing streaks return the negated streak length; empty history -> 0.0."""
    if not r:
        return 0.0
    latest = r[-1][1]
    streak = 0
    for game in reversed(r):
        if game[1] != latest:
            break
        streak += 1
    if not latest:
        # Currently on a losing streak: negative score.
        return -float(streak)
    # Winning: count consecutive wins back to the most recent loss.
    since_loss = 0
    for game in reversed(r):
        if not game[1]:
            break
        since_loss += 1
    return streak * (1.0 / (1 + since_loss))
|
| 709 |
+
|
| 710 |
+
def fatigue_index(r, n=5):
    """Schedule-compression score: sum of 1/rest_gap over the last *n* games' gaps.

    Higher values mean a tighter schedule; fewer than 2 games -> 0.0.
    Unparseable dates contribute 0.5 (an assumed 2-day gap)."""
    recent = r[-n:]
    if len(recent) < 2:
        return 0.0
    score = 0.0
    for prev, cur in zip(recent, recent[1:]):
        try:
            start = datetime.strptime(prev[0][:10], "%Y-%m-%d")
            end = datetime.strptime(cur[0][:10], "%Y-%m-%d")
            gap = max(1, abs((end - start).days))
            score += 1.0 / gap
        except Exception:
            score += 0.5  # fallback: assume 2-day gap
    return score
|
| 725 |
+
|
| 726 |
+
def b2b_delta(r, metric_idx=2):
    """Average metric in back-to-back games minus average in rested games.

    A game counts as B2B when the previous game was <= 1 day earlier;
    unparseable dates default to a 2-day (non-B2B) gap. Empty buckets
    contribute 0.0."""
    b2b_vals, rested_vals = [], []
    for prev, cur in zip(r, r[1:]):
        try:
            d_prev = datetime.strptime(prev[0][:10], "%Y-%m-%d")
            d_cur = datetime.strptime(cur[0][:10], "%Y-%m-%d")
            gap = abs((d_cur - d_prev).days)
        except Exception:
            gap = 2
        bucket = b2b_vals if gap <= 1 else rested_vals
        bucket.append(cur[metric_idx])
    b2b_avg = sum(b2b_vals) / len(b2b_vals) if b2b_vals else 0.0
    rested_avg = sum(rested_vals) / len(rested_vals) if rested_vals else 0.0
    return b2b_avg - rested_avg
|
| 745 |
+
|
| 746 |
+
def travel_burden(r, n=7):
    """Number of distinct opponents in the last *n* games — a coarse travel proxy."""
    recent = r[-n:]
    if not recent:
        return 0
    return len(set(game[3] for game in recent))
|
| 753 |
+
|
| 754 |
+
# 10a. EWM Win Probability — halflives [3, 5, 10] x 2 teams = 6 features
|
| 755 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 756 |
+
for hl in [3, 5, 10]:
|
| 757 |
+
row.append(ewm_win(tr, hl))
|
| 758 |
+
if first:
|
| 759 |
+
names.append(f"{prefix}_ewm_win_hl{hl}")
|
| 760 |
+
|
| 761 |
+
# 10b. EWM Point Differential — halflives [3, 5, 10] x 2 teams = 6 features
|
| 762 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 763 |
+
for hl in [3, 5, 10]:
|
| 764 |
+
row.append(ewm_pd(tr, hl) / 10.0) # normalize: typical margins ~0–20 pts
|
| 765 |
+
if first:
|
| 766 |
+
names.append(f"{prefix}_ewm_pd_hl{hl}")
|
| 767 |
+
|
| 768 |
+
# 10c. EWM Offensive Rating (halflife=5) — 2 features
|
| 769 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 770 |
+
row.append(ewm_ppg(tr, 5) / 100.0) # normalize to ~1.0 range
|
| 771 |
+
if first:
|
| 772 |
+
names.append(f"{prefix}_ewm_off_hl5")
|
| 773 |
+
|
| 774 |
+
# 10d. EWM Defensive Rating (halflife=5) — 2 features
|
| 775 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 776 |
+
row.append(ewm_papg(tr, 5) / 100.0)
|
| 777 |
+
if first:
|
| 778 |
+
names.append(f"{prefix}_ewm_def_hl5")
|
| 779 |
+
|
| 780 |
+
# 10e. Streak Decay Score — 2 features
|
| 781 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 782 |
+
row.append(streak_decay_score(tr))
|
| 783 |
+
if first:
|
| 784 |
+
names.append(f"{prefix}_streak_decay")
|
| 785 |
+
|
| 786 |
+
# 10f. Fatigue Index (last 5 games) — 2 features
|
| 787 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 788 |
+
row.append(fatigue_index(tr, n=5))
|
| 789 |
+
if first:
|
| 790 |
+
names.append(f"{prefix}_fatigue_idx")
|
| 791 |
+
|
| 792 |
+
# 10g. B2B Performance Delta (point margin) — 2 features
|
| 793 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 794 |
+
row.append(b2b_delta(tr, metric_idx=2) / 10.0) # normalized margin delta
|
| 795 |
+
if first:
|
| 796 |
+
names.append(f"{prefix}_b2b_margin_delta")
|
| 797 |
+
|
| 798 |
+
# 10h. Travel Burden (unique cities proxy over last 7 games) — 2 features
|
| 799 |
+
for prefix, tr in [("h", hr_), ("a", ar_)]:
|
| 800 |
+
row.append(float(travel_burden(tr, n=7)) / 7.0) # normalize to [0, 1]
|
| 801 |
+
if first:
|
| 802 |
+
names.append(f"{prefix}_travel_burden7")
|
| 803 |
+
|
| 804 |
+
# 10i. Cross-team EWM interaction features — 4 features
|
| 805 |
+
row.append(ewm_win(hr_, 3) - ewm_win(ar_, 3)) # home vs away momentum (hl=3)
|
| 806 |
+
row.append(ewm_win(hr_, 5) - ewm_win(ar_, 5)) # home vs away momentum (hl=5)
|
| 807 |
+
row.append((ewm_pd(hr_, 5) - ewm_pd(ar_, 5)) / 10.0) # relative margin quality (hl=5)
|
| 808 |
+
row.append((ewm_ppg(hr_, 5) - ewm_papg(ar_, 5)) / 100.0) # home offense vs away defense
|
| 809 |
+
if first:
|
| 810 |
+
names.extend(["ewm_win_diff_hl3", "ewm_win_diff_hl5",
|
| 811 |
+
"ewm_pd_diff_hl5", "ewm_off_vs_def_hl5"])
|
| 812 |
+
|
| 813 |
+
X.append(row)
|
| 814 |
+
y.append(1 if hs > as_ else 0)
|
| 815 |
+
if first:
|
| 816 |
+
feature_names = names
|
| 817 |
+
first = False
|
| 818 |
+
|
| 819 |
+
team_results[home].append((gd, hs > as_, hs - as_, away, hs, as_))
|
| 820 |
+
team_results[away].append((gd, as_ > hs, as_ - hs, home, as_, hs))
|
| 821 |
+
team_last[home] = gd
|
| 822 |
+
team_last[away] = gd
|
| 823 |
+
K = 20
|
| 824 |
+
exp_h = 1 / (1 + 10 ** ((team_elo[away] - team_elo[home] - 50) / 400))
|
| 825 |
+
team_elo[home] += K * ((1 if hs > as_ else 0) - exp_h)
|
| 826 |
+
team_elo[away] += K * ((0 if hs > as_ else 1) - (1 - exp_h))
|
| 827 |
+
|
| 828 |
+
X = np.nan_to_num(np.array(X, dtype=np.float64))
|
| 829 |
+
y = np.array(y, dtype=np.int32)
|
| 830 |
+
return X, y, feature_names
|
| 831 |
+
|
| 832 |
+
|
| 833 |
+
# ═══════════════════════════════════════════════════════════
|
| 834 |
+
# SECTION 3: INDIVIDUAL (feature mask + hyperparameters)
|
| 835 |
+
# ═══════════════════════════════════════════════════════════
|
| 836 |
+
|
| 837 |
+
class Individual:
    """One model configuration: feature selection mask + hyperparameters.

    Genome = a binary feature mask (``self.features``) plus a dict of model
    hyperparameters (``self.hyperparams``). Multi-objective fitness is
    filled in later by the evaluator; fresh individuals carry neutral
    placeholder fitness values until then.
    """

    def __init__(self, n_features, target=100, model_type=None):
        # Bernoulli-sample the mask so the expected number of selected
        # features is ~`target` out of `n_features` total.
        prob = target / max(n_features, 1)
        self.features = [1 if random.random() < prob else 0 for _ in range(n_features)]
        self.hyperparams = {
            "n_estimators": random.randint(100, 600),
            "max_depth": random.randint(3, 10),
            # log-uniform in [10^-2.5, 10^-0.5]
            "learning_rate": 10 ** random.uniform(-2.5, -0.5),
            "subsample": random.uniform(0.5, 1.0),
            "colsample_bytree": random.uniform(0.3, 1.0),
            "min_child_weight": random.randint(1, 15),
            # log-uniform regularization strengths
            "reg_alpha": 10 ** random.uniform(-6, 1),
            "reg_lambda": 10 ** random.uniform(-6, 1),
            # GPU_MODEL_TYPES is a module-level list defined elsewhere in the file.
            "model_type": model_type or random.choice(GPU_MODEL_TYPES),
            "calibration": random.choice(["isotonic", "sigmoid", "none"]),
            # Neural net hyperparams (only consulted for NN model types).
            "nn_hidden_dims": random.choice([64, 128, 256]),
            "nn_n_layers": random.randint(2, 4),
            "nn_dropout": random.uniform(0.1, 0.5),
            "nn_epochs": random.randint(20, 100),
            "nn_batch_size": random.choice([32, 64, 128]),
        }
        # Placeholder fitness until the evaluator runs.
        self.fitness = {"brier": 1.0, "roi": 0.0, "sharpe": 0.0, "calibration": 1.0, "calibration_error": 1.0, "composite": 0.0}
        self.pareto_rank = 999  # 999 = not yet ranked
        self.crowding_dist = 0.0
        self.island_id = -1  # -1 = not yet assigned to an island
        self.generation = 0
        self.birth_generation = 0
        # Also sets self.n_features (count of selected features).
        self._enforce_feature_cap()

    def selected_indices(self):
        """Return the indices of the currently selected (bit == 1) features."""
        return [i for i, b in enumerate(self.features) if b]

    def to_dict(self):
        """Serializable summary; n_features comes from _enforce_feature_cap()."""
        return {
            "n_features": self.n_features,
            "hyperparams": {k: v for k, v in self.hyperparams.items()},
            "fitness": dict(self.fitness),
            "generation": self.generation,
        }

    @staticmethod
    def _hamming_distance(f1, f2):
        """Normalized Hamming distance between two binary feature masks (0.0 – 1.0)."""
        n = len(f1)
        if n == 0:
            return 0.0
        return sum(a != b for a, b in zip(f1, f2)) / n

    @staticmethod
    def crossover(p1, p2):
        """Crossover on features + blend hyperparams.

        Crossover type is selected based on parent similarity:
        - Parents very similar (Hamming < 0.1): uniform crossover.
          Picks each bit independently, generating more variation between
          nearly-identical individuals.
        - Otherwise: classic two-point crossover.
        """
        # __new__ skips __init__ so the child does not re-randomize a genome.
        child = Individual.__new__(Individual)
        n = len(p1.features)
        parent_hamming = Individual._hamming_distance(p1.features, p2.features)
        if parent_hamming < 0.1:
            # Uniform crossover: each position drawn independently
            child.features = [
                p1.features[i] if random.random() < 0.5 else p2.features[i]
                for i in range(n)
            ]
        else:
            pt1 = random.randint(0, n - 1)
            pt2 = random.randint(pt1, n - 1)
            child.features = p1.features[:pt1] + p2.features[pt1:pt2] + p1.features[pt2:]

        # Numeric hyperparams: random convex blend of the parents.
        # NOTE(review): blended ints (e.g. nn_hidden_dims, nn_batch_size) can
        # land outside the original discrete choice sets — confirm intended.
        child.hyperparams = {}
        for key in p1.hyperparams:
            if isinstance(p1.hyperparams[key], (int, float)):
                w = random.random()
                val = w * p1.hyperparams[key] + (1 - w) * p2.hyperparams[key]
                if isinstance(p1.hyperparams[key], int):
                    val = int(round(val))
                child.hyperparams[key] = val
            else:
                # Categorical hyperparams: inherit from one parent at random.
                child.hyperparams[key] = random.choice([p1.hyperparams[key], p2.hyperparams[key]])

        # Reset evaluation state; the child has never been scored.
        child.fitness = {"brier": 1.0, "roi": 0.0, "sharpe": 0.0, "calibration": 1.0, "calibration_error": 1.0, "composite": 0.0}
        child.generation = max(p1.generation, p2.generation) + 1
        child.birth_generation = child.generation
        child.pareto_rank = 999
        child.crowding_dist = 0.0
        child.island_id = -1
        child._enforce_feature_cap()
        return child

    MAX_FEATURES = 200  # Hard cap — individuals above this waste compute

    def _enforce_feature_cap(self):
        """If feature count exceeds MAX_FEATURES, randomly drop excess features."""
        selected = [i for i, b in enumerate(self.features) if b]
        if len(selected) > self.MAX_FEATURES:
            to_drop = random.sample(selected, len(selected) - self.MAX_FEATURES)
            for idx in to_drop:
                self.features[idx] = 0
        # Cached count used by to_dict() and external reporting.
        self.n_features = sum(self.features)

    def mutate(self, rate=0.03):
        """Mutate features and hyperparameters.

        Bit-flips each feature with probability *rate*, then perturbs each
        hyperparameter with its own (hard-coded) mutation probability.
        """
        for i in range(len(self.features)):
            if random.random() < rate:
                self.features[i] = 1 - self.features[i]
        self._enforce_feature_cap()
        if random.random() < 0.15:
            self.hyperparams["n_estimators"] = max(50, self.hyperparams["n_estimators"] + random.randint(-100, 100))
        if random.random() < 0.15:
            self.hyperparams["max_depth"] = max(2, min(12, self.hyperparams["max_depth"] + random.randint(-2, 2)))
        if random.random() < 0.15:
            # Multiplicative jitter on a log scale, then clamp to a sane range.
            self.hyperparams["learning_rate"] *= 10 ** random.uniform(-0.3, 0.3)
            self.hyperparams["learning_rate"] = max(0.001, min(0.5, self.hyperparams["learning_rate"]))
        if random.random() < 0.08:
            self.hyperparams["model_type"] = random.choice(GPU_MODEL_TYPES)
        if random.random() < 0.05:
            self.hyperparams["calibration"] = random.choice(["isotonic", "sigmoid", "none"])
        # Neural net hyperparams
        if random.random() < 0.10:
            self.hyperparams["nn_hidden_dims"] = random.choice([64, 128, 256, 512])
        if random.random() < 0.10:
            self.hyperparams["nn_n_layers"] = max(1, min(6, self.hyperparams.get("nn_n_layers", 2) + random.randint(-1, 1)))
        if random.random() < 0.10:
            self.hyperparams["nn_dropout"] = max(0.0, min(0.7, self.hyperparams.get("nn_dropout", 0.3) + random.uniform(-0.1, 0.1)))
|
| 967 |
+
|
| 968 |
+
|
| 969 |
+
# ═══════════════════════════════════════════════════════════
|
| 970 |
+
# SECTION 4: FITNESS EVALUATION (multi-objective)
|
| 971 |
+
# ═══════════════════════════════════════════════════════════
|
| 972 |
+
|
| 973 |
+
def evaluate_individual(ind, X, y, n_splits=5, use_gpu=False, _eval_counter=[0]):
    """
    Evaluate one individual via walk-forward backtest.
    Multi-objective: Brier + ROI + Sharpe + Calibration.
    Includes memory management for 16GB RAM with 500 individuals.

    Post-hoc Platt Scaling (added 2026-03-21):
    Each train fold is split 80/20 into train_proper + calibration_set.
    A LogisticRegression is fitted on (raw_probs_cal, y_cal) and used to
    transform test probabilities → calibrated probabilities before computing
    all downstream metrics (Brier, ROI, ECE). This removes systematic
    over/under-confidence from tree-based models without touching cv=3 inner
    calibration, giving an expected Brier improvement of -0.008 to -0.015.

    Side effects: writes the results into ``ind.fitness`` (no return value).
    Failed folds are charged a fixed penalty (Brier 0.28, ROI -0.05) rather
    than aborting the whole evaluation.
    """
    # NOTE: the mutable default `_eval_counter=[0]` is an intentional
    # module-lifetime counter shared across calls — it triggers a gc pass
    # every 10th evaluation to keep RSS bounded with pop_size=500.
    _eval_counter[0] += 1
    if _eval_counter[0] % 10 == 0:
        gc.collect()
    # Local imports keep sklearn off the module import path until needed.
    from sklearn.model_selection import TimeSeriesSplit
    from sklearn.metrics import brier_score_loss
    from sklearn.calibration import CalibratedClassifierCV
    from sklearn.linear_model import LogisticRegression

    # Hard reject degenerate feature masks (too small / too bloated) with a
    # sentinel fitness so selection discards them without a costly backtest.
    selected = ind.selected_indices()
    if len(selected) < 15 or len(selected) > Individual.MAX_FEATURES:
        ind.fitness = {"brier": 0.30, "roi": -0.10, "sharpe": -1.0, "calibration": 0.15, "calibration_error": 0.15, "composite": -1.0}
        return

    X_sub = X[:, selected]
    # Sanitize: trees tolerate extremes but LogisticRegression/MLP do not.
    X_sub = np.nan_to_num(X_sub, nan=0.0, posinf=1e6, neginf=-1e6)
    tscv = TimeSeriesSplit(n_splits=n_splits)
    hp = ind.hyperparams

    model = _build_model(hp, use_gpu)
    if model is None:
        # NOTE(review): this mutates only "composite" on the pre-existing
        # fitness dict — confirm every Individual starts with a full fitness
        # dict, otherwise later reads of e.g. fitness["brier"] would KeyError.
        ind.fitness["composite"] = -1.0
        return

    is_icl = hp["model_type"] in ICL_MODEL_TYPES
    briers, rois, all_probs, all_y = [], [], [], []

    for ti, vi in tscv.split(X_sub):
        try:
            # ── ICL models (TabICLv2, TabPFN): no clone via get_params, no calibration wrapper ──
            if is_icl:
                m = _build_model(hp, use_gpu)
                m.fit(X_sub[ti], y[ti])
                probs = m.predict_proba(X_sub[vi])[:, 1]
            else:
                # ── Platt Scaling: split train fold 80/20 → proper + calibration ──
                # Calibration slice is the chronologically LAST 20% of the
                # train fold, preserving walk-forward ordering.
                cal_split = max(1, int(len(ti) * 0.20))
                ti_proper = ti[:-cal_split]
                ti_cal = ti[-cal_split:]

                # Fresh, unfitted clone of the template model for this fold.
                m = type(model)(**model.get_params())
                if hp["calibration"] != "none":
                    m = CalibratedClassifierCV(m, method=hp["calibration"], cv=3)

                m.fit(X_sub[ti_proper], y[ti_proper])

                # Fit a 1-D logistic map from raw probs → calibrated probs.
                raw_cal = m.predict_proba(X_sub[ti_cal])[:, 1].reshape(-1, 1)
                y_cal = y[ti_cal]

                platt = LogisticRegression(C=1.0, solver="lbfgs", max_iter=200, random_state=42)
                platt.fit(raw_cal, y_cal)

                raw_test = m.predict_proba(X_sub[vi])[:, 1].reshape(-1, 1)
                probs = platt.predict_proba(raw_test)[:, 1]

            briers.append(brier_score_loss(y[vi], probs))
            rois.append(_simulate_betting(probs, y[vi]))
            all_probs.extend(probs)
            all_y.extend(y[vi])
        except Exception:
            # Fold failed (fit error, degenerate split, missing lib): charge a
            # flat penalty so one bad fold doesn't zero the whole individual.
            briers.append(0.28)
            rois.append(-0.05)

    avg_brier = np.mean(briers)
    avg_roi = np.mean(rois)
    # Sharpe over fold ROIs; std floored at 0.01 to avoid divide-by-near-zero.
    sharpe = np.mean(rois) / max(np.std(rois), 0.01) if len(rois) > 1 else 0.0
    cal_err = _calibration_error(np.array(all_probs), np.array(all_y)) if all_probs else 0.15

    # Multi-objective composite fitness (higher = better)
    # Feature penalty: penalize bloated individuals (n_features > 80)
    n_feat = ind.n_features
    feat_penalty = max(0, (n_feat - 80) / 200) * 0.05  # up to -0.03 for 200 features

    composite = (
        0.40 * (1 - avg_brier) +  # Brier: lower is better
        0.25 * max(0, avg_roi) +  # ROI: higher is better
        0.20 * max(0, sharpe / 3) +  # Sharpe: higher is better
        0.15 * (1 - cal_err)  # Calibration: lower is better
        - feat_penalty  # Parsimony pressure for n_features > 80
    )

    ind.fitness = {
        "brier": round(avg_brier, 5),
        "roi": round(avg_roi, 4),
        "sharpe": round(sharpe, 4),
        "calibration": round(cal_err, 4),
        "calibration_error": round(cal_err, 4),  # ECE with 10 bins, on calibrated probs
        "composite": round(composite, 5),
    }
+
|
| 1077 |
+
def _build_model(hp, use_gpu=False):
    """Build ML model from hyperparameters.

    Dispatches on hp["model_type"] to one of several backends. Third-party
    backends (xgboost/lightgbm/catboost/tabicl/tabpfn) are imported lazily;
    if the import fails, the except branch falls back to sklearn's
    GradientBoostingClassifier so evaluation can always proceed.

    Returns an unfitted classifier instance, or (in theory) None — see the
    note at the bottom.
    """
    mt = hp["model_type"]
    try:
        if mt == "xgboost":
            import xgboost as xgb
            params = {
                "n_estimators": hp["n_estimators"],
                "max_depth": hp["max_depth"],
                "learning_rate": hp["learning_rate"],
                "subsample": hp["subsample"],
                "colsample_bytree": hp["colsample_bytree"],
                "min_child_weight": hp["min_child_weight"],
                "reg_alpha": hp["reg_alpha"],
                "reg_lambda": hp["reg_lambda"],
                "eval_metric": "logloss",
                "random_state": 42,
                "n_jobs": -1,
                "tree_method": "hist",
            }
            if use_gpu:
                params["device"] = "cuda"
            return xgb.XGBClassifier(**params)
        elif mt == "lightgbm":
            import lightgbm as lgbm
            return lgbm.LGBMClassifier(
                n_estimators=hp["n_estimators"],
                max_depth=hp["max_depth"],
                learning_rate=hp["learning_rate"],
                subsample=hp["subsample"],
                # num_leaves tied to depth, capped at 127 to bound model size.
                num_leaves=min(2 ** hp["max_depth"] - 1, 127),
                reg_alpha=hp["reg_alpha"],
                reg_lambda=hp["reg_lambda"],
                verbose=-1, random_state=42, n_jobs=-1,
            )
        elif mt == "catboost":
            from catboost import CatBoostClassifier
            # CPU speed fix: cap iterations to 60 on CPU (catboost is 3-5x slower than lightgbm)
            _cat_iters = hp["n_estimators"]
            if not use_gpu:
                _cat_iters = min(_cat_iters, 60)
            _cat_params = dict(
                iterations=_cat_iters,
                depth=min(hp["max_depth"], 10),  # CatBoost depth hard-limited to 10 here
                learning_rate=hp["learning_rate"],
                l2_leaf_reg=hp["reg_lambda"],
                verbose=0, random_state=42,
            )
            if not use_gpu:
                # NOTE(review): CatBoost early stopping only takes effect when
                # fit() receives an eval_set; evaluate_individual calls plain
                # fit(X, y), so this knob may be inert — confirm.
                _cat_params["early_stopping_rounds"] = 15
            return CatBoostClassifier(**_cat_params)
        elif mt == "random_forest":
            from sklearn.ensemble import RandomForestClassifier
            return RandomForestClassifier(
                n_estimators=hp["n_estimators"],
                max_depth=hp["max_depth"],
                min_samples_leaf=max(1, hp["min_child_weight"]),
                random_state=42, n_jobs=-1,
            )
        elif mt == "extra_trees":
            from sklearn.ensemble import ExtraTreesClassifier
            return ExtraTreesClassifier(
                n_estimators=hp["n_estimators"],
                max_depth=hp["max_depth"],
                min_samples_leaf=max(1, hp["min_child_weight"]),
                random_state=42, n_jobs=-1,
            )
        elif mt == "xgboost_brier":
            import xgboost as xgb
            # Custom squared-error-on-probability objective:
            # gradient 2(p - y), constant hessian 2.
            def _brier_objective(y_true, y_pred):
                grad = 2.0 * (y_pred - y_true)
                hess = np.full_like(grad, 2.0)
                return grad, hess
            params = {
                "n_estimators": hp["n_estimators"],
                "max_depth": hp["max_depth"],
                "learning_rate": hp["learning_rate"],
                "subsample": hp["subsample"],
                "colsample_bytree": hp["colsample_bytree"],
                "min_child_weight": hp["min_child_weight"],
                "reg_alpha": hp["reg_alpha"],
                "reg_lambda": hp["reg_lambda"],
                # NOTE(review): with a custom callable objective, XGBoost's
                # sklearn wrapper emits raw (untransformed) margins from
                # predict_proba — verify outputs land in [0, 1] downstream.
                "objective": _brier_objective,
                "random_state": 42,
                "n_jobs": -1,
                "tree_method": "hist",
            }
            if use_gpu:
                params["device"] = "cuda"
            return xgb.XGBClassifier(**params)
        elif mt == "tabicl":
            from tabicl import TabICLClassifier
            return TabICLClassifier()
        elif mt == "tabpfn":
            from tabpfn import TabPFNClassifier
            return TabPFNClassifier(device="cuda" if use_gpu else "cpu")
        elif mt == "stacking":
            from sklearn.ensemble import StackingClassifier, RandomForestClassifier, GradientBoostingClassifier
            from sklearn.linear_model import LogisticRegression
            estimators = [
                ("rf", RandomForestClassifier(n_estimators=100, max_depth=hp["max_depth"], random_state=42, n_jobs=-1)),
                ("gb", GradientBoostingClassifier(n_estimators=100, max_depth=min(hp["max_depth"], 6), learning_rate=hp["learning_rate"], random_state=42)),
            ]
            # xgboost is optional in the stack — skip silently if missing.
            try:
                import xgboost as xgb
                estimators.append(("xgb", xgb.XGBClassifier(n_estimators=100, max_depth=hp["max_depth"], learning_rate=hp["learning_rate"], eval_metric="logloss", random_state=42, n_jobs=-1)))
            except ImportError:
                pass
            return StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(max_iter=500), cv=3, n_jobs=-1)
        elif mt == "mlp":
            from sklearn.neural_network import MLPClassifier
            # Same width repeated nn_n_layers times (defaults: 128 x 2).
            hidden = tuple([hp.get("nn_hidden_dims", 128)] * hp.get("nn_n_layers", 2))
            return MLPClassifier(
                hidden_layer_sizes=hidden,
                learning_rate_init=hp["learning_rate"],
                max_iter=hp.get("nn_epochs", 50),
                alpha=hp["reg_alpha"],
                random_state=42,
            )
        else:
            # Fallback for unknown types (lstm, transformer, tabnet, etc.) — use GBM
            from sklearn.ensemble import GradientBoostingClassifier
            return GradientBoostingClassifier(
                n_estimators=min(hp["n_estimators"], 200),
                max_depth=hp["max_depth"],
                learning_rate=hp["learning_rate"],
                random_state=42,
            )
    except ImportError:
        # Any missing third-party backend degrades to sklearn GBM.
        from sklearn.ensemble import GradientBoostingClassifier
        return GradientBoostingClassifier(
            n_estimators=min(hp["n_estimators"], 200),
            max_depth=hp["max_depth"],
            learning_rate=hp["learning_rate"],
            random_state=42,
        )
    # NOTE: unreachable — every branch above returns — but kept as the
    # defensive sentinel that evaluate_individual's `model is None` check
    # guards against.
    return None
|
| 1215 |
+
|
| 1216 |
+
def _simulate_betting(probs, actuals, edge=0.05, vig=0.045):
|
| 1217 |
+
"""Simulate flat betting with realistic market odds (including vig).
|
| 1218 |
+
|
| 1219 |
+
Market line estimated as midpoint between our model and 50/50 (conservative).
|
| 1220 |
+
Payout at market decimal odds with vig baked in.
|
| 1221 |
+
This gives a realistic ROI vs the old fair-value (1/prob) approach.
|
| 1222 |
+
"""
|
| 1223 |
+
stake = 10
|
| 1224 |
+
profit = 0
|
| 1225 |
+
n_bets = 0
|
| 1226 |
+
for prob, actual in zip(probs, actuals):
|
| 1227 |
+
# Market prob ~ halfway between our model and 50/50
|
| 1228 |
+
market_prob = 0.5 + (prob - 0.5) * 0.5
|
| 1229 |
+
if prob > 0.5 + edge:
|
| 1230 |
+
# Bet home: market pays at their (less favorable) odds with vig
|
| 1231 |
+
market_decimal = 1.0 / (market_prob * (1 + vig / 2))
|
| 1232 |
+
n_bets += 1
|
| 1233 |
+
if actual == 1:
|
| 1234 |
+
profit += stake * (market_decimal - 1)
|
| 1235 |
+
else:
|
| 1236 |
+
profit -= stake
|
| 1237 |
+
elif prob < 0.5 - edge:
|
| 1238 |
+
# Bet away
|
| 1239 |
+
away_market = 1.0 - market_prob
|
| 1240 |
+
market_decimal = 1.0 / (away_market * (1 + vig / 2))
|
| 1241 |
+
n_bets += 1
|
| 1242 |
+
if actual == 0:
|
| 1243 |
+
profit += stake * (market_decimal - 1)
|
| 1244 |
+
else:
|
| 1245 |
+
profit -= stake
|
| 1246 |
+
return profit / (n_bets * stake) if n_bets > 0 else 0.0
|
| 1247 |
+
|
| 1248 |
+
|
| 1249 |
+
def _calibration_error(probs, actuals, n_bins=10):
|
| 1250 |
+
"""Expected Calibration Error (ECE)."""
|
| 1251 |
+
if len(probs) == 0:
|
| 1252 |
+
return 1.0
|
| 1253 |
+
bins = np.linspace(0, 1, n_bins + 1)
|
| 1254 |
+
ece = 0
|
| 1255 |
+
for i in range(n_bins):
|
| 1256 |
+
mask = (probs >= bins[i]) & (probs < bins[i + 1])
|
| 1257 |
+
if mask.sum() == 0:
|
| 1258 |
+
continue
|
| 1259 |
+
ece += mask.sum() / len(probs) * abs(probs[mask].mean() - actuals[mask].mean())
|
| 1260 |
+
return ece
|
| 1261 |
+
|
| 1262 |
+
|
| 1263 |
+
# ═══════════════════════════════════════════════════════════
|
| 1264 |
+
# SECTION 5: GENETIC EVOLUTION ENGINE
|
| 1265 |
+
# ═══════════════════════════════════════════════════════════
|
| 1266 |
+
|
| 1267 |
+
class GeneticEvolutionEngine:
|
| 1268 |
+
"""
|
| 1269 |
+
REAL genetic evolution engine.
|
| 1270 |
+
Runs continuously, evolving a population of model configs.
|
| 1271 |
+
"""
|
| 1272 |
+
|
| 1273 |
+
    def __init__(self, pop_size=500, elite_size=25, mutation_rate=0.15,
                 crossover_rate=0.85, target_features=100, n_splits=3,
                 n_islands=5, migration_interval=10, migrants_per_island=5):
        """Configure the engine and probe for XGBoost CUDA support.

        Args:
            pop_size: individuals kept per generation.
            elite_size: top individuals copied unchanged each generation.
            mutation_rate: starting per-gene mutation probability
                (adapted per generation by the diversity formula).
            crossover_rate: probability a child comes from crossover rather
                than a mutated clone of one parent.
            target_features: desired active-feature count for new individuals.
            n_splits: walk-forward CV folds per fitness evaluation
                (default lowered 5 -> 3 for ~1.6x faster generations).
            n_islands / migration_interval / migrants_per_island: island-model
                parameters stored here; the island logic itself is not visible
                in this method.
        """
        self.pop_size = pop_size
        self.elite_size = elite_size
        self.base_mutation_rate = mutation_rate
        self.mutation_rate = mutation_rate
        # Annealing bounds for the mutation rate — NOTE(review): not read
        # anywhere in this view; confirm they are still used elsewhere.
        self.mut_floor = 0.05
        self.mut_decay = 0.995
        self.crossover_rate = crossover_rate
        self.target_features = target_features
        self.n_splits = n_splits
        self.n_islands = n_islands
        self.island_size = pop_size // n_islands
        self.migration_interval = migration_interval
        self.migrants_per_island = migrants_per_island

        self.population = []
        self.generation = 0
        self.best_ever = None
        self.history = []
        self.stagnation_counter = 0
        self.use_gpu = False
        # Hamming diversity tracking
        self._pop_centroid = None  # float list — mean feature mask over population
        self._hamming_diversity = 1.0  # normalized average pairwise Hamming distance
        self._no_improve_counter = 0  # gens without best-ever composite improvement

        # Detect GPU
        # Probe by actually fitting a tiny CUDA model; ANY failure (xgboost
        # missing, no CUDA device/driver) lands in except and keeps CPU mode.
        try:
            import xgboost as xgb
            _test = xgb.XGBClassifier(n_estimators=5, max_depth=3, tree_method="hist", device="cuda")
            _test.fit(np.random.randn(50, 5), np.random.randint(0, 2, 50))
            self.use_gpu = True
            print("[GPU] XGBoost CUDA: ENABLED")
        except Exception:
            print("[GPU] XGBoost CUDA: disabled, using CPU")
| 1311 |
+
def initialize(self, n_features):
|
| 1312 |
+
"""Create initial random population."""
|
| 1313 |
+
self.n_features = n_features
|
| 1314 |
+
self.population = [Individual(n_features, self.target_features) for _ in range(self.pop_size)]
|
| 1315 |
+
print(f"[INIT] Population: {self.pop_size} individuals, {n_features} feature candidates, "
|
| 1316 |
+
f"~{self.target_features} target features")
|
| 1317 |
+
|
| 1318 |
+
def restore_state(self):
|
| 1319 |
+
"""Restore population from saved state (survive restarts)."""
|
| 1320 |
+
state_file = STATE_DIR / "population.json"
|
| 1321 |
+
if not state_file.exists():
|
| 1322 |
+
return False
|
| 1323 |
+
try:
|
| 1324 |
+
state = json.loads(state_file.read_text())
|
| 1325 |
+
self.generation = state["generation"]
|
| 1326 |
+
self.n_features = state["n_features"]
|
| 1327 |
+
self.history = state.get("history", [])
|
| 1328 |
+
self.stagnation_counter = state.get("stagnation_counter", 0)
|
| 1329 |
+
self.mutation_rate = state.get("mutation_rate", self.base_mutation_rate)
|
| 1330 |
+
|
| 1331 |
+
self.population = []
|
| 1332 |
+
for ind_data in state["population"]:
|
| 1333 |
+
ind = Individual.__new__(Individual)
|
| 1334 |
+
ind.features = ind_data["features"]
|
| 1335 |
+
ind.hyperparams = ind_data["hyperparams"]
|
| 1336 |
+
ind.fitness = ind_data["fitness"]
|
| 1337 |
+
ind.generation = ind_data.get("generation", 0)
|
| 1338 |
+
ind.birth_generation = ind_data.get("birth_generation", ind.generation)
|
| 1339 |
+
ind.n_features = sum(ind.features)
|
| 1340 |
+
self.population.append(ind)
|
| 1341 |
+
|
| 1342 |
+
if state.get("best_ever"):
|
| 1343 |
+
be = state["best_ever"]
|
| 1344 |
+
self.best_ever = Individual.__new__(Individual)
|
| 1345 |
+
self.best_ever.features = be["features"]
|
| 1346 |
+
self.best_ever.hyperparams = be["hyperparams"]
|
| 1347 |
+
self.best_ever.fitness = be["fitness"]
|
| 1348 |
+
self.best_ever.generation = be.get("generation", 0)
|
| 1349 |
+
self.best_ever.n_features = sum(self.best_ever.features)
|
| 1350 |
+
|
| 1351 |
+
print(f"[RESTORE] Generation {self.generation}, {len(self.population)} individuals, "
|
| 1352 |
+
f"best Brier={self.best_ever.fitness['brier']:.4f}" if self.best_ever else "")
|
| 1353 |
+
return True
|
| 1354 |
+
except Exception as e:
|
| 1355 |
+
print(f"[RESTORE] Failed: {e}")
|
| 1356 |
+
return False
|
| 1357 |
+
|
| 1358 |
+
    def resize_population_features(self, new_n_features):
        """Resize feature masks if feature count changed (e.g., new features added).

        Grows or truncates every individual's feature mask in place, then
        recounts n_features. Growth is asymmetric by design: population
        members may sample the new columns, while best_ever is preserved
        exactly (new columns forced off) so its recorded fitness stays honest.
        """
        old_n = self.n_features
        if old_n == new_n_features:
            return
        delta = new_n_features - old_n
        print(f"[RESIZE] Feature count changed: {old_n} -> {new_n_features} (delta={delta})")
        self.n_features = new_n_features
        for ind in self.population:
            if len(ind.features) < new_n_features:
                # Extend with random activation for new features (30% chance each)
                ind.features.extend([1 if random.random() < 0.3 else 0 for _ in range(new_n_features - len(ind.features))])
            elif len(ind.features) > new_n_features:
                ind.features = ind.features[:new_n_features]
            ind.n_features = sum(ind.features)
        if self.best_ever:
            # best_ever only gets zero-padding: never activate unseen features.
            if len(self.best_ever.features) < new_n_features:
                self.best_ever.features.extend([0] * (new_n_features - len(self.best_ever.features)))
            elif len(self.best_ever.features) > new_n_features:
                self.best_ever.features = self.best_ever.features[:new_n_features]
            self.best_ever.n_features = sum(self.best_ever.features)
        print(f"[RESIZE] All {len(self.population)} individuals resized")
+
|
| 1381 |
+
    def save_state(self):
        """Save population state to survive restarts.

        Serializes generation counters, the whole population, best_ever and
        the last 200 history entries to STATE_DIR/population.json. Numpy
        floats in hyperparams are coerced to plain floats for JSON; anything
        else non-serializable falls through to `default=str` (stringified —
        restore_state reads it back as-is).
        """
        state = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "generation": self.generation,
            "n_features": self.n_features,
            "stagnation_counter": self.stagnation_counter,
            "mutation_rate": self.mutation_rate,
            "population": [
                {
                    "features": ind.features,
                    # np.floating -> float so json.dumps doesn't choke.
                    "hyperparams": {k: (float(v) if isinstance(v, (np.floating,)) else v)
                                    for k, v in ind.hyperparams.items()},
                    "fitness": ind.fitness,
                    "generation": ind.generation,
                    # Older individuals may predate birth_generation — default
                    # to their recorded generation.
                    "birth_generation": getattr(ind, 'birth_generation', ind.generation),
                }
                for ind in self.population
            ],
            "best_ever": {
                "features": self.best_ever.features,
                "hyperparams": {k: (float(v) if isinstance(v, (np.floating,)) else v)
                                for k, v in self.best_ever.hyperparams.items()},
                "fitness": self.best_ever.fitness,
                "generation": self.best_ever.generation,
            } if self.best_ever else None,
            # Cap history to the most recent 200 entries to bound file size.
            "history": self.history[-200:],
        }
        (STATE_DIR / "population.json").write_text(json.dumps(state, default=str))
+
|
| 1411 |
+
# ── Hamming Diversity Utilities ──────────────────────────────────────────
|
| 1412 |
+
|
| 1413 |
+
def _update_pop_centroid(self):
|
| 1414 |
+
"""Compute and cache the population centroid (mean feature mask).
|
| 1415 |
+
|
| 1416 |
+
The centroid[i] is the fraction of individuals that have feature i active.
|
| 1417 |
+
Used by _tournament_select for crowding distance.
|
| 1418 |
+
"""
|
| 1419 |
+
if not self.population:
|
| 1420 |
+
return
|
| 1421 |
+
n = len(self.population[0].features)
|
| 1422 |
+
centroid = [0.0] * n
|
| 1423 |
+
for ind in self.population:
|
| 1424 |
+
for i, v in enumerate(ind.features):
|
| 1425 |
+
centroid[i] += v
|
| 1426 |
+
pop_len = len(self.population)
|
| 1427 |
+
self._pop_centroid = [c / pop_len for c in centroid]
|
| 1428 |
+
|
| 1429 |
+
def _compute_hamming_diversity(self, sample_size=50):
|
| 1430 |
+
"""Compute the normalized average pairwise Hamming distance of the population.
|
| 1431 |
+
|
| 1432 |
+
Exact O(N²) computation is expensive for pop_size=500, so we use a
|
| 1433 |
+
random sample of up to `sample_size` pairs for efficiency.
|
| 1434 |
+
|
| 1435 |
+
Returns a float in [0, 1]. A value of 0 means all feature masks are
|
| 1436 |
+
identical; a value of 1 means every bit differs between every pair.
|
| 1437 |
+
"""
|
| 1438 |
+
pop = self.population
|
| 1439 |
+
if len(pop) < 2:
|
| 1440 |
+
return 1.0
|
| 1441 |
+
n_feat = len(pop[0].features)
|
| 1442 |
+
if n_feat == 0:
|
| 1443 |
+
return 0.0
|
| 1444 |
+
|
| 1445 |
+
# Random sampling: up to sample_size² / 2 pairs
|
| 1446 |
+
indices = list(range(len(pop)))
|
| 1447 |
+
random.shuffle(indices)
|
| 1448 |
+
sample = indices[:sample_size]
|
| 1449 |
+
|
| 1450 |
+
total_dist = 0.0
|
| 1451 |
+
n_pairs = 0
|
| 1452 |
+
for i in range(len(sample)):
|
| 1453 |
+
for j in range(i + 1, len(sample)):
|
| 1454 |
+
f1 = pop[sample[i]].features
|
| 1455 |
+
f2 = pop[sample[j]].features
|
| 1456 |
+
total_dist += sum(a != b for a, b in zip(f1, f2)) / n_feat
|
| 1457 |
+
n_pairs += 1
|
| 1458 |
+
|
| 1459 |
+
return total_dist / n_pairs if n_pairs > 0 else 1.0
|
| 1460 |
+
|
| 1461 |
+
def evolve_one_generation(self, X, y):
|
| 1462 |
+
"""Run one generation of evolution. Returns best individual."""
|
| 1463 |
+
self.generation += 1
|
| 1464 |
+
gen_start = time.time()
|
| 1465 |
+
|
| 1466 |
+
# 1. Evaluate all individuals
|
| 1467 |
+
for i, ind in enumerate(self.population):
|
| 1468 |
+
evaluate_individual(ind, X, y, self.n_splits, self.use_gpu)
|
| 1469 |
+
if (i + 1) % 10 == 0:
|
| 1470 |
+
print(f" Evaluated {i+1}/{len(self.population)}...", end="\r")
|
| 1471 |
+
|
| 1472 |
+
# 2. Sort by composite fitness (higher = better)
|
| 1473 |
+
self.population.sort(key=lambda x: x.fitness["composite"], reverse=True)
|
| 1474 |
+
best = self.population[0]
|
| 1475 |
+
|
| 1476 |
+
# 3. Track best ever
|
| 1477 |
+
prev_best_brier = self.best_ever.fitness["brier"] if self.best_ever else 1.0
|
| 1478 |
+
if self.best_ever is None or best.fitness["composite"] > self.best_ever.fitness["composite"]:
|
| 1479 |
+
self.best_ever = Individual.__new__(Individual)
|
| 1480 |
+
self.best_ever.features = best.features[:]
|
| 1481 |
+
self.best_ever.hyperparams = dict(best.hyperparams)
|
| 1482 |
+
self.best_ever.fitness = dict(best.fitness)
|
| 1483 |
+
self.best_ever.n_features = best.n_features
|
| 1484 |
+
self.best_ever.generation = self.generation
|
| 1485 |
+
|
| 1486 |
+
# 4. Stagnation detection — track BOTH Brier and composite
|
| 1487 |
+
prev_best_composite = self.best_ever.fitness["composite"] if self.best_ever and hasattr(self.best_ever, 'fitness') else 0.0
|
| 1488 |
+
brier_stagnant = abs(best.fitness["brier"] - prev_best_brier) < 0.0005
|
| 1489 |
+
composite_stagnant = abs(best.fitness["composite"] - prev_best_composite) < 0.001
|
| 1490 |
+
if brier_stagnant and composite_stagnant:
|
| 1491 |
+
self.stagnation_counter += 1
|
| 1492 |
+
self._no_improve_counter += 1
|
| 1493 |
+
elif not brier_stagnant:
|
| 1494 |
+
self.stagnation_counter = max(0, self.stagnation_counter - 2) # Partial reset
|
| 1495 |
+
self._no_improve_counter = 0
|
| 1496 |
+
else:
|
| 1497 |
+
self.stagnation_counter = max(0, self.stagnation_counter - 1)
|
| 1498 |
+
self._no_improve_counter = 0
|
| 1499 |
+
|
| 1500 |
+
# 4b. Hamming Diversity Monitor
|
| 1501 |
+
# Compute normalized average pairwise Hamming distance; also refresh centroid
|
| 1502 |
+
# (used by crowding-aware tournament selection below).
|
| 1503 |
+
self._hamming_diversity = self._compute_hamming_diversity(sample_size=50)
|
| 1504 |
+
self._update_pop_centroid()
|
| 1505 |
+
if self._hamming_diversity < 0.15:
|
| 1506 |
+
print(f" [DIVERSITY-LOW] Hamming diversity={self._hamming_diversity:.3f} < 0.15 threshold")
|
| 1507 |
+
|
| 1508 |
+
# 4c. Adaptive Mutation Rate — diversity-driven formula
|
| 1509 |
+
# Base = 0.03; rises smoothly toward 0.10 as diversity falls below 0.25.
|
| 1510 |
+
# Formula: mutation_rate = 0.03 + 0.07 * max(0, 1 - diversity / 0.25)
|
| 1511 |
+
diversity_mutation = 0.03 + 0.07 * max(0.0, 1.0 - self._hamming_diversity / 0.25)
|
| 1512 |
+
# Stagnation boosts applied on top (capped at 0.25)
|
| 1513 |
+
if self.stagnation_counter >= 10:
|
| 1514 |
+
self.mutation_rate = min(0.15, diversity_mutation * 1.8)
|
| 1515 |
+
print(f" [STAGNATION-CRITICAL] {self.stagnation_counter} gens — "
|
| 1516 |
+
f"mutation rate -> {self.mutation_rate:.3f} (diversity={self._hamming_diversity:.3f})")
|
| 1517 |
+
elif self.stagnation_counter >= 7:
|
| 1518 |
+
self.mutation_rate = min(0.15, diversity_mutation * 1.5)
|
| 1519 |
+
print(f" [STAGNATION] {self.stagnation_counter} gens — "
|
| 1520 |
+
f"mutation rate -> {self.mutation_rate:.3f} (diversity={self._hamming_diversity:.3f})")
|
| 1521 |
+
elif self.stagnation_counter >= 3:
|
| 1522 |
+
self.mutation_rate = min(0.12, diversity_mutation * 1.2)
|
| 1523 |
+
else:
|
| 1524 |
+
# Normal regime: formula drives the rate directly
|
| 1525 |
+
self.mutation_rate = diversity_mutation
|
| 1526 |
+
|
| 1527 |
+
# 5. Record history
|
| 1528 |
+
self.history.append({
|
| 1529 |
+
"gen": self.generation,
|
| 1530 |
+
"best_brier": best.fitness["brier"],
|
| 1531 |
+
"best_roi": best.fitness["roi"],
|
| 1532 |
+
"best_sharpe": best.fitness["sharpe"],
|
| 1533 |
+
"best_composite": best.fitness["composite"],
|
| 1534 |
+
"best_calibration_error": best.fitness.get("calibration_error", best.fitness.get("calibration", 1.0)),
|
| 1535 |
+
"n_features": best.n_features,
|
| 1536 |
+
"model_type": best.hyperparams["model_type"],
|
| 1537 |
+
"mutation_rate": round(self.mutation_rate, 4),
|
| 1538 |
+
"avg_composite": round(np.mean([ind.fitness["composite"] for ind in self.population]), 5),
|
| 1539 |
+
"pop_diversity": round(np.std([ind.n_features for ind in self.population]), 1),
|
| 1540 |
+
"hamming_diversity": round(self._hamming_diversity, 4),
|
| 1541 |
+
})
|
| 1542 |
+
|
| 1543 |
+
elapsed = time.time() - gen_start
|
| 1544 |
+
ece_val = best.fitness.get("calibration_error", best.fitness.get("calibration", 1.0))
|
| 1545 |
+
print(f" Gen {self.generation}: Brier={best.fitness['brier']:.4f} "
|
| 1546 |
+
f"ROI={best.fitness['roi']:.1%} Sharpe={best.fitness['sharpe']:.2f} "
|
| 1547 |
+
f"ECE={ece_val:.4f} Features={best.n_features} Model={best.hyperparams['model_type']} "
|
| 1548 |
+
f"Composite={best.fitness['composite']:.4f} "
|
| 1549 |
+
f"Diversity={self._hamming_diversity:.3f} MutRate={self.mutation_rate:.3f} ({elapsed:.0f}s)")
|
| 1550 |
+
|
| 1551 |
+
# 6. Create next generation
|
| 1552 |
+
new_pop = []
|
| 1553 |
+
|
| 1554 |
+
# Elitism — protect top by composite AND top by raw Brier (prevents fossil loss)
|
| 1555 |
+
def _clone_individual(src):
|
| 1556 |
+
clone = Individual.__new__(Individual)
|
| 1557 |
+
clone.features = src.features[:]
|
| 1558 |
+
clone.hyperparams = dict(src.hyperparams)
|
| 1559 |
+
clone.fitness = dict(src.fitness)
|
| 1560 |
+
clone.n_features = src.n_features
|
| 1561 |
+
clone.generation = src.generation
|
| 1562 |
+
clone.birth_generation = getattr(src, 'birth_generation', src.generation)
|
| 1563 |
+
return clone
|
| 1564 |
+
|
| 1565 |
+
# Top elite_size by composite (already sorted)
|
| 1566 |
+
elite_ids = set()
|
| 1567 |
+
for i in range(min(self.elite_size, len(self.population))):
|
| 1568 |
+
new_pop.append(_clone_individual(self.population[i]))
|
| 1569 |
+
elite_ids.add(id(self.population[i]))
|
| 1570 |
+
|
| 1571 |
+
# Also protect top-2 by raw Brier score (lower = better) if not already elite
|
| 1572 |
+
brier_sorted = sorted(self.population, key=lambda x: x.fitness["brier"])
|
| 1573 |
+
for ind in brier_sorted[:2]:
|
| 1574 |
+
if id(ind) not in elite_ids:
|
| 1575 |
+
new_pop.append(_clone_individual(ind))
|
| 1576 |
+
elite_ids.add(id(ind))
|
| 1577 |
+
|
| 1578 |
+
# Aging: remove individuals that have survived > 15 generations without improvement
|
| 1579 |
+
MAX_AGE = 15
|
| 1580 |
+
aged_out = 0
|
| 1581 |
+
for i in range(len(new_pop) - 1, self.elite_size - 1, -1):
|
| 1582 |
+
if i < len(new_pop):
|
| 1583 |
+
age = self.generation - getattr(new_pop[i], 'birth_generation', 0)
|
| 1584 |
+
if age > MAX_AGE and new_pop[i].fitness["composite"] < new_pop[0].fitness["composite"] * 0.95:
|
| 1585 |
+
new_pop.pop(i)
|
| 1586 |
+
aged_out += 1
|
| 1587 |
+
if aged_out > 0:
|
| 1588 |
+
print(f" [AGING] {aged_out} stale individuals removed")
|
| 1589 |
+
|
| 1590 |
+
# Injection: smarter — at stagnation >= 7 inject targeted mutants of best, not just random
|
| 1591 |
+
n_inject = 0
|
| 1592 |
+
if self.stagnation_counter >= 7:
|
| 1593 |
+
n_inject = self.pop_size // 4
|
| 1594 |
+
# Half random, half targeted mutations of best individual
|
| 1595 |
+
n_random = n_inject // 2
|
| 1596 |
+
n_mutant = n_inject - n_random
|
| 1597 |
+
for _ in range(n_random):
|
| 1598 |
+
new_pop.append(Individual(self.n_features, self.target_features))
|
| 1599 |
+
# Targeted mutants: take best, apply heavy mutation
|
| 1600 |
+
for _ in range(n_mutant):
|
| 1601 |
+
mutant = Individual.__new__(Individual)
|
| 1602 |
+
mutant.features = self.population[0].features[:]
|
| 1603 |
+
mutant.hyperparams = dict(self.population[0].hyperparams)
|
| 1604 |
+
mutant.fitness = {"brier": 1.0, "roi": 0.0, "sharpe": 0.0, "calibration": 1.0, "calibration_error": 1.0, "composite": 0.0}
|
| 1605 |
+
mutant.birth_generation = self.generation
|
| 1606 |
+
mutant.n_features = self.population[0].n_features
|
| 1607 |
+
mutant.generation = self.generation
|
| 1608 |
+
mutant.mutate(0.25) # Heavy mutation
|
| 1609 |
+
new_pop.append(mutant)
|
| 1610 |
+
print(f" [INJECTION] {n_random} random + {n_mutant} targeted mutants (stagnation={self.stagnation_counter})")
|
| 1611 |
+
elif self.stagnation_counter >= 3:
|
| 1612 |
+
# Mild injection: 10% fresh individuals
|
| 1613 |
+
n_inject = self.pop_size // 10
|
| 1614 |
+
for _ in range(n_inject):
|
| 1615 |
+
new_pop.append(Individual(self.n_features, self.target_features))
|
| 1616 |
+
print(f" [INJECTION-MILD] {n_inject} fresh individuals (stagnation={self.stagnation_counter})")
|
| 1617 |
+
|
| 1618 |
+
# Diversity Injection: triggered independently when diversity is critically low
|
| 1619 |
+
# (diversity < 0.15) OR when there has been no fitness improvement for 5
|
| 1620 |
+
# consecutive generations — whichever happens first. Elites are always kept.
|
| 1621 |
+
diversity_trigger = (self._hamming_diversity < 0.15) or (self._no_improve_counter >= 5)
|
| 1622 |
+
if diversity_trigger and n_inject == 0:
|
| 1623 |
+
# Inject 20% of population as freshly randomized individuals (elites already in new_pop)
|
| 1624 |
+
n_diversity_inject = max(1, self.pop_size // 5)
|
| 1625 |
+
# Cap to avoid going way over pop_size before the fill loop
|
| 1626 |
+
slots_remaining = max(0, self.pop_size - len(new_pop) - n_diversity_inject)
|
| 1627 |
+
for _ in range(n_diversity_inject):
|
| 1628 |
+
new_pop.append(Individual(self.n_features, self.target_features))
|
| 1629 |
+
trigger_reason = (
|
| 1630 |
+
f"diversity={self._hamming_diversity:.3f}<0.15"
|
| 1631 |
+
if self._hamming_diversity < 0.15
|
| 1632 |
+
else f"no_improve={self._no_improve_counter}>=5"
|
| 1633 |
+
)
|
| 1634 |
+
print(f" [DIVERSITY-INJECT] {n_diversity_inject} fresh individuals injected "
|
| 1635 |
+
f"({trigger_reason}), elites preserved")
|
| 1636 |
+
|
| 1637 |
+
# Fill with crossover + mutation
|
| 1638 |
+
while len(new_pop) < self.pop_size:
|
| 1639 |
+
# Diversity-aware tournament: 80% fitness-based, 20% diversity-based
|
| 1640 |
+
if random.random() < 0.2:
|
| 1641 |
+
p1 = self._diversity_select(7)
|
| 1642 |
+
p2 = self._tournament_select(7)
|
| 1643 |
+
else:
|
| 1644 |
+
p1 = self._tournament_select(7)
|
| 1645 |
+
p2 = self._tournament_select(7)
|
| 1646 |
+
if random.random() < self.crossover_rate:
|
| 1647 |
+
child = Individual.crossover(p1, p2)
|
| 1648 |
+
else:
|
| 1649 |
+
child = Individual.__new__(Individual)
|
| 1650 |
+
child.features = p1.features[:]
|
| 1651 |
+
child.hyperparams = dict(p1.hyperparams)
|
| 1652 |
+
child.fitness = dict(p1.fitness)
|
| 1653 |
+
child.n_features = p1.n_features
|
| 1654 |
+
child.generation = self.generation
|
| 1655 |
+
child.birth_generation = self.generation
|
| 1656 |
+
child.mutate(self.mutation_rate)
|
| 1657 |
+
new_pop.append(child)
|
| 1658 |
+
|
| 1659 |
+
self.population = new_pop[:self.pop_size]
|
| 1660 |
+
return best
|
| 1661 |
+
|
| 1662 |
+
def _tournament_select(self, k=7):
|
| 1663 |
+
"""Tournament selection with crowding.
|
| 1664 |
+
|
| 1665 |
+
Standard tournament selection, but when two candidates have similar
|
| 1666 |
+
composite fitness (within 5%), prefer the one that is more unique —
|
| 1667 |
+
measured by Hamming distance from the population centroid. This
|
| 1668 |
+
implements a lightweight niching pressure that rewards exploration
|
| 1669 |
+
without discarding high-quality individuals.
|
| 1670 |
+
"""
|
| 1671 |
+
contestants = random.sample(self.population, min(k, len(self.population)))
|
| 1672 |
+
best = max(contestants, key=lambda x: x.fitness["composite"])
|
| 1673 |
+
best_fit = best.fitness["composite"]
|
| 1674 |
+
|
| 1675 |
+
# Among contestants within 5% of the best, prefer the most unique one
|
| 1676 |
+
similar = [c for c in contestants if best_fit > 0 and
|
| 1677 |
+
abs(c.fitness["composite"] - best_fit) / max(abs(best_fit), 1e-9) < 0.05]
|
| 1678 |
+
if len(similar) > 1 and hasattr(self, '_pop_centroid') and self._pop_centroid is not None:
|
| 1679 |
+
centroid = self._pop_centroid
|
| 1680 |
+
def _dist_from_centroid(ind):
|
| 1681 |
+
f = ind.features
|
| 1682 |
+
n = len(f)
|
| 1683 |
+
if n == 0 or len(centroid) != n:
|
| 1684 |
+
return 0.0
|
| 1685 |
+
return sum(abs(f[i] - centroid[i]) for i in range(n)) / n
|
| 1686 |
+
best = max(similar, key=_dist_from_centroid)
|
| 1687 |
+
|
| 1688 |
+
return best
|
| 1689 |
+
|
| 1690 |
+
def _diversity_select(self, k=7):
|
| 1691 |
+
"""Diversity-preserving selection: pick the most unique individual from k random."""
|
| 1692 |
+
contestants = random.sample(self.population, min(k, len(self.population)))
|
| 1693 |
+
if not self.population:
|
| 1694 |
+
return contestants[0]
|
| 1695 |
+
# Measure uniqueness: how different is this individual's feature set from the elite?
|
| 1696 |
+
elite_features = set()
|
| 1697 |
+
for i, ind in enumerate(self.population[:self.elite_size]):
|
| 1698 |
+
elite_features.update(ind.selected_indices())
|
| 1699 |
+
best_diversity = -1
|
| 1700 |
+
best_ind = contestants[0]
|
| 1701 |
+
for c in contestants:
|
| 1702 |
+
c_features = set(c.selected_indices())
|
| 1703 |
+
if not c_features:
|
| 1704 |
+
continue
|
| 1705 |
+
overlap = len(c_features & elite_features) / max(len(c_features), 1)
|
| 1706 |
+
diversity = 1.0 - overlap
|
| 1707 |
+
# Weight by fitness to avoid picking terrible individuals
|
| 1708 |
+
score = diversity * 0.6 + max(0, c.fitness["composite"]) * 0.4
|
| 1709 |
+
if score > best_diversity:
|
| 1710 |
+
best_diversity = score
|
| 1711 |
+
best_ind = c
|
| 1712 |
+
return best_ind
|
| 1713 |
+
|
| 1714 |
+
def save_cycle_results(self, feature_names):
|
| 1715 |
+
"""Save results after a cycle of generations."""
|
| 1716 |
+
if not self.best_ever:
|
| 1717 |
+
return
|
| 1718 |
+
|
| 1719 |
+
selected_names = [feature_names[i] for i in self.best_ever.selected_indices()
|
| 1720 |
+
if i < len(feature_names)]
|
| 1721 |
+
|
| 1722 |
+
results = {
|
| 1723 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 1724 |
+
"generation": self.generation,
|
| 1725 |
+
"population_size": self.pop_size,
|
| 1726 |
+
"feature_candidates": self.n_features,
|
| 1727 |
+
"mutation_rate": round(self.mutation_rate, 4),
|
| 1728 |
+
"stagnation_counter": self.stagnation_counter,
|
| 1729 |
+
"gpu": self.use_gpu,
|
| 1730 |
+
"best": {
|
| 1731 |
+
"brier": self.best_ever.fitness["brier"],
|
| 1732 |
+
"roi": self.best_ever.fitness["roi"],
|
| 1733 |
+
"sharpe": self.best_ever.fitness["sharpe"],
|
| 1734 |
+
"calibration": self.best_ever.fitness["calibration"],
|
| 1735 |
+
"calibration_error": self.best_ever.fitness.get("calibration_error", self.best_ever.fitness["calibration"]),
|
| 1736 |
+
"composite": self.best_ever.fitness["composite"],
|
| 1737 |
+
"n_features": self.best_ever.n_features,
|
| 1738 |
+
"model_type": self.best_ever.hyperparams["model_type"],
|
| 1739 |
+
"hyperparams": {k: (float(v) if isinstance(v, (np.floating, np.integer)) else v)
|
| 1740 |
+
for k, v in self.best_ever.hyperparams.items()},
|
| 1741 |
+
"selected_features": selected_names[:50],
|
| 1742 |
+
},
|
| 1743 |
+
"top5": [ind.to_dict() for ind in sorted(
|
| 1744 |
+
self.population, key=lambda x: x.fitness["composite"], reverse=True
|
| 1745 |
+
)[:5]],
|
| 1746 |
+
"history_last20": self.history[-20:],
|
| 1747 |
+
}
|
| 1748 |
+
|
| 1749 |
+
# Save timestamped + latest
|
| 1750 |
+
ts = datetime.now().strftime("%Y%m%d-%H%M")
|
| 1751 |
+
(RESULTS_DIR / f"evolution-{ts}.json").write_text(json.dumps(results, indent=2, default=str))
|
| 1752 |
+
(RESULTS_DIR / "evolution-latest.json").write_text(json.dumps(results, indent=2, default=str))
|
| 1753 |
+
return results
|
| 1754 |
+
|
| 1755 |
+
|
| 1756 |
+
# ═══════════════════════════════════════════════════════════
|
| 1757 |
+
# SECTION 6: VM CALLBACK
|
| 1758 |
+
# ═══════════════════════════════════════════════════════════
|
| 1759 |
+
|
| 1760 |
+
def callback_to_vm(results):
    """POST results to VM data server (best-effort).

    Failures are logged and swallowed — the evolution loop must never block
    on an unreachable VM. The payload is additionally mirrored to the shared
    mon-ipad data directory when that path is accessible.

    Args:
        results: JSON-serializable results dict (non-serializable values are
            stringified via ``default=str``).
    """
    import urllib.request
    try:
        url = f"{VM_CALLBACK_URL}/callback/evolution"
        body = json.dumps(results, default=str).encode()
        req = urllib.request.Request(url, data=body, headers={"Content-Type": "application/json"})
        # Use the response as a context manager so the HTTP connection is
        # closed promptly (the previous code leaked the response object).
        with urllib.request.urlopen(req, timeout=10) as resp:
            print(f" [CALLBACK] VM notified: {resp.status}")
    except Exception as e:
        # Best-effort, don't block on failure
        print(f" [CALLBACK] VM unreachable: {e}")

    # Also try to write to shared mon-ipad data if accessible
    try:
        shared = Path("/home/termius/mon-ipad/data/nba-agent/evolution-latest.json")
        if shared.parent.exists():
            shared.write_text(json.dumps(results, indent=2, default=str))
            print(f" [CALLBACK] Wrote to mon-ipad")
    except Exception:
        pass
|
| 1781 |
+
|
| 1782 |
+
|
| 1783 |
+
# ═══════════════════════════════════════════════════════════
|
| 1784 |
+
# SECTION 7: MAIN LOOP (continuous 24/7)
|
| 1785 |
+
# ═══════════════════════════════════════════════════════════
|
| 1786 |
+
|
| 1787 |
+
def run_continuous(generations_per_cycle=10, total_cycles=None, pop_size=500,
                   target_features=100, n_splits=3, cool_down=30):
    """
    Main entry point — runs genetic evolution CONTINUOUSLY.

    Loads game data, builds the feature matrix, initializes (or restores) the
    genetic engine, then loops: evolve N generations, persist state/results,
    log to Supabase (if available), call back to the VM, and periodically
    refresh the underlying game data.

    Args:
        generations_per_cycle: Generations per cycle before saving/callback
        total_cycles: None = infinite (24/7 mode)
        pop_size: Population size
        target_features: Target number of features per individual
        n_splits: Walk-forward backtest splits (default 3, reduced from 5 per
            the "n_splits 5→3 for 1.6x faster walk-forward CV" fix)
        cool_down: Seconds between cycles
    """
    print("=" * 70)
    print(" NBA QUANT AI — REAL GENETIC EVOLUTION LOOP v3")
    print(f" Started: {datetime.now(timezone.utc).isoformat()}")
    print(f" Pop: {pop_size} | Target features: {target_features}")
    print(f" Gens/cycle: {generations_per_cycle} | Cycles: {'INFINITE' if total_cycles is None else total_cycles}")
    print("=" * 70)

    # 1. Pull data
    print("\n[PHASE 1] Loading data...")
    pull_seasons()
    games = load_all_games()
    print(f" {len(games)} games loaded")
    if len(games) < 500:
        # Too little data for a meaningful walk-forward backtest — bail out.
        print(" ERROR: Not enough games!")
        return

    # 2. Build features
    print("\n[PHASE 2] Building features...")
    X, y, feature_names = build_features(games)
    print(f" Feature matrix: {X.shape} ({len(feature_names)} features)")

    # 3. Initialize engine
    print("\n[PHASE 3] Initializing engine...")
    engine = GeneticEvolutionEngine(
        pop_size=pop_size, elite_size=max(5, pop_size // 20), mutation_rate=0.15,
        crossover_rate=0.85, target_features=target_features, n_splits=n_splits,
        n_islands=5, migration_interval=10, migrants_per_island=5,
    )

    # Try to restore previous state (survives process restarts)
    if not engine.restore_state():
        engine.initialize(X.shape[1])
    else:
        # Resize population if feature count changed (new features added)
        engine.resize_population_features(X.shape[1])

    # ── Supabase Run Logger + Auto-Cut ──
    run_logger = None
    if _HAS_LOGGER:
        try:
            run_logger = RunLogger(local_dir=str(RESULTS_DIR / "run-logs"))
            print("[RUN-LOGGER] Supabase logging + auto-cut ACTIVE")
        except Exception as e:
            print(f"[RUN-LOGGER] Init failed: {e}")

    # 4. CONTINUOUS EVOLUTION LOOP
    cycle = 0
    while True:
        cycle += 1
        if total_cycles is not None and cycle > total_cycles:
            break

        cycle_start = time.time()
        print(f"\n{'='*60}")
        print(f" CYCLE {cycle} — Starting {generations_per_cycle} generations")
        print(f" Time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}")
        print(f"{'='*60}")

        for gen in range(generations_per_cycle):
            try:
                gen_start = time.time()
                best = engine.evolve_one_generation(X, y)

                # ── Log generation + auto-cut ──
                if run_logger and best:
                    try:
                        pop_div = float(np.std([ind.n_features for ind in engine.population]))
                        avg_comp = float(np.mean([ind.fitness["composite"] for ind in engine.population]))
                        run_logger.log_generation(
                            cycle=cycle, generation=engine.generation,
                            best={"brier": best.fitness["brier"], "roi": best.fitness["roi"],
                                  "sharpe": best.fitness["sharpe"], "composite": best.fitness["composite"],
                                  "n_features": best.n_features, "model_type": best.hyperparams["model_type"]},
                            mutation_rate=engine.mutation_rate, avg_composite=avg_comp,
                            pop_diversity=pop_div, duration_s=time.time() - gen_start)

                        # Auto-cut check: the logger may prescribe corrective actions
                        cut_actions = run_logger.check_auto_cut(best.fitness, {
                            "mutation_rate": engine.mutation_rate,
                            "stagnation": engine.stagnation_counter,
                            "pop_size": engine.pop_size,
                            "pop_diversity": pop_div,
                        })
                        for action in cut_actions:
                            atype = action["type"]
                            params = action.get("params", {})
                            if atype == "config" and "mutation_rate" in params:
                                # Adjust mutation rate in place
                                engine.mutation_rate = params["mutation_rate"]
                            elif atype == "emergency_diversify":
                                # Replace bottom third with fresh random individuals
                                n_new = engine.pop_size // 3
                                engine.population = sorted(engine.population, key=lambda x: x.fitness["composite"], reverse=True)[:engine.pop_size - n_new]
                                for _ in range(n_new):
                                    engine.population.append(Individual(engine.n_features, engine.target_features))
                                print(f" [AUTO-CUT] Diversified: {n_new} fresh individuals")
                            elif atype == "full_reset":
                                # Keep only the elites, re-randomize the rest
                                engine.population = sorted(engine.population, key=lambda x: x.fitness["composite"], reverse=True)[:engine.elite_size]
                                while len(engine.population) < engine.pop_size:
                                    engine.population.append(Individual(engine.n_features, engine.target_features))
                                engine.stagnation_counter = 0
                                print(f" [AUTO-CUT] FULL RESET executed")
                    except Exception as e:
                        # Logging must never kill the evolution loop
                        print(f" [RUN-LOGGER] Error: {e}")
            except Exception as e:
                print(f" [ERROR] Generation failed: {e}")
                traceback.print_exc()
                continue

        # Save state (survives restarts)
        engine.save_state()

        # Save results
        results = engine.save_cycle_results(feature_names)

        cycle_elapsed = time.time() - cycle_start
        print(f"\n Cycle {cycle} complete in {cycle_elapsed:.0f}s")

        if engine.best_ever:
            print(f" BEST EVER: Brier={engine.best_ever.fitness['brier']:.4f} "
                  f"ROI={engine.best_ever.fitness['roi']:.1%} "
                  f"Features={engine.best_ever.n_features}")

        # ── Log cycle to Supabase ──
        if run_logger and results and engine.best_ever:
            try:
                pop_div = float(np.std([ind.n_features for ind in engine.population]))
                avg_comp = float(np.mean([ind.fitness["composite"] for ind in engine.population]))
                run_logger.log_cycle(
                    cycle=cycle, generation=engine.generation,
                    best=engine.best_ever.fitness | {"n_features": engine.best_ever.n_features,
                                                     "model_type": engine.best_ever.hyperparams["model_type"]},
                    pop_size=engine.pop_size, mutation_rate=engine.mutation_rate,
                    crossover_rate=engine.crossover_rate, stagnation=engine.stagnation_counter,
                    games=len(games), feature_candidates=X.shape[1],
                    cycle_duration_s=cycle_elapsed, avg_composite=avg_comp, pop_diversity=pop_div,
                    top5=results.get("top5"), selected_features=results.get("best", {}).get("selected_features"))
                print(f" [RUN-LOGGER] Cycle {cycle} logged to Supabase")
            except Exception as e:
                print(f" [RUN-LOGGER] Cycle log error: {e}")

        # Callback to VM (best-effort)
        if results:
            callback_to_vm(results)

        # Refresh data periodically (every 10 cycles); only adopt the new
        # dataset when it actually grew.
        if cycle % 10 == 0:
            print("\n [REFRESH] Pulling latest game data...")
            try:
                pull_seasons()
                new_games = load_all_games()
                if len(new_games) > len(games):
                    games = new_games
                    X, y, feature_names = build_features(games)
                    print(f" [REFRESH] Updated: {X.shape}")
            except Exception as e:
                print(f" [REFRESH] Failed: {e}")

        # Cool down only in 24/7 mode; bounded runs finish as fast as possible.
        if total_cycles is None:
            print(f"\n Cooling down {cool_down}s before next cycle...")
            time.sleep(cool_down)

    print("\n" + "=" * 70)
    print(" EVOLUTION COMPLETE")
    if engine.best_ever:
        print(f" Final best: Brier={engine.best_ever.fitness['brier']:.4f} "
              f"ROI={engine.best_ever.fitness['roi']:.1%}")
    print("=" * 70)
|
| 1966 |
+
|
| 1967 |
+
|
| 1968 |
+
# ═══════════════════════════════════════════════════════════
|
| 1969 |
+
# CLI ENTRY POINT
|
| 1970 |
+
# ═══════════════════════════════════════════════════════════
|
| 1971 |
+
|
| 1972 |
+
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description="NBA Quant Genetic Evolution Loop v3")
    parser.add_argument("--continuous", action="store_true", help="Run 24/7 (no cycle limit)")
    parser.add_argument("--generations", type=int, default=10, help="Generations per cycle (default: 10)")
    parser.add_argument("--cycles", type=int, default=None, help="Number of cycles (default: infinite)")
    parser.add_argument("--pop-size", type=int, default=500, help="Population size (default: 500)")
    parser.add_argument("--target-features", type=int, default=100, help="Target features (default: 100)")
    # Default lowered 5 -> 3 per fix #4: "n_splits 5->3 for 1.6x faster walk-forward CV".
    parser.add_argument("--splits", type=int, default=3, help="Walk-forward splits (default: 3)")
    parser.add_argument("--cooldown", type=int, default=30, help="Seconds between cycles (default: 30)")
    args = parser.parse_args()

    # --continuous means infinite cycles; otherwise run the requested count
    # (falling back to a single cycle when --cycles is omitted).
    cycles = None if args.continuous else (args.cycles or 1)

    run_continuous(
        generations_per_cycle=args.generations,
        total_cycles=cycles,
        pop_size=args.pop_size,
        target_features=args.target_features,
        n_splits=args.splits,
        cool_down=args.cooldown,
    )
|