Nomos42 committed on
Commit
246e2bc
·
verified ·
1 Parent(s): ba22691

feat: evolution island (exploitation)

Browse files
Files changed (2) hide show
  1. app.py +180 -5
  2. data/calibration-map.json +41 -18
app.py CHANGED
@@ -2569,9 +2569,8 @@ def evolution_loop():
2569
  (cycle, generation, best_brier, best_roi, best_sharpe, best_calibration,
2570
  best_composite, best_features, best_model_type, pop_size, mutation_rate,
2571
  crossover_rate, stagnation, games, feature_candidates, cycle_duration_s,
2572
- avg_composite, pop_diversity, top5, selected_features,
2573
- space_id, cycles_no_brier_improvement)
2574
- VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
2575
  (cycle, generation, float(best_ever.fitness["brier"]), float(best_ever.fitness["roi"]),
2576
  float(best_ever.fitness["sharpe"]), float(best_ever.fitness.get("calibration", 0)),
2577
  float(best_ever.fitness["composite"]), int(best_ever.n_features),
@@ -2579,8 +2578,7 @@ def evolution_loop():
2579
  float(CROSSOVER_RATE), stagnation, len(games), n_feat,
2580
  float(time.time() - cycle_start), avg_comp, pop_diversity,
2581
  json.dumps(results.get("top5", [])[:5], default=str),
2582
- json.dumps(sel_names[:50], default=str),
2583
- _SPACE_ID, _cycles_no_brier_improvement))
2584
  _cur.close()
2585
  _dbconn.close()
2586
  log("[SUPABASE] Cycle logged OK")
@@ -3287,6 +3285,183 @@ async def api_predict(request: Request):
3287
  return JSONResponse({"error": f"prediction failed: {str(e)[:200]}"}, status_code=500)
3288
 
3289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3290
  with gr.Blocks(title="NOMOS NBA QUANT — Genetic Evolution", theme=gr.themes.Monochrome()) as app:
3291
  gr.Markdown("# NOMOS NBA QUANT AI — Island Model Genetic Evolution 24/7")
3292
  gr.Markdown("*500 individuals across 5 islands (NSGA-II Pareto ranking). 13 model types including neural nets. Multi-objective: Brier + ROI + Sharpe + Calibration.*")
 
2569
  (cycle, generation, best_brier, best_roi, best_sharpe, best_calibration,
2570
  best_composite, best_features, best_model_type, pop_size, mutation_rate,
2571
  crossover_rate, stagnation, games, feature_candidates, cycle_duration_s,
2572
+ avg_composite, pop_diversity, top5, selected_features)
2573
+ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
 
2574
  (cycle, generation, float(best_ever.fitness["brier"]), float(best_ever.fitness["roi"]),
2575
  float(best_ever.fitness["sharpe"]), float(best_ever.fitness.get("calibration", 0)),
2576
  float(best_ever.fitness["composite"]), int(best_ever.n_features),
 
2578
  float(CROSSOVER_RATE), stagnation, len(games), n_feat,
2579
  float(time.time() - cycle_start), avg_comp, pop_diversity,
2580
  json.dumps(results.get("top5", [])[:5], default=str),
2581
+ json.dumps(sel_names[:50], default=str)))
 
2582
  _cur.close()
2583
  _dbconn.close()
2584
  log("[SUPABASE] Cycle logged OK")
 
3285
  return JSONResponse({"error": f"prediction failed: {str(e)[:200]}"}, status_code=500)
3286
 
3287
 
3288
@control_api.post("/api/fit-calibration")
async def api_fit_calibration(request: Request):
    """Fit isotonic calibration from the best model's walk-forward holdout predictions.

    Uses the best evolved individual to produce out-of-sample (OOS) predictions
    via TimeSeriesSplit, then fits isotonic regression on (raw_prob, outcome)
    pairs.

    Returns JSON breakpoints compatible with IsotonicPostCalibrator, plus a
    ``calibration_map`` section in the calibration-map.json layout.

    Body (optional):
        {"n_splits": 5, "n_breakpoints": 20, "write_local": true}

    Responses:
        200 -- fitted breakpoints, metadata and calibration map.
        400 -- body is not a JSON object, or n_splits / n_breakpoints is invalid.
        422 -- fewer than 50 OOS predictions could be collected.
        500 -- model build failed or an unexpected error occurred.
        503 -- evolution state is not ready, or the best individual selects
               fewer than 5 features.
    """
    # Declared up front: the in-memory map is rebound when write_local is set.
    global _CAL_MAP

    MIN_SPLITS, MAX_SPLITS = 2, 20              # each split trains one model
    MIN_BREAKPOINTS, MAX_BREAKPOINTS = 10, 1000  # bounds the linspace grid size
    PURGE_GAP = 5
    MIN_OOS = 50

    def _error(message, status):
        return JSONResponse({"error": message}, status_code=status)

    def _is_int(value):
        # bool is a subclass of int; True/False are not meaningful counts.
        return isinstance(value, int) and not isinstance(value, bool)

    def _ece_quick(probs, actuals, n_bins=10):
        """Expected calibration error over equal-width bins on [0, 1]."""
        edges = np.linspace(0, 1, n_bins + 1)
        total = len(probs)
        ece_val = 0.0
        for i in range(n_bins):
            in_bin = probs >= edges[i]
            if i == n_bins - 1:
                # Last bin is closed on the right so that probabilities equal
                # to exactly 1.0 (possible after isotonic mapping) are counted.
                in_bin &= probs <= edges[i + 1]
            else:
                in_bin &= probs < edges[i + 1]
            count = int(in_bin.sum())
            if count == 0:
                continue
            gap = abs(probs[in_bin].mean() - actuals[in_bin].mean())
            ece_val += count / total * gap
        return float(ece_val)

    if _evo_best is None or _evo_X is None or _evo_y is None:
        return _error("evolution not ready", 503)

    try:
        # The body is optional: a missing or unparsable body means "defaults".
        try:
            body = await request.json()
        except Exception:
            body = {}
        if not isinstance(body, dict):
            return _error("request body must be a JSON object", 400)

        n_splits = body.get("n_splits", 5)
        n_breakpoints = body.get("n_breakpoints", 20)
        write_local = body.get("write_local", True)

        if not _is_int(n_splits) or not MIN_SPLITS <= n_splits <= MAX_SPLITS:
            return _error(
                f"n_splits must be an integer in [{MIN_SPLITS}, {MAX_SPLITS}]", 400
            )
        if not _is_int(n_breakpoints):
            return _error("n_breakpoints must be an integer", 400)
        # Small values are raised to the minimum; huge values are capped so a
        # request cannot force an arbitrarily large grid allocation.
        n_breakpoints = min(max(n_breakpoints, MIN_BREAKPOINTS), MAX_BREAKPOINTS)

        best = _evo_best
        selected = [i for i, b in enumerate(best["features"]) if b]
        hp = dict(best["hyperparams"])

        if len(selected) < 5:
            return _error("best individual has too few features", 503)

        X_sub = np.nan_to_num(_evo_X[:, selected], nan=0.0, posinf=1e6, neginf=-1e6)
        y = _evo_y

        # Build the base model (no in-loop calibration -- we want raw probs)
        hp_build = dict(hp)
        hp_build["calibration"] = "none"
        hp_build["n_estimators"] = min(hp.get("n_estimators", 150), 200)
        hp_build["max_depth"] = min(hp.get("max_depth", 6), 8)
        if hp_build.get("model_type") == "stacking":
            hp_build["model_type"] = "xgboost"

        model = _build(hp_build)
        if model is None:
            return _error("model build failed", 500)

        # Walk-forward: collect OOS raw predictions
        # NOTE(review): fitting runs synchronously inside this async handler and
        # presumably blocks the event loop while training -- confirm acceptable.
        tscv = TimeSeriesSplit(n_splits=n_splits)
        all_raw = []
        all_actual = []

        for ti, vi in tscv.split(X_sub):
            try:
                # Drop the last PURGE_GAP training rows next to the validation
                # window, unless the training fold is too small to afford it.
                ti_safe = ti[:-PURGE_GAP] if len(ti) > PURGE_GAP + 50 else ti
                m = clone(model)
                m.fit(X_sub[ti_safe], y[ti_safe])
                p = m.predict_proba(X_sub[vi])[:, 1]
                p = np.clip(p, 0.025, 0.975)
                all_raw.extend(p.tolist())
                all_actual.extend(y[vi].tolist())
            except Exception as fold_err:
                # Best-effort per fold: skip it, but leave a trace instead of
                # hiding the failure entirely.
                log(f"[CALIBRATION] fold skipped: {str(fold_err)[:200]}")

        if len(all_raw) < MIN_OOS:
            return _error(
                f"insufficient OOS predictions ({len(all_raw)}), need 50+", 422
            )

        raw_arr = np.array(all_raw)
        act_arr = np.array(all_actual)

        # Brier before calibration
        brier_before = float(np.mean((raw_arr - act_arr) ** 2))

        # Fit isotonic regression (sklearn is available on HF Space)
        from sklearn.isotonic import IsotonicRegression
        ir = IsotonicRegression(out_of_bounds="clip", y_min=0.0, y_max=1.0)
        ir.fit(raw_arr, act_arr)

        # Produce compact breakpoints on a fine grid
        x_grid = np.linspace(0.0, 1.0, n_breakpoints)
        y_grid = ir.predict(x_grid)
        # Running maximum guarantees a non-decreasing (not strictly increasing) curve.
        y_grid = np.maximum.accumulate(y_grid)

        # Brier after calibration (in-sample, for reporting)
        cal_probs = ir.predict(raw_arr)
        brier_after = float(np.mean((cal_probs - act_arr) ** 2))

        # ECE before/after
        ece_before = _ece_quick(raw_arr, act_arr)
        ece_after = _ece_quick(cal_probs, act_arr)

        # Coarse 5-bin summary used by the calibration-map.json layout.
        bin_edges = [0.0, 0.2, 0.4, 0.6, 0.8, 1.0]
        raw_centers = [0.1, 0.3, 0.5, 0.7, 0.9]
        calibrated_centers = [
            round(float(v), 4) for v in ir.predict(np.array(raw_centers))
        ]
        # np.histogram uses half-open bins with a closed last bin; raw_arr is
        # clipped to [0.025, 0.975] above, so every prediction lands in a bin.
        bin_counts = [int(c) for c in np.histogram(raw_arr, bins=bin_edges)[0]]

        now = datetime.now(timezone.utc).isoformat()
        model_type = hp.get("model_type", "unknown")
        result = {
            "x_points": [round(float(x), 4) for x in x_grid],
            "y_points": [round(float(v), 4) for v in y_grid],
            "metadata": {
                "identity": False,
                "n_samples": len(all_raw),
                "n_breakpoints": len(x_grid),
                "fitted_on": "HF Space",
                "fitted_at": now,
                "model_type": model_type,
                "n_features": len(selected),
                "generation": best.get("generation", -1),
                "brier_before": round(brier_before, 5),
                "brier_after": round(brier_after, 5),
                "brier_delta": round(brier_after - brier_before, 5),
                "ece_before": round(ece_before, 5),
                "ece_after": round(ece_after, 5),
            },
            # Also output calibration-map.json format for scripts/calibration.py
            "calibration_map": {
                "_meta": {
                    "version": "3.0",
                    "created": now[:10],
                    "generated_at": now,
                    "source": "HF Space /api/fit-calibration (sklearn IsotonicRegression)",
                    "model_version": f"{model_type}-gen{best.get('generation', '?')}",
                    "n_games_used": len(all_raw),
                    "brier_before": round(brier_before, 5),
                    "brier_after": round(brier_after, 5),
                    "ece_before": round(ece_before, 5),
                    "ece_after": round(ece_after, 5),
                },
                "bin_edges": bin_edges,
                "raw_centers": raw_centers,
                "calibrated_centers": calibrated_centers,
                "bin_counts": bin_counts,
            },
        }

        # Optionally write to local persistent storage
        if write_local:
            bp_path = DATA_DIR / "isotonic_breakpoints.json"
            bp_path.write_text(
                json.dumps(
                    {
                        "x_points": result["x_points"],
                        "y_points": result["y_points"],
                        "metadata": result["metadata"],
                    },
                    indent=2,
                ),
                encoding="utf-8",
            )
            cm_path = DATA_DIR / "calibration-map.json"
            cm_path.write_text(
                json.dumps(result["calibration_map"], indent=2), encoding="utf-8"
            )
            # Update in-memory calibration map
            _CAL_MAP = (bin_edges, raw_centers, calibrated_centers)

        return JSONResponse(result)

    except Exception as e:
        return JSONResponse(
            {"error": f"fit-calibration failed: {str(e)[:300]}"}, status_code=500
        )
3463
+
3464
+
3465
  with gr.Blocks(title="NOMOS NBA QUANT — Genetic Evolution", theme=gr.themes.Monochrome()) as app:
3466
  gr.Markdown("# NOMOS NBA QUANT AI — Island Model Genetic Evolution 24/7")
3467
  gr.Markdown("*500 individuals across 5 islands (NSGA-II Pareto ranking). 13 model types including neural nets. Multi-objective: Brier + ROI + Sharpe + Calibration.*")
data/calibration-map.json CHANGED
@@ -1,22 +1,45 @@
1
  {
2
  "_meta": {
3
- "version": "1.0",
4
- "created": "2026-03-31",
5
- "source": "D5 Evaluation — ECE 0.2758 audit",
6
- "model_version": "ensemble-v1 / Brier 0.21570",
7
- "notes": "60-70% bin is severely over-confident: only 16.7% actual win rate vs 65% predicted. Corrected via D5 audit. Update this map periodically from HF space evaluation results.",
8
- "ece_before": 0.2758,
9
- "n_games_used": 31,
10
- "date_range": "2026-03-15 to 2026-03-31"
 
 
 
 
11
  },
12
- "bin_edges": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
13
- "bin_counts": [0, 0, 2, 4, 5, 6, 6, 4, 3, 1],
14
- "raw_centers": [0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95],
15
- "calibrated_centers": [0.05, 0.15, 0.25, 0.35, 0.45, 0.50, 0.35, 0.65, 0.80, 0.92],
16
- "bin_notes": {
17
- "bin_6": "60-70% bucket: raw 0.65 -> calibrated 0.35 (D5: only 16.7% actual win rate in this bucket)",
18
- "bin_7": "70-80% bucket: raw 0.75 -> calibrated 0.65 (moderate over-confidence detected)",
19
- "bin_8": "80-90% bucket: raw 0.85 -> calibrated 0.80 (slight over-confidence, minor correction)",
20
- "bin_9": "90-100% bucket: raw 0.95 -> calibrated 0.92 (small correction, low sample size)"
21
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  }
 
1
  {
2
  "_meta": {
3
+ "version": "2.0",
4
+ "created": "2026-04-10",
5
+ "generated_at": "2026-04-10T19:35:17.183416+00:00",
6
+ "source": "scripts/calibration_fit.py (Pool Adjacent Violators + empirical blend)",
7
+ "model_version": "ensemble v1 / real_predictions_loader",
8
+ "notes": "Rebuilt from real matched predictions (prospective, no look-ahead). Previous 31-game hand-tuned map over-corrected bin 6 0.65->0.35. This fit uses 70% PAV curve + 30% empirical bin rate to borrow strength from neighbouring bins in low-count regions.",
9
+ "n_games_used": 104,
10
+ "date_range": "2026-03-16 to 2026-04-05",
11
+ "brier_before": 0.24246,
12
+ "brier_after": 0.23057,
13
+ "ece_before": 0.123,
14
+ "ece_after": 0.06217
15
  },
16
+ "bin_edges": [
17
+ 0.0,
18
+ 0.2,
19
+ 0.4,
20
+ 0.6,
21
+ 0.8,
22
+ 1.0
23
+ ],
24
+ "bin_counts": [
25
+ 4,
26
+ 20,
27
+ 26,
28
+ 44,
29
+ 10
30
+ ],
31
+ "raw_centers": [
32
+ 0.1,
33
+ 0.3,
34
+ 0.5,
35
+ 0.7,
36
+ 0.9
37
+ ],
38
+ "calibrated_centers": [
39
+ 0.075,
40
+ 0.4417,
41
+ 0.5365,
42
+ 0.5556,
43
+ 0.97
44
+ ]
45
  }