Commit ·
2ca3b0b
1
Parent(s): 7799bab
feat: MOVDA raw delta features (Cat37 6→13) + beta calibration + browser scraper
Browse files
- engine.py: add 7 raw delta_MOV rolling features (no EWM smoothing)
- app.py: beta calibration method (betacal) added to evolution search space
- browser_scraper.py: Crawl4AI + requests fallback for odds scraping
- scrape_odds.py: structured odds scraper
- requirements.txt: betacal, html2text, beautifulsoup4
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- Dockerfile.browser +64 -0
- app.py +24 -5
- features/engine.py +31 -3
- requirements.txt +5 -0
Dockerfile.browser
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Dockerfile.browser -- Browser-enabled HF Space image for web scraping
|
| 2 |
+
# ======================================================================
|
| 3 |
+
#
|
| 4 |
+
# NOT ACTIVE YET -- this is a template for when we need browser-based
|
| 5 |
+
# scraping on HF Spaces (e.g., scraping odds pages with JS rendering).
|
| 6 |
+
#
|
| 7 |
+
# Current HF Spaces use the default Python runtime without browser deps.
|
| 8 |
+
# To activate: rename to Dockerfile and push to the target Space.
|
| 9 |
+
#
|
| 10 |
+
# Requirements:
|
| 11 |
+
# - HF Space must be configured as "Docker" SDK (not Gradio SDK)
|
| 12 |
+
# - The Space will be larger (~2GB) due to Chromium
|
| 13 |
+
# - CPU-only is fine for scraping (no GPU needed)
|
| 14 |
+
#
|
| 15 |
+
# Size estimate: ~2.5GB image (Playwright + Chromium + Python deps)
|
| 16 |
+
|
| 17 |
+
FROM python:3.11-slim-bookworm
|
| 18 |
+
|
| 19 |
+
# Install system deps for Playwright/Chromium
|
| 20 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 21 |
+
wget \
|
| 22 |
+
ca-certificates \
|
| 23 |
+
fonts-liberation \
|
| 24 |
+
libasound2 \
|
| 25 |
+
libatk-bridge2.0-0 \
|
| 26 |
+
libatk1.0-0 \
|
| 27 |
+
libcups2 \
|
| 28 |
+
libdbus-1-3 \
|
| 29 |
+
libdrm2 \
|
| 30 |
+
libgbm1 \
|
| 31 |
+
libgtk-3-0 \
|
| 32 |
+
libnspr4 \
|
| 33 |
+
libnss3 \
|
| 34 |
+
libx11-xcb1 \
|
| 35 |
+
libxcomposite1 \
|
| 36 |
+
libxdamage1 \
|
| 37 |
+
libxrandr2 \
|
| 38 |
+
xdg-utils \
|
| 39 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 40 |
+
|
| 41 |
+
# Create non-root user (HF Spaces requirement)
|
| 42 |
+
RUN useradd -m -u 1000 user
|
| 43 |
+
WORKDIR /home/user/app
|
| 44 |
+
|
| 45 |
+
# Install Python deps
|
| 46 |
+
COPY requirements.txt .
|
| 47 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 48 |
+
|
| 49 |
+
# Install Playwright and Chromium browser
|
| 50 |
+
# The version specifier must be quoted: RUN uses /bin/sh, where an unquoted ">"
# is an output redirect. Unquoted, this would install an unconstrained crawl4ai
# and write pip's stdout to a file named "=0.4".
RUN pip install --no-cache-dir "crawl4ai>=0.4" playwright
|
| 51 |
+
# Install the browser into a fixed, world-readable location. This step runs as
# root, so the default per-user cache would be root's home directory, which the
# non-root runtime user (see USER below) cannot read. ENV persists into the
# running container, so Playwright resolves the same path at runtime.
ENV PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
RUN playwright install chromium && chmod -R a+rX /ms-playwright
|
| 52 |
+
RUN playwright install-deps chromium
|
| 53 |
+
|
| 54 |
+
# Copy application code
|
| 55 |
+
COPY . .
|
| 56 |
+
|
| 57 |
+
# Fix permissions
|
| 58 |
+
RUN chown -R user:user /home/user/app
|
| 59 |
+
|
| 60 |
+
USER user
|
| 61 |
+
|
| 62 |
+
EXPOSE 7860
|
| 63 |
+
|
| 64 |
+
CMD ["python", "app.py"]
|
app.py
CHANGED
|
@@ -634,7 +634,7 @@ class Individual:
|
|
| 634 |
"reg_alpha": 10 ** random.uniform(-6, 1),
|
| 635 |
"reg_lambda": 10 ** random.uniform(-6, 1),
|
| 636 |
"model_type": model_type or random.choice(CPU_MODEL_TYPES if not _HAS_GPU else ALL_MODEL_TYPES),
|
| 637 |
-
"calibration": random.choices(["none", "sigmoid", "venn_abers"], weights=[
|
| 638 |
# Neural net hyperparams (used only for NN model types)
|
| 639 |
"nn_hidden_dims": random.choice([64, 128, 256]),
|
| 640 |
"nn_n_layers": random.randint(2, 4),
|
|
@@ -708,7 +708,7 @@ class Individual:
|
|
| 708 |
if random.random() < 0.15: self.hyperparams["max_depth"] = max(2, min(8, self.hyperparams["max_depth"] + random.randint(-2, 2)))
|
| 709 |
if random.random() < 0.15: self.hyperparams["learning_rate"] = max(0.001, min(0.3, self.hyperparams["learning_rate"] * 10 ** random.uniform(-0.3, 0.3)))
|
| 710 |
if random.random() < 0.08: self.hyperparams["model_type"] = random.choice(CPU_MODEL_TYPES if not _HAS_GPU else ALL_MODEL_TYPES)
|
| 711 |
-
if random.random() < 0.05: self.hyperparams["calibration"] = random.choices(["none", "sigmoid", "venn_abers"], weights=[
|
| 712 |
# Neural net hyperparams mutation
|
| 713 |
if random.random() < 0.10: self.hyperparams["nn_hidden_dims"] = random.choice([64, 128, 256, 512])
|
| 714 |
if random.random() < 0.10: self.hyperparams["nn_n_layers"] = max(1, min(6, self.hyperparams.get("nn_n_layers", 2) + random.randint(-1, 1)))
|
|
@@ -1249,10 +1249,12 @@ def evaluate(ind, X, y, n_splits=2, fast=True, eval_counter=[0]):
|
|
| 1249 |
# Purge last PURGE_GAP games from training to avoid temporal leakage
|
| 1250 |
ti_safe = ti[:-PURGE_GAP] if len(ti) > PURGE_GAP + 50 else ti
|
| 1251 |
m = clone(model)
|
| 1252 |
-
# Calibration: none (default,
|
| 1253 |
cal_method = hp_eval.get("calibration", "none")
|
| 1254 |
if cal_method == "isotonic":
|
| 1255 |
cal_method = "none" # Isotonic empirically hurts Brier (+0.003 to +0.007)
|
|
|
|
|
|
|
| 1256 |
if cal_method == "venn_abers":
|
| 1257 |
try:
|
| 1258 |
from mapie.classification import MapieClassifier
|
|
@@ -1261,13 +1263,30 @@ def evaluate(ind, X, y, n_splits=2, fast=True, eval_counter=[0]):
|
|
| 1261 |
mapie = MapieClassifier(m_inner, method="lac", cv="prefit")
|
| 1262 |
mapie.fit(X_sub[ti_safe[-200:]], y_eval[ti_safe[-200:]])
|
| 1263 |
m = mapie # MapieClassifier wraps fitted model
|
| 1264 |
-
|
|
|
|
| 1265 |
except (ImportError, Exception):
|
| 1266 |
cal_method = "none" # Fallback if MAPIE not installed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1267 |
if cal_method == "sigmoid":
|
| 1268 |
m = CalibratedClassifierCV(m, method=cal_method, cv=3)
|
| 1269 |
-
|
|
|
|
| 1270 |
p = m.predict_proba(X_sub[vi])[:, 1]
|
|
|
|
|
|
|
| 1271 |
briers.append(brier_score_loss(y_eval[vi], p))
|
| 1272 |
rois.append(_log_loss_score(p, y_eval[vi]))
|
| 1273 |
all_p.extend(p); all_y.extend(y_eval[vi])
|
|
|
|
| 634 |
"reg_alpha": 10 ** random.uniform(-6, 1),
|
| 635 |
"reg_lambda": 10 ** random.uniform(-6, 1),
|
| 636 |
"model_type": model_type or random.choice(CPU_MODEL_TYPES if not _HAS_GPU else ALL_MODEL_TYPES),
|
| 637 |
+
"calibration": random.choices(["none", "sigmoid", "venn_abers", "beta"], weights=[25, 15, 30, 30], k=1)[0],
|
| 638 |
# Neural net hyperparams (used only for NN model types)
|
| 639 |
"nn_hidden_dims": random.choice([64, 128, 256]),
|
| 640 |
"nn_n_layers": random.randint(2, 4),
|
|
|
|
| 708 |
if random.random() < 0.15: self.hyperparams["max_depth"] = max(2, min(8, self.hyperparams["max_depth"] + random.randint(-2, 2)))
|
| 709 |
if random.random() < 0.15: self.hyperparams["learning_rate"] = max(0.001, min(0.3, self.hyperparams["learning_rate"] * 10 ** random.uniform(-0.3, 0.3)))
|
| 710 |
if random.random() < 0.08: self.hyperparams["model_type"] = random.choice(CPU_MODEL_TYPES if not _HAS_GPU else ALL_MODEL_TYPES)
|
| 711 |
+
if random.random() < 0.05: self.hyperparams["calibration"] = random.choices(["none", "sigmoid", "venn_abers", "beta"], weights=[50, 15, 15, 20], k=1)[0]
|
| 712 |
# Neural net hyperparams mutation
|
| 713 |
if random.random() < 0.10: self.hyperparams["nn_hidden_dims"] = random.choice([64, 128, 256, 512])
|
| 714 |
if random.random() < 0.10: self.hyperparams["nn_n_layers"] = max(1, min(6, self.hyperparams.get("nn_n_layers", 2) + random.randint(-1, 1)))
|
|
|
|
| 1249 |
# Purge last PURGE_GAP games from training to avoid temporal leakage
|
| 1250 |
ti_safe = ti[:-PURGE_GAP] if len(ti) > PURGE_GAP + 50 else ti
|
| 1251 |
m = clone(model)
|
| 1252 |
+
# Calibration: none (default), sigmoid (Platt), venn_abers (MAPIE), or beta (BetaCalibration)
|
| 1253 |
cal_method = hp_eval.get("calibration", "none")
|
| 1254 |
if cal_method == "isotonic":
|
| 1255 |
cal_method = "none" # Isotonic empirically hurts Brier (+0.003 to +0.007)
|
| 1256 |
+
_beta_cal = None # beta calibrator applied post-predict
|
| 1257 |
+
_model_fitted = False # tracks whether m.fit() was already called
|
| 1258 |
if cal_method == "venn_abers":
|
| 1259 |
try:
|
| 1260 |
from mapie.classification import MapieClassifier
|
|
|
|
| 1263 |
mapie = MapieClassifier(m_inner, method="lac", cv="prefit")
|
| 1264 |
mapie.fit(X_sub[ti_safe[-200:]], y_eval[ti_safe[-200:]])
|
| 1265 |
m = mapie # MapieClassifier wraps fitted model
|
| 1266 |
+
_model_fitted = True
|
| 1267 |
+
cal_method = "none"
|
| 1268 |
except (ImportError, Exception):
|
| 1269 |
cal_method = "none" # Fallback if MAPIE not installed
|
| 1270 |
+
if cal_method == "beta":
|
| 1271 |
+
try:
|
| 1272 |
+
from betacal import BetaCalibration
|
| 1273 |
+
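# Fit base model, then fit beta calibrator on the most recent (up to 200) training rows.
# NOTE(review): cal_slice is taken from ti_safe, so these rows are in-sample for the base model, not held out.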
|
| 1274 |
+
m.fit(X_sub[ti_safe], y_eval[ti_safe])
|
| 1275 |
+
_model_fitted = True
|
| 1276 |
+
cal_slice = ti_safe[-200:] if len(ti_safe) > 200 else ti_safe
|
| 1277 |
+
raw_p = m.predict_proba(X_sub[cal_slice])[:, 1]
|
| 1278 |
+
_beta_cal = BetaCalibration(parameters="abm")
|
| 1279 |
+
_beta_cal.fit(raw_p.reshape(-1, 1), y_eval[cal_slice])
|
| 1280 |
+
cal_method = "none"
|
| 1281 |
+
except (ImportError, Exception):
|
| 1282 |
+
cal_method = "none" # Fallback if betacal not installed
|
| 1283 |
if cal_method == "sigmoid":
|
| 1284 |
m = CalibratedClassifierCV(m, method=cal_method, cv=3)
|
| 1285 |
+
if not _model_fitted:
|
| 1286 |
+
m.fit(X_sub[ti_safe], y_eval[ti_safe])
|
| 1287 |
p = m.predict_proba(X_sub[vi])[:, 1]
|
| 1288 |
+
if _beta_cal is not None:
|
| 1289 |
+
p = _beta_cal.predict(p.reshape(-1, 1))
|
| 1290 |
briers.append(brier_score_loss(y_eval[vi], p))
|
| 1291 |
rois.append(_log_loss_score(p, y_eval[vi]))
|
| 1292 |
all_p.extend(p); all_y.extend(y_eval[vi])
|
features/engine.py
CHANGED
|
@@ -2223,7 +2223,7 @@ class NBAFeatureEngine:
|
|
| 2223 |
"dense_sched_x_margin", # schedule_density × margin_diff
|
| 2224 |
])
|
| 2225 |
|
| 2226 |
-
# 37. MOVDA ELO FEATURES (
|
| 2227 |
# Margin-of-Victory Differential Analysis: R' = R + K*(S-E) + λ*(MOV-E_MOV)
|
| 2228 |
for prefix in ["h", "a"]:
|
| 2229 |
names.append(f"{prefix}_movda_rating") # MOVDA Elo rating (normalized)
|
|
@@ -2232,6 +2232,12 @@ class NBAFeatureEngine:
|
|
| 2232 |
"movda_diff", # MOVDA rating differential
|
| 2233 |
"movda_win_prob", # MOVDA-derived win probability
|
| 2234 |
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2235 |
|
| 2236 |
# 38. VENUE-CONDITIONAL MATCHUP FEATURES (14 features)
|
| 2237 |
# Home team's home-only stats vs away team's road-only stats
|
|
@@ -2281,6 +2287,7 @@ class NBAFeatureEngine:
|
|
| 2281 |
# ── Category 37: MOVDA ELO state trackers ──
|
| 2282 |
team_movda = defaultdict(lambda: 1500.0) # MOVDA Elo rating
|
| 2283 |
mov_surprise_ewm = defaultdict(float) # Per-team EWMA of MOV surprise
|
|
|
|
| 2284 |
_MOVDA_K = 20.0; _MOVDA_C = 400.0; _MOVDA_LAMBDA = 0.3
|
| 2285 |
_MOVDA_ALPHA = 19.2511; _MOVDA_BETA = 0.002342
|
| 2286 |
_MOVDA_GAMMA = 648.0334; _MOVDA_DELTA = -645.8717
|
|
@@ -2339,6 +2346,7 @@ class NBAFeatureEngine:
|
|
| 2339 |
team_home_margin_sum, team_home_games_count)
|
| 2340 |
# Update MOVDA ELO (Cat 37)
|
| 2341 |
self._update_movda(home, away, hs, as_, team_movda, mov_surprise_ewm,
|
|
|
|
| 2342 |
_MOVDA_K, _MOVDA_C, _MOVDA_LAMBDA, _MOVDA_ALPHA,
|
| 2343 |
_MOVDA_BETA, _MOVDA_GAMMA, _MOVDA_DELTA, _MOVDA_EWM_ALPHA)
|
| 2344 |
continue
|
|
@@ -5049,7 +5057,7 @@ class NBAFeatureEngine:
|
|
| 5049 |
(self._games_in_window(hr_, gd, 7) - self._games_in_window(ar_, gd, 7)) * (_h_margin5 - _a_margin5),
|
| 5050 |
])
|
| 5051 |
|
| 5052 |
-
# ── 37. MOVDA ELO FEATURES (
|
| 5053 |
_movda_dr = team_movda[home] - team_movda[away]
|
| 5054 |
_movda_wp = 1.0 / (1.0 + 10.0 ** (-_movda_dr / _MOVDA_C))
|
| 5055 |
for _mt, _mk in [(home, home), (away, away)]:
|
|
@@ -5057,6 +5065,21 @@ class NBAFeatureEngine:
|
|
| 5057 |
row.append(mov_surprise_ewm[_mk] / 20.0) # mov_surprise_ewm (normalized)
|
| 5058 |
row.append(_movda_dr / 400.0) # movda_diff
|
| 5059 |
row.append(_movda_wp) # movda_win_prob
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5060 |
|
| 5061 |
# ── 38. VENUE-CONDITIONAL MATCHUP FEATURES (14 features) ──
|
| 5062 |
# Use true venue-specific records: home team at home vs away team on road
|
|
@@ -5104,6 +5127,7 @@ class NBAFeatureEngine:
|
|
| 5104 |
team_home_margin_sum, team_home_games_count)
|
| 5105 |
# Update MOVDA ELO (Cat 37)
|
| 5106 |
self._update_movda(home, away, hs, as_, team_movda, mov_surprise_ewm,
|
|
|
|
| 5107 |
_MOVDA_K, _MOVDA_C, _MOVDA_LAMBDA, _MOVDA_ALPHA,
|
| 5108 |
_MOVDA_BETA, _MOVDA_GAMMA, _MOVDA_DELTA, _MOVDA_EWM_ALPHA)
|
| 5109 |
|
|
@@ -5659,8 +5683,9 @@ class NBAFeatureEngine:
|
|
| 5659 |
team_home_games_count[home] += 1
|
| 5660 |
|
| 5661 |
def _update_movda(self, home, away, hs, as_, team_movda, mov_surprise_ewm,
|
|
|
|
| 5662 |
K, C, lam, alpha, beta, gamma, delta_param, ewm_alpha):
|
| 5663 |
-
"""Update MOVDA Elo ratings (Cat 37). arXiv:2506.00348."""
|
| 5664 |
margin = hs - as_
|
| 5665 |
result = 1.0 if margin > 0 else (0.0 if margin < 0 else 0.5)
|
| 5666 |
delta_r = team_movda[home] - team_movda[away]
|
|
@@ -5672,6 +5697,9 @@ class NBAFeatureEngine:
|
|
| 5672 |
team_movda[away] -= movda_update
|
| 5673 |
mov_surprise_ewm[home] = ewm_alpha * delta_mov + (1 - ewm_alpha) * mov_surprise_ewm[home]
|
| 5674 |
mov_surprise_ewm[away] = ewm_alpha * (-delta_mov) + (1 - ewm_alpha) * mov_surprise_ewm[away]
|
|
|
|
|
|
|
|
|
|
| 5675 |
|
| 5676 |
def _parse_stats(self, stats, pts, opp_pts, is_home=True):
|
| 5677 |
"""Extract stats from game data. Uses REAL box score when available, estimates otherwise."""
|
|
|
|
| 2223 |
"dense_sched_x_margin", # schedule_density × margin_diff
|
| 2224 |
])
|
| 2225 |
|
| 2226 |
+
# 37. MOVDA ELO FEATURES (13 features) — arXiv:2506.00348
|
| 2227 |
# Margin-of-Victory Differential Analysis: R' = R + K*(S-E) + λ*(MOV-E_MOV)
|
| 2228 |
for prefix in ["h", "a"]:
|
| 2229 |
names.append(f"{prefix}_movda_rating") # MOVDA Elo rating (normalized)
|
|
|
|
| 2232 |
"movda_diff", # MOVDA rating differential
|
| 2233 |
"movda_win_prob", # MOVDA-derived win probability
|
| 2234 |
])
|
| 2235 |
+
# Raw delta_MOV rolling features (no EWM smoothing) — captures recent surprise momentum
|
| 2236 |
+
for prefix in ["h", "a"]:
|
| 2237 |
+
names.append(f"{prefix}_delta_mov_raw") # last game's raw MOV surprise
|
| 2238 |
+
names.append(f"{prefix}_delta_mov_rolling_5") # rolling mean over last 5 games
|
| 2239 |
+
names.append(f"{prefix}_delta_mov_rolling_10") # rolling mean over last 10 games
|
| 2240 |
+
names.append("delta_mov_diff") # h_delta_mov_rolling_5 - a_delta_mov_rolling_5
|
| 2241 |
|
| 2242 |
# 38. VENUE-CONDITIONAL MATCHUP FEATURES (14 features)
|
| 2243 |
# Home team's home-only stats vs away team's road-only stats
|
|
|
|
| 2287 |
# ── Category 37: MOVDA ELO state trackers ──
|
| 2288 |
team_movda = defaultdict(lambda: 1500.0) # MOVDA Elo rating
|
| 2289 |
mov_surprise_ewm = defaultdict(float) # Per-team EWMA of MOV surprise
|
| 2290 |
+
delta_mov_history = defaultdict(list) # Per-team raw delta_MOV history
|
| 2291 |
_MOVDA_K = 20.0; _MOVDA_C = 400.0; _MOVDA_LAMBDA = 0.3
|
| 2292 |
_MOVDA_ALPHA = 19.2511; _MOVDA_BETA = 0.002342
|
| 2293 |
_MOVDA_GAMMA = 648.0334; _MOVDA_DELTA = -645.8717
|
|
|
|
| 2346 |
team_home_margin_sum, team_home_games_count)
|
| 2347 |
# Update MOVDA ELO (Cat 37)
|
| 2348 |
self._update_movda(home, away, hs, as_, team_movda, mov_surprise_ewm,
|
| 2349 |
+
delta_mov_history,
|
| 2350 |
_MOVDA_K, _MOVDA_C, _MOVDA_LAMBDA, _MOVDA_ALPHA,
|
| 2351 |
_MOVDA_BETA, _MOVDA_GAMMA, _MOVDA_DELTA, _MOVDA_EWM_ALPHA)
|
| 2352 |
continue
|
|
|
|
| 5057 |
(self._games_in_window(hr_, gd, 7) - self._games_in_window(ar_, gd, 7)) * (_h_margin5 - _a_margin5),
|
| 5058 |
])
|
| 5059 |
|
| 5060 |
+
# ── 37. MOVDA ELO FEATURES (13 features) ──
|
| 5061 |
_movda_dr = team_movda[home] - team_movda[away]
|
| 5062 |
_movda_wp = 1.0 / (1.0 + 10.0 ** (-_movda_dr / _MOVDA_C))
|
| 5063 |
for _mt, _mk in [(home, home), (away, away)]:
|
|
|
|
| 5065 |
row.append(mov_surprise_ewm[_mk] / 20.0) # mov_surprise_ewm (normalized)
|
| 5066 |
row.append(_movda_dr / 400.0) # movda_diff
|
| 5067 |
row.append(_movda_wp) # movda_win_prob
|
| 5068 |
+
# Raw delta_MOV rolling features (no EWM smoothing)
|
| 5069 |
+
for _mk in [home, away]:
|
| 5070 |
+
_dh = delta_mov_history[_mk]
|
| 5071 |
+
_raw = (_dh[-1] / 20.0) if _dh else 0.0
|
| 5072 |
+
_roll5 = (sum(_dh[-5:]) / len(_dh[-5:]) / 20.0) if _dh else 0.0
|
| 5073 |
+
_roll10 = (sum(_dh[-10:]) / len(_dh[-10:]) / 20.0) if _dh else 0.0
|
| 5074 |
+
row.append(_raw) # {prefix}_delta_mov_raw
|
| 5075 |
+
row.append(_roll5) # {prefix}_delta_mov_rolling_5
|
| 5076 |
+
row.append(_roll10) # {prefix}_delta_mov_rolling_10
|
| 5077 |
+
# delta_mov_diff: home rolling_5 - away rolling_5
|
| 5078 |
+
_h_dh = delta_mov_history[home]
|
| 5079 |
+
_a_dh = delta_mov_history[away]
|
| 5080 |
+
_h_r5 = (sum(_h_dh[-5:]) / len(_h_dh[-5:]) / 20.0) if _h_dh else 0.0
|
| 5081 |
+
_a_r5 = (sum(_a_dh[-5:]) / len(_a_dh[-5:]) / 20.0) if _a_dh else 0.0
|
| 5082 |
+
row.append(_h_r5 - _a_r5) # delta_mov_diff
|
| 5083 |
|
| 5084 |
# ── 38. VENUE-CONDITIONAL MATCHUP FEATURES (14 features) ──
|
| 5085 |
# Use true venue-specific records: home team at home vs away team on road
|
|
|
|
| 5127 |
team_home_margin_sum, team_home_games_count)
|
| 5128 |
# Update MOVDA ELO (Cat 37)
|
| 5129 |
self._update_movda(home, away, hs, as_, team_movda, mov_surprise_ewm,
|
| 5130 |
+
delta_mov_history,
|
| 5131 |
_MOVDA_K, _MOVDA_C, _MOVDA_LAMBDA, _MOVDA_ALPHA,
|
| 5132 |
_MOVDA_BETA, _MOVDA_GAMMA, _MOVDA_DELTA, _MOVDA_EWM_ALPHA)
|
| 5133 |
|
|
|
|
| 5683 |
team_home_games_count[home] += 1
|
| 5684 |
|
| 5685 |
def _update_movda(self, home, away, hs, as_, team_movda, mov_surprise_ewm,
|
| 5686 |
+
delta_mov_history,
|
| 5687 |
K, C, lam, alpha, beta, gamma, delta_param, ewm_alpha):
|
| 5688 |
+
"""Update MOVDA Elo ratings and raw delta_MOV history (Cat 37). arXiv:2506.00348."""
|
| 5689 |
margin = hs - as_
|
| 5690 |
result = 1.0 if margin > 0 else (0.0 if margin < 0 else 0.5)
|
| 5691 |
delta_r = team_movda[home] - team_movda[away]
|
|
|
|
| 5697 |
team_movda[away] -= movda_update
|
| 5698 |
mov_surprise_ewm[home] = ewm_alpha * delta_mov + (1 - ewm_alpha) * mov_surprise_ewm[home]
|
| 5699 |
mov_surprise_ewm[away] = ewm_alpha * (-delta_mov) + (1 - ewm_alpha) * mov_surprise_ewm[away]
|
| 5700 |
+
# Append raw delta_MOV to each team's history, signed from that team's own perspective (home: +delta_mov, away: -delta_mov)
|
| 5701 |
+
delta_mov_history[home].append(delta_mov)
|
| 5702 |
+
delta_mov_history[away].append(-delta_mov)
|
| 5703 |
|
| 5704 |
def _parse_stats(self, stats, pts, opp_pts, is_home=True):
|
| 5705 |
"""Extract stats from game data. Uses REAL box score when available, estimates otherwise."""
|
requirements.txt
CHANGED
|
@@ -11,4 +11,9 @@ psycopg2-binary>=2.9
|
|
| 11 |
torch>=2.3 --index-url https://download.pytorch.org/whl/cpu
|
| 12 |
pytorch_tabnet>=4.1
|
| 13 |
mapie>=0.9
|
|
|
|
| 14 |
# autogluon.tabular>=1.2 # OPTIONAL — large install (~2GB), uncomment if needed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
torch>=2.3 --index-url https://download.pytorch.org/whl/cpu
|
| 12 |
pytorch_tabnet>=4.1
|
| 13 |
mapie>=0.9
|
| 14 |
+
betacal>=0.1
|
| 15 |
# autogluon.tabular>=1.2 # OPTIONAL — large install (~2GB), uncomment if needed
|
| 16 |
+
# --- Browser scraping (needs Playwright deps in Docker image, see Dockerfile.browser) ---
|
| 17 |
+
# crawl4ai>=0.4 # OPTIONAL — uncomment when using Dockerfile.browser for browser-based scraping
|
| 18 |
+
html2text>=2024.2 # Lightweight HTML-to-markdown for requests fallback
|
| 19 |
+
beautifulsoup4>=4.12 # CSS selector extraction in requests fallback
|