""" PREDICTION ENGINE — The Brain Pure math/statistics module. No Selenium. No browser. Loads historical data, builds team profiles, computes predictions. Used by both the 1X2 and Double Chance predictors + backtester. """ import os import numpy as np from collections import defaultdict from math import factorial, exp def poisson_pmf(k, lam): """Poisson probability mass function without scipy dependency.""" if lam <= 0: return 1.0 if k == 0 else 0.0 return (lam ** k) * exp(-lam) / factorial(k) class MatchData: """Parsed match result.""" __slots__ = ('home', 'away', 'hs', 'as_') def __init__(self, home, away, hs, as_): self.home = home self.away = away self.hs = hs self.as_ = as_ @property def result(self): if self.hs > self.as_: return 'H' if self.hs < self.as_: return 'A' return 'D' @property def dc_outcome(self): """Which Double Chance option wins: HoD, HoA, or DoA.""" r = self.result if r == 'H': return 'HoA' # Home win covers HoD AND HoA if r == 'D': return 'HoD' # Draw covers HoD AND DoA return 'DoA' # Away win covers HoA AND DoA def dc_covers(self, option): """Does this result cover a given DC option?""" r = self.result if option == 'HoD': return r in ('H', 'D') if option == 'HoA': return r in ('H', 'A') if option == 'DoA': return r in ('D', 'A') return False # ───────────────────────────────────────────── # TEAM ALIASES — BRN (2024 data) = BRE (current) # ───────────────────────────────────────────── TEAM_ALIASES = {'BRN': 'BRE'} def normalize_team(name): return TEAM_ALIASES.get(name, name) def load_results_file(path): """Load a results txt file → list[MatchData].""" matches = [] if not os.path.exists(path): return matches with open(path, 'r', encoding='utf-8') as f: for line in f: line = line.strip() if not line: continue parts = line.split() if len(parts) != 3: continue home = normalize_team(parts[0]) away = normalize_team(parts[2]) score = parts[1].split(':') if len(score) != 2: continue try: matches.append(MatchData(home, away, int(score[0]), int(score[1]))) except ValueError: continue return matches class PredictionEngine: """ The statistical brain. Loads all historical data and precomputes: - Team win/draw/loss rates (home & away separately) - Head-to-head records - Goal scoring/conceding averages (for Poisson) - Recent form (Markov transition matrices) - League-wide base rates """ def __init__(self, data_dir='.'): self.data_dir = data_dir self.matches = [] self._load_all_data() self._build_indices() # ── DATA LOADING ─────────────────────────── def _load_all_data(self): """Load both result files.""" for fname in ['2024-england_virtual_results.txt', 'england_virtual_results.txt']: path = os.path.join(self.data_dir, fname) self.matches.extend(load_results_file(path)) print(f"[Engine] Loaded {len(self.matches)} total matches") def _build_indices(self): """Build lookup structures from match data.""" self.teams = set() # Per-team stats self.home_matches = defaultdict(list) # team → [MatchData played at home] self.away_matches = defaultdict(list) # team → [MatchData played away] self.h2h = defaultdict(list) # (home, away) → [MatchData] self.all_by_team = defaultdict(list) # team → [MatchData] in order for m in self.matches: self.teams.add(m.home) self.teams.add(m.away) self.home_matches[m.home].append(m) self.away_matches[m.away].append(m) self.h2h[(m.home, m.away)].append(m) self.all_by_team[m.home].append(m) self.all_by_team[m.away].append(m) # League-wide averages if self.matches: total_goals = sum(m.hs + m.as_ for m in self.matches) self.league_avg_goals = total_goals / len(self.matches) self.league_avg_per_side = self.league_avg_goals / 2 results = [m.result for m in self.matches] self.league_home_rate = results.count('H') / len(results) self.league_draw_rate = results.count('D') / len(results) self.league_away_rate = results.count('A') / len(results) else: self.league_avg_goals = 2.5 self.league_avg_per_side = 1.25 self.league_home_rate = 0.40 self.league_draw_rate = 0.25 self.league_away_rate = 0.35 print(f"[Engine] {len(self.teams)} teams indexed") print(f"[Engine] League avg goals/match: {self.league_avg_goals:.2f}") print(f"[Engine] Base rates: H={self.league_home_rate:.1%} D={self.league_draw_rate:.1%} A={self.league_away_rate:.1%}") # ── TEAM STATISTICS ──────────────────────── def team_home_record(self, team): """Returns (win_rate, draw_rate, loss_rate, avg_scored, avg_conceded) at home.""" ms = self.home_matches.get(team, []) if not ms: return self.league_home_rate, self.league_draw_rate, self.league_away_rate, self.league_avg_per_side, self.league_avg_per_side w = sum(1 for m in ms if m.result == 'H') d = sum(1 for m in ms if m.result == 'D') l = len(ms) - w - d gs = sum(m.hs for m in ms) gc = sum(m.as_ for m in ms) n = len(ms) return w/n, d/n, l/n, gs/n, gc/n def team_away_record(self, team): """Returns (win_rate, draw_rate, loss_rate, avg_scored, avg_conceded) away.""" ms = self.away_matches.get(team, []) if not ms: return self.league_away_rate, self.league_draw_rate, self.league_home_rate, self.league_avg_per_side, self.league_avg_per_side w = sum(1 for m in ms if m.result == 'A') d = sum(1 for m in ms if m.result == 'D') l = len(ms) - w - d gs = sum(m.as_ for m in ms) gc = sum(m.hs for m in ms) n = len(ms) return w/n, d/n, l/n, gs/n, gc/n def head_to_head(self, home, away): """H2H record for this exact matchup (home=home, away=away).""" ms = self.h2h.get((home, away), []) if not ms: return None n = len(ms) hw = sum(1 for m in ms if m.result == 'H') dr = sum(1 for m in ms if m.result == 'D') aw = n - hw - dr return {'matches': n, 'home_win': hw/n, 'draw': dr/n, 'away_win': aw/n} def recent_form(self, team, last_n=15): """Last N results for a team (W/D/L sequence).""" ms = self.all_by_team.get(team, [])[-last_n:] form = [] for m in ms: if m.home == team: form.append(m.result.replace('H','W').replace('A','L')) else: r = m.result form.append('W' if r == 'A' else ('L' if r == 'H' else 'D')) return form def markov_transition(self, team, last_n=20): """Build Markov transition matrix from recent form.""" form = self.recent_form(team, last_n) if len(form) < 3: return {'W': {'W': 0.4, 'D': 0.3, 'L': 0.3}, 'D': {'W': 0.35, 'D': 0.3, 'L': 0.35}, 'L': {'W': 0.3, 'D': 0.3, 'L': 0.4}} trans = defaultdict(lambda: defaultdict(int)) for i in range(len(form) - 1): trans[form[i]][form[i+1]] += 1 # Normalize result = {} for state in ['W', 'D', 'L']: total = sum(trans[state].values()) if total == 0: result[state] = {'W': 1/3, 'D': 1/3, 'L': 1/3} else: result[state] = {s: trans[state][s]/total for s in ['W', 'D', 'L']} return result # ── POISSON MODEL ────────────────────────── def poisson_predict(self, home, away): """ Predict match outcome probabilities using Poisson distribution. Returns dict with P(H), P(D), P(A). """ h_wr, h_dr, h_lr, h_gs, h_gc = self.team_home_record(home) a_wr, a_dr, a_lr, a_gs, a_gc = self.team_away_record(away) # Attack & defense strengths relative to league average home_attack = h_gs / self.league_avg_per_side if self.league_avg_per_side > 0 else 1.0 home_defense = h_gc / self.league_avg_per_side if self.league_avg_per_side > 0 else 1.0 away_attack = a_gs / self.league_avg_per_side if self.league_avg_per_side > 0 else 1.0 away_defense = a_gc / self.league_avg_per_side if self.league_avg_per_side > 0 else 1.0 # Expected goals exp_home = home_attack * away_defense * self.league_avg_per_side exp_away = away_attack * home_defense * self.league_avg_per_side # Clamp to reasonable range exp_home = max(0.3, min(exp_home, 4.0)) exp_away = max(0.3, min(exp_away, 4.0)) p_h = 0.0; p_d = 0.0; p_a = 0.0 for i in range(8): pi = poisson_pmf(i, exp_home) for j in range(8): pj = poisson_pmf(j, exp_away) p = pi * pj if i > j: p_h += p elif i == j: p_d += p else: p_a += p total = p_h + p_d + p_a if total > 0: p_h /= total; p_d /= total; p_a /= total return {'H': p_h, 'D': p_d, 'A': p_a, 'exp_home': exp_home, 'exp_away': exp_away} # ── ODDS CONVERSION ─────────────────────── @staticmethod def odds_to_probs(home_odds, draw_odds, away_odds): """Convert 1X2 odds to true probabilities (remove bookmaker margin).""" raw_h = 1.0 / home_odds raw_d = 1.0 / draw_odds raw_a = 1.0 / away_odds overround = raw_h + raw_d + raw_a return raw_h / overround, raw_d / overround, raw_a / overround @staticmethod def dc_odds_to_probs(hod, hoa, doa): """Convert Double Chance odds to implied probabilities.""" raw_hod = 1.0 / hod raw_hoa = 1.0 / hoa raw_doa = 1.0 / doa overround = raw_hod + raw_hoa + raw_doa return raw_hod / overround, raw_hoa / overround, raw_doa / overround # ── 1X2 PREDICTION ───────────────────────── def predict_1x2(self, home, away, h_odds=None, d_odds=None, a_odds=None): """ Full Bayesian-style 1X2 prediction. Fuses: odds-implied probs, Poisson model, historical rates, h2h record, and Markov form. Returns: dict with final probabilities and confidence. """ home = normalize_team(home) away = normalize_team(away) # 1) Odds-implied probabilities if h_odds and d_odds and a_odds: p_h_odds, p_d_odds, p_a_odds = self.odds_to_probs(h_odds, d_odds, a_odds) w_odds = 0.25 else: p_h_odds = self.league_home_rate p_d_odds = self.league_draw_rate p_a_odds = self.league_away_rate w_odds = 0.0 # 2) Poisson model poisson = self.poisson_predict(home, away) w_poisson = 0.25 # 3) Historical team rates h_wr, h_dr, h_lr, _, _ = self.team_home_record(home) a_wr, a_dr, a_lr, _, _ = self.team_away_record(away) # Combine: P(H) ~ home's home_win_rate, P(A) ~ away's away_win_rate p_h_hist = (h_wr + (1 - a_wr)) / 2 p_a_hist = (a_wr + (1 - h_wr)) / 2 p_d_hist = 1.0 - p_h_hist - p_a_hist p_d_hist = max(0.05, p_d_hist) total = p_h_hist + p_d_hist + p_a_hist p_h_hist /= total; p_d_hist /= total; p_a_hist /= total w_hist = 0.25 # 4) Head-to-head h2h = self.head_to_head(home, away) if h2h and h2h['matches'] >= 3: p_h_h2h = h2h['home_win'] p_d_h2h = h2h['draw'] p_a_h2h = h2h['away_win'] w_h2h = 0.15 else: p_h_h2h = p_h_hist; p_d_h2h = p_d_hist; p_a_h2h = p_a_hist w_h2h = 0.05 # 5) Markov form h_form = self.recent_form(home, 10) a_form = self.recent_form(away, 10) h_trans = self.markov_transition(home, 15) a_trans = self.markov_transition(away, 15) if h_form: h_state = h_form[-1] h_next_w = h_trans[h_state]['W'] else: h_next_w = 0.33 if a_form: a_state = a_form[-1] a_next_w = a_trans[a_state]['W'] else: a_next_w = 0.33 p_h_form = h_next_w * 0.6 + (1 - a_next_w) * 0.4 p_a_form = a_next_w * 0.6 + (1 - h_next_w) * 0.4 p_d_form = 1.0 - p_h_form - p_a_form p_d_form = max(0.05, p_d_form) total = p_h_form + p_d_form + p_a_form p_h_form /= total; p_d_form /= total; p_a_form /= total w_form = 0.10 # Normalize weights w_total = w_odds + w_poisson + w_hist + w_h2h + w_form w_odds /= w_total; w_poisson /= w_total; w_hist /= w_total w_h2h /= w_total; w_form /= w_total # Fuse p_h = (w_odds * p_h_odds + w_poisson * poisson['H'] + w_hist * p_h_hist + w_h2h * p_h_h2h + w_form * p_h_form) p_d = (w_odds * p_d_odds + w_poisson * poisson['D'] + w_hist * p_d_hist + w_h2h * p_d_h2h + w_form * p_d_form) p_a = (w_odds * p_a_odds + w_poisson * poisson['A'] + w_hist * p_a_hist + w_h2h * p_a_h2h + w_form * p_a_form) total = p_h + p_d + p_a p_h /= total; p_d /= total; p_a /= total best = max(('H', p_h), ('D', p_d), ('A', p_a), key=lambda x: x[1]) confidence = best[1] if confidence >= 0.55: level = "HIGH" elif confidence >= 0.42: level = "MEDIUM" else: level = "LOW" return { 'prediction': best[0], 'confidence': confidence, 'level': level, 'probs': {'H': p_h, 'D': p_d, 'A': p_a}, 'poisson': poisson, 'skip': confidence < 0.38 } # ── DOUBLE CHANCE PREDICTION ─────────────── def predict_dc(self, home, away, hod_odds=None, hoa_odds=None, doa_odds=None, h_odds=None, d_odds=None, a_odds=None): """ Full Double Chance prediction using EDGE-BASED approach. Key insight: raw DC probabilities always favor HoA (since P(H)+P(A) dominates when home advantage is strong). Instead, we compute each option's EDGE relative to the league baseline for that option, identifying matchup-specific deviations. """ home = normalize_team(home) away = normalize_team(away) # Get 1X2 probabilities from full model r1x2 = self.predict_1x2(home, away, h_odds, d_odds, a_odds) p_h = r1x2['probs']['H'] p_d = r1x2['probs']['D'] p_a = r1x2['probs']['A'] # This match's DC probabilities p_hod = p_h + p_d p_hoa = p_h + p_a p_doa = p_d + p_a # League-wide DC baselines base_hod = self.league_home_rate + self.league_draw_rate base_hoa = self.league_home_rate + self.league_away_rate base_doa = self.league_draw_rate + self.league_away_rate # EDGE = how much better than baseline this matchup is for each option edge_hod = p_hod - base_hod edge_hoa = p_hoa - base_hoa edge_doa = p_doa - base_doa # H2H DC frequency adjustment h2h_matches = self.h2h.get((home, away), []) if len(h2h_matches) >= 3: n = len(h2h_matches) hod_freq = sum(1 for m in h2h_matches if m.dc_covers('HoD')) / n hoa_freq = sum(1 for m in h2h_matches if m.dc_covers('HoA')) / n doa_freq = sum(1 for m in h2h_matches if m.dc_covers('DoA')) / n # H2H edge (deviation from base) edge_hod = 0.70 * edge_hod + 0.30 * (hod_freq - base_hod) edge_hoa = 0.70 * edge_hoa + 0.30 * (hoa_freq - base_hoa) edge_doa = 0.70 * edge_doa + 0.30 * (doa_freq - base_doa) # DC odds-implied edge (if available) asym_hod = asym_hoa = asym_doa = 0.0 if hod_odds and hoa_odds and doa_odds: p_hod_odds, p_hoa_odds, p_doa_odds = self.dc_odds_to_probs(hod_odds, hoa_odds, doa_odds) # Odds edge = implied prob minus model prob (positive = bookie thinks more likely) asym_hod = p_hod_odds - p_hod asym_hoa = p_hoa_odds - p_hoa asym_doa = p_doa_odds - p_doa # Blend odds signal into edge (40% weight to odds) odds_edge_hod = p_hod_odds - base_hod odds_edge_hoa = p_hoa_odds - base_hoa odds_edge_doa = p_doa_odds - base_doa edge_hod = 0.60 * edge_hod + 0.40 * odds_edge_hod edge_hoa = 0.60 * edge_hoa + 0.40 * odds_edge_hoa edge_doa = 0.60 * edge_doa + 0.40 * odds_edge_doa # Pick by highest edge edges = {'HoD': edge_hod, 'HoA': edge_hoa, 'DoA': edge_doa} best = max(edges, key=edges.get) # Confidence = the actual probability of the chosen option probs = {'HoD': p_hod, 'HoA': p_hoa, 'DoA': p_doa} confidence = probs[best] # Confidence thresholds for DC (base rates are ~66%, so higher bar) if confidence >= 0.72: level = "HIGH" elif confidence >= 0.65: level = "MEDIUM" else: level = "LOW" return { 'prediction': best, 'confidence': confidence, 'level': level, 'probs': probs, 'edges': edges, 'asymmetry': {'HoD': asym_hod, 'HoA': asym_hoa, 'DoA': asym_doa}, 'underlying_1x2': r1x2['probs'], 'skip': confidence < 0.60 } # ── QUICK TEST ───────────────────────────────── if __name__ == '__main__': engine = PredictionEngine() print("\n" + "="*60) print("SAMPLE 1X2 PREDICTIONS") print("="*60) for home, away in [('MCI', 'CHE'), ('ARS', 'TOT'), ('LIV', 'MUN'), ('NEW', 'BRE'), ('FUL', 'WOL')]: r = engine.predict_1x2(home, away) print(f"\n{home} vs {away}:") print(f" H={r['probs']['H']:.1%} D={r['probs']['D']:.1%} A={r['probs']['A']:.1%}") print(f" Prediction: {r['prediction']} ({r['level']}, {r['confidence']:.1%})") print(f" Poisson xG: {r['poisson']['exp_home']:.2f} - {r['poisson']['exp_away']:.2f}") if r['skip']: print(f" ⚠️ SKIP — low confidence") print("\n" + "="*60) print("SAMPLE DOUBLE CHANCE PREDICTIONS") print("="*60) for home, away in [('MCI', 'CHE'), ('ARS', 'TOT'), ('LIV', 'MUN')]: r = engine.predict_dc(home, away) print(f"\n{home} vs {away}:") print(f" HoD={r['probs']['HoD']:.1%} HoA={r['probs']['HoA']:.1%} DoA={r['probs']['DoA']:.1%}") print(f" Prediction: {r['prediction']} ({r['level']}, {r['confidence']:.1%})")