"""
PREDICTION ENGINE — The Brain
Pure math/statistics module. No Selenium. No browser.
Loads historical data, builds team profiles, computes predictions.
Used by both the 1X2 and Double Chance predictors + backtester.
"""
import os
from collections import defaultdict
from math import exp, factorial, lgamma, log

import numpy as np
|
|
|
|
def poisson_pmf(k, lam):
    """Poisson probability mass function without scipy dependency.

    Args:
        k: Number of events (an integer count).
        lam: Expected number of events (the Poisson rate).

    Returns:
        P(X = k) for X ~ Poisson(lam). A non-positive rate is treated as a
        point mass at zero, and a negative ``k`` has probability 0.0.
    """
    if k < 0:
        # Outside the support of the distribution; the factorial-based
        # formula would raise ValueError here instead of returning 0.
        return 0.0
    if lam <= 0:
        return 1.0 if k == 0 else 0.0
    # Evaluate in log space: lam**k and k! overflow a float for large k long
    # before the probability itself stops being representable.
    return exp(k * log(lam) - lam - lgamma(k + 1))
|
|
|
|
class MatchData:
    """Parsed match result.

    Holds the home and away team codes together with the goals scored by
    each side (``hs`` for the home side, ``as_`` for the away side).
    """

    __slots__ = ('home', 'away', 'hs', 'as_')

    def __init__(self, home, away, hs, as_):
        self.home = home
        self.away = away
        self.hs = hs
        self.as_ = as_

    def __repr__(self):
        """Return a constructor-style representation for debugging."""
        return f"MatchData({self.home!r}, {self.away!r}, {self.hs!r}, {self.as_!r})"

    @property
    def result(self):
        """Return 'H' for a home win, 'A' for an away win, otherwise 'D'."""
        if self.hs > self.as_:
            return 'H'
        if self.hs < self.as_:
            return 'A'
        return 'D'

    @property
    def dc_outcome(self):
        """Return one Double Chance label for this result: HoD, HoA, or DoA.

        Every result is covered by two Double Chance options; this property
        reports a single fixed label per result (H -> 'HoA', D -> 'HoD',
        A -> 'DoA'). Use :meth:`dc_covers` to test a specific option.
        """
        outcome = self.result
        if outcome == 'H':
            return 'HoA'
        if outcome == 'D':
            return 'HoD'
        return 'DoA'

    def dc_covers(self, option):
        """Does this result cover a given DC option?

        Args:
            option: 'HoD' (home or draw), 'HoA' (home or away) or
                'DoA' (draw or away).

        Returns:
            True when the match result is one of the two results the option
            names; False otherwise, including for an unrecognised option.
        """
        outcome = self.result
        if option == 'HoD':
            return outcome in ('H', 'D')
        if option == 'HoA':
            return outcome in ('H', 'A')
        if option == 'DoA':
            return outcome in ('D', 'A')
        return False
|
|
|
|
| |
| |
| |
# Alternate team codes mapped to the canonical code used for lookups.
TEAM_ALIASES = {'BRN': 'BRE'}


def normalize_team(name):
    """Return the canonical code for *name*; codes without an alias pass through."""
    try:
        return TEAM_ALIASES[name]
    except KeyError:
        return name
|
|
|
|
def load_results_file(path):
    """Load a results txt file → list[MatchData].

    Each usable line consists of three whitespace-separated fields:
    home team, score written as ``H:A``, away team. Team codes are passed
    through ``normalize_team``. Lines that do not match this shape, whose
    score is not two integers, or whose score is negative are skipped.

    Args:
        path: Location of the results file.

    Returns:
        The parsed matches in file order; an empty list when *path* is not
        an existing regular file.
    """
    matches = []
    # isfile rather than exists: a directory path would pass an existence
    # check and then make open() raise.
    if not os.path.isfile(path):
        return matches
    # utf-8-sig reads plain UTF-8 unchanged but drops a leading BOM, which
    # would otherwise be glued onto the first team code of the file.
    with open(path, 'r', encoding='utf-8-sig') as f:
        for line in f:
            parts = line.split()
            if len(parts) != 3:
                # Covers blank lines as well as malformed ones.
                continue
            home_code, score_text, away_code = parts
            score = score_text.split(':')
            if len(score) != 2:
                continue
            try:
                hs, as_ = int(score[0]), int(score[1])
            except ValueError:
                continue
            if hs < 0 or as_ < 0:
                # int() accepts a leading minus sign; a goal count cannot be
                # negative, so treat such a line as malformed.
                continue
            matches.append(
                MatchData(normalize_team(home_code), normalize_team(away_code), hs, as_)
            )
    return matches
|
|
|
|
class PredictionEngine:
    """
    The statistical brain.

    Loads all historical data and precomputes:
    - Team win/draw/loss rates (home & away separately)
    - Head-to-head records
    - Goal scoring/conceding averages (for Poisson)
    - Recent form (Markov transition matrices)
    - League-wide base rates
    """

    def __init__(self, data_dir='.'):
        """Load the result files found in *data_dir* and build all indices."""
        self.data_dir = data_dir
        self.matches = []
        self._load_all_data()
        self._build_indices()

    # ------------------------------------------------------------------
    # Data loading and indexing
    # ------------------------------------------------------------------
    def _load_all_data(self):
        """Load both result files."""
        # Files are appended in this order, so it fixes the order of
        # self.matches; recent_form() takes the tail of that list.
        for fname in ['2024-england_virtual_results.txt', 'england_virtual_results.txt']:
            path = os.path.join(self.data_dir, fname)
            self.matches.extend(load_results_file(path))
        print(f"[Engine] Loaded {len(self.matches)} total matches")

    def _build_indices(self):
        """Build lookup structures from match data."""
        self.teams = set()

        self.home_matches = defaultdict(list)   # team -> matches played at home
        self.away_matches = defaultdict(list)   # team -> matches played away
        self.h2h = defaultdict(list)            # (home, away) -> matches of that exact fixture
        self.all_by_team = defaultdict(list)    # team -> every match, home or away

        for m in self.matches:
            self.teams.add(m.home)
            self.teams.add(m.away)
            self.home_matches[m.home].append(m)
            self.away_matches[m.away].append(m)
            self.h2h[(m.home, m.away)].append(m)
            self.all_by_team[m.home].append(m)
            self.all_by_team[m.away].append(m)

        # League-wide base rates, with fixed fallbacks when no data was loaded.
        if self.matches:
            total_goals = sum(m.hs + m.as_ for m in self.matches)
            self.league_avg_goals = total_goals / len(self.matches)
            self.league_avg_per_side = self.league_avg_goals / 2
            results = [m.result for m in self.matches]
            self.league_home_rate = results.count('H') / len(results)
            self.league_draw_rate = results.count('D') / len(results)
            self.league_away_rate = results.count('A') / len(results)
        else:
            self.league_avg_goals = 2.5
            self.league_avg_per_side = 1.25
            self.league_home_rate = 0.40
            self.league_draw_rate = 0.25
            self.league_away_rate = 0.35

        print(f"[Engine] {len(self.teams)} teams indexed")
        print(f"[Engine] League avg goals/match: {self.league_avg_goals:.2f}")
        print(f"[Engine] Base rates: H={self.league_home_rate:.1%} D={self.league_draw_rate:.1%} A={self.league_away_rate:.1%}")

    # ------------------------------------------------------------------
    # Team-level statistics
    # ------------------------------------------------------------------
    def team_home_record(self, team):
        """Returns (win_rate, draw_rate, loss_rate, avg_scored, avg_conceded) at home.

        Falls back to the league-wide rates when the team has no home matches.
        """
        ms = self.home_matches.get(team, [])
        if not ms:
            return (self.league_home_rate, self.league_draw_rate, self.league_away_rate,
                    self.league_avg_per_side, self.league_avg_per_side)
        n = len(ms)
        w = sum(1 for m in ms if m.result == 'H')
        d = sum(1 for m in ms if m.result == 'D')
        l = n - w - d
        gs = sum(m.hs for m in ms)
        gc = sum(m.as_ for m in ms)
        return w/n, d/n, l/n, gs/n, gc/n

    def team_away_record(self, team):
        """Returns (win_rate, draw_rate, loss_rate, avg_scored, avg_conceded) away.

        Falls back to the league-wide rates when the team has no away matches.
        """
        ms = self.away_matches.get(team, [])
        if not ms:
            return (self.league_away_rate, self.league_draw_rate, self.league_home_rate,
                    self.league_avg_per_side, self.league_avg_per_side)
        n = len(ms)
        w = sum(1 for m in ms if m.result == 'A')
        d = sum(1 for m in ms if m.result == 'D')
        l = n - w - d
        gs = sum(m.as_ for m in ms)
        gc = sum(m.hs for m in ms)
        return w/n, d/n, l/n, gs/n, gc/n

    def head_to_head(self, home, away):
        """H2H record for this exact matchup (home=home, away=away).

        Returns None when the fixture has never been played in that
        orientation, otherwise a dict with the match count and the
        home-win / draw / away-win rates.
        """
        ms = self.h2h.get((home, away), [])
        if not ms:
            return None
        n = len(ms)
        hw = sum(1 for m in ms if m.result == 'H')
        dr = sum(1 for m in ms if m.result == 'D')
        aw = n - hw - dr
        return {'matches': n, 'home_win': hw/n, 'draw': dr/n, 'away_win': aw/n}

    def recent_form(self, team, last_n=15):
        """Last N results for a team (W/D/L sequence).

        Results are taken from the tail of the team's match list, in stored
        order, and expressed from the team's own point of view. A
        non-positive *last_n* yields an empty list.
        """
        if last_n <= 0:
            # A slice of [-0:] would return the whole history, not nothing.
            return []
        ms = self.all_by_team.get(team, [])[-last_n:]
        as_home = {'H': 'W', 'D': 'D', 'A': 'L'}
        as_away = {'H': 'L', 'D': 'D', 'A': 'W'}
        return [(as_home if m.home == team else as_away)[m.result] for m in ms]

    def markov_transition(self, team, last_n=20):
        """Build Markov transition matrix from recent form.

        Returns a nested dict ``matrix[previous][next]`` over the states
        W/D/L. Fixed default rows are returned when fewer than three results
        are available, and a state never left gets a uniform row.
        """
        form = self.recent_form(team, last_n)
        if len(form) < 3:
            return {'W': {'W': 0.4, 'D': 0.3, 'L': 0.3},
                    'D': {'W': 0.35, 'D': 0.3, 'L': 0.35},
                    'L': {'W': 0.3, 'D': 0.3, 'L': 0.4}}
        trans = defaultdict(lambda: defaultdict(int))
        for prev_state, next_state in zip(form, form[1:]):
            trans[prev_state][next_state] += 1

        result = {}
        for state in ['W', 'D', 'L']:
            total = sum(trans[state].values())
            if total == 0:
                result[state] = {'W': 1/3, 'D': 1/3, 'L': 1/3}
            else:
                result[state] = {s: trans[state][s]/total for s in ['W', 'D', 'L']}
        return result

    # ------------------------------------------------------------------
    # Poisson goal model
    # ------------------------------------------------------------------
    def poisson_predict(self, home, away):
        """
        Predict match outcome probabilities using Poisson distribution.
        Returns dict with P(H), P(D), P(A) plus the expected goals used
        ('exp_home', 'exp_away').
        """
        _, _, _, h_gs, h_gc = self.team_home_record(home)
        _, _, _, a_gs, a_gc = self.team_away_record(away)

        # Attack / defence strengths relative to the league per-side average.
        per_side = self.league_avg_per_side
        home_attack = h_gs / per_side if per_side > 0 else 1.0
        home_defense = h_gc / per_side if per_side > 0 else 1.0
        away_attack = a_gs / per_side if per_side > 0 else 1.0
        away_defense = a_gc / per_side if per_side > 0 else 1.0

        # Expected goals for each side.
        exp_home = home_attack * away_defense * per_side
        exp_away = away_attack * home_defense * per_side

        # Clamp to the range [0.3, 4.0].
        exp_home = max(0.3, min(exp_home, 4.0))
        exp_away = max(0.3, min(exp_away, 4.0))

        # Sum the joint score grid for 0..7 goals per side.
        p_h = 0.0
        p_d = 0.0
        p_a = 0.0
        for i in range(8):
            pi = poisson_pmf(i, exp_home)
            for j in range(8):
                p = pi * poisson_pmf(j, exp_away)
                if i > j:
                    p_h += p
                elif i == j:
                    p_d += p
                else:
                    p_a += p

        # Renormalise to make up for the truncated grid.
        total = p_h + p_d + p_a
        if total > 0:
            p_h /= total
            p_d /= total
            p_a /= total
        return {'H': p_h, 'D': p_d, 'A': p_a, 'exp_home': exp_home, 'exp_away': exp_away}

    # ------------------------------------------------------------------
    # Odds conversion
    # ------------------------------------------------------------------
    @staticmethod
    def odds_to_probs(home_odds, draw_odds, away_odds):
        """Convert 1X2 odds to true probabilities (remove bookmaker margin).

        The three returned probabilities sum to 1.
        """
        raw_h = 1.0 / home_odds
        raw_d = 1.0 / draw_odds
        raw_a = 1.0 / away_odds
        overround = raw_h + raw_d + raw_a
        return raw_h / overround, raw_d / overround, raw_a / overround

    @staticmethod
    def dc_odds_to_probs(hod, hoa, doa):
        """Convert Double Chance odds to implied probabilities.

        Each match result is covered by exactly two of the three options, so
        P(HoD) + P(HoA) + P(DoA) = 2. The inverse odds are therefore rescaled
        to sum to 2 (not 1), which puts them on the same scale as the
        model-derived ``p_h + p_d`` style probabilities they are compared
        against in :meth:`predict_dc`.
        """
        raw_hod = 1.0 / hod
        raw_hoa = 1.0 / hoa
        raw_doa = 1.0 / doa
        scale = 2.0 / (raw_hod + raw_hoa + raw_doa)
        return raw_hod * scale, raw_hoa * scale, raw_doa * scale

    # ------------------------------------------------------------------
    # 1X2 prediction
    # ------------------------------------------------------------------
    def predict_1x2(self, home, away, h_odds=None, d_odds=None, a_odds=None):
        """
        Full Bayesian-style 1X2 prediction.
        Fuses: odds-implied probs, Poisson model, historical rates,
        h2h record, and Markov form.
        Returns: dict with final probabilities and confidence.
        """
        home = normalize_team(home)
        away = normalize_team(away)

        # Signal 1: bookmaker odds. Weighted only when all three are given.
        if h_odds and d_odds and a_odds:
            p_h_odds, p_d_odds, p_a_odds = self.odds_to_probs(h_odds, d_odds, a_odds)
            w_odds = 0.25
        else:
            p_h_odds = self.league_home_rate
            p_d_odds = self.league_draw_rate
            p_a_odds = self.league_away_rate
            w_odds = 0.0

        # Signal 2: Poisson goal model.
        poisson = self.poisson_predict(home, away)
        w_poisson = 0.25

        # Signal 3: historical home/away records. Each outcome averages the
        # home side's rate with the away side's mirrored rate. Using
        # (1 - win_rate) for the opponent would fold draws into both the home
        # and away terms, making them sum to exactly 1 and pinning the draw
        # term to its floor for every fixture.
        h_wr, h_dr, h_lr, _, _ = self.team_home_record(home)
        a_wr, a_dr, a_lr, _, _ = self.team_away_record(away)

        p_h_hist = (h_wr + a_lr) / 2
        p_a_hist = (a_wr + h_lr) / 2
        p_d_hist = max(0.05, (h_dr + a_dr) / 2)
        total = p_h_hist + p_d_hist + p_a_hist
        p_h_hist /= total
        p_d_hist /= total
        p_a_hist /= total
        w_hist = 0.25

        # Signal 4: head-to-head, trusted only with at least three meetings;
        # otherwise it repeats the historical signal at a reduced weight.
        h2h = self.head_to_head(home, away)
        if h2h and h2h['matches'] >= 3:
            p_h_h2h = h2h['home_win']
            p_d_h2h = h2h['draw']
            p_a_h2h = h2h['away_win']
            w_h2h = 0.15
        else:
            p_h_h2h = p_h_hist
            p_d_h2h = p_d_hist
            p_a_h2h = p_a_hist
            w_h2h = 0.05

        # Signal 5: recent form via the Markov transition from each side's
        # latest result to a win.
        h_form = self.recent_form(home, 10)
        a_form = self.recent_form(away, 10)
        h_trans = self.markov_transition(home, 15)
        a_trans = self.markov_transition(away, 15)

        if h_form:
            h_state = h_form[-1]
            h_next_w = h_trans[h_state]['W']
        else:
            h_next_w = 0.33

        if a_form:
            a_state = a_form[-1]
            a_next_w = a_trans[a_state]['W']
        else:
            a_next_w = 0.33

        # NOTE(review): the draw term here works out to
        # 0.2 * (1 - h_next_w - a_next_w), so it can never exceed 0.2 before
        # normalisation. Left unchanged; confirm this is the intended model.
        p_h_form = h_next_w * 0.6 + (1 - a_next_w) * 0.4
        p_a_form = a_next_w * 0.6 + (1 - h_next_w) * 0.4
        p_d_form = 1.0 - p_h_form - p_a_form
        p_d_form = max(0.05, p_d_form)
        total = p_h_form + p_d_form + p_a_form
        p_h_form /= total
        p_d_form /= total
        p_a_form /= total
        w_form = 0.10

        # Normalise the weights so the active signals sum to 1.
        w_total = w_odds + w_poisson + w_hist + w_h2h + w_form
        w_odds /= w_total
        w_poisson /= w_total
        w_hist /= w_total
        w_h2h /= w_total
        w_form /= w_total

        # Weighted blend of all signals.
        p_h = (w_odds * p_h_odds + w_poisson * poisson['H'] +
               w_hist * p_h_hist + w_h2h * p_h_h2h + w_form * p_h_form)
        p_d = (w_odds * p_d_odds + w_poisson * poisson['D'] +
               w_hist * p_d_hist + w_h2h * p_d_h2h + w_form * p_d_form)
        p_a = (w_odds * p_a_odds + w_poisson * poisson['A'] +
               w_hist * p_a_hist + w_h2h * p_a_h2h + w_form * p_a_form)

        total = p_h + p_d + p_a
        p_h /= total
        p_d /= total
        p_a /= total

        best = max(('H', p_h), ('D', p_d), ('A', p_a), key=lambda x: x[1])
        confidence = best[1]

        if confidence >= 0.55:
            level = "HIGH"
        elif confidence >= 0.42:
            level = "MEDIUM"
        else:
            level = "LOW"

        return {
            'prediction': best[0],
            'confidence': confidence,
            'level': level,
            'probs': {'H': p_h, 'D': p_d, 'A': p_a},
            'poisson': poisson,
            'skip': confidence < 0.38
        }

    # ------------------------------------------------------------------
    # Double Chance prediction
    # ------------------------------------------------------------------
    def predict_dc(self, home, away, hod_odds=None, hoa_odds=None, doa_odds=None,
                   h_odds=None, d_odds=None, a_odds=None):
        """
        Full Double Chance prediction using EDGE-BASED approach.

        Key insight: raw DC probabilities always favor HoA (since P(H)+P(A) dominates
        when home advantage is strong). Instead, we compute each option's EDGE
        relative to the league baseline for that option, identifying matchup-specific
        deviations.

        Returns a dict with the chosen option, its model probability as the
        confidence, a confidence level, per-option probabilities and edges,
        the odds-vs-model asymmetry, the underlying 1X2 probabilities and a
        skip flag.
        """
        home = normalize_team(home)
        away = normalize_team(away)

        # Underlying 1X2 probabilities.
        r1x2 = self.predict_1x2(home, away, h_odds, d_odds, a_odds)
        p_h = r1x2['probs']['H']
        p_d = r1x2['probs']['D']
        p_a = r1x2['probs']['A']

        # Model probability of each Double Chance option.
        p_hod = p_h + p_d
        p_hoa = p_h + p_a
        p_doa = p_d + p_a

        # League baseline for each option.
        base_hod = self.league_home_rate + self.league_draw_rate
        base_hoa = self.league_home_rate + self.league_away_rate
        base_doa = self.league_draw_rate + self.league_away_rate

        # Edge = model probability minus baseline.
        edge_hod = p_hod - base_hod
        edge_hoa = p_hoa - base_hoa
        edge_doa = p_doa - base_doa

        # Blend in the head-to-head cover frequency when there is enough history.
        h2h_matches = self.h2h.get((home, away), [])
        if len(h2h_matches) >= 3:
            n = len(h2h_matches)
            hod_freq = sum(1 for m in h2h_matches if m.dc_covers('HoD')) / n
            hoa_freq = sum(1 for m in h2h_matches if m.dc_covers('HoA')) / n
            doa_freq = sum(1 for m in h2h_matches if m.dc_covers('DoA')) / n

            edge_hod = 0.70 * edge_hod + 0.30 * (hod_freq - base_hod)
            edge_hoa = 0.70 * edge_hoa + 0.30 * (hoa_freq - base_hoa)
            edge_doa = 0.70 * edge_doa + 0.30 * (doa_freq - base_doa)

        # Blend in the Double Chance odds when all three are supplied.
        asym_hod = asym_hoa = asym_doa = 0.0
        if hod_odds and hoa_odds and doa_odds:
            p_hod_odds, p_hoa_odds, p_doa_odds = self.dc_odds_to_probs(hod_odds, hoa_odds, doa_odds)

            # Asymmetry: how far the odds-implied view sits from the model.
            asym_hod = p_hod_odds - p_hod
            asym_hoa = p_hoa_odds - p_hoa
            asym_doa = p_doa_odds - p_doa

            odds_edge_hod = p_hod_odds - base_hod
            odds_edge_hoa = p_hoa_odds - base_hoa
            odds_edge_doa = p_doa_odds - base_doa
            edge_hod = 0.60 * edge_hod + 0.40 * odds_edge_hod
            edge_hoa = 0.60 * edge_hoa + 0.40 * odds_edge_hoa
            edge_doa = 0.60 * edge_doa + 0.40 * odds_edge_doa

        # Pick the option with the largest edge.
        edges = {'HoD': edge_hod, 'HoA': edge_hoa, 'DoA': edge_doa}
        best = max(edges, key=edges.get)

        # Confidence is the model probability of the chosen option, not its edge.
        probs = {'HoD': p_hod, 'HoA': p_hoa, 'DoA': p_doa}
        confidence = probs[best]

        if confidence >= 0.72:
            level = "HIGH"
        elif confidence >= 0.65:
            level = "MEDIUM"
        else:
            level = "LOW"

        return {
            'prediction': best,
            'confidence': confidence,
            'level': level,
            'probs': probs,
            'edges': edges,
            'asymmetry': {'HoD': asym_hod, 'HoA': asym_hoa, 'DoA': asym_doa},
            'underlying_1x2': r1x2['probs'],
            'skip': confidence < 0.60
        }
|
|
|
|
| |
def main():
    """Build the engine from the current directory and print sample predictions."""
    engine = PredictionEngine()

    print("\n" + "="*60)
    print("SAMPLE 1X2 PREDICTIONS")
    print("="*60)
    for home, away in [('MCI', 'CHE'), ('ARS', 'TOT'), ('LIV', 'MUN'), ('NEW', 'BRE'), ('FUL', 'WOL')]:
        r = engine.predict_1x2(home, away)
        print(f"\n{home} vs {away}:")
        print(f" H={r['probs']['H']:.1%} D={r['probs']['D']:.1%} A={r['probs']['A']:.1%}")
        print(f" Prediction: {r['prediction']} ({r['level']}, {r['confidence']:.1%})")
        print(f" Poisson xG: {r['poisson']['exp_home']:.2f} - {r['poisson']['exp_away']:.2f}")
        if r['skip']:
            # The original message was mis-decoded text; restored to the
            # warning sign and dash it was evidently meant to show.
            print(" ⚠️ SKIP — low confidence")

    print("\n" + "="*60)
    print("SAMPLE DOUBLE CHANCE PREDICTIONS")
    print("="*60)
    for home, away in [('MCI', 'CHE'), ('ARS', 'TOT'), ('LIV', 'MUN')]:
        r = engine.predict_dc(home, away)
        print(f"\n{home} vs {away}:")
        print(f" HoD={r['probs']['HoD']:.1%} HoA={r['probs']['HoA']:.1%} DoA={r['probs']['DoA']:.1%}")
        print(f" Prediction: {r['prediction']} ({r['level']}, {r['confidence']:.1%})")


if __name__ == '__main__':
    main()
|
|