"""
PREDICTION ENGINE — The Brain
Pure math/statistics module. No Selenium. No browser.
Loads historical data, builds team profiles, computes predictions.
Used by both the 1X2 and Double Chance predictors + backtester.
"""
import os
import numpy as np
from collections import defaultdict
from math import factorial, exp


def poisson_pmf(k, lam):
    """Evaluate the Poisson probability P(X = k) for rate ``lam``.

    Implemented with ``math`` only, so scipy is not required.  A
    non-positive rate is treated as a point mass at zero: the result is
    1.0 when ``k == 0`` and 0.0 for any other ``k``.
    """
    if lam <= 0:
        # Degenerate case: all probability sits on k == 0.
        return float(k == 0)
    unnormalised = (lam ** k) * exp(-lam)
    return unnormalised / factorial(k)


class MatchData:
    """One finished fixture: the two team codes plus the final score."""
    __slots__ = ('home', 'away', 'hs', 'as_')

    def __init__(self, home, away, hs, as_):
        # hs / as_ hold the goals scored by the home and away side.
        self.home, self.away = home, away
        self.hs, self.as_ = hs, as_

    @property
    def result(self):
        """1X2 outcome letter: 'H' (home win), 'A' (away win) or 'D' (draw)."""
        if self.hs > self.as_:
            return 'H'
        return 'A' if self.hs < self.as_ else 'D'

    @property
    def dc_outcome(self):
        """Single Double Chance label reported for this result.

        Every result is covered by two DC options; this property returns
        one fixed label per result: 'HoA' for a home win, 'HoD' for a
        draw and 'DoA' for an away win.
        """
        return {'H': 'HoA', 'D': 'HoD', 'A': 'DoA'}[self.result]

    def dc_covers(self, option):
        """Return True when this result wins the given DC option.

        ``option`` is 'HoD', 'HoA' or 'DoA'; any other value gives False.
        """
        outcome = self.result
        # Each option loses on exactly one of the three results.
        if option == 'HoD':
            return outcome != 'A'
        if option == 'HoA':
            return outcome != 'D'
        if option == 'DoA':
            return outcome != 'H'
        return False


# ─────────────────────────────────────────────
# TEAM ALIASES — BRN (2024 data) = BRE (current)
# ─────────────────────────────────────────────
TEAM_ALIASES = {'BRN': 'BRE'}


def normalize_team(name):
    """Map an aliased team code to its canonical code; other codes pass through."""
    return TEAM_ALIASES[name] if name in TEAM_ALIASES else name


def load_results_file(path):
    """Parse a results text file into a list of MatchData.

    Each usable line consists of exactly three whitespace-separated
    tokens: home team, score written as ``<home>:<away>``, away team.
    Team codes are passed through normalize_team.  Blank lines, lines
    with a different token count, and scores that are not two integers
    are skipped.  A path that does not exist yields an empty list.
    """
    parsed = []
    if os.path.exists(path):
        with open(path, 'r', encoding='utf-8') as handle:
            for raw in handle:
                # split() with no argument ignores surrounding whitespace,
                # so an empty line simply produces zero tokens.
                tokens = raw.split()
                if len(tokens) != 3:
                    continue
                home_token, score_token, away_token = tokens
                goals = score_token.split(':')
                if len(goals) != 2:
                    continue
                try:
                    home_goals, away_goals = int(goals[0]), int(goals[1])
                except ValueError:
                    continue
                parsed.append(MatchData(normalize_team(home_token),
                                        normalize_team(away_token),
                                        home_goals, away_goals))
    return parsed


class PredictionEngine:
    """
    The statistical brain.
    
    Loads all historical data and precomputes:
    - Team win/draw/loss rates (home & away separately)
    - Head-to-head records
    - Goal scoring/conceding averages (for Poisson)
    - Recent form (Markov transition matrices)
    - League-wide base rates
    """

    def __init__(self, data_dir='.'):
        """Create the engine: read the result files in ``data_dir`` and index them.

        Construction performs the file loading and index building
        immediately, including the summary lines those steps print.
        """
        self.matches = []
        self.data_dir = data_dir
        # Order matters: the indices are derived from the loaded matches.
        self._load_all_data()
        self._build_indices()

    # ── DATA LOADING ───────────────────────────
    def _load_all_data(self):
        """Append the matches from both result files to ``self.matches``.

        The 2024 archive is read before the current file, so matches end
        up in that order.  A missing file contributes no matches.
        """
        result_files = ('2024-england_virtual_results.txt', 'england_virtual_results.txt')
        for file_name in result_files:
            full_path = os.path.join(self.data_dir, file_name)
            self.matches.extend(load_results_file(full_path))
        print(f"[Engine] Loaded {len(self.matches)} total matches")

    def _build_indices(self):
        """Derive the lookup tables and league-wide base rates from ``self.matches``.

        Sets ``teams``, ``home_matches``, ``away_matches``, ``h2h`` and
        ``all_by_team`` plus the ``league_*`` averages, then prints a
        short summary.  With no matches, fixed default rates are used.
        """
        self.teams = set()
        self.home_matches = defaultdict(list)   # team -> matches it hosted
        self.away_matches = defaultdict(list)   # team -> matches it played away
        self.h2h = defaultdict(list)            # (home, away) -> matches of that exact pairing
        self.all_by_team = defaultdict(list)    # team -> every match, in load order

        for match in self.matches:
            host, guest = match.home, match.away
            self.teams.update((host, guest))
            self.home_matches[host].append(match)
            self.away_matches[guest].append(match)
            self.h2h[(host, guest)].append(match)
            for side in (host, guest):
                self.all_by_team[side].append(match)

        played = len(self.matches)
        if played:
            self.league_avg_goals = sum(m.hs + m.as_ for m in self.matches) / played
            # Per-side average is simply half the match total.
            self.league_avg_per_side = self.league_avg_goals / 2
            outcomes = [m.result for m in self.matches]
            self.league_home_rate = outcomes.count('H') / played
            self.league_draw_rate = outcomes.count('D') / played
            self.league_away_rate = outcomes.count('A') / played
        else:
            # No data at all: fall back to fixed defaults.
            self.league_avg_goals = 2.5
            self.league_avg_per_side = 1.25
            self.league_home_rate = 0.40
            self.league_draw_rate = 0.25
            self.league_away_rate = 0.35

        print(f"[Engine] {len(self.teams)} teams indexed")
        print(f"[Engine] League avg goals/match: {self.league_avg_goals:.2f}")
        print(f"[Engine] Base rates: H={self.league_home_rate:.1%} D={self.league_draw_rate:.1%} A={self.league_away_rate:.1%}")

    # ── TEAM STATISTICS ────────────────────────
    def team_home_record(self, team):
        """Summarise a team's home matches.

        Returns a 5-tuple ``(win_rate, draw_rate, loss_rate, avg_scored,
        avg_conceded)``.  A team with no home matches gets the league
        home/draw/away rates and the league per-side goal average instead.
        """
        fixtures = self.home_matches.get(team, [])
        played = len(fixtures)
        if played == 0:
            return (self.league_home_rate, self.league_draw_rate, self.league_away_rate,
                    self.league_avg_per_side, self.league_avg_per_side)
        outcomes = [fx.result for fx in fixtures]
        wins = outcomes.count('H')
        draws = outcomes.count('D')
        losses = played - wins - draws
        # At home the team's own goals are the home score.
        scored = sum(fx.hs for fx in fixtures)
        conceded = sum(fx.as_ for fx in fixtures)
        return wins / played, draws / played, losses / played, scored / played, conceded / played

    def team_away_record(self, team):
        """Summarise a team's away matches.

        Returns a 5-tuple ``(win_rate, draw_rate, loss_rate, avg_scored,
        avg_conceded)`` from the away side's point of view.  A team with
        no away matches gets the league away/draw/home rates and the
        league per-side goal average instead.
        """
        fixtures = self.away_matches.get(team, [])
        played = len(fixtures)
        if played == 0:
            return (self.league_away_rate, self.league_draw_rate, self.league_home_rate,
                    self.league_avg_per_side, self.league_avg_per_side)
        outcomes = [fx.result for fx in fixtures]
        wins = outcomes.count('A')      # an 'A' result is a win for the visitor
        draws = outcomes.count('D')
        losses = played - wins - draws
        # Away from home the team's own goals are the away score.
        scored = sum(fx.as_ for fx in fixtures)
        conceded = sum(fx.hs for fx in fixtures)
        return wins / played, draws / played, losses / played, scored / played, conceded / played

    def head_to_head(self, home, away):
        """Return the record of this exact pairing (``home`` hosting ``away``).

        The result is a dict with the number of ``matches`` and the
        ``home_win`` / ``draw`` / ``away_win`` fractions, or None when the
        two sides have never met in this orientation.
        """
        meetings = self.h2h.get((home, away), [])
        if not meetings:
            return None
        total = len(meetings)
        outcomes = [m.result for m in meetings]
        home_wins = outcomes.count('H')
        draws = outcomes.count('D')
        away_wins = total - home_wins - draws
        return {
            'matches': total,
            'home_win': home_wins / total,
            'draw': draws / total,
            'away_win': away_wins / total,
        }

    def recent_form(self, team, last_n=15):
        """Return the team's most recent results as a list of 'W'/'D'/'L'.

        The list covers at most the last ``last_n`` matches of the team
        (home and away combined), oldest first, each seen from the team's
        own point of view.  An unknown team or a non-positive ``last_n``
        yields an empty list.
        """
        if last_n <= 0:
            # Guard: a slice of [-0:] would select the whole history
            # rather than "no matches".
            return []
        history = self.all_by_team.get(team, [])[-last_n:]
        form = []
        for m in history:
            outcome = m.result
            # The result letter that means "this team won" depends on
            # whether the team was the host or the visitor.
            winning_letter = 'H' if m.home == team else 'A'
            if outcome == 'D':
                form.append('D')
            elif outcome == winning_letter:
                form.append('W')
            else:
                form.append('L')
        return form

    def markov_transition(self, team, last_n=20):
        """Estimate a W/D/L transition matrix from the team's recent form.

        Returns ``{state: {next_state: probability}}`` for the states
        'W', 'D' and 'L'.  With fewer than three recent results a fixed
        prior matrix is returned.  A state never seen as a starting point
        gets a uniform row.
        """
        states = ('W', 'D', 'L')
        form = self.recent_form(team, last_n)
        if len(form) < 3:
            # Too little history to count transitions: use the fixed prior.
            return {'W': {'W': 0.4, 'D': 0.3, 'L': 0.3},
                    'D': {'W': 0.35, 'D': 0.3, 'L': 0.35},
                    'L': {'W': 0.3, 'D': 0.3, 'L': 0.4}}

        # Count consecutive (current, next) pairs.
        pair_counts = defaultdict(int)
        for current, following in zip(form, form[1:]):
            pair_counts[(current, following)] += 1

        matrix = {}
        for src in states:
            row = [pair_counts[(src, dst)] for dst in states]
            row_total = sum(row)
            if row_total == 0:
                matrix[src] = {'W': 1/3, 'D': 1/3, 'L': 1/3}
            else:
                matrix[src] = {dst: count / row_total for dst, count in zip(states, row)}
        return matrix

    # ── POISSON MODEL ──────────────────────────
    def poisson_predict(self, home, away):
        """
        Estimate 1X2 probabilities from two independent Poisson goal counts.

        The home side's home record and the away side's away record are
        turned into attack/defence strengths relative to the league
        per-side average, giving an expected goal count per team (clamped
        to [0.3, 4.0]).  Scorelines from 0-0 up to 7-7 are enumerated and
        the resulting mass is renormalised.

        Returns a dict with 'H', 'D', 'A' plus 'exp_home' and 'exp_away'.
        """
        _, _, _, home_scored, home_conceded = self.team_home_record(home)
        _, _, _, away_scored, away_conceded = self.team_away_record(away)
        per_side = self.league_avg_per_side

        def strength(avg_goals):
            # Ratio to the league average; neutral (1.0) if that average is not positive.
            return avg_goals / per_side if per_side > 0 else 1.0

        # Expected goals: own attack x opponent defence x league average.
        lam_home = strength(home_scored) * strength(away_conceded) * per_side
        lam_away = strength(away_scored) * strength(home_conceded) * per_side

        # Keep both rates inside a plausible band.
        lam_home = max(0.3, min(lam_home, 4.0))
        lam_away = max(0.3, min(lam_away, 4.0))

        # Goal-count probabilities for 0..7 goals, evaluated once per side.
        home_pmf = [poisson_pmf(goals, lam_home) for goals in range(8)]
        away_pmf = [poisson_pmf(goals, lam_away) for goals in range(8)]

        home_win = draw = away_win = 0.0
        for home_goals, p_home_goals in enumerate(home_pmf):
            for away_goals, p_away_goals in enumerate(away_pmf):
                joint = p_home_goals * p_away_goals
                if home_goals > away_goals:
                    home_win += joint
                elif home_goals == away_goals:
                    draw += joint
                else:
                    away_win += joint

        # The grid is truncated at 7 goals, so rescale to sum to one.
        mass = home_win + draw + away_win
        if mass > 0:
            home_win /= mass
            draw /= mass
            away_win /= mass
        return {'H': home_win, 'D': draw, 'A': away_win, 'exp_home': lam_home, 'exp_away': lam_away}

    # ── ODDS CONVERSION ───────────────────────
    @staticmethod
    def odds_to_probs(home_odds, draw_odds, away_odds):
        """Convert 1X2 odds to true probabilities (remove bookmaker margin)."""
        raw_h = 1.0 / home_odds
        raw_d = 1.0 / draw_odds
        raw_a = 1.0 / away_odds
        overround = raw_h + raw_d + raw_a
        return raw_h / overround, raw_d / overround, raw_a / overround

    @staticmethod
    def dc_odds_to_probs(hod, hoa, doa):
        """Convert Double Chance odds to implied probabilities."""
        raw_hod = 1.0 / hod
        raw_hoa = 1.0 / hoa
        raw_doa = 1.0 / doa
        overround = raw_hod + raw_hoa + raw_doa
        return raw_hod / overround, raw_hoa / overround, raw_doa / overround

    # ── 1X2 PREDICTION ─────────────────────────
    def predict_1x2(self, home, away, h_odds=None, d_odds=None, a_odds=None):
        """
        Blend several estimators into one 1X2 prediction.

        Components and their raw weights (renormalised to sum to one):
          - odds-implied probabilities: 0.25 (0 when any odd is missing)
          - Poisson goal model:         0.25
          - historical home/away rates: 0.25
          - head-to-head record:        0.15 with at least 3 meetings,
            otherwise the historical estimate is reused at weight 0.05
          - Markov form:                0.10

        Returns a dict with:
          'prediction'  'H', 'D' or 'A' (the most probable outcome)
          'confidence'  probability of that outcome
          'level'       "HIGH" (>= 0.55), "MEDIUM" (>= 0.42) or "LOW"
          'probs'       {'H', 'D', 'A'} final probabilities
          'poisson'     the raw poisson_predict() result
          'skip'        True when confidence is below 0.38
        """
        home = normalize_team(home)
        away = normalize_team(away)

        # 1) Odds-implied probabilities (only when all three odds are given)
        if h_odds and d_odds and a_odds:
            p_h_odds, p_d_odds, p_a_odds = self.odds_to_probs(h_odds, d_odds, a_odds)
            w_odds = 0.25
        else:
            # Placeholder values; they carry zero weight in the blend.
            p_h_odds = self.league_home_rate
            p_d_odds = self.league_draw_rate
            p_a_odds = self.league_away_rate
            w_odds = 0.0

        # 2) Poisson model
        poisson = self.poisson_predict(home, away)
        w_poisson = 0.25

        # 3) Historical team rates
        h_wr, h_dr, h_lr, _, _ = self.team_home_record(home)
        a_wr, a_dr, a_lr, _, _ = self.team_away_record(away)
        # Average the two views of each outcome: a home win is a win for
        # the host and a loss for the visitor, and so on.  The three
        # averages sum to one, so draws get their observed share.  Using
        # (1 - win_rate) of the opponent instead would make P(H) + P(A)
        # equal 1 and leave nothing for the draw.
        p_h_hist = (h_wr + a_lr) / 2
        p_d_hist = (h_dr + a_dr) / 2
        p_a_hist = (h_lr + a_wr) / 2
        # Never rule a draw out completely, then renormalise.
        p_d_hist = max(0.05, p_d_hist)
        total = p_h_hist + p_d_hist + p_a_hist
        p_h_hist /= total
        p_d_hist /= total
        p_a_hist /= total
        w_hist = 0.25

        # 4) Head-to-head (needs at least 3 meetings to count fully)
        h2h = self.head_to_head(home, away)
        if h2h and h2h['matches'] >= 3:
            p_h_h2h = h2h['home_win']
            p_d_h2h = h2h['draw']
            p_a_h2h = h2h['away_win']
            w_h2h = 0.15
        else:
            p_h_h2h = p_h_hist
            p_d_h2h = p_d_hist
            p_a_h2h = p_a_hist
            w_h2h = 0.05

        # 5) Markov form: probability that each side wins next, given
        #    its latest result
        h_form = self.recent_form(home, 10)
        a_form = self.recent_form(away, 10)
        h_trans = self.markov_transition(home, 15)
        a_trans = self.markov_transition(away, 15)

        h_next_w = h_trans[h_form[-1]]['W'] if h_form else 0.33
        a_next_w = a_trans[a_form[-1]]['W'] if a_form else 0.33

        # NOTE(review): (1 - next_w) lumps the opponent's draws together
        # with its losses, so the draw share here is only a residual.
        p_h_form = h_next_w * 0.6 + (1 - a_next_w) * 0.4
        p_a_form = a_next_w * 0.6 + (1 - h_next_w) * 0.4
        p_d_form = 1.0 - p_h_form - p_a_form
        p_d_form = max(0.05, p_d_form)
        total = p_h_form + p_d_form + p_a_form
        p_h_form /= total
        p_d_form /= total
        p_a_form /= total
        w_form = 0.10

        # Normalize weights (w_poisson keeps the total strictly positive)
        w_total = w_odds + w_poisson + w_hist + w_h2h + w_form
        w_odds /= w_total
        w_poisson /= w_total
        w_hist /= w_total
        w_h2h /= w_total
        w_form /= w_total

        # Fuse
        p_h = (w_odds * p_h_odds + w_poisson * poisson['H'] +
               w_hist * p_h_hist + w_h2h * p_h_h2h + w_form * p_h_form)
        p_d = (w_odds * p_d_odds + w_poisson * poisson['D'] +
               w_hist * p_d_hist + w_h2h * p_d_h2h + w_form * p_d_form)
        p_a = (w_odds * p_a_odds + w_poisson * poisson['A'] +
               w_hist * p_a_hist + w_h2h * p_a_h2h + w_form * p_a_form)

        total = p_h + p_d + p_a
        p_h /= total
        p_d /= total
        p_a /= total

        # On ties max() keeps the first candidate, i.e. H before D before A.
        best = max(('H', p_h), ('D', p_d), ('A', p_a), key=lambda x: x[1])
        confidence = best[1]

        if confidence >= 0.55:
            level = "HIGH"
        elif confidence >= 0.42:
            level = "MEDIUM"
        else:
            level = "LOW"

        return {
            'prediction': best[0],
            'confidence': confidence,
            'level': level,
            'probs': {'H': p_h, 'D': p_d, 'A': p_a},
            'poisson': poisson,
            'skip': confidence < 0.38
        }

    # ── DOUBLE CHANCE PREDICTION ───────────────
    def predict_dc(self, home, away, hod_odds=None, hoa_odds=None, doa_odds=None,
                   h_odds=None, d_odds=None, a_odds=None):
        """
        Pick a Double Chance option by its EDGE over the league baseline.

        The 1X2 probabilities from predict_1x2() are summed pairwise into
        the HoD / HoA / DoA probabilities.  Each option's edge is its
        probability minus the league-wide rate for that option, so the
        pick reflects what is unusual about this matchup rather than
        which option is most likely in general.  The edge is then blended
        with the head-to-head cover frequency (30%, needs at least 3
        meetings) and with the DC-odds-implied values (40%, needs all
        three DC odds).

        Returns a dict with 'prediction', 'confidence' (the model
        probability of the chosen option), 'level', 'probs', 'edges',
        'asymmetry' (odds-implied minus model, zero without DC odds),
        'underlying_1x2' and 'skip' (confidence below 0.60).
        """
        home = normalize_team(home)
        away = normalize_team(away)
        options = ('HoD', 'HoA', 'DoA')

        # Underlying 1X2 probabilities from the full model.
        r1x2 = self.predict_1x2(home, away, h_odds, d_odds, a_odds)
        win_h = r1x2['probs']['H']
        win_d = r1x2['probs']['D']
        win_a = r1x2['probs']['A']

        # Model probability and league baseline of every option.
        probs = {'HoD': win_h + win_d, 'HoA': win_h + win_a, 'DoA': win_d + win_a}
        baselines = {
            'HoD': self.league_home_rate + self.league_draw_rate,
            'HoA': self.league_home_rate + self.league_away_rate,
            'DoA': self.league_draw_rate + self.league_away_rate,
        }
        edges = {opt: probs[opt] - baselines[opt] for opt in options}

        # Head-to-head: how often did each option actually pay out?
        meetings = self.h2h.get((home, away), [])
        if len(meetings) >= 3:
            meeting_count = len(meetings)
            for opt in options:
                cover_freq = sum(1 for m in meetings if m.dc_covers(opt)) / meeting_count
                edges[opt] = 0.70 * edges[opt] + 0.30 * (cover_freq - baselines[opt])

        # DC odds, when all three are supplied.
        asymmetry = {opt: 0.0 for opt in options}
        if hod_odds and hoa_odds and doa_odds:
            implied = dict(zip(options, self.dc_odds_to_probs(hod_odds, hoa_odds, doa_odds)))
            for opt in options:
                # Positive asymmetry = the odds rate the option higher than the model.
                asymmetry[opt] = implied[opt] - probs[opt]
                edges[opt] = 0.60 * edges[opt] + 0.40 * (implied[opt] - baselines[opt])

        # Highest edge wins; ties resolve in HoD, HoA, DoA order.
        best = max(edges, key=edges.get)

        # Confidence is the chosen option's model probability, not its edge.
        confidence = probs[best]
        level = "HIGH" if confidence >= 0.72 else ("MEDIUM" if confidence >= 0.65 else "LOW")

        return {
            'prediction': best,
            'confidence': confidence,
            'level': level,
            'probs': probs,
            'edges': edges,
            'asymmetry': asymmetry,
            'underlying_1x2': r1x2['probs'],
            'skip': confidence < 0.60
        }


# ── QUICK TEST ─────────────────────────────────
if __name__ == '__main__':
    # Smoke test: build the engine from the current directory and print
    # a handful of sample predictions for both markets.
    engine = PredictionEngine()
    banner = "="*60
    fixtures_1x2 = [('MCI', 'CHE'), ('ARS', 'TOT'), ('LIV', 'MUN'), ('NEW', 'BRE'), ('FUL', 'WOL')]
    fixtures_dc = [('MCI', 'CHE'), ('ARS', 'TOT'), ('LIV', 'MUN')]

    print("\n" + banner)
    print("SAMPLE 1X2 PREDICTIONS")
    print(banner)
    for host, guest in fixtures_1x2:
        pred = engine.predict_1x2(host, guest)
        outcome_probs = pred['probs']
        xg = pred['poisson']
        print(f"\n{host} vs {guest}:")
        print(f"  H={outcome_probs['H']:.1%}  D={outcome_probs['D']:.1%}  A={outcome_probs['A']:.1%}")
        print(f"  Prediction: {pred['prediction']} ({pred['level']}, {pred['confidence']:.1%})")
        print(f"  Poisson xG: {xg['exp_home']:.2f} - {xg['exp_away']:.2f}")
        if pred['skip']:
            print(f"  ⚠️  SKIP — low confidence")

    print("\n" + banner)
    print("SAMPLE DOUBLE CHANCE PREDICTIONS")
    print(banner)
    for host, guest in fixtures_dc:
        pred = engine.predict_dc(host, guest)
        option_probs = pred['probs']
        print(f"\n{host} vs {guest}:")
        print(f"  HoD={option_probs['HoD']:.1%}  HoA={option_probs['HoA']:.1%}  DoA={option_probs['DoA']:.1%}")
        print(f"  Prediction: {pred['prediction']} ({pred['level']}, {pred['confidence']:.1%})")