#!/usr/bin/env python3
"""
Dixon-Coles Poisson model — Streamlit app.

Bivariate Poisson model with the Dixon-Coles low-score correction, used to
predict football matches. Data source: football-data.co.uk

Install:
    pip install streamlit pandas numpy scipy requests reportlab plotly

Run:
    streamlit run dixon_coles_app.py
"""

import math
import io
import base64
from datetime import datetime
from io import StringIO
from xml.sax.saxutils import escape

import numpy as np
import pandas as pd
import streamlit as st
import plotly.graph_objects as go
import plotly.express as px
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import mm, cm
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    PageBreak, HRFlowable
)
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT

# ═════════════════════════════════════════════════════════════════════════════
# CONFIGURATION
# ═════════════════════════════════════════════════════════════════════════════

# Must be the first Streamlit command executed by the script.
st.set_page_config(
    page_title="Dixon-Coles Engine",
    page_icon="⚽",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Subdivision flags are emoji tag sequences made of invisible code points;
# they are spelled with escapes so editors cannot silently drop characters.
_FLAG_ENGLAND = "\U0001F3F4\U000E0067\U000E0062\U000E0065\U000E006E\U000E0067\U000E007F"
_FLAG_SCOTLAND = "\U0001F3F4\U000E0067\U000E0062\U000E0073\U000E0063\U000E0074\U000E007F"

# League code -> (league name, country, flag emoji).
LEAGUES = {
    "E0": ("Premier League", "England", _FLAG_ENGLAND),
    "E1": ("Championship", "England", _FLAG_ENGLAND),
    "E2": ("League One", "England", _FLAG_ENGLAND),
    "E3": ("League Two", "England", _FLAG_ENGLAND),
    "EC": ("Conference", "England", _FLAG_ENGLAND),
    "SC0": ("Premiership", "Scotland", _FLAG_SCOTLAND),
    "SC1": ("Championship", "Scotland", _FLAG_SCOTLAND),
    "SC2": ("League One", "Scotland", _FLAG_SCOTLAND),
    "SC3": ("League Two", "Scotland", _FLAG_SCOTLAND),
    "D1": ("Bundesliga", "Germany", "🇩🇪"),
    "D2": ("2. Bundesliga", "Germany", "🇩🇪"),
    "SP1": ("La Liga", "Spain", "🇪🇸"),
    "SP2": ("Segunda División", "Spain", "🇪🇸"),
    "I1": ("Serie A", "Italy", "🇮🇹"),
    "I2": ("Serie B", "Italy", "🇮🇹"),
    "F1": ("Ligue 1", "France", "🇫🇷"),
    "F2": ("Ligue 2", "France", "🇫🇷"),
    "N1": ("Eredivisie", "Netherlands", "🇳🇱"),
    "B1": ("Jupiler Pro League", "Belgium", "🇧🇪"),
    "P1": ("Primeira Liga", "Portugal", "🇵🇹"),
    "T1": ("Süper Lig", "Turkey", "🇹🇷"),
    "G1": ("Super League", "Greece", "🇬🇷"),
}

# Season code used in the download URL -> human-readable label.
SEASONS = {"2526": "2025/26", "2425": "2024/25", "2324": "2023/24"}

BASE_URL = "https://www.football-data.co.uk"

# Columns the model needs in a results CSV.
REQUIRED_RESULT_COLUMNS = ["HomeTeam", "AwayTeam", "FTHG", "FTAG", "Date"]

# ═════════════════════════════════════════════════════════════════════════════
# CUSTOM CSS
# ═════════════════════════════════════════════════════════════════════════════

# NOTE(review): the original stylesheet was lost when this file was extracted;
# these rules are a minimal reconstruction for the classes used in main().
st.markdown(
    """
<style>
.dc-title { font-size: 2rem; font-weight: 700; margin-bottom: 0; }
.dc-subtitle { color: #6b7280; font-size: 0.95rem; margin-top: 0.25rem; }
.dc-metric {
    background: #ffffff; border: 1px solid #e5e7eb; border-radius: 8px;
    padding: 0.75rem; text-align: center;
}
.dc-metric-label { color: #6b7280; font-size: 0.75rem; text-transform: uppercase; }
.dc-metric-value { font-size: 1.4rem; font-weight: 700; }
</style>
""",
    unsafe_allow_html=True,
)


# ═════════════════════════════════════════════════════════════════════════════
# DATA DOWNLOAD (with robust CSV handling)
# ═════════════════════════════════════════════════════════════════════════════

def _robust_read_csv(content_bytes: bytes) -> pd.DataFrame:
    """Parse raw CSV bytes into a DataFrame, tolerating common quirks.

    Handles a UTF-8 BOM, Windows-1252 vs UTF-8 encodings, and header rows
    repeated inside the same file. Empty (or whitespace-only) content yields
    an empty DataFrame instead of raising.
    """
    # latin-1 accepts every byte sequence, so it must be the last resort;
    # trying it before cp1252 would make cp1252 unreachable.
    for enc in ("utf-8-sig", "cp1252"):
        try:
            text = content_bytes.decode(enc)
            break
        except UnicodeDecodeError:
            continue
    else:
        text = content_bytes.decode("latin-1")

    text = text.strip()
    if not text:
        return pd.DataFrame()

    lines = text.split("\n")
    header = lines[0].strip().replace("\ufeff", "")
    clean_lines = [header]
    for line in lines[1:]:
        stripped = line.strip()
        if not stripped:
            continue
        # fixtures.csv sometimes repeats the header (one section per league);
        # drop those duplicated header rows.
        if stripped == header or stripped.startswith(("Div,Date,", "\ufeffDiv,Date,")):
            continue
        clean_lines.append(stripped)

    df = pd.read_csv(StringIO("\n".join(clean_lines)))
    df.columns = [c.strip().replace("\ufeff", "") for c in df.columns]
    return df


def _prepare_results(df: pd.DataFrame) -> pd.DataFrame:
    """Validate a results frame and return only played matches with int goals.

    Raises:
        ValueError: if any column in REQUIRED_RESULT_COLUMNS is missing.
    """
    missing = [c for c in REQUIRED_RESULT_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"Columnas faltantes: {missing}. Disponibles: {list(df.columns[:10])}")
    # Rows without a final score are matches that have not been played yet.
    df = df.dropna(subset=["FTHG", "FTAG"]).copy()
    df["FTHG"] = df["FTHG"].astype(int)
    df["FTAG"] = df["FTAG"].astype(int)
    return df


@st.cache_data(ttl=600, show_spinner=False)
def fetch_results(league_code: str, season: str) -> pd.DataFrame:
    """Download historical results for one league and season.

    Raises:
        requests.HTTPError: on a non-2xx response.
        ValueError: if required columns are missing from the CSV.
    """
    import requests

    url = f"{BASE_URL}/mmz4281/{season}/{league_code}.csv"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    return _prepare_results(_robust_read_csv(resp.content))


@st.cache_data(ttl=600, show_spinner=False)
def fetch_fixtures(league_code: str) -> pd.DataFrame:
    """Download upcoming fixtures and keep only the rows of *league_code*.

    Raises:
        requests.HTTPError: on a non-2xx response.
        ValueError: if the CSV has no 'Div' column.
    """
    import requests

    url = f"{BASE_URL}/fixtures.csv"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    df = _robust_read_csv(resp.content)
    if "Div" not in df.columns:
        raise ValueError(f"Columna 'Div' no encontrada. Columnas: {list(df.columns[:10])}")
    return df[df["Div"] == league_code].copy()


# ═════════════════════════════════════════════════════════════════════════════
# DIXON-COLES MODEL
# ═════════════════════════════════════════════════════════════════════════════

def parse_date(date_str: str) -> datetime:
    """Parse a 'dd/mm/yy' or 'dd/mm/yyyy' date.

    Two-digit years are read as 20yy. Anything that cannot be parsed falls
    back to the current time, which gives the match a zero age.
    """
    parts = str(date_str).strip().split("/")
    if len(parts) == 3:
        try:
            day, month, year = (int(part) for part in parts)
            if year < 100:
                year += 2000
            return datetime(year, month, day)
        except (ValueError, OverflowError):
            pass
    return datetime.now()


def poisson_pmf(k: int, lam: float) -> float:
    """Return the Poisson probability of exactly *k* events with mean *lam*.

    A non-positive mean is treated as a point mass at zero.
    """
    if lam <= 0:
        return 1.0 if k == 0 else 0.0
    return (lam ** k) * math.exp(-lam) / math.factorial(k)


def dixon_coles_tau(x, y, lam_h, lam_a, rho):
    """Return the Dixon-Coles adjustment factor for the score x-y.

    Only the scores 0-0, 0-1, 1-0 and 1-1 are adjusted; every other score
    gets a factor of 1.
    """
    if x == 0 and y == 0:
        return 1.0 - lam_h * lam_a * rho
    if x == 0 and y == 1:
        return 1.0 + lam_h * rho
    if x == 1 and y == 0:
        return 1.0 + lam_a * rho
    if x == 1 and y == 1:
        return 1.0 - rho
    return 1.0


def time_decay_weight(days_ago, xi=0.003):
    """Return the exponential weight exp(-xi * days_ago); negative ages count as 0."""
    return math.exp(-xi * max(days_ago, 0))


class DixonColesModel:
    """Iteratively fitted Dixon-Coles model with exponential time decay."""

    def __init__(self, xi=0.003, max_iter=80):
        self.xi = xi                # time-decay rate per day
        self.max_iter = max_iter    # number of fitting iterations
        self.attack = {}            # team -> attack strength
        self.defense = {}           # team -> defensive weakness (lower is better)
        self.home_adv = 0.25        # log of the multiplicative home advantage
        self.rho = -0.05            # low-score correction parameter
        self.teams = []
        self.n_matches = 0
        # Set by fit(); initialised so the attribute always exists.
        self.log_likelihood = -float("inf")

    def fit(self, df, progress_callback=None):
        """Estimate team strengths, home advantage and rho from results.

        Args:
            df: frame with HomeTeam, AwayTeam, FTHG, FTAG and Date columns.
            progress_callback: optional callable receiving a 0..1 fraction.

        Returns:
            self, to allow chaining.

        Raises:
            ValueError: with fewer than 4 teams or fewer than 10 matches.
        """
        now = datetime.now()
        matches = []
        for _, row in df.iterrows():
            days_ago = (now - parse_date(row["Date"])).days
            matches.append({
                "h": row["HomeTeam"],
                "a": row["AwayTeam"],
                "hg": int(row["FTHG"]),
                "ag": int(row["FTAG"]),
                "w": time_decay_weight(days_ago, self.xi),
            })

        self.n_matches = len(matches)
        self.teams = sorted({m["h"] for m in matches} | {m["a"] for m in matches})
        teams = self.teams
        n_teams = len(teams)

        if n_teams < 4 or len(matches) < 10:
            raise ValueError(f"Datos insuficientes: {len(matches)} partidos, {n_teams} equipos")

        attack = dict.fromkeys(teams, 1.0)
        defense = dict.fromkeys(teams, 1.0)
        home_adv = 0.25
        rho = -0.05
        best_ll = -float("inf")

        # Weighted home goals do not depend on the parameters being fitted.
        ha_num = sum(m["hg"] * m["w"] for m in matches)
        rho_grid = np.arange(-0.15, 0.06, 0.01)

        for iteration in range(self.max_iter):
            if progress_callback:
                progress_callback(iteration / self.max_iter)

            # One pass over the matches accumulates observed goals (num) and
            # expected goals under the current parameters (den) per team.
            home_factor = math.exp(home_adv)
            att_num = dict.fromkeys(teams, 0.0)
            att_den = dict.fromkeys(teams, 0.0)
            def_num = dict.fromkeys(teams, 0.0)
            def_den = dict.fromkeys(teams, 0.0)
            for m in matches:
                h, a, w = m["h"], m["a"], m["w"]
                att_num[h] += m["hg"] * w
                att_den[h] += defense[a] * home_factor * w
                def_num[h] += m["ag"] * w
                def_den[h] += attack[a] * w
                att_num[a] += m["ag"] * w
                att_den[a] += defense[h] * w
                def_num[a] += m["hg"] * w
                def_den[a] += attack[h] * home_factor * w

            new_attack = {t: att_num[t] / max(att_den[t], 1e-8) for t in teams}
            new_defense = {t: def_num[t] / max(def_den[t], 1e-8) for t in teams}

            # Normalise so the geometric mean of each parameter set is 1.
            geo_att = math.exp(sum(math.log(max(new_attack[t], 1e-8)) for t in teams) / n_teams)
            geo_def = math.exp(sum(math.log(max(new_defense[t], 1e-8)) for t in teams) / n_teams)
            for t in teams:
                new_attack[t] /= geo_att
                new_defense[t] /= geo_def

            ha_den = sum(new_attack[m["h"]] * new_defense[m["a"]] * m["w"] for m in matches)
            new_ha = math.log(max(ha_num / max(ha_den, 1e-8), 0.5))

            # Everything except tau is independent of rho, so compute it once
            # instead of once per grid point.
            new_home_factor = math.exp(new_ha)
            terms = []
            for m in matches:
                lh = new_attack[m["h"]] * new_defense[m["a"]] * new_home_factor
                la = new_attack[m["a"]] * new_defense[m["h"]]
                p1 = poisson_pmf(m["hg"], lh)
                p2 = poisson_pmf(m["ag"], la)
                if p1 > 0 and p2 > 0:
                    terms.append((m["hg"], m["ag"], lh, la, m["w"], math.log(p1) + math.log(p2)))

            best_rho = rho
            best_ll_iter = -float("inf")
            for r in rho_grid:
                ll = 0.0
                for hg, ag, lh, la, w, base in terms:
                    tau = dixon_coles_tau(hg, ag, lh, la, r)
                    # Matches whose tau is non-positive are left out of the sum.
                    if tau > 0:
                        ll += w * (base + math.log(tau))
                if ll > best_ll_iter:
                    best_ll_iter = ll
                    best_rho = r

            attack, defense, home_adv, rho = new_attack, new_defense, new_ha, best_rho
            best_ll = best_ll_iter

        self.attack = attack
        self.defense = defense
        self.home_adv = home_adv
        self.rho = rho
        self.log_likelihood = best_ll

        if progress_callback:
            progress_callback(1.0)
        return self

    def predict(self, home_team, away_team, max_goals=7):
        """Return market probabilities for one match.

        Returns:
            A dict of expected goals, 1X2 / over-under / BTTS probabilities,
            implied odds, the most likely scores and the score matrix
            (rows = home goals, columns = away goals), or None when either
            team is unknown or the score matrix cannot be normalised.
        """
        if home_team not in self.attack or away_team not in self.attack:
            return None

        lam_h = self.attack[home_team] * self.defense[away_team] * math.exp(self.home_adv)
        lam_a = self.attack[away_team] * self.defense[home_team]

        size = max_goals + 1
        matrix = np.zeros((size, size))
        for i in range(size):
            for j in range(size):
                tau = dixon_coles_tau(i, j, lam_h, lam_a, self.rho)
                matrix[i][j] = poisson_pmf(i, lam_h) * poisson_pmf(j, lam_a) * tau

        total = matrix.sum()
        if not total > 0:
            # Extreme parameters can drive tau negative; nothing sensible to report.
            return None
        # Renormalise: the grid is truncated at max_goals and tau shifts mass.
        matrix /= total

        home_goals, away_goals = np.indices(matrix.shape)
        p_home = float(matrix[home_goals > away_goals].sum())
        p_draw = float(np.trace(matrix))
        p_away = float(matrix[home_goals < away_goals].sum())
        over_25 = float(matrix[home_goals + away_goals > 2].sum())
        btts = float(matrix[1:, 1:].sum())

        top_n = min(6, size)
        scores = [(i, j, matrix[i][j]) for i in range(top_n) for j in range(top_n)]
        scores.sort(key=lambda s: s[2], reverse=True)

        return {
            "home": home_team, "away": away_team,
            "lambda_h": lam_h, "lambda_a": lam_a,
            "p_home": p_home, "p_draw": p_draw, "p_away": p_away,
            "over_25": over_25, "under_25": 1 - over_25,
            "btts_yes": btts, "btts_no": 1 - btts,
            # Probabilities are floored at 0.001 so odds stay finite.
            "odds_home": 1 / max(p_home, .001),
            "odds_draw": 1 / max(p_draw, .001),
            "odds_away": 1 / max(p_away, .001),
            "odds_over25": 1 / max(over_25, .001),
            "odds_under25": 1 / max(1 - over_25, .001),
            "top_scores": scores[:8],
            "matrix": matrix,
            "atk_home": self.attack[home_team], "def_home": self.defense[home_team],
            "atk_away": self.attack[away_team], "def_away": self.defense[away_team],
        }

    def predict_fixtures(self, fixtures_df):
        """Predict every fixture whose two teams are known to the model."""
        preds = []
        for _, row in fixtures_df.iterrows():
            pred = self.predict(row["HomeTeam"], row["AwayTeam"])
            if pred:
                pred["date"] = row.get("Date", "")
                pred["time"] = row.get("Time", "")
                preds.append(pred)
        return preds

    def get_rankings(self):
        """Return a DataFrame of teams sorted by Power = ATK / DEF, indexed from 1."""
        rows = []
        for t in self.teams:
            atk, defe = self.attack[t], self.defense[t]
            rows.append({
                "Equipo": t,
                "ATK": atk,
                "DEF": defe,
                "Power": atk / max(defe, 0.01),
                "xG/90 (H)": atk * math.exp(self.home_adv),
                "xGA/90": defe,
            })
        df = pd.DataFrame(rows).sort_values("Power", ascending=False).reset_index(drop=True)
        df.index += 1
        df.index.name = "#"
        return df


# ═════════════════════════════════════════════════════════════════════════════
# PDF GENERATOR
# ═════════════════════════════════════════════════════════════════════════════

def _predicted_outcome(p):
    """Return '1', 'X' or '2': the strictly most likely result, 'X' on ties."""
    if p["p_home"] > max(p["p_draw"], p["p_away"]):
        return "1"
    if p["p_away"] > max(p["p_home"], p["p_draw"]):
        return "2"
    return "X"


def _pdf_styles():
    """Return the sample stylesheet extended with the report's paragraph styles."""
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(
        "CustomTitle", parent=styles["Title"], fontSize=22, spaceAfter=4,
        textColor=colors.HexColor("#1a1a2e"), fontName="Helvetica-Bold"
    ))
    styles.add(ParagraphStyle(
        "CustomSubtitle", parent=styles["Normal"], fontSize=10,
        textColor=colors.HexColor("#666680"), spaceAfter=14, fontName="Helvetica"
    ))
    styles.add(ParagraphStyle(
        "SectionHead", parent=styles["Heading2"], fontSize=14,
        textColor=colors.HexColor("#1a1a2e"), spaceBefore=16, spaceAfter=8,
        fontName="Helvetica-Bold"
    ))
    styles.add(ParagraphStyle(
        "CellText", parent=styles["Normal"], fontSize=8, fontName="Helvetica", leading=10
    ))
    styles.add(ParagraphStyle(
        "CellBold", parent=styles["Normal"], fontSize=8, fontName="Helvetica-Bold", leading=10
    ))
    styles.add(ParagraphStyle(
        "SmallText", parent=styles["Normal"], fontSize=7,
        textColor=colors.HexColor("#888888"), leading=9
    ))
    return styles


def _pdf_table_style(font_size, padding, left_padding, extra=()):
    """Return the shared dark-header / zebra / grid table style plus *extra* commands."""
    return TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a1a2e")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), font_size),
        ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8f8fc")]),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e0e0e8")),
        ("TOPPADDING", (0, 0), (-1, -1), padding),
        ("BOTTOMPADDING", (0, 0), (-1, -1), padding),
        ("LEFTPADDING", (0, 0), (-1, -1), left_padding),
        *extra,
    ])


def _pdf_params_table(model):
    """Build the table of global model parameters."""
    data = [
        ["Parámetro", "Valor", "Descripción"],
        ["Partidos analizados", str(model.n_matches), "Total de partidos históricos usados"],
        ["Equipos", str(len(model.teams)), "Equipos en la liga"],
        ["rho (p)", f"{model.rho:.4f}", "Corrección Dixon-Coles para marcadores bajos"],
        ["Home Advantage", f"{math.exp(model.home_adv):.3f}x", "Factor multiplicativo de ventaja local"],
        ["xi (decay)", f"{model.xi}", "Parámetro de decaimiento temporal"],
        ["Iteraciones", str(model.max_iter), "Iteraciones MLE para convergencia"],
        ["Log-Likelihood", f"{model.log_likelihood:.1f}", "Log-verosimilitud del modelo calibrado"],
    ]
    table = Table(data, colWidths=[40*mm, 30*mm, 100*mm])
    table.setStyle(_pdf_table_style(8, 4, 6))
    return table


def _pdf_rankings_table(rankings_df):
    """Build the power-rankings table; ranks 1-3 are green, 4-7 amber."""
    data = [["#", "Equipo", "ATK (a)", "DEF (b)", "Power", "xG/90 (H)", "xGA/90"]]
    for rank, row in rankings_df.iterrows():
        data.append([
            str(rank), row["Equipo"],
            f"{row['ATK']:.3f}", f"{row['DEF']:.3f}", f"{row['Power']:.3f}",
            f"{row['xG/90 (H)']:.3f}", f"{row['xGA/90']:.3f}",
        ])

    extra = [
        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
        ("ALIGN", (1, 0), (1, -1), "LEFT"),
    ]
    for row_i in range(1, min(4, len(data))):
        extra.append(("TEXTCOLOR", (0, row_i), (0, row_i), colors.HexColor("#16a34a")))
        extra.append(("FONTNAME", (0, row_i), (0, row_i), "Helvetica-Bold"))
    for row_i in range(4, min(8, len(data))):
        extra.append(("TEXTCOLOR", (0, row_i), (0, row_i), colors.HexColor("#ca8a04")))

    col_w = [10*mm, 38*mm, 22*mm, 22*mm, 22*mm, 25*mm, 22*mm]
    table = Table(data, colWidths=col_w, repeatRows=1)
    table.setStyle(_pdf_table_style(7, 3, 4, extra))
    return table


def _pdf_predictions_table(predictions):
    """Build the one-row-per-fixture summary table."""
    data = [[
        "Fecha", "Local", "Visitante", "xG H", "xG A",
        "P(1)", "P(X)", "P(2)", "O2.5", "BTTS", "Score"
    ]]
    for p in predictions:
        best = p["top_scores"][0]
        data.append([
            str(p.get("date", "")), p["home"], p["away"],
            f"{p['lambda_h']:.2f}", f"{p['lambda_a']:.2f}",
            f"{p['p_home']*100:.0f}%", f"{p['p_draw']*100:.0f}%", f"{p['p_away']*100:.0f}%",
            f"{p['over_25']*100:.0f}%", f"{p['btts_yes']*100:.0f}%",
            f"{best[0]}-{best[1]}",
        ])
    col_w = [18*mm, 28*mm, 28*mm] + [14*mm] * 8
    table = Table(data, colWidths=col_w, repeatRows=1)
    table.setStyle(_pdf_table_style(7, 3, 3, [("ALIGN", (3, 0), (-1, -1), "CENTER")]))
    return table


def _pdf_match_detail_table(p):
    """Build the per-match table of markets, top scores, xG and team strengths."""
    scores_str = " | ".join(
        f"{s[0]}-{s[1]} ({s[2]*100:.1f}%)" for s in p["top_scores"][:4]
    )
    data = [
        ["Mercado", "Prob.", "Cuota", "", "Mercado", "Prob.", "Cuota"],
        ["1 (Local)", f"{p['p_home']*100:.1f}%", f"{p['odds_home']:.2f}", "",
         "Over 2.5", f"{p['over_25']*100:.1f}%", f"{p['odds_over25']:.2f}"],
        ["X (Empate)", f"{p['p_draw']*100:.1f}%", f"{p['odds_draw']:.2f}", "",
         "Under 2.5", f"{p['under_25']*100:.1f}%", f"{p['odds_under25']:.2f}"],
        ["2 (Visit.)", f"{p['p_away']*100:.1f}%", f"{p['odds_away']:.2f}", "",
         "BTTS Si", f"{p['btts_yes']*100:.1f}%", ""],
        ["Scores", scores_str, "", "", "", "", ""],
        [f"xG {p['home']}", f"{p['lambda_h']:.3f}", "", "",
         f"xG {p['away']}", f"{p['lambda_a']:.3f}", ""],
        ["ATK/DEF", f"{p['atk_home']:.3f}/{p['def_home']:.3f}", "", "",
         "ATK/DEF", f"{p['atk_away']:.3f}/{p['def_away']:.3f}", ""],
    ]
    table = Table(data, colWidths=[22*mm, 28*mm, 18*mm, 4*mm, 22*mm, 28*mm, 18*mm])
    table.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e8e8f0")),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 7),
        ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
        ("GRID", (0, 0), (-1, -1), 0.3, colors.HexColor("#e0e0e8")),
        ("TOPPADDING", (0, 0), (-1, -1), 2),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 2),
        ("LEFTPADDING", (0, 0), (-1, -1), 3),
        ("SPAN", (1, 4), (6, 4)),  # the scores row spans the remaining columns
    ]))
    return table


def _pdf_methodology_text(model):
    """Return the methodology section as ReportLab paragraph markup.

    Paragraph collapses plain newlines, so line breaks are explicit <br/>
    tags, and literal '<' characters are written as &lt;.
    """
    lines = [
        "<b>Base:</b> Distribucion Poisson Bivariada — P(X=k) = (lambda^k x e^(-lambda)) / k!",
        "",
        f"<b>Parametros por equipo</b> estimados por MLE iterativo ({model.max_iter} iteraciones):",
        "- alpha (Ataque): capacidad ofensiva relativa. alpha &gt; 1 = mejor que el promedio.",
        "- beta (Defensa): vulnerabilidad defensiva. beta &lt; 1 = mejor defensa.",
        f"- gamma (Home Advantage): {math.exp(model.home_adv):.3f}x",
        f"- rho (Dixon-Coles): {model.rho:.4f}",
        "",
        "<b>Goles esperados:</b>",
        "lambda_local = alpha_local x beta_visitante x e^gamma",
        "lambda_visitante = alpha_visitante x beta_local",
        "",
        "<b>Correccion Dixon-Coles (tau):</b> Ajusta P(0-0), P(1-0), P(0-1), P(1-1) "
        "para capturar la dependencia real entre goles. "
        "Con rho &lt; 0 los empates son mas probables.",
        "",
        f"<b>Decaimiento temporal:</b> w(t) = e^(-xi x t), xi={model.xi}. "
        "Partidos recientes pesan mas.",
        "",
        "<b>Fuente de datos:</b> football-data.co.uk",
    ]
    return "<br/>".join(lines)


def generate_pdf(model, predictions, rankings_df, league_name, season_label):
    """Build the PDF report and return it as a BytesIO positioned at the start.

    Args:
        model: a fitted DixonColesModel.
        predictions: list of dicts as returned by DixonColesModel.predict.
        rankings_df: frame as returned by DixonColesModel.get_rankings.
        league_name: league label printed on the cover.
        season_label: season label printed on the cover.
    """
    buf = io.BytesIO()
    doc = SimpleDocTemplate(
        buf, pagesize=A4,
        topMargin=20*mm, bottomMargin=15*mm,
        leftMargin=15*mm, rightMargin=15*mm
    )
    styles = _pdf_styles()
    story = []

    # ── Cover ──
    story.append(Spacer(1, 30*mm))
    story.append(Paragraph("Dixon-Coles Poisson Model", styles["CustomTitle"]))
    story.append(Paragraph(
        f"{league_name} | Temporada {season_label} | "
        f"Generado: {datetime.now().strftime('%d/%m/%Y %H:%M')}",
        styles["CustomSubtitle"]
    ))
    story.append(HRFlowable(width="100%", thickness=1, color=colors.HexColor("#e0e0e0")))
    story.append(Spacer(1, 6*mm))
    story.append(_pdf_params_table(model))

    # ── Power rankings ──
    story.append(Spacer(1, 8*mm))
    story.append(Paragraph("Power Rankings", styles["SectionHead"]))
    story.append(_pdf_rankings_table(rankings_df))

    # ── Predictions ──
    if predictions:
        story.append(PageBreak())
        story.append(Paragraph("Predicciones - Proximos Partidos", styles["SectionHead"]))
        story.append(_pdf_predictions_table(predictions))

        story.append(Spacer(1, 6*mm))
        story.append(Paragraph("Detalle por Partido", styles["SectionHead"]))
        for idx, p in enumerate(predictions):
            # Three match blocks per page.
            if idx > 0 and idx % 3 == 0:
                story.append(PageBreak())
            story.append(Spacer(1, 3*mm))
            # Paragraph text is markup, so data-derived text must be escaped.
            title = (
                f"{p['home']} vs {p['away']} "
                f"| {p.get('date','')} {p.get('time','')}"
            )
            story.append(Paragraph(escape(title), styles["Normal"]))
            story.append(Spacer(1, 2*mm))
            story.append(_pdf_match_detail_table(p))

    # ── Methodology ──
    story.append(PageBreak())
    story.append(Paragraph("Metodologia Dixon-Coles", styles["SectionHead"]))
    story.append(Paragraph(_pdf_methodology_text(model), styles["Normal"]))

    # ── Footer ──
    story.append(Spacer(1, 10*mm))
    story.append(HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#cccccc")))
    story.append(Paragraph(
        "Dixon-Coles (1997) | Solo fines analiticos | Generado con Dixon-Coles Engine",
        styles["SmallText"]
    ))

    doc.build(story)
    buf.seek(0)
    return buf


# ═════════════════════════════════════════════════════════════════════════════
# STREAMLIT INTERFACE
# ═════════════════════════════════════════════════════════════════════════════

_OUTCOME_LABELS = {"1": "🟢 LOCAL", "2": "🔴 VISITANTE", "X": "🟡 EMPATE"}


def _result_bar_chart(p):
    """Return the horizontal 1X2 probability bar chart for prediction *p*."""
    probs = [p["p_home"] * 100, p["p_draw"] * 100, p["p_away"] * 100]
    fig = go.Figure(go.Bar(
        x=probs,
        y=["1 (Local)", "X (Empate)", "2 (Visitante)"],
        orientation="h",
        marker_color=["#059669", "#d97706", "#dc2626"],
        text=[f"{v:.1f}%" for v in probs],
        textposition="auto",
    ))
    fig.update_layout(
        height=180, margin=dict(l=0, r=0, t=10, b=10),
        plot_bgcolor="rgba(255,255,255,1)", paper_bgcolor="rgba(255,255,255,1)",
        font_color="#1f2937",
        xaxis=dict(visible=False), yaxis=dict(autorange="reversed"),
    )
    return fig


def _score_heatmap(p, top_margin, with_hover):
    """Return the 6x6 score-probability heatmap for prediction *p*."""
    mat = p["matrix"][:6, :6] * 100
    # Derive labels from the matrix so a small max_goals cannot mismatch them.
    n_rows, n_cols = mat.shape
    trace_kwargs = dict(
        z=mat,
        x=[str(j) for j in range(n_cols)],
        y=[str(i) for i in range(n_rows)],
        colorscale="Viridis",
        text=np.round(mat, 1), texttemplate="%{text}%",
        textfont=dict(size=10),
    )
    if with_hover:
        trace_kwargs["hovertemplate"] = (
            "Local %{y} - Visitante %{x}: %{z:.1f}%<extra></extra>"
        )
    fig = go.Figure(go.Heatmap(**trace_kwargs))
    fig.update_layout(
        height=300, margin=dict(l=0, r=0, t=top_margin, b=0),
        xaxis_title=f"Goles {p['away']}", yaxis_title=f"Goles {p['home']}",
        plot_bgcolor="rgba(255,255,255,1)", paper_bgcolor="rgba(255,255,255,1)",
        font_color="#1f2937",
        yaxis=dict(autorange="reversed"),
    )
    return fig


def _train_and_store(results_df, fixtures_df, xi, max_iter, league_name, season_label):
    """Fit the model, predict the fixtures and save everything in session state."""
    progress = st.progress(0, text="⚙️ Calibrando modelo Dixon-Coles...")
    model = DixonColesModel(xi=xi, max_iter=max_iter)
    model.fit(
        results_df,
        progress_callback=lambda p: progress.progress(
            # round, not int: p * max_iter can land just below a whole number.
            p, text=f"⚙️ Iteración {round(p * max_iter)}/{max_iter}"
        ),
    )
    progress.empty()

    predictions = model.predict_fixtures(fixtures_df) if len(fixtures_df) > 0 else []
    st.session_state["model"] = model
    st.session_state["predictions"] = predictions
    st.session_state["rankings_df"] = model.get_rankings()
    st.session_state["league_name"] = league_name
    st.session_state["season_label"] = season_label


def _render_intro():
    """Show the landing hint and the static methodology summary."""
    st.info("👈 Selecciona una liga y pulsa **⚡ Obtener Datos y Calcular** para empezar.")
    st.markdown("---")
    with st.expander("📖 ¿Cómo funciona el modelo Dixon-Coles?", expanded=True):
        st.markdown("""
**El modelo Dixon-Coles (1997)** es una extensión del modelo Poisson bivariado que corrige
la subestimación de empates y marcadores bajos.

**Parámetros por equipo:**
- **α (Ataque):** Capacidad ofensiva relativa. α > 1 = mejor que el promedio.
- **β (Defensa):** Vulnerabilidad defensiva. β < 1 = mejor defensa.

**Goles esperados:**
- `λ_local = α_local × β_visitante × e^γ`
- `λ_visitante = α_visitante × β_local`

**Corrección τ (tau):** Ajusta probabilidades de 0-0, 1-0, 0-1, 1-1 para capturar
la dependencia real entre goles de ambos equipos.

**Fuente de datos:** [football-data.co.uk](https://www.football-data.co.uk)
""")


def _render_metrics(model):
    """Show the six global model metrics as coloured cards."""
    metrics = [
        ("Partidos", str(model.n_matches), "#4f46e5"),
        ("Equipos", str(len(model.teams)), "#059669"),
        ("ρ (rho)", f"{model.rho:.4f}", "#d97706"),
        ("Home Adv", f"{math.exp(model.home_adv):.3f}x", "#dc2626"),
        ("ξ Decay", f"{model.xi}", "#0891b2"),
        ("LogLik", f"{model.log_likelihood:.0f}", "#7c3aed"),
    ]
    for col, (label, value, color) in zip(st.columns(6), metrics):
        col.markdown(
            f'<div class="dc-metric">'
            f'<div class="dc-metric-label">{label}</div>'
            f'<div class="dc-metric-value" style="color:{color}">{value}</div>'
            f'</div>',
            unsafe_allow_html=True,
        )
    st.markdown("")


def _render_predictions_tab(predictions):
    """Render the fixtures summary table and one expander per match."""
    if not predictions:
        st.warning("No hay fixtures disponibles para esta liga. "
                   "Los fixtures se publican normalmente la semana del partido.")
        return

    sum_rows = []
    for p in predictions:
        best = p["top_scores"][0]
        sum_rows.append({
            "Fecha": p.get("date", ""),
            "Local": p["home"],
            "Visitante": p["away"],
            "xG H": f"{p['lambda_h']:.2f}",
            "xG A": f"{p['lambda_a']:.2f}",
            "Pred": _predicted_outcome(p),
            "P(1)": f"{p['p_home']*100:.0f}%",
            "P(X)": f"{p['p_draw']*100:.0f}%",
            "P(2)": f"{p['p_away']*100:.0f}%",
            "O2.5": f"{p['over_25']*100:.0f}%",
            "BTTS": f"{p['btts_yes']*100:.0f}%",
            "Score": f"{best[0]}-{best[1]}",
        })
    st.dataframe(pd.DataFrame(sum_rows), use_container_width=True, hide_index=True)

    st.markdown("### Detalle por Partido")
    for i, p in enumerate(predictions):
        winner = _OUTCOME_LABELS[_predicted_outcome(p)]
        title = f"**{p['home']}** vs **{p['away']}** — {p.get('date','')} | {winner}"
        with st.expander(title, expanded=(i == 0)):
            c1, c2 = st.columns(2)

            with c1:
                st.markdown("#### Resultado 1X2")
                st.plotly_chart(_result_bar_chart(p), use_container_width=True,
                                key=f"pred_bar_{i}")
                st.markdown("**Cuotas implícitas:**")
                q1, q2, q3 = st.columns(3)
                q1.metric("1", f"{p['odds_home']:.2f}")
                q2.metric("X", f"{p['odds_draw']:.2f}")
                q3.metric("2", f"{p['odds_away']:.2f}")

            with c2:
                st.markdown("#### Mercados")
                m1, m2 = st.columns(2)
                m1.metric("Over 2.5", f"{p['over_25']*100:.1f}%")
                m2.metric("Under 2.5", f"{p['under_25']*100:.1f}%")
                m1.metric("BTTS Sí", f"{p['btts_yes']*100:.1f}%")
                m2.metric("BTTS No", f"{p['btts_no']*100:.1f}%")
                st.markdown("**Marcadores más probables:**")
                st.markdown(" | ".join(
                    f"**{s[0]}-{s[1]}** ({s[2]*100:.1f}%)" for s in p["top_scores"][:5]
                ))

            st.markdown("#### Matriz de Probabilidades")
            st.plotly_chart(_score_heatmap(p, top_margin=30, with_hover=True),
                            use_container_width=True, key=f"pred_hm_{i}")


def _render_rankings_tab(rankings_df):
    """Render the rankings table and the attack-vs-defense scatter plot."""
    st.markdown("### Power Rankings")
    st.markdown("**Power = ATK / DEF** — Mayor es mejor. ATK > 1 = ataque superior al promedio. "
                "DEF < 1 = defensa superior.")

    fmt_df = rankings_df.copy()
    for col in ["ATK", "DEF", "Power", "xG/90 (H)", "xGA/90"]:
        fmt_df[col] = fmt_df[col].map(lambda x: f"{x:.3f}")
    st.dataframe(fmt_df, use_container_width=True)

    st.markdown("### Ataque vs Defensa")
    fig_scatter = go.Figure()
    for _, row in rankings_df.iterrows():
        if row["Power"] > 1.3:
            color = "#059669"
        elif row["Power"] < 0.7:
            color = "#dc2626"
        else:
            color = "#4f46e5"
        fig_scatter.add_trace(go.Scatter(
            x=[row["DEF"]], y=[row["ATK"]],
            mode="markers+text",
            text=[row["Equipo"]],
            textposition="top center",
            textfont=dict(size=9, color="#374151"),
            marker=dict(size=row["Power"] * 10, color=color,
                        line=dict(width=1, color="#d1d5db")),
            hovertemplate=(
                f"<b>{row['Equipo']}</b><br>"
                f"ATK: {row['ATK']:.3f}<br>"
                f"DEF: {row['DEF']:.3f}<br>"
                f"Power: {row['Power']:.3f}<extra></extra>"
            ),
            showlegend=False,
        ))
    fig_scatter.update_layout(
        height=500, margin=dict(l=20, r=20, t=30, b=20),
        xaxis_title="DEF (β) — menor = mejor defensa →",
        yaxis_title="ATK (α) — mayor = mejor ataque →",
        plot_bgcolor="rgba(247,248,252,1)", paper_bgcolor="rgba(255,255,255,1)",
        font_color="#1f2937",
        # Dashed guides at the league-average value of 1 on each axis.
        shapes=[
            dict(type="line", x0=1, x1=1, y0=0, y1=3, line=dict(color="#d1d5db", dash="dash")),
            dict(type="line", x0=0, x1=3, y0=1, y1=1, line=dict(color="#d1d5db", dash="dash")),
        ]
    )
    st.plotly_chart(fig_scatter, use_container_width=True, key="rankings_scatter")


def _render_simulator_tab(model):
    """Render the pick-two-teams match simulator."""
    st.markdown("### Simulador de Partido")
    st.markdown("Selecciona dos equipos para generar una predicción personalizada.")

    c1, c2 = st.columns(2)
    with c1:
        home_team = st.selectbox("Equipo Local", model.teams, index=0)
    with c2:
        away_options = [t for t in model.teams if t != home_team]
        away_team = st.selectbox("Equipo Visitante", away_options,
                                 index=min(1, len(away_options) - 1))

    if not st.button("🔮 Predecir", use_container_width=True):
        return
    pred = model.predict(home_team, away_team)
    if not pred:
        return

    winner = _OUTCOME_LABELS[_predicted_outcome(pred)]
    st.markdown(f"## {home_team} vs {away_team} — {winner}")

    m1, m2, m3, m4 = st.columns(4)
    m1.metric("xG Local", f"{pred['lambda_h']:.3f}")
    m2.metric("xG Visitante", f"{pred['lambda_a']:.3f}")
    m3.metric("Over 2.5", f"{pred['over_25']*100:.1f}%")
    m4.metric("BTTS", f"{pred['btts_yes']*100:.1f}%")

    c1, c2 = st.columns(2)
    with c1:
        st.plotly_chart(_result_bar_chart(pred), use_container_width=True, key="sim_bar")
    with c2:
        st.plotly_chart(_score_heatmap(pred, top_margin=10, with_hover=False),
                        use_container_width=True, key="sim_hm")

    st.markdown("**Marcadores más probables:**")
    for s in pred["top_scores"][:6]:
        st.markdown(f"- **{s[0]}-{s[1]}**: {s[2]*100:.1f}%")


def _render_methodology_tab(model):
    """Render the full methodology explanation with the fitted parameters."""
    st.markdown("### Modelo Dixon-Coles: Explicación Completa")
    st.markdown("""
#### 1. Base: Distribución Poisson Bivariada
Los goles en fútbol se modelan como eventos que siguen una distribución de Poisson:

`P(X = k) = (λ^k × e^(-λ)) / k!`

Donde **λ** es el número esperado de goles (xG del modelo).

#### 2. Parámetros por Equipo
Cada equipo tiene dos parámetros estimados iterativamente:
- **α (Ataque):** Capacidad ofensiva relativa. Valores > 1 indican ataque superior al promedio.
- **β (Defensa):** Vulnerabilidad defensiva. Valores < 1 indican defensa superior.
""")

    st.info(f"""
**Parámetros globales del modelo actual:**
- **γ (Home Advantage):** {math.exp(model.home_adv):.3f}x
- **ρ (Rho Dixon-Coles):** {model.rho:.4f}
- **ξ (Time Decay):** {model.xi}
- **Log-Likelihood:** {model.log_likelihood:.1f}
""")

    st.markdown(f"""
#### 3. Cálculo de Goles Esperados
```
λ_local = α_local × β_visitante × e^γ
λ_visitante = α_visitante × β_local
```

#### 4. Corrección Dixon-Coles (τ)
```
τ(0,0) = 1 - λ_h × λ_a × ρ
τ(1,0) = 1 + λ_a × ρ
τ(0,1) = 1 + λ_h × ρ
τ(1,1) = 1 - ρ
τ(x,y) = 1  para otros marcadores
```

#### 5. Decaimiento Temporal
```
w(t) = e^(-ξ × t)    donde ξ = {model.xi}
```
- Hace 30 días: peso = {time_decay_weight(30, model.xi):.3f}
- Hace 90 días: peso = {time_decay_weight(90, model.xi):.3f}
- Hace 180 días: peso = {time_decay_weight(180, model.xi):.3f}

#### 6. Proceso de Estimación
Se inicializan α=1, β=1. En cada iteración ({model.max_iter} total) se recalcula
cada parámetro como la razón entre goles observados y esperados (ponderados).
Se normalizan, se re-estima γ, y se optimiza ρ por grid search.

#### 7. Fuente de Datos
- **Resultados:** `football-data.co.uk/mmz4281/{{season}}/{{league}}.csv`
- **Fixtures:** `football-data.co.uk/fixtures.csv`
""")


def main():
    """Run the Streamlit app: sidebar, model fitting, result tabs and PDF export."""
    # ── Sidebar ──
    with st.sidebar:
        st.markdown("### ⚽ Dixon-Coles Engine")
        st.markdown("---")

        league_options = {
            code: f"{info[2]} {info[1]} — {info[0]}" for code, info in LEAGUES.items()
        }
        league = st.selectbox("Liga", options=list(LEAGUES.keys()),
                              format_func=lambda x: league_options[x], index=0)
        season = st.selectbox("Temporada", options=list(SEASONS.keys()),
                              format_func=lambda x: SEASONS[x])

        with st.expander("Parámetros avanzados", expanded=False):
            xi = st.slider("ξ (Time Decay)", 0.0, 0.02, 0.003, 0.001,
                           help="Controla cuánto peso tienen los partidos recientes vs antiguos. "
                                "Mayor = más peso a partidos recientes.")
            max_iter = st.slider("Iteraciones MLE", 20, 150, 80, 10,
                                 help="Número de iteraciones para la estimación de parámetros.")

        st.markdown("---")
        fetch_btn = st.button("⚡ Obtener Datos y Calcular", type="primary",
                              use_container_width=True)

        st.markdown("---")
        st.markdown("##### 📂 O carga archivos locales")
        uploaded_results = st.file_uploader("CSV Resultados", type="csv", key="res")
        uploaded_fixtures = st.file_uploader("CSV Fixtures", type="csv", key="fix")
        local_btn = st.button("📊 Calcular con archivos locales", use_container_width=True)

    # ── Header ──
    st.markdown('<h1 class="dc-title">Dixon-Coles Poisson Model</h1>',
                unsafe_allow_html=True)
    league_info = LEAGUES[league]
    st.markdown(
        f'<p class="dc-subtitle">'
        f'{league_info[2]} {league_info[0]} ({league_info[1]}) · '
        f'Temporada {SEASONS[season]} · football-data.co.uk'
        f'</p>',
        unsafe_allow_html=True
    )

    current_league_name = f"{league_info[2]} {league_info[0]}"

    # ── Main logic ──
    if fetch_btn:
        # UI boundary: any failure is reported to the user instead of crashing.
        try:
            with st.spinner("📥 Descargando resultados..."):
                results_df = fetch_results(league, season)
            st.success(f"✅ {len(results_df)} partidos descargados")

            with st.spinner("📥 Descargando fixtures..."):
                fixtures_df = fetch_fixtures(league)
            st.success(f"✅ {len(fixtures_df)} fixtures encontrados")

            _train_and_store(results_df, fixtures_df, xi, max_iter,
                             current_league_name, SEASONS[season])
        except Exception as e:
            st.error(f"❌ Error: {e}")
            return

    elif local_btn and not uploaded_results:
        st.warning("Carga primero un CSV de resultados.")

    elif local_btn:
        try:
            # Same parsing and validation as the download path.
            results_df = _prepare_results(_robust_read_csv(uploaded_results.getvalue()))
            st.success(f"✅ {len(results_df)} partidos cargados")

            fixtures_df = pd.DataFrame()
            if uploaded_fixtures:
                fixtures_df = _robust_read_csv(uploaded_fixtures.getvalue())
                if "Div" in fixtures_df.columns:
                    fixtures_df = fixtures_df[fixtures_df["Div"] == league]
                st.success(f"✅ {len(fixtures_df)} fixtures cargados")

            _train_and_store(results_df, fixtures_df, xi, max_iter,
                             current_league_name, SEASONS[season])
        except Exception as e:
            st.error(f"❌ Error: {e}")
            return

    # A fitted model survives reruns through the session state.
    model = st.session_state.get("model")
    if model is None:
        _render_intro()
        return
    predictions = st.session_state["predictions"]
    rankings_df = st.session_state["rankings_df"]

    _render_metrics(model)

    # ── Tabs ──
    tab1, tab2, tab3, tab4 = st.tabs([
        f"📊 Predicciones ({len(predictions)})",
        "🏆 Power Rankings",
        "🔬 Simulador",
        "📖 Metodología"
    ])
    with tab1:
        _render_predictions_tab(predictions)
    with tab2:
        _render_rankings_tab(rankings_df)
    with tab3:
        _render_simulator_tab(model)
    with tab4:
        _render_methodology_tab(model)

    # ── PDF download ──
    st.markdown("---")
    st.markdown("### 📄 Descargar Reporte PDF")

    if st.button("📥 Generar y Descargar PDF", type="primary", use_container_width=True):
        with st.spinner("Generando PDF..."):
            league_name = st.session_state.get("league_name", current_league_name)
            season_label = st.session_state.get("season_label", SEASONS[season])
            pdf_buf = generate_pdf(model, predictions, rankings_df, league_name, season_label)

        filename = f"Dixon_Coles_{league}_{season}_{datetime.now().strftime('%Y%m%d_%H%M')}.pdf"
        st.download_button(
            label=f"⬇️ Descargar {filename}",
            data=pdf_buf.getvalue(),
            file_name=filename,
            mime="application/pdf",
            use_container_width=True,
        )
        st.success(f"✅ PDF generado: {filename}")


if __name__ == "__main__":
    main()