#!/usr/bin/env python3
"""
═══════════════════════════════════════════════════════════════════════════════
DIXON-COLES POISSON MODEL — Streamlit App
Modelo bivariado con corrección de baja puntuación para predicción de fútbol
Fuente de datos: football-data.co.uk
═══════════════════════════════════════════════════════════════════════════════
Instalación:
pip install streamlit pandas numpy scipy requests reportlab plotly
Ejecutar:
streamlit run dixon_coles_app.py
"""
import math
import io
import base64
from datetime import datetime
from io import StringIO
import numpy as np
import pandas as pd
import streamlit as st
import plotly.graph_objects as go
import plotly.express as px
from reportlab.lib.pagesizes import A4
from reportlab.lib import colors
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import mm, cm
from reportlab.platypus import (
SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
PageBreak, HRFlowable
)
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT
# ═══════════════════════════════════════════════════════════════════════════════
# CONFIGURACIÓN
# ═══════════════════════════════════════════════════════════════════════════════
# Global Streamlit page configuration: page title, page icon, wide layout and
# a sidebar that starts expanded.
st.set_page_config(
page_title="Dixon-Coles Engine",
page_icon="⚽",
layout="wide",
initial_sidebar_state="expanded",
)
# Supported competitions keyed by league code.
# Each value is a (competition name, country, flag emoji) tuple.
# The code is used in the results download URL and matched against the "Div"
# column of the fixtures file.
LEAGUES = {
"E0": ("Premier League", "England", "🏴"),
"E1": ("Championship", "England", "🏴"),
"E2": ("League One", "England", "🏴"),
"E3": ("League Two", "England", "🏴"),
"EC": ("Conference", "England", "🏴"),
"SC0": ("Premiership", "Scotland", "🏴"),
"SC1": ("Championship", "Scotland", "🏴"),
"SC2": ("League One", "Scotland", "🏴"),
"SC3": ("League Two", "Scotland", "🏴"),
"D1": ("Bundesliga", "Germany", "🇩🇪"),
"D2": ("2. Bundesliga", "Germany", "🇩🇪"),
"SP1": ("La Liga", "Spain", "🇪🇸"),
"SP2": ("Segunda División", "Spain", "🇪🇸"),
"I1": ("Serie A", "Italy", "🇮🇹"),
"I2": ("Serie B", "Italy", "🇮🇹"),
"F1": ("Ligue 1", "France", "🇫🇷"),
"F2": ("Ligue 2", "France", "🇫🇷"),
"N1": ("Eredivisie", "Netherlands", "🇳🇱"),
"B1": ("Jupiler Pro League", "Belgium", "🇧🇪"),
"P1": ("Primeira Liga", "Portugal", "🇵🇹"),
"T1": ("Süper Lig", "Turkey", "🇹🇷"),
"G1": ("Super League", "Greece", "🇬🇷"),
}
# Season codes (as they appear in the results download URL) mapped to the
# label shown to the user.
SEASONS = {"2526": "2025/26", "2425": "2024/25", "2324": "2023/24"}
# Root of the data source; results and fixtures URLs are built from it.
BASE_URL = "https://www.football-data.co.uk"
# ═══════════════════════════════════════════════════════════════════════════════
# CSS PERSONALIZADO
# ═══════════════════════════════════════════════════════════════════════════════
# Custom CSS hook: the markup string is rendered with unsafe_allow_html=True.
# NOTE(review): the string holds only a newline here, so nothing is injected —
# confirm whether the stylesheet content was lost.
st.markdown("""
""", unsafe_allow_html=True)
# ═══════════════════════════════════════════════════════════════════════════════
# DESCARGA DE DATOS (con manejo robusto de CSV)
# ═══════════════════════════════════════════════════════════════════════════════
def _robust_read_csv(content_bytes: bytes) -> pd.DataFrame:
"""
Lee CSV manejando:
- BOM UTF-8 (\\ufeff)
- Múltiples secciones con headers repetidos dentro del mismo archivo
- Encoding Windows-1252 vs UTF-8
"""
# Intentar decodificar
for enc in ["utf-8-sig", "utf-8", "latin-1", "cp1252"]:
try:
text = content_bytes.decode(enc)
break
except (UnicodeDecodeError, UnicodeError):
continue
else:
text = content_bytes.decode("utf-8", errors="replace")
# El fixtures.csv de football-data.co.uk a veces tiene headers repetidos
# (una sección por liga). Filtrar las filas que son headers duplicados.
lines = text.strip().split("\n")
if not lines:
return pd.DataFrame()
header = lines[0].strip().replace("\ufeff", "")
clean_lines = [header]
for line in lines[1:]:
stripped = line.strip()
if not stripped:
continue
# Saltar filas que sean headers duplicados
if stripped.startswith("Div,Date,") or stripped.startswith("\ufeffDiv,Date,"):
continue
clean_lines.append(stripped)
clean_text = "\n".join(clean_lines)
df = pd.read_csv(StringIO(clean_text))
df.columns = [c.strip().replace("\ufeff", "") for c in df.columns]
return df
@st.cache_data(ttl=600, show_spinner=False)
def fetch_results(league_code: str, season: str) -> pd.DataFrame:
    """Download the historical results of one league season.

    Args:
        league_code: League code used in the download URL (a key of LEAGUES).
        season: Season code used in the download URL (a key of SEASONS).

    Returns:
        A DataFrame restricted to rows that have both team names and a
        numeric full-time score; ``FTHG`` and ``FTAG`` are integers.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If a required column is missing from the file.
    """
    import requests

    url = f"{BASE_URL}/mmz4281/{season}/{league_code}.csv"
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()
    df = _robust_read_csv(resp.content)

    required = ["HomeTeam", "AwayTeam", "FTHG", "FTAG", "Date"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise ValueError(f"Columnas faltantes: {missing}. Disponibles: {list(df.columns[:10])}")

    # Coerce rather than cast directly: a non-numeric score becomes NaN and
    # the row is dropped below instead of aborting the whole download.
    for col in ("FTHG", "FTAG"):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Rows without team names cannot be used by the model either.
    # .copy() gives an independent frame so the assignments below do not
    # trigger pandas' chained-assignment warning.
    df = df.dropna(subset=["HomeTeam", "AwayTeam", "FTHG", "FTAG"]).copy()
    df["FTHG"] = df["FTHG"].astype(int)
    df["FTAG"] = df["FTAG"].astype(int)
    return df
@st.cache_data(ttl=600, show_spinner=False)
def fetch_fixtures(league_code: str) -> pd.DataFrame:
    """Download the upcoming fixtures file and keep one league's rows.

    Args:
        league_code: Value to match against the ``Div`` column.

    Returns:
        An independent copy of the rows whose ``Div`` equals ``league_code``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the file has no ``Div`` column.
    """
    import requests

    response = requests.get(f"{BASE_URL}/fixtures.csv", timeout=30)
    response.raise_for_status()
    all_fixtures = _robust_read_csv(response.content)

    if "Div" not in all_fixtures.columns:
        raise ValueError(
            f"Columna 'Div' no encontrada. Columnas: {list(all_fixtures.columns[:10])}"
        )

    in_league = all_fixtures["Div"] == league_code
    return all_fixtures[in_league].copy()
# ═══════════════════════════════════════════════════════════════════════════════
# MODELO DIXON-COLES
# ═══════════════════════════════════════════════════════════════════════════════
def parse_date(date_str: str) -> datetime:
    """Convert a match date into a naive ``datetime`` at midnight.

    Accepted inputs:
    - ``dd/mm/yyyy`` and ``dd/mm/yy`` strings,
    - ISO strings such as ``yyyy-mm-dd``,
    - ``datetime`` instances (including subclasses), truncated to the day.

    Two-digit years follow the POSIX ``%y`` convention: 69-99 map to
    1969-1999 and 00-68 map to 2000-2068, so matches from the 1990s are not
    mistaken for future dates.

    Args:
        date_str: The date value to interpret.

    Returns:
        The parsed date, or ``datetime.now()`` when the value cannot be
        interpreted (best-effort fallback kept for backward compatibility).
    """
    try:
        if isinstance(date_str, datetime):
            return datetime(date_str.year, date_str.month, date_str.day)

        text = str(date_str).strip()
        parts = text.split("/")
        if len(parts) == 3:
            d, m, y = (int(part) for part in parts)
            if 0 <= y < 100:
                y += 1900 if y >= 69 else 2000
            return datetime(y, m, d)

        iso = datetime.fromisoformat(text)
        return datetime(iso.year, iso.month, iso.day)
    except (TypeError, ValueError, OverflowError):
        # Unparseable value: fall back to the current time as before.
        return datetime.now()
def poisson_pmf(k: int, lam: float) -> float:
    """Return the Poisson probability of observing ``k`` events at rate ``lam``.

    Args:
        k: Number of events. Negative counts have probability 0.
        lam: Expected number of events. A non-positive rate is treated as a
            point mass at zero events.

    Returns:
        ``lam**k * exp(-lam) / k!`` as a float.
    """
    if k < 0:
        return 0.0
    if lam <= 0:
        return 1.0 if k == 0 else 0.0
    try:
        return (lam ** k) * math.exp(-lam) / math.factorial(k)
    except OverflowError:
        # lam**k or k! no longer fits in a float; the same quantity computed
        # in log space stays finite.
        return math.exp(k * math.log(lam) - lam - math.lgamma(k + 1))
def dixon_coles_tau(x, y, lam_h, lam_a, rho):
    """Return the Dixon-Coles correction factor for the scoreline ``x``-``y``.

    Only the four scorelines 0-0, 0-1, 1-0 and 1-1 are adjusted; every other
    scoreline gets a factor of exactly 1.0.

    Args:
        x: Home goals.
        y: Away goals.
        lam_h: Expected home goals.
        lam_a: Expected away goals.
        rho: Low-score dependence parameter.
    """
    scoreline = (x, y)
    if scoreline == (0, 0):
        return 1.0 - lam_h * lam_a * rho
    if scoreline == (0, 1):
        return 1.0 + lam_h * rho
    if scoreline == (1, 0):
        return 1.0 + lam_a * rho
    if scoreline == (1, 1):
        return 1.0 - rho
    return 1.0
def time_decay_weight(days_ago, xi=0.003):
return math.exp(-xi * max(days_ago, 0))
class DixonColesModel:
    """Dixon-Coles football score model fitted by iterative re-estimation.

    Every team has a multiplicative attack and defense strength. Together
    with a log home-advantage term and the low-score correction ``rho`` they
    define the expected goals of a fixture:

        lambda_home = attack[home] * defense[away] * exp(home_adv)
        lambda_away = attack[away] * defense[home]

    Attributes are plain values so callers can read them directly after
    ``fit``: ``attack``, ``defense``, ``home_adv``, ``rho``, ``teams``,
    ``n_matches`` and ``log_likelihood``.
    """

    # Candidate rho values scanned while fitting: -0.15, -0.14, ..., 0.05.
    # Built from integers so the grid always has exactly 21 points and no
    # floating-point drift at its end points.
    _RHO_GRID = tuple(step / 100 for step in range(-15, 6))

    def __init__(self, xi=0.003, max_iter=80):
        """Create an unfitted model.

        Args:
            xi: Time-decay rate applied to the age of each match in days.
            max_iter: Number of re-estimation passes performed by ``fit``.
        """
        self.xi = xi
        self.max_iter = max_iter
        self.attack = {}
        self.defense = {}
        self.home_adv = 0.25
        self.rho = -0.05
        self.teams = []
        self.n_matches = 0
        # Defined up front so reports can read it before fit() has run.
        self.log_likelihood = -float("inf")

    def fit(self, df, progress_callback=None):
        """Estimate all parameters from a results table.

        Args:
            df: DataFrame with ``HomeTeam``, ``AwayTeam``, ``FTHG``, ``FTAG``
                and ``Date`` columns.
            progress_callback: Optional callable receiving the completed
                fraction (0.0 to 1.0) at the start of each pass and once
                more at the end.

        Returns:
            The model itself, to allow chaining.

        Raises:
            ValueError: With fewer than 4 teams or fewer than 10 matches.
        """
        now = datetime.now()
        # Each match is stored as (home, away, home_goals, away_goals, weight).
        matches = []
        for _, row in df.iterrows():
            days_ago = (now - parse_date(row["Date"])).days
            matches.append((
                row["HomeTeam"], row["AwayTeam"],
                int(row["FTHG"]), int(row["FTAG"]),
                time_decay_weight(days_ago, self.xi),
            ))

        self.n_matches = len(matches)
        self.teams = sorted({m[0] for m in matches} | {m[1] for m in matches})
        n_teams = len(self.teams)
        if n_teams < 4 or len(matches) < 10:
            raise ValueError(f"Datos insuficientes: {len(matches)} partidos, {n_teams} equipos")

        attack = {t: 1.0 for t in self.teams}
        defense = {t: 1.0 for t in self.teams}
        home_adv = 0.25
        rho = -0.05
        best_ll = -float("inf")
        # Weighted total of home goals; it does not change between passes.
        ha_num = sum(hg * w for _, _, hg, _, w in matches)

        for iteration in range(self.max_iter):
            if progress_callback:
                progress_callback(iteration / self.max_iter)

            # --- attack / defense update --------------------------------
            # One pass over the matches feeds the accumulators of both
            # teams involved, instead of rescanning every match per team.
            home_factor = math.exp(home_adv)
            att_num = dict.fromkeys(self.teams, 0.0)
            att_den = dict.fromkeys(self.teams, 0.0)
            def_num = dict.fromkeys(self.teams, 0.0)
            def_den = dict.fromkeys(self.teams, 0.0)
            for h, a, hg, ag, w in matches:
                att_num[h] += hg * w
                att_den[h] += defense[a] * home_factor * w
                def_num[h] += ag * w
                def_den[h] += attack[a] * w

                att_num[a] += ag * w
                att_den[a] += defense[h] * w
                def_num[a] += hg * w
                def_den[a] += attack[h] * home_factor * w

            new_attack = {t: att_num[t] / max(att_den[t], 1e-8) for t in self.teams}
            new_defense = {t: def_num[t] / max(def_den[t], 1e-8) for t in self.teams}

            # Rescale so the geometric mean of each strength set is 1; this
            # pins down the otherwise free overall scale.
            geo_att = math.exp(sum(math.log(max(new_attack[t], 1e-8)) for t in self.teams) / n_teams)
            geo_def = math.exp(sum(math.log(max(new_defense[t], 1e-8)) for t in self.teams) / n_teams)
            for t in self.teams:
                new_attack[t] /= geo_att
                new_defense[t] /= geo_def

            # --- home advantage update ----------------------------------
            ha_den = sum(new_attack[h] * new_defense[a] * w for h, a, _, _, w in matches)
            # The multiplicative factor is floored at 0.5 before taking logs.
            new_ha = math.log(max(ha_num / max(ha_den, 1e-8), 0.5))

            # --- rho grid search ----------------------------------------
            # The Poisson part of each match's log-likelihood does not
            # depend on rho, so it is computed once per pass rather than
            # once per rho candidate.
            new_home_factor = math.exp(new_ha)
            terms = []
            for h, a, hg, ag, w in matches:
                lh = new_attack[h] * new_defense[a] * new_home_factor
                la = new_attack[a] * new_defense[h]
                p1 = poisson_pmf(hg, lh)
                p2 = poisson_pmf(ag, la)
                if p1 > 0 and p2 > 0:
                    terms.append((hg, ag, lh, la, w, math.log(p1) + math.log(p2)))

            best_rho = rho
            best_ll_iter = -float("inf")
            for r in self._RHO_GRID:
                ll = 0.0
                for hg, ag, lh, la, w, base in terms:
                    tau = dixon_coles_tau(hg, ag, lh, la, r)
                    if tau > 0:
                        ll += w * (base + math.log(tau))
                if ll > best_ll_iter:
                    best_ll_iter = ll
                    best_rho = r

            attack, defense, home_adv, rho = new_attack, new_defense, new_ha, best_rho
            best_ll = best_ll_iter

        self.attack = attack
        self.defense = defense
        self.home_adv = home_adv
        self.rho = float(rho)
        self.log_likelihood = best_ll
        if progress_callback:
            progress_callback(1.0)
        return self

    def predict(self, home_team, away_team, max_goals=7):
        """Predict one fixture.

        Args:
            home_team: Name of the home team.
            away_team: Name of the away team.
            max_goals: Largest goal count per side kept in the score matrix.

        Returns:
            A dict with expected goals, 1X2 / over-under / BTTS
            probabilities, fair odds, the eight most likely scores (among
            scorelines of at most five goals per side) and the normalised
            score matrix (rows = home goals, columns = away goals).
            ``None`` when either team is unknown to the model.
        """
        if home_team not in self.attack or away_team not in self.attack:
            return None

        lam_h = self.attack[home_team] * self.defense[away_team] * math.exp(self.home_adv)
        lam_a = self.attack[away_team] * self.defense[home_team]

        size = max_goals + 1
        away_pmf = [poisson_pmf(j, lam_a) for j in range(size)]
        matrix = np.zeros((size, size))
        for i in range(size):
            home_p = poisson_pmf(i, lam_h)
            for j in range(size):
                # A non-positive tau would yield a negative "probability";
                # clamp it so the matrix stays a valid distribution.
                tau = max(dixon_coles_tau(i, j, lam_h, lam_a, self.rho), 0.0)
                matrix[i][j] = home_p * away_pmf[j] * tau
        total = matrix.sum()
        if total > 0:
            # Renormalise: truncating at max_goals and applying tau both
            # move the total away from 1.
            matrix /= total

        goals = np.arange(size)
        goal_sum = goals[:, None] + goals[None, :]
        pH = float(np.tril(matrix, -1).sum())   # home goals > away goals
        pD = float(np.trace(matrix))
        pA = float(np.triu(matrix, 1).sum())    # home goals < away goals
        o25 = float(matrix[goal_sum > 2].sum())
        btts = float(matrix[1:, 1:].sum())

        top = min(6, size)
        scores = [(i, j, float(matrix[i][j])) for i in range(top) for j in range(top)]
        scores.sort(key=lambda s: s[2], reverse=True)

        return {
            "home": home_team, "away": away_team,
            "lambda_h": lam_h, "lambda_a": lam_a,
            "p_home": pH, "p_draw": pD, "p_away": pA,
            "over_25": o25, "under_25": 1 - o25,
            "btts_yes": btts, "btts_no": 1 - btts,
            # Probabilities are floored at 0.001 so odds stay finite.
            "odds_home": 1 / max(pH, .001),
            "odds_draw": 1 / max(pD, .001),
            "odds_away": 1 / max(pA, .001),
            "odds_over25": 1 / max(o25, .001),
            "odds_under25": 1 / max(1 - o25, .001),
            "top_scores": scores[:8], "matrix": matrix,
            "atk_home": self.attack[home_team], "def_home": self.defense[home_team],
            "atk_away": self.attack[away_team], "def_away": self.defense[away_team],
        }

    def predict_fixtures(self, fixtures_df):
        """Predict every row of a fixtures table.

        Rows whose teams are unknown to the model are skipped. Each
        prediction additionally carries the row's ``Date`` and ``Time``
        values (empty string when the column is absent) under the keys
        ``date`` and ``time``.
        """
        preds = []
        for _, row in fixtures_df.iterrows():
            pred = self.predict(row["HomeTeam"], row["AwayTeam"])
            if pred:
                pred["date"] = row.get("Date", "")
                pred["time"] = row.get("Time", "")
                preds.append(pred)
        return preds

    def get_rankings(self):
        """Return the teams ranked by ``Power`` (attack / defense), best first.

        The index starts at 1 and is named ``#``. An unfitted model yields
        an empty table with the same columns.
        """
        columns = ["Equipo", "ATK", "DEF", "Power", "xG/90 (H)", "xGA/90"]
        home_factor = math.exp(self.home_adv)
        rows = []
        for t in self.teams:
            atk, defe = self.attack[t], self.defense[t]
            rows.append({
                "Equipo": t, "ATK": atk, "DEF": defe,
                "Power": atk / max(defe, 0.01),
                "xG/90 (H)": atk * home_factor,
                "xGA/90": defe,
            })
        df = pd.DataFrame(rows, columns=columns)
        df = df.sort_values("Power", ascending=False).reset_index(drop=True)
        df.index += 1
        df.index.name = "#"
        return df
# ═══════════════════════════════════════════════════════════════════════════════
# GENERADOR DE PDF
# ═══════════════════════════════════════════════════════════════════════════════
def generate_pdf(model, predictions, rankings_df, league_name, season_label):
    """Build the PDF report for a fitted model.

    The report contains a cover block with the model parameters, the power
    rankings, a predictions summary plus a per-match detail section (only
    when ``predictions`` is non-empty) and a methodology page.

    Args:
        model: Fitted ``DixonColesModel``.
        predictions: List of prediction dicts as returned by
            ``DixonColesModel.predict_fixtures``.
        rankings_df: DataFrame as returned by ``DixonColesModel.get_rankings``.
        league_name: League label shown on the cover.
        season_label: Season label shown on the cover.

    Returns:
        An ``io.BytesIO`` positioned at the start of the PDF bytes.
    """
    # Paragraph text is parsed as XML-like markup, so dynamic text must be
    # escaped before it is embedded.
    from xml.sax.saxutils import escape

    buf = io.BytesIO()
    doc = SimpleDocTemplate(
        buf, pagesize=A4,
        topMargin=20*mm, bottomMargin=15*mm,
        leftMargin=15*mm, rightMargin=15*mm
    )
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(
        "CustomTitle", parent=styles["Title"],
        fontSize=22, spaceAfter=4, textColor=colors.HexColor("#1a1a2e"),
        fontName="Helvetica-Bold"
    ))
    styles.add(ParagraphStyle(
        "CustomSubtitle", parent=styles["Normal"],
        fontSize=10, textColor=colors.HexColor("#666680"),
        spaceAfter=14, fontName="Helvetica"
    ))
    styles.add(ParagraphStyle(
        "SectionHead", parent=styles["Heading2"],
        fontSize=14, textColor=colors.HexColor("#1a1a2e"),
        spaceBefore=16, spaceAfter=8, fontName="Helvetica-Bold"
    ))
    styles.add(ParagraphStyle(
        "CellText", parent=styles["Normal"],
        fontSize=8, fontName="Helvetica", leading=10
    ))
    styles.add(ParagraphStyle(
        "CellBold", parent=styles["Normal"],
        fontSize=8, fontName="Helvetica-Bold", leading=10
    ))
    styles.add(ParagraphStyle(
        "SmallText", parent=styles["Normal"],
        fontSize=7, textColor=colors.HexColor("#888888"), leading=9
    ))
    story = []

    # -- Cover --
    story.append(Spacer(1, 30*mm))
    story.append(Paragraph("Dixon-Coles Poisson Model", styles["CustomTitle"]))
    story.append(Paragraph(
        escape(
            f"{league_name} | Temporada {season_label} | "
            f"Generado: {datetime.now().strftime('%d/%m/%Y %H:%M')}"
        ),
        styles["CustomSubtitle"]
    ))
    story.append(HRFlowable(width="100%", thickness=1, color=colors.HexColor("#e0e0e0")))
    story.append(Spacer(1, 6*mm))

    # Model parameters
    params_data = [
        ["Parámetro", "Valor", "Descripción"],
        ["Partidos analizados", str(model.n_matches), "Total de partidos históricos usados"],
        ["Equipos", str(len(model.teams)), "Equipos en la liga"],
        ["rho (p)", f"{model.rho:.4f}", "Corrección Dixon-Coles para marcadores bajos"],
        ["Home Advantage", f"{math.exp(model.home_adv):.3f}x", "Factor multiplicativo de ventaja local"],
        ["xi (decay)", f"{model.xi}", "Parámetro de decaimiento temporal"],
        ["Iteraciones", str(model.max_iter), "Iteraciones MLE para convergencia"],
        ["Log-Likelihood", f"{model.log_likelihood:.1f}", "Log-verosimilitud del modelo calibrado"],
    ]
    params_table = Table(params_data, colWidths=[40*mm, 30*mm, 100*mm])
    params_table.setStyle(TableStyle([
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a1a2e")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 8),
        ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8f8fc")]),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e0e0e8")),
        ("TOPPADDING", (0, 0), (-1, -1), 4),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
        ("LEFTPADDING", (0, 0), (-1, -1), 6),
    ]))
    story.append(params_table)

    # -- Power rankings --
    story.append(Spacer(1, 8*mm))
    story.append(Paragraph("Power Rankings", styles["SectionHead"]))
    rank_header = ["#", "Equipo", "ATK (a)", "DEF (b)", "Power", "xG/90 (H)", "xGA/90"]
    rank_data = [rank_header]
    for i, row in rankings_df.iterrows():
        rank_data.append([
            str(i),
            row["Equipo"],
            f"{row['ATK']:.3f}",
            f"{row['DEF']:.3f}",
            f"{row['Power']:.3f}",
            f"{row['xG/90 (H)']:.3f}",
            f"{row['xGA/90']:.3f}",
        ])
    col_w = [10*mm, 38*mm, 22*mm, 22*mm, 22*mm, 25*mm, 22*mm]
    rank_table = Table(rank_data, colWidths=col_w, repeatRows=1)
    rank_style = [
        ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a1a2e")),
        ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
        ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
        ("FONTSIZE", (0, 0), (-1, -1), 7),
        ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
        ("ALIGN", (0, 0), (-1, -1), "CENTER"),
        ("ALIGN", (1, 0), (1, -1), "LEFT"),
        ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8f8fc")]),
        ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e0e0e8")),
        ("TOPPADDING", (0, 0), (-1, -1), 3),
        ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
        ("LEFTPADDING", (0, 0), (-1, -1), 4),
    ]
    # Rank column: top 3 in bold green, positions 4-7 in amber.
    for row_i in range(1, min(4, len(rank_data))):
        rank_style.append(("TEXTCOLOR", (0, row_i), (0, row_i), colors.HexColor("#16a34a")))
        rank_style.append(("FONTNAME", (0, row_i), (0, row_i), "Helvetica-Bold"))
    for row_i in range(4, min(8, len(rank_data))):
        rank_style.append(("TEXTCOLOR", (0, row_i), (0, row_i), colors.HexColor("#ca8a04")))
    rank_table.setStyle(TableStyle(rank_style))
    story.append(rank_table)

    # -- Predictions --
    if predictions:
        story.append(PageBreak())
        story.append(Paragraph("Predicciones - Proximos Partidos", styles["SectionHead"]))
        pred_header = [
            "Fecha", "Local", "Visitante", "xG H", "xG A",
            "P(1)", "P(X)", "P(2)", "O2.5", "BTTS", "Score"
        ]
        pred_data = [pred_header]
        for p in predictions:
            best_score = p["top_scores"][0]
            pred_data.append([
                str(p.get("date", "")),
                p["home"], p["away"],
                f"{p['lambda_h']:.2f}", f"{p['lambda_a']:.2f}",
                f"{p['p_home']*100:.0f}%", f"{p['p_draw']*100:.0f}%", f"{p['p_away']*100:.0f}%",
                f"{p['over_25']*100:.0f}%", f"{p['btts_yes']*100:.0f}%",
                f"{best_score[0]}-{best_score[1]}",
            ])
        pred_col_w = [18*mm, 28*mm, 28*mm, 14*mm, 14*mm, 14*mm, 14*mm, 14*mm, 14*mm, 14*mm, 14*mm]
        pred_table = Table(pred_data, colWidths=pred_col_w, repeatRows=1)
        pred_table.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#1a1a2e")),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, -1), 7),
            ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
            ("ALIGN", (3, 0), (-1, -1), "CENTER"),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, colors.HexColor("#f8f8fc")]),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.HexColor("#e0e0e8")),
            ("TOPPADDING", (0, 0), (-1, -1), 3),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
            ("LEFTPADDING", (0, 0), (-1, -1), 3),
        ]))
        story.append(pred_table)

        # Per-match detail
        story.append(Spacer(1, 6*mm))
        story.append(Paragraph("Detalle por Partido", styles["SectionHead"]))
        det_col_w = [22*mm, 28*mm, 18*mm, 4*mm, 22*mm, 28*mm, 18*mm]
        for idx, p in enumerate(predictions):
            # Start a new page before every third match.
            if idx > 0 and idx % 3 == 0:
                story.append(PageBreak())
            story.append(Spacer(1, 3*mm))
            match_title = (
                f"{p['home']} vs {p['away']} "
                f"| {p.get('date','')} {p.get('time','')}"
            )
            story.append(Paragraph(escape(match_title), styles["Normal"]))
            story.append(Spacer(1, 2*mm))
            detail_data = [
                ["Mercado", "Prob.", "Cuota", "", "Mercado", "Prob.", "Cuota"],
                ["1 (Local)", f"{p['p_home']*100:.1f}%", f"{p['odds_home']:.2f}", "",
                 "Over 2.5", f"{p['over_25']*100:.1f}%", f"{p['odds_over25']:.2f}"],
                ["X (Empate)", f"{p['p_draw']*100:.1f}%", f"{p['odds_draw']:.2f}", "",
                 "Under 2.5", f"{p['under_25']*100:.1f}%", f"{p['odds_under25']:.2f}"],
                ["2 (Visit.)", f"{p['p_away']*100:.1f}%", f"{p['odds_away']:.2f}", "",
                 "BTTS Si", f"{p['btts_yes']*100:.1f}%", ""],
            ]
            # Most likely scores (row index 4, spanned across columns 1-6 below)
            scores_str = " | ".join(
                f"{s[0]}-{s[1]} ({s[2]*100:.1f}%)" for s in p["top_scores"][:4]
            )
            detail_data.append(["Scores", scores_str, "", "", "", "", ""])
            # Expected goals row
            detail_data.append([
                f"xG {p['home']}", f"{p['lambda_h']:.3f}", "",
                "", f"xG {p['away']}", f"{p['lambda_a']:.3f}", ""
            ])
            detail_data.append([
                "ATK/DEF", f"{p['atk_home']:.3f}/{p['def_home']:.3f}", "",
                "", "ATK/DEF", f"{p['atk_away']:.3f}/{p['def_away']:.3f}", ""
            ])
            det_table = Table(detail_data, colWidths=det_col_w)
            det_table.setStyle(TableStyle([
                ("BACKGROUND", (0, 0), (-1, 0), colors.HexColor("#e8e8f0")),
                ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
                ("FONTSIZE", (0, 0), (-1, -1), 7),
                ("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
                ("GRID", (0, 0), (-1, -1), 0.3, colors.HexColor("#e0e0e8")),
                ("TOPPADDING", (0, 0), (-1, -1), 2),
                ("BOTTOMPADDING", (0, 0), (-1, -1), 2),
                ("LEFTPADDING", (0, 0), (-1, -1), 3),
                ("SPAN", (1, 4), (6, 4)),  # scores row span
            ]))
            story.append(det_table)

    # -- Methodology --
    story.append(PageBreak())
    story.append(Paragraph("Metodologia Dixon-Coles", styles["SectionHead"]))
    method_lines = [
        "Base: Distribucion Poisson Bivariada — P(X=k) = (lambda^k x e^(-lambda)) / k!",
        f"Parametros por equipo estimados por MLE iterativo ({model.max_iter} iteraciones):",
        "- alpha (Ataque): capacidad ofensiva relativa. alpha > 1 = mejor que el promedio.",
        "- beta (Defensa): vulnerabilidad defensiva. beta < 1 = mejor defensa.",
        f"- gamma (Home Advantage): {math.exp(model.home_adv):.3f}x",
        f"- rho (Dixon-Coles): {model.rho:.4f}",
        "Goles esperados:",
        "lambda_local = alpha_local x beta_visitante x e^gamma",
        "lambda_visitante = alpha_visitante x beta_local",
        "Correccion Dixon-Coles (tau): Ajusta P(0-0), P(1-0), P(0-1), P(1-1) para capturar",
        "la dependencia real entre goles. Con rho < 0 los empates son mas probables.",
        f"Decaimiento temporal: w(t) = e^(-xi x t), xi={model.xi}.",
        "Partidos recientes pesan mas.",
        "Fuente de datos: football-data.co.uk",
    ]
    # Paragraph collapses raw newlines into spaces; explicit <br/> tags keep
    # one statement per line, and escaping protects the literal '<' / '>'.
    method_text = "<br/>".join(escape(line) for line in method_lines)
    story.append(Paragraph(method_text, styles["Normal"]))

    # Footer
    story.append(Spacer(1, 10*mm))
    story.append(HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#cccccc")))
    story.append(Paragraph(
        "Dixon-Coles (1997) | Solo fines analiticos | Generado con Dixon-Coles Engine",
        styles["SmallText"]
    ))
    doc.build(story)
    buf.seek(0)
    return buf
# ═══════════════════════════════════════════════════════════════════════════════
# INTERFAZ STREAMLIT
# ═══════════════════════════════════════════════════════════════════════════════
def main():
# ── SIDEBAR ──
with st.sidebar:
st.markdown("### ⚽ Dixon-Coles Engine")
st.markdown("---")
# Liga
league_options = {code: f"{info[2]} {info[1]} — {info[0]}" for code, info in LEAGUES.items()}
league = st.selectbox("Liga", options=list(LEAGUES.keys()),
format_func=lambda x: league_options[x], index=0)
# Temporada
season = st.selectbox("Temporada", options=list(SEASONS.keys()),
format_func=lambda x: SEASONS[x])
# Parámetros avanzados
with st.expander("Parámetros avanzados", expanded=False):
xi = st.slider("ξ (Time Decay)", 0.0, 0.02, 0.003, 0.001,
help="Controla cuánto peso tienen los partidos recientes vs antiguos. "
"Mayor = más peso a partidos recientes.")
max_iter = st.slider("Iteraciones MLE", 20, 150, 80, 10,
help="Número de iteraciones para la estimación de parámetros.")
st.markdown("---")
fetch_btn = st.button("⚡ Obtener Datos y Calcular", type="primary", use_container_width=True)
# Upload local
st.markdown("---")
st.markdown("##### 📂 O carga archivos locales")
uploaded_results = st.file_uploader("CSV Resultados", type="csv", key="res")
uploaded_fixtures = st.file_uploader("CSV Fixtures", type="csv", key="fix")
local_btn = st.button("📊 Calcular con archivos locales", use_container_width=True)
# ── HEADER ──
st.markdown('
{league_info[2]} {league_info[0]} ({league_info[1]}) · ' f'Temporada {SEASONS[season]} · football-data.co.uk
', unsafe_allow_html=True ) # ── LÓGICA PRINCIPAL ── model = None predictions = [] rankings_df = None if fetch_btn: try: with st.spinner("📥 Descargando resultados..."): results_df = fetch_results(league, season) st.success(f"✅ {len(results_df)} partidos descargados") with st.spinner("📥 Descargando fixtures..."): fixtures_df = fetch_fixtures(league) st.success(f"✅ {len(fixtures_df)} fixtures encontrados") progress = st.progress(0, text="⚙️ Calibrando modelo Dixon-Coles...") model = DixonColesModel(xi=xi, max_iter=max_iter) model.fit(results_df, progress_callback=lambda p: progress.progress(p, text=f"⚙️ Iteración {int(p*max_iter)}/{max_iter}")) progress.empty() predictions = model.predict_fixtures(fixtures_df) rankings_df = model.get_rankings() st.session_state["model"] = model st.session_state["predictions"] = predictions st.session_state["rankings_df"] = rankings_df st.session_state["league_name"] = f"{league_info[2]} {league_info[0]}" st.session_state["season_label"] = SEASONS[season] except Exception as e: st.error(f"❌ Error: {e}") return elif local_btn and uploaded_results: try: results_df = pd.read_csv(uploaded_results, encoding="utf-8-sig") results_df.columns = [c.strip().replace("\ufeff", "") for c in results_df.columns] results_df = results_df.dropna(subset=["FTHG", "FTAG"]) results_df["FTHG"] = results_df["FTHG"].astype(int) results_df["FTAG"] = results_df["FTAG"].astype(int) st.success(f"✅ {len(results_df)} partidos cargados") fixtures_df = pd.DataFrame() if uploaded_fixtures: fixtures_df = pd.read_csv(uploaded_fixtures, encoding="utf-8-sig") fixtures_df.columns = [c.strip().replace("\ufeff", "") for c in fixtures_df.columns] if "Div" in fixtures_df.columns: fixtures_df = fixtures_df[fixtures_df["Div"] == league] st.success(f"✅ {len(fixtures_df)} fixtures cargados") progress = st.progress(0, text="⚙️ Calibrando modelo...") model = DixonColesModel(xi=xi, max_iter=max_iter) model.fit(results_df, progress_callback=lambda p: progress.progress(p)) 
progress.empty() predictions = model.predict_fixtures(fixtures_df) if len(fixtures_df) > 0 else [] rankings_df = model.get_rankings() st.session_state["model"] = model st.session_state["predictions"] = predictions st.session_state["rankings_df"] = rankings_df st.session_state["league_name"] = f"{league_info[2]} {league_info[0]}" st.session_state["season_label"] = SEASONS[season] except Exception as e: st.error(f"❌ Error: {e}") return # Recuperar del session state if "model" in st.session_state: model = st.session_state["model"] predictions = st.session_state["predictions"] rankings_df = st.session_state["rankings_df"] if model is None: st.info("👈 Selecciona una liga y pulsa **⚡ Obtener Datos y Calcular** para empezar.") st.markdown("---") # Metodología estática with st.expander("📖 ¿Cómo funciona el modelo Dixon-Coles?", expanded=True): st.markdown(""" **El modelo Dixon-Coles (1997)** es una extensión del modelo Poisson bivariado que corrige la subestimación de empates y marcadores bajos. **Parámetros por equipo:** - **α (Ataque):** Capacidad ofensiva relativa. α > 1 = mejor que el promedio. - **β (Defensa):** Vulnerabilidad defensiva. β < 1 = mejor defensa. **Goles esperados:** - `λ_local = α_local × β_visitante × e^γ` - `λ_visitante = α_visitante × β_local` **Corrección τ (tau):** Ajusta probabilidades de 0-0, 1-0, 0-1, 1-1 para capturar la dependencia real entre goles de ambos equipos. **Fuente de datos:** [football-data.co.uk](https://www.football-data.co.uk) """) return # ── MÉTRICAS GLOBALES ── cols = st.columns(6) metrics = [ ("Partidos", str(model.n_matches), "#4f46e5"), ("Equipos", str(len(model.teams)), "#059669"), ("ρ (rho)", f"{model.rho:.4f}", "#d97706"), ("Home Adv", f"{math.exp(model.home_adv):.3f}x", "#dc2626"), ("ξ Decay", f"{model.xi}", "#0891b2"), ("LogLik", f"{model.log_likelihood:.0f}", "#7c3aed"), ] for col, (label, value, color) in zip(cols, metrics): col.markdown(f"""