|
|
|
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from scipy.optimize import minimize |
|
|
from scipy.stats import poisson |
|
|
from itertools import product as iterproduct |
|
|
import warnings |
|
|
import io |
|
|
import datetime |
|
|
|
|
|
# Suppress every Python warning for the whole process.
warnings.filterwarnings('ignore')

# Page-level settings, applied before any other Streamlit call in this script.
_PAGE_CONFIG = {
    'page_title': "Predictor de Futbol",
    'page_icon': "⚽",
    'layout': "wide",
    'initial_sidebar_state': "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.sidebar.title("Configuracion")

# League code -> display name. The code is interpolated into the data URL
# elsewhere in this file; insertion order is the order offered in the selector.
LIGAS_DISPONIBLES = {
    'SP1': 'La Liga (Espana)',
    'SP2': 'Segunda Division (Espana)',
    'E0': 'Premier League (Inglaterra)',
    'E1': 'Championship (Inglaterra)',
    'D1': 'Bundesliga (Alemania)',
    'D2': '2. Bundesliga (Alemania)',
    'I1': 'Serie A (Italia)',
    'I2': 'Serie B (Italia)',
    'F1': 'Ligue 1 (Francia)',
    'F2': 'Ligue 2 (Francia)',
    'N1': 'Eredivisie (Holanda)',
    'B1': 'Jupiler Pro League (Belgica)',
    'P1': 'Primeira Liga (Portugal)',
    'T1': 'Super Lig (Turquia)',
    'G1': 'Super League (Grecia)',
}


def _etiqueta_liga(codigo):
    """Return the selector label for a league code ("<code> - <name>")."""
    return f"{codigo} - {LIGAS_DISPONIBLES[codigo]}"


# Selected league code; the first entry of the mapping is the default.
LIGA = st.sidebar.selectbox(
    "Liga",
    options=list(LIGAS_DISPONIBLES),
    format_func=_etiqueta_liga,
    index=0,
)

# Number of most recent home games per team used by the short-window model.
ULTIMOS_N_LOCAL = st.sidebar.slider(
    "Ultimos N como local", min_value=3, max_value=10, value=5)

# Size of the score grid (goals 0 .. MAX_GOLES - 1 per side) used in predictions.
MAX_GOLES = st.sidebar.slider(
    "Goles maximos simulacion", min_value=5, max_value=12, value=8)
|
|
|
|
|
|
|
|
# Results CSV for the selected league.
# NOTE(review): the season code "2526" is hard-coded in the path, so this URL
# must be edited by hand when the season changes.
URL_HISTORICO = f'https://www.football-data.co.uk/mmz4281/2526/{LIGA}.csv'

# Upcoming fixtures CSV (not parameterised by league in the URL).
URL_FIXTURES = 'https://www.football-data.co.uk/fixtures.csv'

# Show the data sources in the sidebar so the user can see what is downloaded.
with st.sidebar:
    st.markdown("---")
    st.markdown(f"**URL Historico:** `{URL_HISTORICO}`")
    st.markdown(f"**URL Fixtures:** `{URL_FIXTURES}`")

st.title("Predictor de Futbol Generico")
st.caption("Dixon-Coles (Goles/Resultados) + Poisson Independiente (Corners, Tiros, etc.)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def tau(x, y, lam, mu, rho):
    """Return the Dixon-Coles low-score correction factor for the score x-y.

    Args:
        x: goals of the first (home) side.
        y: goals of the second (away) side.
        lam: expected goals paired with ``x``.
        mu: expected goals paired with ``y``.
        rho: dependence parameter.

    Returns:
        The multiplicative adjustment for the scores 0-0, 0-1, 1-0 and 1-1,
        and 1.0 for every other score.
    """
    if x == 0:
        if y == 0:
            return 1 - lam * mu * rho
        if y == 1:
            return 1 + lam * rho
    elif x == 1:
        if y == 0:
            return 1 + mu * rho
        if y == 1:
            return 1 - rho
    # Any score outside the 2x2 low-score corner is left uncorrected.
    return 1.0
|
|
|
|
|
def dc_log_likelihood(params, df, team_idx, n_teams):
    """Return the negative Dixon-Coles log-likelihood of the matches in ``df``.

    This is the objective handed to the optimiser, so it is evaluated many
    times per fit; the whole computation is therefore done on NumPy arrays
    instead of looping over DataFrame rows.

    Args:
        params: flat parameter vector laid out as ``n_teams`` attack values,
            ``n_teams`` defence values, the home advantage and finally rho.
        df: DataFrame with 'HomeTeam', 'AwayTeam', 'FTHG' and 'FTAG' columns.
        team_idx: mapping from team name to its position in the attack and
            defence segments of ``params``.
        n_teams: number of teams.

    Returns:
        The negative log-likelihood as a float (lower is better).

    Raises:
        KeyError: if a team in ``df`` is missing from ``team_idx``.
    """
    n_rows = len(df)
    if n_rows == 0:
        # No matches: the log-likelihood is an empty sum.
        return 0.0

    params = np.asarray(params, dtype=float)
    attack = params[:n_teams]
    defence = params[n_teams:2 * n_teams]
    home = params[2 * n_teams]
    rho = params[2 * n_teams + 1]

    # Per-match team positions and integer scores.
    hi = np.fromiter((team_idx[t] for t in df['HomeTeam']),
                     dtype=np.intp, count=n_rows)
    ai = np.fromiter((team_idx[t] for t in df['AwayTeam']),
                     dtype=np.intp, count=n_rows)
    x = np.fromiter((int(g) for g in df['FTHG']), dtype=np.int64, count=n_rows)
    y = np.fromiter((int(g) for g in df['FTAG']), dtype=np.int64, count=n_rows)

    lam = np.exp(attack[hi] + defence[ai] + home)
    mu = np.exp(attack[ai] + defence[hi])

    # Vectorised form of the low-score correction computed by tau():
    # only the scores 0-0, 0-1, 1-0 and 1-1 are adjusted.
    correction = np.ones_like(lam)
    m00 = (x == 0) & (y == 0)
    m01 = (x == 0) & (y == 1)
    m10 = (x == 1) & (y == 0)
    m11 = (x == 1) & (y == 1)
    correction[m00] = 1 - lam[m00] * mu[m00] * rho
    correction[m01] = 1 + lam[m01] * rho
    correction[m10] = 1 + mu[m10] * rho
    correction[m11] = 1 - rho

    p = poisson.pmf(x, lam) * poisson.pmf(y, mu) * correction
    # Floor each probability so zero or negative values (possible when the
    # correction goes negative) do not produce -inf or NaN in the log.
    return float(-np.sum(np.log(np.maximum(p, 1e-20))))
|
|
|
|
|
def fit_dixon_coles(df, team_idx, n_teams):
    """Fit the Dixon-Coles model by minimising ``dc_log_likelihood``.

    Args:
        df: match DataFrame forwarded unchanged to the objective.
        team_idx: mapping from team name to parameter position.
        n_teams: number of teams.

    Returns:
        Tuple ``(attack, defence, home, rho)``: two arrays of length
        ``n_teams`` followed by the two scalar parameters.
    """
    home_pos = 2 * n_teams
    rho_pos = home_pos + 1

    # Start from neutral team strengths with a mild home edge and slightly
    # negative rho.
    start = np.zeros(rho_pos + 1)
    start[home_pos] = 0.25
    start[rho_pos] = -0.1

    def attack_sum(p):
        # Equality constraint: the attack parameters must sum to zero.
        return np.sum(p[:n_teams])

    unbounded = (None, None)
    # Only rho (the last parameter) is bounded.
    limits = [unbounded] * home_pos + [unbounded] + [(-1.5, 1.5)]

    result = minimize(
        dc_log_likelihood,
        start,
        args=(df, team_idx, n_teams),
        method='SLSQP',
        constraints=[{'type': 'eq', 'fun': attack_sum}],
        bounds=limits,
        options={'maxiter': 300, 'ftol': 1e-6},
    )

    fitted = result.x
    return (fitted[:n_teams], fitted[n_teams:home_pos],
            fitted[home_pos], fitted[rho_pos])
|
|
|
|
|
def get_last_n_home(df, teams, n=5):
    """Collect the last ``n`` home matches of every team.

    "Last" means the final ``n`` rows of ``df`` in which the team is the
    'HomeTeam', in the DataFrame's existing row order.

    Args:
        df: match DataFrame with a 'HomeTeam' column.
        teams: iterable of team names.
        n: number of trailing home matches kept per team.

    Returns:
        The concatenation of the per-team selections with fully duplicated
        rows removed, or an empty DataFrame when ``teams`` is empty.
    """
    recent = [df[df['HomeTeam'] == club].copy().tail(n) for club in teams]
    if not recent:
        return pd.DataFrame()
    return pd.concat(recent).drop_duplicates()
|
|
|
|
|
def fit_poisson_simple(df, team_idx, n_teams):
    """Fit an independent-Poisson goals model (no low-score correction).

    Home goals are modelled with rate ``exp(attack[home] + defence[away] +
    home_adv)`` and away goals with rate ``exp(attack[away] + defence[home])``.
    The attack parameters are constrained to sum to zero.

    Args:
        df: DataFrame with 'HomeTeam', 'AwayTeam', 'FTHG' and 'FTAG' columns.
        team_idx: mapping from team name to parameter position.
        n_teams: number of teams.

    Returns:
        Tuple ``(attack, defence, home_adv)``: two arrays of length
        ``n_teams`` and one scalar.

    Raises:
        KeyError: if a team in ``df`` is missing from ``team_idx``.
    """
    n_params = 2 * n_teams + 1
    x0 = np.zeros(n_params)
    x0[2 * n_teams] = 0.25

    # Team positions and scores do not depend on the parameters, so they are
    # extracted once here instead of on every objective evaluation.
    n_rows = len(df)
    if n_rows:
        hi = np.fromiter((team_idx[t] for t in df['HomeTeam']),
                         dtype=np.intp, count=n_rows)
        ai = np.fromiter((team_idx[t] for t in df['AwayTeam']),
                         dtype=np.intp, count=n_rows)
        home_goals = np.fromiter((int(g) for g in df['FTHG']),
                                 dtype=np.int64, count=n_rows)
        away_goals = np.fromiter((int(g) for g in df['FTAG']),
                                 dtype=np.int64, count=n_rows)
    else:
        # An empty frame may have no columns at all; use empty arrays.
        hi = ai = np.empty(0, dtype=np.intp)
        home_goals = away_goals = np.empty(0, dtype=np.int64)

    def neg_ll(params):
        attack = params[:n_teams]
        defence = params[n_teams:2 * n_teams]
        home = params[2 * n_teams]
        # Rates are floored at 0.01 to keep logpmf finite.
        lam = np.maximum(np.exp(attack[hi] + defence[ai] + home), 0.01)
        mu = np.maximum(np.exp(attack[ai] + defence[hi]), 0.01)
        ll = poisson.logpmf(home_goals, lam).sum() + poisson.logpmf(away_goals, mu).sum()
        return -float(ll)

    cons = [{'type': 'eq', 'fun': lambda p: np.sum(p[:n_teams])}]
    res = minimize(neg_ll, x0, method='SLSQP', constraints=cons,
                   options={'maxiter': 300})
    return res.x[:n_teams], res.x[n_teams:2 * n_teams], res.x[2 * n_teams]
|
|
|
|
|
def compute_rates(df, col_home, col_away, teams):
    """Compute per-team averages of one statistic pair.

    For a pair such as (HS, AS):
      - ``h_rates[team]`` is the mean of ``col_home`` over the rows where the
        team is 'HomeTeam' (what the team produces at home);
      - ``a_rates[team]`` is the mean of ``col_away`` over the rows where the
        team is 'AwayTeam' (what the team produces away).

    A team with no rows, or with only missing values, gets 0.

    Args:
        df: match DataFrame with 'HomeTeam', 'AwayTeam' and both stat columns.
        col_home: name of the home-side statistic column.
        col_away: name of the away-side statistic column.
        teams: iterable of team names.

    Returns:
        Tuple ``(h_rates, a_rates)`` of dicts keyed by team name.
    """
    def average_or_zero(values):
        # Fall back to 0 when there is nothing to average.
        if len(values) > 0 and values.notna().any():
            return values.mean()
        return 0

    h_rates = {}
    a_rates = {}
    for club in teams:
        at_home = df.loc[df['HomeTeam'] == club, col_home]
        away = df.loc[df['AwayTeam'] == club, col_away]
        h_rates[club] = average_or_zero(at_home)
        a_rates[club] = average_or_zero(away)
    return h_rates, a_rates
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_stat(home_team, away_team, h_rates, a_rates):
    """Predict a generic match statistic (corners, shots, ...) with Poisson.

    The home and away counts are treated as independent Poisson variables, so
    their total is Poisson with rate ``lh + la``. The over probability is
    taken from that distribution's survival function. This replaces a double
    sum over a 50x50 grid, which was slow and returned wrong values once
    totals approached the grid limit.

    Args:
        home_team: name of the home team (key into ``h_rates``).
        away_team: name of the away team (key into ``a_rates``).
        h_rates: dict of per-team home averages.
        a_rates: dict of per-team away averages.

    Returns:
        Dict with 'exp_h', 'exp_a', 'total', 'line', 'over' and 'under'.
        'line' is the expected total rounded to the nearest 0.5 (0.5 when
        that would be 0). For a whole-number line, 'over' includes a total
        exactly equal to the line.
    """
    # Unknown teams or zero averages fall back to a small positive rate.
    lh = max(h_rates.get(home_team, 0), 0.01)
    la = max(a_rates.get(away_team, 0), 0.01)
    total = lh + la

    line = round(total * 2) / 2
    if line == 0:
        line = 0.5

    # Smallest total that counts as "over": the next integer above a half
    # line, or the line itself when it is a whole number.
    min_over = int(np.ceil(line))
    # P(T >= min_over) = P(T > min_over - 1) for T ~ Poisson(total).
    p_over = float(poisson.sf(min_over - 1, total))

    return {'exp_h': lh, 'exp_a': la, 'total': total,
            'line': line, 'over': p_over, 'under': 1 - p_over}
|
|
|
|
|
def predict_goals_dc(home_team, away_team, attack, defence, home_adv, rho, team_idx, max_g=8):
    """Predict one match with the Dixon-Coles model.

    Builds a ``max_g`` x ``max_g`` score grid (rows are home goals, columns
    are away goals), applies the ``tau`` correction to the low scores, and
    normalises the grid before deriving market probabilities.

    Args:
        home_team: home team name (key into ``team_idx``).
        away_team: away team name (key into ``team_idx``).
        attack: per-team attack parameters.
        defence: per-team defence parameters.
        home_adv: home advantage parameter.
        rho: Dixon-Coles dependence parameter.
        team_idx: mapping from team name to parameter position.
        max_g: grid size; scores 0 .. max_g - 1 are considered per side.

    Returns:
        Dict with expected goals ('exp_h', 'exp_a'), result probabilities
        ('home', 'draw', 'away'), 'o25'/'u25', 'btts_y'/'btts_n', and 'top':
        the five most likely scores as ``(home_goals, away_goals, prob)``.

    Raises:
        KeyError: if either team is missing from ``team_idx``.
    """
    hi, ai = team_idx[home_team], team_idx[away_team]
    lam = np.exp(attack[hi] + defence[ai] + home_adv)
    mu = np.exp(attack[ai] + defence[hi])

    # Independent-Poisson grid from the two marginals.
    goals = np.arange(max_g)
    prob = np.outer(poisson.pmf(goals, lam), poisson.pmf(goals, mu))

    # tau() is 1.0 outside the 2x2 low-score corner, so only that corner
    # needs adjusting.
    corner = min(2, max_g)
    for i, j in iterproduct(range(corner), repeat=2):
        prob[i, j] *= tau(i, j, lam, mu, rho)

    # tau() can be negative for extreme rho/lam/mu; a negative cell is not a
    # probability, so floor at zero before normalising.
    np.clip(prob, 0.0, None, out=prob)
    prob /= prob.sum()

    p_h = np.tril(prob, -1).sum()   # home goals > away goals
    p_d = np.trace(prob)            # equal scores
    p_a = np.triu(prob, 1).sum()    # away goals > home goals

    total_goals = goals[:, None] + goals[None, :]
    o25 = prob[total_goals > 2].sum()
    btts = prob[1:, 1:].sum()

    flat = prob.ravel()
    top5 = np.argsort(flat)[::-1][:5]
    top_scores = [(int(idx // max_g), int(idx % max_g), flat[idx]) for idx in top5]

    return {'exp_h': lam, 'exp_a': mu, 'home': p_h, 'draw': p_d, 'away': p_a,
            'o25': o25, 'u25': 1 - o25, 'btts_y': btts, 'btts_n': 1 - btts,
            'top': top_scores}
|
|
|
|
|
def predict_goals_poisson(home_team, away_team, attack, defence, home_adv, team_idx, max_g=8):
    """Predict one match with independent Poisson goal counts.

    Builds a ``max_g`` x ``max_g`` score grid (rows are home goals, columns
    are away goals) as the outer product of the two marginal distributions,
    and normalises it before deriving market probabilities.

    Args:
        home_team: home team name (key into ``team_idx``).
        away_team: away team name (key into ``team_idx``).
        attack: per-team attack parameters.
        defence: per-team defence parameters.
        home_adv: home advantage parameter.
        team_idx: mapping from team name to parameter position.
        max_g: grid size; scores 0 .. max_g - 1 are considered per side.

    Returns:
        Dict with expected goals ('exp_h', 'exp_a'), result probabilities
        ('home', 'draw', 'away'), 'o25'/'u25', 'btts_y'/'btts_n', and 'top':
        the five most likely scores as ``(home_goals, away_goals, prob)``.

    Raises:
        KeyError: if either team is missing from ``team_idx``.
    """
    hi, ai = team_idx[home_team], team_idx[away_team]
    lam = np.exp(attack[hi] + defence[ai] + home_adv)
    mu = np.exp(attack[ai] + defence[hi])

    # Each marginal pmf is evaluated once; the grid is their outer product.
    goals = np.arange(max_g)
    prob = np.outer(poisson.pmf(goals, lam), poisson.pmf(goals, mu))
    # Renormalise because the grid truncates both distributions at max_g - 1.
    prob /= prob.sum()

    p_h = np.tril(prob, -1).sum()   # home goals > away goals
    p_d = np.trace(prob)            # equal scores
    p_a = np.triu(prob, 1).sum()    # away goals > home goals

    total_goals = goals[:, None] + goals[None, :]
    o25 = prob[total_goals > 2].sum()
    btts = prob[1:, 1:].sum()

    flat = prob.ravel()
    top5 = np.argsort(flat)[::-1][:5]
    top_scores = [(int(idx // max_g), int(idx % max_g), flat[idx]) for idx in top5]

    return {'exp_h': lam, 'exp_a': mu, 'home': p_h, 'draw': p_d, 'away': p_a,
            'o25': o25, 'u25': 1 - o25, 'btts_y': btts, 'btts_n': 1 - btts,
            'top': top_scores}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_pdf(summary_rows, match_details, stat_pairs, liga, ultimos_n):
    """Build the full prediction report as an in-memory PDF.

    The document contains a title block, a compact summary table (only when
    ``summary_rows`` is non-empty), a page break, and then one section per
    match with a goals table, the top three scores of each model and one
    table per statistic.

    Args:
        summary_rows: list of dicts keyed by the summary column names
            ('Partido', '1 (%)', ..., plus '<abbr> Total' per statistic).
        match_details: list of dicts with 'home', 'away', optional 'date',
            'r_full', 'r_last' and optional 'stats' entries.
        stat_pairs: sequence of ``(home_col, away_col, label)`` triples.
        liga: league code shown in the report header.
        ultimos_n: size of the "last N home games" window, used in labels.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding the PDF bytes.

    Raises:
        ImportError: if reportlab is not installed (imported lazily here).
    """
    from reportlab.lib.pagesizes import letter, landscape
    from reportlab.lib import colors
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import mm
    from reportlab.platypus import (SimpleDocTemplate, Paragraph, Spacer,
                                    Table, TableStyle, PageBreak)

    def abbreviate(label):
        # Short column name used in the summary table for a statistic label.
        return (label.replace('CORNERS', 'CRN')
                     .replace('TIROS TOTALES', 'TIROS')
                     .replace('TIROS A PORTERIA (SOT)', 'SOT'))

    def striped_table_style(header_bg, stripe_bg, font_size, extra=()):
        # Shared look of every table: dark header row, grid, zebra rows.
        commands = [
            ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor(header_bg)),
            ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
            ('FONTSIZE', (0, 0), (-1, -1), font_size),
            ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
            ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
            ('ROWBACKGROUNDS', (0, 1), (-1, -1),
             [colors.white, colors.HexColor(stripe_bg)]),
            ('TOPPADDING', (0, 0), (-1, -1), 2),
            ('BOTTOMPADDING', (0, 0), (-1, -1), 2),
        ]
        commands.extend(extra)
        return TableStyle(commands)

    def as_percent(value):
        return f"{value*100:.1f}%"

    def two_decimals(value):
        return f"{value:.2f}"

    def one_decimal(value):
        return f"{value:.1f}"

    def comparison_table(header_cells, rows, full, last, style):
        # Three-column table: metric name, season value, last-N value.
        data = [[Paragraph(h, header_style) for h in header_cells]]
        for title, key, fmt in rows:
            data.append([title, fmt(full[key]), fmt(last[key])])
        table = Table(data, colWidths=[160, 170, 170], repeatRows=1)
        table.setStyle(style)
        return table

    def top_scores_text(home, away, result):
        return " | ".join([f"{home} {hg}-{ag} {away} ({p*100:.1f}%)"
                           for hg, ag, p in result['top'][:3]])

    buffer = io.BytesIO()
    margin = 15 * mm
    doc = SimpleDocTemplate(buffer, pagesize=landscape(letter),
                            leftMargin=margin, rightMargin=margin,
                            topMargin=margin, bottomMargin=margin)
    styles = getSampleStyleSheet()

    title_style = ParagraphStyle('CustomTitle', parent=styles['Title'],
                                 fontSize=18, spaceAfter=6)
    subtitle_style = ParagraphStyle('CustomSubtitle', parent=styles['Heading2'],
                                    fontSize=12, spaceAfter=4)
    small_style = ParagraphStyle('Small', parent=styles['Normal'],
                                 fontSize=7, leading=9)
    header_style = ParagraphStyle('Header', parent=styles['Normal'],
                                  fontSize=7, leading=9, textColor=colors.white)

    # --- Title block -------------------------------------------------------
    story = [
        Paragraph("Predictor de Futbol - Reporte de Predicciones", title_style),
        Paragraph(
            f"Liga: {liga} - {LIGAS_DISPONIBLES.get(liga, liga)} | "
            f"Fecha: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')} | "
            f"Modelo: Dixon-Coles + Poisson", styles['Normal']),
        Spacer(1, 12),
        Paragraph("Resumen Compacto - Todos los Fixtures", subtitle_style),
    ]

    # --- Summary table -----------------------------------------------------
    if summary_rows:
        fixed_columns = ['Partido', '1 (%)', 'X (%)', '2 (%)', 'O2.5 (%)',
                         'U2.5 (%)', 'BTTS (%)']
        stat_columns = [abbreviate(label) for _, _, label in stat_pairs]
        headers = fixed_columns + stat_columns
        # Fixed columns are looked up by header; stat columns by '<abbr> Total'.
        row_keys = fixed_columns + [f'{short} Total' for short in stat_columns]

        table_data = [[Paragraph(h, header_style) for h in headers]]
        for row in summary_rows:
            table_data.append(
                [Paragraph(str(row.get(key, '')), small_style) for key in row_keys])

        col_widths = [130] + [52] * (len(headers) - 1)
        summary_table = Table(table_data, colWidths=col_widths, repeatRows=1)
        summary_table.setStyle(striped_table_style(
            '#1a1a2e', '#f0f0f5', 7,
            extra=[('ALIGN', (0, 0), (0, -1), 'LEFT'),
                   ('VALIGN', (0, 0), (-1, -1), 'MIDDLE')]))
        story.append(summary_table)

    story.append(PageBreak())

    # --- Per-match detail --------------------------------------------------
    story.append(Paragraph("Predicciones Detalladas por Partido", subtitle_style))
    story.append(Spacer(1, 6))

    goal_rows = [
        ('Goles esp. Local', 'exp_h', two_decimals),
        ('Goles esp. Visitante', 'exp_a', two_decimals),
        ('P(Victoria Local)', 'home', as_percent),
        ('P(Empate)', 'draw', as_percent),
        ('P(Victoria Visitante)', 'away', as_percent),
        ('P(Over 2.5)', 'o25', as_percent),
        ('P(Under 2.5)', 'u25', as_percent),
        ('P(BTTS Si)', 'btts_y', as_percent),
        ('P(BTTS No)', 'btts_n', as_percent),
    ]
    stat_rows = [
        ('Esperado Local', 'exp_h', one_decimal),
        ('Esperado Visitante', 'exp_a', one_decimal),
        ('Total esperado', 'total', one_decimal),
        ('Linea sugerida', 'line', one_decimal),
        ('P(Over linea)', 'over', as_percent),
        ('P(Under linea)', 'under', as_percent),
    ]
    goal_headers = ['Metrica', 'Temporada (Dixon-Coles)',
                    f'Ult.{ultimos_n} Local (Poisson)']

    for match in match_details:
        home = match['home']
        away = match['away']
        date = match.get('date', '')
        r_f = match['r_full']
        r_l = match['r_last']

        header_text = f"{home} vs {away}"
        if date:
            header_text = f"{date} | {header_text}"
        story.append(Paragraph(f"<b>{header_text}</b>", styles['Heading3']))

        # Goals / result probabilities, season model vs last-N model.
        story.append(comparison_table(
            goal_headers, goal_rows, r_f, r_l,
            striped_table_style('#1a1a2e', '#f0f0f5', 8)))
        story.append(Spacer(1, 4))

        # Three most likely scores for each model.
        story.append(Paragraph(
            f"<b>Top marcadores (Temporada):</b> {top_scores_text(home, away, r_f)}",
            small_style))
        story.append(Paragraph(
            f"<b>Top marcadores (Ult.{ultimos_n}):</b> {top_scores_text(home, away, r_l)}",
            small_style))

        # One table per available statistic.
        for stat_info in match.get('stats', []):
            stat_headers = [stat_info['label'], 'Temporada',
                            f'Ult.{ultimos_n} Local']
            story.append(comparison_table(
                stat_headers, stat_rows,
                stat_info['s_full'], stat_info['s_last'],
                striped_table_style('#2d3436', '#f5f5fa', 8)))
            story.append(Spacer(1, 3))

        story.append(Spacer(1, 10))

    # --- Legend ------------------------------------------------------------
    story.append(Spacer(1, 12))
    story.append(Paragraph(
        "1/X/2 = Probabilidad resultado | O2.5/U2.5 = Over/Under 2.5 goles | "
        "BTTS = Ambos marcan | Stats = Total esperado | "
        "Modelo: Dixon-Coles (goles) + Poisson (stats)",
        small_style))

    doc.build(story)
    buffer.seek(0)
    return buffer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@st.cache_data(ttl=3600, show_spinner="Descargando datos...")
def load_data(liga, url_hist, url_fix):
    """Download the historical results and the fixtures list.

    Args:
        liga: league code used to filter the fixtures by their 'Div' column.
        url_hist: URL of the historical results CSV.
        url_fix: URL of the fixtures CSV.

    Returns:
        Tuple ``(df_hist, df_fix)``. ``df_fix`` holds only the rows of the
        requested league when a 'Div' column exists; otherwise it is returned
        unfiltered.
    """
    history = pd.read_csv(url_hist, encoding='utf-8-sig')
    fixtures = pd.read_csv(url_fix, encoding='utf-8-sig')
    if 'Div' not in fixtures.columns:
        return history, fixtures
    league_fixtures = fixtures[fixtures['Div'] == liga].copy()
    return history, league_fixtures
|
|
|
|
|
@st.cache_data(ttl=3600, show_spinner="Ajustando modelos...")
def run_models(_df_hist, _df_fix, liga, ultimos_n, max_goles):
    """Clean the historical data and fit every model used by the app.

    Args:
        _df_hist: raw historical results; copied, never mutated.
        _df_fix: fixtures for the league; copied and returned in the result.
        liga: league code. Not read by the body.
        ultimos_n: number of trailing home games per team for the
            short-window Poisson model and rates.
        max_goles: not read by the body.

    NOTE(review): per the Streamlit caching docs, underscore-prefixed
    parameters are excluded from the cache key, so ``liga``, ``ultimos_n``
    and ``max_goles`` are presumably what distinguishes cached results.

    Returns:
        Dict with the team list and index, the fitted Dixon-Coles parameters
        ('atk_full', 'dfe_full', 'home_full', 'rho_full'), the fitted
        short-window Poisson parameters ('atk_ln', 'dfe_ln', 'home_ln'), the
        available statistic pairs and their per-team rates, and the cleaned
        frames ('df_hist', 'df_fix', 'df_last_n').

    Raises:
        ValueError: if a required column is missing from ``_df_hist``.
    """
    df_hist = _df_hist.copy()
    df_fix = _df_fix.copy()

    # Explicit validation instead of assert, which is stripped under -O.
    required = ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']
    for col in required:
        if col not in df_hist.columns:
            raise ValueError(f"Columna '{col}' no encontrada")

    df_hist['FTHG'] = pd.to_numeric(df_hist['FTHG'], errors='coerce')
    df_hist['FTAG'] = pd.to_numeric(df_hist['FTAG'], errors='coerce')
    # Drop rows without a usable score or team name: a NaN team would break
    # sorting of the team list and the team -> index lookup.
    df_hist = df_hist.dropna(subset=required)
    df_hist['FTHG'] = df_hist['FTHG'].astype(int)
    df_hist['FTAG'] = df_hist['FTAG'].astype(int)

    # Keep only statistic columns that exist and hold at least one number.
    STAT_COLS = {'HS': 'Tiros Local', 'AS': 'Tiros Visitante',
                 'HST': 'SoT Local', 'AST': 'SoT Visitante',
                 'HC': 'Corners Local', 'AC': 'Corners Visitante'}
    available_stats = {}
    for col in STAT_COLS:
        if col in df_hist.columns:
            df_hist[col] = pd.to_numeric(df_hist[col], errors='coerce')
            if df_hist[col].notna().sum() > 0:
                available_stats[col] = STAT_COLS[col]

    teams = sorted(set(df_hist['HomeTeam'].unique()) |
                   set(df_hist['AwayTeam'].unique()))
    n_teams = len(teams)
    team_idx = {t: i for i, t in enumerate(teams)}

    # Whole-season model.
    atk_full, dfe_full, home_full, rho_full = fit_dixon_coles(
        df_hist, team_idx, n_teams)

    # Short-window model on each team's last N home games.
    df_last_n = get_last_n_home(df_hist, teams, n=ultimos_n)
    atk_ln, dfe_ln, home_ln = fit_poisson_simple(df_last_n, team_idx, n_teams)

    # A statistic is predicted only when both its home and away columns exist.
    stat_pairs = []
    if 'HC' in available_stats and 'AC' in available_stats:
        stat_pairs.append(('HC', 'AC', 'CORNERS'))
    if 'HS' in available_stats and 'AS' in available_stats:
        stat_pairs.append(('HS', 'AS', 'TIROS TOTALES'))
    if 'HST' in available_stats and 'AST' in available_stats:
        stat_pairs.append(('HST', 'AST', 'TIROS A PORTERIA (SOT)'))

    stat_rates = {}
    for col_h, col_a, label in stat_pairs:
        stat_rates[(col_h, col_a, 'full')] = compute_rates(
            df_hist, col_h, col_a, teams)
        stat_rates[(col_h, col_a, 'last')] = compute_rates(
            df_last_n, col_h, col_a, teams)

    return {
        'teams': teams, 'n_teams': n_teams, 'team_idx': team_idx,
        'atk_full': atk_full, 'dfe_full': dfe_full,
        'home_full': home_full, 'rho_full': rho_full,
        'atk_ln': atk_ln, 'dfe_ln': dfe_ln, 'home_ln': home_ln,
        'stat_pairs': stat_pairs, 'stat_rates': stat_rates,
        'available_stats': available_stats,
        'df_hist': df_hist, 'df_fix': df_fix, 'df_last_n': df_last_n
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Download the data; any failure is reported in the page and halts the run.
try:
    df_hist, df_fix = load_data(LIGA, URL_HISTORICO, URL_FIXTURES)
except Exception as exc:
    st.error(f"Error descargando datos: {exc}")
    st.stop()

# Nothing to predict when the league has no fixtures.
if not len(df_fix):
    st.warning("No se encontraron fixtures para esta liga.")
    st.stop()

with st.spinner("Ajustando modelos Dixon-Coles y Poisson..."):
    model = run_models(df_hist, df_fix, LIGA, ULTIMOS_N_LOCAL, MAX_GOLES)

teams = model['teams']
team_idx = model['team_idx']
df_fix_filtered = model['df_fix']

# Headline counters shown in three columns.
col1, col2, col3 = st.columns(3)
for _slot, _title, _value in (
        (col1, "Partidos historicos", len(model['df_hist'])),
        (col2, "Fixtures", len(df_fix_filtered)),
        (col3, "Equipos", model['n_teams'])):
    _slot.metric(_title, _value)

st.markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# One entry per predictable fixture: a flat row for the summary table and a
# nested record for the detailed views.
summary_rows = []
match_details = []

# Summary column -> key in the Dixon-Coles result dict (shown as a percentage).
_SUMMARY_COLUMNS = (
    ('1 (%)', 'home'),
    ('X (%)', 'draw'),
    ('2 (%)', 'away'),
    ('O2.5 (%)', 'o25'),
    ('U2.5 (%)', 'u25'),
    ('BTTS (%)', 'btts_y'),
)

for _, fx in df_fix_filtered.iterrows():
    home, away = fx['HomeTeam'], fx['AwayTeam']
    # Skip fixtures involving a team with no historical data.
    if not (home in team_idx and away in team_idx):
        continue

    date = fx.get('Date', '')

    # Season-long Dixon-Coles prediction and short-window Poisson prediction.
    r_f = predict_goals_dc(
        home, away, model['atk_full'], model['dfe_full'],
        model['home_full'], model['rho_full'], team_idx, MAX_GOLES)
    r_l = predict_goals_poisson(
        home, away, model['atk_ln'], model['dfe_ln'],
        model['home_ln'], team_idx, MAX_GOLES)

    row_data = {'Partido': f"{home} vs {away}", 'Date': date}
    for _column, _key in _SUMMARY_COLUMNS:
        row_data[_column] = round(r_f[_key]*100, 1)

    match_stat_details = []
    for col_h, col_a, label in model['stat_pairs']:
        rates = model['stat_rates']
        s_f = predict_stat(home, away, *rates[(col_h, col_a, 'full')])
        s_l = predict_stat(home, away, *rates[(col_h, col_a, 'last')])

        # Abbreviated label used as the summary column prefix.
        short = label.replace('CORNERS', 'CRN').replace(
            'TIROS TOTALES', 'TIROS').replace(
            'TIROS A PORTERIA (SOT)', 'SOT')
        row_data[f'{short} Total'] = round(s_f['total'], 1)

        match_stat_details.append(
            {'label': label, 's_full': s_f, 's_last': s_l})

    summary_rows.append(row_data)
    match_details.append({
        'home': home, 'away': away, 'date': date,
        'r_full': r_f, 'r_last': r_l,
        'stats': match_stat_details,
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.header("Resumen Compacto - Todos los Fixtures")

# Show the compact table, or a notice when no fixture could be predicted.
if not summary_rows:
    st.warning("No hay partidos para mostrar.")
else:
    df_summary = pd.DataFrame(summary_rows)
    st.dataframe(df_summary, use_container_width=True, hide_index=True)

st.markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.header("Descargar Reporte")

# The PDF is built on every run that has predictions; the button only serves it.
if summary_rows:
    try:
        pdf_buffer = generate_pdf(
            summary_rows, match_details, model['stat_pairs'],
            LIGA, ULTIMOS_N_LOCAL)
        fecha_str = datetime.datetime.now().strftime('%Y%m%d')
        nombre_pdf = f"predicciones_{LIGA}_{fecha_str}.pdf"

        _download_args = {
            'label': "Descargar reporte completo en PDF",
            'data': pdf_buffer,
            'file_name': nombre_pdf,
            'mime': "application/pdf",
            'type': "primary",
        }
        st.download_button(**_download_args)
        st.caption(
            "El PDF incluye el resumen compacto y el detalle de cada partido.")
    except ImportError:
        # generate_pdf imports reportlab lazily, so a missing package lands here.
        st.error(
            "Se requiere 'reportlab'. Instala con: pip install reportlab")
    except Exception as exc:
        st.error(f"Error generando PDF: {exc}")

st.markdown("---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
st.header("Predicciones Detalladas por Partido")

# Column headers for the two models compared in every table.
LBL_FULL = "Temporada (Dixon-Coles)"
LBL_LAST = f"Ult.{ULTIMOS_N_LOCAL} Local (Poisson)"


def _fmt_pct(value):
    """Format a probability in [0, 1] as a percentage with one decimal."""
    return f"{value*100:.1f}%"


def _fmt_2dec(value):
    """Format a number with two decimals."""
    return f"{value:.2f}"


def _fmt_1dec(value):
    """Format a number with one decimal."""
    return f"{value:.1f}"


# (row title, key in the result dict, formatter) for the goals table.
_GOAL_ROWS = (
    ('Goles esp. Local', 'exp_h', _fmt_2dec),
    ('Goles esp. Visitante', 'exp_a', _fmt_2dec),
    ('P(Victoria Local)', 'home', _fmt_pct),
    ('P(Empate)', 'draw', _fmt_pct),
    ('P(Victoria Visitante)', 'away', _fmt_pct),
    ('P(Over 2.5)', 'o25', _fmt_pct),
    ('P(Under 2.5)', 'u25', _fmt_pct),
    ('P(BTTS Si)', 'btts_y', _fmt_pct),
    ('P(BTTS No)', 'btts_n', _fmt_pct),
)

# Same layout for each statistic table.
_STAT_ROWS = (
    ('Esperado Local', 'exp_h', _fmt_1dec),
    ('Esperado Visitante', 'exp_a', _fmt_1dec),
    ('Total esperado', 'total', _fmt_1dec),
    ('Linea sugerida', 'line', _fmt_1dec),
    ('P(Over linea)', 'over', _fmt_pct),
    ('P(Under linea)', 'under', _fmt_pct),
)


def _comparison_frame(rows, full, last, full_label, last_label):
    """Build a three-column DataFrame comparing two result dicts row by row."""
    return pd.DataFrame({
        'Metrica': [title for title, _, _ in rows],
        full_label: [fmt(full[key]) for _, key, fmt in rows],
        last_label: [fmt(last[key]) for _, key, fmt in rows],
    })


for match in match_details:
    home = match['home']
    away = match['away']
    date = match.get('date', '')
    r_f = match['r_full']
    r_l = match['r_last']

    header = f"{date} | {home} vs {away}" if date else f"{home} vs {away}"

    with st.expander(header, expanded=False):
        st.subheader("Goles y Resultados")
        st.dataframe(
            _comparison_frame(_GOAL_ROWS, r_f, r_l, LBL_FULL, LBL_LAST),
            use_container_width=True, hide_index=True)

        # Most likely scores of each model, side by side.
        col_t1, col_t2 = st.columns(2)
        for _column, _title, _result in (
                (col_t1, "**Top 5 marcadores (Temporada)**", r_f),
                (col_t2, f"**Top 5 marcadores (Ult.{ULTIMOS_N_LOCAL} Local)**", r_l)):
            with _column:
                st.markdown(_title)
                for hg, ag, p in _result['top']:
                    st.write(f"{home} {hg}-{ag} {away} -> {p*100:.1f}%")

        # One table per available statistic.
        for stat_info in match.get('stats', []):
            st.subheader(stat_info['label'])
            st.dataframe(
                _comparison_frame(
                    _STAT_ROWS, stat_info['s_full'], stat_info['s_last'],
                    'Temporada', f'Ult.{ULTIMOS_N_LOCAL} Local'),
                use_container_width=True, hide_index=True)

st.markdown("---")
st.caption(
    "1/X/2 = Probabilidad resultado | O2.5/U2.5 = Over/Under 2.5 goles | "
    "BTTS = Ambos marcan")
st.caption(
    "Modelo: Dixon-Coles (goles) + Poisson (stats) -- Temporada completa")