# daniel-saed's picture
# Upload 21 files
# c2aaace verified
# ===========================
# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
# ===========================
import requests
import tempfile
import numpy as np
import pandas as pd
import joblib
from scipy.stats import poisson
from scipy import stats
import os
import sys
from src.process_data.process_dataset import get_dataframes,get_head_2_head,get_points_from_result,get_team_ppp,get_ppp_difference,get_average
#from process_data.process_dataset import get_dataframes,get_head_2_head,get_points_from_result,get_team_ppp,get_ppp_difference,get_average
#project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
#sys.path.insert(0, project_root)
# ===========================
# 1. FUNCIONES FIABILIDAD
# ===========================
def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
    """
    Score how predictable each team's corner count is within one season.

    For every team that has at least ``min_partidos`` rows as ``team`` in the
    selected season, this computes variability, consistency, trend and outlier
    metrics over its ``Pass Types_CK`` values and folds them into a 0-100
    reliability score with a textual level and a display colour.

    Args:
        df_database: DataFrame with at least the columns ``season``, ``team``,
            ``opponent`` and ``Pass Types_CK``.
        temporada: Season label compared against ``df_database['season']``.
        min_partidos: Minimum number of matches a team needs to be scored.

    Returns:
        DataFrame with one row per scored team, sorted by ``Score_Fiabilidad``
        in descending order. When no team qualifies, an empty DataFrame with
        the same columns is returned (instead of failing while sorting).
    """
    columnas = [
        'Equipo', 'Partidos',
        'Media_CK', 'Mediana_CK', 'Std_CK', 'CV_%',
        'Pct_Cerca_Media', 'Cambios_Bruscos_%', 'IQR',
        'Rango', 'Rango_Norm', 'Min', 'Max',
        'Outliers', 'Pct_Outliers', 'Max_ZScore',
        'Tendencia_Slope', 'Autocorr',
        'Score_Fiabilidad', 'Nivel', 'Color',
    ]
    # (minimum score, level label, colour), checked from best to worst.
    niveles = (
        (70, "EXCELENTE ⭐⭐⭐", "#27ae60"),
        (55, "BUENO ✅", "#2ecc71"),
        (40, "ACEPTABLE 🟡", "#f39c12"),
        (25, "REGULAR ⚠️", "#e67e22"),
    )

    df_temp = df_database[df_database['season'] == temporada]
    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()
    resultados = []

    for equipo in equipos:
        # Corners taken by the team, in the row order of df_database.
        # NOTE(review): the trend/streak metrics below assume that order is
        # chronological — confirm against how the database is built.
        ck_sacados = df_temp.loc[df_temp['team'] == equipo, 'Pass Types_CK'].values
        n_partidos = len(ck_sacados)
        if n_partidos == 0 or n_partidos < min_partidos:
            continue

        # 1. Variability
        media = ck_sacados.mean()
        std = ck_sacados.std()
        cv = (std / media * 100) if media > 0 else 0

        # 2. Consistency
        # 2.1 Share of matches within +-2 corners of the mean.
        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / n_partidos * 100
        # 2.2 Share of consecutive matches whose corner count jumps by more than 4.
        #     A single match has no consecutive pair, so the share is 0.
        if n_partidos > 1:
            cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
            pct_cambios_bruscos = cambios_bruscos / (n_partidos - 1) * 100
        else:
            pct_cambios_bruscos = 0.0
        # 2.3 Quartiles; the IQR is more robust than the standard deviation.
        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
        iqr = q3 - q1

        # 3. Trend
        # 3.1 Slope of a linear fit over the match index (needs >= 2 points).
        if n_partidos > 1:
            slope = stats.linregress(np.arange(n_partidos), ck_sacados).slope
        else:
            slope = 0.0
        # 3.2 Lag-1 autocorrelation. A constant sub-series has zero variance
        #     and yields NaN, which is reported as 0 (no correlation).
        autocorr = 0.0
        if n_partidos > 2:
            with np.errstate(divide='ignore', invalid='ignore'):
                autocorr = np.corrcoef(ck_sacados[:-1], ck_sacados[1:])[0, 1]
            if not np.isfinite(autocorr):
                autocorr = 0.0

        # 4. Outliers
        # 4.1 IQR fences.
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
        pct_outliers = outliers / n_partidos * 100
        # 4.2 Largest absolute z-score; undefined (NaN) for a constant series,
        #     where no value deviates from the mean, so 0 is used instead.
        max_z = np.abs(stats.zscore(ck_sacados)).max() if std > 0 else 0.0

        # 5. Range
        rango = ck_sacados.max() - ck_sacados.min()
        rango_normalizado = rango / media if media > 0 else 0

        # 6. Global reliability score: each component is clamped to >= 0 and
        #    higher means more reliable.
        score_cv = max(0, 100 - cv * 2)
        score_consistencia = cerca_media
        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)
        score_outliers = max(0, 100 - pct_outliers * 3)
        score_iqr = max(0, 100 - iqr * 10)
        score_fiabilidad = (
            score_cv * 0.25 +
            score_consistencia * 0.30 +
            score_cambios * 0.20 +
            score_outliers * 0.15 +
            score_iqr * 0.10
        )

        # 7. Classification: first threshold reached wins, otherwise "avoid".
        nivel, color = "EVITAR ⛔", "#e74c3c"
        for minimo, etiqueta, tono in niveles:
            if score_fiabilidad >= minimo:
                nivel, color = etiqueta, tono
                break

        resultados.append({
            'Equipo': equipo,
            'Partidos': n_partidos,
            # Basic statistics
            'Media_CK': round(media, 2),
            'Mediana_CK': round(q2, 2),
            'Std_CK': round(std, 2),
            'CV_%': round(cv, 1),
            # Consistency
            'Pct_Cerca_Media': round(cerca_media, 1),
            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
            'IQR': round(iqr, 2),
            # Range
            'Rango': int(rango),
            'Rango_Norm': round(rango_normalizado, 2),
            'Min': int(ck_sacados.min()),
            'Max': int(ck_sacados.max()),
            # Outliers
            'Outliers': int(outliers),
            'Pct_Outliers': round(pct_outliers, 1),
            'Max_ZScore': round(max_z, 2),
            # Trend
            'Tendencia_Slope': round(slope, 3),
            'Autocorr': round(autocorr, 3),
            # Score and classification
            'Score_Fiabilidad': round(score_fiabilidad, 1),
            'Nivel': nivel,
            'Color': color
        })

    # Passing the column list keeps the schema even when nothing was scored,
    # so the sort below (and any caller filtering on 'Equipo') still works.
    df_resultado = pd.DataFrame(resultados, columns=columnas)
    return df_resultado.sort_values('Score_Fiabilidad', ascending=False)
def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """
    Print a console report of the team reliability analysis.

    Shows the first ``top_n`` rows of ``df_analisis`` as the most reliable
    teams, the last ``top_n`` rows as the least reliable ones, the count of
    teams per level and summary statistics of the score.

    Args:
        df_analisis: DataFrame with the columns read below (``Equipo``,
            ``Nivel``, ``Score_Fiabilidad``, ``Media_CK``, ...); rows are
            printed in the order they appear.
        top_n: Number of rows shown in each of the two team lists.
    """
    linea_doble = "=" * 120
    linea_simple = "-" * 120

    def imprimir_identidad(fila):
        # Team name, level and score, followed by the central statistics.
        print(f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | Score: {fila['Score_Fiabilidad']:.1f}")
        print(f" 📊 Media: {fila['Media_CK']:.1f} | Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%")

    def imprimir_alertas(fila):
        # Share of abrupt changes and of outliers.
        print(f" ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | Outliers: {fila['Pct_Outliers']:.1f}%")

    print("\n" + linea_doble)
    print("🎯 ANÁLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print(linea_doble)

    # Most reliable teams: first rows of the table.
    print(f"\n⭐ TOP {top_n} EQUIPOS MÁS FIABLES")
    print(linea_simple)
    for _, fila in df_analisis.head(top_n).iterrows():
        imprimir_identidad(fila)
        print(f" ✅ {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        imprimir_alertas(fila)
        print(f" 📈 Rango: {fila['Min']}-{fila['Max']} ({fila['Rango']} corners)")

    # Least reliable teams: last rows of the table.
    print(f"\n\n⛔ TOP {top_n} EQUIPOS MENOS FIABLES")
    print(linea_simple)
    for _, fila in df_analisis.tail(top_n).iterrows():
        imprimir_identidad(fila)
        print(f" ❌ Solo {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        imprimir_alertas(fila)

    # Overall distribution.
    print("\n\n📊 DISTRIBUCIÓN POR NIVEL DE FIABILIDAD")
    print(linea_simple)
    print(df_analisis['Nivel'].value_counts())

    print("\n📈 ESTADÍSTICAS DE SCORE:")
    puntuaciones = df_analisis['Score_Fiabilidad']
    print(f" Media: {puntuaciones.mean():.1f}")
    print(f" Mediana: {puntuaciones.median():.1f}")
    print(f" Score máximo: {puntuaciones.max():.1f}")
    print(f" Score mínimo: {puntuaciones.min():.1f}")
def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """
    Rate how reliable a single match is for corner betting.

    Looks both teams up in the reliability table and averages their scores.

    Args:
        local: Home team name, matched against ``df_analisis['Equipo']``.
        visitante: Away team name, matched the same way.
        df_analisis: Reliability table with the columns ``Equipo``,
            ``Score_Fiabilidad``, ``Nivel``, ``CV_%`` and ``Pct_Cerca_Media``.

    Returns:
        dict that always contains ``fiabilidad``, ``score_local``,
        ``score_away``, ``score_promedio``, ``nivel_local``, ``nivel_away``,
        ``mensaje``, ``cv_local``, ``cv_away``, ``consistencia_local`` and
        ``consistencia_away``. When either team is missing from the table,
        ``fiabilidad`` is ``'DESCONOCIDO'``, ``score_promedio`` is 0, the
        missing team's fields carry neutral defaults and the legacy key
        ``score`` (0) is included as well.
    """
    def buscar_equipo(equipo):
        # An analysis frame without rows/columns cannot contain the team.
        if df_analisis.empty or 'Equipo' not in df_analisis.columns:
            return None
        coincidencias = df_analisis[df_analisis['Equipo'] == equipo]
        return None if coincidencias.empty else coincidencias.iloc[0]

    def dato(fila, columna, por_defecto):
        return por_defecto if fila is None else fila[columna]

    fila_local = buscar_equipo(local)
    fila_away = buscar_equipo(visitante)

    if fila_local is None or fila_away is None:
        # Same key set as the normal result so callers that read
        # 'nivel_local', 'cv_local' or 'score_promedio' do not fail.
        return {
            'fiabilidad': 'DESCONOCIDO',
            'score': 0,
            'score_local': dato(fila_local, 'Score_Fiabilidad', 0),
            'score_away': dato(fila_away, 'Score_Fiabilidad', 0),
            'score_promedio': 0,
            'nivel_local': dato(fila_local, 'Nivel', 'DESCONOCIDO'),
            'nivel_away': dato(fila_away, 'Nivel', 'DESCONOCIDO'),
            'mensaje': '⚠️ Datos insuficientes',
            'cv_local': dato(fila_local, 'CV_%', 0.0),
            'cv_away': dato(fila_away, 'CV_%', 0.0),
            'consistencia_local': dato(fila_local, 'Pct_Cerca_Media', 0.0),
            'consistencia_away': dato(fila_away, 'Pct_Cerca_Media', 0.0)
        }

    score_local = fila_local['Score_Fiabilidad']
    score_away = fila_away['Score_Fiabilidad']
    score_promedio = (score_local + score_away) / 2

    # Match classification by the average of both team scores.
    if score_promedio >= 65:
        fiabilidad = "MUY ALTA ⭐⭐⭐"
        mensaje = "✅ EXCELENTE PARTIDO PARA APOSTAR"
    elif score_promedio >= 50:
        fiabilidad = "ALTA ✅"
        mensaje = "✅ BUEN PARTIDO PARA APOSTAR"
    elif score_promedio >= 35:
        fiabilidad = "MEDIA 🟡"
        mensaje = "🟡 APOSTAR CON PRECAUCIÓN"
    else:
        fiabilidad = "BAJA ⛔"
        mensaje = "⛔ EVITAR APUESTA"

    return {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': fila_local['Nivel'],
        'nivel_away': fila_away['Nivel'],
        'mensaje': mensaje,
        # Extra per-team data
        'cv_local': fila_local['CV_%'],
        'cv_away': fila_away['CV_%'],
        'consistencia_local': fila_local['Pct_Cerca_Media'],
        'consistencia_away': fila_away['Pct_Cerca_Media']
    }
def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5, lineas=None):
    """
    Derive exact and Over/Under probabilities from a Poisson distribution.

    Args:
        lambda_pred: Expected number of corners (Poisson mean); must be a
            finite number >= 0.
        rango_inferior: How many integer counts below the rounded mean to
            include in the exact probabilities (never going below 0).
        rango_superior: How many integer counts above the rounded mean to
            include in the exact probabilities.
        lineas: Betting lines to evaluate for both Over and Under. Defaults
            to ``[7.5, 8.5, 9.5, 10.5, 11.5, 12.5]``.

    Returns:
        dict with three sub-dicts, all expressed in percent:
        ``exactas`` (count -> P(X = count)), ``over`` (line -> P(X > line))
        and ``under`` (line -> P(X <= line)).

    Raises:
        ValueError: If ``lambda_pred`` is negative, NaN or infinite; a Poisson
            mean outside that domain would otherwise produce NaN
            probabilities or a conversion error.
    """
    lam = float(lambda_pred)
    if not np.isfinite(lam) or lam < 0:
        raise ValueError(
            f"lambda_pred debe ser un número finito >= 0 (recibido: {lambda_pred!r})"
        )
    if lineas is None:
        lineas = [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]

    valor_central = int(round(lam))
    primer_valor = max(0, valor_central - rango_inferior)
    ultimo_valor = valor_central + rango_superior

    probabilidades_exactas = {
        k: poisson.pmf(k, lam) * 100
        for k in range(primer_valor, ultimo_valor + 1)
    }
    # The same lines are used on both sides so every Over has its Under.
    # sf(line) is P(X > line), i.e. 1 - cdf(line) without the subtraction.
    probabilidades_over = {linea: poisson.sf(linea, lam) * 100 for linea in lineas}
    probabilidades_under = {linea: poisson.cdf(linea, lam) * 100 for linea in lineas}

    return {
        'exactas': probabilidades_exactas,
        'over': probabilidades_over,
        'under': probabilidades_under
    }
def clasificar_confianza(prob):
    """
    Map a probability (in percent) to a confidence label.

    Returns the high label from 66 upwards, the medium label from 55
    upwards and the low label for anything else.
    """
    # Thresholds are checked from the highest down; the first one reached wins.
    umbrales = (
        (66, "ALTA ✅"),
        (55, "MEDIA ⚠️"),
    )
    for minimo, etiqueta in umbrales:
        if prob >= minimo:
            return etiqueta
    return "BAJA ❌"
'''
def get_dataframes(df, season, round_num, local, away, league=None):
"""Retorna 8 DataFrames filtrados por equipo, venue y liga"""
season_round = (df['season'] == season) & (df['round'] < round_num)
if league is not None:
season_round = season_round & (df['league'] == league)
def filter_and_split(team_filter):
filtered = df[season_round & team_filter].copy()
home = filtered[filtered['venue'] == "Home"]
away = filtered[filtered['venue'] == "Away"]
return home, away
local_home, local_away = filter_and_split(df['team'] == local)
local_opp_home, local_opp_away = filter_and_split(df['opponent'] == local)
away_home, away_away = filter_and_split(df['team'] == away)
away_opp_home, away_opp_away = filter_and_split(df['opponent'] == away)
return (local_home, local_away, local_opp_home, local_opp_away,
away_home, away_away, away_opp_home, away_opp_away)
def get_head_2_head(df, local, away, seasons=None, league=None):
"""Obtiene últimos 3 enfrentamientos directos"""
if seasons is None:
seasons = []
df_filtered = df[df['season'].isin(seasons)] if seasons else df
if league is not None:
df_filtered = df_filtered[df_filtered['league'] == league]
local_h2h = df_filtered[(df_filtered['team'] == local) & (df_filtered['opponent'] == away)]
away_h2h = df_filtered[(df_filtered['team'] == away) & (df_filtered['opponent'] == local)]
if len(local_h2h) < 4:
return local_h2h.tail(2), away_h2h.tail(2)
return local_h2h.tail(3), away_h2h.tail(3)
def get_average(df, is_team=False, lst_avg=None):
"""Calcula promedios de estadísticas (VERSIÓN COMPLETA)"""
if len(df) == 0:
if is_team:
# ✅ Retornar 23 valores (métricas avanzadas)
return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
return (0, 0, 0, 0, 0, 0, 0, 0, 0)
if is_team:
# ===========================
# ESTADÍSTICAS BÁSICAS (NORMALIZADAS)
# ===========================
avg_cross = (df['Performance_Crs'].sum() / len(df)) - lst_avg[3]
avg_att_3rd = (df['Touches_Att 3rd'].sum() / len(df)) - lst_avg[4]
avg_sca = (df['SCA Types_SCA'].sum() / len(df)) - lst_avg[2]
avg_xg = (df['Expected_xG'].sum() / len(df)) - lst_avg[1]
# ✅ VARIANZA DE CORNERS
var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
avg_ck = (df['Pass Types_CK'].sum() / len(df)) - lst_avg[8]
avg_poss = (df['Poss'].sum() / len(df)) - 50
avg_gf = (df['GF'].sum() / len(df)) - lst_avg[5]
avg_ga = (df['GA'].sum() / len(df)) - lst_avg[6]
# ===========================
# MÉTRICAS OFENSIVAS AVANZADAS
# ===========================
total_sh = df['Standard_Sh'].sum()
sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0
total_touches = df['Touches_Touches'].sum()
attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0
total_poss = df['Poss'].sum()
possession_shot = (total_sh / total_poss) if total_poss > 0 else 0
# ===========================
# MÉTRICAS DE CREACIÓN
# ===========================
total_passes = df['Total_Att'].sum()
progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
final_third_involvement = (df['1/3'].sum() / total_passes) if total_passes > 0 else 0
total_sca = df['SCA Types_SCA'].sum()
assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0
# ===========================
# MÉTRICAS DEFENSIVAS
# ===========================
total_tackles = df['Tackles_Tkl'].sum()
high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0
total_defensive_actions = total_tackles + df['Int'].sum()
clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0
# ===========================
# MÉTRICAS DE POSESIÓN
# ===========================
total_carries = df['Carries_Carries'].sum()
progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0
total_prog_passes = df['PrgP'].sum()
carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0
# ===========================
# ÍNDICES COMPUESTOS
# ===========================
avg_gf_raw = df['GF'].mean()
avg_xg_raw = df['Expected_xG'].mean()
avg_sot = df['Standard_SoT'].mean()
avg_sh = df['Standard_Sh'].mean()
offensive_index = (avg_gf_raw + avg_xg_raw) * (avg_sot / avg_sh) if avg_sh > 0 else 0
avg_prgp = df['PrgP'].mean()
avg_prgc = df['Carries_PrgC'].mean()
avg_poss_raw = df['Poss'].mean()
transition_index = ((avg_prgp + avg_prgc) / avg_poss_raw) if avg_poss_raw > 0 else 0
# ✅ RETORNAR 23 VALORES
return (
avg_ck, # 0
var_ck, # 1 - ✅ NUEVO
avg_xg, # 2
avg_sca, # 3
avg_cross, # 4
avg_poss, # 5
avg_att_3rd, # 6
avg_gf, # 7
avg_ga, # 8
sh_accuracy, # 9
xg_shot, # 10
attacking_presence, # 11
possession_shot, # 12
progressive_pass_ratio, # 13
final_third_involvement, # 14
assist_sca, # 15
creative_efficiency, # 16
high_press_intensity, # 17
interception_tackle, # 18
clearance_ratio, # 19
progressive_carry_ratio, # 20
carry_pass_balance, # 21
offensive_index, # 22
transition_index # 23
)
# ===========================
# PROMEDIOS DE LIGA (is_team=False)
# ===========================
avg_cross = df['Performance_Crs'].mean()
avg_att_3rd = df['Touches_Att 3rd'].mean()
avg_sca = df['SCA Types_SCA'].mean()
avg_xg = df['Expected_xG'].mean()
var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
avg_ck = df['Pass Types_CK'].mean()
avg_gf = df['GF'].mean()
avg_ga = df['GA'].mean()
avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
return (
var_ck, # 0
avg_xg, # 1
avg_sca, # 2
avg_cross, # 3
avg_att_3rd, # 4
avg_gf, # 5
avg_ga, # 6
avg_sh, # 7
avg_ck # 8
)
def get_points_from_result(result):
"""Convierte resultado (W/D/L) a puntos"""
if result == 'W':
return 3
elif result == 'D':
return 1
else:
return 0
def get_team_ppp(df, team, season, round_num, league=None):
"""Calcula puntos por partido (PPP) de un equipo"""
team_matches = df[
(df['team'] == team) &
(df['season'] == season) &
(df['round'] < round_num)
]
if league is not None:
team_matches = team_matches[team_matches['league'] == league]
if len(team_matches) == 0:
return 0.0
total_points = team_matches['result'].apply(get_points_from_result).sum()
ppp = total_points / len(team_matches)
return ppp
def get_ppp_difference(df, local, away, season, round_num, league=None):
"""Calcula diferencia de PPP entre local y visitante"""
local_ppp = get_team_ppp(df, local, season, round_num, league)
away_ppp = get_team_ppp(df, away, season, round_num, league)
return local_ppp - away_ppp
'''
def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP",
                     df_database=pd.DataFrame(), xgb_model="", scaler="", lst_years=[]):
    """
    Predict the total corners of one match and print a betting-oriented report.

    The feature vector is assembled from the project helpers ``get_average``,
    ``get_dataframes``, ``get_head_2_head`` and ``get_team_ppp``, scaled with
    ``scaler`` and passed to ``xgb_model``. The prediction is then used as the
    mean of a Poisson distribution to obtain exact and Over/Under
    probabilities, and a per-team reliability analysis is attached.

    Args:
        local: Home team name.
        visitante: Away team name.
        jornada: Round number; at least 5 is required.
        temporada: Season label (e.g. ``"2526"``).
        league_code: League code (e.g. ``"ESP"``, ``"GER"``, ``"FRA"``,
            ``"ITA"``, ``"NED"``, ``"ENG"``, ``"POR"``, ``"BEL"``).
        df_database: Match database; the columns ``season``, ``round``,
            ``league``, ``Pass Types_CK``, ``Expected_xG`` and ``Poss`` are
            read here, the helpers read more.
        xgb_model: Fitted model exposing ``predict``.
        scaler: Fitted scaler exposing ``feature_names_in_`` and ``transform``.
        lst_years: Ordered list of season labels; must contain ``temporada``.
            The default list is never mutated.

    Returns:
        On success, a dict with ``prediccion`` (rounded to 2 decimals), the
        team names, PPP values, ``riesgo`` (reliability dict, always carrying
        the level/CV/score keys), ``stats``, the three probability tables,
        the most probable count and the ``rango_80`` tuple.
        On failure, ``{"error": <message>, "prediccion": None}``; any
        exception raised while predicting is printed and reported this way.
    """
    print(f"\n{'='*80}")
    print(f"🏟️ {local} vs {visitante}")
    print(f"📅 Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
    print(f"{'='*80}")

    if jornada < 5:
        return {
            "error": "❌ Se necesitan al menos 5 jornadas previas",
            "prediccion": None
        }

    try:
        # ===========================
        # FEATURE EXTRACTION
        # ===========================
        # Averages over the league's matches played before this round
        # (is_team=False); passed to get_average for the team-level calls.
        lst_avg = get_average(
            df_database[
                (df_database['season'] == temporada) &
                (df_database['round'] < jornada) &
                (df_database['league'] == league_code)
            ],
            is_team=False
        )

        (team1_home, team1_away, team1_opp_home, team1_opp_away,
         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
            df_database, temporada, jornada, local, visitante, league=league_code
        )

        # Head-to-head considers every season up to and including `temporada`.
        # list.index raises ValueError if the season is not listed; that is
        # reported through the except block below.
        temporadas_h2h = lst_years[:lst_years.index(temporada) + 1]
        team1_h2h, team2_h2h = get_head_2_head(
            df_database, local, visitante, seasons=temporadas_h2h, league=league_code
        )

        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
        ppp_diff = local_ppp - away_ppp

        # ===========================
        # FEATURE DICTIONARY
        # ===========================
        def create_line(df, is_form=True, is_team=False, use_advanced=True):
            # "Form" lines only look at the last 6 rows; non-advanced lines
            # keep just the first 9 values returned by get_average.
            if is_form:
                df = df[-6:]
            valores = get_average(df, is_team, lst_avg)
            return valores if use_advanced else valores[:9]

        # Insertion order matters: names and values are generated in parallel
        # from this dict.
        dic_features = {
            'ppp_local': (local_ppp,),
            'ppp_away': (away_ppp,),
            'ppp_difference': (ppp_diff,),
        }
        for nombre, df_equipo in (
            ('team1_home', team1_home),
            ('team1_away', team1_away),
            ('team2_home', team2_home),
            ('team2_away', team2_away),
        ):
            dic_features[f'lst_{nombre}_form'] = create_line(df_equipo, True, True, use_advanced=True)
            dic_features[f'lst_{nombre}_general'] = create_line(df_equipo, False, True, use_advanced=True)
        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)

        # One-hot league indicator.
        for codigo in ('ESP', 'GER', 'FRA', 'ITA', 'NED', 'ENG', 'POR', 'BEL'):
            dic_features[f'league_{codigo}'] = (1 if league_code == codigo else 0,)

        # ===========================
        # FEATURE VECTOR
        # ===========================
        lst_base_advanced = [
            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
            "high_press_intensity", "interception_tackle", "clearance_ratio",
            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
        ]
        lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]

        lst_features_values = []
        lst_features_names = []
        for key, valores in dic_features.items():
            lst_features_values.extend(valores)
            if key in ('ppp_local', 'ppp_away', 'ppp_difference') or key.startswith('league_'):
                lst_features_names.append(key)
            elif key in ('lst_team1_opp_away', 'lst_team2_opp_home'):
                lst_features_names.extend(f"{key}_{col}" for col in lst_base_original)
            else:
                lst_features_names.extend(f"{key}_{col}" for col in lst_base_advanced)

        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)

        expected_features = scaler.feature_names_in_
        if len(df_input.columns) != len(expected_features):
            print(f"\n⚠️ ERROR: Número de features no coincide")
            print(f" Esperadas: {len(expected_features)}")
            print(f" Recibidas: {len(df_input.columns)}")
            return {"error": "Desajuste de features", "prediccion": None}

        # Reorder the columns into the order the scaler was fitted with.
        df_input = df_input[expected_features]
        X_input_scaled = pd.DataFrame(
            scaler.transform(df_input),
            columns=df_input.columns
        )

        # ===========================
        # PREDICTION
        # ===========================
        # Converted to a plain float so the returned dict does not carry
        # library-specific scalar types.
        prediccion = float(xgb_model.predict(X_input_scaled)[0])

        # ===========================
        # POISSON PROBABILITIES
        # ===========================
        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)

        # ===========================
        # DETAILED STATISTICS
        # ===========================
        def media_columna(df, columna):
            # Mean of a column, or 0 when there are no matches.
            return df[columna].mean() if len(df) > 0 else 0

        local_ck_home = media_columna(team1_home, 'Pass Types_CK')
        local_xg_home = media_columna(team1_home, 'Expected_xG')
        local_poss_home = media_columna(team1_home, 'Poss')
        away_ck_away = media_columna(team2_away, 'Pass Types_CK')
        away_xg_away = media_columna(team2_away, 'Expected_xG')
        away_poss_away = media_columna(team2_away, 'Poss')
        # NOTE(review): the model features above use team1_opp_away and
        # team2_opp_home, while these displayed "received" figures use
        # team1_opp_home and team2_opp_away — confirm which venue split is
        # intended here.
        local_ck_received = media_columna(team1_opp_home, 'Pass Types_CK')
        away_ck_received = media_columna(team2_opp_away, 'Pass Types_CK')
        partido_ck_esperado = local_ck_home + away_ck_away

        h2h_ck_local = media_columna(team1_h2h, 'Pass Types_CK')
        h2h_ck_away = media_columna(team2_h2h, 'Pass Types_CK')
        h2h_total = h2h_ck_local + h2h_ck_away

        # ===========================
        # REPORT: PREDICTION AND HISTORY
        # ===========================
        print(f"\n🎲 PREDICCIÓN MODELO: {prediccion:.2f} corners totales")
        print(f" PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")
        print(f"\n📊 ESTADÍSTICAS HISTÓRICAS:")
        print(f" {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
        print(f" {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
        print(f" Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
        print(f" Total esperado (suma): {partido_ck_esperado:.1f} corners")

        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
            print(f"\n🔄 HEAD TO HEAD (últimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
            print(f" {local}: {h2h_ck_local:.1f} CK/partido")
            print(f" {visitante}: {h2h_ck_away:.1f} CK/partido")
            print(f" Promedio total: {h2h_total:.1f} corners")

        # ===========================
        # REPORT: EXACT PROBABILITIES
        # ===========================
        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])
        print(f"\n📈 PROBABILIDADES EXACTAS (Poisson):")
        for k in sorted(analisis['exactas'].keys()):
            prob = analisis['exactas'][k]
            bar = '█' * int(prob / 2)
            marca = ' ⭐' if k == valor_mas_probable[0] else ''
            print(f" {k:2d} corners: {prob:5.2f}% {bar}{marca}")
        print(f"\n✅ Valor más probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")

        # 80% range: add counts from most to least probable until the
        # accumulated probability reaches 80%.
        # NOTE(review): only the counts present in analisis['exactas'] are
        # considered, so if they add up to less than 80% the whole window is
        # reported.
        acumulado = 0
        rango_80 = []
        for valor, prob in sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True):
            acumulado += prob
            rango_80.append(valor)
            if acumulado >= 80:
                break
        print(f"📊 Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")

        # ===========================
        # REPORT: OVER/UNDER WITH IMPLIED ODDS
        # ===========================
        print(f"\n🎯 ANÁLISIS OVER/UNDER:")
        print(f"{'Línea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
        print("-" * 85)
        # Iterate the lines that were actually computed instead of repeating
        # the list here.
        for linea, prob_over in analisis['over'].items():
            prob_under = analisis['under'][linea]
            # Implied decimal odds are the inverse of the probability.
            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999
            conf_over = clasificar_confianza(prob_over)
            print(f"O/U {linea:<5} {prob_over:6.2f}% @{cuota_impl_over:5.2f} {conf_over:<15} {prob_under:6.2f}% @{cuota_impl_under:5.2f}")

        # ===========================
        # REPORT: RECOMMENDATIONS
        # ===========================
        print(f"\n💡 RECOMENDACIONES DE APUESTA:")
        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]

        if mejores_over:
            print(f"\n✅ OVER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_over, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • Over {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
        if mejores_under:
            print(f"\n✅ UNDER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_under, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • Under {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
        if not mejores_over and not mejores_under:
            print(f" ⚠️ No hay apuestas con confianza MEDIA o superior")

        # ===========================
        # RISK ANALYSIS
        # ===========================
        # Missing reliability data must not discard a prediction that was
        # already computed: a KeyError from the analysis (e.g. a result table
        # without the expected columns) is treated as "insufficient data".
        try:
            df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
            riesgo = obtener_fiabilidad_partido(local, visitante, df_varianza_temp)
        except KeyError:
            riesgo = {
                'fiabilidad': 'DESCONOCIDO',
                'score': 0,
                'mensaje': '⚠️ Datos insuficientes'
            }
        # Guarantee every key read below and by batch callers.
        for clave, por_defecto in (
            ('score_local', 0), ('score_away', 0), ('score_promedio', 0),
            ('nivel_local', 'DESCONOCIDO'), ('nivel_away', 'DESCONOCIDO'),
            ('cv_local', 0.0), ('cv_away', 0.0),
            ('consistencia_local', 0.0), ('consistencia_away', 0.0),
        ):
            riesgo.setdefault(clave, por_defecto)

        print(f"\n⚠️ ANÁLISIS DE RIESGO:")
        print(f" Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
        print(f" Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
        print(f" 🎲 FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
        print(f" 💡 {riesgo['mensaje']}")

        # ===========================
        # RESULT
        # ===========================
        return {
            "prediccion": round(prediccion, 2),
            "local": local,
            "visitante": visitante,
            "ppp_local": local_ppp,
            "ppp_away": away_ppp,
            "ppp_diff": ppp_diff,
            "riesgo": riesgo,
            "stats": {
                "local_ck": local_ck_home,
                "away_ck": away_ck_away,
                "local_ck_received": local_ck_received,
                "away_ck_received": away_ck_received,
                "h2h_total": h2h_total,
                "partido_esperado": partido_ck_esperado
            },
            "probabilidades_exactas": analisis['exactas'],
            "probabilidades_over": analisis['over'],
            "probabilidades_under": analisis['under'],
            "valor_mas_probable": valor_mas_probable[0],
            "prob_mas_probable": valor_mas_probable[1],
            "rango_80": (min(rango_80), max(rango_80))
        }
    except Exception as e:
        # Top-level boundary for a single match: report the failure to the
        # console and to the caller instead of aborting a whole batch.
        print(f"\n❌ ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e), "prediccion": None}
def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP", export_csv=True, filename=None,
                            df_database=pd.DataFrame(), xgb_model="", scaler="", lst_years=[]):
    """
    Predict corners for several matches and optionally export them to CSV.

    Each match is run through ``predecir_corners``; matches that come back
    with an error are reported and skipped.

    Args:
        partidos: List of ``(local, visitante)`` tuples.
        jornada: Round number.
        temporada: Season label (e.g. ``"2526"``).
        league_code: League code (e.g. ``"ESP"``, ``"GER"``, ``"FRA"``).
        export_csv: When True and there is at least one result, write a CSV.
        filename: CSV path; defaults to
            ``predicciones_{league_code}_J{jornada}_{temporada}.csv``.
        df_database, xgb_model, scaler, lst_years: Forwarded unchanged to
            ``predecir_corners``.

    Returns:
        DataFrame with one row per successfully predicted match; empty when
        no match could be predicted.
    """
    lineas_over = [6.5, 7.5, 8.5, 9.5, 10.5]
    lineas_under = [12.5, 11.5, 10.5, 9.5]

    def probabilidad_linea(resultado, lado, linea):
        # Probability (in %) for one line. Lines that the prediction did not
        # include are computed here from the (2-decimal) predicted mean, so
        # the export never shows a made-up 0% for them.
        clave = 'probabilidades_over' if lado == 'Over' else 'probabilidades_under'
        tabla = resultado[clave]
        if linea in tabla:
            return tabla[linea]
        lam = float(resultado['prediccion'])
        if lado == 'Over':
            return poisson.sf(linea, lam) * 100
        return poisson.cdf(linea, lam) * 100

    resultados = []
    print("\n" + "=" * 120)
    print(f"🎯 PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
    print("=" * 120)

    for idx, (local, visitante) in enumerate(partidos, 1):
        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")
        resultado = predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=df_database,
            xgb_model=xgb_model,
            scaler=scaler,
            lst_years=lst_years)
        if resultado.get("error"):
            print(f" ❌ Error: {resultado['error']}")
            continue

        stats_partido = resultado['stats']
        riesgo = resultado['riesgo']
        # Reliability fields are read defensively: a risk dict without them
        # is treated as the lowest reliability.
        score_partido = riesgo.get('score_promedio', 0)
        h2h_total = stats_partido['h2h_total']

        # ===========================
        # ROW
        # ===========================
        fila = {
            'Partido': f"{local} vs {visitante}",
            'Local': local,
            'Visitante': visitante,
            'Liga': league_code,
            'Jornada': jornada,
            'Temporada': temporada,
            # Prediction
            'Prediccion': resultado['prediccion'],
            'Valor_Mas_Probable': resultado['valor_mas_probable'],
            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
            'Rango_80%_Min': resultado['rango_80'][0],
            'Rango_80%_Max': resultado['rango_80'][1],
            # PPP
            'PPP_Local': round(resultado['ppp_local'], 2),
            'PPP_Away': round(resultado['ppp_away'], 2),
            'PPP_Diferencia': round(resultado['ppp_diff'], 2),
            # Historical statistics
            'CK_Local_Casa': round(stats_partido['local_ck'], 1),
            'CK_Away_Fuera': round(stats_partido['away_ck'], 1),
            'CK_Local_Recibidos': round(stats_partido['local_ck_received'], 1),
            'CK_Away_Recibidos': round(stats_partido['away_ck_received'], 1),
            'CK_Esperado_Suma': round(stats_partido['partido_esperado'], 1),
            'CK_H2H_Total': round(h2h_total, 1) if h2h_total > 0 else 'N/A',
            # Risk
            'Fiabilidad_Partido': riesgo['fiabilidad'],
            'Score_Fiabilidad': round(score_partido, 1),
            'Nivel_Local': riesgo.get('nivel_local', 'DESCONOCIDO'),
            'Nivel_Away': riesgo.get('nivel_away', 'DESCONOCIDO'),
            'CV_Local_%': round(riesgo.get('cv_local', 0.0), 1),
            'CV_Away_%': round(riesgo.get('cv_away', 0.0), 1),
        }

        # ===========================
        # OVER 6.5-10.5 AND UNDER 12.5-9.5
        # ===========================
        for lado, lineas in (('Over', lineas_over), ('Under', lineas_under)):
            for linea in lineas:
                prob = probabilidad_linea(resultado, lado, linea)
                fila[f'{lado}_{linea}_Prob_%'] = round(prob, 2)
                fila[f'{lado}_{linea}_Cuota'] = round(100 / prob, 2) if prob > 0 else 999
                fila[f'{lado}_{linea}_Confianza'] = clasificar_confianza(prob)

        # ===========================
        # RECOMMENDATIONS
        # ===========================
        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]

        if score_partido < 35:
            fila['Recomendacion'] = "⛔ EVITAR - Baja fiabilidad"
            fila['Es_Apostable'] = "NO"
        elif not mejores_over and not mejores_under:
            fila['Recomendacion'] = "⚠️ NO RECOMENDADO - Sin confianza suficiente"
            fila['Es_Apostable'] = "NO"
        else:
            recomendaciones = []
            if mejores_over:
                mejor_over = max(mejores_over, key=lambda x: x[1])
                cuota_over = round(100 / mejor_over[1], 2)
                recomendaciones.append(f"Over {mejor_over[0]} ({mejor_over[1]:.1f}% @{cuota_over})")
            if mejores_under:
                mejor_under = max(mejores_under, key=lambda x: x[1])
                cuota_under = round(100 / mejor_under[1], 2)
                recomendaciones.append(f"Under {mejor_under[0]} ({mejor_under[1]:.1f}% @{cuota_under})")
            fila['Recomendacion'] = " | ".join(recomendaciones)
            if score_partido >= 65:
                fila['Es_Apostable'] = "SÍ ⭐⭐⭐"
            elif score_partido >= 50:
                fila['Es_Apostable'] = "SÍ ✅"
            else:
                fila['Es_Apostable'] = "PRECAUCIÓN 🟡"

        fila['Mensaje_Riesgo'] = riesgo.get('mensaje', '')
        resultados.append(fila)
        print(f" ✅ Completado")

    # ===========================
    # DATAFRAME
    # ===========================
    df_resultados = pd.DataFrame(resultados)
    print("\n" + "=" * 120)
    print(f"✅ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
    print("=" * 120)

    # Export and summary both need at least one row: an empty frame has no
    # 'Es_Apostable' column to summarise.
    if len(df_resultados) > 0:
        # ===========================
        # CSV EXPORT
        # ===========================
        if export_csv:
            if filename is None:
                filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"
            df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
            print(f"\n💾 Resultados exportados a: {filename}")

        # ===========================
        # SUMMARY
        # ===========================
        apostable = df_resultados['Es_Apostable']
        print(f"\n📊 RESUMEN DE APUESTAS:")
        print(f" Partidos apostables: {int(apostable.str.contains('SÍ').sum())} / {len(df_resultados)}")
        print(f" Partidos ALTA confianza (⭐⭐⭐): {int((apostable == 'SÍ ⭐⭐⭐').sum())}")
        print(f" Partidos MEDIA confianza (✅): {int((apostable == 'SÍ ✅').sum())}")
        print(f" Partidos a evitar (⛔): {int((apostable == 'NO').sum())}")

    return df_resultados
def mostrar_resumen_batch(df_resultados):
    """Print a human-readable summary of the bettable matches.

    Only rows whose ``Es_Apostable`` value contains ``'SÍ'`` are shown,
    ordered by ``Score_Fiabilidad`` (highest first). For each match the
    prediction, historic corner figures, reliability and recommendation are
    printed, followed by every Over/Under line (7.5 - 10.5) whose
    probability is at least 55 %.

    Args:
        df_resultados: DataFrame with one row per match. It is expected to
            carry the columns read below (``Partido``, ``Prediccion``,
            ``Score_Fiabilidad``, ...). An empty frame, ``None`` or a frame
            without ``Es_Apostable`` is treated as "nothing to bet on".

    Returns:
        None. The function only writes to stdout.
    """
    print("\n" + "=" * 120)
    print("🎯 MEJORES OPORTUNIDADES DE APUESTA")
    print("=" * 120)

    # An empty batch yields a DataFrame without columns; indexing
    # 'Es_Apostable' on it would raise KeyError, so bail out early.
    if (
        df_resultados is None
        or len(df_resultados) == 0
        or 'Es_Apostable' not in df_resultados.columns
    ):
        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
        return

    # Keep only bettable rows. astype(str) + na=False keeps the mask strictly
    # boolean even when some rows have a missing 'Es_Apostable' value.
    es_apostable = (
        df_resultados['Es_Apostable']
        .astype(str)
        .str.contains('SÍ', regex=False, na=False)
    )
    df_apostables = df_resultados[es_apostable].copy()

    if len(df_apostables) == 0:
        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
        return

    # Most reliable matches first.
    df_apostables = df_apostables.sort_values('Score_Fiabilidad', ascending=False)

    for _, row in df_apostables.iterrows():
        print(f"\n{'='*120}")
        print(f"🏟️ {row['Partido']}")
        print(f"{'='*120}")
        print(f"📊 Predicción: {row['Prediccion']:.2f} corners | Valor más probable: {row['Valor_Mas_Probable']} ({row['Prob_Valor_Mas_Probable_%']:.1f}%)")
        print(f"📈 Histórico: Local {row['CK_Local_Casa']:.1f} CK | Away {row['CK_Away_Fuera']:.1f} CK | H2H: {row['CK_H2H_Total']}")
        print(f"🎲 Fiabilidad: {row['Fiabilidad_Partido']} (Score: {row['Score_Fiabilidad']:.1f}/100)")
        print(f"💡 {row['Recomendacion']}")

        # Highlight every line whose probability reaches the 55 % threshold.
        print(f"\n 📌 Líneas destacadas:")
        for linea in [7.5, 8.5, 9.5, 10.5]:
            # Over is reported before Under for each line.
            for lado in ('Over', 'Under'):
                prob = row.get(f'{lado}_{linea}_Prob_%', 0)
                if prob >= 55:
                    cuota = row.get(f'{lado}_{linea}_Cuota', 0)
                    conf = row.get(f'{lado}_{linea}_Confianza', '')
                    print(f" • {lado} {linea}: {prob:.1f}% @{cuota:.2f} - {conf}")
class USE_MODEL:
    """Facade that loads the corner-prediction model and data, then predicts.

    On construction the XGBoost model and its scaler are downloaded from
    GitHub, the cleaned datasets are loaded into ``self.df_dataset`` and the
    list of season codes is initialised. The ``consume_model_*`` methods then
    forward to the module-level prediction functions.
    """

    def __init__(self):
        self.load_models()
        self.load_data()
        self.init_variables()

    @staticmethod
    def _load_joblib_from_bytes(payload):
        """Write *payload* to a temporary ``.pkl`` file and ``joblib.load`` it.

        The temporary file is always removed, even when loading fails.
        """
        tmp_path = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pkl') as tmp:
                tmp_path = tmp.name
                tmp.write(payload)
            # SECURITY NOTE: joblib.load unpickles the payload, which can run
            # arbitrary code. Only safe while the download source is trusted.
            return joblib.load(tmp_path)
        finally:
            if tmp_path is not None and os.path.exists(tmp_path):
                os.unlink(tmp_path)

    def load_models(self):
        """Download the model and scaler from GitHub raw URLs and load them.

        Sets ``self.xgb_model`` and ``self.scaler``.

        Raises:
            Exception: if a download fails or a file cannot be loaded. The
                original error is chained as ``__cause__``.
        """
        print("📦 Cargando modelos desde GitHub...")

        # Direct-download URLs (raw.githubusercontent.com).
        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/models"
        model_url = f"{base_url}/xgboost_corners_v4_retrain.pkl"
        scaler_url = f"{base_url}/scaler_corners_v4_retrain.pkl"

        try:
            print(f"📥 Descargando modelo desde: {model_url}")
            response_model = requests.get(model_url, timeout=30)
            response_model.raise_for_status()

            print(f"📥 Descargando scaler desde: {scaler_url}")
            response_scaler = requests.get(scaler_url, timeout=30)
            response_scaler.raise_for_status()

            # Both downloads succeeded; materialise and load each artefact.
            self.xgb_model = self._load_joblib_from_bytes(response_model.content)
            self.scaler = self._load_joblib_from_bytes(response_scaler.content)

            print("✅ Modelos cargados correctamente desde GitHub")
        except requests.exceptions.RequestException as e:
            raise Exception(f"❌ Error descargando modelos: {str(e)}") from e
        except Exception as e:
            raise Exception(f"❌ Error cargando modelos: {str(e)}") from e

    def load_data(self):
        """Load the historic and current-season datasets from GitHub.

        Sets ``self.df_dataset_historic`` and, when available,
        ``self.df_dataset_current_year``; ``self.df_dataset`` is their
        concatenation (or only the historic data if the current-season file
        cannot be read). ``season`` is cast to ``str`` and missing
        ``Performance_Save%`` values are replaced by 0.

        Raises:
            FileNotFoundError: if the historic dataset cannot be loaded or
                the clean-up step fails. The original error is chained.
        """
        print("📂 Cargando datos desde GitHub...")

        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/dataset/cleaned"
        historic_url = f"{base_url}/dataset_cleaned.csv"
        current_url = f"{base_url}/dataset_cleaned_current_year.csv"

        try:
            print("📥 Descargando dataset histórico...")
            self.df_dataset_historic = pd.read_csv(historic_url)
            print(f"✅ Dataset histórico cargado: {len(self.df_dataset_historic)} registros")

            # The current-season file is optional: fall back to historic only.
            try:
                print("📥 Descargando dataset año actual...")
                self.df_dataset_current_year = pd.read_csv(current_url)
                print(f"✅ Dataset año actual cargado: {len(self.df_dataset_current_year)} registros")
                self.df_dataset = pd.concat([self.df_dataset_historic, self.df_dataset_current_year])
            except Exception:
                # Not a bare ``except`` so Ctrl-C / SystemExit still propagate.
                print("⚠️ No se pudo cargar dataset del año actual, usando solo histórico")
                self.df_dataset = self.df_dataset_historic

            # Clean-up. Assign the filled column back instead of a chained
            # ``inplace=True``, which is a no-op under pandas Copy-on-Write.
            self.df_dataset["season"] = self.df_dataset["season"].astype(str)
            self.df_dataset["Performance_Save%"] = self.df_dataset["Performance_Save%"].fillna(0)

            print(f"✅ Total registros: {len(self.df_dataset)}")
        except Exception as e:
            raise FileNotFoundError(
                f"\n❌ ERROR: No se pudieron cargar los datos desde GitHub\n"
                f" Error: {str(e)}\n\n"
                f"💡 Verifica que los archivos existan en el repositorio\n"
            ) from e

    def init_variables(self):
        """Initialise ``self.lst_years``, the list of season codes passed to the predictors."""
        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
        print("✅ Variables inicializadas")

    def consume_model_batch(self, partidos, jornada, temporada, league_code):
        """Run ``predecir_partidos_batch`` for *partidos* and export a CSV.

        The CSV path is ``results/<league_code>/<league_code>-<temporada>-
        <jornada>-predicciones.csv``, built with the platform separator. The
        directory is created if it does not exist.

        Returns:
            Whatever ``predecir_partidos_batch`` returns (a DataFrame of
            predictions in the visible part of the file).
        """
        # os.path.join replaces the hard-coded backslashes, which were
        # invalid escape sequences and did not form directories on POSIX.
        output_dir = os.path.join("results", str(league_code))
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(
            output_dir,
            f"{league_code}-{temporada}-{jornada}-predicciones.csv",
        )

        df_predict = predecir_partidos_batch(
            partidos=partidos,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            export_csv=True,
            filename=filename,
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years,
        )
        return df_predict

    def consume_model_single(self, local, visitante, jornada, temporada, league_code):
        """Run ``predecir_corners`` for one match and return its result."""
        return predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years,
        )

    def kelly_stats(self, p, odds, fraction=0.2):
        """Return the fractional Kelly stake as a share of the bankroll.

        Args:
            p: Estimated probability of winning the bet (0-1).
            odds: Decimal odds; the net payout per unit staked is ``odds - 1``.
            fraction: Multiplier applied to the full Kelly stake. The default
                0.2 stakes 20 % of full Kelly.

        Returns:
            ``fraction * max(f*, 0)`` where ``f* = (b*p - q) / b``, or 0 when
            the odds offer no net payout (``odds <= 1``).
        """
        b = odds - 1
        if b <= 0:
            # No positive payout: avoids ZeroDivisionError at odds == 1 and a
            # meaningless positive stake for odds < 1.
            return 0 * fraction
        q = 1 - p
        f_star = (b * p - q) / b
        f_star = max(f_star, 0)  # never recommend a negative stake
        return f_star * fraction