# ===========================
# CORNER PREDICTION SYSTEM - OPTIMISED FOR BETTING (FULL VERSION)
# ===========================
import os
import sys
import tempfile

import joblib
import numpy as np
import pandas as pd
import requests
from scipy import stats
from scipy.stats import poisson

from src.process_data.process_dataset import (
    get_dataframes,
    get_head_2_head,
    get_points_from_result,
    get_team_ppp,
    get_ppp_difference,
    get_average,
)


# ===========================
# 1. RELIABILITY FUNCTIONS
# ===========================
def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
    """Score how reliable each team's corner production is for betting.

    Goes beyond plain variance: combines variability, consistency, trend,
    outlier and range metrics into a single 0-100 reliability score.

    Args:
        df_database: Match-level DataFrame with at least the columns
            'season', 'team', 'opponent' and 'Pass Types_CK'.
        temporada: Season identifier to analyse (e.g. "2526").
        min_partidos: Minimum matches a club must have played (as 'team')
            to be scored; clubs below the threshold are skipped.

    Returns:
        DataFrame with one row per team, sorted by 'Score_Fiabilidad'
        descending. Empty (with an 'Equipo' column) when no team
        qualifies.
    """
    df_temp = df_database[df_database['season'] == temporada].copy()
    resultados = []
    # Every club that appears this season, either as team or as opponent.
    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()

    for equipo in equipos:
        # Only rows where the club is 'team' carry its own corner counts.
        partidos_equipo = df_temp[df_temp['team'] == equipo]
        if len(partidos_equipo) < min_partidos:
            continue

        ck_sacados = partidos_equipo['Pass Types_CK'].values

        # --- 1. Variability metrics ----------------------------------------
        media = ck_sacados.mean()
        std = ck_sacados.std()
        cv = (std / media * 100) if media > 0 else 0  # coefficient of variation

        # --- 2. Consistency metrics ----------------------------------------
        # 2.1 Share of matches within +/-2 corners of the team's own mean.
        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / len(ck_sacados) * 100
        # 2.2 Match-to-match jumps larger than 4 corners ("explosions").
        cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
        pct_cambios_bruscos = cambios_bruscos / (len(ck_sacados) - 1) * 100
        # 2.3 Quartiles (Q1, Q2 = median, Q3).
        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
        iqr = q3 - q1  # interquartile range, more robust than std

        # --- 3. Trend metrics ----------------------------------------------
        # 3.1 Linear trend: does output drift over the season?
        jornadas = np.arange(len(ck_sacados))
        slope = stats.linregress(jornadas, ck_sacados).slope
        # 3.2 Lag-1 autocorrelation: does one match predict the next?
        if len(ck_sacados) > 2:
            autocorr = np.corrcoef(ck_sacados[:-1], ck_sacados[1:])[0, 1]
        else:
            autocorr = 0

        # --- 4. Outlier metrics --------------------------------------------
        # 4.1 IQR rule.
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
        pct_outliers = outliers / len(ck_sacados) * 100
        # 4.2 Maximum absolute z-score.
        # NOTE(review): stats.zscore yields NaN for a constant series.
        z_scores = np.abs(stats.zscore(ck_sacados))
        max_z = z_scores.max()

        # --- 5. Range metrics ----------------------------------------------
        rango = ck_sacados.max() - ck_sacados.min()
        rango_normalizado = rango / media if media > 0 else 0

        # --- 6. Global reliability score (0-100, higher = better) ----------
        score_cv = max(0, 100 - cv * 2)                        # high CV is bad
        score_consistencia = cerca_media                       # near-mean share
        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)  # abrupt jumps are bad
        score_outliers = max(0, 100 - pct_outliers * 3)        # outliers are bad
        score_iqr = max(0, 100 - iqr * 10)                     # wide IQR is bad
        # Weighted average of the partial scores.
        score_fiabilidad = (
            score_cv * 0.25 +
            score_consistencia * 0.30 +
            score_cambios * 0.20 +
            score_outliers * 0.15 +
            score_iqr * 0.10
        )

        # --- 7. Multi-criteria classification ------------------------------
        if score_fiabilidad >= 70:
            nivel = "EXCELENTE ⭐⭐⭐"
            color = "#27ae60"
        elif score_fiabilidad >= 55:
            nivel = "BUENO ✅"
            color = "#2ecc71"
        elif score_fiabilidad >= 40:
            nivel = "ACEPTABLE 🟡"
            color = "#f39c12"
        elif score_fiabilidad >= 25:
            nivel = "REGULAR ⚠️"
            color = "#e67e22"
        else:
            nivel = "EVITAR ⛔"
            color = "#e74c3c"

        resultados.append({
            'Equipo': equipo,
            'Partidos': len(ck_sacados),
            # Basic statistics
            'Media_CK': round(media, 2),
            'Mediana_CK': round(q2, 2),
            'Std_CK': round(std, 2),
            'CV_%': round(cv, 1),
            # Consistency
            'Pct_Cerca_Media': round(cerca_media, 1),
            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
            'IQR': round(iqr, 2),
            # Range
            'Rango': int(rango),
            'Rango_Norm': round(rango_normalizado, 2),
            'Min': int(ck_sacados.min()),
            'Max': int(ck_sacados.max()),
            # Outliers
            'Outliers': int(outliers),
            'Pct_Outliers': round(pct_outliers, 1),
            'Max_ZScore': round(max_z, 2),
            # Trend
            'Tendencia_Slope': round(slope, 3),
            'Autocorr': round(autocorr, 3),
            # Score and classification
            'Score_Fiabilidad': round(score_fiabilidad, 1),
            'Nivel': nivel,
            'Color': color
        })

    # FIX: when no team reached min_partidos the original code built an
    # empty DataFrame and sort_values raised KeyError on the missing
    # 'Score_Fiabilidad' column. Return an empty, well-formed frame instead
    # (callers test 'Equipo' membership / .empty).
    if not resultados:
        return pd.DataFrame(columns=['Equipo', 'Score_Fiabilidad'])

    df_resultado = pd.DataFrame(resultados)
    df_resultado = df_resultado.sort_values('Score_Fiabilidad', ascending=False)
    return df_resultado
def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """Print the full reliability report for the analysed teams.

    Shows the top_n most and least reliable teams (by 'Score_Fiabilidad'),
    the distribution of reliability levels and aggregate score statistics.
    Console report only; returns None.
    """
    sep = "=" * 120
    dash = "-" * 120

    print("\n" + sep)
    print("🎯 ANÁLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print(sep)

    # Most reliable teams (table is already sorted by score, descending).
    print(f"\n⭐ TOP {top_n} EQUIPOS MÁS FIABLES")
    print(dash)
    for _, fila in df_analisis.head(top_n).iterrows():
        print(f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | Score: {fila['Score_Fiabilidad']:.1f}")
        print(f" 📊 Media: {fila['Media_CK']:.1f} | Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%")
        print(f" ✅ {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        print(f" ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | Outliers: {fila['Pct_Outliers']:.1f}%")
        print(f" 📈 Rango: {fila['Min']}-{fila['Max']} ({fila['Rango']} corners)")

    # Least reliable teams (bottom of the ranking).
    print(f"\n\n⛔ TOP {top_n} EQUIPOS MENOS FIABLES")
    print(dash)
    for _, fila in df_analisis.tail(top_n).iterrows():
        print(f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | Score: {fila['Score_Fiabilidad']:.1f}")
        print(f" 📊 Media: {fila['Media_CK']:.1f} | Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%")
        print(f" ❌ Solo {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        print(f" ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | Outliers: {fila['Pct_Outliers']:.1f}%")

    # Aggregate view over the whole table.
    print(f"\n\n📊 DISTRIBUCIÓN POR NIVEL DE FIABILIDAD")
    print(dash)
    print(df_analisis['Nivel'].value_counts())

    puntuaciones = df_analisis['Score_Fiabilidad']
    print(f"\n📈 ESTADÍSTICAS DE SCORE:")
    print(f" Media: {puntuaciones.mean():.1f}")
    print(f" Mediana: {puntuaciones.median():.1f}")
    print(f" Score máximo: {puntuaciones.max():.1f}")
    print(f" Score mínimo: {puntuaciones.min():.1f}")
def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """Rate how reliable a specific fixture is for corner betting.

    Args:
        local: Home team name (matched against 'Equipo' in df_analisis).
        visitante: Away team name.
        df_analisis: Output of analizar_fiabilidad_equipos().

    Returns:
        Dict with the fixture reliability label, per-team scores/levels and
        extra context (CV, near-mean consistency). When either team is
        missing from df_analisis, a reduced dict with
        'fiabilidad' == 'DESCONOCIDO' and 'score' == 0 is returned.
    """
    datos_local = df_analisis[df_analisis['Equipo'] == local]
    datos_away = df_analisis[df_analisis['Equipo'] == visitante]

    if datos_local.empty or datos_away.empty:
        return {
            'fiabilidad': 'DESCONOCIDO',
            'score': 0,
            'mensaje': '⚠️ Datos insuficientes'
        }

    score_local = datos_local['Score_Fiabilidad'].values[0]
    score_away = datos_away['Score_Fiabilidad'].values[0]
    score_promedio = (score_local + score_away) / 2

    # Fixture classification from the mean of both team scores.
    if score_promedio >= 65:
        fiabilidad = "MUY ALTA ⭐⭐⭐"
        mensaje = "✅ EXCELENTE PARTIDO PARA APOSTAR"
    elif score_promedio >= 50:
        fiabilidad = "ALTA ✅"
        mensaje = "✅ BUEN PARTIDO PARA APOSTAR"
    elif score_promedio >= 35:
        fiabilidad = "MEDIA 🟡"
        mensaje = "🟡 APOSTAR CON PRECAUCIÓN"
    else:
        fiabilidad = "BAJA ⛔"
        mensaje = "⛔ EVITAR APUESTA"

    return {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': datos_local['Nivel'].values[0],
        'nivel_away': datos_away['Nivel'].values[0],
        'mensaje': mensaje,
        # Additional context that is useful downstream
        'cv_local': datos_local['CV_%'].values[0],
        'cv_away': datos_away['CV_%'].values[0],
        'consistencia_local': datos_local['Pct_Cerca_Media'].values[0],
        'consistencia_away': datos_away['Pct_Cerca_Media'].values[0]
    }


def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5):
    """Poisson probabilities for total corners around the model prediction.

    Args:
        lambda_pred: Predicted mean of total corners (Poisson lambda).
        rango_inferior: Values below the rounded prediction to tabulate.
        rango_superior: Values above the rounded prediction to tabulate.

    Returns:
        Dict with:
            'exactas': {k: P(X == k) * 100} over the analysed range,
            'over':    {line: P(X > line) * 100},
            'under':   {line: P(X <= line) * 100} (same lines as 'over',
                       so over + under == 100 per line).
    """
    valor_central = int(round(lambda_pred))
    valores_analizar = range(
        max(0, valor_central - rango_inferior),
        valor_central + rango_superior + 1
    )

    probabilidades_exactas = {}
    for k in valores_analizar:
        probabilidades_exactas[k] = poisson.pmf(k, lambda_pred) * 100

    # FIX: 6.5 added. predecir_partidos_batch exports Over_6.5 columns via
    # probabilidades_over.get(6.5, 0), which was always 0 because 6.5 was
    # missing from this list. Same line list is used for over and under.
    lines = [6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5]

    probabilidades_over = {}
    for linea in lines:
        probabilidades_over[linea] = (1 - poisson.cdf(linea, lambda_pred)) * 100

    probabilidades_under = {}
    for linea in lines:
        probabilidades_under[linea] = poisson.cdf(linea, lambda_pred) * 100

    return {
        'exactas': probabilidades_exactas,
        'over': probabilidades_over,
        'under': probabilidades_under
    }
def clasificar_confianza(prob):
    """Map a probability (0-100) to a confidence label.

    >= 66 -> high ("ALTA"), >= 55 -> medium ("MEDIA"), otherwise low.
    """
    if prob >= 66:
        return "ALTA ✅"
    elif prob >= 55:
        return "MEDIA ⚠️"
    else:
        return "BAJA ❌"


# NOTE(review): a large dead-code block (a triple-quoted string holding
# legacy in-file copies of get_dataframes, get_head_2_head, get_average,
# get_points_from_result, get_team_ppp and get_ppp_difference) lived here.
# The maintained versions are imported from
# src.process_data.process_dataset, so the duplicate block was removed.
def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP", df_database=pd.DataFrame(), xgb_model="", scaler="", lst_years=[]):
    """Predict total corners for one fixture with a full betting analysis.

    Args:
        local: Home team name.
        visitante: Away team name.
        jornada: Matchday number; at least 5 prior matchdays are required.
        temporada: Season id (format "2526").
        league_code: League code ("ESP", "GER", "FRA", "ITA", "NED", ...).
        df_database: Historical match DataFrame used to build features.
        xgb_model: Fitted regressor exposing .predict().
        scaler: Fitted scaler exposing .transform() and .feature_names_in_.
        lst_years: Ordered season ids; seasons up to and including
            `temporada` feed the head-to-head lookup.

    Returns:
        Dict with the model prediction, Poisson probability tables,
        historical stats and a reliability assessment, or
        {"error": ..., "prediccion": None} on failure.

    NOTE(review): df_database=pd.DataFrame() and lst_years=[] are mutable
    default arguments, shared across calls — confirm they are never mutated.
    """
    print(f"\n{'='*80}")
    print(f"🏟️ {local} vs {visitante}")
    print(f"📅 Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
    print(f"{'='*80}")

    # Not enough history early in the season.
    if jornada < 5:
        return {
            "error": "❌ Se necesitan al menos 5 jornadas previas",
            "prediccion": None
        }

    try:
        # ===========================
        # FEATURE EXTRACTION
        # ===========================
        # League-wide averages over all prior rounds of this season/league,
        # used to normalise team-level averages inside get_average.
        lst_avg = get_average(
            df_database[
                (df_database['season'] == temporada) &
                (df_database['round'] < jornada) &
                (df_database['league'] == league_code)
            ],
            is_team=False
        )

        # Home/away splits for both clubs and for their opponents.
        (team1_home, team1_away, team1_opp_home, team1_opp_away,
         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
            df_database, temporada, jornada, local, visitante, league=league_code
        )

        # Head-to-head over every season up to and including `temporada`.
        index = lst_years.index(temporada)
        result = lst_years[:index + 1]
        team1_h2h, team2_h2h = get_head_2_head(
            df_database, local, visitante, seasons=result, league=league_code
        )

        # Points-per-match for both clubs and their difference.
        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
        ppp_diff = local_ppp - away_ppp

        # ===========================
        # BUILD THE FEATURE DICTIONARY
        # (insertion order defines the final feature vector)
        # ===========================
        def create_line(df, is_form=True, is_team=False, use_advanced=True):
            # "Form" lines only use the last 6 matches.
            if is_form:
                df = df[-6:]
            if use_advanced:
                return get_average(df, is_team, lst_avg)
            else:
                # Reduced (original) feature set: first 9 values only.
                result = get_average(df, is_team, lst_avg)
                return result[:9]

        dic_features = {}
        dic_features['ppp_local'] = (local_ppp,)
        dic_features['ppp_away'] = (away_ppp,)
        dic_features['ppp_difference'] = (ppp_diff,)
        dic_features['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
        dic_features['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
        dic_features['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
        dic_features['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)
        dic_features['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
        dic_features['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
        dic_features['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
        dic_features['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)
        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)

        # One-hot league indicators.
        league_dummies = {
            'league_ESP': 1 if league_code == 'ESP' else 0,
            'league_GER': 1 if league_code == 'GER' else 0,
            'league_FRA': 1 if league_code == 'FRA' else 0,
            'league_ITA': 1 if league_code == 'ITA' else 0,
            'league_NED': 1 if league_code == 'NED' else 0,
            'league_ENG': 1 if league_code == 'ENG' else 0,
            'league_POR': 1 if league_code == 'POR' else 0,
            'league_BEL': 1 if league_code == 'BEL' else 0
        }
        for key, value in league_dummies.items():
            dic_features[key] = (value,)

        # ===========================
        # BUILD THE FEATURE VECTOR
        # ===========================
        # Column suffixes for the advanced (24-value) feature groups.
        lst_base_advanced = [
            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca",
            "creative_efficiency", "high_press_intensity", "interception_tackle",
            "clearance_ratio", "progressive_carry_ratio", "carry_pass_balance",
            "offensive_index", "transition_index"
        ]
        # Column suffixes for the reduced (9-value) opponent groups.
        lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]

        lst_features_values = []
        lst_features_names = []
        for key in dic_features:
            lst_features_values.extend(list(dic_features[key]))
            if key in ['ppp_local', 'ppp_away', 'ppp_difference']:
                lst_features_names.append(key)
            elif key.startswith('league_'):
                lst_features_names.append(key)
            elif key in ['lst_team1_opp_away', 'lst_team2_opp_home']:
                lst_features_names.extend([f"{key}_{col}" for col in lst_base_original])
            else:
                lst_features_names.extend([f"{key}_{col}" for col in lst_base_advanced])

        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)

        # Abort on any mismatch with the features the scaler was fit on.
        expected_features = scaler.feature_names_in_
        if len(df_input.columns) != len(expected_features):
            print(f"\n⚠️ ERROR: Número de features no coincide")
            print(f" Esperadas: {len(expected_features)}")
            print(f" Recibidas: {len(df_input.columns)}")
            return {"error": "Desajuste de features", "prediccion": None}

        # Reorder columns to exactly match the scaler's training order.
        df_input = df_input[expected_features]
        X_input_scaled = pd.DataFrame(
            scaler.transform(df_input),
            columns=df_input.columns
        )

        # ===========================
        # PREDICTION
        # ===========================
        prediccion = xgb_model.predict(X_input_scaled)[0]

        # ===========================
        # PROBABILISTIC ANALYSIS (POISSON)
        # ===========================
        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)

        # ===========================
        # DETAILED HISTORICAL STATISTICS
        # ===========================
        local_ck_home = team1_home['Pass Types_CK'].mean() if len(team1_home) > 0 else 0
        local_xg_home = team1_home['Expected_xG'].mean() if len(team1_home) > 0 else 0
        local_poss_home = team1_home['Poss'].mean() if len(team1_home) > 0 else 0
        away_ck_away = team2_away['Pass Types_CK'].mean() if len(team2_away) > 0 else 0
        away_xg_away = team2_away['Expected_xG'].mean() if len(team2_away) > 0 else 0
        away_poss_away = team2_away['Poss'].mean() if len(team2_away) > 0 else 0
        # Corners conceded (taken by the opposition against each club).
        local_ck_received = team1_opp_home['Pass Types_CK'].mean() if len(team1_opp_home) > 0 else 0
        away_ck_received = team2_opp_away['Pass Types_CK'].mean() if len(team2_opp_away) > 0 else 0
        partido_ck_esperado = local_ck_home + away_ck_away
        h2h_ck_local = team1_h2h['Pass Types_CK'].mean() if len(team1_h2h) > 0 else 0
        h2h_ck_away = team2_h2h['Pass Types_CK'].mean() if len(team2_h2h) > 0 else 0
        h2h_total = h2h_ck_local + h2h_ck_away

        # ===========================
        # SHOW RESULTS WITH PROBABILITIES
        # ===========================
        print(f"\n🎲 PREDICCIÓN MODELO: {prediccion:.2f} corners totales")
        print(f" PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")
        print(f"\n📊 ESTADÍSTICAS HISTÓRICAS:")
        print(f" {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
        print(f" {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
        print(f" Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
        print(f" Total esperado (suma): {partido_ck_esperado:.1f} corners")

        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
            print(f"\n🔄 HEAD TO HEAD (últimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
            print(f" {local}: {h2h_ck_local:.1f} CK/partido")
            print(f" {visitante}: {h2h_ck_away:.1f} CK/partido")
            print(f" Promedio total: {h2h_total:.1f} corners")

        # ===========================
        # EXACT PROBABILITIES
        # ===========================
        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])
        print(f"\n📈 PROBABILIDADES EXACTAS (Poisson):")
        for k in sorted(analisis['exactas'].keys()):
            prob = analisis['exactas'][k]
            bar = '█' * int(prob / 2)  # crude text histogram (1 char per 2%)
            marca = ' ⭐' if k == valor_mas_probable[0] else ''
            print(f" {k:2d} corners: {prob:5.2f}% {bar}{marca}")
        print(f"\n✅ Valor más probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")

        # Smallest set of most-likely values covering >= 80% probability.
        probs_sorted = sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True)
        cumsum = 0
        rango_80 = []
        for val, prob in probs_sorted:
            cumsum += prob
            rango_80.append(val)
            if cumsum >= 80:
                break
        print(f"📊 Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")

        # ===========================
        # OVER/UNDER WITH IMPLIED ODDS
        # ===========================
        print(f"\n🎯 ANÁLISIS OVER/UNDER:")
        print(f"{'Línea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
        print("-" * 85)
        for linea in [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]:
            prob_over = analisis['over'][linea]
            prob_under = analisis['under'][linea]
            # Implied (fair) odds: inverse of the decimal probability.
            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999
            conf_over = clasificar_confianza(prob_over)
            print(f"O/U {linea:<5} {prob_over:6.2f}% @{cuota_impl_over:5.2f} {conf_over:<15} {prob_under:6.2f}% @{cuota_impl_under:5.2f}")

        # ===========================
        # BETTING RECOMMENDATIONS
        # ===========================
        print(f"\n💡 RECOMENDACIONES DE APUESTA:")
        # Keep only lines with at least medium confidence (>= 55%).
        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]

        if mejores_over:
            print(f"\n✅ OVER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_over, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • Over {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")

        if mejores_under:
            print(f"\n✅ UNDER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_under, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • Under {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")

        if not mejores_over and not mejores_under:
            print(f" ⚠️ No hay apuestas con confianza MEDIA o superior")

        # ===========================
        # RISK ANALYSIS
        # ===========================
        # Lower min_partidos (3) than the default so early-season teams
        # still get a reliability estimate.
        df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
        riesgo = obtener_fiabilidad_partido(local, visitante, df_varianza_temp)
        print(f"\n⚠️ ANÁLISIS DE RIESGO:")
        print(f" Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
        print(f" Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
        print(f" 🎲 FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
        print(f" 💡 {riesgo['mensaje']}")

        # ===========================
        # RETURN THE FULL RESULT DICTIONARY
        # ===========================
        return {
            "prediccion": round(prediccion, 2),
            "local": local,
            "visitante": visitante,
            "ppp_local": local_ppp,
            "ppp_away": away_ppp,
            "ppp_diff": ppp_diff,
            "riesgo": riesgo,
            "stats": {
                "local_ck": local_ck_home,
                "away_ck": away_ck_away,
                "local_ck_received": local_ck_received,
                "away_ck_received": away_ck_received,
                "h2h_total": h2h_total,
                "partido_esperado": partido_ck_esperado
            },
            "probabilidades_exactas": analisis['exactas'],
            "probabilidades_over": analisis['over'],
            "probabilidades_under": analisis['under'],
            "valor_mas_probable": valor_mas_probable[0],
            "prob_mas_probable": valor_mas_probable[1],
            "rango_80": (min(rango_80), max(rango_80))
        }

    except Exception as e:
        # Broad catch: surface the traceback and return a uniform error dict.
        print(f"\n❌ ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e), "prediccion": None}
def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP", export_csv=True, filename=None, df_database=pd.DataFrame(), xgb_model="", scaler="", lst_years=[]):
    """Predict corners for several fixtures and optionally export a CSV.

    Args:
        partidos: List of (home, away) tuples.
        jornada: Matchday number.
        temporada: Season id (format "2526").
        league_code: League code ("ESP", "GER", "FRA", "ITA", "NED", ...).
        export_csv: When True, write the result table to CSV.
        filename: Optional CSV file name (auto-generated when None).
        df_database / xgb_model / scaler / lst_years: Passed through to
            predecir_corners().

    Returns:
        DataFrame with one row per successfully analysed fixture.

    NOTE(review): df_database=pd.DataFrame() and lst_years=[] are mutable
    default arguments — confirm they are never mutated.
    """
    resultados = []

    print("\n" + "=" * 120)
    print(f"🎯 PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
    print("=" * 120)

    for idx, (local, visitante) in enumerate(partidos, 1):
        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")

        resultado = predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=df_database,
            xgb_model=xgb_model,
            scaler=scaler,
            lst_years=lst_years)

        # Skip fixtures that failed; they are excluded from the table.
        if resultado.get("error"):
            print(f" ❌ Error: {resultado['error']}")
            continue

        # ===========================
        # BUILD ONE RESULT ROW
        # ===========================
        fila = {
            'Partido': f"{local} vs {visitante}",
            'Local': local,
            'Visitante': visitante,
            'Liga': league_code,
            'Jornada': jornada,
            'Temporada': temporada,
            # Prediction
            'Prediccion': resultado['prediccion'],
            'Valor_Mas_Probable': resultado['valor_mas_probable'],
            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
            'Rango_80%_Min': resultado['rango_80'][0],
            'Rango_80%_Max': resultado['rango_80'][1],
            # PPP
            'PPP_Local': round(resultado['ppp_local'], 2),
            'PPP_Away': round(resultado['ppp_away'], 2),
            'PPP_Diferencia': round(resultado['ppp_diff'], 2),
            # Historical statistics
            'CK_Local_Casa': round(resultado['stats']['local_ck'], 1),
            'CK_Away_Fuera': round(resultado['stats']['away_ck'], 1),
            'CK_Local_Recibidos': round(resultado['stats']['local_ck_received'], 1),
            'CK_Away_Recibidos': round(resultado['stats']['away_ck_received'], 1),
            'CK_Esperado_Suma': round(resultado['stats']['partido_esperado'], 1),
            'CK_H2H_Total': round(resultado['stats']['h2h_total'], 1) if resultado['stats']['h2h_total'] > 0 else 'N/A',
            # Risk
            'Fiabilidad_Partido': resultado['riesgo']['fiabilidad'],
            'Score_Fiabilidad': round(resultado['riesgo']['score_promedio'], 1),
            'Nivel_Local': resultado['riesgo']['nivel_local'],
            'Nivel_Away': resultado['riesgo']['nivel_away'],
            'CV_Local_%': round(resultado['riesgo']['cv_local'], 1),
            'CV_Away_%': round(resultado['riesgo']['cv_away'], 1),
        }

        # ===========================
        # OVER 6.5 to 10.5
        # ===========================
        for linea in [6.5, 7.5, 8.5, 9.5, 10.5]:
            prob = resultado['probabilidades_over'].get(linea, 0)
            cuota_impl = round(100 / prob, 2) if prob > 0 else 999
            conf = clasificar_confianza(prob)
            fila[f'Over_{linea}_Prob_%'] = round(prob, 2)
            fila[f'Over_{linea}_Cuota'] = cuota_impl
            fila[f'Over_{linea}_Confianza'] = conf

        # ===========================
        # UNDER 12.5 to 9.5
        # ===========================
        for linea in [12.5, 11.5, 10.5, 9.5]:
            prob = resultado['probabilidades_under'].get(linea, 0)
            cuota_impl = round(100 / prob, 2) if prob > 0 else 999
            conf = clasificar_confianza(prob)
            fila[f'Under_{linea}_Prob_%'] = round(prob, 2)
            fila[f'Under_{linea}_Cuota'] = cuota_impl
            fila[f'Under_{linea}_Confianza'] = conf

        # ===========================
        # RECOMMENDATIONS
        # ===========================
        # Lines with at least medium confidence (>= 55%).
        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]

        if resultado['riesgo']['score_promedio'] < 35:
            fila['Recomendacion'] = "⛔ EVITAR - Baja fiabilidad"
            fila['Es_Apostable'] = "NO"
        elif not mejores_over and not mejores_under:
            fila['Recomendacion'] = "⚠️ NO RECOMENDADO - Sin confianza suficiente"
            fila['Es_Apostable'] = "NO"
        else:
            recomendaciones = []
            if mejores_over:
                mejor_over = max(mejores_over, key=lambda x: x[1])
                cuota_over = round(100 / mejor_over[1], 2)
                recomendaciones.append(f"Over {mejor_over[0]} ({mejor_over[1]:.1f}% @{cuota_over})")
            if mejores_under:
                mejor_under = max(mejores_under, key=lambda x: x[1])
                cuota_under = round(100 / mejor_under[1], 2)
                recomendaciones.append(f"Under {mejor_under[0]} ({mejor_under[1]:.1f}% @{cuota_under})")
            fila['Recomendacion'] = " | ".join(recomendaciones)
            # Bettability tier from the fixture reliability score.
            if resultado['riesgo']['score_promedio'] >= 65:
                fila['Es_Apostable'] = "SÍ ⭐⭐⭐"
            elif resultado['riesgo']['score_promedio'] >= 50:
                fila['Es_Apostable'] = "SÍ ✅"
            else:
                fila['Es_Apostable'] = "PRECAUCIÓN 🟡"

        fila['Mensaje_Riesgo'] = resultado['riesgo']['mensaje']
        resultados.append(fila)
        print(f" ✅ Completado")

    # ===========================
    # BUILD THE RESULT DATAFRAME
    # ===========================
    df_resultados = pd.DataFrame(resultados)

    print("\n" + "=" * 120)
    print(f"✅ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
    print("=" * 120)

    # ===========================
    # CSV EXPORT
    # ===========================
    if export_csv and len(df_resultados) > 0:
        if filename is None:
            filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"
        # utf-8-sig keeps Excel happy with the emoji/accented characters.
        df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
        # NOTE(review): message likely meant to interpolate {filename} —
        # confirm against the original source.
        print(f"\n💾 Resultados exportados a: (unknown)")

    # ===========================
    # SUMMARY
    # ===========================
    # NOTE(review): with an empty result frame these lines raise KeyError
    # on 'Es_Apostable' — confirm intended placement/guard.
    print(f"\n📊 RESUMEN DE APUESTAS:")
    print(f" Partidos apostables: {len(df_resultados[df_resultados['Es_Apostable'].str.contains('SÍ')])} / {len(df_resultados)}")
    print(f" Partidos ALTA confianza (⭐⭐⭐): {len(df_resultados[df_resultados['Es_Apostable'] == 'SÍ ⭐⭐⭐'])}")
def mostrar_resumen_batch(df_resultados):
    """Print a visual summary of the best betting opportunities in a batch result.

    Args:
        df_resultados: DataFrame produced by the batch prediction step; rows
            flagged bettable carry 'SÍ' inside the 'Es_Apostable' column.
    """
    sep = "=" * 120
    print("\n" + sep)
    print("🎯 MEJORES OPORTUNIDADES DE APUESTA")
    print(sep)
    # Keep only the matches flagged as bettable ("SÍ ..." marker)
    bets = df_resultados[df_resultados['Es_Apostable'].str.contains('SÍ')].copy()
    if bets.empty:
        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
        return
    # Most reliable matches first
    bets = bets.sort_values('Score_Fiabilidad', ascending=False)
    for _, match_row in bets.iterrows():
        print(f"\n{sep}")
        print(f"🏟️ {match_row['Partido']}")
        print(sep)
        print(f"📊 Predicción: {match_row['Prediccion']:.2f} corners | Valor más probable: {match_row['Valor_Mas_Probable']} ({match_row['Prob_Valor_Mas_Probable_%']:.1f}%)")
        print(f"📈 Histórico: Local {match_row['CK_Local_Casa']:.1f} CK | Away {match_row['CK_Away_Fuera']:.1f} CK | H2H: {match_row['CK_H2H_Total']}")
        print(f"🎲 Fiabilidad: {match_row['Fiabilidad_Partido']} (Score: {match_row['Score_Fiabilidad']:.1f}/100)")
        print(f"💡 {match_row['Recomendacion']}")
        # Highlight the lines with a strong (>= 55%) probability
        print(f"\n 📌 Líneas destacadas:")
        for line in (7.5, 8.5, 9.5, 10.5):
            for side in ("Over", "Under"):
                prob = match_row.get(f'{side}_{line}_Prob_%', 0)
                if prob >= 55:
                    cuota = match_row.get(f'{side}_{line}_Cuota', 0)
                    conf = match_row.get(f'{side}_{line}_Confianza', '')
                    print(f" • {side} {line}: {prob:.1f}% @{cuota:.2f} - {conf}")
class USE_MODEL():
    """Loads the corner-prediction XGBoost model, feature scaler and cleaned
    datasets from the project's GitHub repository, then exposes single-match
    and batch prediction entry points plus a Kelly staking helper."""

    def __init__(self):
        self.load_models()
        self.load_data()
        self.init_variables()

    def load_models(self):
        """Download and deserialize the model and scaler from GitHub raw URLs.

        Sets ``self.xgb_model`` and ``self.scaler``.

        Raises:
            Exception: wrapping any download or deserialization failure.
        """
        print("📦 Cargando modelos desde GitHub...")
        # Direct-download (raw.githubusercontent.com) URLs
        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/models"
        model_url = f"{base_url}/xgboost_corners_v4_retrain.pkl"
        scaler_url = f"{base_url}/scaler_corners_v4_retrain.pkl"
        tmp_model_path = None
        tmp_scaler_path = None
        try:
            print(f"📥 Descargando modelo desde: {model_url}")
            response_model = requests.get(model_url, timeout=30)
            response_model.raise_for_status()
            print(f"📥 Descargando scaler desde: {scaler_url}")
            response_scaler = requests.get(scaler_url, timeout=30)
            response_scaler.raise_for_status()
            # joblib.load needs a real file path, so spill the payloads to
            # temporary files first.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pkl') as tmp_model:
                tmp_model.write(response_model.content)
                tmp_model_path = tmp_model.name
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pkl') as tmp_scaler:
                tmp_scaler.write(response_scaler.content)
                tmp_scaler_path = tmp_scaler.name
            self.xgb_model = joblib.load(tmp_model_path)
            self.scaler = joblib.load(tmp_scaler_path)
            print("✅ Modelos cargados correctamente desde GitHub")
        except requests.exceptions.RequestException as e:
            raise Exception(f"❌ Error descargando modelos: {str(e)}")
        except Exception as e:
            raise Exception(f"❌ Error cargando modelos: {str(e)}")
        finally:
            # FIX: the temp files were leaked when joblib.load raised; always
            # remove whichever files were actually created.
            for path in (tmp_model_path, tmp_scaler_path):
                if path is not None and os.path.exists(path):
                    os.unlink(path)

    def load_data(self):
        """Download the historical (and, when available, current-season)
        cleaned datasets and merge them into ``self.df_dataset``.

        Raises:
            FileNotFoundError: if the historical dataset cannot be loaded.
        """
        print("📂 Cargando datos desde GitHub...")
        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/dataset/cleaned"
        historic_url = f"{base_url}/dataset_cleaned.csv"
        current_url = f"{base_url}/dataset_cleaned_current_year.csv"
        try:
            print(f"📥 Descargando dataset histórico...")
            self.df_dataset_historic = pd.read_csv(historic_url)
            print(f"✅ Dataset histórico cargado: {len(self.df_dataset_historic)} registros")
            # The current-season file is optional; fall back to historic only.
            try:
                print(f"📥 Descargando dataset año actual...")
                self.df_dataset_current_year = pd.read_csv(current_url)
                print(f"✅ Dataset año actual cargado: {len(self.df_dataset_current_year)} registros")
                self.df_dataset = pd.concat([self.df_dataset_historic, self.df_dataset_current_year])
            except Exception:
                # FIX: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit.
                print("⚠️ No se pudo cargar dataset del año actual, usando solo histórico")
                self.df_dataset = self.df_dataset_historic
            # Normalize the types the prediction code relies on.
            self.df_dataset["season"] = self.df_dataset["season"].astype(str)
            # FIX: fillna(inplace=True) on a column selection triggers pandas
            # chained-assignment warnings and may not write back; assign instead.
            self.df_dataset["Performance_Save%"] = self.df_dataset["Performance_Save%"].fillna(0)
            print(f"✅ Total registros: {len(self.df_dataset)}")
        except Exception as e:
            raise FileNotFoundError(
                f"\n❌ ERROR: No se pudieron cargar los datos desde GitHub\n"
                f" Error: {str(e)}\n\n"
                f"💡 Verifica que los archivos existan en el repositorio\n"
            )

    def init_variables(self):
        # Seasons present in the dataset ("1819" = 2018-19 season, etc.)
        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
        print("✅ Variables inicializadas")

    def consume_model_batch(self, partidos, jornada, temporada, league_code):
        """Run batch predictions for a matchday and export them to CSV.

        Returns the predictions DataFrame produced by
        ``predecir_partidos_batch``.
        """
        df_predict = predecir_partidos_batch(
            partidos=partidos,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            export_csv=True,
            # FIX: the original f"results\{league_code}\..." relied on literal
            # backslashes (invalid escape sequence, Windows-only separator);
            # build the path portably instead.
            filename=os.path.join("results", league_code, f"{league_code}-{temporada}-{jornada}-predicciones.csv"),
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years
        )
        return df_predict

    def consume_model_single(self, local, visitante, jornada, temporada, league_code):
        """Predict corners for a single fixture (delegates to ``predecir_corners``)."""
        return predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years
        )

    def kelly_stats(self, p, odds, fraction=0.2):
        """Fractional Kelly stake for win probability ``p`` at decimal ``odds``.

        Args:
            p: estimated win probability (0..1).
            odds: decimal odds; must exceed 1 for any positive edge.
            fraction: fraction of the full Kelly criterion to apply
                (e.g. 0.1 for 10% Kelly).

        Returns:
            Suggested stake as a non-negative fraction of bankroll.
        """
        b = odds - 1
        # FIX: odds <= 1 previously divided by zero (or produced nonsense);
        # there is no edge to bet at such odds.
        if b <= 0:
            return 0.0
        q = 1 - p
        f_star = (b * p - q) / b
        f_star = max(f_star, 0)  # never suggest a negative stake
        return f_star * fraction