Spaces:

daniel-saed
/

corner-forecast

Running

App Files Files Community

daniel-saed committed on Nov 21, 2025

Commit

c2aaace

verified ·

1 Parent(s): 315039f

Upload 21 files

Browse files

Files changed (21) hide show

Dockerfile_STREAMLIT +19 -0
requirements.txt +32 -0
src/api/__init__.py +0 -0
src/api/__pycache__/__init__.cpython-311.pyc +0 -0
src/api/__pycache__/api.cpython-311.pyc +0 -0
src/api/__pycache__/load.cpython-311.pyc +0 -0
src/api/api.py +191 -0
src/api/load.py +1208 -0
src/models/__init__.py +0 -0
src/models/test_model.py +1148 -0
src/models/train_model.py +425 -0
src/process_data/__init__.py +0 -0
src/process_data/__pycache__/__init__.cpython-311.pyc +0 -0
src/process_data/__pycache__/process_dataset.cpython-311.pyc +0 -0
src/process_data/generate_dataset.py +211 -0
src/process_data/process_dataset.py +584 -0
src/utils/__init__.py +0 -0
src/utils/__pycache__/__init__.cpython-311.pyc +0 -0
src/utils/__pycache__/helper.cpython-311.pyc +0 -0
src/utils/helper.py +18 -0
streamlit_app.py +812 -0

Dockerfile_STREAMLIT ADDED Viewed

	@@ -0,0 +1,19 @@

FROM python:3.11-slim
# Set the working directory
WORKDIR /app
# Copy the dependency manifest
COPY requirements.txt .
# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy the whole codebase
COPY . .
# Expose the port used by Hugging Face Spaces
EXPOSE 7860
# ✅ Command that serves the FastAPI app on Hugging Face Spaces
CMD ["uvicorn", "src.api.api:app", "--host", "0.0.0.0", "--port", "7860"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,32 @@

# Core Data Processing
pandas>=2.0.0
numpy>=1.24.0
# Machine Learning
scikit-learn>=1.3.0
xgboost>=2.0.0
# Statistics
scipy>=1.11.0
# Data Collection
soccerdata>=1.4.0
# Experiment Tracking & Model Management
mlflow>=2.8.0
# Model Persistence
joblib>=1.3.0
# API server (the Docker CMD launches the app with uvicorn, so declare it
# explicitly instead of relying on a transitive install)
fastapi>=0.115.4
uvicorn
# Configuration / secrets loading (.env)
python-dotenv>=1.0.0
# Front end and HTTP client
streamlit>=1.28.0
plotly
requests

src/api/__init__.py ADDED Viewed

File without changes

src/api/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (162 Bytes). View file

src/api/__pycache__/api.cpython-311.pyc ADDED Viewed

Binary file (7.16 kB). View file

src/api/__pycache__/load.cpython-311.pyc ADDED Viewed

Binary file (48.8 kB). View file

src/api/api.py ADDED Viewed

	@@ -0,0 +1,191 @@

+# ===========================
+# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
+# ===========================
+import numpy as np
+import pandas as pd
+import os
+from fastapi.responses import JSONResponse
+from fastapi import Depends, FastAPI, HTTPException
+from fastapi.security.api_key import APIKeyHeader
+from fastapi import Security
+from fastapi.responses import JSONResponse
+from dotenv import load_dotenv
+from src.api.load import USE_MODEL
+#from load import USE_MODEL
# Load environment variables from a local .env file, if one is present.
load_dotenv()
# Build the prediction object once at import time; the /items/ endpoint
# calls model.consume_model_single() on this shared instance.
model = USE_MODEL()
# ASGI application object that the route decorators below register on.
app = FastAPI()
+# ===========================
+# CONFIGURACIÓN API KEY
+# ===========================
# Shared secret expected in the X-API-Key header. It is read from the
# environment (load_dotenv() above may populate it from .env); it must be
# configured there and never hard-coded in this file.
API_KEY = os.getenv("API_KEY")
# auto_error=False: a missing header reaches get_api_key() as None so this
# module controls the error response itself.
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)


async def get_api_key(api_key: str = Security(api_key_header)):
    """Validate the X-API-Key request header.

    Args:
        api_key: Value of the ``X-API-Key`` header, or ``None`` when the
            header is absent.

    Returns:
        The supplied key when it matches the configured ``API_KEY``.

    Raises:
        HTTPException: 401 when the key is missing or wrong, and also when
            no ``API_KEY`` is configured on the server.
    """
    import secrets  # local import: the module header does not import it

    expected = API_KEY or ""
    provided = api_key or ""
    # Fail closed. Previously an unset API_KEY (None) compared equal to a
    # missing header (None), which let unauthenticated requests through.
    # compare_digest avoids leaking the key through response timing; both
    # sides are encoded because it rejects non-ASCII str arguments.
    if not expected or not secrets.compare_digest(
        provided.encode("utf-8"), expected.encode("utf-8")
    ):
        raise HTTPException(
            status_code=401,
            detail="API Key inválida o faltante"
        )
    return api_key
+# ===========================
+# HELPER: CONVERTIR NUMPY/PANDAS A TIPOS NATIVOS
+# ===========================
def convert_to_native(val):
    """Recursively convert NumPy/pandas values to JSON-serializable Python types.

    Args:
        val: A scalar, NumPy array/scalar, pandas Series/DataFrame, or any
            nesting of those inside dicts, lists and tuples.

    Returns:
        The same structure built from ``int``, ``float``, ``bool``, ``None``,
        ``list`` and ``dict``. Tuples and arrays become lists, a Series
        becomes a dict, a DataFrame becomes a list of row dicts. Missing
        values (None, NaN, NaT, pd.NA) and non-finite floats become ``None``
        so a strict JSON encoder accepts the result. Anything unrecognised is
        returned unchanged.
    """
    # Containers first: recurse into their elements.
    if isinstance(val, dict):
        return {key: convert_to_native(value) for key, value in val.items()}
    if isinstance(val, (list, tuple)):
        return [convert_to_native(item) for item in val]
    if isinstance(val, np.ndarray):
        # tolist() gives a nested list, or a bare scalar for a 0-d array;
        # recursing handles both shapes.
        return convert_to_native(val.tolist())
    if isinstance(val, pd.Series):
        return convert_to_native(val.to_dict())
    if isinstance(val, pd.DataFrame):
        return convert_to_native(val.to_dict(orient='records'))

    # Scalars. np.bool_ is not an np.integer, so it needs its own branch.
    if isinstance(val, np.bool_):
        return bool(val)
    if isinstance(val, np.integer):
        return int(val)
    if isinstance(val, (float, np.floating)):
        # np.float64 subclasses float, so NaN/inf must be filtered here for
        # both kinds; strict JSON has no representation for them.
        val = float(val)
        return val if np.isfinite(val) else None
    # Restrict pd.isna to scalars: on array-likes it returns an array whose
    # truth value is ambiguous.
    if pd.api.types.is_scalar(val) and pd.isna(val):
        return None
    return val
+# ===========================
+# ENDPOINTS
+# ===========================
@app.get("/")
def read_root():
    """Return static information about the API and its endpoints."""
    return {
        "api": "Corners Prediction API",
        "version": "1.0.0",
        "status": "active",
        "endpoints": {
            "/": "Información de la API",
            "/items/": "Predicción de corners (requiere API Key)",
            "/health": "Estado de salud"
        },
        "auth": "Requiere header: X-API-Key"
    }


@app.get("/health")
def health_check():
    """Liveness probe.

    The root endpoint lists ``/health``, so this route makes that listing
    true. It needs no API key and only reports that the process is serving
    requests.
    """
    return {"status": "healthy"}
@app.get("/items/")
def predict_corners(
    local: str,
    visitante: str,
    jornada: int,
    league_code: str,
    temporada: str = "2526",
    api_key: str = Depends(get_api_key)  # protected: requires a valid X-API-Key
):
    """
    Predict corners for a football match.

    Args:
        local: Home team name (required).
        visitante: Away team name (required).
        jornada: Matchday number (required, minimum 1).
        league_code: League code (required: ESP, GER, FRA, ITA, ENG, NED, POR, BEL).
        temporada: Season in AABB format (default: "2526").

    Returns:
        JSON with the prediction returned by the model plus a ``metadata``
        entry. Unexpected failures produce a 500 JSON body with ``error``,
        ``type`` and (only when ``app.debug`` is set) ``traceback``.

    Raises:
        HTTPException: 400 for invalid parameters, 422 when the model
            reports an ``error`` in its result.

    Example:
        GET /items/?local=Barcelona&visitante=Real%20Madrid&jornada=15&league_code=ESP&temporada=2526
        Headers: X-API-Key: <your key>
    """
    import logging
    import traceback

    # ---------------------------
    # Validation
    # ---------------------------
    # Strip first so whitespace-only names are rejected and stray padding
    # does not reach the model's team lookup.
    local = local.strip()
    visitante = visitante.strip()
    if not local or not visitante:
        raise HTTPException(
            status_code=400,
            detail="Los parámetros 'local' y 'visitante' son obligatorios"
        )
    if jornada < 1:
        raise HTTPException(
            status_code=400,
            detail="La jornada debe ser mayor o igual a 1"
        )
    valid_leagues = ("ESP", "GER", "FRA", "ITA", "ENG", "NED", "POR", "BEL")
    if league_code not in valid_leagues:
        raise HTTPException(
            status_code=400,
            detail=f"Liga inválida. Ligas válidas: {', '.join(valid_leagues)}"
        )
    # The documented season format is AABB (e.g. "2526"); reject anything
    # else up front with a clear 400 instead of failing inside the model.
    if not (len(temporada) == 4 and temporada.isascii() and temporada.isdigit()):
        raise HTTPException(
            status_code=400,
            detail="La temporada debe tener formato AABB (4 dígitos), p. ej. '2526'"
        )

    # ---------------------------
    # Prediction
    # ---------------------------
    try:
        resultado = model.consume_model_single(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code
        )
        # The model signals failures through an "error" entry in its result.
        if resultado.get("error"):
            raise HTTPException(
                status_code=422,
                detail=f"Error en predicción: {resultado['error']}"
            )
        # NumPy/pandas values are not JSON-serializable as-is.
        resultado_limpio = convert_to_native(resultado)
        resultado_limpio["metadata"] = {
            "api_version": "1.0.0",
            "model_version": "v4",
            "timestamp": pd.Timestamp.now().isoformat()
        }
        return JSONResponse(
            status_code=200,
            content=resultado_limpio
        )
    except HTTPException:
        # Let deliberate HTTP errors propagate untouched.
        raise
    except Exception as e:
        # Record the failure server-side; before this the only trace of an
        # unexpected error was the response body sent to the client.
        logging.getLogger(__name__).exception(
            "Unexpected error predicting %s vs %s", local, visitante
        )
        error_detail = {
            "error": str(e),
            "type": type(e).__name__,
            "traceback": traceback.format_exc() if app.debug else None
        }
        return JSONResponse(
            status_code=500,
            content=error_detail
        )

src/api/load.py ADDED Viewed

	@@ -0,0 +1,1208 @@

+# ===========================
+# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
+# ===========================
+import requests
+import tempfile
+import numpy as np
+import pandas as pd
+import joblib
+from scipy.stats import poisson
+from scipy import stats
+import os
+import sys
+from src.process_data.process_dataset import get_dataframes,get_head_2_head,get_points_from_result,get_team_ppp,get_ppp_difference,get_average
+#from process_data.process_dataset import get_dataframes,get_head_2_head,get_points_from_result,get_team_ppp,get_ppp_difference,get_average
+#project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
+#sys.path.insert(0, project_root)
+# ===========================
+# 1. FUNCIONES FIABILIDAD
+# ===========================
# Column order of the frame returned by analizar_fiabilidad_equipos(); also
# used to build a well-formed empty frame when no team qualifies.
_RELIABILITY_COLUMNS = [
    'Equipo', 'Partidos',
    'Media_CK', 'Mediana_CK', 'Std_CK', 'CV_%',
    'Pct_Cerca_Media', 'Cambios_Bruscos_%', 'IQR',
    'Rango', 'Rango_Norm', 'Min', 'Max',
    'Outliers', 'Pct_Outliers', 'Max_ZScore',
    'Tendencia_Slope', 'Autocorr',
    'Score_Fiabilidad', 'Nivel', 'Color',
]


def _reliability_tier(score):
    """Return the (label, hex colour) pair for a 0-100 reliability score."""
    if score >= 70:
        return "EXCELENTE ⭐⭐⭐", "#27ae60"
    if score >= 55:
        return "BUENO ✅", "#2ecc71"
    if score >= 40:
        return "ACEPTABLE 🟡", "#f39c12"
    if score >= 25:
        return "REGULAR ⚠️", "#e67e22"
    return "EVITAR ⛔", "#e74c3c"


def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
    """
    Score how predictable each team's corner count is within one season.

    For every team with at least ``min_partidos`` rows in ``temporada`` the
    function reads the ``'Pass Types_CK'`` values (in row order) and derives
    variability, consistency, trend, outlier and range metrics, then blends
    five of them into a 0-100 reliability score.

    Args:
        df_database: DataFrame with at least the columns ``season``,
            ``team``, ``opponent`` and ``'Pass Types_CK'``.
        temporada: Season value to filter on.
        min_partidos: Minimum number of rows a team needs to be analysed.

    Returns:
        DataFrame with one row per analysed team, sorted by
        ``Score_Fiabilidad`` descending. When no team qualifies the frame is
        empty but still carries every column.
    """
    df_temp = df_database[df_database['season'] == temporada]
    resultados = []
    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()
    for equipo in equipos:
        ck_sacados = df_temp.loc[df_temp['team'] == equipo, 'Pass Types_CK'].values
        n_partidos = len(ck_sacados)
        # n_partidos == 0 can only get here with min_partidos <= 0; the
        # statistics below are undefined on an empty sample.
        if n_partidos < min_partidos or n_partidos == 0:
            continue

        # --- 1. Variability ---
        media = ck_sacados.mean()
        std = ck_sacados.std()
        cv = (std / media * 100) if media > 0 else 0

        # --- 2. Consistency ---
        # Share of matches within +/-2 corners of the mean.
        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / n_partidos * 100
        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
        iqr = q3 - q1  # interquartile range, more robust than std

        # --- 3. Trend ---
        if n_partidos > 1:
            # Share of consecutive matches whose corner count jumps by > 4.
            cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
            pct_cambios_bruscos = cambios_bruscos / (n_partidos - 1) * 100
            slope = stats.linregress(np.arange(n_partidos), ck_sacados).slope
        else:
            # A single match has no consecutive pairs and no trend; the
            # unguarded code divided by zero and failed in linregress.
            pct_cambios_bruscos = 0.0
            slope = 0.0
        # Lag-1 autocorrelation: does one match predict the next?
        autocorr = 0
        if n_partidos > 2:
            with np.errstate(divide='ignore', invalid='ignore'):
                autocorr = np.corrcoef(ck_sacados[:-1], ck_sacados[1:])[0, 1]
            # corrcoef yields NaN when either slice is constant.
            if np.isnan(autocorr):
                autocorr = 0.0

        # --- 4. Outliers ---
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
        pct_outliers = outliers / n_partidos * 100
        # zscore divides by std, so a constant series would give NaN.
        max_z = np.abs(stats.zscore(ck_sacados)).max() if std > 0 else 0.0

        # --- 5. Range ---
        rango = ck_sacados.max() - ck_sacados.min()
        rango_normalizado = rango / media if media > 0 else 0

        # --- 6. Global score (each component 0-100, lower = worse) ---
        score_cv = max(0, 100 - cv * 2)
        score_consistencia = cerca_media
        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)
        score_outliers = max(0, 100 - pct_outliers * 3)
        score_iqr = max(0, 100 - iqr * 10)
        score_fiabilidad = (
            score_cv * 0.25 +
            score_consistencia * 0.30 +
            score_cambios * 0.20 +
            score_outliers * 0.15 +
            score_iqr * 0.10
        )

        # --- 7. Tier ---
        nivel, color = _reliability_tier(score_fiabilidad)

        resultados.append({
            'Equipo': equipo,
            'Partidos': n_partidos,
            # Basic statistics
            'Media_CK': round(media, 2),
            'Mediana_CK': round(q2, 2),
            'Std_CK': round(std, 2),
            'CV_%': round(cv, 1),
            # Consistency
            'Pct_Cerca_Media': round(cerca_media, 1),
            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
            'IQR': round(iqr, 2),
            # Range
            'Rango': int(rango),
            'Rango_Norm': round(rango_normalizado, 2),
            'Min': int(ck_sacados.min()),
            'Max': int(ck_sacados.max()),
            # Outliers
            'Outliers': int(outliers),
            'Pct_Outliers': round(pct_outliers, 1),
            'Max_ZScore': round(max_z, 2),
            # Trend
            'Tendencia_Slope': round(slope, 3),
            'Autocorr': round(autocorr, 3),
            # Score and tier
            'Score_Fiabilidad': round(score_fiabilidad, 1),
            'Nivel': nivel,
            'Color': color
        })

    # Passing the column list keeps 'Score_Fiabilidad' present even when
    # resultados is empty, so sort_values no longer raises KeyError.
    df_resultado = pd.DataFrame(resultados, columns=_RELIABILITY_COLUMNS)
    return df_resultado.sort_values('Score_Fiabilidad', ascending=False)
def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """
    Print a reliability report to stdout.

    Shows the first ``top_n`` rows of ``df_analisis`` as the most reliable
    teams, the last ``top_n`` rows as the least reliable, then the count of
    teams per ``Nivel`` and summary statistics of ``Score_Fiabilidad``.
    """
    regla_doble = "=" * 120
    regla_simple = "-" * 120

    def _imprimir_equipos(filas, prefijo_consistencia, con_rango):
        # One block of lines per team; only the consistency prefix and the
        # optional range line differ between the two rankings.
        for _, fila in filas.iterrows():
            print(f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | Score: {fila['Score_Fiabilidad']:.1f}")
            print(f"  📊 Media: {fila['Media_CK']:.1f} | Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%")
            print(f"  {prefijo_consistencia}{fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
            print(f"  ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | Outliers: {fila['Pct_Outliers']:.1f}%")
            if con_rango:
                print(f"  📈 Rango: {fila['Min']}-{fila['Max']} ({fila['Rango']} corners)")

    # Header
    print("\n" + regla_doble)
    print("🎯 ANÁLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print(regla_doble)

    # Most reliable teams
    print(f"\n⭐ TOP {top_n} EQUIPOS MÁS FIABLES")
    print(regla_simple)
    _imprimir_equipos(df_analisis.head(top_n), "✅ ", con_rango=True)

    # Least reliable teams
    print(f"\n\n⛔ TOP {top_n} EQUIPOS MENOS FIABLES")
    print(regla_simple)
    _imprimir_equipos(df_analisis.tail(top_n), "❌ Solo ", con_rango=False)

    # Overall statistics
    print("\n\n📊 DISTRIBUCIÓN POR NIVEL DE FIABILIDAD")
    print(regla_simple)
    print(df_analisis['Nivel'].value_counts())
    scores = df_analisis['Score_Fiabilidad']
    print("\n📈 ESTADÍSTICAS DE SCORE:")
    print(f"  Media: {scores.mean():.1f}")
    print(f"  Mediana: {scores.median():.1f}")
    print(f"  Score máximo: {scores.max():.1f}")
    print(f"  Score mínimo: {scores.min():.1f}")
def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """
    Rate how reliable a single fixture is from both teams' scores.

    Looks up ``local`` and ``visitante`` in the ``Equipo`` column of
    ``df_analisis``, averages their ``Score_Fiabilidad`` and maps the
    average to a label and a message. If either team is missing, a short
    'DESCONOCIDO' dict is returned instead.
    """
    fila_local = df_analisis[df_analisis['Equipo'] == local]
    fila_away = df_analisis[df_analisis['Equipo'] == visitante]

    # Guard clause: nothing to rate unless both teams are present.
    if fila_local.empty or fila_away.empty:
        return {
            'fiabilidad': 'DESCONOCIDO',
            'score': 0,
            'mensaje': '⚠️ Datos insuficientes'
        }

    def _primero(frame, columna):
        # First matching row's value for the given column.
        return frame[columna].values[0]

    score_local = _primero(fila_local, 'Score_Fiabilidad')
    score_away = _primero(fila_away, 'Score_Fiabilidad')
    score_promedio = (score_local + score_away) / 2

    # (minimum average, label, message), checked from highest to lowest.
    escalones = (
        (65, "MUY ALTA ⭐⭐⭐", "✅ EXCELENTE PARTIDO PARA APOSTAR"),
        (50, "ALTA ✅", "✅ BUEN PARTIDO PARA APOSTAR"),
        (35, "MEDIA 🟡", "🟡 APOSTAR CON PRECAUCIÓN"),
    )
    fiabilidad, mensaje = "BAJA ⛔", "⛔ EVITAR APUESTA"
    for minimo, etiqueta, texto in escalones:
        if score_promedio >= minimo:
            fiabilidad, mensaje = etiqueta, texto
            break

    return {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': _primero(fila_local, 'Nivel'),
        'nivel_away': _primero(fila_away, 'Nivel'),
        'mensaje': mensaje,
        # Extra per-team figures
        'cv_local': _primero(fila_local, 'CV_%'),
        'cv_away': _primero(fila_away, 'CV_%'),
        'consistencia_local': _primero(fila_local, 'Pct_Cerca_Media'),
        'consistencia_away': _primero(fila_away, 'Pct_Cerca_Media')
    }
def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5, lineas=None):
    """Compute exact, over and under probabilities from a Poisson model.

    Args:
        lambda_pred: Poisson rate, i.e. the predicted count.
        rango_inferior: How many integer counts below ``round(lambda_pred)``
            to include in the exact-count table (floored at 0).
        rango_superior: How many integer counts above ``round(lambda_pred)``
            to include in the exact-count table.
        lineas: Optional iterable of betting lines to evaluate. Defaults to
            7.5, 8.5, 9.5, 10.5, 11.5 and 12.5.

    Returns:
        Dict with three mappings, all expressed in percent:
        ``'exactas'`` maps each count k to P(X = k), ``'over'`` maps each
        line to P(X > line) and ``'under'`` maps each line to P(X <= line).
        Over and under use the same lines.
    """
    if lineas is None:
        lines = [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]
    else:
        lines = list(lineas)

    valor_central = int(round(lambda_pred))
    conteos = range(
        max(0, valor_central - rango_inferior),
        valor_central + rango_superior + 1
    )
    probabilidades_exactas = {
        k: poisson.pmf(k, lambda_pred) * 100 for k in conteos
    }
    probabilidades_over = {
        linea: (1 - poisson.cdf(linea, lambda_pred)) * 100 for linea in lines
    }
    probabilidades_under = {
        linea: poisson.cdf(linea, lambda_pred) * 100 for linea in lines
    }
    return {
        'exactas': probabilidades_exactas,
        'over': probabilidades_over,
        'under': probabilidades_under
    }
def clasificar_confianza(prob):
    """Map a probability (in percent) to a confidence label."""
    # Thresholds are checked from highest to lowest; the first one reached wins.
    for minimo, etiqueta in ((66, "ALTA ✅"), (55, "MEDIA ⚠️")):
        if prob >= minimo:
            return etiqueta
    return "BAJA ❌"
+'''
+def get_dataframes(df, season, round_num, local, away, league=None):
+    """Retorna 8 DataFrames filtrados por equipo, venue y liga"""
+    season_round = (df['season'] == season) & (df['round'] < round_num)
+    if league is not None:
+        season_round = season_round & (df['league'] == league)
+    def filter_and_split(team_filter):
+        filtered = df[season_round & team_filter].copy()
+        home = filtered[filtered['venue'] == "Home"]
+        away = filtered[filtered['venue'] == "Away"]
+        return home, away
+    local_home, local_away = filter_and_split(df['team'] == local)
+    local_opp_home, local_opp_away = filter_and_split(df['opponent'] == local)
+    away_home, away_away = filter_and_split(df['team'] == away)
+    away_opp_home, away_opp_away = filter_and_split(df['opponent'] == away)
+    return (local_home, local_away, local_opp_home, local_opp_away,
+            away_home, away_away, away_opp_home, away_opp_away)
+def get_head_2_head(df, local, away, seasons=None, league=None):
+    """Obtiene últimos 3 enfrentamientos directos"""
+    if seasons is None:
+        seasons = []
+    df_filtered = df[df['season'].isin(seasons)] if seasons else df
+    if league is not None:
+        df_filtered = df_filtered[df_filtered['league'] == league]
+    local_h2h = df_filtered[(df_filtered['team'] == local) & (df_filtered['opponent'] == away)]
+    away_h2h = df_filtered[(df_filtered['team'] == away) & (df_filtered['opponent'] == local)]
+    if len(local_h2h) < 4:
+        return local_h2h.tail(2), away_h2h.tail(2)
+    return local_h2h.tail(3), away_h2h.tail(3)
+def get_average(df, is_team=False, lst_avg=None):
+    """Calcula promedios de estadísticas (VERSIÓN COMPLETA)"""
+    if len(df) == 0:
+        if is_team:
+            # ✅ Retornar 23 valores (métricas avanzadas)
+            return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+        return (0, 0, 0, 0, 0, 0, 0, 0, 0)
+    if is_team:
+        # ===========================
+        # ESTADÍSTICAS BÁSICAS (NORMALIZADAS)
+        # ===========================
+        avg_cross = (df['Performance_Crs'].sum() / len(df)) - lst_avg[3]
+        avg_att_3rd = (df['Touches_Att 3rd'].sum() / len(df)) - lst_avg[4]
+        avg_sca = (df['SCA Types_SCA'].sum() / len(df)) - lst_avg[2]
+        avg_xg = (df['Expected_xG'].sum() / len(df)) - lst_avg[1]
+        # ✅ VARIANZA DE CORNERS
+        var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
+        avg_ck = (df['Pass Types_CK'].sum() / len(df)) - lst_avg[8]
+        avg_poss = (df['Poss'].sum() / len(df)) - 50
+        avg_gf = (df['GF'].sum() / len(df)) - lst_avg[5]
+        avg_ga = (df['GA'].sum() / len(df)) - lst_avg[6]
+        # ===========================
+        # MÉTRICAS OFENSIVAS AVANZADAS
+        # ===========================
+        total_sh = df['Standard_Sh'].sum()
+        sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
+        xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0
+        total_touches = df['Touches_Touches'].sum()
+        attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0
+        total_poss = df['Poss'].sum()
+        possession_shot = (total_sh / total_poss) if total_poss > 0 else 0
+        # ===========================
+        # MÉTRICAS DE CREACIÓN
+        # ===========================
+        total_passes = df['Total_Att'].sum()
+        progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
+        final_third_involvement = (df['1/3'].sum() / total_passes) if total_passes > 0 else 0
+        total_sca = df['SCA Types_SCA'].sum()
+        assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
+        creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0
+        # ===========================
+        # MÉTRICAS DEFENSIVAS
+        # ===========================
+        total_tackles = df['Tackles_Tkl'].sum()
+        high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
+        interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0
+        total_defensive_actions = total_tackles + df['Int'].sum()
+        clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0
+        # ===========================
+        # MÉTRICAS DE POSESIÓN
+        # ===========================
+        total_carries = df['Carries_Carries'].sum()
+        progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0
+        total_prog_passes = df['PrgP'].sum()
+        carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0
+        # ===========================
+        # ÍNDICES COMPUESTOS
+        # ===========================
+        avg_gf_raw = df['GF'].mean()
+        avg_xg_raw = df['Expected_xG'].mean()
+        avg_sot = df['Standard_SoT'].mean()
+        avg_sh = df['Standard_Sh'].mean()
+        offensive_index = (avg_gf_raw + avg_xg_raw) * (avg_sot / avg_sh) if avg_sh > 0 else 0
+        avg_prgp = df['PrgP'].mean()
+        avg_prgc = df['Carries_PrgC'].mean()
+        avg_poss_raw = df['Poss'].mean()
+        transition_index = ((avg_prgp + avg_prgc) / avg_poss_raw) if avg_poss_raw > 0 else 0
+        # ✅ RETORNAR 23 VALORES
+        return (
+            avg_ck,           # 0
+            var_ck,           # 1 - ✅ NUEVO
+            avg_xg,           # 2
+            avg_sca,          # 3
+            avg_cross,        # 4
+            avg_poss,         # 5
+            avg_att_3rd,      # 6
+            avg_gf,           # 7
+            avg_ga,           # 8
+            sh_accuracy,      # 9
+            xg_shot,          # 10
+            attacking_presence,  # 11
+            possession_shot,  # 12
+            progressive_pass_ratio,  # 13
+            final_third_involvement,  # 14
+            assist_sca,       # 15
+            creative_efficiency,  # 16
+            high_press_intensity,  # 17
+            interception_tackle,  # 18
+            clearance_ratio,  # 19
+            progressive_carry_ratio,  # 20
+            carry_pass_balance,  # 21
+            offensive_index,  # 22
+            transition_index  # 23
+        )
+    # ===========================
+    # PROMEDIOS DE LIGA (is_team=False)
+    # ===========================
+    avg_cross = df['Performance_Crs'].mean()
+    avg_att_3rd = df['Touches_Att 3rd'].mean()
+    avg_sca = df['SCA Types_SCA'].mean()
+    avg_xg = df['Expected_xG'].mean()
+    var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
+    avg_ck = df['Pass Types_CK'].mean()
+    avg_gf = df['GF'].mean()
+    avg_ga = df['GA'].mean()
+    avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
+    return (
+        var_ck,      # 0
+        avg_xg,      # 1
+        avg_sca,     # 2
+        avg_cross,   # 3
+        avg_att_3rd, # 4
+        avg_gf,      # 5
+        avg_ga,      # 6
+        avg_sh,      # 7
+        avg_ck       # 8
+    )
+def get_points_from_result(result):
+    """Convierte resultado (W/D/L) a puntos"""
+    if result == 'W':
+        return 3
+    elif result == 'D':
+        return 1
+    else:
+        return 0
+def get_team_ppp(df, team, season, round_num, league=None):
+    """Calcula puntos por partido (PPP) de un equipo"""
+    team_matches = df[
+        (df['team'] == team) &
+        (df['season'] == season) &
+        (df['round'] < round_num)
+    ]
+    if league is not None:
+        team_matches = team_matches[team_matches['league'] == league]
+    if len(team_matches) == 0:
+        return 0.0
+    total_points = team_matches['result'].apply(get_points_from_result).sum()
+    ppp = total_points / len(team_matches)
+    return ppp
+def get_ppp_difference(df, local, away, season, round_num, league=None):
+    """Calcula diferencia de PPP entre local y visitante"""
+    local_ppp = get_team_ppp(df, local, season, round_num, league)
+    away_ppp = get_team_ppp(df, away, season, round_num, league)
+    return local_ppp - away_ppp
+'''
+def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP",df_database=pd.DataFrame(),xgb_model="",scaler="",lst_years=[]):
+    """
+    Predice corners totales con análisis completo para apuestas
+    Args:
+        local: Equipo local
+        visitante: Equipo visitante
+        jornada: Número de jornada
+        temporada: Temporada (formato "2526")
+        league_code: Código de liga ("ESP", "GER", "FRA", "ITA", "NED")
+    """
+    print(f"\n{'='*80}")
+    print(f"🏟️  {local} vs {visitante}")
+    print(f"📅 Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
+    print(f"{'='*80}")
+    if jornada < 5:
+        return {
+            "error": "❌ Se necesitan al menos 5 jornadas previas",
+            "prediccion": None
+        }
+    try:
+        # ===========================
+        # EXTRAER FEATURES (igual que antes)
+        # ===========================
+        lst_avg = get_average(
+            df_database[
+                (df_database['season'] == temporada) &
+                (df_database['round'] < jornada) &
+                (df_database['league'] == league_code)
+            ],
+            is_team=False
+        )
+        (team1_home, team1_away, team1_opp_home, team1_opp_away,
+         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
+            df_database, temporada, jornada, local, visitante, league=league_code
+        )
+        index = lst_years.index(temporada)
+        result = lst_years[:index+1]
+        team1_h2h, team2_h2h = get_head_2_head(
+            df_database, local, visitante, seasons=result, league=league_code
+        )
+        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
+        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
+        ppp_diff = local_ppp - away_ppp
+        # ===========================
+        # CONSTRUIR DICCIONARIO DE FEATURES (igual que antes)
+        # ===========================
+        def create_line(df, is_form=True, is_team=False, use_advanced=True):
+            if is_form:
+                df = df[-6:]
+            if use_advanced:
+                return get_average(df, is_team, lst_avg)
+            else:
+                result = get_average(df, is_team, lst_avg)
+                return result[:9]
+        dic_features = {}
+        dic_features['ppp_local'] = (local_ppp,)
+        dic_features['ppp_away'] = (away_ppp,)
+        dic_features['ppp_difference'] = (ppp_diff,)
+        dic_features['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
+        dic_features['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
+        dic_features['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
+        dic_features['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)
+        dic_features['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
+        dic_features['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
+        dic_features['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
+        dic_features['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)
+        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
+        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
+        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
+        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)
+        league_dummies = {
+                    'league_ESP': 1 if league_code == 'ESP' else 0,
+                    'league_GER': 1 if league_code == 'GER' else 0,
+                    'league_FRA': 1 if league_code == 'FRA' else 0,
+                    'league_ITA': 1 if league_code == 'ITA' else 0,
+                    'league_NED': 1 if league_code == 'NED' else 0,
+                    'league_ENG': 1 if league_code == 'ENG' else 0,
+                    'league_POR': 1 if league_code == 'POR' else 0,
+                    'league_BEL': 1 if league_code == 'BEL' else 0
+                }
+        for key, value in league_dummies.items():
+            dic_features[key] = (value,)
+        # ===========================
+        # CONSTRUIR VECTOR DE FEATURES
+        # ===========================
+        lst_base_advanced = [
+            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
+            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
+            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
+            "high_press_intensity", "interception_tackle", "clearance_ratio",
+            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
+        ]
+        lst_base_original = [
+            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
+        ]
+        lst_features_values = []
+        lst_features_names = []
+        for key in dic_features:
+            lst_features_values.extend(list(dic_features[key]))
+            if key in ['ppp_local', 'ppp_away', 'ppp_difference']:
+                lst_features_names.append(key)
+            elif key.startswith('league_'):
+                lst_features_names.append(key)
+            elif key in ['lst_team1_opp_away', 'lst_team2_opp_home']:
+                lst_features_names.extend([f"{key}_{col}" for col in lst_base_original])
+            else:
+                lst_features_names.extend([f"{key}_{col}" for col in lst_base_advanced])
+        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
+        expected_features = scaler.feature_names_in_
+        if len(df_input.columns) != len(expected_features):
+            print(f"\n⚠️ ERROR: Número de features no coincide")
+            print(f"   Esperadas: {len(expected_features)}")
+            print(f"   Recibidas: {len(df_input.columns)}")
+            return {"error": "Desajuste de features", "prediccion": None}
+        df_input = df_input[expected_features]
+        X_input_scaled = pd.DataFrame(
+            scaler.transform(df_input),
+            columns=df_input.columns
+        )
+        # ===========================
+        # PREDICCIÓN
+        # ===========================
+        prediccion = xgb_model.predict(X_input_scaled)[0]
+        # ===========================
+        # ✅ ANÁLISIS PROBABILÍSTICO CON POISSON
+        # ===========================
+        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)
+        # ===========================
+        # ESTADÍSTICAS DETALLADAS
+        # ===========================
+        local_ck_home = team1_home['Pass Types_CK'].mean() if len(team1_home) > 0 else 0
+        local_xg_home = team1_home['Expected_xG'].mean() if len(team1_home) > 0 else 0
+        local_poss_home = team1_home['Poss'].mean() if len(team1_home) > 0 else 0
+        away_ck_away = team2_away['Pass Types_CK'].mean() if len(team2_away) > 0 else 0
+        away_xg_away = team2_away['Expected_xG'].mean() if len(team2_away) > 0 else 0
+        away_poss_away = team2_away['Poss'].mean() if len(team2_away) > 0 else 0
+        local_ck_received = team1_opp_home['Pass Types_CK'].mean() if len(team1_opp_home) > 0 else 0
+        away_ck_received = team2_opp_away['Pass Types_CK'].mean() if len(team2_opp_away) > 0 else 0
+        partido_ck_esperado = local_ck_home + away_ck_away
+        h2h_ck_local = team1_h2h['Pass Types_CK'].mean() if len(team1_h2h) > 0 else 0
+        h2h_ck_away = team2_h2h['Pass Types_CK'].mean() if len(team2_h2h) > 0 else 0
+        h2h_total = h2h_ck_local + h2h_ck_away
+        # ===========================
+        # ✅ MOSTRAR RESULTADOS CON PROBABILIDADES
+        # ===========================
+        print(f"\n🎲 PREDICCIÓN MODELO: {prediccion:.2f} corners totales")
+        print(f"   PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")
+        print(f"\n📊 ESTADÍSTICAS HISTÓRICAS:")
+        print(f"   {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
+        print(f"   {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
+        print(f"   Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
+        print(f"   Total esperado (suma): {partido_ck_esperado:.1f} corners")
+        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
+            print(f"\n🔄 HEAD TO HEAD (últimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
+            print(f"   {local}: {h2h_ck_local:.1f} CK/partido")
+            print(f"   {visitante}: {h2h_ck_away:.1f} CK/partido")
+            print(f"   Promedio total: {h2h_total:.1f} corners")
+        # ===========================
+        # ✅ MOSTRAR PROBABILIDADES EXACTAS
+        # ===========================
+        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])
+        print(f"\n📈 PROBABILIDADES EXACTAS (Poisson):")
+        for k in sorted(analisis['exactas'].keys()):
+            prob = analisis['exactas'][k]
+            bar = '█' * int(prob / 2)
+            marca = ' ⭐' if k == valor_mas_probable[0] else ''
+            print(f"   {k:2d} corners: {prob:5.2f}% {bar}{marca}")
+        print(f"\n✅ Valor más probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")
+        # ✅ RANGO DE 80% CONFIANZA
+        probs_sorted = sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True)
+        cumsum = 0
+        rango_80 = []
+        for val, prob in probs_sorted:
+            cumsum += prob
+            rango_80.append(val)
+            if cumsum >= 80:
+                break
+        print(f"📊 Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")
+        # ===========================
+        # ✅ MOSTRAR OVER/UNDER CON CUOTAS IMPLÍCITAS
+        # ===========================
+        print(f"\n🎯 ANÁLISIS OVER/UNDER:")
+        print(f"{'Línea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
+        print("-" * 85)
+        for linea in [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]:
+            prob_over = analisis['over'][linea]
+            prob_under = analisis['under'][linea]
+            # Cuotas implícitas (inverso de probabilidad en decimal)
+            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
+            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999
+            conf_over = clasificar_confianza(prob_over)
+            print(f"O/U {linea:<5} {prob_over:6.2f}%     @{cuota_impl_over:5.2f}      {conf_over:<15} {prob_under:6.2f}%     @{cuota_impl_under:5.2f}")
+        # ===========================
+        # ✅ RECOMENDACIONES CON CUOTAS
+        # ===========================
+        print(f"\n💡 RECOMENDACIONES DE APUESTA:")
+        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
+        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]
+        if mejores_over:
+            print(f"\n✅ OVER con confianza MEDIA/ALTA:")
+            for linea, prob in sorted(mejores_over, key=lambda x: x[1], reverse=True):
+                cuota_impl = 100 / prob
+                conf = clasificar_confianza(prob)
+                print(f"   • Over {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
+        if mejores_under:
+            print(f"\n✅ UNDER con confianza MEDIA/ALTA:")
+            for linea, prob in sorted(mejores_under, key=lambda x: x[1], reverse=True):
+                cuota_impl = 100 / prob
+                conf = clasificar_confianza(prob)
+                print(f"   • Under {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
+        if not mejores_over and not mejores_under:
+            print(f"   ⚠️ No hay apuestas con confianza MEDIA o superior")
+        # ===========================
+        # ✅ ANÁLISIS DE RIESGO
+        # ===========================
+        df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
+        riesgo = obtener_fiabilidad_partido(local, visitante, df_varianza_temp)
+        print(f"\n⚠️ ANÁLISIS DE RIESGO:")
+        print(f"   Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
+        print(f"   Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
+        print(f"   🎲 FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
+        print(f"   💡 {riesgo['mensaje']}")
+        # ===========================
+        # RETORNAR DICCIONARIO COMPLETO
+        # ===========================
+        return {
+            "prediccion": round(prediccion, 2),
+            "local": local,
+            "visitante": visitante,
+            "ppp_local": local_ppp,
+            "ppp_away": away_ppp,
+            "ppp_diff": ppp_diff,
+            "riesgo": riesgo,
+            "stats": {
+                "local_ck": local_ck_home,
+                "away_ck": away_ck_away,
+                "local_ck_received": local_ck_received,
+                "away_ck_received": away_ck_received,
+                "h2h_total": h2h_total,
+                "partido_esperado": partido_ck_esperado
+            },
+            "probabilidades_exactas": analisis['exactas'],
+            "probabilidades_over": analisis['over'],
+            "probabilidades_under": analisis['under'],
+            "valor_mas_probable": valor_mas_probable[0],
+            "prob_mas_probable": valor_mas_probable[1],
+            "rango_80": (min(rango_80), max(rango_80))
+        }
+    except Exception as e:
+        print(f"\n❌ ERROR: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return {"error": str(e), "prediccion": None}
+def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP", export_csv=True, filename=None,df_database=pd.DataFrame(),xgb_model="",scaler="",lst_years=[]):
+    """
+    Predice corners para múltiples partidos y exporta resultados a CSV
+    Args:
+        partidos: Lista de tuplas [(local1, visitante1), (local2, visitante2), ...]
+        jornada: Número de jornada
+        temporada: Temporada (formato "2526")
+        league_code: Código de liga ("ESP", "GER", "FRA", "ITA", "NED")
+        export_csv: Si True, exporta a CSV
+        filename: Nombre del archivo CSV (opcional)
+    Returns:
+        DataFrame con todos los resultados
+    """
+    resultados = []
+    print("\n" + "=" * 120)
+    print(f"🎯 PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
+    print("=" * 120)
+    for idx, (local, visitante) in enumerate(partidos, 1):
+        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")
+        resultado = predecir_corners(
+            local=local,
+            visitante=visitante,
+            jornada=jornada,
+            temporada=temporada,
+            league_code=league_code,
+            df_database=df_database,
+            xgb_model=xgb_model,
+            scaler=scaler,
+            lst_years=lst_years)
+        if resultado.get("error"):
+            print(f"   ❌ Error: {resultado['error']}")
+            continue
+        # ===========================
+        # CONSTRUIR FILA DE DATOS
+        # ===========================
+        fila = {
+            'Partido': f"{local} vs {visitante}",
+            'Local': local,
+            'Visitante': visitante,
+            'Liga': league_code,
+            'Jornada': jornada,
+            'Temporada': temporada,
+            # Predicción
+            'Prediccion': resultado['prediccion'],
+            'Valor_Mas_Probable': resultado['valor_mas_probable'],
+            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
+            'Rango_80%_Min': resultado['rango_80'][0],
+            'Rango_80%_Max': resultado['rango_80'][1],
+            # PPP
+            'PPP_Local': round(resultado['ppp_local'], 2),
+            'PPP_Away': round(resultado['ppp_away'], 2),
+            'PPP_Diferencia': round(resultado['ppp_diff'], 2),
+            # Estadísticas históricas
+            'CK_Local_Casa': round(resultado['stats']['local_ck'], 1),
+            'CK_Away_Fuera': round(resultado['stats']['away_ck'], 1),
+            'CK_Local_Recibidos': round(resultado['stats']['local_ck_received'], 1),
+            'CK_Away_Recibidos': round(resultado['stats']['away_ck_received'], 1),
+            'CK_Esperado_Suma': round(resultado['stats']['partido_esperado'], 1),
+            'CK_H2H_Total': round(resultado['stats']['h2h_total'], 1) if resultado['stats']['h2h_total'] > 0 else 'N/A',
+            # Riesgo
+            'Fiabilidad_Partido': resultado['riesgo']['fiabilidad'],
+            'Score_Fiabilidad': round(resultado['riesgo']['score_promedio'], 1),
+            'Nivel_Local': resultado['riesgo']['nivel_local'],
+            'Nivel_Away': resultado['riesgo']['nivel_away'],
+            'CV_Local_%': round(resultado['riesgo']['cv_local'], 1),
+            'CV_Away_%': round(resultado['riesgo']['cv_away'], 1),
+        }
+        # ===========================
+        # OVER 6.5 a 10.5
+        # ===========================
+        for linea in [6.5, 7.5, 8.5, 9.5, 10.5]:
+            prob = resultado['probabilidades_over'].get(linea, 0)
+            cuota_impl = round(100 / prob, 2) if prob > 0 else 999
+            conf = clasificar_confianza(prob)
+            fila[f'Over_{linea}_Prob_%'] = round(prob, 2)
+            fila[f'Over_{linea}_Cuota'] = cuota_impl
+            fila[f'Over_{linea}_Confianza'] = conf
+        # ===========================
+        # UNDER 12.5 a 9.5
+        # ===========================
+        for linea in [12.5, 11.5, 10.5, 9.5]:
+            prob = resultado['probabilidades_under'].get(linea, 0)
+            cuota_impl = round(100 / prob, 2) if prob > 0 else 999
+            conf = clasificar_confianza(prob)
+            fila[f'Under_{linea}_Prob_%'] = round(prob, 2)
+            fila[f'Under_{linea}_Cuota'] = cuota_impl
+            fila[f'Under_{linea}_Confianza'] = conf
+        # ===========================
+        # RECOMENDACIONES
+        # ===========================
+        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
+        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]
+        if resultado['riesgo']['score_promedio'] < 35:
+            fila['Recomendacion'] = "⛔ EVITAR - Baja fiabilidad"
+            fila['Es_Apostable'] = "NO"
+        elif not mejores_over and not mejores_under:
+            fila['Recomendacion'] = "⚠️ NO RECOMENDADO - Sin confianza suficiente"
+            fila['Es_Apostable'] = "NO"
+        else:
+            recomendaciones = []
+            if mejores_over:
+                mejor_over = max(mejores_over, key=lambda x: x[1])
+                cuota_over = round(100 / mejor_over[1], 2)
+                recomendaciones.append(f"Over {mejor_over[0]} ({mejor_over[1]:.1f}% @{cuota_over})")
+            if mejores_under:
+                mejor_under = max(mejores_under, key=lambda x: x[1])
+                cuota_under = round(100 / mejor_under[1], 2)
+                recomendaciones.append(f"Under {mejor_under[0]} ({mejor_under[1]:.1f}% @{cuota_under})")
+            fila['Recomendacion'] = " | ".join(recomendaciones)
+            if resultado['riesgo']['score_promedio'] >= 65:
+                fila['Es_Apostable'] = "SÍ ⭐⭐⭐"
+            elif resultado['riesgo']['score_promedio'] >= 50:
+                fila['Es_Apostable'] = "SÍ ✅"
+            else:
+                fila['Es_Apostable'] = "PRECAUCIÓN 🟡"
+        fila['Mensaje_Riesgo'] = resultado['riesgo']['mensaje']
+        resultados.append(fila)
+        print(f"   ✅ Completado")
+    # ===========================
+    # CREAR DATAFRAME
+    # ===========================
+    df_resultados = pd.DataFrame(resultados)
+    print("\n" + "=" * 120)
+    print(f"✅ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
+    print("=" * 120)
+    # ===========================
+    # EXPORTAR A CSV
+    # ===========================
+    if export_csv and len(df_resultados) > 0:
+        if filename is None:
+            filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"
+        df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
+        print(f"\n💾 Resultados exportados a: {filename}")
+    # ===========================
+    # RESUMEN
+    # ===========================
+    print(f"\n📊 RESUMEN DE APUESTAS:")
+    print(f"   Partidos apostables: {len(df_resultados[df_resultados['Es_Apostable'].str.contains('SÍ')])} / {len(df_resultados)}")
+    print(f"   Partidos ALTA confianza (⭐⭐⭐): {len(df_resultados[df_resultados['Es_Apostable'] == 'SÍ ⭐⭐⭐'])}")
+    print(f"   Partidos MEDIA confianza (✅): {len(df_resultados[df_resultados['Es_Apostable'] == 'SÍ ✅'])}")
+    print(f"   Partidos a evitar (⛔): {len(df_resultados[df_resultados['Es_Apostable'] == 'NO'])}")
+    return df_resultados
+def mostrar_resumen_batch(df_resultados):
+    """Muestra resumen visual de los resultados"""
+    print("\n" + "=" * 120)
+    print("🎯 MEJORES OPORTUNIDADES DE APUESTA")
+    print("=" * 120)
+    # Filtrar solo apostables
+    df_apostables = df_resultados[df_resultados['Es_Apostable'].str.contains('SÍ')].copy()
+    if len(df_apostables) == 0:
+        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
+        return
+    # Ordenar por score de fiabilidad
+    df_apostables = df_apostables.sort_values('Score_Fiabilidad', ascending=False)
+    for idx, row in df_apostables.iterrows():
+        print(f"\n{'='*120}")
+        print(f"🏟️  {row['Partido']}")
+        print(f"{'='*120}")
+        print(f"📊 Predicción: {row['Prediccion']:.2f} corners | Valor más probable: {row['Valor_Mas_Probable']} ({row['Prob_Valor_Mas_Probable_%']:.1f}%)")
+        print(f"📈 Histórico: Local {row['CK_Local_Casa']:.1f} CK | Away {row['CK_Away_Fuera']:.1f} CK | H2H: {row['CK_H2H_Total']}")
+        print(f"🎲 Fiabilidad: {row['Fiabilidad_Partido']} (Score: {row['Score_Fiabilidad']:.1f}/100)")
+        print(f"💡 {row['Recomendacion']}")
+        # Mostrar líneas con alta probabilidad
+        print(f"\n   📌 Líneas destacadas:")
+        for linea in [7.5, 8.5, 9.5, 10.5]:
+            over_prob = row.get(f'Over_{linea}_Prob_%', 0)
+            under_prob = row.get(f'Under_{linea}_Prob_%', 0)
+            if over_prob >= 55:
+                cuota = row.get(f'Over_{linea}_Cuota', 0)
+                conf = row.get(f'Over_{linea}_Confianza', '')
+                print(f"   • Over {linea}: {over_prob:.1f}% @{cuota:.2f} - {conf}")
+            if under_prob >= 55:
+                cuota = row.get(f'Under_{linea}_Cuota', 0)
+                conf = row.get(f'Under_{linea}_Confianza', '')
+                print(f"   • Under {linea}: {under_prob:.1f}% @{cuota:.2f} - {conf}")
+class USE_MODEL():
+    def __init__(self):
+        self.load_models()
+        self.load_data()
+        self.init_variables()
+    def load_models(self):
+        """Cargar modelos desde GitHub usando raw URLs"""
+        print("📦 Cargando modelos desde GitHub...")
+        # URLs de descarga directa (raw.githubusercontent.com)
+        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/models"
+        model_url = f"{base_url}/xgboost_corners_v4_retrain.pkl"
+        scaler_url = f"{base_url}/scaler_corners_v4_retrain.pkl"
+        try:
+            # Descargar modelo
+            print(f"📥 Descargando modelo desde: {model_url}")
+            response_model = requests.get(model_url, timeout=30)
+            response_model.raise_for_status()
+            # Descargar scaler
+            print(f"📥 Descargando scaler desde: {scaler_url}")
+            response_scaler = requests.get(scaler_url, timeout=30)
+            response_scaler.raise_for_status()
+            # Guardar temporalmente y cargar
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.pkl') as tmp_model:
+                tmp_model.write(response_model.content)
+                tmp_model_path = tmp_model.name
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.pkl') as tmp_scaler:
+                tmp_scaler.write(response_scaler.content)
+                tmp_scaler_path = tmp_scaler.name
+            # Cargar modelos desde archivos temporales
+            self.xgb_model = joblib.load(tmp_model_path)
+            self.scaler = joblib.load(tmp_scaler_path)
+            # Limpiar archivos temporales
+            os.unlink(tmp_model_path)
+            os.unlink(tmp_scaler_path)
+            print("✅ Modelos cargados correctamente desde GitHub")
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"❌ Error descargando modelos: {str(e)}")
+        except Exception as e:
+            raise Exception(f"❌ Error cargando modelos: {str(e)}")
+    def load_data(self):
+        """Cargar datos desde GitHub"""
+        print("📂 Cargando datos desde GitHub...")
+        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/dataset/cleaned"
+        historic_url = f"{base_url}/dataset_cleaned.csv"
+        current_url = f"{base_url}/dataset_cleaned_current_year.csv"
+        try:
+            # Cargar dataset histórico
+            print(f"📥 Descargando dataset histórico...")
+            self.df_dataset_historic = pd.read_csv(historic_url)
+            print(f"✅ Dataset histórico cargado: {len(self.df_dataset_historic)} registros")
+            # Intentar cargar año actual
+            try:
+                print(f"📥 Descargando dataset año actual...")
+                self.df_dataset_current_year = pd.read_csv(current_url)
+                print(f"✅ Dataset año actual cargado: {len(self.df_dataset_current_year)} registros")
+                self.df_dataset = pd.concat([self.df_dataset_historic, self.df_dataset_current_year])
+            except:
+                print("⚠️ No se pudo cargar dataset del año actual, usando solo histórico")
+                self.df_dataset = self.df_dataset_historic
+            # Limpieza
+            self.df_dataset["season"] = self.df_dataset["season"].astype(str)
+            self.df_dataset["Performance_Save%"].fillna(0, inplace=True)
+            print(f"✅ Total registros: {len(self.df_dataset)}")
+        except Exception as e:
+            raise FileNotFoundError(
+                f"\n❌ ERROR: No se pudieron cargar los datos desde GitHub\n"
+                f"   Error: {str(e)}\n\n"
+                f"💡 Verifica que los archivos existan en el repositorio\n"
+            )
+    def init_variables(self):
+        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
+        print("✅ Variables inicializadas")
+    def consume_model_batch(self,partidos,jornada,temporada,league_code):
+        df_predict = predecir_partidos_batch(
+            partidos=partidos,
+            jornada=jornada,
+            temporada=temporada,
+            league_code=league_code,
+            export_csv=True,
+            filename=f"results\{league_code}\{league_code}-{temporada}-{jornada}-predicciones.csv",
+            df_database = self.df_dataset,
+            xgb_model = self.xgb_model,
+            scaler=self.scaler,
+            lst_years=self.lst_years
+        )
+        # Mostrar resumen
+        return df_predict
+    def consume_model_single(self,local,visitante,jornada,temporada,league_code):
+         return predecir_corners(
+            local=local,
+            visitante=visitante,
+            jornada=jornada,
+            temporada=temporada,
+            league_code=league_code,
+            df_database = self.df_dataset,
+            xgb_model = self.xgb_model,
+            scaler=self.scaler,
+            lst_years=self.lst_years
+        )
+    def kelly_stats(self,p, odds, fraction=0.2):
+        b = odds - 1
+        q = 1 - p
+        f_star = (b * p - q) / b
+        f_star = max(f_star, 0)  # evita negativos
+        return f_star * fraction  # usa 0.1 para Kelly 10%

src/models/__init__.py ADDED Viewed

File without changes

src/models/test_model.py ADDED Viewed

	@@ -0,0 +1,1148 @@

+# ===========================
+# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
+# ===========================
+import numpy as np
+import pandas as pd
+import joblib
+from scipy.stats import poisson
+from scipy import stats
+# ===========================
+# 1. FUNCIONES FIABILIDAD
+# ===========================
+def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
+    """
+    Análisis completo de fiabilidad para apuestas de corners
+    No solo varianza, sino consistencia, tendencias y patrones
+    """
+    df_temp = df_database[df_database['season'] == temporada].copy()
+    resultados = []
+    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()
+    for equipo in equipos:
+        # Partidos del equipo
+        partidos_equipo = df_temp[df_temp['team'] == equipo]
+        if len(partidos_equipo) < min_partidos:
+            continue
+        ck_sacados = partidos_equipo['Pass Types_CK'].values
+        # ===========================
+        # 1. MÉTRICAS DE VARIABILIDAD
+        # ===========================
+        media = ck_sacados.mean()
+        std = ck_sacados.std()
+        cv = (std / media * 100) if media > 0 else 0
+        # ===========================
+        # 2. MÉTRICAS DE CONSISTENCIA
+        # ===========================
+        # 2.1 Porcentaje de partidos cerca de la media (±2 corners)
+        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / len(ck_sacados) * 100
+        # 2.2 Rachas (detectar equipos con "explosiones" de corners)
+        cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
+        pct_cambios_bruscos = cambios_bruscos / (len(ck_sacados) - 1) * 100
+        # 2.3 Cuartiles (Q1, Q2=mediana, Q3)
+        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
+        iqr = q3 - q1  # Rango intercuartílico (más robusto que std)
+        # ===========================
+        # 3. MÉTRICAS DE TENDENCIA
+        # ===========================
+        # 3.1 Tendencia lineal (¿mejora/empeora con el tiempo?)
+        jornadas = np.arange(len(ck_sacados))
+        slope, intercept, r_value, p_value, std_err = stats.linregress(jornadas, ck_sacados)
+        # 3.2 Autocorrelación (¿resultado actual predice el siguiente?)
+        if len(ck_sacados) > 2:
+            autocorr = np.corrcoef(ck_sacados[:-1], ck_sacados[1:])[0, 1]
+        else:
+            autocorr = 0
+        # ===========================
+        # 4. MÉTRICAS DE OUTLIERS
+        # ===========================
+        # 4.1 Detección de valores atípicos (método IQR)
+        lower_bound = q1 - 1.5 * iqr
+        upper_bound = q3 + 1.5 * iqr
+        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
+        pct_outliers = outliers / len(ck_sacados) * 100
+        # 4.2 Z-score máximo
+        z_scores = np.abs(stats.zscore(ck_sacados))
+        max_z = z_scores.max()
+        # ===========================
+        # 5. MÉTRICAS DE RANGO
+        # ===========================
+        rango = ck_sacados.max() - ck_sacados.min()
+        rango_normalizado = rango / media if media > 0 else 0
+        # ===========================
+        # 6. SCORE GLOBAL DE FIABILIDAD
+        # ===========================
+        # Penalizaciones (0-100, menor = peor)
+        score_cv = max(0, 100 - cv * 2)  # CV alto = mala
+        score_consistencia = cerca_media  # Más cerca de media = mejor
+        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)  # Cambios bruscos = malo
+        score_outliers = max(0, 100 - pct_outliers * 3)  # Outliers = malo
+        score_iqr = max(0, 100 - iqr * 10)  # IQR grande = malo
+        # Score final (promedio ponderado)
+        score_fiabilidad = (
+            score_cv * 0.25 +
+            score_consistencia * 0.30 +
+            score_cambios * 0.20 +
+            score_outliers * 0.15 +
+            score_iqr * 0.10
+        )
+        # ===========================
+        # 7. CLASIFICACIÓN MULTI-CRITERIO
+        # ===========================
+        # Clasificación basada en score
+        if score_fiabilidad >= 70:
+            nivel = "EXCELENTE ⭐⭐⭐"
+            color = "#27ae60"
+        elif score_fiabilidad >= 55:
+            nivel = "BUENO ✅"
+            color = "#2ecc71"
+        elif score_fiabilidad >= 40:
+            nivel = "ACEPTABLE 🟡"
+            color = "#f39c12"
+        elif score_fiabilidad >= 25:
+            nivel = "REGULAR ⚠️"
+            color = "#e67e22"
+        else:
+            nivel = "EVITAR ⛔"
+            color = "#e74c3c"
+        resultados.append({
+            'Equipo': equipo,
+            'Partidos': len(ck_sacados),
+            # Estadísticas básicas
+            'Media_CK': round(media, 2),
+            'Mediana_CK': round(q2, 2),
+            'Std_CK': round(std, 2),
+            'CV_%': round(cv, 1),
+            # Consistencia
+            'Pct_Cerca_Media': round(cerca_media, 1),
+            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
+            'IQR': round(iqr, 2),
+            # Rango
+            'Rango': int(rango),
+            'Rango_Norm': round(rango_normalizado, 2),
+            'Min': int(ck_sacados.min()),
+            'Max': int(ck_sacados.max()),
+            # Outliers
+            'Outliers': int(outliers),
+            'Pct_Outliers': round(pct_outliers, 1),
+            'Max_ZScore': round(max_z, 2),
+            # Tendencia
+            'Tendencia_Slope': round(slope, 3),
+            'Autocorr': round(autocorr, 3),
+            # Score y clasificación
+            'Score_Fiabilidad': round(score_fiabilidad, 1),
+            'Nivel': nivel,
+            'Color': color
+        })
+    df_resultado = pd.DataFrame(resultados)
+    df_resultado = df_resultado.sort_values('Score_Fiabilidad', ascending=False)
+    return df_resultado
def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """
    Print the corner-reliability report to stdout.

    The report has three parts: a card for each of the first ``top_n`` rows of
    ``df_analisis`` (headed "most reliable"), a card for each of the last
    ``top_n`` rows (headed "least reliable"), and summary statistics (row count
    per 'Nivel' plus mean / median / max / min of 'Score_Fiabilidad').

    Rows are printed in the order they arrive; the headings only make sense
    when the frame is sorted by 'Score_Fiabilidad' in descending order.
    Nothing is returned.
    """
    linea_doble = "=" * 120
    linea_simple = "-" * 120

    def imprimir_equipo(fila, fiable):
        # Both rankings share the same card layout; only the consistency line
        # and the trailing min-max range line depend on the ranking.
        print(f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | Score: {fila['Score_Fiabilidad']:.1f}")
        print(f"  📊 Media: {fila['Media_CK']:.1f} | Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%")
        if fiable:
            print(f"  ✅ {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        else:
            print(f"  ❌ Solo {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        print(f"  ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | Outliers: {fila['Pct_Outliers']:.1f}%")
        if fiable:
            print(f"  📈 Rango: {fila['Min']}-{fila['Max']} ({fila['Rango']} corners)")

    print("\n" + linea_doble)
    print("🎯 ANÁLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print(linea_doble)

    # Head of the frame: most reliable teams
    print(f"\n⭐ TOP {top_n} EQUIPOS MÁS FIABLES")
    print(linea_simple)
    for _, fila in df_analisis.head(top_n).iterrows():
        imprimir_equipo(fila, fiable=True)

    # Tail of the frame: least reliable teams
    print(f"\n\n⛔ TOP {top_n} EQUIPOS MENOS FIABLES")
    print(linea_simple)
    for _, fila in df_analisis.tail(top_n).iterrows():
        imprimir_equipo(fila, fiable=False)

    # Global statistics
    print("\n\n📊 DISTRIBUCIÓN POR NIVEL DE FIABILIDAD")
    print(linea_simple)
    print(df_analisis['Nivel'].value_counts())

    print("\n📈 ESTADÍSTICAS DE SCORE:")
    resumen = (
        ("Media", "mean"),
        ("Mediana", "median"),
        ("Score máximo", "max"),
        ("Score mínimo", "min"),
    )
    for etiqueta, agregador in resumen:
        valor = getattr(df_analisis['Score_Fiabilidad'], agregador)()
        print(f"  {etiqueta}: {valor:.1f}")
def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """
    Rate how reliable a fixture is for corner betting.

    Args:
        local: Home team name, matched against the 'Equipo' column.
        visitante: Away team name, matched against the 'Equipo' column.
        df_analisis: Per-team reliability table with the columns 'Equipo',
            'Score_Fiabilidad', 'Nivel', 'CV_%' and 'Pct_Cerca_Media'.

    Returns:
        dict with the keys 'fiabilidad', 'score_local', 'score_away',
        'score_promedio', 'nivel_local', 'nivel_away', 'mensaje', 'cv_local',
        'cv_away', 'consistencia_local' and 'consistencia_away'.

        When either team is missing from ``df_analisis`` the result is flagged
        'DESCONOCIDO' with 'score_promedio' 0 (plus the legacy 'score' key),
        but it still carries every key listed above: callers index
        'nivel_local', 'cv_local', 'score_promedio', ... unconditionally, and
        the previous three-key fallback made them fail with KeyError.
    """
    def perfil(filas):
        # Values of the first matching row, or neutral placeholders when the
        # team has no row in the analysis table.
        if filas.empty:
            return {'score': 0, 'nivel': 'DESCONOCIDO', 'cv': 0, 'consistencia': 0}
        return {
            'score': filas['Score_Fiabilidad'].values[0],
            'nivel': filas['Nivel'].values[0],
            'cv': filas['CV_%'].values[0],
            'consistencia': filas['Pct_Cerca_Media'].values[0],
        }

    datos_local = df_analisis[df_analisis['Equipo'] == local]
    datos_away = df_analisis[df_analisis['Equipo'] == visitante]
    perfil_local = perfil(datos_local)
    perfil_away = perfil(datos_away)

    if datos_local.empty or datos_away.empty:
        return {
            'fiabilidad': 'DESCONOCIDO',
            'score': 0,  # legacy key kept for existing consumers
            'score_local': perfil_local['score'],
            'score_away': perfil_away['score'],
            'score_promedio': 0,
            'nivel_local': perfil_local['nivel'],
            'nivel_away': perfil_away['nivel'],
            'mensaje': '⚠️ Datos insuficientes',
            'cv_local': perfil_local['cv'],
            'cv_away': perfil_away['cv'],
            'consistencia_local': perfil_local['consistencia'],
            'consistencia_away': perfil_away['consistencia'],
        }

    score_local = perfil_local['score']
    score_away = perfil_away['score']
    score_promedio = (score_local + score_away) / 2

    # Fixture classification from the mean of both team scores
    if score_promedio >= 65:
        fiabilidad = "MUY ALTA ⭐⭐⭐"
        mensaje = "✅ EXCELENTE PARTIDO PARA APOSTAR"
    elif score_promedio >= 50:
        fiabilidad = "ALTA ✅"
        mensaje = "✅ BUEN PARTIDO PARA APOSTAR"
    elif score_promedio >= 35:
        fiabilidad = "MEDIA 🟡"
        mensaje = "🟡 APOSTAR CON PRECAUCIÓN"
    else:
        fiabilidad = "BAJA ⛔"
        mensaje = "⛔ EVITAR APUESTA"

    return {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': perfil_local['nivel'],
        'nivel_away': perfil_away['nivel'],
        'mensaje': mensaje,
        # Extra per-team detail
        'cv_local': perfil_local['cv'],
        'cv_away': perfil_away['cv'],
        'consistencia_local': perfil_local['consistencia'],
        'consistencia_away': perfil_away['consistencia'],
    }
def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5, lineas=None):
    """
    Turn a predicted corner count into Poisson probabilities (in percent).

    Args:
        lambda_pred: Poisson mean, i.e. the predicted number of corners.
        rango_inferior: How many integer counts below round(lambda_pred) get an
            exact probability (the lower bound is clamped at 0).
        rango_superior: How many integer counts above round(lambda_pred) get an
            exact probability.
        lineas: Optional iterable of over/under lines. Defaults to
            7.5, 8.5, ..., 12.5, the set that was previously hard-coded.

    Returns:
        dict with three sub-dicts, all holding percentages:
            'exactas': {k: P(X = k)} for every analysed integer k
            'over':    {line: P(X > line)}
            'under':   {line: P(X <= line)}
        Over and under share the same lines, so they add up to 100 per line.
    """
    if lineas is None:
        lineas = (7.5, 8.5, 9.5, 10.5, 11.5, 12.5)

    valor_central = int(round(lambda_pred))
    conteos = range(
        max(0, valor_central - rango_inferior),
        valor_central + rango_superior + 1
    )

    probabilidades_exactas = {k: poisson.pmf(k, lambda_pred) * 100 for k in conteos}
    # sf() is the survival function 1 - cdf, evaluated directly so small tail
    # probabilities do not lose precision to the subtraction.
    probabilidades_over = {linea: poisson.sf(linea, lambda_pred) * 100 for linea in lineas}
    probabilidades_under = {linea: poisson.cdf(linea, lambda_pred) * 100 for linea in lineas}

    return {
        'exactas': probabilidades_exactas,
        'over': probabilidades_over,
        'under': probabilidades_under
    }
def clasificar_confianza(prob):
    """Map a probability in percent to a confidence label (ALTA / MEDIA / BAJA)."""
    # Thresholds are checked from highest to lowest; the first one reached wins.
    umbrales = (
        (66, "ALTA ✅"),
        (55, "MEDIA ⚠️"),
    )
    for minimo, etiqueta in umbrales:
        if prob >= minimo:
            return etiqueta
    return "BAJA ❌"
def get_dataframes(df, season, round_num, local, away, league=None):
    """
    Split the matches played before ``round_num`` into eight DataFrames.

    Only rows of ``season`` with 'round' < ``round_num`` (and of ``league``
    when given) are considered. The returned tuple is ordered as:
        local_home, local_away          rows where 'team' == local
        local_opp_home, local_opp_away  rows where 'opponent' == local
        away_home, away_away            rows where 'team' == away
        away_opp_home, away_opp_away    rows where 'opponent' == away
    where home / away refers to the row's 'venue' value.
    """
    mascara_base = (df['season'] == season) & (df['round'] < round_num)
    if league is not None:
        mascara_base = mascara_base & (df['league'] == league)

    selectores = (
        df['team'] == local,
        df['opponent'] == local,
        df['team'] == away,
        df['opponent'] == away,
    )

    tablas = []
    for selector in selectores:
        subconjunto = df[mascara_base & selector].copy()
        for sede in ("Home", "Away"):
            tablas.append(subconjunto[subconjunto['venue'] == sede])
    return tuple(tablas)
def get_head_2_head(df, local, away, seasons=None, league=None):
    """
    Return the most recent head-to-head rows of both teams.

    Rows are optionally restricted to ``seasons`` (ignored when empty or None)
    and to ``league``. The first returned frame holds rows where 'team' is
    ``local`` and 'opponent' is ``away``; the second holds the mirrored rows.
    The last three rows of each are returned when the local side has at least
    four such rows, otherwise the last two. "Last" follows the row order of
    ``df``.
    """
    partidos = df
    if seasons:
        partidos = partidos[partidos['season'].isin(seasons)]
    if league is not None:
        partidos = partidos[partidos['league'] == league]

    h2h_local = partidos[(partidos['team'] == local) & (partidos['opponent'] == away)]
    h2h_visitante = partidos[(partidos['team'] == away) & (partidos['opponent'] == local)]

    ultimos = 2 if len(h2h_local) < 4 else 3
    return h2h_local.tail(ultimos), h2h_visitante.tail(ultimos)
def get_average(df, is_team=False, lst_avg=None):
    """
    Aggregate match rows into the statistics tuple used as model features.

    Args:
        df: Match rows (one row per team and match) with the stat columns.
        is_team: False -> league averages (9 values).
                 True  -> team profile (24 values); the basic averages are
                 expressed relative to the league averages in ``lst_avg``.
        lst_avg: The 9-value tuple produced by a call with ``is_team=False``.
            Required when ``is_team`` is True and ``df`` is not empty.

    Returns:
        is_team=False: (var_ck, avg_xg, avg_sca, avg_cross, avg_att_3rd,
                        avg_gf, avg_ga, avg_sh, avg_ck)
        is_team=True:  (avg_ck, var_ck, avg_xg, avg_sca, avg_cross, avg_poss,
                        avg_att_3rd, avg_gf, avg_ga, sh_accuracy, xg_shot,
                        attacking_presence, possession_shot,
                        progressive_pass_ratio, final_third_involvement,
                        assist_sca, creative_efficiency, high_press_intensity,
                        interception_tackle, clearance_ratio,
                        progressive_carry_ratio, carry_pass_balance,
                        offensive_index, transition_index)
        An empty ``df`` yields a tuple of zeros of the matching length.

    Raises:
        TypeError: if ``is_team`` is True, ``df`` is not empty and ``lst_avg``
            is None.
    """
    n_team_values = 24
    n_league_values = 9

    n_partidos = len(df)
    if n_partidos == 0:
        # The zero vector must be as long as the non-empty result: callers
        # label team tuples with 24 names, and the former 23-value fallback
        # broke feature assembly whenever a slice (e.g. head-to-head) was empty.
        return (0,) * (n_team_values if is_team else n_league_values)

    # Sample variance needs at least two matches
    var_ck = df['Pass Types_CK'].var() if n_partidos > 1 else 0

    if not is_team:
        # ===========================
        # LEAGUE AVERAGES
        # ===========================
        return (
            var_ck,                                   # 0
            df['Expected_xG'].mean(),                 # 1
            df['SCA Types_SCA'].mean(),               # 2
            df['Performance_Crs'].mean(),             # 3
            df['Touches_Att 3rd'].mean(),             # 4
            df['GF'].mean(),                          # 5
            df['GA'].mean(),                          # 6
            df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0,  # 7
            df['Pass Types_CK'].mean(),               # 8
        )

    if lst_avg is None:
        raise TypeError("lst_avg (league averages) is required when is_team=True")

    def por_partido(columna):
        # Column total divided by the number of rows (missing values count as 0)
        return df[columna].sum() / n_partidos

    def ratio(numerador, denominador):
        # Guarded division: 0 when the denominator is not strictly positive
        return (numerador / denominador) if denominador > 0 else 0

    # ===========================
    # BASIC STATS (relative to the league average; possession relative to 50)
    # ===========================
    avg_ck = por_partido('Pass Types_CK') - lst_avg[8]
    avg_xg = por_partido('Expected_xG') - lst_avg[1]
    avg_sca = por_partido('SCA Types_SCA') - lst_avg[2]
    avg_cross = por_partido('Performance_Crs') - lst_avg[3]
    avg_poss = por_partido('Poss') - 50
    avg_att_3rd = por_partido('Touches_Att 3rd') - lst_avg[4]
    avg_gf = por_partido('GF') - lst_avg[5]
    avg_ga = por_partido('GA') - lst_avg[6]

    # ===========================
    # ADVANCED ATTACKING METRICS
    # ===========================
    total_sh = df['Standard_Sh'].sum()
    total_poss = df['Poss'].sum()
    sh_accuracy = ratio(df['Standard_SoT'].sum(), total_sh)
    xg_shot = ratio(df['Expected_xG'].sum(), total_sh)
    attacking_presence = ratio(df['Touches_Att 3rd'].sum(), df['Touches_Touches'].sum())
    possession_shot = ratio(total_sh, total_poss)

    # ===========================
    # CHANCE-CREATION METRICS
    # ===========================
    total_passes = df['Total_Att'].sum()
    total_prog_passes = df['PrgP'].sum()
    total_sca = df['SCA Types_SCA'].sum()
    progressive_pass_ratio = ratio(total_prog_passes, total_passes)
    final_third_involvement = ratio(df['1/3'].sum(), total_passes)
    assist_sca = ratio(df['Ast'].sum(), total_sca)
    creative_efficiency = ratio(total_sca, total_poss)

    # ===========================
    # DEFENSIVE METRICS
    # ===========================
    total_tackles = df['Tackles_Tkl'].sum()
    total_int = df['Int'].sum()
    high_press_intensity = ratio(df['Tackles_Att 3rd'].sum(), total_tackles)
    interception_tackle = ratio(total_int, total_tackles)
    clearance_ratio = ratio(df['Clr'].sum(), total_tackles + total_int)

    # ===========================
    # BALL-PROGRESSION METRICS
    # ===========================
    total_prog_carries = df['Carries_PrgC'].sum()
    progressive_carry_ratio = ratio(total_prog_carries, df['Carries_Carries'].sum())
    carry_pass_balance = ratio(total_prog_carries, total_prog_passes)

    # ===========================
    # COMPOSITE INDICES (built from raw, non-normalised means)
    # ===========================
    avg_sh = df['Standard_Sh'].mean()
    if avg_sh > 0:
        offensive_index = (df['GF'].mean() + df['Expected_xG'].mean()) * (df['Standard_SoT'].mean() / avg_sh)
    else:
        offensive_index = 0
    transition_index = ratio(df['PrgP'].mean() + df['Carries_PrgC'].mean(), df['Poss'].mean())

    # 24 values, indices 0-23
    return (
        avg_ck,                   # 0
        var_ck,                   # 1
        avg_xg,                   # 2
        avg_sca,                  # 3
        avg_cross,                # 4
        avg_poss,                 # 5
        avg_att_3rd,              # 6
        avg_gf,                   # 7
        avg_ga,                   # 8
        sh_accuracy,              # 9
        xg_shot,                  # 10
        attacking_presence,       # 11
        possession_shot,          # 12
        progressive_pass_ratio,   # 13
        final_third_involvement,  # 14
        assist_sca,               # 15
        creative_efficiency,      # 16
        high_press_intensity,     # 17
        interception_tackle,      # 18
        clearance_ratio,          # 19
        progressive_carry_ratio,  # 20
        carry_pass_balance,       # 21
        offensive_index,          # 22
        transition_index,         # 23
    )
def get_points_from_result(result):
    """Convert a match result code into league points: 'W' -> 3, 'D' -> 1, anything else -> 0."""
    if result == 'W':
        return 3
    return 1 if result == 'D' else 0
def get_team_ppp(df, team, season, round_num, league=None):
    """
    Return a team's points per match (PPP) before a given round.

    Only rows of ``team`` in ``season`` with 'round' < ``round_num`` are used
    (restricted to ``league`` when given). Each 'result' value is converted to
    points with ``get_points_from_result``. Returns 0.0 when no row matches.
    """
    mascara = (
        (df['team'] == team) &
        (df['season'] == season) &
        (df['round'] < round_num)
    )
    if league is not None:
        mascara = mascara & (df['league'] == league)

    partidos = df[mascara]
    jugados = len(partidos)
    if jugados == 0:
        return 0.0

    puntos = partidos['result'].apply(get_points_from_result)
    return puntos.sum() / jugados
def get_ppp_difference(df, local, away, season, round_num, league=None):
    """Return the home team's points per match minus the away team's (see ``get_team_ppp``)."""
    ppp_local, ppp_visitante = (
        get_team_ppp(df, equipo, season, round_num, league)
        for equipo in (local, away)
    )
    return ppp_local - ppp_visitante
def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP", df_database=pd.DataFrame(), xgb_model="", scaler="", lst_years=[]):
    """
    Predict the total corners of one fixture and print a betting-oriented report.

    Args:
        local: Home team.
        visitante: Away team.
        jornada: Round number; only rows with 'round' < jornada are used.
        temporada: Season code (format "2526").
        league_code: League code ("ESP", "GER", "FRA", "ITA", "NED", "ENG",
            "POR", "BEL").
        df_database: Match table, one row per team and match.
        xgb_model: Fitted model exposing ``predict``.
        scaler: Fitted scaler exposing ``feature_names_in_`` and ``transform``.
        lst_years: Ordered list of season codes; every season up to and
            including ``temporada`` is used for the head-to-head lookup.

    Returns:
        On success a dict with 'prediccion', team names, PPP values, 'riesgo',
        'stats', the Poisson tables ('probabilidades_exactas', '_over',
        '_under'), 'valor_mas_probable', 'prob_mas_probable' and 'rango_80'.
        On failure ``{"error": <message>, "prediccion": None}``; no exception
        escapes this function.
    """
    print(f"\n{'='*80}")
    print(f"🏟️  {local} vs {visitante}")
    print(f"📅 Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
    print(f"{'='*80}")

    if jornada < 5:
        return {
            "error": "❌ Se necesitan al menos 5 jornadas previas",
            "prediccion": None
        }

    try:
        # ===========================
        # FEATURE EXTRACTION
        # ===========================
        # League-wide averages up to this round: baseline for the team stats
        lst_avg = get_average(
            df_database[
                (df_database['season'] == temporada) &
                (df_database['round'] < jornada) &
                (df_database['league'] == league_code)
            ],
            is_team=False
        )
        (team1_home, team1_away, team1_opp_home, team1_opp_away,
         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
            df_database, temporada, jornada, local, visitante, league=league_code
        )
        # Seasons up to and including the current one
        temporadas_h2h = lst_years[:lst_years.index(temporada) + 1]
        team1_h2h, team2_h2h = get_head_2_head(
            df_database, local, visitante, seasons=temporadas_h2h, league=league_code
        )
        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
        ppp_diff = local_ppp - away_ppp

        # ===========================
        # FEATURE DICTIONARY
        # ===========================
        def create_line(df, is_form=True, is_team=False, use_advanced=True):
            # "Form" lines only look at the six most recent rows
            if is_form:
                df = df[-6:]
            valores = get_average(df, is_team, lst_avg)
            return valores if use_advanced else valores[:9]

        dic_features = {}
        dic_features['ppp_local'] = (local_ppp,)
        dic_features['ppp_away'] = (away_ppp,)
        dic_features['ppp_difference'] = (ppp_diff,)
        dic_features['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
        dic_features['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
        dic_features['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
        dic_features['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)
        dic_features['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
        dic_features['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
        dic_features['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
        dic_features['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)
        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)

        # One-hot encoding of the league
        for codigo in ('ESP', 'GER', 'FRA', 'ITA', 'NED', 'ENG', 'POR', 'BEL'):
            dic_features[f'league_{codigo}'] = (1 if league_code == codigo else 0,)

        # ===========================
        # FEATURE VECTOR
        # ===========================
        lst_base_advanced = [
            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
            "high_press_intensity", "interception_tackle", "clearance_ratio",
            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
        ]
        # NOTE(review): the opponent lines are the first nine *team* values
        # (avg_ck, var_ck, xg, ...), while these labels start at var_ck and end
        # with avg_ck, so labels and values are shifted by one. Left untouched
        # because the fitted scaler/model presumably saw the same layout during
        # training - confirm against the training pipeline before changing.
        lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]
        lst_features_values = []
        lst_features_names = []
        for key, valores in dic_features.items():
            lst_features_values.extend(valores)
            if key in ('ppp_local', 'ppp_away', 'ppp_difference') or key.startswith('league_'):
                lst_features_names.append(key)
            elif key in ('lst_team1_opp_away', 'lst_team2_opp_home'):
                lst_features_names.extend(f"{key}_{col}" for col in lst_base_original)
            else:
                lst_features_names.extend(f"{key}_{col}" for col in lst_base_advanced)

        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)
        expected_features = scaler.feature_names_in_
        if len(df_input.columns) != len(expected_features):
            print(f"\n⚠️ ERROR: Número de features no coincide")
            print(f"   Esperadas: {len(expected_features)}")
            print(f"   Recibidas: {len(df_input.columns)}")
            return {"error": "Desajuste de features", "prediccion": None}
        # Reorder the columns the way the scaler was fitted
        df_input = df_input[expected_features]
        X_input_scaled = pd.DataFrame(
            scaler.transform(df_input),
            columns=df_input.columns
        )

        # ===========================
        # PREDICTION
        # ===========================
        # Cast to a native float so the returned dict holds no NumPy scalar
        prediccion = float(xgb_model.predict(X_input_scaled)[0])

        # ===========================
        # POISSON PROBABILITY ANALYSIS
        # ===========================
        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)

        # ===========================
        # DETAILED STATISTICS
        # ===========================
        def media(tabla, columna):
            # Column mean, or 0 when the slice holds no matches
            return tabla[columna].mean() if len(tabla) > 0 else 0

        local_ck_home = media(team1_home, 'Pass Types_CK')
        local_xg_home = media(team1_home, 'Expected_xG')
        local_poss_home = media(team1_home, 'Poss')
        away_ck_away = media(team2_away, 'Pass Types_CK')
        away_xg_away = media(team2_away, 'Expected_xG')
        away_poss_away = media(team2_away, 'Poss')
        local_ck_received = media(team1_opp_home, 'Pass Types_CK')
        away_ck_received = media(team2_opp_away, 'Pass Types_CK')
        partido_ck_esperado = local_ck_home + away_ck_away
        h2h_ck_local = media(team1_h2h, 'Pass Types_CK')
        h2h_ck_away = media(team2_h2h, 'Pass Types_CK')
        h2h_total = h2h_ck_local + h2h_ck_away

        # ===========================
        # REPORT: PREDICTION AND HISTORY
        # ===========================
        print(f"\n🎲 PREDICCIÓN MODELO: {prediccion:.2f} corners totales")
        print(f"   PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")
        print(f"\n📊 ESTADÍSTICAS HISTÓRICAS:")
        print(f"   {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
        print(f"   {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
        print(f"   Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
        print(f"   Total esperado (suma): {partido_ck_esperado:.1f} corners")
        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
            print(f"\n🔄 HEAD TO HEAD (últimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
            print(f"   {local}: {h2h_ck_local:.1f} CK/partido")
            print(f"   {visitante}: {h2h_ck_away:.1f} CK/partido")
            print(f"   Promedio total: {h2h_total:.1f} corners")

        # ===========================
        # REPORT: EXACT PROBABILITIES
        # ===========================
        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])
        print(f"\n📈 PROBABILIDADES EXACTAS (Poisson):")
        for k in sorted(analisis['exactas'].keys()):
            prob = analisis['exactas'][k]
            bar = '█' * int(prob / 2)
            marca = ' ⭐' if k == valor_mas_probable[0] else ''
            print(f"   {k:2d} corners: {prob:5.2f}% {bar}{marca}")
        print(f"\n✅ Valor más probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")

        # 80 % range: add outcomes from most to least likely until 80 % is covered
        probs_sorted = sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True)
        cumsum = 0
        rango_80 = []
        for val, prob in probs_sorted:
            cumsum += prob
            rango_80.append(val)
            if cumsum >= 80:
                break
        print(f"📊 Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")

        # ===========================
        # REPORT: OVER/UNDER WITH IMPLIED ODDS
        # ===========================
        print(f"\n🎯 ANÁLISIS OVER/UNDER:")
        print(f"{'Línea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
        print("-" * 85)
        for linea in [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]:
            prob_over = analisis['over'][linea]
            prob_under = analisis['under'][linea]
            # Implied (fair) decimal odds are the inverse of the probability
            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999
            conf_over = clasificar_confianza(prob_over)
            print(f"O/U {linea:<5} {prob_over:6.2f}%     @{cuota_impl_over:5.2f}      {conf_over:<15} {prob_under:6.2f}%     @{cuota_impl_under:5.2f}")

        # ===========================
        # REPORT: RECOMMENDATIONS
        # ===========================
        print(f"\n💡 RECOMENDACIONES DE APUESTA:")
        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]
        if mejores_over:
            print(f"\n✅ OVER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_over, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f"   • Over {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
        if mejores_under:
            print(f"\n✅ UNDER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_under, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f"   • Under {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")
        if not mejores_over and not mejores_under:
            print(f"   ⚠️ No hay apuestas con confianza MEDIA o superior")

        # ===========================
        # RISK ANALYSIS
        # ===========================
        df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
        # Guarantee every key read below (and by batch callers) exists even
        # when the reliability lookup answers with a reduced "insufficient
        # data" dict; otherwise a finished prediction is lost to a KeyError.
        riesgo = {
            'fiabilidad': 'DESCONOCIDO',
            'score_local': 0,
            'score_away': 0,
            'score_promedio': 0,
            'nivel_local': 'DESCONOCIDO',
            'nivel_away': 'DESCONOCIDO',
            'mensaje': '⚠️ Datos insuficientes',
            'cv_local': 0,
            'cv_away': 0,
            'consistencia_local': 0,
            'consistencia_away': 0,
        }
        riesgo.update(obtener_fiabilidad_partido(local, visitante, df_varianza_temp))
        print(f"\n⚠️ ANÁLISIS DE RIESGO:")
        print(f"   Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
        print(f"   Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
        print(f"   🎲 FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
        print(f"   💡 {riesgo['mensaje']}")

        # ===========================
        # RESULT DICTIONARY
        # ===========================
        return {
            "prediccion": round(prediccion, 2),
            "local": local,
            "visitante": visitante,
            "ppp_local": local_ppp,
            "ppp_away": away_ppp,
            "ppp_diff": ppp_diff,
            "riesgo": riesgo,
            "stats": {
                "local_ck": local_ck_home,
                "away_ck": away_ck_away,
                "local_ck_received": local_ck_received,
                "away_ck_received": away_ck_received,
                "h2h_total": h2h_total,
                "partido_esperado": partido_ck_esperado
            },
            "probabilidades_exactas": analisis['exactas'],
            "probabilidades_over": analisis['over'],
            "probabilidades_under": analisis['under'],
            "valor_mas_probable": valor_mas_probable[0],
            "prob_mas_probable": valor_mas_probable[1],
            "rango_80": (min(rango_80), max(rango_80))
        }
    except Exception as e:
        # Boundary handler: report the failure and hand back an error dict so
        # batch callers can move on to the next fixture.
        print(f"\n❌ ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e), "prediccion": None}
def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP", export_csv=True, filename=None, df_database=pd.DataFrame(), xgb_model="", scaler="", lst_years=[]):
    """
    Predict corners for several fixtures and optionally export the results to CSV.

    Args:
        partidos: List of tuples [(local1, visitante1), (local2, visitante2), ...].
        jornada: Round number.
        temporada: Season code (format "2526").
        league_code: League code ("ESP", "GER", "FRA", "ITA", "NED", ...).
        export_csv: When True and at least one fixture succeeded, write a CSV.
        filename: CSV file name; defaults to
            "predicciones_<league>_J<round>_<season>.csv".
        df_database, xgb_model, scaler, lst_years: forwarded unchanged to
            ``predecir_corners``.

    Returns:
        DataFrame with one row per successfully predicted fixture. Fixtures
        whose prediction reports an error are skipped; the frame is empty when
        none succeeds.
    """
    # Function-scope import: only needed to fill in lines missing upstream
    from scipy.stats import poisson

    def probabilidad_linea(resultado, linea, tipo):
        # Probability (%) of over/under `linea`. Lines absent from the
        # prediction result (6.5 is requested below but not produced upstream)
        # are derived from the Poisson mean instead of being reported as 0 %.
        # The mean used is the prediction rounded to two decimals.
        tabla = resultado[f'probabilidades_{tipo}']
        if linea in tabla:
            return tabla[linea]
        lam = resultado['prediccion']
        if tipo == 'over':
            return float(poisson.sf(linea, lam) * 100)
        return float(poisson.cdf(linea, lam) * 100)

    resultados = []
    print("\n" + "=" * 120)
    print(f"🎯 PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
    print("=" * 120)

    for idx, (local, visitante) in enumerate(partidos, 1):
        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")
        resultado = predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=df_database,
            xgb_model=xgb_model,
            scaler=scaler,
            lst_years=lst_years)
        if resultado.get("error"):
            print(f"   ❌ Error: {resultado['error']}")
            continue

        stats = resultado['stats']
        riesgo = resultado['riesgo']

        # ===========================
        # BUILD THE DATA ROW
        # ===========================
        fila = {
            'Partido': f"{local} vs {visitante}",
            'Local': local,
            'Visitante': visitante,
            'Liga': league_code,
            'Jornada': jornada,
            'Temporada': temporada,
            # Prediction
            'Prediccion': resultado['prediccion'],
            'Valor_Mas_Probable': resultado['valor_mas_probable'],
            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
            'Rango_80%_Min': resultado['rango_80'][0],
            'Rango_80%_Max': resultado['rango_80'][1],
            # PPP
            'PPP_Local': round(resultado['ppp_local'], 2),
            'PPP_Away': round(resultado['ppp_away'], 2),
            'PPP_Diferencia': round(resultado['ppp_diff'], 2),
            # Historical statistics
            'CK_Local_Casa': round(stats['local_ck'], 1),
            'CK_Away_Fuera': round(stats['away_ck'], 1),
            'CK_Local_Recibidos': round(stats['local_ck_received'], 1),
            'CK_Away_Recibidos': round(stats['away_ck_received'], 1),
            'CK_Esperado_Suma': round(stats['partido_esperado'], 1),
            'CK_H2H_Total': round(stats['h2h_total'], 1) if stats['h2h_total'] > 0 else 'N/A',
            # Risk
            'Fiabilidad_Partido': riesgo['fiabilidad'],
            'Score_Fiabilidad': round(riesgo['score_promedio'], 1),
            'Nivel_Local': riesgo['nivel_local'],
            'Nivel_Away': riesgo['nivel_away'],
            'CV_Local_%': round(riesgo['cv_local'], 1),
            'CV_Away_%': round(riesgo['cv_away'], 1),
        }

        # ===========================
        # OVER 6.5 to 10.5 / UNDER 12.5 to 9.5
        # ===========================
        columnas_lineas = (
            ('over', 'Over', [6.5, 7.5, 8.5, 9.5, 10.5]),
            ('under', 'Under', [12.5, 11.5, 10.5, 9.5]),
        )
        for tipo, prefijo, lineas in columnas_lineas:
            for linea in lineas:
                prob = probabilidad_linea(resultado, linea, tipo)
                fila[f'{prefijo}_{linea}_Prob_%'] = round(prob, 2)
                fila[f'{prefijo}_{linea}_Cuota'] = round(100 / prob, 2) if prob > 0 else 999
                fila[f'{prefijo}_{linea}_Confianza'] = clasificar_confianza(prob)

        # ===========================
        # RECOMMENDATIONS
        # ===========================
        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]
        score = riesgo['score_promedio']
        if score < 35:
            fila['Recomendacion'] = "⛔ EVITAR - Baja fiabilidad"
            fila['Es_Apostable'] = "NO"
        elif not mejores_over and not mejores_under:
            fila['Recomendacion'] = "⚠️ NO RECOMENDADO - Sin confianza suficiente"
            fila['Es_Apostable'] = "NO"
        else:
            recomendaciones = []
            for etiqueta, candidatos in (("Over", mejores_over), ("Under", mejores_under)):
                if candidatos:
                    linea, prob = max(candidatos, key=lambda x: x[1])
                    cuota = round(100 / prob, 2)
                    recomendaciones.append(f"{etiqueta} {linea} ({prob:.1f}% @{cuota})")
            fila['Recomendacion'] = " | ".join(recomendaciones)
            if score >= 65:
                fila['Es_Apostable'] = "SÍ ⭐⭐⭐"
            elif score >= 50:
                fila['Es_Apostable'] = "SÍ ✅"
            else:
                fila['Es_Apostable'] = "PRECAUCIÓN 🟡"
        fila['Mensaje_Riesgo'] = riesgo['mensaje']
        resultados.append(fila)
        print(f"   ✅ Completado")

    # ===========================
    # BUILD THE DATAFRAME
    # ===========================
    df_resultados = pd.DataFrame(resultados)
    print("\n" + "=" * 120)
    print(f"✅ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
    print("=" * 120)

    # ===========================
    # CSV EXPORT
    # ===========================
    if export_csv and len(df_resultados) > 0:
        if filename is None:
            filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"
        df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"\n💾 Resultados exportados a: {filename}")

    # ===========================
    # SUMMARY
    # ===========================
    if len(df_resultados) > 0:
        apostable = df_resultados['Es_Apostable']
        n_apostables = int(apostable.str.contains('SÍ').sum())
        n_alta = int((apostable == 'SÍ ⭐⭐⭐').sum())
        n_media = int((apostable == 'SÍ ✅').sum())
        n_evitar = int((apostable == 'NO').sum())
    else:
        # No fixture succeeded: the frame has no columns, so looking up
        # 'Es_Apostable' would raise KeyError.
        n_apostables = n_alta = n_media = n_evitar = 0
    print(f"\n📊 RESUMEN DE APUESTAS:")
    print(f"   Partidos apostables: {n_apostables} / {len(df_resultados)}")
    print(f"   Partidos ALTA confianza (⭐⭐⭐): {n_alta}")
    print(f"   Partidos MEDIA confianza (✅): {n_media}")
    print(f"   Partidos a evitar (⛔): {n_evitar}")
    return df_resultados
+def mostrar_resumen_batch(df_resultados):
+    """Muestra resumen visual de los resultados"""
+    print("\n" + "=" * 120)
+    print("🎯 MEJORES OPORTUNIDADES DE APUESTA")
+    print("=" * 120)
+    # Filtrar solo apostables
+    df_apostables = df_resultados[df_resultados['Es_Apostable'].str.contains('SÍ')].copy()
+    if len(df_apostables) == 0:
+        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
+        return
+    # Ordenar por score de fiabilidad
+    df_apostables = df_apostables.sort_values('Score_Fiabilidad', ascending=False)
+    for idx, row in df_apostables.iterrows():
+        print(f"\n{'='*120}")
+        print(f"🏟️  {row['Partido']}")
+        print(f"{'='*120}")
+        print(f"📊 Predicción: {row['Prediccion']:.2f} corners | Valor más probable: {row['Valor_Mas_Probable']} ({row['Prob_Valor_Mas_Probable_%']:.1f}%)")
+        print(f"📈 Histórico: Local {row['CK_Local_Casa']:.1f} CK | Away {row['CK_Away_Fuera']:.1f} CK | H2H: {row['CK_H2H_Total']}")
+        print(f"🎲 Fiabilidad: {row['Fiabilidad_Partido']} (Score: {row['Score_Fiabilidad']:.1f}/100)")
+        print(f"💡 {row['Recomendacion']}")
+        # Mostrar líneas con alta probabilidad
+        print(f"\n   📌 Líneas destacadas:")
+        for linea in [7.5, 8.5, 9.5, 10.5]:
+            over_prob = row.get(f'Over_{linea}_Prob_%', 0)
+            under_prob = row.get(f'Under_{linea}_Prob_%', 0)
+            if over_prob >= 55:
+                cuota = row.get(f'Over_{linea}_Cuota', 0)
+                conf = row.get(f'Over_{linea}_Confianza', '')
+                print(f"   • Over {linea}: {over_prob:.1f}% @{cuota:.2f} - {conf}")
+            if under_prob >= 55:
+                cuota = row.get(f'Under_{linea}_Cuota', 0)
+                conf = row.get(f'Under_{linea}_Confianza', '')
+                print(f"   • Under {linea}: {under_prob:.1f}% @{cuota:.2f} - {conf}")
+class USE_MODEL():
+    def __init__(self):
+        self.load_models()
+        self.load_data()
+        self.init_variables()
+    def init_variables(self):
+        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
+        print("Variables Loaded...")
+    def load_data(self):
+        #self.df_dataset = pd.read_csv(r"dataset\processed\dataset_processed.csv")
+        import os
+        #load clean dataset generated on generate_dataset.py
+        self.df_dataset_historic = pd.read_csv("dataset/cleaned/dataset_cleaned.csv")
+        if os.path.exists(r"dataset/cleaned/dataset_cleaned_current_year.csv"):
+            self.df_dataset_current_year = pd.read_csv("dataset/cleaned/dataset_cleaned_current_year.csv")
+            self.df_dataset = pd.concat([self.df_dataset_historic,self.df_dataset_current_year])
+        else:
+            self.df_dataset = self.df_dataset_historic
+        self.df_dataset["season"] = self.df_dataset["season"].astype(str)
+        self.df_dataset["Performance_Save%"].fillna(0)
+        print("Data Loaded...")
+    def load_models(self):
+        self.xgb_model = joblib.load('models/xgboost_corners_optimized_v2_6_leagues.pkl')
+        self.scaler = joblib.load('models/scaler_corners_xgb_v2_6_leagues.pkl')
+        print("Models Ready...")
+    def consume_model(self,partidos,jornada,temporada,league_code):
+        df_predict = predecir_partidos_batch(
+            partidos=partidos,
+            jornada=jornada,
+            temporada=temporada,
+            league_code=league_code,
+            export_csv=True,
+            filename=f"results\{league_code}\{league_code}-{temporada}-{jornada}-predicciones.csv",
+            df_database = self.df_dataset,
+            xgb_model = self.xgb_model,
+            scaler=self.scaler,
+            lst_years=self.lst_years
+        )
+        # Mostrar resumen
+        mostrar_resumen_batch(df_predict)
+    def kelly_stats(self,p, odds, fraction=0.2):
+        b = odds - 1
+        q = 1 - p
+        f_star = (b * p - q) / b
+        f_star = max(f_star, 0)  # evita negativos
+        return f_star * fraction  # usa 0.1 para Kelly 10%
+# Script section: builds the predictor (USE_MODEL.__init__ loads the models and the CSV data)
+# and runs one batch prediction.
+# NOTE(review): there is no `if __name__ == "__main__":` guard, so this also runs on import.
+a = USE_MODEL()
+# Fixtures to predict; presumably (local, visitante) pairs — confirm against predecir_partidos_batch.
+partidos = [
+    ("Werder Bremen", "Wolfsburg"),
+    ("Hoffenheim", "RB Leipzig"),
+    ("Leverkusen", "Heidenheim"),
+    ("Hamburger SV", "Dortmund"),
+    ("Union Berlin", "Bayern"),
+    ("Gladbach", "Köln"),
+    ("Freiburg", "St. Pauli"),
+    ("Stuttgart", "Augsburg"),
+    ("Eint Frankfurt", "Mainz 05")
+]
+# Matchweek 10 of season "2526" for league code "GER"; consume_model exports a CSV and prints the summary.
+a.consume_model(
+    partidos=partidos,
+    jornada=10,
+    temporada="2526",
+    league_code="GER"
+)

src/models/train_model.py ADDED Viewed

	@@ -0,0 +1,425 @@

+import numpy as np
+import pandas as pd
+import json
+import os
+from datetime import datetime
+# MLflow
+import mlflow
+import mlflow.sklearn
+import mlflow.xgboost
+from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
+from xgboost import XGBRegressor
+import joblib
+class TRAIN_MODEL():
+    def __init__(self, nombre, use_grid_search=False, config_path="config/model_config.json"):
+        """
+        Entrenar modelo con tracking MLflow
+        Args:
+            nombre: Identificador del modelo (ej: "v3_production")
+            use_grid_search: True = buscar hiperparámetros, False = usar config guardado
+            config_path: Ruta al archivo de configuración con hiperparámetros
+        """
+        # ===========================
+        # CONFIGURACIÓN MLFLOW
+        # ===========================
+        mlflow.set_tracking_uri("file:./mlruns")
+        mlflow.set_experiment("corners_prediction")
+        self.nombre = nombre
+        self.use_grid_search = use_grid_search
+        self.config_path = config_path
+        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        # Iniciar run de MLflow
+        with mlflow.start_run(run_name=f"{nombre}_{self.timestamp}") as run:
+            self.run_id = run.info.run_id
+            print(f"\n{'='*80}")
+            print(f"🚀 Entrenamiento iniciado con MLflow")
+            print(f"   Run ID: {self.run_id}")
+            print(f"   Nombre: {nombre}")
+            print(f"   GridSearch: {'SÍ' if use_grid_search else 'NO (usando config)'}")
+            print(f"{'='*80}\n")
+            # Tags básicos
+            mlflow.set_tags({
+                "model_name": nombre,
+                "timestamp": self.timestamp,
+                "grid_search_used": str(use_grid_search),
+                "framework": "XGBoost",
+                "task": "regression"
+            })
+            # Pipeline de entrenamiento
+            try:
+                self.init_variables()
+                self.load_dataset()
+                self.split_train_test(0.15)
+                self.define_model()
+                if use_grid_search:
+                    print("🔍 Ejecutando GridSearch (puede tardar)...")
+                    self.train_grid_search()
+                    self.save_best_params()  # Guardar para futuros entrenamientos
+                else:
+                    print("⚡ Usando hiperparámetros guardados (rápido)")
+                    self.load_best_params()
+                self.train_model()
+                self.test_and_eval()
+                self.top_features()
+                self.save_models(nombre)
+                mlflow.set_tag("status", "SUCCESS")
+                print(f"\n✅ Entrenamiento completado")
+                print(f"📊 Ver en MLflow UI: mlflow ui")
+            except Exception as e:
+                mlflow.set_tag("status", "FAILED")
+                print(f"\n❌ Error: {e}")
+                raise
+    def init_variables(self):
+        """Definir espacio de búsqueda para GridSearch"""
+        # ✅ GRID INTELIGENTE (~243 combinaciones = 1-3 horas)
+        self.param_grid = {
+            'n_estimators': [200],              # 1 valor (200 suele ser óptimo)
+            'max_depth': [3, 4, 5],             # 3 valores (clave)
+            'learning_rate': [0.02, 0.03],      # 2 valores (0.01 es muy lento)
+            'reg_alpha': [3.0, 5.0],            # 2 valores
+            'reg_lambda': [5.0, 8.0],           # 2 valores
+            'gamma': [0.5, 1.0],                # 2 valores
+            'subsample': [0.7],                 # 1 valor (0.7 suele funcionar)
+            'colsample_bytree': [0.7],          # 1 valor
+            'colsample_bylevel': [0.6],         # 1 valor
+            'min_child_weight': [5, 7]          # 2 valores
+        }
+        # Combinaciones: 1 × 3 × 2 × 2 × 2 × 2 × 1 × 1 × 1 × 2 = 192
+        # Tiempo: ~1.5-3 horas ⏱️
+        # Loggear configuración del grid
+        if self.use_grid_search:
+            for param, values in self.param_grid.items():
+                mlflow.log_param(f"grid_{param}", str(values))
+        print("✅ Variables inicializadas")
+    def load_dataset(self):
+        """Cargar y preparar dataset"""
+        self.df_data = pd.read_csv("dataset/processed/dataset_processed.csv")
+        self.y = self.df_data["y"]
+        self.df_data = self.df_data.drop(["y"], axis=1)
+        self.y_array = np.array(self.y).flatten()
+        # Filtrar outliers (3-17 corners)
+        mask = (self.y_array >= 3) & (self.y_array <= 17)
+        self.df_data = self.df_data[mask].copy()
+        self.y_array = self.y_array[mask]
+        # Limpiar nulos
+        if self.df_data.isnull().any().any():
+            self.df_data = self.df_data.fillna(0)
+        # Loggear info del dataset
+        mlflow.log_params({
+            "dataset_samples": len(self.df_data),
+            "dataset_features": self.df_data.shape[1],
+            "target_min": float(self.y_array.min()),
+            "target_max": float(self.y_array.max()),
+            "target_mean": float(self.y_array.mean()),
+            "target_std": float(self.y_array.std())
+        })
+        print(f"✅ Dataset cargado: {self.df_data.shape}")
+    def split_train_test(self, test_size_):
+        """Dividir datos en train/val/test"""
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
+            self.df_data, self.y_array,
+            test_size=test_size_,
+            random_state=42,
+            shuffle=True
+        )
+        # Escalar
+        self.scaler = StandardScaler()
+        self.X_train = pd.DataFrame(
+            self.scaler.fit_transform(self.X_train),
+            columns=self.X_train.columns
+        )
+        self.X_test = pd.DataFrame(
+            self.scaler.transform(self.X_test),
+            columns=self.X_test.columns
+        )
+        # Split validación
+        self.X_train_fit, self.X_val, self.y_train_fit, self.y_val = train_test_split(
+            self.X_train, self.y_train,
+            test_size=0.15,
+            random_state=43
+        )
+        # Loggear splits
+        mlflow.log_params({
+            "train_samples": len(self.X_train_fit),
+            "val_samples": len(self.X_val),
+            "test_samples": len(self.X_test),
+            "test_size": test_size_
+        })
+        print(f"✅ Train: {len(self.X_train_fit)} | Val: {len(self.X_val)} | Test: {len(self.X_test)}")
+    def define_model(self):
+        """Definir modelo base y GridSearch"""
+        self.xgb_base = XGBRegressor(
+            objective="reg:squarederror",
+            tree_method="hist",
+            random_state=42,
+            n_jobs=-1,
+            verbosity=0
+        )
+        if self.use_grid_search:
+            self.kfold = KFold(n_splits=5, shuffle=True, random_state=42)
+            self.mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)
+            self.grid_search = GridSearchCV(
+                estimator=self.xgb_base,
+                param_grid=self.param_grid,
+                cv=self.kfold,
+                scoring=self.mae_scorer,
+                n_jobs=-1,
+                verbose=2,
+                return_train_score=True
+            )
+    def train_grid_search(self):
+        """Ejecutar GridSearch y guardar mejores params"""
+        print("\n🔍 Buscando mejores hiperparámetros...")
+        self.grid_search.fit(self.X_train_fit, self.y_train_fit)
+        # Mejores parámetros
+        self.best_params = self.grid_search.best_params_
+        # Loggear en MLflow
+        for param, value in self.best_params.items():
+            mlflow.log_param(f"best_{param}", value)
+        mlflow.log_metric("cv_best_mae", -self.grid_search.best_score_)
+        print(f"\n✅ Mejores hiperparámetros encontrados:")
+        for param, value in self.best_params.items():
+            print(f"   {param}: {value}")
+        print(f"   CV MAE: {-self.grid_search.best_score_:.4f}")
+    def save_best_params(self):
+        """Guardar mejores hiperparámetros en archivo JSON"""
+        os.makedirs("config", exist_ok=True)
+        config = {
+            "model_name": self.nombre,
+            "timestamp": self.timestamp,
+            "best_params": self.best_params,
+            "cv_mae": float(-self.grid_search.best_score_),
+            "run_id": self.run_id
+        }
+        with open(self.config_path, 'w') as f:
+            json.dump(config, f, indent=4)
+        # Loggear archivo en MLflow
+        mlflow.log_artifact(self.config_path)
+        print(f"💾 Hiperparámetros guardados en: {self.config_path}")
+    def load_best_params(self):
+        """Cargar hiperparámetros desde archivo JSON"""
+        if not os.path.exists(self.config_path):
+            raise FileNotFoundError(
+                f"No se encontró {self.config_path}. "
+                "Ejecuta primero con use_grid_search=True"
+            )
+        with open(self.config_path, 'r') as f:
+            config = json.load(f)
+        self.best_params = config["best_params"]
+        # Loggear params en MLflow
+        for param, value in self.best_params.items():
+            mlflow.log_param(f"loaded_{param}", value)
+        mlflow.log_param("config_source", self.config_path)
+        mlflow.log_param("previous_cv_mae", config.get("cv_mae", "N/A"))
+        print(f"✅ Hiperparámetros cargados desde: {self.config_path}")
+        print(f"   Origen: {config.get('model_name', 'unknown')} ({config.get('timestamp', 'unknown')})")
+    def train_model(self):
+        """Entrenar modelo final con mejores params"""
+        self.xgb_model = XGBRegressor(
+            **self.best_params,
+            objective="reg:squarederror",
+            tree_method="hist",
+            random_state=42,
+            n_jobs=-1,
+            verbosity=0
+        )
+        self.xgb_model.fit(
+            self.X_train_fit,
+            self.y_train_fit,
+            eval_set=[(self.X_val, self.y_val)],
+            verbose=False
+        )
+        print("✅ Modelo entrenado")
+    def test_and_eval(self):
+        """Evaluar y loggear métricas"""
+        # Predicciones
+        y_train_pred = self.xgb_model.predict(self.X_train_fit)
+        y_val_pred = self.xgb_model.predict(self.X_val)
+        y_test_pred = self.xgb_model.predict(self.X_test)
+        # Calcular métricas
+        metrics = {
+            'train': {
+                'mae': mean_absolute_error(self.y_train_fit, y_train_pred),
+                'rmse': np.sqrt(mean_squared_error(self.y_train_fit, y_train_pred)),
+                'r2': r2_score(self.y_train_fit, y_train_pred)
+            },
+            'val': {
+                'mae': mean_absolute_error(self.y_val, y_val_pred),
+                'rmse': np.sqrt(mean_squared_error(self.y_val, y_val_pred)),
+                'r2': r2_score(self.y_val, y_val_pred)
+            },
+            'test': {
+                'mae': mean_absolute_error(self.y_test, y_test_pred),
+                'rmse': np.sqrt(mean_squared_error(self.y_test, y_test_pred)),
+                'r2': r2_score(self.y_test, y_test_pred)
+            }
+        }
+        # Loggear TODAS las métricas en MLflow
+        for set_name, set_metrics in metrics.items():
+            for metric_name, value in set_metrics.items():
+                mlflow.log_metric(f"{set_name}_{metric_name}", value)
+        # Cross-validation
+        cv_mae = cross_val_score(
+            self.xgb_model, self.X_train, self.y_train,
+            cv=5, scoring='neg_mean_absolute_error'
+        )
+        cv_r2 = cross_val_score(
+            self.xgb_model, self.X_train, self.y_train,
+            cv=5, scoring='r2'
+        )
+        mlflow.log_metric("cv_mae_mean", -cv_mae.mean())
+        mlflow.log_metric("cv_mae_std", cv_mae.std())
+        mlflow.log_metric("cv_r2_mean", cv_r2.mean())
+        mlflow.log_metric("cv_r2_std", cv_r2.std())
+        # Análisis de errores
+        test_errors = np.abs(self.y_test - y_test_pred)
+        mlflow.log_metric("test_error_median", float(np.median(test_errors)))
+        mlflow.log_metric("test_error_p90", float(np.percentile(test_errors, 90)))
+        mlflow.log_metric("test_pct_error_lt_2", float((test_errors < 2.0).sum() / len(test_errors) * 100))
+        # Gap de overfitting
+        gap = metrics['train']['r2'] - metrics['test']['r2']
+        mlflow.log_metric("overfitting_gap", gap)
+        print(f"\n📊 MÉTRICAS:")
+        print(f"   Train MAE: {metrics['train']['mae']:.4f} | R²: {metrics['train']['r2']:.4f}")
+        print(f"   Val   MAE: {metrics['val']['mae']:.4f} | R²: {metrics['val']['r2']:.4f}")
+        print(f"   Test  MAE: {metrics['test']['mae']:.4f} | R²: {metrics['test']['r2']:.4f}")
+        print(f"   CV    MAE: {-cv_mae.mean():.4f} ± {cv_mae.std():.4f}")
+        print(f"   Overfitting Gap: {gap:.4f}")
+    def top_features(self):
+        """Guardar importancia de features"""
+        feature_importance = pd.DataFrame({
+            'feature': self.df_data.columns,
+            'importance': self.xgb_model.feature_importances_
+        }).sort_values('importance', ascending=False)
+        # Guardar CSV
+        feature_importance.to_csv(f"models/feature_importance_{self.nombre}.csv", index=False)
+        mlflow.log_artifact(f"models/feature_importance_{self.nombre}.csv")
+        # Loggear top 10
+        for idx, row in feature_importance.head(10).iterrows():
+            mlflow.log_metric(f"feat_imp_{row['feature']}", row['importance'])
+        print(f"\n🔍 Top 5 features:")
+        for idx, row in feature_importance.head(5).iterrows():
+            print(f"   {row['feature']}: {row['importance']:.4f}")
+    def save_models(self, nombre):
+        """Guardar modelos localmente y en MLflow"""
+        os.makedirs("models", exist_ok=True)
+        # Paths
+        model_path = f'models/xgboost_corners_{nombre}.pkl'
+        scaler_path = f'models/scaler_corners_{nombre}.pkl'
+        # Guardar archivos
+        joblib.dump(self.xgb_model, model_path)
+        joblib.dump(self.scaler, scaler_path)
+        # Loggear en MLflow
+        mlflow.xgboost.log_model(
+            self.xgb_model,
+            artifact_path="model",
+            registered_model_name=f"corners_predictor"
+        )
+        mlflow.log_artifact(scaler_path, artifact_path="preprocessing")
+        print(f"\n💾 Modelos guardados:")
+        print(f"   {model_path}")
+        print(f"   {scaler_path}")
+        print(f"   MLflow Model Registry ✓")
+# ===========================
+# USAGE
+# ===========================
+if __name__ == "__main__":
+    # ========================================
+    # OPTION 1: first run, or every 3-6 months
+    # Run GridSearch (SLOW; author's estimate 30-60 min)
+    # ========================================
+    # model = TRAIN_MODEL(
+    #     nombre="v4_grid_search",
+    #     use_grid_search=True  # Searches for the best hyperparameters
+    # )
+    # ========================================
+    # OPTION 2: regular retraining
+    # Use saved hyperparameters (FAST; author's estimate 2-5 min)
+    # NOTE(review): the call below passes use_grid_search=True, which runs GridSearch
+    # instead of loading config/model_config.json — confirm whether False was intended.
+    # ========================================
+    model = TRAIN_MODEL(
+        nombre="v4_retrain",
+        use_grid_search=True  # NOTE(review): True = grid search; False would load config/model_config.json
+    )

src/process_data/__init__.py ADDED Viewed

File without changes

src/process_data/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (171 Bytes). View file

src/process_data/__pycache__/process_dataset.cpython-311.pyc ADDED Viewed

Binary file (27.7 kB). View file

src/process_data/generate_dataset.py ADDED Viewed

	@@ -0,0 +1,211 @@

+import sys
+import os
+# Añadir la ruta raíz del proyecto al PYTHONPATH
+project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
+sys.path.insert(0, project_root)
+from src.utils.helper import desactivar_advertencias
+import soccerdata as sd
+import pandas as pd
+def extract_local(game_str):
+    try:
+        parts = game_str.split(" ", 1)[1].split("-")
+        return parts[0].strip() if len(parts) > 0 else None
+    except (IndexError, AttributeError):
+        return None
+def extract_away(game_str):
+    try:
+        parts = game_str.split(" ", 1)[1].split("-")
+        return parts[1].strip() if len(parts) > 1 else None
+    except (IndexError, AttributeError):
+        return None
+class GENERATE_DATASET():
+    def __init__(self,current_year):
+        print("Clase GENERATE_DATASET Inicializada")
+        desactivar_advertencias()
+        self.init_variables()
+        self.mergue_raw_data_all_leagues(current_year)
+        self.process_and_output_dataset(current_year)
+    def init_variables(self):
+        #Years to get from datasource
+        self.LST_YEARS_CONFIG = [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]
+        self.dic_historic_all_leagues = {
+            "ENG": {},
+            "ESP": {},
+            "GER": {},
+            "FRA": {},
+            "ITA": {},
+            "NED": {},
+            "ENG2": {},
+            "POR": {},
+            "BEL": {}
+        }
+        self.df_database = pd.DataFrame()
+        # Diccionary to name leagues to get from datasource
+        self.DIC_LEAGUES_CONFIG = {
+            "ENG": {
+                "name": "ENG-Premier League",
+                "code": "ENG"
+            },
+            "POR": {
+                "name": "POR-Primeira Liga",
+                "code": "POR"
+            },
+            "BEL": {
+                "name": "BEL-Belgian Pro League",
+                "code": "BEL"
+            },
+            "ESP": {
+                "name": "ESP-La Liga",
+                "code": "ESP"
+            },
+            "GER": {
+                "name": "GER-Bundesliga",
+                "code": "GER"
+            },
+            "FRA": {
+                "name": "FRA-Ligue 1",
+                "code": "FRA"
+            },
+            "ITA": {
+                "name": "ITA-Serie A",
+                "code": "ITA"
+            },
+            "NED": {
+                "name": "NED-Eredivisie",
+                "code": "NED"
+            }
+        }
+        lst_base = ['season','date','game','round','day','venue','team','GF','GA','opponent',"result"]
+        lst_columns_shooting = ['Expected_xG','Standard_Sh','Standard_SoT','Standard_Dist']
+        lst_columns_passing_type = ['Pass Types_CK']
+        lst_columns_passing = ['Total_Att','Long_Att','Ast','1/3','PrgP']
+        lst_columns_defensive = ['Tackles_Att 3rd','Tackles_Tkl','Blocks_Blocks','Int','Clr']
+        lst_columns_keeper = ['Performance_Save%']
+        lst_columns_shot_creation = ['SCA Types_SCA']
+        lst_columns_misc = ['Performance_Crs']
+        lst_columns_possesion = ['Poss', 'Touches_Att 3rd','Carries_PrgC','Touches_Touches','Touches_Att Pen','Carries_Carries','Carries_1/3','Carries_CPA']
+        self.lst_columns_combined = lst_base + lst_columns_passing_type +lst_columns_passing+lst_columns_defensive+lst_columns_shooting+lst_columns_keeper+lst_columns_shot_creation+lst_columns_misc+lst_columns_possesion
+        print("-Variables inicializadas")
+    def get_raw_data_from_source(self,league,year):
+        print(f"\nLiga {league}... 📅 Año {year}...", end=" ")
+                    # Extraer equipos local/visitante
+        if league["name"] in ["NED-Eredivisie","POR-Primeira Liga","ENG-Championship"] and year == 2017:
+            return
+        # Crear scraper para la liga específica
+        fbref = sd.FBref(leagues=league["name"], seasons=year)
+        # Leer estadísticas
+        team_season_shooting = fbref.read_team_match_stats(stat_type="shooting",opponent_stats = False)
+        team_season_passing_types = fbref.read_team_match_stats(stat_type="passing_types",opponent_stats = False)
+        team_season_passing = fbref.read_team_match_stats(stat_type="passing",opponent_stats = False)
+        team_season_defensive = fbref.read_team_match_stats(stat_type="defense",opponent_stats = False)
+        team_season_goalkeeping = fbref.read_team_match_stats(stat_type="keeper",opponent_stats = False)
+        team_season_goal_shot_creation = fbref.read_team_match_stats(stat_type="goal_shot_creation",opponent_stats = False)
+        team_season_goal_misc = fbref.read_team_match_stats(stat_type="misc",opponent_stats = False)
+        team_season_goal_possession = fbref.read_team_match_stats(stat_type="possession",opponent_stats = False)
+        df_concat = pd.concat([team_season_shooting,team_season_passing_types,team_season_passing,team_season_defensive,
+                        team_season_goalkeeping,team_season_goal_shot_creation,team_season_goal_misc,team_season_goal_possession], axis=1)
+        # Reset index
+        df_reset = df_concat.copy().reset_index()
+        # Aplanar MultiIndex
+        df_reset.columns = [
+            '_'.join(col).strip('_') if isinstance(col, tuple) else col
+            for col in df_reset.columns.values
+        ]
+        # Eliminar duplicados
+        df_reset = df_reset.loc[:, ~df_reset.columns.duplicated()]
+        df_filtered = df_reset[self.lst_columns_combined]
+        df_filtered["local"] = df_filtered["game"].apply(extract_local)
+        df_filtered["away"] = df_filtered["game"].apply(extract_away)
+        # Agregar código de liga
+        df_filtered["league"] = league["code"]
+        df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated(keep='first')]
+        # Verificar valores problemáticos
+        problematic = df_filtered[df_filtered["away"].isna()]
+        if len(problematic) > 0:
+            print(f"⚠️ {len(problematic)} registros con formato incorrecto")
+        else:
+            print(f"✅ {len(df_filtered)} partidos extraídos")
+        return df_filtered
+    def mergue_raw_data_all_leagues(self, current_year):
+        all_dataframes = []
+        if current_year == True:
+        #Process only current year
+            for league_key, league_info in self.DIC_LEAGUES_CONFIG.items():
+                self.dic_historic_all_leagues[league_key][self.LST_YEARS_CONFIG[-1]] = self.get_raw_data_from_source(league_info,self.LST_YEARS_CONFIG[-1])
+        else:
+        #Process all years needed execpt for current year
+            for league_key, league_info in self.DIC_LEAGUES_CONFIG.items():
+                for year in self.LST_YEARS_CONFIG:
+                    if year == 2025:
+                        continue
+                    self.dic_historic_all_leagues[league_key][year] = self.get_raw_data_from_source(league_info,year)
+        for league_key, dic_historic in self.dic_historic_all_leagues.items():
+            for year, df in dic_historic.items():
+                all_dataframes.append(df)
+        self.df_database = pd.concat(all_dataframes, ignore_index=True)
+        print("Dataset conbinado")
+    def process_and_output_dataset(self,current_year):
+        # Filtrar solo Matchweek
+        self.df_database = self.df_database[self.df_database['round'].str.contains("Matchweek", na=False)]
+        self.df_database['round'] = self.df_database['round'].str.replace("Matchweek ", "")
+        # Convertir tipos
+        self.df_database['round'] = self.df_database['round'].astype(int)
+        self.df_database['GF'] = self.df_database['GF'].astype(int)
+        self.df_database['GA'] = self.df_database['GA'].astype(int)
+        self.df_database = self.df_database.drop_duplicates()
+        if current_year == True:
+            self.df_database.to_csv("dataset\cleaned\dataset_cleaned_current_year.csv",index=False)
+        else:
+            self.df_database.to_csv("dataset\cleaned\dataset_cleaned.csv",index=False)
+        print("Dataset cleaned and saved on dataset\cleaned")
+# Builds the historic dataset: with current_year=False the constructor downloads every configured
+# season except the last one and writes dataset_cleaned.csv.
+# NOTE(review): no `if __name__ == "__main__":` guard, so this also runs when the module is imported.
+a = GENERATE_DATASET(False)

src/process_data/process_dataset.py ADDED Viewed

	@@ -0,0 +1,584 @@

+import pandas as pd
+import os
+def get_ck(df, season, round_num, local, away, league=None):
+    """Obtiene corners totales de un partido específico"""
+    season_round = (df['season'] == season) & (df['round'] == round_num)
+    if league is not None:
+        season_round = season_round & (df['league'] == league)
+    df = df[season_round]
+    df_local = df[df['team'] == local]
+    df_away = df[df['team'] == away]
+    total_ck = df_local["Pass Types_CK"].sum() + df_away["Pass Types_CK"].sum()
+    return total_ck
+def get_dataframes(df, season, round_num, local, away, league=None):
+    """Retorna 8 DataFrames filtrados por equipo, venue y liga"""
+    season_round = (df['season'] == season) & (df['round'] < round_num)
+    if league is not None:
+        season_round = season_round & (df['league'] == league)
+    def filter_and_split(team_filter):
+        filtered = df[season_round & team_filter].copy()
+        home = filtered[filtered['venue'] == "Home"]
+        away = filtered[filtered['venue'] == "Away"]
+        return home, away
+    local_home, local_away = filter_and_split(df['team'] == local)
+    local_opp_home, local_opp_away = filter_and_split(df['opponent'] == local)
+    away_home, away_away = filter_and_split(df['team'] == away)
+    away_opp_home, away_opp_away = filter_and_split(df['opponent'] == away)
+    return (local_home, local_away, local_opp_home, local_opp_away,
+            away_home, away_away, away_opp_home, away_opp_away)
+def get_head_2_head(df, local, away, seasons=None, league=None):
+    """Obtiene últimos 3 enfrentamientos directos"""
+    if seasons is None:
+        seasons = []
+    df_filtered = df[df['season'].isin(seasons)] if seasons else df
+    if league is not None:
+        df_filtered = df_filtered[df_filtered['league'] == league]
+    local_h2h = df_filtered[(df_filtered['team'] == local) & (df_filtered['opponent'] == away)]
+    away_h2h = df_filtered[(df_filtered['team'] == away) & (df_filtered['opponent'] == local)]
+    if len(local_h2h) < 4:
+        return local_h2h.tail(2), away_h2h.tail(2)
+    return local_h2h.tail(3), away_h2h.tail(3)
+def get_points_from_result(result):
+    """Convierte resultado (W/D/L) a puntos"""
+    if result == 'W':
+        return 3
+    elif result == 'D':
+        return 1
+    else:
+        return 0
+# ✅ NUEVA FUNCIÓN: Calcular PPP (Puntos Por Partido)
+def get_team_ppp(df, team, season, round_num, league=None):
+    """
+    Calcula puntos por partido (PPP) de un equipo
+    Args:
+        df: DataFrame completo
+        team: Nombre del equipo
+        season: Temporada
+        round_num: Número de jornada (NO incluye esta jornada)
+        league: Código de liga (opcional)
+    Returns:
+        float: Puntos por partido (0-3)
+    """
+    team_matches = df[
+        (df['team'] == team) &
+        (df['season'] == season) &
+        (df['round'] < round_num)
+    ]
+    if league is not None:
+        team_matches = team_matches[team_matches['league'] == league]
+    if len(team_matches) == 0:
+        return 0.0
+    total_points = team_matches['result'].apply(get_points_from_result).sum()
+    ppp = total_points / len(team_matches)
+    return ppp
+# ✅ NUEVA FUNCIÓN: Calcular diferencia de PPP
+def get_ppp_difference(df, local, away, season, round_num, league=None):
+    """
+    Calcula la diferencia de puntos por partido entre local y visitante
+    Args:
+        df: DataFrame completo
+        local: Equipo local
+        away: Equipo visitante
+        season: Temporada
+        round_num: Jornada actual
+        league: Código de liga (opcional)
+    Returns:
+        float: Diferencia de PPP (local - away)
+    """
+    local_ppp = get_team_ppp(df, local, season, round_num, league)
+    away_ppp = get_team_ppp(df, away, season, round_num, league)
+    return local_ppp - away_ppp
+def get_average(df, is_team=False, lst_avg=None):
+    """Calcula promedios de estadísticas"""
+    if len(df) == 0:
+        # Retornar valores por defecto si el DataFrame está vacío
+        if is_team:
+            return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+        return (0, 0, 0, 0, 0, 0, 0, 0)
+    if is_team:
+        # ===========================
+        # ESTADÍSTICAS BÁSICAS (NORMALIZADAS)
+        # ===========================
+        avg_cross = (df['Performance_Crs'].sum() / len(df)) - lst_avg[3]
+        avg_att_3rd = (df['Touches_Att 3rd'].sum() / len(df)) - lst_avg[4]
+        avg_sca = (df['SCA Types_SCA'].sum() / len(df)) - lst_avg[2]
+        avg_xg = (df['Expected_xG'].sum() / len(df)) - lst_avg[1]
+        # ✅ CAMBIO: VARIANZA EN VEZ DE PROMEDIO DE CK
+        var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
+        avg_ck = (df['Pass Types_CK'].sum() / len(df)) - lst_avg[8]
+        avg_poss = (df['Poss'].sum() / len(df)) - 50
+        avg_gf = (df['GF'].sum() / len(df)) - lst_avg[5]
+        avg_ga = (df['GA'].sum() / len(df)) - lst_avg[6]
+        # ===========================
+        # MÉTRICAS OFENSIVAS AVANZADAS
+        # ===========================
+        # Precisión de tiros
+        total_sh = df['Standard_Sh'].sum()
+        sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
+        # Eficiencia xG por tiro
+        xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0
+        # Presencia atacante (% toques en área rival)
+        total_touches = df['Touches_Touches'].sum()
+        attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0
+        # Tiros por posesión
+        total_poss = df['Poss'].sum()
+        possession_shot = (total_sh / total_poss) if total_poss > 0 else 0
+        # Distancia promedio de tiros
+        standard_dist = df['Standard_Dist'].mean() if 'Standard_Dist' in df.columns else 0
+        # ===========================
+        # MÉTRICAS DE CREACIÓN
+        # ===========================
+        # Ratio de pases progresivos
+        total_passes = df['Total_Att'].sum()
+        progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
+        # Participación en último tercio
+        final_third_passes = df['1/3'].sum()
+        final_third_involvement = (final_third_passes / total_passes) if total_passes > 0 else 0
+        # Ratio de pases largos
+        long_ball_ratio = (df['Long_Att'].sum() / total_passes) if total_passes > 0 else 0
+        # Asistencias por SCA
+        total_sca = df['SCA Types_SCA'].sum()
+        assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
+        # Dependencia de centros
+        cross_dependency = (df['Performance_Crs'].sum() / total_passes) if total_passes > 0 else 0
+        # Eficiencia creativa
+        creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0
+        # ===========================
+        # MÉTRICAS DEFENSIVAS
+        # ===========================
+        # Intensidad de presión alta
+        total_tackles = df['Tackles_Tkl'].sum()
+        high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
+        # Ratio intercepciones/tackles
+        interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0
+        # Ratio bloqueos/tackles
+        blocks_tackle = (df['Blocks_Blocks'].sum() / total_tackles) if total_tackles > 0 else 0
+        # Ratio de despejes
+        total_defensive_actions = total_tackles + df['Int'].sum()
+        clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0
+        # ===========================
+        # MÉTRICAS DE PORTERÍA
+        # ===========================
+        # Rendimiento del portero normalizado
+        avg_save_pct = df['Performance_Save%'].mean() if 'Performance_Save%' in df.columns else 0
+        avg_xg_against = df['Expected_xG'].mean() if len(df) > 0 else 1
+        performance_save = (avg_save_pct / (1 / avg_xg_against)) if avg_xg_against > 0 else 0
+        # ===========================
+        # MÉTRICAS DE POSESIÓN
+        # ===========================
+        # Ratio de conducciones progresivas
+        total_carries = df['Carries_Carries'].sum()
+        progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0
+        # Ratio de conducciones al área
+        penalty_carry_ratio = (df['Carries_CPA'].sum() / total_carries) if total_carries > 0 else 0
+        # Balance conducción/pase progresivo
+        total_prog_passes = df['PrgP'].sum()
+        carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0
+        # ===========================
+        # ÍNDICES COMPUESTOS
+        # ===========================
+        # Índice ofensivo
+        avg_gf_raw = df['GF'].mean()
+        avg_xg_raw = df['Expected_xG'].mean()
+        avg_sot = df['Standard_SoT'].mean()
+        avg_sh = df['Standard_Sh'].mean()
+        offensive_index = (avg_gf_raw + avg_xg_raw) * (avg_sot / avg_sh) if avg_sh > 0 else 0
+        # Índice defensivo
+        avg_int = df['Int'].mean()
+        avg_tkl = df['Tackles_Tkl'].mean()
+        avg_clr = df['Clr'].mean()
+        defensive_index = avg_save_pct * (avg_int / (avg_tkl + avg_clr)) if (avg_tkl + avg_clr) > 0 else 0
+        # Índice de control de posesión
+        avg_touches_att = df['Touches_Att 3rd'].mean()
+        avg_carries_third = df['Carries_1/3'].mean() if 'Carries_1/3' in df.columns else 0
+        avg_touches_total = df['Touches_Touches'].mean()
+        possession_control_index = ((avg_touches_att + avg_carries_third) / avg_touches_total) if avg_touches_total > 0 else 0
+        # Índice de transición
+        avg_prgp = df['PrgP'].mean()
+        avg_prgc = df['Carries_PrgC'].mean()
+        avg_poss_raw = df['Poss'].mean()
+        transition_index = ((avg_prgp + avg_prgc) / avg_poss_raw) if avg_poss_raw > 0 else 0
+        # ✅ RETORNAR TODAS LAS MÉTRICAS (23 valores)
+        return (
+            avg_ck,
+            var_ck,  # 0 - ✅ CAMBIADO: varianza en vez de promedio
+            avg_xg,  # 1
+            avg_sca,  # 2
+            avg_cross,  # 3
+            avg_poss,  # 4
+            avg_att_3rd,  # 5
+            avg_gf,  # 6
+            avg_ga,  # 7
+            sh_accuracy,  # 8
+            xg_shot,  # 9
+            attacking_presence,  # 10
+            possession_shot,  # 11
+            progressive_pass_ratio,  # 12
+            final_third_involvement,  # 13
+            assist_sca,  # 14
+            creative_efficiency,  # 15
+            high_press_intensity,  # 16
+            interception_tackle,  # 17
+            clearance_ratio,  # 18
+            progressive_carry_ratio,  # 19
+            carry_pass_balance,  # 20
+            offensive_index,  # 21
+            transition_index  # 22
+        )
+    # ===========================
+    # PROMEDIOS DE LIGA (is_team=False)
+    # ===========================
+    avg_cross = df['Performance_Crs'].mean()
+    avg_att_3rd = df['Touches_Att 3rd'].mean()
+    avg_sca = df['SCA Types_SCA'].mean()
+    avg_xg = df['Expected_xG'].mean()
+    # ✅ CAMBIO: VARIANZA EN VEZ DE PROMEDIO DE CK
+    var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
+    avg_ck = df['Pass Types_CK'].mean()
+    avg_gf = df['GF'].mean()
+    avg_ga = df['GA'].mean()
+    # ✅ AGREGAR MÉTRICAS BÁSICAS PARA NORMALIZACIÓN
+    avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
+    return (
+        var_ck,  # 0 - ✅ CAMBIADO
+        avg_xg,  # 1
+        avg_sca,  # 2
+        avg_cross,  # 3
+        avg_att_3rd,  # 4
+        avg_gf,  # 5
+        avg_ga,  # 6
+        avg_sh,  # 7 - NUEVO
+        avg_ck
+    )
+class PROCESS_DATA():
+    def __init__(self,use_one_hot_encoding):
+        self.USE_ONE_HOT_ENCODING = use_one_hot_encoding
+        self.init_variables()
+        self.load_clean_dataset()
+        self.process_all_matches()
+        self.clean_and_ouput_dataset()
+        # Excluir temporada 1718 si es necesario
+    def init_variables(self):
+        self.y = []
+        self.lst_data = []
+        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
+        # ✅ CONSTRUIR VECTOR DE FEATURES CON NOMBRES DESCRIPTIVOS
+        self.lst_base_advanced = [
+            "avg_ck","var_ck",  # ✅ CAMBIADO
+            "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
+            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
+            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
+            "high_press_intensity", "interception_tackle", "clearance_ratio",
+            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
+        ]
+        self.lst_base_original = [
+            "var_ck","xg", "sca", "cross", "poss", "att_3rd", "gf", "ga","avg_ck"
+        ]
+        print("Variables inicializadas")
+    def load_clean_dataset(self):
+        #load clean dataset generated on generate_dataset.py
+        self.df_dataset_historic = pd.read_csv("dataset/cleaned/dataset_cleaned.csv")
+        if os.path.exists(r"dataset/cleaned/dataset_cleaned_current_year.csv"):
+            self.df_dataset_current_year = pd.read_csv("dataset/cleaned/dataset_cleaned_current_year.csv")
+            self.df_dataset = pd.concat([self.df_dataset_historic,self.df_dataset_current_year])
+        else:
+            self.df_dataset = self.df_dataset_historic
+        self.df_dataset["season"] = self.df_dataset["season"].astype(str)
+        self.df_dataset["Performance_Save%"].fillna(0)
+        self.df_dataset_export = self.df_dataset.copy()
+        #filter data to get key elements on mathces
+        self.df_dataset_export = self.df_dataset_export.drop_duplicates(subset=["game", "league"])
+        self.df_dataset_export = self.df_dataset_export[["local", "away", "round", "season", "date", "league"]]
+        #load all unique matches on a list to process
+        self.lst_matches = self.df_dataset_export.values.tolist()
+        self.lst_matches = [row for row in self.lst_matches if row[3] != "1718"]
+        print("dataset loaded")
+    def process_all_matches(self):
+        for i in self.lst_matches:
+            if i[2] < 5:
+                continue
+            local = i[0]
+            away = i[1]
+            round_num = i[2]
+            season = i[3]
+            date = i[4]
+            league_code = i[5]
+            dic_df = {}
+            # Promedios de liga
+            lst_avg = get_average(
+                self.df_dataset[
+                    (self.df_dataset['season'] == season) &
+                    (self.df_dataset['round'] < round_num) &
+                    (self.df_dataset['league'] == league_code)
+                ],
+                is_team=False
+            )
+            # ✅ FUNCIÓN MEJORADA: Maneja métricas originales y avanzadas
+            def create_line(df, is_form=True, is_team=False, use_advanced=True):
+                """
+                Args:
+                    df: DataFrame con datos del equipo
+                    is_form: Si True, toma solo últimos 8 partidos
+                    is_team: Si True, normaliza contra promedios de liga
+                    use_advanced: Si True, incluye métricas avanzadas (23 valores)
+                                Si False, solo métricas originales (8 valores)
+                """
+                if is_form:
+                    df = df[-6:]
+                if use_advanced:
+                    # Retorna 23 valores (todas las métricas)
+                    return get_average(df, is_team, lst_avg)
+                else:
+                    # Retorna solo 8 valores originales
+                    result = get_average(df, is_team, lst_avg)
+                    return result[:9]  # Primeros 8 valores
+            # Extraer DataFrames
+            (team1_home, team1_away, team1_opp_home, team1_opp_away,
+            team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
+                self.df_dataset, season, round_num, local, away, league=league_code
+            )
+            # Corners reales
+            ck = get_ck(self.df_dataset, season, round_num, local, away, league=league_code)
+            self.y.append(ck)
+            # Head to Head
+            index = self.lst_years.index(season)
+            result = self.lst_years[:index+1]
+            team1_h2h, team2_h2h = get_head_2_head(
+                self.df_dataset, local, away, seasons=result, league=league_code
+            )
+            # ✅ PPP
+            local_ppp = get_team_ppp(self.df_dataset, local, season, round_num, league=league_code)
+            away_ppp = get_team_ppp(self.df_dataset, away, season, round_num, league=league_code)
+            ppp_diff = local_ppp - away_ppp
+            dic_df['ppp_local'] = (local_ppp,)
+            dic_df['ppp_away'] = (away_ppp,)
+            dic_df['ppp_difference'] = (ppp_diff,)
+            # ✅ FEATURES CON MÉTRICAS AVANZADAS (23 valores cada una)
+            dic_df['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
+            dic_df['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
+            dic_df['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
+            dic_df['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)
+            dic_df['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
+            dic_df['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
+            dic_df['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
+            dic_df['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)
+            dic_df['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
+            dic_df['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)
+            # ✅ FEATURES CON MÉTRICAS ORIGINALES (8 valores) - SOLO PARA OPONENTES
+            dic_df['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
+            dic_df['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)
+            # One-Hot Encoding
+            if self.USE_ONE_HOT_ENCODING:
+                league_dummies = {
+                    'league_ESP': 1 if league_code == 'ESP' else 0,
+                    'league_GER': 1 if league_code == 'GER' else 0,
+                    'league_FRA': 1 if league_code == 'FRA' else 0,
+                    'league_ITA': 1 if league_code == 'ITA' else 0,
+                    'league_NED': 1 if league_code == 'NED' else 0,
+                    'league_ENG': 1 if league_code == 'ENG' else 0,
+                    'league_POR': 1 if league_code == 'POR' else 0,
+                    'league_BEL': 1 if league_code == 'BEL' else 0
+                }
+                for key, value in league_dummies.items():
+                    dic_df[key] = (value,)
+            lst_features_values = []
+            self.lst_features_values = []
+            for key in dic_df:
+                lst_features_values.extend(list(dic_df[key]))
+                # Casos especiales
+                if key in ['ppp_local', 'ppp_away', 'ppp_difference']:
+                    self.lst_features_values.append(key)
+                elif key.startswith('league_'):
+                    self.lst_features_values.append(key)
+                elif key in ['lst_team1_opp_away', 'lst_team2_opp_home']:
+                    # ✅ Métricas ORIGINALES (8 valores)
+                    self.lst_features_values.extend([f"{key}_{col}" for col in self.lst_base_original])
+                else:
+                    # ✅ Métricas AVANZADAS (23 valores)
+                    self.lst_features_values.extend([f"{key}_{col}" for col in self.lst_base_advanced])
+            self.lst_data.append(lst_features_values)
+        print("Dataset processed")
+    def clean_and_ouput_dataset(self):
+        self.df_data = pd.DataFrame(data=self.lst_data, columns=self.lst_features_values)
+        print(f"\n✅ PROCESAMIENTO COMPLETADO:")
+        print(f"   Shape inicial: {self.df_data.shape}")
+        print(f"   Total partidos: {len(self.df_data)}")
+        print(f"   Features totales: {self.df_data.shape[1]}")
+        # ===========================
+        # LIMPIEZA DE DATOS NULOS
+        # ===========================
+        print(f"\n🧹 LIMPIANDO DATOS NULOS...")
+        import numpy as np
+        nulos_antes_X = self.df_data.isnull().sum().sum()
+        nulos_antes_y = np.isnan(self.y).sum() if isinstance(self.y, np.ndarray) else sum(pd.isna(self.y))
+        print(f"   Nulos en X (antes): {nulos_antes_X}")
+        print(f"   Nulos en Y (antes): {nulos_antes_y}")
+        y_array = np.array(self.y).flatten()
+        mask_valid_X = ~self.df_data.isnull().any(axis=1)
+        mask_valid_y = ~np.isnan(y_array)
+        mask_combined = mask_valid_X & mask_valid_y
+        self.df_data = self.df_data[mask_combined].reset_index(drop=True)
+        y_array = y_array[mask_combined]
+        print(f"\n✅ LIMPIEZA COMPLETADA:")
+        print(f"   Nulos en X (después): {self.df_data.isnull().sum().sum()}")
+        print(f"   Nulos en Y (después): {np.isnan(y_array).sum()}")
+        print(f"   Filas eliminadas: {len(mask_combined) - mask_combined.sum()}")
+        print(f"   Shape final: {self.df_data.shape}")
+        # ===========================
+        # VERIFICACIÓN FINAL
+        # ===========================
+        print(f"\n🔍 VERIFICACIÓN DE NUEVAS FEATURES:")
+        print(f"   ✅ Features con 'var_ck': {len([c for c in self.df_data.columns if 'var_ck' in c])}")
+        print(f"   ✅ Features con métricas avanzadas: {len([c for c in self.df_data.columns if any(m in c for m in ['sh_accuracy', 'offensive_index'])])}")
+        print(f"   ✅ Features de oponentes (8 valores): {len([c for c in self.df_data.columns if 'opp' in c])}")
+        print("\n" + "=" * 80)
+        print("✅ PROCESO COMPLETADO - DATOS LISTOS PARA ENTRENAMIENTO")
+        print("=" * 80)
+        self.y = y_array.tolist()
+        self.df_data["y"] = self.y
+        self.df_data.to_csv("dataset\processed\dataset_processed.csv",index=False)
+        print("Dataset")
+#a = PROCESS_DATA(True)

src/utils/__init__.py ADDED Viewed

File without changes

src/utils/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (164 Bytes). View file

src/utils/__pycache__/helper.cpython-311.pyc ADDED Viewed

Binary file (1 kB). View file

src/utils/helper.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import pandas as pd
+import warnings
+import os
+def desactivar_advertencias():
+    warnings.filterwarnings('ignore')
+    # Ignorar warnings específicos de bibliotecas comunes
+    warnings.filterwarnings('ignore', category=DeprecationWarning)
+    warnings.filterwarnings('ignore', category=FutureWarning)
+    warnings.filterwarnings('ignore', category=UserWarning)
+    os.environ['PYTHONWARNINGS'] = 'ignore'
+    pd.options.mode.chained_assignment = None  # Desactivar SettingWithCopyWarning
+    print("Advertencias desactivadas...")

streamlit_app.py ADDED Viewed

	@@ -0,0 +1,812 @@

+import streamlit as st
+import pandas as pd
+from datetime import datetime
+import requests
+import plotly.graph_objects as go
+import plotly.express as px
+import numpy as np
+from scipy import stats as scipy_stats
+from dotenv import load_dotenv
+import os
+# Load variables from a local .env file (if any) into the environment
+load_dotenv()
+API_KEY = os.getenv("API_KEY")  # API key read from the environment; make sure a secure value is configured
+# --- INITIAL CONFIGURATION ---
+st.set_page_config(layout="wide", page_title="Corners Forecast", page_icon="⚽")
+# Add side padding and a maximum width to the wide layout
+st.markdown("""
+    <style>
+        .block-container {
+            padding-left: 5rem;
+            padding-right: 5rem;
+            max-width: 1400px;
+            margin: 0 auto;
+        }
+    </style>
+""", unsafe_allow_html=True)
+# --- MODEL CONSTANTS ---
+# NOTE(review): RMSE_MODELO squared is about 5.86, not MSE_MODELO — confirm which metric 1.9 represents
+MSE_MODELO = 1.9
+RMSE_MODELO = 2.42
+R2_MODELO = 0.39
+N_SIMULACIONES = 5000  # number of Monte Carlo draws (reduced to 5000)
+# --- FUNCIONES AUXILIARES ---
+def probabilidad_a_momio(probabilidad):
+    """Convierte probabilidad (%) a momio decimal"""
+    if probabilidad <= 0:
+        return 0
+    return round(100 / probabilidad, 2)
+def clasificar_valor_apuesta(momio_real, momio_modelo):
+    """Determina si hay valor en la apuesta"""
+    if momio_real > momio_modelo * 1.1:
+        return "🟢 EXCELENTE VALOR"
+    elif momio_real > momio_modelo:
+        return "🟡 BUEN VALOR"
+    else:
+        return "🔴 SIN VALOR"
+@st.cache_data(ttl=3600)  # 👈 CACHE 1 HORA
+def simular_lambda_montecarlo(lambda_pred, sigma=RMSE_MODELO, n_sims=N_SIMULACIONES):
+    """Genera simulaciones Monte Carlo con CACHE"""
+    lambdas = np.random.normal(lambda_pred, sigma, n_sims)
+    lambdas = np.maximum(lambdas, 0.1)
+    return lambdas
+@st.cache_data(ttl=3600)  # 👈 CACHE 1 HORA
+def calcular_probabilidades_con_incertidumbre(lambda_pred, linea, tipo='over', sigma=RMSE_MODELO, n_sims=N_SIMULACIONES):
+    """Calcula probabilidades con CACHE"""
+    lambdas_sim = simular_lambda_montecarlo(lambda_pred, sigma, n_sims)
+    probs = []
+    if tipo == 'over':
+        for lam in lambdas_sim:
+            prob = 1 - scipy_stats.poisson.cdf(int(linea), lam)
+            probs.append(prob * 100)
+    else:
+        for lam in lambdas_sim:
+            prob = scipy_stats.poisson.cdf(int(linea) - 1, lam)
+            probs.append(prob * 100)
+    probs = np.array(probs)
+    return {
+        'prob_media': np.mean(probs),
+        'prob_low': np.percentile(probs, 5),
+        'prob_high': np.percentile(probs, 95),
+        'prob_std': np.std(probs),
+        'distribucion': probs
+    }
+def calcular_expected_value(prob_media, momio_casa):
+    """Calcula Expected Value (EV)"""
+    prob_decimal = prob_media / 100
+    ev = (prob_decimal * momio_casa) - 1
+    return ev * 100
+def calcular_kelly_criterion(prob_media, momio_casa):
+    """Calcula Kelly Criterion"""
+    p = prob_media / 100
+    if momio_casa <= 1:
+        return 0
+    kelly = (p * momio_casa - 1) / (momio_casa - 1)
+    if kelly < 0:
+        return 0
+    return min(kelly, 0.25)
+def recomendar_apuesta_avanzada(prob_media, prob_low, prob_high, momio_casa):
+    """Sistema avanzado de recomendación"""
+    prob_casa = (1 / momio_casa) * 100
+    ev = calcular_expected_value(prob_media, momio_casa)
+    kelly = calcular_kelly_criterion(prob_media, momio_casa)
+    kelly_conservador = kelly * 0.25
+    ev_positivo = ev > 0
+    confianza_alta = prob_low > prob_casa
+    margen_seguridad = (prob_media - prob_casa) / prob_casa
+    if confianza_alta and ev > 5 and margen_seguridad > 0.1:
+        nivel = "EXCELENTE"
+        emoji = "🟢"
+        recomendar = True
+    elif confianza_alta and ev > 0:
+        nivel = "BUENA"
+        emoji = "🟡"
+        recomendar = True
+    elif ev > 0:
+        nivel = "MODERADA"
+        emoji = "🟠"
+        recomendar = False
+    else:
+        nivel = "MALA"
+        emoji = "🔴"
+        recomendar = False
+    return {
+        'recomendar': recomendar,
+        'nivel': nivel,
+        'emoji': emoji,
+        'ev': ev,
+        'kelly': kelly * 100,
+        'kelly_conservador': kelly_conservador * 100,
+        'prob_casa': prob_casa,
+        'prob_media': prob_media,
+        'prob_low': prob_low,
+        'prob_high': prob_high,
+        'margen_seguridad': margen_seguridad * 100,
+        'ev_positivo': ev_positivo,
+        'confianza_alta': confianza_alta
+    }
+# --- LEAGUE DICTIONARY ---
+# Maps the league name shown in the UI to the league code used in the dataset and sent to the API
+LEAGUES_DICT = {
+    "Ligue 1": "FRA",
+    "La Liga": "ESP",
+    "Premier League": "ENG",
+    "Eredivisie": "NED",
+    "Liga NOS": "POR",
+    "Pro League": "BEL",
+    "Bundesliga": "GER",
+    "Serie A": "ITA"
+}
+# --- HEADER ---
+st.markdown("<h1 style='text-align: center;'>Corners Forecast</h1>", unsafe_allow_html=True)
+# --- CARGAR DATOS ---
+@st.cache_data  # 👈 CACHE PERMANENTE
+def cargar_datos():
+    df = pd.read_csv(r"https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/dataset/cleaned/dataset_cleaned.csv")
+    return df[['local','league']].drop_duplicates()
+# Distinct (local, league) pairs used to populate the team selectors
+df = cargar_datos()
+# --- INITIALISE SESSION STATE ---
+if 'prediccion_realizada' not in st.session_state:
+    st.session_state.prediccion_realizada = False
+if 'resultado_api' not in st.session_state:
+    st.session_state.resultado_api = None
+st.markdown("")
+# --- PARAMETER SELECTION ---
+# League selector, placed in the middle of three equal columns
+col1, col2, col3 = st.columns([1, 1, 1])
+with col2:
+    option = st.selectbox(
+        "🏆 Liga",
+        ["La Liga", "Premier League", "Ligue 1", "Serie A", "Eredivisie", "Liga NOS", "Pro League", "Bundesliga"],
+        index=None,
+        placeholder="Selecciona liga",
+    )
+st.write("")
+# Round and season inputs are only created once a league is selected
+col_jornada1, col_jornada2, col_jornada3, col_jornada4 = st.columns([2, 1, 1, 2])
+with col_jornada2:
+    if option:
+        jornada = st.number_input("📅 Jornada", min_value=5, max_value=42, value=15, step=1)
+with col_jornada3:
+    if option:
+        temporada = st.selectbox(
+            "Temporada",
+            [2526, 2425, 2324, 2223, 2122],
+            index=0
+        )
+st.write("")
+# Home / away team selectors, both listing the teams of the chosen league
+# NOTE(review): nothing here prevents choosing the same team on both sides — confirm the API rejects it
+cl2, cl3, cl4 = st.columns([ 4, 1, 4])
+with cl2:
+    if option:
+        if jornada:
+            option_local = st.selectbox(
+                "🏠 Equipo Local",
+                list(df["local"][df["league"] == LEAGUES_DICT[option]]),
+                index=None,
+                placeholder="Equipo local",
+            )
+with cl3:
+    if option:
+        st.write("")
+        st.write("")
+        st.markdown("<h3 style='text-align: center'>VS</h3>", unsafe_allow_html=True)
+with cl4:
+    if option:
+        if jornada:
+            option_away = st.selectbox(
+                "✈️ Equipo Visitante",
+                list(df["local"][df["league"] == LEAGUES_DICT[option]]),
+                index=None,
+                placeholder="Equipo visitante",
+            )
+# --- BUTTON TO GENERATE THE PREDICTION ---
+# option_local / option_away are only defined when a league is selected; the
+# short-circuit on `option` keeps this condition from touching them otherwise
+if option and option_local and option_away:
+    st.markdown("---")
+    col_btn1, col_btn2, col_btn3 = st.columns([1, 1, 1])
+    with col_btn2:
+        # Button that triggers the prediction
+        if st.button("Generar Predicción", type="secondary", use_container_width=True):
+            st.session_state.prediccion_realizada = True
+            st.session_state.resultado_api = None  # Clear the stored result so a new request is made
+    st.write("")
+    st.write("")
+# --- REALIZAR PREDICCIÓN (SOLO SI SE PRESIONÓ EL BOTÓN) ---
+if option and option_local and option_away and st.session_state.prediccion_realizada:
+    # Si no hay resultado en cache, hacer petición
+    if st.session_state.resultado_api is None:
+        with st.spinner('🔮 Generando predicción con análisis de incertidumbre...'):
+            url = "https://daniel-saed-futbol-corners-forecast-api.hf.space/items/"
+            #url = "http://localhost:7860//items/"
+            headers = {"X-API-Key": API_KEY}
+            params = {
+                "local": option_local,
+                "visitante": option_away,
+                "jornada": jornada,
+                "league_code": LEAGUES_DICT[option],
+                "temporada": str(temporada)
+            }
+            try:
+                response = requests.get(url, headers=headers, params=params, timeout=30)
+                if response.status_code == 200:
+                    st.session_state.resultado_api = response.json()  # 👈 GUARDAR EN SESSION
+                    st.success("✅ Predicción generada")
+                elif response.status_code == 401:
+                    st.error("❌ Error de Autenticación - API Key inválida")
+                    st.stop()
+                elif response.status_code == 400:
+                    st.error(f"❌ Error: {response.json().get('detail', 'Parámetros inválidos')}")
+                    st.stop()
+                else:
+                    st.error(f"❌ Error {response.status_code}")
+                    st.stop()
+            except requests.exceptions.Timeout:
+                st.error("⏱️ Timeout - Intenta de nuevo")
+                st.stop()
+            except requests.exceptions.ConnectionError:
+                st.error("🌐 Error de conexión")
+                st.stop()
+            except Exception as e:
+                st.error(f"❌ Error: {str(e)}")
+                import traceback
+                st.code(traceback.format_exc())
+                st.stop()
+    # --- MOSTRAR RESULTADOS (DESDE SESSION STATE) ---
+    if st.session_state.resultado_api:
+        resultado = st.session_state.resultado_api
+        lambda_pred = resultado['prediccion']
+        st.write("")
+        st.write("")
+        # ============================================
+        # 1. PREDICCIÓN PRINCIPAL
+        # ============================================
+        lambda_low = max(0, lambda_pred - 1.96 * RMSE_MODELO)
+        lambda_high = lambda_pred + 1.96 * RMSE_MODELO
+        st.markdown("## 🎯 Predicción de Corners")
+        st.write("")
+        # Métricas principales con Streamlit nativo
+        col_pred1, col_pred2, col_pred3 = st.columns(3)
+        with col_pred1:
+            st.metric(
+                label="Corners Esperados",
+                value=f"{lambda_pred:.1f}",
+                help="Valor esperado (λ) del modelo"
+            )
+        with col_pred2:
+            st.metric(
+                label="Límite Inferior",
+                value=f"{lambda_low:.1f}",
+                delta=f"{lambda_low - lambda_pred:.1f}",
+                help="Intervalo de confianza 95% (inferior)"
+            )
+        with col_pred3:
+            st.metric(
+                label="Límite Superior",
+                value=f"{lambda_high:.1f}",
+                delta=f"{lambda_high - lambda_pred:.1f}",
+                help="Intervalo de confianza 95% (superior)"
+            )
+        st.write("")
+        st.write("")
+        st.write("")
+        st.markdown("---")
+        st.write("")
+        st.write("")
+        # ============================================
+        # 2. ANÁLISIS DE EQUIPOS (CON TABLAS)
+        # ============================================
+        stats_data = resultado['stats']
+        local_ck = stats_data['local_ck']
+        away_ck = stats_data['away_ck']
+        local_ck_received = stats_data['local_ck_received']
+        away_ck_received = stats_data['away_ck_received']
+        h2h_total = stats_data['h2h_total']
+        partido_esperado = stats_data['partido_esperado']
+        riesgo = resultado['riesgo']
+        # 👈 TABLA DE CORNERS GENERADOS Y CONCEDIDOS
+        st.markdown("### Análisis de Corners")
+        df_corners = pd.DataFrame({
+            'Métrica': ['Corners Generados ⚽', 'Corners Concedidos 🛡️', 'Head to Head'],
+            f'🏠 {option_local}': [f'{local_ck:.2f}', f'{local_ck_received:.2f}','---'],
+            f'✈️ {option_away}': [f'{away_ck:.2f}', f'{away_ck_received:.2f}','---'],
+            '🎯 Total': [
+                f'{(local_ck + away_ck):.2f}',
+                f'{(local_ck_received + away_ck_received):.2f}',
+                f"{h2h_total:.2f}"
+            ]
+        })
+        st.dataframe(
+            df_corners,
+            hide_index=True,
+            use_container_width=True,
+            column_config={
+                'Métrica': st.column_config.TextColumn('📊 Métrica', width='medium'),
+                f'🏠 {option_local}': st.column_config.TextColumn(f'🏠 {option_local}', width='medium'),
+                f'✈️ {option_away}': st.column_config.TextColumn(f'✈️ {option_away}', width='medium'),
+                '🎯 Total': st.column_config.TextColumn('🎯 Total', width='medium')
+            }
+        )
+        st.write("")
+        st.write("")
+        # --- FIABILIDAD ---
+        st.markdown("### Fiabilidad")
+        col_fiab1, col_fiab2, col_fiab3 = st.columns(3)
+        with col_fiab1:
+            st.markdown(f"**🏠 {option_local}**")
+            st.write(f"**Score:** {riesgo['score_local']:.0f}/100")
+            st.write(f"**Nivel:** {riesgo['nivel_local']}")
+            st.write(f"**CV:** {riesgo['cv_local']:.1f}%")
+            st.progress(riesgo['score_local'] / 100)
+        with col_fiab2:
+            st.markdown("**📊 Fiabilidad Global**")
+            score_promedio = riesgo['score_promedio']
+            st.write(f"**Score:** {score_promedio:.0f}/100")
+            st.write("")
+            if score_promedio >= 65:
+                st.success("🟢 Fiabilidad MUY ALTA")
+            elif score_promedio >= 50:
+                st.info("🟡 Fiabilidad ALTA")
+            elif score_promedio >= 35:
+                st.warning("🟠 Fiabilidad MEDIA")
+            else:
+                st.error("🔴 Fiabilidad BAJA")
+        with col_fiab3:
+            st.markdown(f"**✈️ {option_away}**")
+            st.write(f"**Score:** {riesgo['score_away']:.0f}/100")
+            st.write(f"**Nivel:** {riesgo['nivel_away']}")
+            st.write(f"**CV:** {riesgo['cv_away']:.1f}%")
+            st.progress(riesgo['score_away'] / 100)
+        st.write("")
+        st.write("")
+        st.markdown("---")
+        st.write("")
+        st.write("")
+        # ============================================
+        # 3. PROBABILIDADES CON MONTE CARLO
+        # ============================================
+        st.info(f"🔬 **Análisis con {N_SIMULACIONES:,} simulaciones Monte Carlo** considerando RMSE={RMSE_MODELO}")
+        tab_over, tab_under = st.tabs(["⬆️ OVER", "⬇️ UNDER"])
+        # TAB OVER
+        with tab_over:
+            probs_over = resultado['probabilidades_over']
+            st.markdown("### 📈 Probabilidades Over (con Intervalos de Confianza 90%)")
+            df_over_incertidumbre = []
+            with st.spinner('Calculando incertidumbres Over...'):
+                for linea_str in sorted(probs_over.keys(), key=float, reverse=True):
+                    linea = float(linea_str)
+                    resultado_inc = calcular_probabilidades_con_incertidumbre(
+                        lambda_pred, linea, tipo='over'
+                    )
+                    prob_media = resultado_inc['prob_media']
+                    prob_low = resultado_inc['prob_low']
+                    prob_high = resultado_inc['prob_high']
+                    momio_medio = probabilidad_a_momio(prob_media)
+                    momio_low = probabilidad_a_momio(prob_high)
+                    momio_high = probabilidad_a_momio(prob_low)
+                    df_over_incertidumbre.append({
+                        'Línea': f"Over {linea_str}",
+                        'Prob. Media': f"{prob_media:.1f}%",
+                        'IC 90%': f"[{prob_low:.1f}%, {prob_high:.1f}%]",
+                        'Momio Justo': f"@{momio_medio:.2f}",
+                        'Rango Momio': f"[@{momio_low:.2f} - @{momio_high:.2f}]",
+                        'linea_num': linea,
+                        'prob_media_raw': prob_media,
+                        'prob_low_raw': prob_low,
+                        'prob_high_raw': prob_high,
+                        'tipo': 'Over'
+                    })
+            df_over_display = pd.DataFrame(df_over_incertidumbre)
+            st.dataframe(
+                df_over_display[['Línea', 'Prob. Media', 'Momio Justo']],
+                hide_index=True,
+                use_container_width=True,
+                column_config={
+                    'Línea': st.column_config.TextColumn('🎯 Línea', width='small'),
+                    'Prob. Media': st.column_config.TextColumn('📊 Probabilidad', width='small'),
+                    'Momio Justo': st.column_config.TextColumn('💰 Momio', width='small'),
+                }
+            )
+            st.write("")
+            # Gráfico
+            fig_over = go.Figure()
+            lineas_sorted = sorted([x['linea_num'] for x in df_over_incertidumbre])
+            probs_medias = [x['prob_media_raw'] for x in sorted(df_over_incertidumbre, key=lambda x: x['linea_num'])]
+            probs_low = [x['prob_low_raw'] for x in sorted(df_over_incertidumbre, key=lambda x: x['linea_num'])]
+            probs_high = [x['prob_high_raw'] for x in sorted(df_over_incertidumbre, key=lambda x: x['linea_num'])]
+            fig_over.add_trace(go.Scatter(
+                x=[f"Over {l}" for l in lineas_sorted] + [f"Over {l}" for l in lineas_sorted[::-1]],
+                y=probs_high + probs_low[::-1],
+                fill='toself',
+                fillcolor='rgba(46, 204, 113, 0.2)',
+                line=dict(color='rgba(255,255,255,0)'),
+                showlegend=True,
+                name='IC 90%',
+                hoverinfo='skip'
+            ))
+            fig_over.add_trace(go.Scatter(
+                x=[f"Over {l}" for l in lineas_sorted],
+                y=probs_medias,
+                mode='lines+markers',
+                name='Probabilidad Media',
+                line=dict(color='#2ecc71', width=3),
+                marker=dict(size=10)
+            ))
+            fig_over.update_layout(
+                title="Probabilidades Over con Banda de Incertidumbre (Monte Carlo)",
+                xaxis_title="Línea",
+                yaxis_title="Probabilidad (%)",
+                height=500,
+                hovermode='x unified'
+            )
+            st.plotly_chart(fig_over, use_container_width=True)
+        # TAB UNDER
+        with tab_under:
+            probs_under = resultado['probabilidades_under']
+            st.markdown("### 📉 Probabilidades Under (con Intervalos de Confianza 90%)")
+            df_under_incertidumbre = []
+            with st.spinner('Calculando incertidumbres Under...'):
+                for linea_str in sorted(probs_under.keys(), key=float, reverse=True):
+                    linea = float(linea_str)
+                    resultado_inc = calcular_probabilidades_con_incertidumbre(
+                        lambda_pred, linea, tipo='under'
+                    )
+                    prob_media = resultado_inc['prob_media']
+                    prob_low = resultado_inc['prob_low']
+                    prob_high = resultado_inc['prob_high']
+                    momio_medio = probabilidad_a_momio(prob_media)
+                    momio_low = probabilidad_a_momio(prob_high)
+                    momio_high = probabilidad_a_momio(prob_low)
+                    df_under_incertidumbre.append({
+                        'Línea': f"Under {linea_str}",
+                        'Prob. Media': f"{prob_media:.1f}%",
+                        'IC 90%': f"[{prob_low:.1f}%, {prob_high:.1f}%]",
+                        'Momio Justo': f"@{momio_medio:.2f}",
+                        'Rango Momio': f"[@{momio_low:.2f} - @{momio_high:.2f}]",
+                        'linea_num': linea,
+                        'prob_media_raw': prob_media,
+                        'prob_low_raw': prob_low,
+                        'prob_high_raw': prob_high,
+                        'tipo': 'Under'
+                    })
+            df_under_display = pd.DataFrame(df_under_incertidumbre)
+            st.dataframe(
+                df_under_display[['Línea', 'Prob. Media', 'IC 90%', 'Momio Justo', 'Rango Momio']],
+                hide_index=True,
+                use_container_width=True,
+                column_config={
+                    'Línea': st.column_config.TextColumn('🎯 Línea', width='small'),
+                    'Prob. Media': st.column_config.TextColumn('📊 Probabilidad', width='small'),
+                    'IC 90%': st.column_config.TextColumn('📉 Intervalo 90%', width='medium'),
+                    'Momio Justo': st.column_config.TextColumn('💰 Momio', width='small'),
+                    'Rango Momio': st.column_config.TextColumn('📈 Rango Momios', width='medium')
+                }
+            )
+            st.write("")
+            # Gráfico
+            fig_under = go.Figure()
+            lineas_sorted_under = sorted([x['linea_num'] for x in df_under_incertidumbre])
+            probs_medias_under = [x['prob_media_raw'] for x in sorted(df_under_incertidumbre, key=lambda x: x['linea_num'])]
+            probs_low_under = [x['prob_low_raw'] for x in sorted(df_under_incertidumbre, key=lambda x: x['linea_num'])]
+            probs_high_under = [x['prob_high_raw'] for x in sorted(df_under_incertidumbre, key=lambda x: x['linea_num'])]
+            fig_under.add_trace(go.Scatter(
+                x=[f"Under {l}" for l in lineas_sorted_under] + [f"Under {l}" for l in lineas_sorted_under[::-1]],
+                y=probs_high_under + probs_low_under[::-1],
+                fill='toself',
+                fillcolor='rgba(231, 76, 60, 0.2)',
+                line=dict(color='rgba(255,255,255,0)'),
+                showlegend=True,
+                name='IC 90%',
+                hoverinfo='skip'
+            ))
+            fig_under.add_trace(go.Scatter(
+                x=[f"Under {l}" for l in lineas_sorted_under],
+                y=probs_medias_under,
+                mode='lines+markers',
+                name='Probabilidad Media',
+                line=dict(color='#e74c3c', width=3),
+                marker=dict(size=10)
+            ))
+            fig_under.update_layout(
+                title="Probabilidades Under con Banda de Incertidumbre (Monte Carlo)",
+                xaxis_title="Línea",
+                yaxis_title="Probabilidad (%)",
+                height=500,
+                hovermode='x unified'
+            )
+            st.plotly_chart(fig_under, use_container_width=True)
+        st.write("")
+        st.write("")
+        st.markdown("---")
+        st.write("")
+        st.write("")
+        # ============================================
+        # 4. CALCULADORA AVANZADA
+        # ============================================
+        st.markdown("## 💰 Calculadora de Valor")
+        st.write("")
+        # Combinar datos
+        todas_lineas_datos = {}
+        for item in df_over_incertidumbre:
+            todas_lineas_datos[item['Línea']] = item
+        for item in df_under_incertidumbre:
+            todas_lineas_datos[item['Línea']] = item
+        todas_lineas_ordenadas = sorted(
+            todas_lineas_datos.keys(),
+            key=lambda x: (0 if 'Over' in x else 1, float(x.split()[1])),
+            reverse=True
+        )
+        col_calc1, col_calc2 = st.columns(2)
+        with col_calc1:
+            linea_calc = st.selectbox(
+                "🎯 Selecciona línea",
+                todas_lineas_ordenadas,
+                key="calc_linea"
+            )
+        with col_calc2:
+            momio_casa = st.number_input(
+                "💰 Momio del casino",
+                min_value=1.01,
+                max_value=20.0,
+                value=2.0,
+                step=0.01,
+                key="calc_momio",
+                help="Ingresa el momio decimal que ofrece la casa de apuestas"
+            )
+        st.write("")
+        datos_linea = todas_lineas_datos[linea_calc]
+        prob_media = datos_linea['prob_media_raw']
+        prob_low = datos_linea['prob_low_raw']
+        prob_high = datos_linea['prob_high_raw']
+        recomendacion = recomendar_apuesta_avanzada(
+            prob_media, prob_low, prob_high, momio_casa
+        )
+        st.markdown("### 📊 Métricas de la Apuesta")
+        col_m1, col_m2, col_m3, col_m4 = st.columns(4)
+        with col_m1:
+            st.metric(
+                "Prob. Media",
+                f"{prob_media:.1f}%",
+                help="Probabilidad media según Monte Carlo"
+            )
+        with col_m2:
+            momio_justo = probabilidad_a_momio(prob_media)
+            st.metric(
+                "Momio Justo",
+                f"@{momio_justo:.2f}",
+                help="Momio que refleja la probabilidad real"
+            )
+        with col_m3:
+            delta_ev = "📈 Positivo" if recomendacion['ev'] > 0 else "📉 Negativo"
+            st.metric(
+                "Expected Value",
+                f"{recomendacion['ev']:+.2f}%",
+                delta=delta_ev,
+                help="Ganancia esperada por cada $1 apostado"
+            )
+        with col_m4:
+            st.metric(
+                "Prob. Casino",
+                f"{recomendacion['prob_casa']:.1f}%",
+                help="Probabilidad implícita del momio del casino"
+            )
+        st.write("")
+        st.write("")
+        st.markdown("### 💵 Gestión de Bankroll (Kelly Criterion)")
+        col_kelly1, col_kelly2 = st.columns(2)
+        with col_kelly1:
+            if recomendacion['kelly'] > 0:
+                st.write(f"**Kelly Completo:** {recomendacion['kelly']:.2f}% del bankroll")
+                st.write(f"**Kelly Conservador (1/4):** {recomendacion['kelly_conservador']:.2f}% del bankroll ⭐")
+                st.write("")
+                st.markdown("**Ejemplo con Bankroll de $1,000:**")
+                apuesta_kelly = (recomendacion['kelly'] / 100) * 1000
+                apuesta_conservador = (recomendacion['kelly_conservador'] / 100) * 1000
+                st.write(f"- Kelly Completo: **${apuesta_kelly:.2f}**")
+                st.write(f"- Conservador: **${apuesta_conservador:.2f}**")
+                ganancia_potencial = apuesta_conservador * (momio_casa - 1)
+                st.write(f"- Ganancia potencial: **${ganancia_potencial:.2f}**")
+            else:
+                st.error("❌ Kelly = 0 - No apostar")
+        with col_kelly2:
+            st.write(f"**EV:** {recomendacion['ev']:+.2f}%")
+            st.write(f"**Margen de Seguridad:** {recomendacion['margen_seguridad']:+.1f}%")
+            st.write(f"**IC 90%:** [{prob_low:.1f}%, {prob_high:.1f}%]")
+            st.write("")
+            if recomendacion['confianza_alta']:
+                st.success("✅ Alta confianza: IC inferior supera prob. casino")
+            else:
+                st.warning("⚠️ Baja confianza: IC inferior NO supera prob. casino")
+            if recomendacion['ev'] > 10:
+                st.success("🟢 EV excelente (>10%)")
+            elif recomendacion['ev'] > 5:
+                st.info("🟡 EV bueno (5-10%)")
+            elif recomendacion['ev'] > 0:
+                st.warning("🟠 EV positivo pero bajo (<5%)")
+            else:
+                st.error("🔴 EV negativo")
+        # Footer
+        st.write("")
+        st.write("")
+        st.markdown("---")
+        st.caption(f"🤖 XGBoost v4.2 + Monte Carlo | 🎲 {N_SIMULACIONES:,} simulaciones | 📊 RMSE: {RMSE_MODELO} | ⏰ {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+else:
+    if option:
+        if option_local and option_away:
+            pass  # Esperando botón
+        else:
+            st.info("👆 Selecciona ambos equipos")
+    else:
+        st.info("👆 Selecciona una liga para comenzar")
+# Sidebar
+with st.sidebar:
+    st.markdown("## Corners Forecast")
+    st.markdown("---")
+    st.markdown("### 🔗 Enlaces")
+    st.markdown("""
+    [![GitHub](https://img.shields.io/badge/GitHub-Repository-181717?style=flat&logo=github)](https://github.com/danielsaed/futbol_corners_forecast)
+    [![Hugging Face](https://img.shields.io/badge/🤗_Hugging_Face-API-FFD21E?style=flat)](https://huggingface.co/spaces/daniel-saed/futbol-corners-forecast-api)
+    """)
+    st.markdown("---")
+    st.markdown("### Ligas")
+    for league in LEAGUES_DICT.keys():
+        st.write(f"• {league}")
+    # 👈 BOTÓN PARA LIMPIAR CACHE
+    if st.button("🗑️ Limpiar Cache", use_container_width=True):
+        st.cache_data.clear()
+        st.session_state.prediccion_realizada = False
+        st.session_state.resultado_api = None
+        st.success("✅ Cache limpiado")
+        st.rerun()
+    st.markdown("---")