Spaces:
Running
Running
Upload 21 files
Browse files- Dockerfile_STREAMLIT +19 -0
- requirements.txt +32 -0
- src/api/__init__.py +0 -0
- src/api/__pycache__/__init__.cpython-311.pyc +0 -0
- src/api/__pycache__/api.cpython-311.pyc +0 -0
- src/api/__pycache__/load.cpython-311.pyc +0 -0
- src/api/api.py +191 -0
- src/api/load.py +1208 -0
- src/models/__init__.py +0 -0
- src/models/test_model.py +1148 -0
- src/models/train_model.py +425 -0
- src/process_data/__init__.py +0 -0
- src/process_data/__pycache__/__init__.cpython-311.pyc +0 -0
- src/process_data/__pycache__/process_dataset.cpython-311.pyc +0 -0
- src/process_data/generate_dataset.py +211 -0
- src/process_data/process_dataset.py +584 -0
- src/utils/__init__.py +0 -0
- src/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- src/utils/__pycache__/helper.cpython-311.pyc +0 -0
- src/utils/helper.py +18 -0
- streamlit_app.py +812 -0
Dockerfile_STREAMLIT
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim

# Set the working directory
WORKDIR /app

# Copy the requirements file on its own, before the rest of the code
COPY requirements.txt .

# Install dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy all the code
COPY . .

# Expose the port used by Hugging Face Spaces
EXPOSE 7860

# ✅ Command for FastAPI on Hugging Face Spaces
# NOTE: despite the file name (Dockerfile_STREAMLIT), this image starts the
# FastAPI app object `app` from src/api/api.py with uvicorn on port 7860.
CMD ["uvicorn", "src.api.api:app", "--host", "0.0.0.0", "--port", "7860"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core Data Processing
pandas>=2.0.0
numpy>=1.24.0

# Machine Learning
scikit-learn>=1.3.0
xgboost>=2.0.0

# Statistics
scipy>=1.11.0

# Data Collection
soccerdata>=1.4.0

# Experiment Tracking & Model Management
mlflow>=2.8.0

# Model Persistence
joblib>=1.3.0

# API server
# uvicorn is the executable the Dockerfile CMD runs, so declare it explicitly
# instead of relying on another package to pull it in transitively.
fastapi>=0.115.4
uvicorn

# Security
python-dotenv>=1.0.0

# Dashboard
streamlit>=1.28.0

plotly
requests
|
src/api/__init__.py
ADDED
|
File without changes
|
src/api/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (162 Bytes). View file
|
|
|
src/api/__pycache__/api.cpython-311.pyc
ADDED
|
Binary file (7.16 kB). View file
|
|
|
src/api/__pycache__/load.cpython-311.pyc
ADDED
|
Binary file (48.8 kB). View file
|
|
|
src/api/api.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===========================
|
| 2 |
+
# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
|
| 3 |
+
# ===========================
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import os
|
| 8 |
+
from fastapi.responses import JSONResponse
|
| 9 |
+
from fastapi import Depends, FastAPI, HTTPException
|
| 10 |
+
from fastapi.security.api_key import APIKeyHeader
|
| 11 |
+
from fastapi import Security
|
| 12 |
+
from fastapi.responses import JSONResponse
|
| 13 |
+
from dotenv import load_dotenv
|
| 14 |
+
from src.api.load import USE_MODEL
|
| 15 |
+
#from load import USE_MODEL
|
| 16 |
+
|
| 17 |
+
# Load variables from a local .env file (if any) before API_KEY is read below.
load_dotenv()

# Model wrapper instantiated once at import time; predict_corners uses it.
model = USE_MODEL()

app = FastAPI()

# ===========================
# API KEY CONFIGURATION
# ===========================

# Expected key, read from the API_KEY environment variable (None when unset).
API_KEY = os.getenv("API_KEY")  # ⚠️ CHANGE IT TO A SECURE VALUE
# auto_error=False: a request without the X-API-Key header is not rejected by
# the scheme itself; the dependency receives None and must handle it.
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
|
| 29 |
+
|
| 30 |
+
async def get_api_key(api_key: str = Security(api_key_header)):
    """Validate the ``X-API-Key`` request header against the configured key.

    Args:
        api_key: Header value extracted by ``api_key_header``. It is ``None``
            when the header is absent, because the scheme is created with
            ``auto_error=False``.

    Returns:
        The validated API key.

    Raises:
        HTTPException: 401 when the header is missing or empty, when the key
            does not match, or when the server has no ``API_KEY`` configured.
    """
    import secrets

    # Fail closed. If API_KEY is unset it is None, and a request without the
    # header also yields None, so a plain ``!=`` comparison would accept it.
    if not API_KEY or not api_key:
        raise HTTPException(
            status_code=401,
            detail="API Key inválida o faltante"
        )

    # Constant-time comparison so response timing does not leak key contents.
    if not secrets.compare_digest(api_key.encode("utf-8"), API_KEY.encode("utf-8")):
        raise HTTPException(
            status_code=401,
            detail="API Key inválida o faltante"
        )

    return api_key
|
| 38 |
+
|
| 39 |
+
# ===========================
|
| 40 |
+
# HELPER: CONVERTIR NUMPY/PANDAS A TIPOS NATIVOS
|
| 41 |
+
# ===========================
|
| 42 |
+
def convert_to_native(val):
    """Recursively convert NumPy/pandas values into plain Python types.

    The result is meant to be JSON-serialisable: NumPy scalars become
    ``bool``/``int``/``float``, arrays and tuples become lists, Series become
    dicts, DataFrames become lists of row dicts, and missing values
    (``NaN``, ``None``, ``pd.NaT``, ``pd.NA``) become ``None``.

    Args:
        val: Any value, possibly nested inside dicts, lists, tuples, arrays,
            Series or DataFrames.

    Returns:
        The converted value. Objects of unrecognised types are returned as-is.
    """
    # np.bool_ is not an np.integer subclass and json cannot encode it.
    if isinstance(val, np.bool_):
        return bool(val)
    if isinstance(val, np.integer):
        return int(val)
    if isinstance(val, np.floating):
        # NaN is not valid JSON; map it to None like every other missing value.
        return None if np.isnan(val) else float(val)
    if isinstance(val, np.ndarray):
        # tolist() yields a (nested) list, or a bare scalar for 0-d arrays;
        # recursing on the result handles both and cleans NaN entries.
        return convert_to_native(val.tolist())
    if isinstance(val, dict):
        return {
            # NumPy scalar keys are unwrapped so json.dumps accepts them.
            (key.item() if isinstance(key, np.generic) else key): convert_to_native(value)
            for key, value in val.items()
        }
    if isinstance(val, (list, tuple)):
        return [convert_to_native(item) for item in val]
    if isinstance(val, pd.Series):
        return convert_to_native(val.to_dict())
    if isinstance(val, pd.DataFrame):
        return convert_to_native(val.to_dict(orient='records'))
    # Only ask pd.isna about scalars: on array-likes it returns an array whose
    # truth value is ambiguous.
    if pd.api.types.is_scalar(val) and pd.isna(val):
        return None
    return val
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ===========================
|
| 67 |
+
# ENDPOINTS
|
| 68 |
+
# ===========================
|
| 69 |
+
|
| 70 |
+
@app.get("/")
def read_root():
    """Root endpoint: return static information about the API.

    Returns:
        dict: API name, version, status, a map of the available endpoints and
        the authentication header that protected endpoints expect.
    """
    return {
        "api": "Corners Prediction API",
        "version": "1.0.0",
        "status": "active",
        "endpoints": {
            "/": "Información de la API",
            "/items/": "Predicción de corners (requiere API Key)",
            "/health": "Estado de salud"
        },
        "auth": "Requiere header: X-API-Key"
    }


@app.get("/health")
def health_check():
    """Health endpoint advertised by the root payload.

    The root endpoint lists ``/health``, so the route has to exist; without
    it clients following that listing receive a 404.

    Returns:
        dict: ``{"status": "ok"}`` whenever the application is serving.
    """
    return {"status": "ok"}
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@app.get("/items/")
def predict_corners(
    local: str,
    visitante: str,
    jornada: int,
    league_code: str,
    temporada: str = "2526",
    api_key: str = Depends(get_api_key)  # ✅ protected by the API-key dependency
):
    """
    Predict corners for a football match.

    Args:
        local: Home team name (required).
        visitante: Away team name (required).
        jornada: Matchday number (required, min: 1).
        league_code: League code (required: ESP, GER, FRA, ITA, ENG, NED,
            POR, BEL). Matched case-insensitively.
        temporada: Season in AABB format (default: "2526").
        api_key: Injected by the ``get_api_key`` dependency; not used in the
            body.

    Returns:
        JSONResponse with the prediction payload plus a ``metadata`` entry on
        success (200), or with ``error``/``type``/``traceback`` fields when an
        unexpected exception occurs (500).

    Raises:
        HTTPException: 400 for invalid parameters, 422 when the model result
            carries an ``error`` entry.

    Example:
        GET /items/?local=Barcelona&visitante=Real%20Madrid&jornada=15&league_code=ESP&temporada=2526
        Headers: X-API-Key: your-secret-key-here
    """

    # ===========================
    # VALIDATION
    # ===========================

    # Normalise first so whitespace-only names count as missing and a
    # lower-case league code such as "esp" is accepted.
    local = local.strip()
    visitante = visitante.strip()
    league_code = league_code.strip().upper()

    # Required fields
    if not local or not visitante:
        raise HTTPException(
            status_code=400,
            detail="Los parámetros 'local' y 'visitante' son obligatorios"
        )

    # Matchday
    if jornada < 1:
        raise HTTPException(
            status_code=400,
            detail="La jornada debe ser mayor o igual a 1"
        )

    # League
    valid_leagues = ["ESP", "GER", "FRA", "ITA", "ENG", "NED", "POR", "BEL"]
    if league_code not in valid_leagues:
        raise HTTPException(
            status_code=400,
            detail=f"Liga inválida. Ligas válidas: {', '.join(valid_leagues)}"
        )

    # ===========================
    # PREDICTION
    # ===========================

    try:
        resultado = model.consume_model_single(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code
        )

        # The model reports failures through an "error" entry in its result
        if resultado.get("error"):
            raise HTTPException(
                status_code=422,
                detail=f"Error en predicción: {resultado['error']}"
            )

        # ✅ Convert NumPy/pandas values to native types before serialising
        resultado_limpio = convert_to_native(resultado)

        # Attach metadata
        resultado_limpio["metadata"] = {
            "api_version": "1.0.0",
            "model_version": "v4",
            "timestamp": pd.Timestamp.now().isoformat()
        }

        return JSONResponse(
            status_code=200,
            content=resultado_limpio
        )

    except HTTPException:
        # Re-raise HTTP errors untouched so FastAPI renders them
        raise

    except Exception as e:
        # Any other failure: log it server-side (it was previously only
        # visible in the response body) and answer with a 500 payload.
        import logging
        import traceback

        logging.getLogger(__name__).exception(
            "Unhandled error while predicting corners"
        )
        error_detail = {
            "error": str(e),
            "type": type(e).__name__,
            # The traceback is only exposed when the app runs in debug mode
            "traceback": traceback.format_exc() if app.debug else None
        }

        return JSONResponse(
            status_code=500,
            content=error_detail
        )
|
src/api/load.py
ADDED
|
@@ -0,0 +1,1208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===========================
|
| 2 |
+
# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
|
| 3 |
+
# ===========================
|
| 4 |
+
|
| 5 |
+
import requests
|
| 6 |
+
import tempfile
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import joblib
|
| 10 |
+
from scipy.stats import poisson
|
| 11 |
+
from scipy import stats
|
| 12 |
+
import os
|
| 13 |
+
import sys
|
| 14 |
+
from src.process_data.process_dataset import get_dataframes,get_head_2_head,get_points_from_result,get_team_ppp,get_ppp_difference,get_average
|
| 15 |
+
#from process_data.process_dataset import get_dataframes,get_head_2_head,get_points_from_result,get_team_ppp,get_ppp_difference,get_average
|
| 16 |
+
#project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
|
| 17 |
+
#sys.path.insert(0, project_root)
|
| 18 |
+
# ===========================
|
| 19 |
+
# 1. FUNCIONES FIABILIDAD
|
| 20 |
+
# ===========================
|
| 21 |
+
|
| 22 |
+
def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
    """
    Score how predictable each team's corner count is within one season.

    For every team with enough matches (rows where it appears in the ``team``
    column) the function derives variability, consistency, trend, outlier and
    range metrics from the ``Pass Types_CK`` column and combines five
    sub-scores into a weighted 0-100 ``Score_Fiabilidad``.

    Args:
        df_database: DataFrame with at least the columns ``season``, ``team``,
            ``opponent`` and ``Pass Types_CK``.
        temporada: Season value to filter on (default "2526").
        min_partidos: Minimum number of matches a team needs to be analysed.
            At least two matches are always required.

    Returns:
        DataFrame with one row per analysed team, sorted by
        ``Score_Fiabilidad`` in descending order. When no team qualifies, an
        empty DataFrame with the same columns is returned.
    """
    columnas = [
        'Equipo', 'Partidos',
        'Media_CK', 'Mediana_CK', 'Std_CK', 'CV_%',
        'Pct_Cerca_Media', 'Cambios_Bruscos_%', 'IQR',
        'Rango', 'Rango_Norm', 'Min', 'Max',
        'Outliers', 'Pct_Outliers', 'Max_ZScore',
        'Tendencia_Slope', 'Autocorr',
        'Score_Fiabilidad', 'Nivel', 'Color',
    ]

    # The sudden-change ratio divides by (n - 1) and linregress needs two
    # points, so a team with fewer than two matches can never be analysed.
    minimo_requerido = max(min_partidos, 2)

    df_temp = df_database[df_database['season'] == temporada]
    resultados = []
    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()

    for equipo in equipos:
        # Matches where this team is the one taking the corners
        partidos_equipo = df_temp[df_temp['team'] == equipo]

        if len(partidos_equipo) < minimo_requerido:
            continue

        ck_sacados = partidos_equipo['Pass Types_CK'].values
        n_partidos = len(ck_sacados)

        # ===========================
        # 1. VARIABILITY METRICS
        # ===========================
        media = ck_sacados.mean()
        std = ck_sacados.std()
        cv = (std / media * 100) if media > 0 else 0

        # ===========================
        # 2. CONSISTENCY METRICS
        # ===========================

        # 2.1 Share of matches within ±2 corners of the mean
        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / n_partidos * 100

        # 2.2 Swings of more than 4 corners between consecutive rows
        cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
        pct_cambios_bruscos = cambios_bruscos / (n_partidos - 1) * 100

        # 2.3 Quartiles (Q1, Q2 = median, Q3)
        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
        iqr = q3 - q1  # Interquartile range (more robust than std)

        # ===========================
        # 3. TREND METRICS
        # ===========================

        # 3.1 Linear trend over the row order
        # NOTE(review): assumes rows are in chronological order -- confirm.
        slope = stats.linregress(np.arange(n_partidos), ck_sacados).slope

        # 3.2 Lag-1 autocorrelation. np.corrcoef returns NaN when either
        # series is constant, so report 0 in that case.
        autocorr = 0
        if n_partidos > 2:
            previos, siguientes = ck_sacados[:-1], ck_sacados[1:]
            if previos.std() > 0 and siguientes.std() > 0:
                autocorr = np.corrcoef(previos, siguientes)[0, 1]

        # ===========================
        # 4. OUTLIER METRICS
        # ===========================

        # 4.1 Outlier detection (IQR method)
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
        pct_outliers = outliers / n_partidos * 100

        # 4.2 Largest absolute z-score (0/0 -> NaN for a constant series)
        max_z = np.abs(stats.zscore(ck_sacados)).max() if std > 0 else 0

        # ===========================
        # 5. RANGE METRICS
        # ===========================
        rango = ck_sacados.max() - ck_sacados.min()
        rango_normalizado = rango / media if media > 0 else 0

        # ===========================
        # 6. GLOBAL RELIABILITY SCORE
        # ===========================

        # Sub-scores on a 0-100 scale (lower = worse)
        score_cv = max(0, 100 - cv * 2)                         # high CV is bad
        score_consistencia = cerca_media                        # closer to mean is better
        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)   # swings are bad
        score_outliers = max(0, 100 - pct_outliers * 3)         # outliers are bad
        score_iqr = max(0, 100 - iqr * 10)                      # wide IQR is bad

        # Final score (weighted average)
        score_fiabilidad = (
            score_cv * 0.25 +
            score_consistencia * 0.30 +
            score_cambios * 0.20 +
            score_outliers * 0.15 +
            score_iqr * 0.10
        )

        # ===========================
        # 7. CLASSIFICATION
        # ===========================
        if score_fiabilidad >= 70:
            nivel = "EXCELENTE ⭐⭐⭐"
            color = "#27ae60"
        elif score_fiabilidad >= 55:
            nivel = "BUENO ✅"
            color = "#2ecc71"
        elif score_fiabilidad >= 40:
            nivel = "ACEPTABLE 🟡"
            color = "#f39c12"
        elif score_fiabilidad >= 25:
            nivel = "REGULAR ⚠️"
            color = "#e67e22"
        else:
            nivel = "EVITAR ⛔"
            color = "#e74c3c"

        resultados.append({
            'Equipo': equipo,
            'Partidos': n_partidos,

            # Basic statistics
            'Media_CK': round(media, 2),
            'Mediana_CK': round(q2, 2),
            'Std_CK': round(std, 2),
            'CV_%': round(cv, 1),

            # Consistency
            'Pct_Cerca_Media': round(cerca_media, 1),
            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
            'IQR': round(iqr, 2),

            # Range
            'Rango': int(rango),
            'Rango_Norm': round(rango_normalizado, 2),
            'Min': int(ck_sacados.min()),
            'Max': int(ck_sacados.max()),

            # Outliers
            'Outliers': int(outliers),
            'Pct_Outliers': round(pct_outliers, 1),
            'Max_ZScore': round(max_z, 2),

            # Trend
            'Tendencia_Slope': round(slope, 3),
            'Autocorr': round(autocorr, 3),

            # Score and classification
            'Score_Fiabilidad': round(score_fiabilidad, 1),
            'Nivel': nivel,
            'Color': color
        })

    # Without qualifying teams DataFrame([]) has no columns and sort_values
    # would raise KeyError; return an empty frame with the expected schema.
    if not resultados:
        return pd.DataFrame(columns=columnas)

    df_resultado = pd.DataFrame(resultados, columns=columnas)
    df_resultado = df_resultado.sort_values('Score_Fiabilidad', ascending=False)

    return df_resultado
|
| 180 |
+
|
| 181 |
+
def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """
    Print the reliability analysis to standard output.

    Shows the first ``top_n`` rows (most reliable teams), the last ``top_n``
    rows (least reliable teams), the count of teams per ``Nivel`` and summary
    statistics of ``Score_Fiabilidad``.

    Args:
        df_analisis: DataFrame with the columns read below (``Equipo``,
            ``Nivel``, ``Score_Fiabilidad``, ``Media_CK``, ...). It is read
            from the top for "most reliable" and from the bottom for "least
            reliable", so it should be sorted by score in descending order.
        top_n: Number of teams to list in each section.

    Returns:
        None. When ``df_analisis`` is empty only the header and a notice are
        printed.
    """
    separador = "-" * 120

    def _imprimir_resumen(row):
        # Header and central-tendency line shared by both listings
        print(f"\n{row['Equipo']:25s} | {row['Nivel']:20s} | Score: {row['Score_Fiabilidad']:.1f}")
        print(f" 📊 Media: {row['Media_CK']:.1f} | Mediana: {row['Mediana_CK']:.1f} | CV: {row['CV_%']:.1f}%")

    def _imprimir_alertas(row):
        # Sudden-change and outlier line shared by both listings
        print(f" ⚠️ Cambios bruscos: {row['Cambios_Bruscos_%']:.1f}% | Outliers: {row['Pct_Outliers']:.1f}%")

    print("\n" + "=" * 120)
    print("🎯 ANÁLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print("=" * 120)

    # An empty frame may have no columns at all; the lookups below would
    # raise KeyError, so stop after the header.
    if df_analisis.empty:
        print("\n⚠️ No hay equipos con datos suficientes para analizar")
        return

    # MOST RELIABLE TEAMS
    print(f"\n⭐ TOP {top_n} EQUIPOS MÁS FIABLES")
    print(separador)

    for _, row in df_analisis.head(top_n).iterrows():
        _imprimir_resumen(row)
        print(f" ✅ {row['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {row['IQR']:.1f}")
        _imprimir_alertas(row)
        print(f" 📈 Rango: {row['Min']}-{row['Max']} ({row['Rango']} corners)")

    # LEAST RELIABLE TEAMS
    print(f"\n\n⛔ TOP {top_n} EQUIPOS MENOS FIABLES")
    print(separador)

    for _, row in df_analisis.tail(top_n).iterrows():
        _imprimir_resumen(row)
        print(f" ❌ Solo {row['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {row['IQR']:.1f}")
        _imprimir_alertas(row)

    # OVERALL STATISTICS
    print("\n\n📊 DISTRIBUCIÓN POR NIVEL DE FIABILIDAD")
    print(separador)
    print(df_analisis['Nivel'].value_counts())

    scores = df_analisis['Score_Fiabilidad']
    print("\n📈 ESTADÍSTICAS DE SCORE:")
    print(f" Media: {scores.mean():.1f}")
    print(f" Mediana: {scores.median():.1f}")
    print(f" Score máximo: {scores.max():.1f}")
    print(f" Score mínimo: {scores.min():.1f}")
|
| 225 |
+
|
| 226 |
+
def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """
    Rate how reliable a specific fixture is for corner betting.

    Looks both teams up in ``df_analisis`` by ``Equipo``, averages their
    ``Score_Fiabilidad`` and maps the average to a label and a message.

    Args:
        local: Home team name as it appears in ``df_analisis['Equipo']``.
        visitante: Away team name as it appears in ``df_analisis['Equipo']``.
        df_analisis: Per-team analysis with the columns ``Equipo``,
            ``Score_Fiabilidad``, ``Nivel``, ``CV_%`` and ``Pct_Cerca_Media``.

    Returns:
        dict: When either team is missing, a three-key dict (``fiabilidad``,
        ``score``, ``mensaje``). Otherwise a dict with the label, both team
        scores, their average, both team levels, the message, and each
        team's CV and consistency percentage.
    """
    fila_local = df_analisis.loc[df_analisis['Equipo'] == local]
    fila_away = df_analisis.loc[df_analisis['Equipo'] == visitante]

    # Guard clause: without both teams there is nothing to average
    if fila_local.empty or fila_away.empty:
        return {
            'fiabilidad': 'DESCONOCIDO',
            'score': 0,
            'mensaje': '⚠️ Datos insuficientes'
        }

    def _primero(fila, columna):
        # First matching row wins if a team appears more than once
        return fila[columna].values[0]

    score_local = _primero(fila_local, 'Score_Fiabilidad')
    score_away = _primero(fila_away, 'Score_Fiabilidad')
    score_promedio = (score_local + score_away) / 2

    # Thresholds checked from highest to lowest; the fallback is the lowest tier
    niveles = (
        (65, "MUY ALTA ⭐⭐⭐", "✅ EXCELENTE PARTIDO PARA APOSTAR"),
        (50, "ALTA ✅", "✅ BUEN PARTIDO PARA APOSTAR"),
        (35, "MEDIA 🟡", "🟡 APOSTAR CON PRECAUCIÓN"),
    )
    fiabilidad, mensaje = "BAJA ⛔", "⛔ EVITAR APUESTA"
    for minimo, etiqueta, texto in niveles:
        if score_promedio >= minimo:
            fiabilidad, mensaje = etiqueta, texto
            break

    resultado = {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': _primero(fila_local, 'Nivel'),
        'nivel_away': _primero(fila_away, 'Nivel'),
        'mensaje': mensaje,
    }

    # Extra per-team context
    resultado['cv_local'] = _primero(fila_local, 'CV_%')
    resultado['cv_away'] = _primero(fila_away, 'CV_%')
    resultado['consistencia_local'] = _primero(fila_local, 'Pct_Cerca_Media')
    resultado['consistencia_away'] = _primero(fila_away, 'Pct_Cerca_Media')
    return resultado
|
| 274 |
+
|
| 275 |
+
def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5, lines=None):
    """Compute exact, over and under probabilities from a Poisson model.

    Args:
        lambda_pred: Expected number of corners (the Poisson mean).
        rango_inferior: How many integer counts below ``round(lambda_pred)``
            to include in the exact probabilities (clipped at 0).
        rango_superior: How many integer counts above ``round(lambda_pred)``
            to include in the exact probabilities.
        lines: Over/under lines to evaluate. Defaults to
            ``[7.5, 8.5, 9.5, 10.5, 11.5, 12.5]``. The same lines are used for
            both the over and the under tables.

    Returns:
        dict with three dicts of percentages (0-100):
            ``'exactas'``: count -> P(X = count)
            ``'over'``: line -> P(X > line)
            ``'under'``: line -> P(X <= line)
    """
    if lines is None:
        lines = [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]

    valor_central = int(round(lambda_pred))
    valores_analizar = range(
        max(0, valor_central - rango_inferior),
        valor_central + rango_superior + 1
    )

    probabilidades_exactas = {
        k: poisson.pmf(k, lambda_pred) * 100 for k in valores_analizar
    }

    # sf is the survival function (1 - cdf), computed directly so it keeps
    # precision in the upper tail where ``1 - cdf`` cancels.
    probabilidades_over = {
        linea: poisson.sf(linea, lambda_pred) * 100 for linea in lines
    }
    probabilidades_under = {
        linea: poisson.cdf(linea, lambda_pred) * 100 for linea in lines
    }

    return {
        'exactas': probabilidades_exactas,
        'over': probabilidades_over,
        'under': probabilidades_under
    }
|
| 307 |
+
|
| 308 |
+
def clasificar_confianza(prob):
    """Map a probability (in percent) to a confidence label.

    Args:
        prob: Probability on a 0-100 scale.

    Returns:
        str: "ALTA ✅" for 66 or more, "MEDIA ⚠️" for 55 up to (but not
        including) 66, otherwise "BAJA ❌".
    """
    # Thresholds are checked from highest to lowest; first match wins
    umbrales = ((66, "ALTA ✅"), (55, "MEDIA ⚠️"))
    for minimo, etiqueta in umbrales:
        if prob >= minimo:
            return etiqueta
    return "BAJA ❌"
|
| 316 |
+
|
| 317 |
+
'''
|
| 318 |
+
def get_dataframes(df, season, round_num, local, away, league=None):
|
| 319 |
+
"""Retorna 8 DataFrames filtrados por equipo, venue y liga"""
|
| 320 |
+
|
| 321 |
+
season_round = (df['season'] == season) & (df['round'] < round_num)
|
| 322 |
+
|
| 323 |
+
if league is not None:
|
| 324 |
+
season_round = season_round & (df['league'] == league)
|
| 325 |
+
|
| 326 |
+
def filter_and_split(team_filter):
|
| 327 |
+
filtered = df[season_round & team_filter].copy()
|
| 328 |
+
home = filtered[filtered['venue'] == "Home"]
|
| 329 |
+
away = filtered[filtered['venue'] == "Away"]
|
| 330 |
+
return home, away
|
| 331 |
+
|
| 332 |
+
local_home, local_away = filter_and_split(df['team'] == local)
|
| 333 |
+
local_opp_home, local_opp_away = filter_and_split(df['opponent'] == local)
|
| 334 |
+
|
| 335 |
+
away_home, away_away = filter_and_split(df['team'] == away)
|
| 336 |
+
away_opp_home, away_opp_away = filter_and_split(df['opponent'] == away)
|
| 337 |
+
|
| 338 |
+
return (local_home, local_away, local_opp_home, local_opp_away,
|
| 339 |
+
away_home, away_away, away_opp_home, away_opp_away)
|
| 340 |
+
|
| 341 |
+
def get_head_2_head(df, local, away, seasons=None, league=None):
|
| 342 |
+
"""Obtiene últimos 3 enfrentamientos directos"""
|
| 343 |
+
if seasons is None:
|
| 344 |
+
seasons = []
|
| 345 |
+
|
| 346 |
+
df_filtered = df[df['season'].isin(seasons)] if seasons else df
|
| 347 |
+
|
| 348 |
+
if league is not None:
|
| 349 |
+
df_filtered = df_filtered[df_filtered['league'] == league]
|
| 350 |
+
|
| 351 |
+
local_h2h = df_filtered[(df_filtered['team'] == local) & (df_filtered['opponent'] == away)]
|
| 352 |
+
away_h2h = df_filtered[(df_filtered['team'] == away) & (df_filtered['opponent'] == local)]
|
| 353 |
+
|
| 354 |
+
if len(local_h2h) < 4:
|
| 355 |
+
return local_h2h.tail(2), away_h2h.tail(2)
|
| 356 |
+
|
| 357 |
+
return local_h2h.tail(3), away_h2h.tail(3)
|
| 358 |
+
|
| 359 |
+
def get_average(df, is_team=False, lst_avg=None):
|
| 360 |
+
"""Calcula promedios de estadísticas (VERSIÓN COMPLETA)"""
|
| 361 |
+
|
| 362 |
+
if len(df) == 0:
|
| 363 |
+
if is_team:
|
| 364 |
+
# ✅ Retornar 23 valores (métricas avanzadas)
|
| 365 |
+
return (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
|
| 366 |
+
return (0, 0, 0, 0, 0, 0, 0, 0, 0)
|
| 367 |
+
|
| 368 |
+
if is_team:
|
| 369 |
+
# ===========================
|
| 370 |
+
# ESTADÍSTICAS BÁSICAS (NORMALIZADAS)
|
| 371 |
+
# ===========================
|
| 372 |
+
avg_cross = (df['Performance_Crs'].sum() / len(df)) - lst_avg[3]
|
| 373 |
+
avg_att_3rd = (df['Touches_Att 3rd'].sum() / len(df)) - lst_avg[4]
|
| 374 |
+
avg_sca = (df['SCA Types_SCA'].sum() / len(df)) - lst_avg[2]
|
| 375 |
+
avg_xg = (df['Expected_xG'].sum() / len(df)) - lst_avg[1]
|
| 376 |
+
|
| 377 |
+
# ✅ VARIANZA DE CORNERS
|
| 378 |
+
var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
|
| 379 |
+
avg_ck = (df['Pass Types_CK'].sum() / len(df)) - lst_avg[8]
|
| 380 |
+
|
| 381 |
+
avg_poss = (df['Poss'].sum() / len(df)) - 50
|
| 382 |
+
avg_gf = (df['GF'].sum() / len(df)) - lst_avg[5]
|
| 383 |
+
avg_ga = (df['GA'].sum() / len(df)) - lst_avg[6]
|
| 384 |
+
|
| 385 |
+
# ===========================
|
| 386 |
+
# MÉTRICAS OFENSIVAS AVANZADAS
|
| 387 |
+
# ===========================
|
| 388 |
+
total_sh = df['Standard_Sh'].sum()
|
| 389 |
+
sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
|
| 390 |
+
xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0
|
| 391 |
+
|
| 392 |
+
total_touches = df['Touches_Touches'].sum()
|
| 393 |
+
attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0
|
| 394 |
+
|
| 395 |
+
total_poss = df['Poss'].sum()
|
| 396 |
+
possession_shot = (total_sh / total_poss) if total_poss > 0 else 0
|
| 397 |
+
|
| 398 |
+
# ===========================
|
| 399 |
+
# MÉTRICAS DE CREACIÓN
|
| 400 |
+
# ===========================
|
| 401 |
+
total_passes = df['Total_Att'].sum()
|
| 402 |
+
progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
|
| 403 |
+
final_third_involvement = (df['1/3'].sum() / total_passes) if total_passes > 0 else 0
|
| 404 |
+
|
| 405 |
+
total_sca = df['SCA Types_SCA'].sum()
|
| 406 |
+
assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
|
| 407 |
+
creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0
|
| 408 |
+
|
| 409 |
+
# ===========================
|
| 410 |
+
# MÉTRICAS DEFENSIVAS
|
| 411 |
+
# ===========================
|
| 412 |
+
total_tackles = df['Tackles_Tkl'].sum()
|
| 413 |
+
high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
|
| 414 |
+
interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0
|
| 415 |
+
|
| 416 |
+
total_defensive_actions = total_tackles + df['Int'].sum()
|
| 417 |
+
clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0
|
| 418 |
+
|
| 419 |
+
# ===========================
|
| 420 |
+
# MÉTRICAS DE POSESIÓN
|
| 421 |
+
# ===========================
|
| 422 |
+
total_carries = df['Carries_Carries'].sum()
|
| 423 |
+
progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0
|
| 424 |
+
|
| 425 |
+
total_prog_passes = df['PrgP'].sum()
|
| 426 |
+
carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0
|
| 427 |
+
|
| 428 |
+
# ===========================
|
| 429 |
+
# ÍNDICES COMPUESTOS
|
| 430 |
+
# ===========================
|
| 431 |
+
avg_gf_raw = df['GF'].mean()
|
| 432 |
+
avg_xg_raw = df['Expected_xG'].mean()
|
| 433 |
+
avg_sot = df['Standard_SoT'].mean()
|
| 434 |
+
avg_sh = df['Standard_Sh'].mean()
|
| 435 |
+
offensive_index = (avg_gf_raw + avg_xg_raw) * (avg_sot / avg_sh) if avg_sh > 0 else 0
|
| 436 |
+
|
| 437 |
+
avg_prgp = df['PrgP'].mean()
|
| 438 |
+
avg_prgc = df['Carries_PrgC'].mean()
|
| 439 |
+
avg_poss_raw = df['Poss'].mean()
|
| 440 |
+
transition_index = ((avg_prgp + avg_prgc) / avg_poss_raw) if avg_poss_raw > 0 else 0
|
| 441 |
+
|
| 442 |
+
# ✅ RETORNAR 23 VALORES
|
| 443 |
+
return (
|
| 444 |
+
avg_ck, # 0
|
| 445 |
+
var_ck, # 1 - ✅ NUEVO
|
| 446 |
+
avg_xg, # 2
|
| 447 |
+
avg_sca, # 3
|
| 448 |
+
avg_cross, # 4
|
| 449 |
+
avg_poss, # 5
|
| 450 |
+
avg_att_3rd, # 6
|
| 451 |
+
avg_gf, # 7
|
| 452 |
+
avg_ga, # 8
|
| 453 |
+
sh_accuracy, # 9
|
| 454 |
+
xg_shot, # 10
|
| 455 |
+
attacking_presence, # 11
|
| 456 |
+
possession_shot, # 12
|
| 457 |
+
progressive_pass_ratio, # 13
|
| 458 |
+
final_third_involvement, # 14
|
| 459 |
+
assist_sca, # 15
|
| 460 |
+
creative_efficiency, # 16
|
| 461 |
+
high_press_intensity, # 17
|
| 462 |
+
interception_tackle, # 18
|
| 463 |
+
clearance_ratio, # 19
|
| 464 |
+
progressive_carry_ratio, # 20
|
| 465 |
+
carry_pass_balance, # 21
|
| 466 |
+
offensive_index, # 22
|
| 467 |
+
transition_index # 23
|
| 468 |
+
)
|
| 469 |
+
|
| 470 |
+
# ===========================
|
| 471 |
+
# PROMEDIOS DE LIGA (is_team=False)
|
| 472 |
+
# ===========================
|
| 473 |
+
avg_cross = df['Performance_Crs'].mean()
|
| 474 |
+
avg_att_3rd = df['Touches_Att 3rd'].mean()
|
| 475 |
+
avg_sca = df['SCA Types_SCA'].mean()
|
| 476 |
+
avg_xg = df['Expected_xG'].mean()
|
| 477 |
+
var_ck = df['Pass Types_CK'].var() if len(df) > 1 else 0
|
| 478 |
+
avg_ck = df['Pass Types_CK'].mean()
|
| 479 |
+
avg_gf = df['GF'].mean()
|
| 480 |
+
avg_ga = df['GA'].mean()
|
| 481 |
+
avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
|
| 482 |
+
|
| 483 |
+
return (
|
| 484 |
+
var_ck, # 0
|
| 485 |
+
avg_xg, # 1
|
| 486 |
+
avg_sca, # 2
|
| 487 |
+
avg_cross, # 3
|
| 488 |
+
avg_att_3rd, # 4
|
| 489 |
+
avg_gf, # 5
|
| 490 |
+
avg_ga, # 6
|
| 491 |
+
avg_sh, # 7
|
| 492 |
+
avg_ck # 8
|
| 493 |
+
)
|
| 494 |
+
|
| 495 |
+
def get_points_from_result(result):
|
| 496 |
+
"""Convierte resultado (W/D/L) a puntos"""
|
| 497 |
+
if result == 'W':
|
| 498 |
+
return 3
|
| 499 |
+
elif result == 'D':
|
| 500 |
+
return 1
|
| 501 |
+
else:
|
| 502 |
+
return 0
|
| 503 |
+
|
| 504 |
+
def get_team_ppp(df, team, season, round_num, league=None):
|
| 505 |
+
"""Calcula puntos por partido (PPP) de un equipo"""
|
| 506 |
+
team_matches = df[
|
| 507 |
+
(df['team'] == team) &
|
| 508 |
+
(df['season'] == season) &
|
| 509 |
+
(df['round'] < round_num)
|
| 510 |
+
]
|
| 511 |
+
|
| 512 |
+
if league is not None:
|
| 513 |
+
team_matches = team_matches[team_matches['league'] == league]
|
| 514 |
+
|
| 515 |
+
if len(team_matches) == 0:
|
| 516 |
+
return 0.0
|
| 517 |
+
|
| 518 |
+
total_points = team_matches['result'].apply(get_points_from_result).sum()
|
| 519 |
+
ppp = total_points / len(team_matches)
|
| 520 |
+
|
| 521 |
+
return ppp
|
| 522 |
+
|
| 523 |
+
def get_ppp_difference(df, local, away, season, round_num, league=None):
|
| 524 |
+
"""Calcula diferencia de PPP entre local y visitante"""
|
| 525 |
+
local_ppp = get_team_ppp(df, local, season, round_num, league)
|
| 526 |
+
away_ppp = get_team_ppp(df, away, season, round_num, league)
|
| 527 |
+
return local_ppp - away_ppp
|
| 528 |
+
|
| 529 |
+
'''
|
| 530 |
+
|
| 531 |
+
def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP",df_database=pd.DataFrame(),xgb_model="",scaler="",lst_years=[]):
    """
    Predict the total corners of one match and print a full betting report.

    The function builds the feature vector (points per game, per-venue
    averages, head-to-head averages, opponent averages and league one-hot
    flags), scales it, asks the model for a prediction, derives Poisson
    probabilities from that prediction and prints historical statistics,
    exact probabilities, over/under lines, recommendations and a risk
    analysis.

    Args:
        local: Home team name.
        visitante: Away team name.
        jornada: Round number; values below 5 are rejected immediately.
        temporada: Season label compared against ``df_database['season']``
            (format "2526").
        league_code: League code compared against ``df_database['league']``;
            one-hot encoded over ESP, GER, FRA, ITA, NED, ENG, POR and BEL.
        df_database: DataFrame with the historical match rows.
        xgb_model: Fitted model exposing ``predict``.
        scaler: Fitted scaler exposing ``feature_names_in_`` and
            ``transform``.
        lst_years: Ordered season labels; the seasons up to and including
            ``temporada`` are passed to the head-to-head lookup.

    Returns:
        dict: On success, the prediction plus PPP values, risk data,
        historical stats, Poisson probabilities, the most probable value and
        the 80% range. On any failure, ``{"error": <message>,
        "prediccion": None}`` (exceptions are caught and reported, never
        propagated).

    Note:
        The default arguments are created once at definition time; they are
        only read here, never mutated.
    """

    print(f"\n{'='*80}")
    print(f"🏟️ {local} vs {visitante}")
    print(f"📅 Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
    print(f"{'='*80}")

    if jornada < 5:
        return {
            "error": "❌ Se necesitan al menos 5 jornadas previas",
            "prediccion": None
        }

    try:
        # ===========================
        # FEATURE EXTRACTION
        # ===========================

        # League-wide averages for the rounds already played this season.
        lst_avg = get_average(
            df_database[
                (df_database['season'] == temporada) &
                (df_database['round'] < jornada) &
                (df_database['league'] == league_code)
            ],
            is_team=False
        )

        (team1_home, team1_away, team1_opp_home, team1_opp_away,
         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
            df_database, temporada, jornada, local, visitante, league=league_code
        )

        # Head-to-head uses every season up to and including the current one.
        index = lst_years.index(temporada)
        result = lst_years[:index+1]
        team1_h2h, team2_h2h = get_head_2_head(
            df_database, local, visitante, seasons=result, league=league_code
        )

        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
        ppp_diff = local_ppp - away_ppp

        # ===========================
        # FEATURE DICTIONARY
        # ===========================

        def create_line(df, is_form=True, is_team=False, use_advanced=True):
            # "Form" rows only look at the six most recent matches.
            if is_form:
                df = df[-6:]
            promedios = get_average(df, is_team, lst_avg)
            # The reduced feature set keeps only the first nine values.
            return promedios if use_advanced else promedios[:9]

        def media(df, columna):
            # Mean of a column, or 0 when there are no rows to average.
            return df[columna].mean() if len(df) > 0 else 0

        # Insertion order matters: values and names are generated in lockstep
        # from this dictionary further below.
        dic_features = {}

        dic_features['ppp_local'] = (local_ppp,)
        dic_features['ppp_away'] = (away_ppp,)
        dic_features['ppp_difference'] = (ppp_diff,)

        dic_features['lst_team1_home_form'] = create_line(team1_home, True, True, use_advanced=True)
        dic_features['lst_team1_home_general'] = create_line(team1_home, False, True, use_advanced=True)
        dic_features['lst_team1_away_form'] = create_line(team1_away, True, True, use_advanced=True)
        dic_features['lst_team1_away_general'] = create_line(team1_away, False, True, use_advanced=True)

        dic_features['lst_team2_home_form'] = create_line(team2_home, True, True, use_advanced=True)
        dic_features['lst_team2_home_general'] = create_line(team2_home, False, True, use_advanced=True)
        dic_features['lst_team2_away_form'] = create_line(team2_away, True, True, use_advanced=True)
        dic_features['lst_team2_away_general'] = create_line(team2_away, False, True, use_advanced=True)

        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)

        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)

        # One-hot league flags, in the same order as the original columns.
        for code in ('ESP', 'GER', 'FRA', 'ITA', 'NED', 'ENG', 'POR', 'BEL'):
            dic_features[f'league_{code}'] = (1 if league_code == code else 0,)

        # ===========================
        # FEATURE VECTOR
        # ===========================

        lst_base_advanced = [
            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
            "high_press_intensity", "interception_tackle", "clearance_ratio",
            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
        ]

        lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]

        lst_features_values = []
        lst_features_names = []

        for key in dic_features:
            lst_features_values.extend(list(dic_features[key]))

            if key in ['ppp_local', 'ppp_away', 'ppp_difference']:
                lst_features_names.append(key)
            elif key.startswith('league_'):
                lst_features_names.append(key)
            elif key in ['lst_team1_opp_away', 'lst_team2_opp_home']:
                lst_features_names.extend([f"{key}_{col}" for col in lst_base_original])
            else:
                lst_features_names.extend([f"{key}_{col}" for col in lst_base_advanced])

        # A helper returning a tuple of unexpected length would otherwise
        # surface as an opaque pandas ValueError when building the frame.
        if len(lst_features_values) != len(lst_features_names):
            print(f"\n⚠️ ERROR: Número de features no coincide")
            print(f" Valores generados: {len(lst_features_values)}")
            print(f" Nombres generados: {len(lst_features_names)}")
            return {"error": "Desajuste de features", "prediccion": None}

        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)

        expected_features = scaler.feature_names_in_

        # Compare names as well as counts: equal counts with different names
        # would otherwise fail later with a KeyError during column selection.
        faltantes = [col for col in expected_features if col not in df_input.columns]

        if len(df_input.columns) != len(expected_features) or faltantes:
            print(f"\n⚠️ ERROR: Número de features no coincide")
            print(f" Esperadas: {len(expected_features)}")
            print(f" Recibidas: {len(df_input.columns)}")
            if faltantes:
                print(f" Faltantes: {faltantes}")
            return {"error": "Desajuste de features", "prediccion": None}

        # Reorder the columns to the order the scaler was fitted with.
        df_input = df_input[expected_features]

        X_input_scaled = pd.DataFrame(
            scaler.transform(df_input),
            columns=df_input.columns
        )

        # ===========================
        # PREDICTION
        # ===========================

        # NOTE(review): presumably predict() yields NumPy scalars; converting
        # to a built-in float keeps the returned dict made of plain Python
        # numbers (e.g. for JSON serialization) — confirm against callers.
        prediccion = float(xgb_model.predict(X_input_scaled)[0])

        # ===========================
        # POISSON PROBABILITY ANALYSIS
        # ===========================

        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)

        # ===========================
        # DETAILED STATISTICS
        # ===========================

        local_ck_home = media(team1_home, 'Pass Types_CK')
        local_xg_home = media(team1_home, 'Expected_xG')
        local_poss_home = media(team1_home, 'Poss')

        away_ck_away = media(team2_away, 'Pass Types_CK')
        away_xg_away = media(team2_away, 'Expected_xG')
        away_poss_away = media(team2_away, 'Poss')

        local_ck_received = media(team1_opp_home, 'Pass Types_CK')
        away_ck_received = media(team2_opp_away, 'Pass Types_CK')

        partido_ck_esperado = local_ck_home + away_ck_away

        h2h_ck_local = media(team1_h2h, 'Pass Types_CK')
        h2h_ck_away = media(team2_h2h, 'Pass Types_CK')
        h2h_total = h2h_ck_local + h2h_ck_away

        # ===========================
        # REPORT: PREDICTION AND HISTORY
        # ===========================

        print(f"\n🎲 PREDICCIÓN MODELO: {prediccion:.2f} corners totales")
        print(f" PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")

        print(f"\n📊 ESTADÍSTICAS HISTÓRICAS:")
        print(f" {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
        print(f" {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
        print(f" Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
        print(f" Total esperado (suma): {partido_ck_esperado:.1f} corners")

        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
            print(f"\n🔄 HEAD TO HEAD (últimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
            print(f" {local}: {h2h_ck_local:.1f} CK/partido")
            print(f" {visitante}: {h2h_ck_away:.1f} CK/partido")
            print(f" Promedio total: {h2h_total:.1f} corners")

        # ===========================
        # REPORT: EXACT PROBABILITIES
        # ===========================

        # (corner count, probability) pair with the highest probability.
        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])

        print(f"\n📈 PROBABILIDADES EXACTAS (Poisson):")
        for k in sorted(analisis['exactas'].keys()):
            prob = analisis['exactas'][k]
            bar = '█' * int(prob / 2)
            marca = ' ⭐' if k == valor_mas_probable[0] else ''
            print(f" {k:2d} corners: {prob:5.2f}% {bar}{marca}")

        print(f"\n✅ Valor más probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")

        # 80% range: add outcomes from most to least likely until their
        # cumulative probability reaches 80, then report the min/max taken.
        probs_sorted = sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True)
        cumsum = 0
        rango_80 = []
        for val, prob in probs_sorted:
            cumsum += prob
            rango_80.append(val)
            if cumsum >= 80:
                break

        print(f"📊 Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")

        # ===========================
        # REPORT: OVER/UNDER WITH IMPLIED ODDS
        # ===========================

        print(f"\n🎯 ANÁLISIS OVER/UNDER:")
        print(f"{'Línea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
        print("-" * 85)

        for linea in [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]:
            prob_over = analisis['over'][linea]
            prob_under = analisis['under'][linea]

            # Implied decimal odds are the inverse of the probability; 999 is
            # the placeholder used when the probability is zero.
            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999

            conf_over = clasificar_confianza(prob_over)

            print(f"O/U {linea:<5} {prob_over:6.2f}% @{cuota_impl_over:5.2f} {conf_over:<15} {prob_under:6.2f}% @{cuota_impl_under:5.2f}")

        # ===========================
        # REPORT: RECOMMENDATIONS
        # ===========================

        print(f"\n💡 RECOMENDACIONES DE APUESTA:")

        # Only lines at 55% or more (medium confidence or better) qualify.
        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]

        if mejores_over:
            print(f"\n✅ OVER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_over, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • Over {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")

        if mejores_under:
            print(f"\n✅ UNDER con confianza MEDIA/ALTA:")
            for linea, prob in sorted(mejores_under, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • Under {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")

        if not mejores_over and not mejores_under:
            print(f" ⚠️ No hay apuestas con confianza MEDIA o superior")

        # ===========================
        # REPORT: RISK ANALYSIS
        # ===========================

        df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
        riesgo = obtener_fiabilidad_partido(local, visitante, df_varianza_temp)

        print(f"\n⚠️ ANÁLISIS DE RIESGO:")
        print(f" Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
        print(f" Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
        print(f" 🎲 FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
        print(f" 💡 {riesgo['mensaje']}")

        # ===========================
        # RESULT DICTIONARY
        # ===========================

        return {
            "prediccion": round(prediccion, 2),
            "local": local,
            "visitante": visitante,
            "ppp_local": local_ppp,
            "ppp_away": away_ppp,
            "ppp_diff": ppp_diff,
            "riesgo": riesgo,
            "stats": {
                "local_ck": local_ck_home,
                "away_ck": away_ck_away,
                "local_ck_received": local_ck_received,
                "away_ck_received": away_ck_received,
                "h2h_total": h2h_total,
                "partido_esperado": partido_ck_esperado
            },
            "probabilidades_exactas": analisis['exactas'],
            "probabilidades_over": analisis['over'],
            "probabilidades_under": analisis['under'],
            "valor_mas_probable": valor_mas_probable[0],
            "prob_mas_probable": valor_mas_probable[1],
            "rango_80": (min(rango_80), max(rango_80))
        }

    except Exception as e:
        # Deliberate catch-all boundary: callers receive an error dict
        # instead of an exception; the traceback is printed for diagnosis.
        print(f"\n❌ ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e), "prediccion": None}
|
| 851 |
+
|
| 852 |
+
def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP", export_csv=True, filename=None,df_database=pd.DataFrame(),xgb_model="",scaler="",lst_years=[]):
    """
    Predict corners for several matches and optionally export the results.

    Each match is run through ``predecir_corners``; matches whose result
    carries an error are reported and skipped. Successful results are
    flattened into one row per match.

    Args:
        partidos: List of tuples [(local1, visitante1), (local2, visitante2), ...].
        jornada: Round number.
        temporada: Season label (format "2526").
        league_code: League code, forwarded unchanged to ``predecir_corners``.
        export_csv: When True and at least one row exists, write a CSV.
        filename: CSV file name; defaults to
            ``predicciones_{league_code}_J{jornada}_{temporada}.csv``.
        df_database: Historical data, forwarded to ``predecir_corners``.
        xgb_model: Model, forwarded to ``predecir_corners``.
        scaler: Scaler, forwarded to ``predecir_corners``.
        lst_years: Season list, forwarded to ``predecir_corners``.

    Returns:
        DataFrame with one row per successfully analysed match. It is empty
        (no columns) when no match could be analysed.
    """

    resultados = []

    print("\n" + "=" * 120)
    print(f"🎯 PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
    print("=" * 120)

    # Column groups generated per betting line: (column prefix, result key,
    # lines). Kept in this order so the CSV column order is unchanged:
    # Over 6.5 to 10.5 first, then Under 12.5 down to 9.5.
    grupos_lineas = (
        ('Over', 'probabilidades_over', [6.5, 7.5, 8.5, 9.5, 10.5]),
        ('Under', 'probabilidades_under', [12.5, 11.5, 10.5, 9.5]),
    )

    for idx, (local, visitante) in enumerate(partidos, 1):
        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")

        resultado = predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=df_database,
            xgb_model=xgb_model,
            scaler=scaler,
            lst_years=lst_years)

        if resultado.get("error"):
            print(f" ❌ Error: {resultado['error']}")
            continue

        stats = resultado['stats']
        riesgo = resultado['riesgo']

        # ===========================
        # BUILD THE DATA ROW
        # ===========================

        fila = {
            'Partido': f"{local} vs {visitante}",
            'Local': local,
            'Visitante': visitante,
            'Liga': league_code,
            'Jornada': jornada,
            'Temporada': temporada,

            # Prediction
            'Prediccion': resultado['prediccion'],
            'Valor_Mas_Probable': resultado['valor_mas_probable'],
            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
            'Rango_80%_Min': resultado['rango_80'][0],
            'Rango_80%_Max': resultado['rango_80'][1],

            # Points per game
            'PPP_Local': round(resultado['ppp_local'], 2),
            'PPP_Away': round(resultado['ppp_away'], 2),
            'PPP_Diferencia': round(resultado['ppp_diff'], 2),

            # Historical statistics
            'CK_Local_Casa': round(stats['local_ck'], 1),
            'CK_Away_Fuera': round(stats['away_ck'], 1),
            'CK_Local_Recibidos': round(stats['local_ck_received'], 1),
            'CK_Away_Recibidos': round(stats['away_ck_received'], 1),
            'CK_Esperado_Suma': round(stats['partido_esperado'], 1),
            # A zero total means there was no head-to-head data to average.
            'CK_H2H_Total': round(stats['h2h_total'], 1) if stats['h2h_total'] > 0 else 'N/A',

            # Risk
            'Fiabilidad_Partido': riesgo['fiabilidad'],
            'Score_Fiabilidad': round(riesgo['score_promedio'], 1),
            'Nivel_Local': riesgo['nivel_local'],
            'Nivel_Away': riesgo['nivel_away'],
            'CV_Local_%': round(riesgo['cv_local'], 1),
            'CV_Away_%': round(riesgo['cv_away'], 1),
        }

        # ===========================
        # OVER / UNDER COLUMNS
        # ===========================
        for prefijo, clave, lineas in grupos_lineas:
            for linea in lineas:
                # Lines missing from the analysis count as probability 0.
                prob = resultado[clave].get(linea, 0)
                # Implied odds; 999 is the placeholder for zero probability.
                cuota_impl = round(100 / prob, 2) if prob > 0 else 999
                conf = clasificar_confianza(prob)

                fila[f'{prefijo}_{linea}_Prob_%'] = round(prob, 2)
                fila[f'{prefijo}_{linea}_Cuota'] = cuota_impl
                fila[f'{prefijo}_{linea}_Confianza'] = conf

        # ===========================
        # RECOMMENDATIONS
        # ===========================

        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]

        if riesgo['score_promedio'] < 35:
            fila['Recomendacion'] = "⛔ EVITAR - Baja fiabilidad"
            fila['Es_Apostable'] = "NO"
        elif not mejores_over and not mejores_under:
            fila['Recomendacion'] = "⚠️ NO RECOMENDADO - Sin confianza suficiente"
            fila['Es_Apostable'] = "NO"
        else:
            recomendaciones = []

            if mejores_over:
                mejor_over = max(mejores_over, key=lambda x: x[1])
                cuota_over = round(100 / mejor_over[1], 2)
                recomendaciones.append(f"Over {mejor_over[0]} ({mejor_over[1]:.1f}% @{cuota_over})")

            if mejores_under:
                mejor_under = max(mejores_under, key=lambda x: x[1])
                cuota_under = round(100 / mejor_under[1], 2)
                recomendaciones.append(f"Under {mejor_under[0]} ({mejor_under[1]:.1f}% @{cuota_under})")

            fila['Recomendacion'] = " | ".join(recomendaciones)

            if riesgo['score_promedio'] >= 65:
                fila['Es_Apostable'] = "SÍ ⭐⭐⭐"
            elif riesgo['score_promedio'] >= 50:
                fila['Es_Apostable'] = "SÍ ✅"
            else:
                fila['Es_Apostable'] = "PRECAUCIÓN 🟡"

        fila['Mensaje_Riesgo'] = riesgo['mensaje']

        resultados.append(fila)
        print(f" ✅ Completado")

    # ===========================
    # BUILD THE DATAFRAME
    # ===========================

    df_resultados = pd.DataFrame(resultados)

    print("\n" + "=" * 120)
    print(f"✅ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
    print("=" * 120)

    # With no successful prediction the frame has no columns at all, so the
    # summary below would raise KeyError on 'Es_Apostable'. Stop here instead.
    if df_resultados.empty:
        print("\n⚠️ Ningún partido pudo ser analizado; no hay resultados que exportar ni resumir")
        return df_resultados

    # ===========================
    # EXPORT TO CSV
    # ===========================

    if export_csv:
        if filename is None:
            filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"

        df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"\n💾 Resultados exportados a: {filename}")

    # ===========================
    # SUMMARY
    # ===========================

    apostable = df_resultados['Es_Apostable']

    print(f"\n📊 RESUMEN DE APUESTAS:")
    print(f" Partidos apostables: {int(apostable.str.contains('SÍ').sum())} / {len(df_resultados)}")
    print(f" Partidos ALTA confianza (⭐⭐⭐): {int((apostable == 'SÍ ⭐⭐⭐').sum())}")
    print(f" Partidos MEDIA confianza (✅): {int((apostable == 'SÍ ✅').sum())}")
    print(f" Partidos a evitar (⛔): {int((apostable == 'NO').sum())}")

    return df_resultados
|
| 1030 |
+
|
| 1031 |
+
def mostrar_resumen_batch(df_resultados):
    """Print a visual summary of the bettable matches in a batch result.

    Keeps only the rows whose ``Es_Apostable`` value contains ``'SÍ'``, sorts
    them by ``Score_Fiabilidad`` (highest first) and prints one section per
    match, followed by every Over/Under line (7.5, 8.5, 9.5, 10.5) whose
    probability is at least 55%.

    Args:
        df_resultados: DataFrame with one row per analysed match. The columns
            read here are ``Es_Apostable``, ``Score_Fiabilidad``, ``Partido``,
            ``Prediccion``, ``Valor_Mas_Probable``,
            ``Prob_Valor_Mas_Probable_%``, ``CK_Local_Casa``,
            ``CK_Away_Fuera``, ``CK_H2H_Total``, ``Fiabilidad_Partido``,
            ``Recomendacion`` and the optional
            ``Over_/Under_<line>_{Prob_%,Cuota,Confianza}`` columns.

    Returns:
        None. Everything is written to stdout.
    """
    print("\n" + "=" * 120)
    print("🎯 MEJORES OPORTUNIDADES DE APUESTA")
    print("=" * 120)

    # A DataFrame built from an empty result list has no columns at all, so
    # indexing 'Es_Apostable' would raise KeyError; report "nothing found".
    if df_resultados is None or 'Es_Apostable' not in df_resultados.columns:
        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
        return

    # Keep only bettable matches; na=False treats missing labels as "no"
    # instead of producing a NaN mask that cannot be used for indexing.
    es_apostable = df_resultados['Es_Apostable'].str.contains('SÍ', na=False)
    df_apostables = df_resultados[es_apostable].copy()

    if len(df_apostables) == 0:
        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
        return

    # Most reliable matches first
    df_apostables = df_apostables.sort_values('Score_Fiabilidad', ascending=False)

    for _, row in df_apostables.iterrows():
        print(f"\n{'='*120}")
        print(f"🏟️ {row['Partido']}")
        print(f"{'='*120}")
        print(f"📊 Predicción: {row['Prediccion']:.2f} corners | Valor más probable: {row['Valor_Mas_Probable']} ({row['Prob_Valor_Mas_Probable_%']:.1f}%)")
        print(f"📈 Histórico: Local {row['CK_Local_Casa']:.1f} CK | Away {row['CK_Away_Fuera']:.1f} CK | H2H: {row['CK_H2H_Total']}")
        print(f"🎲 Fiabilidad: {row['Fiabilidad_Partido']} (Score: {row['Score_Fiabilidad']:.1f}/100)")
        print(f"💡 {row['Recomendacion']}")

        # Show only the lines with a high probability (>= 55%)
        print(f"\n 📌 Líneas destacadas:")
        for linea in [7.5, 8.5, 9.5, 10.5]:
            over_prob = row.get(f'Over_{linea}_Prob_%', 0)
            under_prob = row.get(f'Under_{linea}_Prob_%', 0)

            if over_prob >= 55:
                cuota = row.get(f'Over_{linea}_Cuota', 0)
                conf = row.get(f'Over_{linea}_Confianza', '')
                print(f" • Over {linea}: {over_prob:.1f}% @{cuota:.2f} - {conf}")

            if under_prob >= 55:
                cuota = row.get(f'Under_{linea}_Cuota', 0)
                conf = row.get(f'Under_{linea}_Confianza', '')
                print(f" • Under {linea}: {under_prob:.1f}% @{cuota:.2f} - {conf}")
|
| 1072 |
+
|
| 1073 |
+
|
| 1074 |
+
|
| 1075 |
+
|
| 1076 |
+
class USE_MODEL():
    """Load the corner-prediction model and datasets and expose prediction helpers.

    Constructing an instance downloads the XGBoost model and the scaler
    (pickle files) plus the cleaned datasets from GitHub, and then sets the
    list of season codes passed to the prediction functions.
    """

    def __init__(self):
        self.load_models()
        self.load_data()
        self.init_variables()

    @staticmethod
    def _load_pickle_from_bytes(content):
        """Load a joblib pickle from raw bytes via a temporary file that is always removed."""
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pkl') as tmp_file:
            tmp_file.write(content)
            tmp_path = tmp_file.name
        try:
            # NOTE(review): joblib.load unpickles downloaded content, which can
            # execute arbitrary code; only safe while the repository is trusted.
            return joblib.load(tmp_path)
        finally:
            # Clean up even when loading fails so no temp file is left behind
            os.unlink(tmp_path)

    def load_models(self):
        """Download the model and scaler from GitHub raw URLs and load them.

        Sets ``self.xgb_model`` and ``self.scaler``.

        Raises:
            Exception: if a download fails or a file cannot be loaded; the
                original error is chained as ``__cause__``.
        """

        print("📦 Cargando modelos desde GitHub...")

        # Direct-download URLs (raw.githubusercontent.com)
        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/models"
        model_url = f"{base_url}/xgboost_corners_v4_retrain.pkl"
        scaler_url = f"{base_url}/scaler_corners_v4_retrain.pkl"

        try:
            # Download the model
            print(f"📥 Descargando modelo desde: {model_url}")
            response_model = requests.get(model_url, timeout=30)
            response_model.raise_for_status()

            # Download the scaler
            print(f"📥 Descargando scaler desde: {scaler_url}")
            response_scaler = requests.get(scaler_url, timeout=30)
            response_scaler.raise_for_status()

            # Persist temporarily, load, and remove the temporary files
            self.xgb_model = self._load_pickle_from_bytes(response_model.content)
            self.scaler = self._load_pickle_from_bytes(response_scaler.content)

            print("✅ Modelos cargados correctamente desde GitHub")

        except requests.exceptions.RequestException as e:
            raise Exception(f"❌ Error descargando modelos: {str(e)}") from e
        except Exception as e:
            raise Exception(f"❌ Error cargando modelos: {str(e)}") from e

    def load_data(self):
        """Download the cleaned datasets from GitHub into ``self.df_dataset``.

        The historic dataset is mandatory. The current-year dataset is
        optional: if it cannot be read, only the historic data is used.

        Raises:
            FileNotFoundError: if the historic dataset cannot be loaded or the
                clean-up step fails; the original error is chained.
        """

        print("📂 Cargando datos desde GitHub...")

        base_url = "https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/dataset/cleaned"
        historic_url = f"{base_url}/dataset_cleaned.csv"
        current_url = f"{base_url}/dataset_cleaned_current_year.csv"

        try:
            # Load the historic dataset
            print(f"📥 Descargando dataset histórico...")
            self.df_dataset_historic = pd.read_csv(historic_url)
            print(f"✅ Dataset histórico cargado: {len(self.df_dataset_historic)} registros")

            # Try to load the current year (best effort)
            try:
                print(f"📥 Descargando dataset año actual...")
                self.df_dataset_current_year = pd.read_csv(current_url)
                print(f"✅ Dataset año actual cargado: {len(self.df_dataset_current_year)} registros")
                self.df_dataset = pd.concat([self.df_dataset_historic, self.df_dataset_current_year])
            except Exception:
                # `except Exception` (not a bare except) so KeyboardInterrupt
                # and SystemExit are not swallowed by the fallback.
                print("⚠️ No se pudo cargar dataset del año actual, usando solo histórico")
                self.df_dataset = self.df_dataset_historic

            # Clean-up
            self.df_dataset["season"] = self.df_dataset["season"].astype(str)
            # Assign the result back: in-place fillna on a selected column is
            # chained assignment and may not update the parent DataFrame.
            self.df_dataset["Performance_Save%"] = self.df_dataset["Performance_Save%"].fillna(0)

            print(f"✅ Total registros: {len(self.df_dataset)}")

        except Exception as e:
            raise FileNotFoundError(
                f"\n❌ ERROR: No se pudieron cargar los datos desde GitHub\n"
                f" Error: {str(e)}\n\n"
                f"💡 Verifica que los archivos existan en el repositorio\n"
            ) from e

    def init_variables(self):
        """Set ``self.lst_years``, the season codes handed to the predictors."""
        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
        print("✅ Variables inicializadas")

    def consume_model_batch(self, partidos, jornada, temporada, league_code):
        """Predict a batch of matches and export the result to CSV.

        The CSV is written to
        ``results/<league_code>/<league_code>-<temporada>-<jornada>-predicciones.csv``;
        the directory is created when missing.

        Returns:
            Whatever ``predecir_partidos_batch`` returns for these arguments.
        """
        # Build the path with os.path.join so it is valid on every OS (the
        # previous literal backslashes only formed a path on Windows).
        output_dir = os.path.join("results", str(league_code))
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(
            output_dir,
            f"{league_code}-{temporada}-{jornada}-predicciones.csv",
        )

        df_predict = predecir_partidos_batch(
            partidos=partidos,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            export_csv=True,
            filename=filename,
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years
        )

        return df_predict

    def consume_model_single(self, local, visitante, jornada, temporada, league_code):
        """Predict a single match by delegating to ``predecir_corners``."""
        return predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years
        )

    def kelly_stats(self, p, odds, fraction=0.2):
        """Return the fractional Kelly stake for a bet.

        Args:
            p: Estimated probability of winning (0-1).
            odds: Decimal odds offered.
            fraction: Multiplier applied to the full Kelly stake (for example
                0.1 for a 10% Kelly).

        Returns:
            ``fraction * max(f*, 0)`` with ``f* = (b*p - q) / b``,
            ``b = odds - 1`` and ``q = 1 - p``. Returns 0.0 when
            ``odds <= 1``, where the bet has no positive net payout.
        """
        b = odds - 1
        if b <= 0:
            # Avoids division by zero (odds == 1) and a bogus positive stake
            # (odds < 1 makes numerator and denominator both negative).
            return 0.0
        q = 1 - p
        f_star = (b * p - q) / b
        f_star = max(f_star, 0)  # never recommend a negative stake
        return f_star * fraction
|
src/models/__init__.py
ADDED
|
File without changes
|
src/models/test_model.py
ADDED
|
@@ -0,0 +1,1148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ===========================
|
| 2 |
+
# SISTEMA DE PREDICCIÓN DE CORNERS - OPTIMIZADO PARA APUESTAS (VERSIÓN COMPLETA)
|
| 3 |
+
# ===========================
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import joblib
|
| 8 |
+
from scipy.stats import poisson
|
| 9 |
+
from scipy import stats
|
| 10 |
+
|
| 11 |
+
# ===========================
|
| 12 |
+
# 1. FUNCIONES FIABILIDAD
|
| 13 |
+
# ===========================
|
| 14 |
+
|
| 15 |
+
def analizar_fiabilidad_equipos(df_database, temporada="2526", min_partidos=5):
    """Score how reliable each team's corner counts are for betting.

    For every team of the given season the corners taken per match
    (``'Pass Types_CK'`` of the rows where the team is ``team``) are
    summarised with variability, consistency, trend, outlier and range
    metrics, which are combined into a weighted 0-100 reliability score.

    Args:
        df_database: DataFrame with at least the columns ``season``,
            ``team``, ``opponent`` and ``'Pass Types_CK'``.
        temporada: Season code compared against the ``season`` column.
        min_partidos: Minimum number of matches a team needs to be analysed.
            Teams with fewer than two matches are always skipped because the
            trend and change metrics need at least two observations.

    Returns:
        DataFrame with one row per analysed team, sorted by
        ``Score_Fiabilidad`` in descending order. When no team qualifies the
        DataFrame is empty but still has all the result columns.
    """
    columnas = [
        'Equipo', 'Partidos',
        'Media_CK', 'Mediana_CK', 'Std_CK', 'CV_%',
        'Pct_Cerca_Media', 'Cambios_Bruscos_%', 'IQR',
        'Rango', 'Rango_Norm', 'Min', 'Max',
        'Outliers', 'Pct_Outliers', 'Max_ZScore',
        'Tendencia_Slope', 'Autocorr',
        'Score_Fiabilidad', 'Nivel', 'Color',
    ]

    df_temp = df_database[df_database['season'] == temporada].copy()
    resultados = []
    equipos = pd.concat([df_temp['team'], df_temp['opponent']]).unique()

    # linregress and the change ratio need at least two matches
    minimo_efectivo = max(min_partidos, 2)

    for equipo in equipos:
        # Matches played by this team
        partidos_equipo = df_temp[df_temp['team'] == equipo]

        if len(partidos_equipo) < minimo_efectivo:
            continue

        ck_sacados = partidos_equipo['Pass Types_CK'].values

        # ===========================
        # 1. VARIABILITY METRICS
        # ===========================
        media = ck_sacados.mean()
        std = ck_sacados.std()
        cv = (std / media * 100) if media > 0 else 0

        # ===========================
        # 2. CONSISTENCY METRICS
        # ===========================

        # 2.1 Percentage of matches close to the mean (within 2 corners)
        cerca_media = np.sum(np.abs(ck_sacados - media) <= 2) / len(ck_sacados) * 100

        # 2.2 Streaks: consecutive matches differing by more than 4 corners
        cambios_bruscos = np.sum(np.abs(np.diff(ck_sacados)) > 4)
        pct_cambios_bruscos = cambios_bruscos / (len(ck_sacados) - 1) * 100

        # 2.3 Quartiles (Q1, Q2 = median, Q3)
        q1, q2, q3 = np.percentile(ck_sacados, [25, 50, 75])
        iqr = q3 - q1  # interquartile range (more robust than std)

        # ===========================
        # 3. TREND METRICS
        # ===========================

        # 3.1 Linear trend over the match sequence
        jornadas = np.arange(len(ck_sacados))
        slope = stats.linregress(jornadas, ck_sacados).slope

        # 3.2 Lag-1 autocorrelation (does a result predict the next one?)
        if len(ck_sacados) > 2:
            with np.errstate(invalid='ignore', divide='ignore'):
                autocorr = np.corrcoef(ck_sacados[:-1], ck_sacados[1:])[0, 1]
            # A constant slice has zero variance and yields NaN; report 0
            # like the "not enough data" branch below.
            if np.isnan(autocorr):
                autocorr = 0
        else:
            autocorr = 0

        # ===========================
        # 4. OUTLIER METRICS
        # ===========================

        # 4.1 Outlier detection (IQR method)
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr
        outliers = np.sum((ck_sacados < lower_bound) | (ck_sacados > upper_bound))
        pct_outliers = outliers / len(ck_sacados) * 100

        # 4.2 Maximum absolute z-score (undefined when all values are equal)
        if std > 0:
            max_z = np.abs(stats.zscore(ck_sacados)).max()
        else:
            max_z = 0.0

        # ===========================
        # 5. RANGE METRICS
        # ===========================

        rango = ck_sacados.max() - ck_sacados.min()
        rango_normalizado = rango / media if media > 0 else 0

        # ===========================
        # 6. GLOBAL RELIABILITY SCORE
        # ===========================

        # Partial scores (0-100, lower = worse)
        score_cv = max(0, 100 - cv * 2)  # high CV is bad
        score_consistencia = cerca_media  # closer to the mean is better
        score_cambios = max(0, 100 - pct_cambios_bruscos * 2)  # abrupt changes are bad
        score_outliers = max(0, 100 - pct_outliers * 3)  # outliers are bad
        score_iqr = max(0, 100 - iqr * 10)  # large IQR is bad

        # Final score (weighted average)
        score_fiabilidad = (
            score_cv * 0.25 +
            score_consistencia * 0.30 +
            score_cambios * 0.20 +
            score_outliers * 0.15 +
            score_iqr * 0.10
        )

        # ===========================
        # 7. CLASSIFICATION
        # ===========================

        # Level and display colour derived from the score
        if score_fiabilidad >= 70:
            nivel = "EXCELENTE ⭐⭐⭐"
            color = "#27ae60"
        elif score_fiabilidad >= 55:
            nivel = "BUENO ✅"
            color = "#2ecc71"
        elif score_fiabilidad >= 40:
            nivel = "ACEPTABLE 🟡"
            color = "#f39c12"
        elif score_fiabilidad >= 25:
            nivel = "REGULAR ⚠️"
            color = "#e67e22"
        else:
            nivel = "EVITAR ⛔"
            color = "#e74c3c"

        resultados.append({
            'Equipo': equipo,
            'Partidos': len(ck_sacados),

            # Basic statistics
            'Media_CK': round(media, 2),
            'Mediana_CK': round(q2, 2),
            'Std_CK': round(std, 2),
            'CV_%': round(cv, 1),

            # Consistency
            'Pct_Cerca_Media': round(cerca_media, 1),
            'Cambios_Bruscos_%': round(pct_cambios_bruscos, 1),
            'IQR': round(iqr, 2),

            # Range
            'Rango': int(rango),
            'Rango_Norm': round(rango_normalizado, 2),
            'Min': int(ck_sacados.min()),
            'Max': int(ck_sacados.max()),

            # Outliers
            'Outliers': int(outliers),
            'Pct_Outliers': round(pct_outliers, 1),
            'Max_ZScore': round(max_z, 2),

            # Trend
            'Tendencia_Slope': round(slope, 3),
            'Autocorr': round(autocorr, 3),

            # Score and classification
            'Score_Fiabilidad': round(score_fiabilidad, 1),
            'Nivel': nivel,
            'Color': color
        })

    # Passing the column list keeps the schema even when no team qualified,
    # so sort_values (and callers filtering on 'Equipo') never hit KeyError.
    df_resultado = pd.DataFrame(resultados, columns=columnas)

    df_resultado = df_resultado.sort_values('Score_Fiabilidad', ascending=False)

    return df_resultado
|
| 173 |
+
|
| 174 |
+
def mostrar_analisis_fiabilidad(df_analisis, top_n=10):
    """Print the full reliability report to stdout.

    The report has three parts: the first ``top_n`` rows of ``df_analisis``
    (most reliable teams), the last ``top_n`` rows (least reliable teams) and
    the distribution of levels plus summary statistics of the score.

    Args:
        df_analisis: Reliability DataFrame; the rows are printed in the order
            they come in, so it is expected to be sorted best-first.
        top_n: Number of teams listed in each ranking.

    Returns:
        None.
    """
    banner = "=" * 120
    separador = "-" * 120

    print("\n" + banner)
    print("🎯 ANÁLISIS DE FIABILIDAD PARA APUESTAS - CORNERS")
    print(banner)

    # Most reliable teams
    print(f"\n⭐ TOP {top_n} EQUIPOS MÁS FIABLES")
    print(separador)

    for _, fila in df_analisis.head(top_n).iterrows():
        print(
            f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | "
            f"Score: {fila['Score_Fiabilidad']:.1f}"
        )
        print(
            f" 📊 Media: {fila['Media_CK']:.1f} | "
            f"Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%"
        )
        print(f" ✅ {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        print(
            f" ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | "
            f"Outliers: {fila['Pct_Outliers']:.1f}%"
        )
        print(f" 📈 Rango: {fila['Min']}-{fila['Max']} ({fila['Rango']} corners)")

    # Least reliable teams
    print(f"\n\n⛔ TOP {top_n} EQUIPOS MENOS FIABLES")
    print(separador)

    for _, fila in df_analisis.tail(top_n).iterrows():
        print(
            f"\n{fila['Equipo']:25s} | {fila['Nivel']:20s} | "
            f"Score: {fila['Score_Fiabilidad']:.1f}"
        )
        print(
            f" 📊 Media: {fila['Media_CK']:.1f} | "
            f"Mediana: {fila['Mediana_CK']:.1f} | CV: {fila['CV_%']:.1f}%"
        )
        print(f" ❌ Solo {fila['Pct_Cerca_Media']:.1f}% cerca de media | IQR: {fila['IQR']:.1f}")
        print(
            f" ⚠️ Cambios bruscos: {fila['Cambios_Bruscos_%']:.1f}% | "
            f"Outliers: {fila['Pct_Outliers']:.1f}%"
        )

    # General statistics
    print(f"\n\n📊 DISTRIBUCIÓN POR NIVEL DE FIABILIDAD")
    print(separador)
    print(df_analisis['Nivel'].value_counts())

    puntuaciones = df_analisis['Score_Fiabilidad']
    print(f"\n📈 ESTADÍSTICAS DE SCORE:")
    print(f" Media: {puntuaciones.mean():.1f}")
    print(f" Mediana: {puntuaciones.median():.1f}")
    print(f" Score máximo: {puntuaciones.max():.1f}")
    print(f" Score mínimo: {puntuaciones.min():.1f}")
|
| 218 |
+
|
| 219 |
+
def obtener_fiabilidad_partido(local, visitante, df_analisis):
    """Evaluate how reliable a specific match is for betting.

    Looks up both teams in the per-team reliability table and classifies the
    match by the average of their ``Score_Fiabilidad`` values.

    Args:
        local: Home team name as it appears in the ``Equipo`` column.
        visitante: Away team name as it appears in the ``Equipo`` column.
        df_analisis: Per-team reliability DataFrame with the columns
            ``Equipo``, ``Score_Fiabilidad``, ``Nivel``, ``CV_%`` and
            ``Pct_Cerca_Media``.

    Returns:
        dict with ``fiabilidad``, ``score_local``, ``score_away``,
        ``score_promedio`` and ``mensaje`` in every case. When both teams are
        found it also carries ``nivel_*``, ``cv_*`` and ``consistencia_*``;
        when either team is missing (or the table has no ``Equipo`` column)
        the scores are 0, ``fiabilidad`` is ``'DESCONOCIDO'`` and the legacy
        ``score`` key is kept.
    """
    # Same score keys as the normal result, so code that reads
    # result['score_promedio'] does not fail on unknown teams.
    sin_datos = {
        'fiabilidad': 'DESCONOCIDO',
        'score': 0,
        'score_local': 0,
        'score_away': 0,
        'score_promedio': 0,
        'mensaje': '⚠️ Datos insuficientes'
    }

    # An analysis frame without the 'Equipo' column cannot be searched
    if df_analisis is None or 'Equipo' not in df_analisis.columns:
        return sin_datos

    datos_local = df_analisis[df_analisis['Equipo'] == local]
    datos_away = df_analisis[df_analisis['Equipo'] == visitante]

    if datos_local.empty or datos_away.empty:
        return sin_datos

    score_local = datos_local['Score_Fiabilidad'].values[0]
    score_away = datos_away['Score_Fiabilidad'].values[0]
    score_promedio = (score_local + score_away) / 2

    # Match classification by average score
    if score_promedio >= 65:
        fiabilidad = "MUY ALTA ⭐⭐⭐"
        mensaje = "✅ EXCELENTE PARTIDO PARA APOSTAR"
    elif score_promedio >= 50:
        fiabilidad = "ALTA ✅"
        mensaje = "✅ BUEN PARTIDO PARA APOSTAR"
    elif score_promedio >= 35:
        fiabilidad = "MEDIA 🟡"
        mensaje = "🟡 APOSTAR CON PRECAUCIÓN"
    else:
        fiabilidad = "BAJA ⛔"
        mensaje = "⛔ EVITAR APUESTA"

    return {
        'fiabilidad': fiabilidad,
        'score_local': score_local,
        'score_away': score_away,
        'score_promedio': score_promedio,
        'nivel_local': datos_local['Nivel'].values[0],
        'nivel_away': datos_away['Nivel'].values[0],
        'mensaje': mensaje,

        # Additional useful data
        'cv_local': datos_local['CV_%'].values[0],
        'cv_away': datos_away['CV_%'].values[0],
        'consistencia_local': datos_local['Pct_Cerca_Media'].values[0],
        'consistencia_away': datos_away['Pct_Cerca_Media'].values[0]
    }
|
| 267 |
+
|
| 268 |
+
def calcular_probabilidades_poisson(lambda_pred, rango_inferior=5, rango_superior=5):
    """Compute corner probabilities (in percent) from a Poisson distribution.

    Args:
        lambda_pred: Predicted number of corners, used as the Poisson mean.
        rango_inferior: How many integer counts below the rounded prediction
            get an exact probability (never going below 0).
        rango_superior: How many integer counts above the rounded prediction
            get an exact probability.

    Returns:
        dict with three mappings, all expressed as percentages:
        ``'exactas'`` (count -> P(X = count)), ``'over'``
        (line -> 1 - CDF(line)) and ``'under'`` (line -> CDF(line)).
        Over and under share the same lines, 7.5 through 12.5.
    """
    centro = int(round(lambda_pred))
    primer_valor = max(0, centro - rango_inferior)
    ultimo_valor = centro + rango_superior

    exactas = {
        k: poisson.pmf(k, lambda_pred) * 100
        for k in range(primer_valor, ultimo_valor + 1)
    }

    # One shared list so every over line has its matching under line
    lineas = (7.5, 8.5, 9.5, 10.5, 11.5, 12.5)
    over = {linea: (1 - poisson.cdf(linea, lambda_pred)) * 100 for linea in lineas}
    under = {linea: poisson.cdf(linea, lambda_pred) * 100 for linea in lineas}

    return {'exactas': exactas, 'over': over, 'under': under}
|
| 300 |
+
|
| 301 |
+
def clasificar_confianza(prob):
    """Map a probability (in percent) to a confidence label.

    Returns ``"ALTA ✅"`` from 66 upwards, ``"MEDIA ⚠️"`` from 55 upwards and
    ``"BAJA ❌"`` otherwise.
    """
    # Thresholds checked from highest to lowest; first match wins
    niveles = (
        (66, "ALTA ✅"),
        (55, "MEDIA ⚠️"),
    )
    for umbral, etiqueta in niveles:
        if prob >= umbral:
            return etiqueta
    return "BAJA ❌"
|
| 309 |
+
|
| 310 |
+
def get_dataframes(df, season, round_num, local, away, league=None):
    """Return 8 DataFrames filtered by team, venue and (optionally) league.

    Only rows of ``season`` with ``round`` strictly below ``round_num`` are
    considered, restricted to ``league`` when one is given.

    Returns:
        Tuple, in this order: rows where ``local`` is ``team`` (Home, Away),
        rows where ``local`` is ``opponent`` (Home, Away), rows where ``away``
        is ``team`` (Home, Away) and rows where ``away`` is ``opponent``
        (Home, Away). Home/Away refers to the ``venue`` column.
    """
    mascara_base = (df['season'] == season) & (df['round'] < round_num)
    if league is not None:
        mascara_base = mascara_base & (df['league'] == league)

    def _por_sede(columna, equipo):
        # Select the rows for this team in the given column, then split by venue
        subset = df[mascara_base & (df[columna] == equipo)].copy()
        en_casa = subset[subset['venue'] == "Home"]
        fuera = subset[subset['venue'] == "Away"]
        return en_casa, fuera

    pares = (
        _por_sede('team', local),
        _por_sede('opponent', local),
        _por_sede('team', away),
        _por_sede('opponent', away),
    )

    # Flatten the four (home, away) pairs into one 8-tuple
    return tuple(frame for par in pares for frame in par)
|
| 332 |
+
|
| 333 |
+
def get_head_2_head(df, local, away, seasons=None, league=None):
    """Return the most recent head-to-head rows for both teams.

    Args:
        df: Match DataFrame with ``season``, ``league``, ``team`` and
            ``opponent`` columns.
        local: Home team name.
        away: Away team name.
        seasons: Seasons to keep; ``None`` or an empty list keeps them all.
        league: Optional league filter.

    Returns:
        Tuple ``(local_rows, away_rows)``: rows where ``local`` faced ``away``
        as ``team`` and rows where ``away`` faced ``local`` as ``team``. The
        last 3 rows of each are returned when ``local`` has at least 4 such
        rows, otherwise the last 2.
    """
    candidatos = df
    if seasons:
        candidatos = candidatos[candidatos['season'].isin(seasons)]
    if league is not None:
        candidatos = candidatos[candidatos['league'] == league]

    es_local = (candidatos['team'] == local) & (candidatos['opponent'] == away)
    es_visitante = (candidatos['team'] == away) & (candidatos['opponent'] == local)
    enfrentamientos_local = candidatos[es_local]
    enfrentamientos_away = candidatos[es_visitante]

    # The window size depends only on how many rows the local side has
    ultimos = 3 if len(enfrentamientos_local) >= 4 else 2
    return enfrentamientos_local.tail(ultimos), enfrentamientos_away.tail(ultimos)
|
| 350 |
+
|
| 351 |
+
def get_average(df, is_team=False, lst_avg=None):
    """Average match statistics over the rows of ``df``.

    Two modes are supported:

    * ``is_team=False`` (league baseline) returns 9 values:
      ``(var_ck, avg_xg, avg_sca, avg_cross, avg_att_3rd, avg_gf, avg_ga,
      avg_sh, avg_ck)``.
    * ``is_team=True`` (team profile) returns 24 values: nine basic stats
      ``(avg_ck, var_ck, avg_xg, avg_sca, avg_cross, avg_poss, avg_att_3rd,
      avg_gf, avg_ga)`` followed by 15 ratio metrics and composite indices,
      in the order listed in the return statement. Except for ``var_ck``, the
      basic stats are per-match averages minus the matching league baseline
      from ``lst_avg``; possession is centred on 50 instead.

    Args:
        df: Match rows to aggregate.
        is_team: Selects the team profile (True) or league baseline (False).
        lst_avg: League baseline as returned by this function with
            ``is_team=False``; required when ``is_team`` is True and ``df``
            is not empty.

    Returns:
        Tuple of 24 values (team mode) or 9 values (league mode). An empty
        ``df`` yields a tuple of zeros of the same length as the populated
        result, so callers can always label the values positionally.

    Raises:
        ValueError: If ``is_team`` is True, ``df`` has rows and ``lst_avg``
            is None.
    """
    n_matches = len(df)
    if n_matches == 0:
        # The team tuple must have 24 entries, same as the populated branch;
        # a shorter tuple breaks the positional feature naming downstream.
        return (0,) * (24 if is_team else 9)

    # Sample variance is undefined for a single match, so fall back to 0.
    var_ck = df['Pass Types_CK'].var() if n_matches > 1 else 0

    if not is_team:
        # League baseline: plain column means.
        avg_sh = df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0
        return (
            var_ck,                        # 0
            df['Expected_xG'].mean(),      # 1
            df['SCA Types_SCA'].mean(),    # 2
            df['Performance_Crs'].mean(),  # 3
            df['Touches_Att 3rd'].mean(),  # 4
            df['GF'].mean(),               # 5
            df['GA'].mean(),               # 6
            avg_sh,                        # 7
            df['Pass Types_CK'].mean(),    # 8
        )

    if lst_avg is None:
        raise ValueError("lst_avg (league baseline) is required when is_team=True")

    def per_match(column):
        # Column total divided by the number of rows.
        return df[column].sum() / n_matches

    def ratio(numerator, denominator):
        # Guarded division: 0 when the denominator is not strictly positive.
        return (numerator / denominator) if denominator > 0 else 0

    # Basic stats, normalised against the league baseline.
    avg_cross = per_match('Performance_Crs') - lst_avg[3]
    avg_att_3rd = per_match('Touches_Att 3rd') - lst_avg[4]
    avg_sca = per_match('SCA Types_SCA') - lst_avg[2]
    avg_xg = per_match('Expected_xG') - lst_avg[1]
    avg_ck = per_match('Pass Types_CK') - lst_avg[8]
    avg_poss = per_match('Poss') - 50
    avg_gf = per_match('GF') - lst_avg[5]
    avg_ga = per_match('GA') - lst_avg[6]

    # Column totals reused by several ratios below.
    total_sh = df['Standard_Sh'].sum()
    total_touches = df['Touches_Touches'].sum()
    total_poss = df['Poss'].sum()
    total_passes = df['Total_Att'].sum()
    total_sca = df['SCA Types_SCA'].sum()
    total_tackles = df['Tackles_Tkl'].sum()
    total_int = df['Int'].sum()
    total_carries = df['Carries_Carries'].sum()
    total_prog_passes = df['PrgP'].sum()
    total_prog_carries = df['Carries_PrgC'].sum()

    # Advanced attacking metrics.
    sh_accuracy = ratio(df['Standard_SoT'].sum(), total_sh)
    xg_shot = ratio(df['Expected_xG'].sum(), total_sh)
    attacking_presence = ratio(df['Touches_Att 3rd'].sum(), total_touches)
    possession_shot = ratio(total_sh, total_poss)

    # Chance-creation metrics.
    progressive_pass_ratio = ratio(total_prog_passes, total_passes)
    final_third_involvement = ratio(df['1/3'].sum(), total_passes)
    assist_sca = ratio(df['Ast'].sum(), total_sca)
    creative_efficiency = ratio(total_sca, total_poss)

    # Defensive metrics.
    high_press_intensity = ratio(df['Tackles_Att 3rd'].sum(), total_tackles)
    interception_tackle = ratio(total_int, total_tackles)
    clearance_ratio = ratio(df['Clr'].sum(), total_tackles + total_int)

    # Ball-progression metrics.
    progressive_carry_ratio = ratio(total_prog_carries, total_carries)
    carry_pass_balance = ratio(total_prog_carries, total_prog_passes)

    # Composite indices built from raw (non-normalised) means.
    avg_sh = df['Standard_Sh'].mean()
    if avg_sh > 0:
        offensive_index = (
            (df['GF'].mean() + df['Expected_xG'].mean())
            * (df['Standard_SoT'].mean() / avg_sh)
        )
    else:
        offensive_index = 0

    avg_poss_raw = df['Poss'].mean()
    transition_index = ratio(df['PrgP'].mean() + df['Carries_PrgC'].mean(), avg_poss_raw)

    # 24 values, indices 0-23.
    return (
        avg_ck,                   # 0
        var_ck,                   # 1
        avg_xg,                   # 2
        avg_sca,                  # 3
        avg_cross,                # 4
        avg_poss,                 # 5
        avg_att_3rd,              # 6
        avg_gf,                   # 7
        avg_ga,                   # 8
        sh_accuracy,              # 9
        xg_shot,                  # 10
        attacking_presence,       # 11
        possession_shot,          # 12
        progressive_pass_ratio,   # 13
        final_third_involvement,  # 14
        assist_sca,               # 15
        creative_efficiency,      # 16
        high_press_intensity,     # 17
        interception_tackle,      # 18
        clearance_ratio,          # 19
        progressive_carry_ratio,  # 20
        carry_pass_balance,       # 21
        offensive_index,          # 22
        transition_index,         # 23
    )
|
| 486 |
+
|
| 487 |
+
def get_points_from_result(result):
    """Convert a match result code into league points.

    ``'W'`` is worth 3 points, ``'D'`` is worth 1 and any other value 0.
    """
    if result == 'W':
        return 3
    return 1 if result == 'D' else 0
|
| 495 |
+
|
| 496 |
+
def get_team_ppp(df, team, season, round_num, league=None):
    """Compute a team's points per match (PPP) before a given round.

    Args:
        df: Match table with ``team``, ``season``, ``round``, ``result`` and,
            when ``league`` is given, ``league`` columns.
        team: Team whose rows are selected.
        season: Season value compared against ``df['season']``.
        round_num: Exclusive upper bound for ``df['round']``.
        league: Optional league code; ``None`` disables the league filter.

    Returns:
        Total points divided by matches played, or ``0.0`` when no match
        passes the filters.
    """
    selector = (
        (df['team'] == team)
        & (df['season'] == season)
        & (df['round'] < round_num)
    )
    played = df[selector]

    if league is not None:
        played = played[played['league'] == league]

    n_played = len(played)
    if n_played == 0:
        return 0.0

    points = played['result'].apply(get_points_from_result).sum()
    return points / n_played
|
| 514 |
+
|
| 515 |
+
def get_ppp_difference(df, local, away, season, round_num, league=None):
    """Return the local team's points per match minus the away team's.

    Both values come from ``get_team_ppp`` with the same season, round bound
    and league filter.
    """
    home_ppp, visitor_ppp = (
        get_team_ppp(df, club, season, round_num, league)
        for club in (local, away)
    )
    return home_ppp - visitor_ppp
|
| 520 |
+
|
| 521 |
+
def predecir_corners(local, visitante, jornada, temporada="2526", league_code="ESP",
                     df_database=None, xgb_model="", scaler="", lst_years=None):
    """Predict total corners for one fixture and print a betting analysis.

    The function builds the feature vector from ``df_database``, scales it,
    runs the model, derives probabilities through
    ``calcular_probabilidades_poisson`` and adds a risk assessment through
    ``analizar_fiabilidad_equipos`` / ``obtener_fiabilidad_partido``. Those
    helpers and ``clasificar_confianza`` are defined elsewhere in this file.

    Args:
        local: Home team.
        visitante: Away team.
        jornada: Round number; rounds below 5 are rejected.
        temporada: Season code (format "2526").
        league_code: League code ("ESP", "GER", "FRA", "ITA", "NED", "ENG",
            "POR" or "BEL" get a dedicated dummy feature).
        df_database: Match table; ``None`` means an empty DataFrame.
        xgb_model: Fitted model exposing ``predict``.
        scaler: Fitted scaler exposing ``feature_names_in_`` and ``transform``.
        lst_years: Ordered list of season codes; seasons up to and including
            ``temporada`` are used for the head-to-head window. ``None`` means
            an empty list.

    Returns:
        On success, a dict with the prediction, PPP values, risk data,
        historical stats and the exact/over/under probabilities. On failure,
        ``{"error": <message>, "prediccion": None}``.
    """
    # None sentinels avoid sharing one DataFrame/list object across calls.
    if df_database is None:
        df_database = pd.DataFrame()
    if lst_years is None:
        lst_years = []

    print(f"\n{'='*80}")
    print(f"🏟️ {local} vs {visitante}")
    print(f"📅 Temporada {temporada} | Jornada {jornada} | Liga: {league_code}")
    print(f"{'='*80}")

    if jornada < 5:
        return {
            "error": "❌ Se necesitan al menos 5 jornadas previas",
            "prediccion": None
        }

    # Fail early with a readable message instead of list.index's ValueError.
    if temporada not in lst_years:
        return {
            "error": f"❌ Temporada '{temporada}' no está en lst_years",
            "prediccion": None
        }

    try:
        # ---------------------------
        # Feature extraction
        # ---------------------------
        league_rows = df_database[
            (df_database['season'] == temporada) &
            (df_database['round'] < jornada) &
            (df_database['league'] == league_code)
        ]
        lst_avg = get_average(league_rows, is_team=False)

        (team1_home, team1_away, team1_opp_home, team1_opp_away,
         team2_home, team2_away, team2_opp_home, team2_opp_away) = get_dataframes(
            df_database, temporada, jornada, local, visitante, league=league_code
        )

        seasons_so_far = lst_years[:lst_years.index(temporada) + 1]
        team1_h2h, team2_h2h = get_head_2_head(
            df_database, local, visitante, seasons=seasons_so_far, league=league_code
        )

        local_ppp = get_team_ppp(df_database, local, temporada, jornada, league=league_code)
        away_ppp = get_team_ppp(df_database, visitante, temporada, jornada, league=league_code)
        ppp_diff = local_ppp - away_ppp

        # ---------------------------
        # Feature dictionary (insertion order defines the value order)
        # ---------------------------
        def create_line(df, is_form=True, is_team=False, use_advanced=True):
            # "Form" lines only look at the last six rows.
            if is_form:
                df = df[-6:]
            averages = get_average(df, is_team, lst_avg)
            return averages if use_advanced else averages[:9]

        dic_features = {
            'ppp_local': (local_ppp,),
            'ppp_away': (away_ppp,),
            'ppp_difference': (ppp_diff,),
        }

        venue_frames = (
            ('team1_home', team1_home),
            ('team1_away', team1_away),
            ('team2_home', team2_home),
            ('team2_away', team2_away),
        )
        for label, frame in venue_frames:
            dic_features[f'lst_{label}_form'] = create_line(frame, True, True, use_advanced=True)
            dic_features[f'lst_{label}_general'] = create_line(frame, False, True, use_advanced=True)

        dic_features['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
        dic_features['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)

        dic_features['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
        dic_features['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)

        for code in ('ESP', 'GER', 'FRA', 'ITA', 'NED', 'ENG', 'POR', 'BEL'):
            dic_features[f'league_{code}'] = (1 if league_code == code else 0,)

        # ---------------------------
        # Feature vector
        # ---------------------------
        lst_base_advanced = [
            "avg_ck", "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
            "high_press_intensity", "interception_tackle", "clearance_ratio",
            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
        ]

        # NOTE(review): the opponent lines hold the first nine team values
        # (starting with avg_ck) but are labelled starting with var_ck.
        # Presumably this mirrors the training pipeline; confirm before
        # changing, since the scaler expects these exact column names.
        lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]

        scalar_keys = ('ppp_local', 'ppp_away', 'ppp_difference')
        opponent_keys = ('lst_team1_opp_away', 'lst_team2_opp_home')

        lst_features_values = []
        lst_features_names = []

        for key, values in dic_features.items():
            lst_features_values.extend(values)

            if key in scalar_keys or key.startswith('league_'):
                lst_features_names.append(key)
            elif key in opponent_keys:
                lst_features_names.extend(f"{key}_{col}" for col in lst_base_original)
            else:
                lst_features_names.extend(f"{key}_{col}" for col in lst_base_advanced)

        df_input = pd.DataFrame([lst_features_values], columns=lst_features_names)

        expected_features = scaler.feature_names_in_

        if len(df_input.columns) != len(expected_features):
            print(f"\n⚠️ ERROR: Número de features no coincide")
            print(f" Esperadas: {len(expected_features)}")
            print(f" Recibidas: {len(df_input.columns)}")
            return {"error": "Desajuste de features", "prediccion": None}

        # Reorder the columns to the order the scaler was fitted with.
        df_input = df_input[expected_features]

        X_input_scaled = pd.DataFrame(
            scaler.transform(df_input),
            columns=df_input.columns
        )

        # ---------------------------
        # Prediction and Poisson analysis
        # ---------------------------
        prediccion = xgb_model.predict(X_input_scaled)[0]

        analisis = calcular_probabilidades_poisson(prediccion, rango_inferior=5, rango_superior=5)

        # ---------------------------
        # Detailed statistics
        # ---------------------------
        def mean_or_zero(frame, column):
            return frame[column].mean() if len(frame) > 0 else 0

        local_ck_home = mean_or_zero(team1_home, 'Pass Types_CK')
        local_xg_home = mean_or_zero(team1_home, 'Expected_xG')
        local_poss_home = mean_or_zero(team1_home, 'Poss')

        away_ck_away = mean_or_zero(team2_away, 'Pass Types_CK')
        away_xg_away = mean_or_zero(team2_away, 'Expected_xG')
        away_poss_away = mean_or_zero(team2_away, 'Poss')

        local_ck_received = mean_or_zero(team1_opp_home, 'Pass Types_CK')
        away_ck_received = mean_or_zero(team2_opp_away, 'Pass Types_CK')

        partido_ck_esperado = local_ck_home + away_ck_away

        h2h_ck_local = mean_or_zero(team1_h2h, 'Pass Types_CK')
        h2h_ck_away = mean_or_zero(team2_h2h, 'Pass Types_CK')
        h2h_total = h2h_ck_local + h2h_ck_away

        # ---------------------------
        # Console report
        # ---------------------------
        print(f"\n🎲 PREDICCIÓN MODELO: {prediccion:.2f} corners totales")
        print(f" PPP: {local} ({local_ppp:.2f}) vs {visitante} ({away_ppp:.2f}) | Diff: {ppp_diff:+.2f}")

        print(f"\n📊 ESTADÍSTICAS HISTÓRICAS:")
        print(f" {local} (Casa): {local_ck_home:.1f} CK/partido | xG: {local_xg_home:.2f} | Poss: {local_poss_home:.1f}%")
        print(f" {visitante} (Fuera): {away_ck_away:.1f} CK/partido | xG: {away_xg_away:.2f} | Poss: {away_poss_away:.1f}%")
        print(f" Corners recibidos: {local} ({local_ck_received:.1f}) | {visitante} ({away_ck_received:.1f})")
        print(f" Total esperado (suma): {partido_ck_esperado:.1f} corners")

        if len(team1_h2h) > 0 or len(team2_h2h) > 0:
            print(f"\n🔄 HEAD TO HEAD (últimos {max(len(team1_h2h), len(team2_h2h))} partidos):")
            print(f" {local}: {h2h_ck_local:.1f} CK/partido")
            print(f" {visitante}: {h2h_ck_away:.1f} CK/partido")
            print(f" Promedio total: {h2h_total:.1f} corners")

        # Exact probabilities, with the most likely value starred.
        valor_mas_probable = max(analisis['exactas'].items(), key=lambda x: x[1])

        print(f"\n📈 PROBABILIDADES EXACTAS (Poisson):")
        for k in sorted(analisis['exactas'].keys()):
            prob = analisis['exactas'][k]
            bar = '█' * int(prob / 2)
            marca = ' ⭐' if k == valor_mas_probable[0] else ''
            print(f" {k:2d} corners: {prob:5.2f}% {bar}{marca}")

        print(f"\n✅ Valor más probable: {valor_mas_probable[0]} corners ({valor_mas_probable[1]:.2f}%)")

        # 80% range: accumulate the most likely values until 80% is reached.
        probs_sorted = sorted(analisis['exactas'].items(), key=lambda x: x[1], reverse=True)
        cumsum = 0
        rango_80 = []
        for val, prob in probs_sorted:
            cumsum += prob
            rango_80.append(val)
            if cumsum >= 80:
                break

        print(f"📊 Rango 80% confianza: {min(rango_80)}-{max(rango_80)} corners")

        # Over/under table with implied odds (100 / probability in percent).
        print(f"\n🎯 ANÁLISIS OVER/UNDER:")
        print(f"{'Línea':<10} {'Prob Over':<12} {'Cuota Impl':<12} {'Confianza':<15} {'Prob Under':<12} {'Cuota Impl':<12}")
        print("-" * 85)

        for linea in [7.5, 8.5, 9.5, 10.5, 11.5, 12.5]:
            prob_over = analisis['over'][linea]
            prob_under = analisis['under'][linea]

            cuota_impl_over = 100 / prob_over if prob_over > 0 else 999
            cuota_impl_under = 100 / prob_under if prob_under > 0 else 999

            conf_over = clasificar_confianza(prob_over)

            print(f"O/U {linea:<5} {prob_over:6.2f}% @{cuota_impl_over:5.2f} {conf_over:<15} {prob_under:6.2f}% @{cuota_impl_under:5.2f}")

        # Recommendations: every line with a probability of at least 55%.
        print(f"\n💡 RECOMENDACIONES DE APUESTA:")

        mejores_over = [(l, p) for l, p in analisis['over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in analisis['under'].items() if p >= 55]

        secciones = (
            ("OVER", "Over", mejores_over),
            ("UNDER", "Under", mejores_under),
        )
        for titulo, etiqueta, candidatas in secciones:
            if not candidatas:
                continue
            print(f"\n✅ {titulo} con confianza MEDIA/ALTA:")
            for linea, prob in sorted(candidatas, key=lambda x: x[1], reverse=True):
                cuota_impl = 100 / prob
                conf = clasificar_confianza(prob)
                print(f" • {etiqueta} {linea}: {prob:.2f}% (Cuota justa: @{cuota_impl:.2f}) - {conf}")

        if not mejores_over and not mejores_under:
            print(f" ⚠️ No hay apuestas con confianza MEDIA o superior")

        # Risk analysis.
        df_varianza_temp = analizar_fiabilidad_equipos(df_database, temporada=temporada, min_partidos=3)
        riesgo = obtener_fiabilidad_partido(local, visitante, df_varianza_temp)

        print(f"\n⚠️ ANÁLISIS DE RIESGO:")
        print(f" Local ({local}): {riesgo['nivel_local']} (CV: {riesgo['cv_local']:.1f}%)")
        print(f" Away ({visitante}): {riesgo['nivel_away']} (CV: {riesgo['cv_away']:.1f}%)")
        print(f" 🎲 FIABILIDAD PARTIDO: {riesgo['fiabilidad']} (Score: {riesgo['score_promedio']:.1f})")
        print(f" 💡 {riesgo['mensaje']}")

        return {
            # Plain float so the value stays serialisable outside numpy.
            "prediccion": round(float(prediccion), 2),
            "local": local,
            "visitante": visitante,
            "ppp_local": local_ppp,
            "ppp_away": away_ppp,
            "ppp_diff": ppp_diff,
            "riesgo": riesgo,
            "stats": {
                "local_ck": local_ck_home,
                "away_ck": away_ck_away,
                "local_ck_received": local_ck_received,
                "away_ck_received": away_ck_received,
                "h2h_total": h2h_total,
                "partido_esperado": partido_ck_esperado
            },
            "probabilidades_exactas": analisis['exactas'],
            "probabilidades_over": analisis['over'],
            "probabilidades_under": analisis['under'],
            "valor_mas_probable": valor_mas_probable[0],
            "prob_mas_probable": valor_mas_probable[1],
            "rango_80": (min(rango_80), max(rango_80))
        }

    except Exception as e:
        # Top-level boundary: report the failure and return an error payload
        # so batch callers can continue with the next fixture.
        print(f"\n❌ ERROR: {str(e)}")
        import traceback
        traceback.print_exc()
        return {"error": str(e), "prediccion": None}
|
| 841 |
+
|
| 842 |
+
def predecir_partidos_batch(partidos, jornada, temporada="2526", league_code="ESP",
                            export_csv=True, filename=None, df_database=None,
                            xgb_model="", scaler="", lst_years=None):
    """Predict corners for several fixtures and optionally export them to CSV.

    Each fixture is passed to ``predecir_corners``; fixtures that come back
    with an ``error`` entry are reported and skipped.

    Args:
        partidos: List of tuples ``[(local1, visitante1), (local2, visitante2), ...]``.
        jornada: Round number.
        temporada: Season code (format "2526").
        league_code: League code ("ESP", "GER", "FRA", "ITA", "NED").
        export_csv: When True and at least one fixture succeeded, write a CSV.
        filename: CSV file name; defaults to
            ``predicciones_<league>_J<round>_<season>.csv``.
        df_database: Match table forwarded to ``predecir_corners``; ``None``
            means an empty DataFrame.
        xgb_model: Fitted model forwarded to ``predecir_corners``.
        scaler: Fitted scaler forwarded to ``predecir_corners``.
        lst_years: Ordered season codes forwarded to ``predecir_corners``;
            ``None`` means an empty list.

    Returns:
        DataFrame with one row per successfully analysed fixture. It is empty
        (no columns) when no fixture produced a result.
    """
    # None sentinels avoid sharing one DataFrame/list object across calls.
    if df_database is None:
        df_database = pd.DataFrame()
    if lst_years is None:
        lst_years = []

    resultados = []

    print("\n" + "=" * 120)
    print(f"🎯 PROCESANDO {len(partidos)} PARTIDOS - {league_code} | J{jornada} | Temporada {temporada}")
    print("=" * 120)

    for idx, (local, visitante) in enumerate(partidos, 1):
        print(f"\n[{idx}/{len(partidos)}] Procesando: {local} vs {visitante}...")

        resultado = predecir_corners(
            local=local,
            visitante=visitante,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            df_database=df_database,
            xgb_model=xgb_model,
            scaler=scaler,
            lst_years=lst_years)

        if resultado.get("error"):
            print(f" ❌ Error: {resultado['error']}")
            continue

        stats = resultado['stats']
        riesgo = resultado['riesgo']
        score = riesgo['score_promedio']

        # ---------------------------
        # Build the output row
        # ---------------------------
        fila = {
            'Partido': f"{local} vs {visitante}",
            'Local': local,
            'Visitante': visitante,
            'Liga': league_code,
            'Jornada': jornada,
            'Temporada': temporada,

            # Prediction
            'Prediccion': resultado['prediccion'],
            'Valor_Mas_Probable': resultado['valor_mas_probable'],
            'Prob_Valor_Mas_Probable_%': round(resultado['prob_mas_probable'], 2),
            'Rango_80%_Min': resultado['rango_80'][0],
            'Rango_80%_Max': resultado['rango_80'][1],

            # PPP
            'PPP_Local': round(resultado['ppp_local'], 2),
            'PPP_Away': round(resultado['ppp_away'], 2),
            'PPP_Diferencia': round(resultado['ppp_diff'], 2),

            # Historical statistics
            'CK_Local_Casa': round(stats['local_ck'], 1),
            'CK_Away_Fuera': round(stats['away_ck'], 1),
            'CK_Local_Recibidos': round(stats['local_ck_received'], 1),
            'CK_Away_Recibidos': round(stats['away_ck_received'], 1),
            'CK_Esperado_Suma': round(stats['partido_esperado'], 1),
            'CK_H2H_Total': round(stats['h2h_total'], 1) if stats['h2h_total'] > 0 else 'N/A',

            # Risk
            'Fiabilidad_Partido': riesgo['fiabilidad'],
            'Score_Fiabilidad': round(score, 1),
            'Nivel_Local': riesgo['nivel_local'],
            'Nivel_Away': riesgo['nivel_away'],
            'CV_Local_%': round(riesgo['cv_local'], 1),
            'CV_Away_%': round(riesgo['cv_away'], 1),
        }

        # Over 6.5 to 10.5, then Under 12.5 down to 9.5 (column order matters
        # for the exported CSV).
        mercados = (
            ('Over', 'probabilidades_over', [6.5, 7.5, 8.5, 9.5, 10.5]),
            ('Under', 'probabilidades_under', [12.5, 11.5, 10.5, 9.5]),
        )
        for etiqueta, clave, lineas in mercados:
            for linea in lineas:
                prob = resultado[clave].get(linea, 0)
                cuota_impl = round(100 / prob, 2) if prob > 0 else 999
                conf = clasificar_confianza(prob)

                fila[f'{etiqueta}_{linea}_Prob_%'] = round(prob, 2)
                fila[f'{etiqueta}_{linea}_Cuota'] = cuota_impl
                fila[f'{etiqueta}_{linea}_Confianza'] = conf

        # ---------------------------
        # Recommendations
        # ---------------------------
        mejores_over = [(l, p) for l, p in resultado['probabilidades_over'].items() if p >= 55]
        mejores_under = [(l, p) for l, p in resultado['probabilidades_under'].items() if p >= 55]

        if score < 35:
            fila['Recomendacion'] = "⛔ EVITAR - Baja fiabilidad"
            fila['Es_Apostable'] = "NO"
        elif not mejores_over and not mejores_under:
            fila['Recomendacion'] = "⚠️ NO RECOMENDADO - Sin confianza suficiente"
            fila['Es_Apostable'] = "NO"
        else:
            recomendaciones = []

            if mejores_over:
                mejor_over = max(mejores_over, key=lambda x: x[1])
                cuota_over = round(100 / mejor_over[1], 2)
                recomendaciones.append(f"Over {mejor_over[0]} ({mejor_over[1]:.1f}% @{cuota_over})")

            if mejores_under:
                mejor_under = max(mejores_under, key=lambda x: x[1])
                cuota_under = round(100 / mejor_under[1], 2)
                recomendaciones.append(f"Under {mejor_under[0]} ({mejor_under[1]:.1f}% @{cuota_under})")

            fila['Recomendacion'] = " | ".join(recomendaciones)

            if score >= 65:
                fila['Es_Apostable'] = "SÍ ⭐⭐⭐"
            elif score >= 50:
                fila['Es_Apostable'] = "SÍ ✅"
            else:
                fila['Es_Apostable'] = "PRECAUCIÓN 🟡"

        fila['Mensaje_Riesgo'] = riesgo['mensaje']

        resultados.append(fila)
        print(f" ✅ Completado")

    # ---------------------------
    # Build the DataFrame
    # ---------------------------
    df_resultados = pd.DataFrame(resultados)

    print("\n" + "=" * 120)
    print(f"✅ PROCESAMIENTO COMPLETADO: {len(df_resultados)} partidos analizados")
    print("=" * 120)

    # ---------------------------
    # CSV export
    # ---------------------------
    if export_csv and len(df_resultados) > 0:
        if filename is None:
            filename = f"predicciones_{league_code}_J{jornada}_{temporada}.csv"

        df_resultados.to_csv(filename, index=False, encoding='utf-8-sig')
        print(f"\n💾 Resultados exportados a: {filename}")

    # ---------------------------
    # Summary
    # ---------------------------
    # With no successful fixture the frame has no 'Es_Apostable' column, so
    # the labels are read defensively instead of indexing the column directly.
    if 'Es_Apostable' in df_resultados.columns:
        etiquetas = list(df_resultados['Es_Apostable'])
    else:
        etiquetas = []

    print(f"\n📊 RESUMEN DE APUESTAS:")
    print(f" Partidos apostables: {sum('SÍ' in e for e in etiquetas)} / {len(df_resultados)}")
    print(f" Partidos ALTA confianza (⭐⭐⭐): {etiquetas.count('SÍ ⭐⭐⭐')}")
    print(f" Partidos MEDIA confianza (✅): {etiquetas.count('SÍ ✅')}")
    print(f" Partidos a evitar (⛔): {etiquetas.count('NO')}")

    return df_resultados
|
| 1020 |
+
|
| 1021 |
+
def mostrar_resumen_batch(df_resultados):
    """Print a visual summary of the bettable fixtures in a batch result.

    Args:
        df_resultados: DataFrame as produced by ``predecir_partidos_batch``.
            An empty frame (including one without columns) is accepted and
            reported as "no opportunities".

    Returns:
        None. Output goes to stdout.
    """
    print("\n" + "=" * 120)
    print("🎯 MEJORES OPORTUNIDADES DE APUESTA")
    print("=" * 120)

    # Keep only bettable fixtures. A batch with no successful prediction has
    # no 'Es_Apostable' column at all, so guard the lookup.
    if 'Es_Apostable' in df_resultados.columns:
        df_apostables = df_resultados[df_resultados['Es_Apostable'].str.contains('SÍ')].copy()
    else:
        df_apostables = df_resultados.iloc[0:0]

    if len(df_apostables) == 0:
        print("\n⚠️ No se encontraron partidos con oportunidades de apuesta")
        return

    # Most reliable fixtures first.
    df_apostables = df_apostables.sort_values('Score_Fiabilidad', ascending=False)

    for _, row in df_apostables.iterrows():
        print(f"\n{'='*120}")
        print(f"🏟️ {row['Partido']}")
        print(f"{'='*120}")
        print(f"📊 Predicción: {row['Prediccion']:.2f} corners | Valor más probable: {row['Valor_Mas_Probable']} ({row['Prob_Valor_Mas_Probable_%']:.1f}%)")
        print(f"📈 Histórico: Local {row['CK_Local_Casa']:.1f} CK | Away {row['CK_Away_Fuera']:.1f} CK | H2H: {row['CK_H2H_Total']}")
        print(f"🎲 Fiabilidad: {row['Fiabilidad_Partido']} (Score: {row['Score_Fiabilidad']:.1f}/100)")
        print(f"💡 {row['Recomendacion']}")

        # Lines with a probability of at least 55%; missing columns count as 0.
        print(f"\n 📌 Líneas destacadas:")
        for linea in [7.5, 8.5, 9.5, 10.5]:
            for etiqueta in ('Over', 'Under'):
                prob = row.get(f'{etiqueta}_{linea}_Prob_%', 0)
                if prob >= 55:
                    cuota = row.get(f'{etiqueta}_{linea}_Cuota', 0)
                    conf = row.get(f'{etiqueta}_{linea}_Confianza', '')
                    print(f" • {etiqueta} {linea}: {prob:.1f}% @{cuota:.2f} - {conf}")
|
| 1062 |
+
|
| 1063 |
+
|
| 1064 |
+
|
| 1065 |
+
|
| 1066 |
+
class USE_MODEL():
    """Load the trained corner model plus the match database and run predictions.

    Instantiating the class loads the pickled XGBoost model and scaler from
    ``models/``, reads the cleaned CSV datasets from ``dataset/cleaned/`` and
    initialises the list of season codes.
    """

    def __init__(self):
        self.load_models()
        self.load_data()
        self.init_variables()

    def init_variables(self):
        """Set the season codes that are passed on to the batch predictor."""
        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]
        print("Variables Loaded...")

    def load_data(self):
        """Read the cleaned dataset(s) produced by generate_dataset.py.

        The historic file is mandatory; the current-season file is appended
        only when it exists.
        """
        import os

        self.df_dataset_historic = pd.read_csv("dataset/cleaned/dataset_cleaned.csv")

        current_year_path = "dataset/cleaned/dataset_cleaned_current_year.csv"
        if os.path.exists(current_year_path):
            self.df_dataset_current_year = pd.read_csv(current_year_path)
            self.df_dataset = pd.concat([self.df_dataset_historic, self.df_dataset_current_year])
        else:
            self.df_dataset = self.df_dataset_historic

        self.df_dataset["season"] = self.df_dataset["season"].astype(str)
        # fillna returns a new Series; the result has to be assigned back,
        # otherwise the NaNs silently stay in the column.
        self.df_dataset["Performance_Save%"] = self.df_dataset["Performance_Save%"].fillna(0)

        print("Data Loaded...")

    def load_models(self):
        """Load the pickled XGBoost regressor and its feature scaler."""
        self.xgb_model = joblib.load('models/xgboost_corners_optimized_v2_6_leagues.pkl')
        self.scaler = joblib.load('models/scaler_corners_xgb_v2_6_leagues.pkl')
        print("Models Ready...")

    def consume_model(self, partidos, jornada, temporada, league_code):
        """Predict a batch of fixtures, export them to CSV and print a summary.

        Args:
            partidos: Iterable of (home_team, away_team) tuples.
            jornada: Matchweek number.
            temporada: Season code, e.g. "2526".
            league_code: League code, e.g. "GER".

        Returns:
            The DataFrame returned by ``predecir_partidos_batch``.
        """
        import os

        # Build the path with os.path.join so it resolves to real directories
        # on every OS (the previous backslash literal only worked on Windows).
        output_dir = os.path.join("results", league_code)
        os.makedirs(output_dir, exist_ok=True)
        filename = os.path.join(
            output_dir,
            f"{league_code}-{temporada}-{jornada}-predicciones.csv",
        )

        df_predict = predecir_partidos_batch(
            partidos=partidos,
            jornada=jornada,
            temporada=temporada,
            league_code=league_code,
            export_csv=True,
            filename=filename,
            df_database=self.df_dataset,
            xgb_model=self.xgb_model,
            scaler=self.scaler,
            lst_years=self.lst_years
        )

        # Show the summary of bettable fixtures
        mostrar_resumen_batch(df_predict)

        return df_predict

    def kelly_stats(self, p, odds, fraction=0.2):
        """Return the fractional Kelly stake for a bet.

        Computes f* = (b * p - q) / b with b = odds - 1 and q = 1 - p,
        clamps it at zero and scales it by ``fraction``.

        Args:
            p: Estimated probability of winning the bet.
            odds: Decimal odds offered.
            fraction: Share of the full Kelly stake to use (0.1 = 10% Kelly).

        Returns:
            Fraction of the bankroll to stake; 0 when there is no edge or the
            odds pay no profit (odds <= 1).
        """
        b = odds - 1
        if b <= 0:
            # Odds of 1.0 or less pay nothing; avoid dividing by zero.
            return 0.0
        q = 1 - p
        f_star = (b * p - q) / b
        f_star = max(f_star, 0)  # never recommend a negative stake
        return f_star * fraction
|
| 1126 |
+
|
| 1127 |
+
# Run the example batch only when the file is executed as a script, so that
# importing the module does not load models or write prediction files.
if __name__ == "__main__":
    a = USE_MODEL()

    partidos = [
        ("Werder Bremen", "Wolfsburg"),
        ("Hoffenheim", "RB Leipzig"),
        ("Leverkusen", "Heidenheim"),
        ("Hamburger SV", "Dortmund"),
        ("Union Berlin", "Bayern"),
        ("Gladbach", "Köln"),
        ("Freiburg", "St. Pauli"),
        ("Stuttgart", "Augsburg"),
        ("Eint Frankfurt", "Mainz 05")
    ]

    a.consume_model(
        partidos=partidos,
        jornada=10,
        temporada="2526",
        league_code="GER"
    )
|
| 1147 |
+
|
| 1148 |
+
|
src/models/train_model.py
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
|
| 7 |
+
# MLflow
|
| 8 |
+
import mlflow
|
| 9 |
+
import mlflow.sklearn
|
| 10 |
+
import mlflow.xgboost
|
| 11 |
+
|
| 12 |
+
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
|
| 13 |
+
from sklearn.preprocessing import StandardScaler
|
| 14 |
+
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
|
| 15 |
+
from xgboost import XGBRegressor
|
| 16 |
+
import joblib
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class TRAIN_MODEL():
    """Train an XGBoost regressor for corner counts and track the run in MLflow.

    Instantiating the class runs the whole pipeline inside a single MLflow
    run: load the processed dataset, split and scale it, obtain the
    hyper-parameters (grid search or saved JSON config), fit the final model,
    evaluate it, export feature importances and save the artefacts.
    """

    def __init__(self, nombre, use_grid_search=False, config_path="config/model_config.json"):
        """Run the training pipeline with MLflow tracking.

        Args:
            nombre: Model identifier (e.g. "v3_production").
            use_grid_search: True = search hyper-parameters with GridSearchCV,
                False = reuse the ones stored in ``config_path``.
            config_path: Path of the JSON file holding the hyper-parameters.

        Raises:
            Exception: Any error from a pipeline step is re-raised after the
                run has been tagged as FAILED.
        """
        # MLflow configuration
        mlflow.set_tracking_uri("file:./mlruns")
        mlflow.set_experiment("corners_prediction")

        self.nombre = nombre
        self.use_grid_search = use_grid_search
        self.config_path = config_path
        self.timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Start the MLflow run; every step below logs into it
        with mlflow.start_run(run_name=f"{nombre}_{self.timestamp}") as run:
            self.run_id = run.info.run_id

            print(f"\n{'='*80}")
            print(f"🚀 Entrenamiento iniciado con MLflow")
            print(f" Run ID: {self.run_id}")
            print(f" Nombre: {nombre}")
            print(f" GridSearch: {'SÍ' if use_grid_search else 'NO (usando config)'}")
            print(f"{'='*80}\n")

            # Basic tags
            mlflow.set_tags({
                "model_name": nombre,
                "timestamp": self.timestamp,
                "grid_search_used": str(use_grid_search),
                "framework": "XGBoost",
                "task": "regression"
            })

            # Training pipeline
            try:
                self.init_variables()
                self.load_dataset()
                self.split_train_test(0.15)
                self.define_model()

                if use_grid_search:
                    print("🔍 Ejecutando GridSearch (puede tardar)...")
                    self.train_grid_search()
                    self.save_best_params()  # persist for future retrains
                else:
                    print("⚡ Usando hiperparámetros guardados (rápido)")
                    self.load_best_params()

                self.train_model()
                self.test_and_eval()
                self.top_features()
                self.save_models(nombre)

                mlflow.set_tag("status", "SUCCESS")
                print(f"\n✅ Entrenamiento completado")
                print(f"📊 Ver en MLflow UI: mlflow ui")

            except Exception as e:
                mlflow.set_tag("status", "FAILED")
                print(f"\n❌ Error: {e}")
                raise

    @staticmethod
    def _safe_metric_name(name):
        """Replace characters outside MLflow's allowed set with underscores.

        Keeps alphanumerics, underscores, dashes, periods, spaces and
        slashes; anything else (e.g. the '%' in a feature name) becomes '_'.
        """
        return "".join(
            ch if (ch.isalnum() or ch in "_-. /") else "_"
            for ch in str(name)
        )

    def init_variables(self):
        """Define the hyper-parameter search space used by GridSearchCV."""
        self.param_grid = {
            'n_estimators': [200],          # 1 value
            'max_depth': [3, 4, 5],         # 3 values
            'learning_rate': [0.02, 0.03],  # 2 values
            'reg_alpha': [3.0, 5.0],        # 2 values
            'reg_lambda': [5.0, 8.0],       # 2 values
            'gamma': [0.5, 1.0],            # 2 values
            'subsample': [0.7],             # 1 value
            'colsample_bytree': [0.7],      # 1 value
            'colsample_bylevel': [0.6],     # 1 value
            'min_child_weight': [5, 7]      # 2 values
        }
        # Combinations: 1 x 3 x 2 x 2 x 2 x 2 x 1 x 1 x 1 x 2 = 96
        # (each one is fitted once per CV fold by the grid search).

        # Log the grid configuration
        if self.use_grid_search:
            for param, values in self.param_grid.items():
                mlflow.log_param(f"grid_{param}", str(values))

        print("✅ Variables inicializadas")

    def load_dataset(self):
        """Load the processed dataset, drop target outliers and fill nulls."""
        self.df_data = pd.read_csv("dataset/processed/dataset_processed.csv")
        self.y = self.df_data["y"]
        self.df_data = self.df_data.drop(["y"], axis=1)
        self.y_array = np.array(self.y).flatten()

        # Keep only matches with 3 to 17 corners (outlier filter)
        mask = (self.y_array >= 3) & (self.y_array <= 17)
        self.df_data = self.df_data[mask].copy()
        self.y_array = self.y_array[mask]

        # Replace missing feature values with 0
        if self.df_data.isnull().any().any():
            self.df_data = self.df_data.fillna(0)

        # Log dataset information
        mlflow.log_params({
            "dataset_samples": len(self.df_data),
            "dataset_features": self.df_data.shape[1],
            "target_min": float(self.y_array.min()),
            "target_max": float(self.y_array.max()),
            "target_mean": float(self.y_array.mean()),
            "target_std": float(self.y_array.std())
        })

        print(f"✅ Dataset cargado: {self.df_data.shape}")

    def split_train_test(self, test_size_):
        """Split the data into train/validation/test sets and scale features.

        Args:
            test_size_: Fraction of the samples held out as the test set.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.df_data, self.y_array,
            test_size=test_size_,
            random_state=42,
            shuffle=True
        )

        # Scale: the scaler is fitted on the training portion only
        self.scaler = StandardScaler()
        self.X_train = pd.DataFrame(
            self.scaler.fit_transform(self.X_train),
            columns=self.X_train.columns
        )
        self.X_test = pd.DataFrame(
            self.scaler.transform(self.X_test),
            columns=self.X_test.columns
        )

        # Carve a validation set out of the training data
        self.X_train_fit, self.X_val, self.y_train_fit, self.y_val = train_test_split(
            self.X_train, self.y_train,
            test_size=0.15,
            random_state=43
        )

        # Log split sizes
        mlflow.log_params({
            "train_samples": len(self.X_train_fit),
            "val_samples": len(self.X_val),
            "test_samples": len(self.X_test),
            "test_size": test_size_
        })

        print(f"✅ Train: {len(self.X_train_fit)} | Val: {len(self.X_val)} | Test: {len(self.X_test)}")

    def define_model(self):
        """Create the base estimator and, if requested, the GridSearchCV."""
        self.xgb_base = XGBRegressor(
            objective="reg:squarederror",
            tree_method="hist",
            random_state=42,
            n_jobs=-1,
            verbosity=0
        )

        if self.use_grid_search:
            self.kfold = KFold(n_splits=5, shuffle=True, random_state=42)
            # greater_is_better=False makes the scorer return negative MAE
            self.mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

            self.grid_search = GridSearchCV(
                estimator=self.xgb_base,
                param_grid=self.param_grid,
                cv=self.kfold,
                scoring=self.mae_scorer,
                n_jobs=-1,
                verbose=2,
                return_train_score=True
            )

    def train_grid_search(self):
        """Run the grid search and keep the best hyper-parameters."""
        print("\n🔍 Buscando mejores hiperparámetros...")
        self.grid_search.fit(self.X_train_fit, self.y_train_fit)

        # Best parameters
        self.best_params = self.grid_search.best_params_

        # Log to MLflow
        for param, value in self.best_params.items():
            mlflow.log_param(f"best_{param}", value)

        # best_score_ is negative MAE, so flip the sign
        mlflow.log_metric("cv_best_mae", -self.grid_search.best_score_)

        print(f"\n✅ Mejores hiperparámetros encontrados:")
        for param, value in self.best_params.items():
            print(f" {param}: {value}")
        print(f" CV MAE: {-self.grid_search.best_score_:.4f}")

    def save_best_params(self):
        """Write the best hyper-parameters to the JSON config file."""
        # Create the directory that actually contains config_path (it may
        # differ from the default "config" folder).
        config_dir = os.path.dirname(self.config_path)
        if config_dir:
            os.makedirs(config_dir, exist_ok=True)

        config = {
            "model_name": self.nombre,
            "timestamp": self.timestamp,
            "best_params": self.best_params,
            "cv_mae": float(-self.grid_search.best_score_),
            "run_id": self.run_id
        }

        with open(self.config_path, 'w') as f:
            json.dump(config, f, indent=4)

        # Attach the file to the MLflow run
        mlflow.log_artifact(self.config_path)

        print(f"💾 Hiperparámetros guardados en: {self.config_path}")

    def load_best_params(self):
        """Read the hyper-parameters from the JSON config file.

        Raises:
            FileNotFoundError: If ``config_path`` does not exist.
        """
        if not os.path.exists(self.config_path):
            raise FileNotFoundError(
                f"No se encontró {self.config_path}. "
                "Ejecuta primero con use_grid_search=True"
            )

        with open(self.config_path, 'r') as f:
            config = json.load(f)

        self.best_params = config["best_params"]

        # Log the loaded params to MLflow
        for param, value in self.best_params.items():
            mlflow.log_param(f"loaded_{param}", value)

        mlflow.log_param("config_source", self.config_path)
        mlflow.log_param("previous_cv_mae", config.get("cv_mae", "N/A"))

        print(f"✅ Hiperparámetros cargados desde: {self.config_path}")
        print(f" Origen: {config.get('model_name', 'unknown')} ({config.get('timestamp', 'unknown')})")

    def train_model(self):
        """Fit the final model with the selected hyper-parameters."""
        self.xgb_model = XGBRegressor(
            **self.best_params,
            objective="reg:squarederror",
            tree_method="hist",
            random_state=42,
            n_jobs=-1,
            verbosity=0
        )

        self.xgb_model.fit(
            self.X_train_fit,
            self.y_train_fit,
            eval_set=[(self.X_val, self.y_val)],
            verbose=False
        )

        print("✅ Modelo entrenado")

    def test_and_eval(self):
        """Evaluate the model on train/val/test and log the metrics."""
        # Predictions
        y_train_pred = self.xgb_model.predict(self.X_train_fit)
        y_val_pred = self.xgb_model.predict(self.X_val)
        y_test_pred = self.xgb_model.predict(self.X_test)

        # Compute metrics per split
        metrics = {
            'train': {
                'mae': mean_absolute_error(self.y_train_fit, y_train_pred),
                'rmse': np.sqrt(mean_squared_error(self.y_train_fit, y_train_pred)),
                'r2': r2_score(self.y_train_fit, y_train_pred)
            },
            'val': {
                'mae': mean_absolute_error(self.y_val, y_val_pred),
                'rmse': np.sqrt(mean_squared_error(self.y_val, y_val_pred)),
                'r2': r2_score(self.y_val, y_val_pred)
            },
            'test': {
                'mae': mean_absolute_error(self.y_test, y_test_pred),
                'rmse': np.sqrt(mean_squared_error(self.y_test, y_test_pred)),
                'r2': r2_score(self.y_test, y_test_pred)
            }
        }

        # Log every metric to MLflow
        for set_name, set_metrics in metrics.items():
            for metric_name, value in set_metrics.items():
                mlflow.log_metric(f"{set_name}_{metric_name}", value)

        # Cross-validation on the full (scaled) training set
        cv_mae = cross_val_score(
            self.xgb_model, self.X_train, self.y_train,
            cv=5, scoring='neg_mean_absolute_error'
        )
        cv_r2 = cross_val_score(
            self.xgb_model, self.X_train, self.y_train,
            cv=5, scoring='r2'
        )

        mlflow.log_metric("cv_mae_mean", -cv_mae.mean())
        mlflow.log_metric("cv_mae_std", cv_mae.std())
        mlflow.log_metric("cv_r2_mean", cv_r2.mean())
        mlflow.log_metric("cv_r2_std", cv_r2.std())

        # Error analysis on the test set
        test_errors = np.abs(self.y_test - y_test_pred)
        mlflow.log_metric("test_error_median", float(np.median(test_errors)))
        mlflow.log_metric("test_error_p90", float(np.percentile(test_errors, 90)))
        mlflow.log_metric("test_pct_error_lt_2", float((test_errors < 2.0).sum() / len(test_errors) * 100))

        # Overfitting gap: train R² minus test R²
        gap = metrics['train']['r2'] - metrics['test']['r2']
        mlflow.log_metric("overfitting_gap", gap)

        print(f"\n📊 MÉTRICAS:")
        print(f" Train MAE: {metrics['train']['mae']:.4f} | R²: {metrics['train']['r2']:.4f}")
        print(f" Val MAE: {metrics['val']['mae']:.4f} | R²: {metrics['val']['r2']:.4f}")
        print(f" Test MAE: {metrics['test']['mae']:.4f} | R²: {metrics['test']['r2']:.4f}")
        print(f" CV MAE: {-cv_mae.mean():.4f} ± {cv_mae.std():.4f}")
        print(f" Overfitting Gap: {gap:.4f}")

    def top_features(self):
        """Export the feature importances to CSV and log the top ones."""
        feature_importance = pd.DataFrame({
            'feature': self.df_data.columns,
            'importance': self.xgb_model.feature_importances_
        }).sort_values('importance', ascending=False)

        # This step runs before save_models, so make sure the output
        # directory exists before writing into it.
        os.makedirs("models", exist_ok=True)

        # Save CSV
        importance_path = f"models/feature_importance_{self.nombre}.csv"
        feature_importance.to_csv(importance_path, index=False)
        mlflow.log_artifact(importance_path)

        # Log the top 10; feature names are sanitised because MLflow only
        # accepts a restricted character set in metric names.
        for _, row in feature_importance.head(10).iterrows():
            metric_name = self._safe_metric_name(f"feat_imp_{row['feature']}")
            mlflow.log_metric(metric_name, row['importance'])

        print(f"\n🔍 Top 5 features:")
        for _, row in feature_importance.head(5).iterrows():
            print(f" {row['feature']}: {row['importance']:.4f}")

    def save_models(self, nombre):
        """Save the model and scaler locally and register the model in MLflow.

        Args:
            nombre: Suffix used in the pickle file names.
        """
        os.makedirs("models", exist_ok=True)

        # Paths
        model_path = f'models/xgboost_corners_{nombre}.pkl'
        scaler_path = f'models/scaler_corners_{nombre}.pkl'

        # Save the files
        joblib.dump(self.xgb_model, model_path)
        joblib.dump(self.scaler, scaler_path)

        # Log to MLflow
        mlflow.xgboost.log_model(
            self.xgb_model,
            artifact_path="model",
            registered_model_name="corners_predictor"
        )
        mlflow.log_artifact(scaler_path, artifact_path="preprocessing")

        print(f"\n💾 Modelos guardados:")
        print(f" {model_path}")
        print(f" {scaler_path}")
        print(f" MLflow Model Registry ✓")
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
# ===========================
|
| 404 |
+
# USO
|
| 405 |
+
# ===========================
|
| 406 |
+
|
| 407 |
+
if __name__ == "__main__":

    # Option 1: first run, or every 3-6 months.
    # Runs the (slow) grid search and stores the best hyper-parameters.
    # model = TRAIN_MODEL(
    #     nombre="v4_grid_search",
    #     use_grid_search=True
    # )

    # Option 2: regular retraining.
    # Reuses the stored hyper-parameters (fast); requires that Option 1 has
    # been run at least once so that config/model_config.json exists.
    model = TRAIN_MODEL(
        nombre="v4_retrain",
        use_grid_search=False  # load config/model_config.json
    )
|
src/process_data/__init__.py
ADDED
|
File without changes
|
src/process_data/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (171 Bytes). View file
|
|
|
src/process_data/__pycache__/process_dataset.cpython-311.pyc
ADDED
|
Binary file (27.7 kB). View file
|
|
|
src/process_data/generate_dataset.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# Añadir la ruta raíz del proyecto al PYTHONPATH
|
| 5 |
+
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
|
| 6 |
+
sys.path.insert(0, project_root)
|
| 7 |
+
|
| 8 |
+
from src.utils.helper import desactivar_advertencias
|
| 9 |
+
|
| 10 |
+
import soccerdata as sd
|
| 11 |
+
import pandas as pd
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def extract_local(game_str):
    """Return the home team parsed from a "<date> <home>-<away>" game label.

    Everything after the first space is split on "-" and the first piece,
    stripped of whitespace, is returned. Returns None when the value is not a
    string or contains no space.

    NOTE(review): team names that themselves contain a hyphen are cut at that
    hyphen — confirm against the actual labels in the data.
    """
    try:
        _, separator, matchup = game_str.partition(" ")
    except AttributeError:
        return None
    if not separator:
        return None
    return matchup.split("-")[0].strip()
|
| 20 |
+
|
| 21 |
+
def extract_away(game_str):
    """Return the away team parsed from a "<date> <home>-<away>" game label.

    Everything after the first space is split on "-" and the second piece,
    stripped of whitespace, is returned. Returns None when the value is not a
    string, contains no space, or has no "-" after the first space.

    NOTE(review): team names that themselves contain a hyphen shift the
    pieces — confirm against the actual labels in the data.
    """
    try:
        _, separator, matchup = game_str.partition(" ")
    except AttributeError:
        return None
    if not separator:
        return None
    teams = matchup.split("-")
    if len(teams) < 2:
        return None
    return teams[1].strip()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class GENERATE_DATASET():
|
| 30 |
+
def __init__(self,current_year):
|
| 31 |
+
print("Clase GENERATE_DATASET Inicializada")
|
| 32 |
+
|
| 33 |
+
desactivar_advertencias()
|
| 34 |
+
self.init_variables()
|
| 35 |
+
self.mergue_raw_data_all_leagues(current_year)
|
| 36 |
+
self.process_and_output_dataset(current_year)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def init_variables(self):
|
| 40 |
+
|
| 41 |
+
#Years to get from datasource
|
| 42 |
+
self.LST_YEARS_CONFIG = [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024, 2025]
|
| 43 |
+
|
| 44 |
+
self.dic_historic_all_leagues = {
|
| 45 |
+
"ENG": {},
|
| 46 |
+
"ESP": {},
|
| 47 |
+
"GER": {},
|
| 48 |
+
"FRA": {},
|
| 49 |
+
"ITA": {},
|
| 50 |
+
"NED": {},
|
| 51 |
+
"ENG2": {},
|
| 52 |
+
"POR": {},
|
| 53 |
+
"BEL": {}
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
self.df_database = pd.DataFrame()
|
| 58 |
+
|
| 59 |
+
# Diccionary to name leagues to get from datasource
|
| 60 |
+
self.DIC_LEAGUES_CONFIG = {
|
| 61 |
+
"ENG": {
|
| 62 |
+
"name": "ENG-Premier League",
|
| 63 |
+
"code": "ENG"
|
| 64 |
+
},
|
| 65 |
+
"POR": {
|
| 66 |
+
"name": "POR-Primeira Liga",
|
| 67 |
+
"code": "POR"
|
| 68 |
+
},
|
| 69 |
+
"BEL": {
|
| 70 |
+
"name": "BEL-Belgian Pro League",
|
| 71 |
+
"code": "BEL"
|
| 72 |
+
},
|
| 73 |
+
"ESP": {
|
| 74 |
+
"name": "ESP-La Liga",
|
| 75 |
+
"code": "ESP"
|
| 76 |
+
},
|
| 77 |
+
"GER": {
|
| 78 |
+
"name": "GER-Bundesliga",
|
| 79 |
+
"code": "GER"
|
| 80 |
+
},
|
| 81 |
+
"FRA": {
|
| 82 |
+
"name": "FRA-Ligue 1",
|
| 83 |
+
"code": "FRA"
|
| 84 |
+
},
|
| 85 |
+
"ITA": {
|
| 86 |
+
"name": "ITA-Serie A",
|
| 87 |
+
"code": "ITA"
|
| 88 |
+
},
|
| 89 |
+
"NED": {
|
| 90 |
+
"name": "NED-Eredivisie",
|
| 91 |
+
"code": "NED"
|
| 92 |
+
}
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
lst_base = ['season','date','game','round','day','venue','team','GF','GA','opponent',"result"]
|
| 97 |
+
lst_columns_shooting = ['Expected_xG','Standard_Sh','Standard_SoT','Standard_Dist']
|
| 98 |
+
lst_columns_passing_type = ['Pass Types_CK']
|
| 99 |
+
lst_columns_passing = ['Total_Att','Long_Att','Ast','1/3','PrgP']
|
| 100 |
+
lst_columns_defensive = ['Tackles_Att 3rd','Tackles_Tkl','Blocks_Blocks','Int','Clr']
|
| 101 |
+
lst_columns_keeper = ['Performance_Save%']
|
| 102 |
+
lst_columns_shot_creation = ['SCA Types_SCA']
|
| 103 |
+
lst_columns_misc = ['Performance_Crs']
|
| 104 |
+
lst_columns_possesion = ['Poss', 'Touches_Att 3rd','Carries_PrgC','Touches_Touches','Touches_Att Pen','Carries_Carries','Carries_1/3','Carries_CPA']
|
| 105 |
+
|
| 106 |
+
self.lst_columns_combined = lst_base + lst_columns_passing_type +lst_columns_passing+lst_columns_defensive+lst_columns_shooting+lst_columns_keeper+lst_columns_shot_creation+lst_columns_misc+lst_columns_possesion
|
| 107 |
+
print("-Variables inicializadas")
|
| 108 |
+
|
| 109 |
+
def get_raw_data_from_source(self,league,year):
|
| 110 |
+
|
| 111 |
+
print(f"\nLiga {league}... 📅 Año {year}...", end=" ")
|
| 112 |
+
# Extraer equipos local/visitante
|
| 113 |
+
if league["name"] in ["NED-Eredivisie","POR-Primeira Liga","ENG-Championship"] and year == 2017:
|
| 114 |
+
return
|
| 115 |
+
|
| 116 |
+
# Crear scraper para la liga específica
|
| 117 |
+
fbref = sd.FBref(leagues=league["name"], seasons=year)
|
| 118 |
+
|
| 119 |
+
# Leer estadísticas
|
| 120 |
+
team_season_shooting = fbref.read_team_match_stats(stat_type="shooting",opponent_stats = False)
|
| 121 |
+
team_season_passing_types = fbref.read_team_match_stats(stat_type="passing_types",opponent_stats = False)
|
| 122 |
+
team_season_passing = fbref.read_team_match_stats(stat_type="passing",opponent_stats = False)
|
| 123 |
+
team_season_defensive = fbref.read_team_match_stats(stat_type="defense",opponent_stats = False)
|
| 124 |
+
team_season_goalkeeping = fbref.read_team_match_stats(stat_type="keeper",opponent_stats = False)
|
| 125 |
+
team_season_goal_shot_creation = fbref.read_team_match_stats(stat_type="goal_shot_creation",opponent_stats = False)
|
| 126 |
+
team_season_goal_misc = fbref.read_team_match_stats(stat_type="misc",opponent_stats = False)
|
| 127 |
+
team_season_goal_possession = fbref.read_team_match_stats(stat_type="possession",opponent_stats = False)
|
| 128 |
+
|
| 129 |
+
df_concat = pd.concat([team_season_shooting,team_season_passing_types,team_season_passing,team_season_defensive,
|
| 130 |
+
team_season_goalkeeping,team_season_goal_shot_creation,team_season_goal_misc,team_season_goal_possession], axis=1)
|
| 131 |
+
|
| 132 |
+
# Reset index
|
| 133 |
+
df_reset = df_concat.copy().reset_index()
|
| 134 |
+
|
| 135 |
+
# Aplanar MultiIndex
|
| 136 |
+
df_reset.columns = [
|
| 137 |
+
'_'.join(col).strip('_') if isinstance(col, tuple) else col
|
| 138 |
+
for col in df_reset.columns.values
|
| 139 |
+
]
|
| 140 |
+
|
| 141 |
+
# Eliminar duplicados
|
| 142 |
+
df_reset = df_reset.loc[:, ~df_reset.columns.duplicated()]
|
| 143 |
+
|
| 144 |
+
df_filtered = df_reset[self.lst_columns_combined]
|
| 145 |
+
|
| 146 |
+
df_filtered["local"] = df_filtered["game"].apply(extract_local)
|
| 147 |
+
df_filtered["away"] = df_filtered["game"].apply(extract_away)
|
| 148 |
+
|
| 149 |
+
# Agregar código de liga
|
| 150 |
+
df_filtered["league"] = league["code"]
|
| 151 |
+
|
| 152 |
+
df_filtered = df_filtered.loc[:, ~df_filtered.columns.duplicated(keep='first')]
|
| 153 |
+
|
| 154 |
+
# Verificar valores problemáticos
|
| 155 |
+
problematic = df_filtered[df_filtered["away"].isna()]
|
| 156 |
+
if len(problematic) > 0:
|
| 157 |
+
print(f"⚠️ {len(problematic)} registros con formato incorrecto")
|
| 158 |
+
else:
|
| 159 |
+
print(f"✅ {len(df_filtered)} partidos extraídos")
|
| 160 |
+
|
| 161 |
+
return df_filtered
|
| 162 |
+
|
| 163 |
+
def mergue_raw_data_all_leagues(self, current_year):
|
| 164 |
+
|
| 165 |
+
all_dataframes = []
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
if current_year == True:
|
| 169 |
+
#Process only current year
|
| 170 |
+
for league_key, league_info in self.DIC_LEAGUES_CONFIG.items():
|
| 171 |
+
|
| 172 |
+
self.dic_historic_all_leagues[league_key][self.LST_YEARS_CONFIG[-1]] = self.get_raw_data_from_source(league_info,self.LST_YEARS_CONFIG[-1])
|
| 173 |
+
else:
|
| 174 |
+
|
| 175 |
+
#Process all years needed execpt for current year
|
| 176 |
+
for league_key, league_info in self.DIC_LEAGUES_CONFIG.items():
|
| 177 |
+
for year in self.LST_YEARS_CONFIG:
|
| 178 |
+
if year == 2025:
|
| 179 |
+
continue
|
| 180 |
+
self.dic_historic_all_leagues[league_key][year] = self.get_raw_data_from_source(league_info,year)
|
| 181 |
+
|
| 182 |
+
for league_key, dic_historic in self.dic_historic_all_leagues.items():
|
| 183 |
+
for year, df in dic_historic.items():
|
| 184 |
+
all_dataframes.append(df)
|
| 185 |
+
|
| 186 |
+
self.df_database = pd.concat(all_dataframes, ignore_index=True)
|
| 187 |
+
|
| 188 |
+
print("Dataset conbinado")
|
| 189 |
+
|
| 190 |
+
def process_and_output_dataset(self, current_year):
    """Clean ``self.df_database`` and write it to ``dataset/cleaned``.

    Only rows whose ``round`` contains "Matchweek" are kept; ``round``, ``GF``
    and ``GA`` are converted to integers and exact duplicate rows are dropped.

    Args:
        current_year: When equal to ``True`` the result is saved as
            ``dataset_cleaned_current_year.csv``; otherwise as
            ``dataset_cleaned.csv``.
    """
    import os

    # Work on an explicit copy so the column assignments below never write
    # into a view of the unfiltered frame.
    matchweeks = self.df_database[
        self.df_database['round'].str.contains("Matchweek", na=False)
    ].copy()

    # "Matchweek 12" -> 12
    matchweeks['round'] = matchweeks['round'].str.replace("Matchweek ", "").astype(int)
    matchweeks['GF'] = matchweeks['GF'].astype(int)
    matchweeks['GA'] = matchweeks['GA'].astype(int)

    self.df_database = matchweeks.drop_duplicates()

    # Forward slashes work on every platform; a backslash-separated literal
    # would become part of the file name outside Windows.
    output_dir = "dataset/cleaned"
    os.makedirs(output_dir, exist_ok=True)

    if current_year == True:
        file_name = "dataset_cleaned_current_year.csv"
    else:
        file_name = "dataset_cleaned.csv"
    self.df_database.to_csv(f"{output_dir}/{file_name}", index=False)

    print("Dataset cleaned and saved on dataset/cleaned")
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# Module-level entry point: builds a GENERATE_DATASET instance (argument False)
# every time this module is executed.
# NOTE(review): this also runs when the module is merely imported; consider an
# `if __name__ == "__main__":` guard - confirm no importer relies on it first.
a = GENERATE_DATASET(False)
|
src/process_data/process_dataset.py
ADDED
|
@@ -0,0 +1,584 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def get_ck(df, season, round_num, local, away, league=None):
    """Return the total corner kicks of both teams in one specific round.

    Rows are matched on ``season`` and ``round`` (and ``league`` when given);
    the ``Pass Types_CK`` values of the rows whose ``team`` is ``local`` and of
    the rows whose ``team`` is ``away`` are summed and added together.
    """
    in_round = (df['season'] == season) & (df['round'] == round_num)
    if league is not None:
        in_round = in_round & (df['league'] == league)

    round_rows = df[in_round]
    teams = round_rows['team']

    local_corners = round_rows[teams == local]["Pass Types_CK"].sum()
    away_corners = round_rows[teams == away]["Pass Types_CK"].sum()
    return local_corners + away_corners
|
| 20 |
+
|
| 21 |
+
def get_dataframes(df, season, round_num, local, away, league=None):
    """Split a team pair's earlier matches of the season into eight frames.

    Only rows of ``season`` with ``round`` strictly below ``round_num`` (and of
    ``league`` when given) are considered.

    Returns:
        tuple: ``(local_home, local_away, local_opp_home, local_opp_away,
        away_home, away_away, away_opp_home, away_opp_away)`` where the
        ``*_opp_*`` frames hold the rows whose ``opponent`` is that club and
        home/away refers to the row's ``venue`` value.
    """
    earlier_rounds = (df['season'] == season) & (df['round'] < round_num)
    if league is not None:
        earlier_rounds = earlier_rounds & (df['league'] == league)

    # (column to match, club) in the order the result tuple is laid out.
    selections = (
        ('team', local),
        ('opponent', local),
        ('team', away),
        ('opponent', away),
    )

    frames = []
    for column, club in selections:
        subset = df[earlier_rounds & (df[column] == club)].copy()
        for venue in ("Home", "Away"):
            frames.append(subset[subset['venue'] == venue])

    return tuple(frames)
|
| 43 |
+
|
| 44 |
+
def get_head_2_head(df, local, away, seasons=None, league=None):
    """Return the most recent direct meetings between two teams.

    Rows are optionally restricted to ``seasons`` (ignored when empty or None)
    and to ``league``. Two frames are returned: the rows where ``local`` is the
    ``team`` facing ``away``, and the mirrored rows for ``away``. The last 3
    rows of each are kept, or only the last 2 when ``local`` has fewer than 4
    such rows.
    """
    pool = df
    if seasons:
        pool = pool[pool['season'].isin(seasons)]
    if league is not None:
        pool = pool[pool['league'] == league]

    local_rows = pool[(pool['team'] == local) & (pool['opponent'] == away)]
    away_rows = pool[(pool['team'] == away) & (pool['opponent'] == local)]

    # The window size is decided by the local side's row count only.
    keep = 2 if len(local_rows) < 4 else 3
    return local_rows.tail(keep), away_rows.tail(keep)
|
| 61 |
+
|
| 62 |
+
def get_points_from_result(result):
    """Map a match result to league points: 'W' -> 3, 'D' -> 1, anything else -> 0."""
    return 3 if result == 'W' else (1 if result == 'D' else 0)
|
| 70 |
+
|
| 71 |
+
# ✅ NUEVA FUNCIÓN: Calcular PPP (Puntos Por Partido)
|
| 72 |
+
def get_team_ppp(df, team, season, round_num, league=None):
    """Return a team's points per match before a given round.

    Args:
        df: Full match DataFrame.
        team: Team name, matched against the ``team`` column.
        season: Season to consider.
        round_num: Round number; only rows with ``round`` strictly below it
            are counted.
        league: Optional league code used as an extra filter.

    Returns:
        float: Points per match, or 0.0 when no row matches.
    """
    played_before = (
        (df['team'] == team)
        & (df['season'] == season)
        & (df['round'] < round_num)
    )
    team_matches = df[played_before]

    if league is not None:
        team_matches = team_matches[team_matches['league'] == league]

    matches_played = len(team_matches)
    if matches_played == 0:
        return 0.0

    points = team_matches['result'].apply(get_points_from_result)
    return points.sum() / matches_played
|
| 102 |
+
|
| 103 |
+
# ✅ NUEVA FUNCIÓN: Calcular diferencia de PPP
|
| 104 |
+
def get_ppp_difference(df, local, away, season, round_num, league=None):
    """Return the points-per-match gap between the home and the away team.

    Args:
        df: Full match DataFrame.
        local: Home team.
        away: Away team.
        season: Season to consider.
        round_num: Current round (passed through to ``get_team_ppp``).
        league: Optional league code.

    Returns:
        float: ``get_team_ppp`` of ``local`` minus ``get_team_ppp`` of ``away``.
    """
    home_side, away_side = (
        get_team_ppp(df, club, season, round_num, league)
        for club in (local, away)
    )
    return home_side - away_side
|
| 123 |
+
|
| 124 |
+
def get_average(df, is_team=False, lst_avg=None):
    """Aggregate match rows into a fixed-length tuple of statistics.

    Args:
        df: Match rows to aggregate.
        is_team: When True, return the 24 team metrics; when False, return the
            9 league baseline values.
        lst_avg: Baseline tuple produced by a call with ``is_team=False``.
            Needed when ``is_team`` is True and ``df`` is not empty; the
            entries at indices 1, 2, 3, 4, 5, 6 and 8 are subtracted from the
            matching per-match team averages.

    Returns:
        tuple: With ``is_team=True``, 24 values in this order: avg_ck, var_ck,
        xg, sca, cross, poss, att_3rd, gf, ga, sh_accuracy, xg_shot,
        attacking_presence, possession_shot, progressive_pass_ratio,
        final_third_involvement, assist_sca, creative_efficiency,
        high_press_intensity, interception_tackle, clearance_ratio,
        progressive_carry_ratio, carry_pass_balance, offensive_index,
        transition_index. With ``is_team=False``, 9 values: var_ck, xg, sca,
        cross, att_3rd, gf, ga, sh, avg_ck. An empty ``df`` yields all zeros
        with the same length as the corresponding non-empty result.
    """
    n_rows = len(df)

    if n_rows == 0:
        # Defaults must have the same arity as the real result, otherwise the
        # flattened feature rows built by callers get misaligned.
        return (0,) * (24 if is_team else 9)

    # Corner variance is undefined for a single row.
    var_ck = df['Pass Types_CK'].var() if n_rows > 1 else 0

    if not is_team:
        # League baselines: plain column means.
        return (
            var_ck,                                                       # 0
            df['Expected_xG'].mean(),                                     # 1
            df['SCA Types_SCA'].mean(),                                   # 2
            df['Performance_Crs'].mean(),                                 # 3
            df['Touches_Att 3rd'].mean(),                                 # 4
            df['GF'].mean(),                                              # 5
            df['GA'].mean(),                                              # 6
            df['Standard_Sh'].mean() if 'Standard_Sh' in df.columns else 0,  # 7
            df['Pass Types_CK'].mean(),                                   # 8
        )

    # --- Basic statistics, expressed relative to the league baseline -------
    avg_cross = (df['Performance_Crs'].sum() / n_rows) - lst_avg[3]
    avg_att_3rd = (df['Touches_Att 3rd'].sum() / n_rows) - lst_avg[4]
    avg_sca = (df['SCA Types_SCA'].sum() / n_rows) - lst_avg[2]
    avg_xg = (df['Expected_xG'].sum() / n_rows) - lst_avg[1]
    avg_ck = (df['Pass Types_CK'].sum() / n_rows) - lst_avg[8]
    avg_poss = (df['Poss'].sum() / n_rows) - 50  # possession is centred on 50
    avg_gf = (df['GF'].sum() / n_rows) - lst_avg[5]
    avg_ga = (df['GA'].sum() / n_rows) - lst_avg[6]

    # --- Attacking ratios (0 whenever the denominator is not positive) -----
    total_sh = df['Standard_Sh'].sum()
    sh_accuracy = (df['Standard_SoT'].sum() / total_sh) if total_sh > 0 else 0
    xg_shot = (df['Expected_xG'].sum() / total_sh) if total_sh > 0 else 0

    total_touches = df['Touches_Touches'].sum()
    attacking_presence = (df['Touches_Att 3rd'].sum() / total_touches) if total_touches > 0 else 0

    total_poss = df['Poss'].sum()
    possession_shot = (total_sh / total_poss) if total_poss > 0 else 0

    # --- Chance-creation ratios ---------------------------------------------
    total_passes = df['Total_Att'].sum()
    progressive_pass_ratio = (df['PrgP'].sum() / total_passes) if total_passes > 0 else 0
    final_third_involvement = (df['1/3'].sum() / total_passes) if total_passes > 0 else 0

    total_sca = df['SCA Types_SCA'].sum()
    assist_sca = (df['Ast'].sum() / total_sca) if total_sca > 0 else 0
    creative_efficiency = (total_sca / total_poss) if total_poss > 0 else 0

    # --- Defensive ratios ---------------------------------------------------
    total_tackles = df['Tackles_Tkl'].sum()
    high_press_intensity = (df['Tackles_Att 3rd'].sum() / total_tackles) if total_tackles > 0 else 0
    interception_tackle = (df['Int'].sum() / total_tackles) if total_tackles > 0 else 0

    total_defensive_actions = total_tackles + df['Int'].sum()
    clearance_ratio = (df['Clr'].sum() / total_defensive_actions) if total_defensive_actions > 0 else 0

    # --- Ball-carrying ratios -----------------------------------------------
    total_carries = df['Carries_Carries'].sum()
    progressive_carry_ratio = (df['Carries_PrgC'].sum() / total_carries) if total_carries > 0 else 0

    total_prog_passes = df['PrgP'].sum()
    carry_pass_balance = (df['Carries_PrgC'].sum() / total_prog_passes) if total_prog_passes > 0 else 0

    # --- Composite indices (built from column means) ------------------------
    avg_sh = df['Standard_Sh'].mean()
    offensive_index = (
        (df['GF'].mean() + df['Expected_xG'].mean()) * (df['Standard_SoT'].mean() / avg_sh)
        if avg_sh > 0 else 0
    )

    avg_poss_raw = df['Poss'].mean()
    transition_index = (
        (df['PrgP'].mean() + df['Carries_PrgC'].mean()) / avg_poss_raw
        if avg_poss_raw > 0 else 0
    )

    return (
        avg_ck,                   # 0
        var_ck,                   # 1
        avg_xg,                   # 2
        avg_sca,                  # 3
        avg_cross,                # 4
        avg_poss,                 # 5
        avg_att_3rd,              # 6
        avg_gf,                   # 7
        avg_ga,                   # 8
        sh_accuracy,              # 9
        xg_shot,                  # 10
        attacking_presence,       # 11
        possession_shot,          # 12
        progressive_pass_ratio,   # 13
        final_third_involvement,  # 14
        assist_sca,               # 15
        creative_efficiency,      # 16
        high_press_intensity,     # 17
        interception_tackle,      # 18
        clearance_ratio,          # 19
        progressive_carry_ratio,  # 20
        carry_pass_balance,       # 21
        offensive_index,          # 22
        transition_index,         # 23
    )
|
| 327 |
+
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
class PROCESS_DATA():
    """Build the per-match training table from the cleaned match dataset.

    Instantiating the class runs the whole pipeline: initialise state, load
    the cleaned CSV files, compute one feature row per match and write
    ``dataset/processed/dataset_processed.csv``.
    """

    def __init__(self, use_one_hot_encoding):
        # When true, eight 0/1 league indicator columns are added to every row.
        self.USE_ONE_HOT_ENCODING = use_one_hot_encoding

        self.init_variables()
        self.load_clean_dataset()
        self.process_all_matches()
        self.clean_and_ouput_dataset()

    def init_variables(self):
        """Initialise the accumulators and the feature-name building blocks."""
        self.y = []          # target: total corners per processed match
        self.lst_data = []   # one flat list of feature values per processed match

        # Seasons in chronological order; used to select "up to this season".
        self.lst_years = ["1819", "1920", "2021", "2122", "2223", "2324", "2425", "2526"]

        # Suffixes for the 24 values get_average returns with is_team=True.
        self.lst_base_advanced = [
            "avg_ck", "var_ck",
            "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga",
            "sh_accuracy", "xg_shot", "attacking_presence", "possession_shot",
            "progressive_pass_ratio", "final_third_involvement", "assist_sca", "creative_efficiency",
            "high_press_intensity", "interception_tackle", "clearance_ratio",
            "progressive_carry_ratio", "carry_pass_balance", "offensive_index", "transition_index"
        ]

        # Suffixes for the 9 values kept for the opponent frames.
        # NOTE(review): those 9 values are the first nine team metrics
        # (avg_ck, var_ck, xg, sca, cross, poss, att_3rd, gf, ga), so this
        # order looks mislabeled. Left unchanged because downstream code may
        # select columns by these names - confirm before renaming.
        self.lst_base_original = [
            "var_ck", "xg", "sca", "cross", "poss", "att_3rd", "gf", "ga", "avg_ck"
        ]

        print("Variables inicializadas")

    def load_clean_dataset(self):
        """Load the cleaned CSV file(s) and derive the list of unique matches."""
        # Cleaned dataset generated by generate_dataset.py
        self.df_dataset_historic = pd.read_csv("dataset/cleaned/dataset_cleaned.csv")

        current_year_path = "dataset/cleaned/dataset_cleaned_current_year.csv"
        if os.path.exists(current_year_path):
            self.df_dataset_current_year = pd.read_csv(current_year_path)
            self.df_dataset = pd.concat([self.df_dataset_historic, self.df_dataset_current_year])
        else:
            self.df_dataset = self.df_dataset_historic

        self.df_dataset["season"] = self.df_dataset["season"].astype(str)
        # fillna returns a new Series; assign it back so the fill takes effect.
        self.df_dataset["Performance_Save%"] = self.df_dataset["Performance_Save%"].fillna(0)

        # One row per (game, league) with just the fields that identify a match.
        self.df_dataset_export = self.df_dataset.copy()
        self.df_dataset_export = self.df_dataset_export.drop_duplicates(subset=["game", "league"])
        self.df_dataset_export = self.df_dataset_export[["local", "away", "round", "season", "date", "league"]]

        # Unique matches to process; season "1718" is excluded.
        self.lst_matches = [
            row for row in self.df_dataset_export.values.tolist() if row[3] != "1718"
        ]

        print("dataset loaded")

    def _build_feature_names(self, keys):
        """Return the flat column names matching the value groups in ``keys``."""
        names = []
        for key in keys:
            if key in ('ppp_local', 'ppp_away', 'ppp_difference') or key.startswith('league_'):
                # Single-value groups keep their key as the column name.
                names.append(key)
            elif key in ('lst_team1_opp_away', 'lst_team2_opp_home'):
                # Opponent groups: 9 values each.
                names.extend(f"{key}_{col}" for col in self.lst_base_original)
            else:
                # Team / H2H groups: 24 values each.
                names.extend(f"{key}_{col}" for col in self.lst_base_advanced)
        return names

    def process_all_matches(self):
        """Compute one feature row and one target value per match from round 5 on."""
        # Column names are identical for every match, so they are built once.
        self.lst_features_values = []

        for local, away, round_num, season, _date, league_code in self.lst_matches:
            if round_num < 5:
                continue

            dic_df = {}

            # League baseline from the earlier rounds of the same season.
            lst_avg = get_average(
                self.df_dataset[
                    (self.df_dataset['season'] == season) &
                    (self.df_dataset['round'] < round_num) &
                    (self.df_dataset['league'] == league_code)
                ],
                is_team=False
            )

            def create_line(df, is_form=True, is_team=False, use_advanced=True):
                """Aggregate ``df`` with get_average.

                Args:
                    df: Rows of one team.
                    is_form: If True, only the last 6 rows are used.
                    is_team: Passed through to get_average.
                    use_advanced: If True, return the full result; if False,
                        return only its first 9 values.
                """
                if is_form:
                    df = df[-6:]

                result = get_average(df, is_team, lst_avg)
                return result if use_advanced else result[:9]

            # Per-team frames split by venue (two of the eight are not used).
            (team1_home, team1_away, _team1_opp_home, team1_opp_away,
             team2_home, team2_away, team2_opp_home, _team2_opp_away) = get_dataframes(
                self.df_dataset, season, round_num, local, away, league=league_code
            )

            # Target: actual corners of this match.
            self.y.append(get_ck(self.df_dataset, season, round_num, local, away, league=league_code))

            # Head to head over every season up to and including this one.
            # NOTE(review): no round filter is applied here, so rows of the
            # current season can include the match being processed and later
            # ones - confirm this is intended (possible target leakage).
            seasons_so_far = self.lst_years[:self.lst_years.index(season) + 1]
            team1_h2h, team2_h2h = get_head_2_head(
                self.df_dataset, local, away, seasons=seasons_so_far, league=league_code
            )

            # Points per match
            local_ppp = get_team_ppp(self.df_dataset, local, season, round_num, league=league_code)
            away_ppp = get_team_ppp(self.df_dataset, away, season, round_num, league=league_code)

            dic_df['ppp_local'] = (local_ppp,)
            dic_df['ppp_away'] = (away_ppp,)
            dic_df['ppp_difference'] = (local_ppp - away_ppp,)

            # Team groups: recent form (last 6 rows) and whole-season aggregate.
            team_frames = (
                ('lst_team1_home', team1_home),
                ('lst_team1_away', team1_away),
                ('lst_team2_home', team2_home),
                ('lst_team2_away', team2_away),
            )
            for prefix, frame in team_frames:
                dic_df[f'{prefix}_form'] = create_line(frame, True, True, use_advanced=True)
                dic_df[f'{prefix}_general'] = create_line(frame, False, True, use_advanced=True)

            dic_df['lst_team1_h2h'] = create_line(team1_h2h, False, True, use_advanced=True)
            dic_df['lst_team2_h2h'] = create_line(team2_h2h, False, True, use_advanced=True)

            # Opponent groups: reduced set of values.
            dic_df['lst_team1_opp_away'] = create_line(team1_opp_away, False, True, use_advanced=False)
            dic_df['lst_team2_opp_home'] = create_line(team2_opp_home, False, True, use_advanced=False)

            # One-hot encoding of the league code.
            if self.USE_ONE_HOT_ENCODING:
                for code in ('ESP', 'GER', 'FRA', 'ITA', 'NED', 'ENG', 'POR', 'BEL'):
                    dic_df[f'league_{code}'] = (1 if league_code == code else 0,)

            if not self.lst_features_values:
                self.lst_features_values = self._build_feature_names(dic_df)

            lst_features_values = []
            for values in dic_df.values():
                lst_features_values.extend(values)
            self.lst_data.append(lst_features_values)

        print("Dataset processed")

    def clean_and_ouput_dataset(self):
        """Drop rows with missing values and write the processed CSV."""
        import numpy as np

        self.df_data = pd.DataFrame(data=self.lst_data, columns=self.lst_features_values)

        print(f"\n✅ PROCESAMIENTO COMPLETADO:")
        print(f" Shape inicial: {self.df_data.shape}")
        print(f" Total partidos: {len(self.df_data)}")
        print(f" Features totales: {self.df_data.shape[1]}")

        # ===========================
        # NULL CLEANING
        # ===========================
        print(f"\n🧹 LIMPIANDO DATOS NULOS...")

        nulos_antes_X = self.df_data.isnull().sum().sum()
        nulos_antes_y = np.isnan(self.y).sum() if isinstance(self.y, np.ndarray) else sum(pd.isna(self.y))

        print(f" Nulos en X (antes): {nulos_antes_X}")
        print(f" Nulos en Y (antes): {nulos_antes_y}")

        y_array = np.array(self.y).flatten()

        # Keep a row only when both its features and its target are present.
        mask_valid_X = ~self.df_data.isnull().any(axis=1)
        mask_valid_y = ~np.isnan(y_array)
        mask_combined = mask_valid_X & mask_valid_y

        self.df_data = self.df_data[mask_combined].reset_index(drop=True)
        y_array = y_array[mask_combined.to_numpy()]

        print(f"\n✅ LIMPIEZA COMPLETADA:")
        print(f" Nulos en X (después): {self.df_data.isnull().sum().sum()}")
        print(f" Nulos en Y (después): {np.isnan(y_array).sum()}")
        print(f" Filas eliminadas: {len(mask_combined) - mask_combined.sum()}")
        print(f" Shape final: {self.df_data.shape}")

        # ===========================
        # FINAL CHECK
        # ===========================
        print(f"\n🔍 VERIFICACIÓN DE NUEVAS FEATURES:")
        print(f" ✅ Features con 'var_ck': {len([c for c in self.df_data.columns if 'var_ck' in c])}")
        print(f" ✅ Features con métricas avanzadas: {len([c for c in self.df_data.columns if any(m in c for m in ['sh_accuracy', 'offensive_index'])])}")
        print(f" ✅ Features de oponentes (8 valores): {len([c for c in self.df_data.columns if 'opp' in c])}")

        print("\n" + "=" * 80)
        print("✅ PROCESO COMPLETADO - DATOS LISTOS PARA ENTRENAMIENTO")
        print("=" * 80)

        self.y = y_array.tolist()
        self.df_data["y"] = self.y

        # Forward slashes work on every platform; a backslash-separated literal
        # would become part of the file name outside Windows.
        output_dir = "dataset/processed"
        os.makedirs(output_dir, exist_ok=True)
        self.df_data.to_csv(f"{output_dir}/dataset_processed.csv", index=False)
        print("Dataset")
|
| 582 |
+
|
| 583 |
+
#a = PROCESS_DATA(True)
|
| 584 |
+
|
src/utils/__init__.py
ADDED
|
File without changes
|
src/utils/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (164 Bytes). View file
|
|
|
src/utils/__pycache__/helper.cpython-311.pyc
ADDED
|
Binary file (1 kB). View file
|
|
|
src/utils/helper.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import warnings
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def desactivar_advertencias():
    """Silence Python warnings for this process and pandas' chained-assignment check."""
    # Blanket filter first, then the explicit per-category filters for the
    # warning types commonly raised by libraries.
    warnings.filterwarnings('ignore')
    for category in (DeprecationWarning, FutureWarning, UserWarning):
        warnings.filterwarnings('ignore', category=category)

    os.environ['PYTHONWARNINGS'] = 'ignore'

    # Turn off SettingWithCopyWarning.
    pd.options.mode.chained_assignment = None

    print("Advertencias desactivadas...")
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,812 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import requests
|
| 5 |
+
import plotly.graph_objects as go
|
| 6 |
+
import plotly.express as px
|
| 7 |
+
import numpy as np
|
| 8 |
+
from scipy import stats as scipy_stats
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()
# Key sent to the prediction API in the "X-API-Key" request header; this is
# None when the API_KEY environment variable is not set.
API_KEY = os.getenv("API_KEY") # ⚠️ replace it with a secure one
|
| 14 |
+
# --- CONFIGURACIÓN INICIAL ---
|
| 15 |
+
st.set_page_config(layout="wide", page_title="Corners Forecast", page_icon="⚽")
|
| 16 |
+
|
| 17 |
+
# 👈 AÑADIR MARGEN AL LAYOUT WIDE
|
| 18 |
+
st.markdown("""
|
| 19 |
+
<style>
|
| 20 |
+
.block-container {
|
| 21 |
+
padding-left: 5rem;
|
| 22 |
+
padding-right: 5rem;
|
| 23 |
+
max-width: 1400px;
|
| 24 |
+
margin: 0 auto;
|
| 25 |
+
}
|
| 26 |
+
</style>
|
| 27 |
+
""", unsafe_allow_html=True)
|
| 28 |
+
|
| 29 |
+
# --- MODEL CONSTANTS ---
# NOTE(review): MSE_MODELO and RMSE_MODELO do not agree with each other
# (sqrt(1.9) ≈ 1.38, not 2.42) — confirm which error metric each value is.
MSE_MODELO = 1.9
# Default sigma of the Monte Carlo draws and half-width basis of the
# displayed prediction interval.
RMSE_MODELO = 2.42
R2_MODELO = 0.39
# Default number of Monte Carlo draws per probability estimate.
N_SIMULACIONES = 5000 # 👈 reduced to 5000
|
| 34 |
+
|
| 35 |
+
# --- FUNCIONES AUXILIARES ---
|
| 36 |
+
def probabilidad_a_momio(probabilidad):
    """Convert a probability given in percent into fair decimal odds.

    Args:
        probabilidad: Probability in percent (e.g. ``50`` means 50 %).

    Returns:
        ``100 / probabilidad`` rounded to two decimals, or ``0`` when the
        probability is zero or negative.
    """
    if not probabilidad <= 0:
        return round(100 / probabilidad, 2)
    # No odds can be derived from a non-positive probability.
    return 0
|
| 41 |
+
|
| 42 |
+
def clasificar_valor_apuesta(momio_real, momio_modelo):
    """Label how much value the offered odds have versus the model's odds.

    Args:
        momio_real: Decimal odds offered by the bookmaker.
        momio_modelo: Fair decimal odds according to the model.

    Returns:
        "🟢 EXCELENTE VALOR" when the offered odds exceed the model odds by
        more than 10 %, "🟡 BUEN VALOR" when they merely exceed them, and
        "🔴 SIN VALOR" otherwise.
    """
    umbral_excelente = momio_modelo * 1.1
    if momio_real > umbral_excelente:
        return "🟢 EXCELENTE VALOR"
    if momio_real > momio_modelo:
        return "🟡 BUEN VALOR"
    return "🔴 SIN VALOR"
|
| 50 |
+
|
| 51 |
+
@st.cache_data(ttl=3600)  # 👈 cache entries live for 1 hour
def simular_lambda_montecarlo(lambda_pred, sigma=RMSE_MODELO, n_sims=N_SIMULACIONES):
    """Draw Monte Carlo samples of the expected-corners rate (lambda).

    Args:
        lambda_pred: Centre of the sampling distribution.
        sigma: Standard deviation of the normal draws.
        n_sims: Number of samples to draw.

    Returns:
        Array of ``n_sims`` normal draws around ``lambda_pred``, floored at
        0.1 so that every sampled rate is strictly positive.
    """
    muestras = np.random.normal(lambda_pred, sigma, n_sims)
    return np.maximum(muestras, 0.1)
|
| 57 |
+
|
| 58 |
+
@st.cache_data(ttl=3600)  # 👈 cache entries live for 1 hour
def calcular_probabilidades_con_incertidumbre(lambda_pred, linea, tipo='over', sigma=RMSE_MODELO, n_sims=N_SIMULACIONES):
    """Estimate an over/under probability and its uncertainty via Monte Carlo.

    For every rate returned by ``simular_lambda_montecarlo`` the corner
    count is modelled as Poisson and the probability of finishing over (or
    under) ``linea`` is computed. The spread of those probabilities
    summarises the uncertainty of the estimate.

    Args:
        lambda_pred: Predicted expected number of corners.
        linea: Betting line; may be fractional (e.g. 9.5) or a whole number.
        tipo: ``'over'`` for P(X > linea); any other value gives P(X < linea).
        sigma: Standard deviation used for the simulated rates.
        n_sims: Number of simulated rates.

    Returns:
        dict with the keys ``'prob_media'`` (mean), ``'prob_low'`` (5th
        percentile), ``'prob_high'`` (95th percentile), ``'prob_std'``
        (standard deviation) and ``'distribucion'`` (array with one
        probability per simulation). All probabilities are in percent.
    """
    lambdas_sim = simular_lambda_montecarlo(lambda_pred, sigma, n_sims)

    # poisson.cdf broadcasts over the array of rates, so no Python loop is
    # needed to evaluate every simulation.
    if tipo == 'over':
        # P(X > linea) = 1 - P(X <= floor(linea))
        umbral = int(np.floor(linea))
        probs = (1 - scipy_stats.poisson.cdf(umbral, lambdas_sim)) * 100
    else:
        # P(X < linea) = P(X <= ceil(linea) - 1). Using ceil rather than
        # truncation keeps half lines correct: "under 9.5" must include
        # exactly 9 corners, while "under 9" still stops at 8.
        umbral = int(np.ceil(linea)) - 1
        probs = scipy_stats.poisson.cdf(umbral, lambdas_sim) * 100

    probs = np.asarray(probs, dtype=float)

    return {
        'prob_media': np.mean(probs),
        'prob_low': np.percentile(probs, 5),
        'prob_high': np.percentile(probs, 95),
        'prob_std': np.std(probs),
        'distribucion': probs
    }
|
| 82 |
+
|
| 83 |
+
def calcular_expected_value(prob_media, momio_casa):
    """Return the expected value (EV) of a bet as a percentage of the stake.

    Args:
        prob_media: Estimated win probability in percent.
        momio_casa: Decimal odds offered by the bookmaker.

    Returns:
        ``(p * momio_casa - 1) * 100`` where ``p = prob_media / 100``.
    """
    retorno_esperado = (prob_media / 100) * momio_casa
    return (retorno_esperado - 1) * 100
|
| 88 |
+
|
| 89 |
+
def calcular_kelly_criterion(prob_media, momio_casa):
    """Return the Kelly stake as a fraction of the bankroll, capped at 0.25.

    Args:
        prob_media: Estimated win probability in percent.
        momio_casa: Decimal odds offered by the bookmaker.

    Returns:
        ``(p * momio_casa - 1) / (momio_casa - 1)`` with
        ``p = prob_media / 100``, limited to at most 0.25. Returns ``0`` when
        the odds are 1 or lower, or when the fraction is negative.
    """
    prob = prob_media / 100

    # Odds of 1 or less pay no net profit (and would divide by zero below).
    if momio_casa <= 1:
        return 0

    fraccion = (prob * momio_casa - 1) / (momio_casa - 1)
    return 0 if fraccion < 0 else min(fraccion, 0.25)
|
| 102 |
+
|
| 103 |
+
def recomendar_apuesta_avanzada(prob_media, prob_low, prob_high, momio_casa):
    """Grade a bet from the model's probability band and the offered odds.

    Args:
        prob_media: Mean model probability in percent.
        prob_low: Lower bound of the model probability band in percent.
        prob_high: Upper bound of the model probability band in percent
            (only echoed back in the result).
        momio_casa: Decimal odds offered by the bookmaker.

    Returns:
        dict with the grade (``'nivel'``, ``'emoji'``, ``'recomendar'``),
        the EV in percent (``'ev'``), the Kelly stake and a quarter of it in
        percent (``'kelly'``, ``'kelly_conservador'``), the bookmaker's
        implied probability (``'prob_casa'``), the three input
        probabilities, the relative safety margin in percent
        (``'margen_seguridad'``) and the flags ``'ev_positivo'`` and
        ``'confianza_alta'``.
    """
    prob_casa = (1 / momio_casa) * 100
    ev = calcular_expected_value(prob_media, momio_casa)
    kelly = calcular_kelly_criterion(prob_media, momio_casa)

    # High confidence: even the lower bound beats the implied probability.
    confianza_alta = prob_low > prob_casa
    margen_seguridad = (prob_media - prob_casa) / prob_casa

    if confianza_alta and ev > 5 and margen_seguridad > 0.1:
        nivel, emoji, recomendar = "EXCELENTE", "🟢", True
    elif confianza_alta and ev > 0:
        nivel, emoji, recomendar = "BUENA", "🟡", True
    elif ev > 0:
        nivel, emoji, recomendar = "MODERADA", "🟠", False
    else:
        nivel, emoji, recomendar = "MALA", "🔴", False

    return {
        'recomendar': recomendar,
        'nivel': nivel,
        'emoji': emoji,
        'ev': ev,
        'kelly': kelly * 100,
        'kelly_conservador': (kelly * 0.25) * 100,
        'prob_casa': prob_casa,
        'prob_media': prob_media,
        'prob_low': prob_low,
        'prob_high': prob_high,
        'margen_seguridad': margen_seguridad * 100,
        'ev_positivo': ev > 0,
        'confianza_alta': confianza_alta
    }
|
| 146 |
+
|
| 147 |
+
# --- LEAGUE DICTIONARY ---
# League name shown in the UI -> league code used to filter the dataset's
# "league" column and sent to the API as "league_code".
_LEAGUE_CODE_PAIRS = (
    ("Ligue 1", "FRA"),
    ("La Liga", "ESP"),
    ("Premier League", "ENG"),
    ("Eredivisie", "NED"),
    ("Liga NOS", "POR"),
    ("Pro League", "BEL"),
    ("Bundesliga", "GER"),
    ("Serie A", "ITA"),
)
LEAGUES_DICT = dict(_LEAGUE_CODE_PAIRS)
|
| 158 |
+
|
| 159 |
+
# --- HEADER ---
|
| 160 |
+
st.markdown("<h1 style='text-align: center;'>Corners Forecast</h1>", unsafe_allow_html=True)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
# --- CARGAR DATOS ---
|
| 164 |
+
@st.cache_data  # 👈 cached with no expiry
def cargar_datos():
    """Download the cleaned dataset and return its distinct team/league pairs.

    Returns:
        DataFrame with the columns ``local`` and ``league`` and duplicate
        rows removed.
    """
    dataset = pd.read_csv(r"https://raw.githubusercontent.com/danielsaed/futbol_corners_forecast/refs/heads/main/dataset/cleaned/dataset_cleaned.csv")
    equipos_por_liga = dataset[['local','league']]
    return equipos_por_liga.drop_duplicates()
|
| 168 |
+
|
| 169 |
+
df = cargar_datos()
|
| 170 |
+
|
| 171 |
+
# --- INITIALISE SESSION STATE ---
# Seed each key only when it is absent so values survive Streamlit reruns.
for clave, valor_inicial in (('prediccion_realizada', False), ('resultado_api', None)):
    if clave not in st.session_state:
        st.session_state[clave] = valor_inicial
|
| 176 |
+
|
| 177 |
+
st.markdown("")
|
| 178 |
+
|
| 179 |
+
# --- SELECCIÓN DE PARÁMETROS ---
|
| 180 |
+
col1, col2, col3 = st.columns([1, 1, 1])
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
with col2:
|
| 185 |
+
option = st.selectbox(
|
| 186 |
+
"🏆 Liga",
|
| 187 |
+
["La Liga", "Premier League", "Ligue 1", "Serie A", "Eredivisie", "Liga NOS", "Pro League", "Bundesliga"],
|
| 188 |
+
index=None,
|
| 189 |
+
placeholder="Selecciona liga",
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
st.write("")
|
| 193 |
+
|
| 194 |
+
col_jornada1, col_jornada2, col_jornada3, col_jornada4 = st.columns([2, 1, 1, 2])
|
| 195 |
+
with col_jornada2:
|
| 196 |
+
if option:
|
| 197 |
+
jornada = st.number_input("📅 Jornada", min_value=5, max_value=42, value=15, step=1)
|
| 198 |
+
with col_jornada3:
|
| 199 |
+
if option:
|
| 200 |
+
temporada = st.selectbox(
|
| 201 |
+
"Temporada",
|
| 202 |
+
[2526, 2425, 2324, 2223, 2122],
|
| 203 |
+
index=0
|
| 204 |
+
)
|
| 205 |
+
|
| 206 |
+
st.write("")
|
| 207 |
+
|
| 208 |
+
cl2, cl3, cl4 = st.columns([ 4, 1, 4])
|
| 209 |
+
|
| 210 |
+
with cl2:
|
| 211 |
+
if option:
|
| 212 |
+
if jornada:
|
| 213 |
+
option_local = st.selectbox(
|
| 214 |
+
"🏠 Equipo Local",
|
| 215 |
+
list(df["local"][df["league"] == LEAGUES_DICT[option]]),
|
| 216 |
+
index=None,
|
| 217 |
+
placeholder="Equipo local",
|
| 218 |
+
)
|
| 219 |
+
|
| 220 |
+
with cl3:
|
| 221 |
+
if option:
|
| 222 |
+
st.write("")
|
| 223 |
+
st.write("")
|
| 224 |
+
st.markdown("<h3 style='text-align: center'>VS</h3>", unsafe_allow_html=True)
|
| 225 |
+
|
| 226 |
+
with cl4:
|
| 227 |
+
if option:
|
| 228 |
+
if jornada:
|
| 229 |
+
option_away = st.selectbox(
|
| 230 |
+
"✈️ Equipo Visitante",
|
| 231 |
+
list(df["local"][df["league"] == LEAGUES_DICT[option]]),
|
| 232 |
+
index=None,
|
| 233 |
+
placeholder="Equipo visitante",
|
| 234 |
+
)
|
| 235 |
+
|
| 236 |
+
# --- BOTÓN PARA GENERAR PREDICCIÓN ---
|
| 237 |
+
if option and option_local and option_away:
|
| 238 |
+
|
| 239 |
+
st.markdown("---")
|
| 240 |
+
|
| 241 |
+
col_btn1, col_btn2, col_btn3 = st.columns([1, 1, 1])
|
| 242 |
+
|
| 243 |
+
with col_btn2:
|
| 244 |
+
# 👈 BOTÓN PARA EJECUTAR PREDICCIÓN
|
| 245 |
+
if st.button("Generar Predicción", type="secondary", use_container_width=True):
|
| 246 |
+
st.session_state.prediccion_realizada = True
|
| 247 |
+
st.session_state.resultado_api = None # Reset resultado
|
| 248 |
+
|
| 249 |
+
st.write("")
|
| 250 |
+
st.write("")
|
| 251 |
+
|
| 252 |
+
# --- REALIZAR PREDICCIÓN (SOLO SI SE PRESIONÓ EL BOTÓN) ---
|
| 253 |
+
if option and option_local and option_away and st.session_state.prediccion_realizada:
|
| 254 |
+
|
| 255 |
+
# Si no hay resultado en cache, hacer petición
|
| 256 |
+
if st.session_state.resultado_api is None:
|
| 257 |
+
|
| 258 |
+
with st.spinner('🔮 Generando predicción con análisis de incertidumbre...'):
|
| 259 |
+
|
| 260 |
+
url = "https://daniel-saed-futbol-corners-forecast-api.hf.space/items/"
|
| 261 |
+
#url = "http://localhost:7860//items/"
|
| 262 |
+
headers = {"X-API-Key": API_KEY}
|
| 263 |
+
params = {
|
| 264 |
+
"local": option_local,
|
| 265 |
+
"visitante": option_away,
|
| 266 |
+
"jornada": jornada,
|
| 267 |
+
"league_code": LEAGUES_DICT[option],
|
| 268 |
+
"temporada": str(temporada)
|
| 269 |
+
}
|
| 270 |
+
|
| 271 |
+
try:
|
| 272 |
+
response = requests.get(url, headers=headers, params=params, timeout=30)
|
| 273 |
+
|
| 274 |
+
if response.status_code == 200:
|
| 275 |
+
st.session_state.resultado_api = response.json() # 👈 GUARDAR EN SESSION
|
| 276 |
+
st.success("✅ Predicción generada")
|
| 277 |
+
elif response.status_code == 401:
|
| 278 |
+
st.error("❌ Error de Autenticación - API Key inválida")
|
| 279 |
+
st.stop()
|
| 280 |
+
elif response.status_code == 400:
|
| 281 |
+
st.error(f"❌ Error: {response.json().get('detail', 'Parámetros inválidos')}")
|
| 282 |
+
st.stop()
|
| 283 |
+
else:
|
| 284 |
+
st.error(f"❌ Error {response.status_code}")
|
| 285 |
+
st.stop()
|
| 286 |
+
|
| 287 |
+
except requests.exceptions.Timeout:
|
| 288 |
+
st.error("⏱️ Timeout - Intenta de nuevo")
|
| 289 |
+
st.stop()
|
| 290 |
+
except requests.exceptions.ConnectionError:
|
| 291 |
+
st.error("🌐 Error de conexión")
|
| 292 |
+
st.stop()
|
| 293 |
+
except Exception as e:
|
| 294 |
+
st.error(f"❌ Error: {str(e)}")
|
| 295 |
+
import traceback
|
| 296 |
+
st.code(traceback.format_exc())
|
| 297 |
+
st.stop()
|
| 298 |
+
|
| 299 |
+
# --- MOSTRAR RESULTADOS (DESDE SESSION STATE) ---
|
| 300 |
+
if st.session_state.resultado_api:
|
| 301 |
+
resultado = st.session_state.resultado_api
|
| 302 |
+
lambda_pred = resultado['prediccion']
|
| 303 |
+
|
| 304 |
+
st.write("")
|
| 305 |
+
st.write("")
|
| 306 |
+
|
| 307 |
+
# ============================================
|
| 308 |
+
# 1. PREDICCIÓN PRINCIPAL
|
| 309 |
+
# ============================================
|
| 310 |
+
|
| 311 |
+
lambda_low = max(0, lambda_pred - 1.96 * RMSE_MODELO)
|
| 312 |
+
lambda_high = lambda_pred + 1.96 * RMSE_MODELO
|
| 313 |
+
|
| 314 |
+
st.markdown("## 🎯 Predicción de Corners")
|
| 315 |
+
|
| 316 |
+
st.write("")
|
| 317 |
+
|
| 318 |
+
# Métricas principales con Streamlit nativo
|
| 319 |
+
col_pred1, col_pred2, col_pred3 = st.columns(3)
|
| 320 |
+
|
| 321 |
+
with col_pred1:
|
| 322 |
+
st.metric(
|
| 323 |
+
label="Corners Esperados",
|
| 324 |
+
value=f"{lambda_pred:.1f}",
|
| 325 |
+
help="Valor esperado (λ) del modelo"
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
with col_pred2:
|
| 329 |
+
st.metric(
|
| 330 |
+
label="Límite Inferior",
|
| 331 |
+
value=f"{lambda_low:.1f}",
|
| 332 |
+
delta=f"{lambda_low - lambda_pred:.1f}",
|
| 333 |
+
help="Intervalo de confianza 95% (inferior)"
|
| 334 |
+
)
|
| 335 |
+
|
| 336 |
+
with col_pred3:
|
| 337 |
+
st.metric(
|
| 338 |
+
label="Límite Superior",
|
| 339 |
+
value=f"{lambda_high:.1f}",
|
| 340 |
+
delta=f"{lambda_high - lambda_pred:.1f}",
|
| 341 |
+
help="Intervalo de confianza 95% (superior)"
|
| 342 |
+
)
|
| 343 |
+
|
| 344 |
+
st.write("")
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
st.write("")
|
| 349 |
+
st.write("")
|
| 350 |
+
st.markdown("---")
|
| 351 |
+
st.write("")
|
| 352 |
+
st.write("")
|
| 353 |
+
|
| 354 |
+
# ============================================
|
| 355 |
+
# 2. ANÁLISIS DE EQUIPOS (CON TABLAS)
|
| 356 |
+
# ============================================
|
| 357 |
+
|
| 358 |
+
stats_data = resultado['stats']
|
| 359 |
+
local_ck = stats_data['local_ck']
|
| 360 |
+
away_ck = stats_data['away_ck']
|
| 361 |
+
local_ck_received = stats_data['local_ck_received']
|
| 362 |
+
away_ck_received = stats_data['away_ck_received']
|
| 363 |
+
h2h_total = stats_data['h2h_total']
|
| 364 |
+
partido_esperado = stats_data['partido_esperado']
|
| 365 |
+
|
| 366 |
+
riesgo = resultado['riesgo']
|
| 367 |
+
|
| 368 |
+
# 👈 TABLA DE CORNERS GENERADOS Y CONCEDIDOS
|
| 369 |
+
st.markdown("### Análisis de Corners")
|
| 370 |
+
|
| 371 |
+
df_corners = pd.DataFrame({
|
| 372 |
+
'Métrica': ['Corners Generados ⚽', 'Corners Concedidos 🛡️', 'Head to Head'],
|
| 373 |
+
f'🏠 {option_local}': [f'{local_ck:.2f}', f'{local_ck_received:.2f}','---'],
|
| 374 |
+
f'✈️ {option_away}': [f'{away_ck:.2f}', f'{away_ck_received:.2f}','---'],
|
| 375 |
+
'🎯 Total': [
|
| 376 |
+
f'{(local_ck + away_ck):.2f}',
|
| 377 |
+
f'{(local_ck_received + away_ck_received):.2f}',
|
| 378 |
+
f"{h2h_total:.2f}"
|
| 379 |
+
]
|
| 380 |
+
})
|
| 381 |
+
|
| 382 |
+
st.dataframe(
|
| 383 |
+
df_corners,
|
| 384 |
+
hide_index=True,
|
| 385 |
+
use_container_width=True,
|
| 386 |
+
column_config={
|
| 387 |
+
'Métrica': st.column_config.TextColumn('📊 Métrica', width='medium'),
|
| 388 |
+
f'🏠 {option_local}': st.column_config.TextColumn(f'🏠 {option_local}', width='medium'),
|
| 389 |
+
f'✈️ {option_away}': st.column_config.TextColumn(f'✈️ {option_away}', width='medium'),
|
| 390 |
+
'🎯 Total': st.column_config.TextColumn('🎯 Total', width='medium')
|
| 391 |
+
}
|
| 392 |
+
)
|
| 393 |
+
|
| 394 |
+
st.write("")
|
| 395 |
+
st.write("")
|
| 396 |
+
|
| 397 |
+
# --- FIABILIDAD ---
|
| 398 |
+
st.markdown("### Fiabilidad")
|
| 399 |
+
|
| 400 |
+
col_fiab1, col_fiab2, col_fiab3 = st.columns(3)
|
| 401 |
+
|
| 402 |
+
with col_fiab1:
|
| 403 |
+
st.markdown(f"**🏠 {option_local}**")
|
| 404 |
+
st.write(f"**Score:** {riesgo['score_local']:.0f}/100")
|
| 405 |
+
st.write(f"**Nivel:** {riesgo['nivel_local']}")
|
| 406 |
+
st.write(f"**CV:** {riesgo['cv_local']:.1f}%")
|
| 407 |
+
st.progress(riesgo['score_local'] / 100)
|
| 408 |
+
|
| 409 |
+
with col_fiab2:
|
| 410 |
+
st.markdown("**📊 Fiabilidad Global**")
|
| 411 |
+
score_promedio = riesgo['score_promedio']
|
| 412 |
+
st.write(f"**Score:** {score_promedio:.0f}/100")
|
| 413 |
+
st.write("")
|
| 414 |
+
|
| 415 |
+
if score_promedio >= 65:
|
| 416 |
+
st.success("🟢 Fiabilidad MUY ALTA")
|
| 417 |
+
elif score_promedio >= 50:
|
| 418 |
+
st.info("🟡 Fiabilidad ALTA")
|
| 419 |
+
elif score_promedio >= 35:
|
| 420 |
+
st.warning("🟠 Fiabilidad MEDIA")
|
| 421 |
+
else:
|
| 422 |
+
st.error("🔴 Fiabilidad BAJA")
|
| 423 |
+
|
| 424 |
+
with col_fiab3:
|
| 425 |
+
st.markdown(f"**✈️ {option_away}**")
|
| 426 |
+
st.write(f"**Score:** {riesgo['score_away']:.0f}/100")
|
| 427 |
+
st.write(f"**Nivel:** {riesgo['nivel_away']}")
|
| 428 |
+
st.write(f"**CV:** {riesgo['cv_away']:.1f}%")
|
| 429 |
+
st.progress(riesgo['score_away'] / 100)
|
| 430 |
+
|
| 431 |
+
st.write("")
|
| 432 |
+
st.write("")
|
| 433 |
+
st.markdown("---")
|
| 434 |
+
st.write("")
|
| 435 |
+
st.write("")
|
| 436 |
+
|
| 437 |
+
# ============================================
|
| 438 |
+
# 3. PROBABILIDADES CON MONTE CARLO
|
| 439 |
+
# ============================================
|
| 440 |
+
|
| 441 |
+
st.info(f"🔬 **Análisis con {N_SIMULACIONES:,} simulaciones Monte Carlo** considerando RMSE={RMSE_MODELO}")
|
| 442 |
+
|
| 443 |
+
tab_over, tab_under = st.tabs(["⬆️ OVER", "⬇️ UNDER"])
|
| 444 |
+
|
| 445 |
+
# TAB OVER
|
| 446 |
+
with tab_over:
|
| 447 |
+
probs_over = resultado['probabilidades_over']
|
| 448 |
+
|
| 449 |
+
st.markdown("### 📈 Probabilidades Over (con Intervalos de Confianza 90%)")
|
| 450 |
+
|
| 451 |
+
df_over_incertidumbre = []
|
| 452 |
+
|
| 453 |
+
with st.spinner('Calculando incertidumbres Over...'):
|
| 454 |
+
for linea_str in sorted(probs_over.keys(), key=float, reverse=True):
|
| 455 |
+
linea = float(linea_str)
|
| 456 |
+
|
| 457 |
+
resultado_inc = calcular_probabilidades_con_incertidumbre(
|
| 458 |
+
lambda_pred, linea, tipo='over'
|
| 459 |
+
)
|
| 460 |
+
|
| 461 |
+
prob_media = resultado_inc['prob_media']
|
| 462 |
+
prob_low = resultado_inc['prob_low']
|
| 463 |
+
prob_high = resultado_inc['prob_high']
|
| 464 |
+
|
| 465 |
+
momio_medio = probabilidad_a_momio(prob_media)
|
| 466 |
+
momio_low = probabilidad_a_momio(prob_high)
|
| 467 |
+
momio_high = probabilidad_a_momio(prob_low)
|
| 468 |
+
|
| 469 |
+
df_over_incertidumbre.append({
|
| 470 |
+
'Línea': f"Over {linea_str}",
|
| 471 |
+
'Prob. Media': f"{prob_media:.1f}%",
|
| 472 |
+
'IC 90%': f"[{prob_low:.1f}%, {prob_high:.1f}%]",
|
| 473 |
+
'Momio Justo': f"@{momio_medio:.2f}",
|
| 474 |
+
'Rango Momio': f"[@{momio_low:.2f} - @{momio_high:.2f}]",
|
| 475 |
+
'linea_num': linea,
|
| 476 |
+
'prob_media_raw': prob_media,
|
| 477 |
+
'prob_low_raw': prob_low,
|
| 478 |
+
'prob_high_raw': prob_high,
|
| 479 |
+
'tipo': 'Over'
|
| 480 |
+
})
|
| 481 |
+
|
| 482 |
+
df_over_display = pd.DataFrame(df_over_incertidumbre)
|
| 483 |
+
|
| 484 |
+
st.dataframe(
|
| 485 |
+
df_over_display[['Línea', 'Prob. Media', 'Momio Justo']],
|
| 486 |
+
hide_index=True,
|
| 487 |
+
use_container_width=True,
|
| 488 |
+
column_config={
|
| 489 |
+
'Línea': st.column_config.TextColumn('🎯 Línea', width='small'),
|
| 490 |
+
'Prob. Media': st.column_config.TextColumn('📊 Probabilidad', width='small'),
|
| 491 |
+
'Momio Justo': st.column_config.TextColumn('💰 Momio', width='small'),
|
| 492 |
+
}
|
| 493 |
+
)
|
| 494 |
+
|
| 495 |
+
st.write("")
|
| 496 |
+
|
| 497 |
+
# Gráfico
|
| 498 |
+
fig_over = go.Figure()
|
| 499 |
+
|
| 500 |
+
lineas_sorted = sorted([x['linea_num'] for x in df_over_incertidumbre])
|
| 501 |
+
probs_medias = [x['prob_media_raw'] for x in sorted(df_over_incertidumbre, key=lambda x: x['linea_num'])]
|
| 502 |
+
probs_low = [x['prob_low_raw'] for x in sorted(df_over_incertidumbre, key=lambda x: x['linea_num'])]
|
| 503 |
+
probs_high = [x['prob_high_raw'] for x in sorted(df_over_incertidumbre, key=lambda x: x['linea_num'])]
|
| 504 |
+
|
| 505 |
+
fig_over.add_trace(go.Scatter(
|
| 506 |
+
x=[f"Over {l}" for l in lineas_sorted] + [f"Over {l}" for l in lineas_sorted[::-1]],
|
| 507 |
+
y=probs_high + probs_low[::-1],
|
| 508 |
+
fill='toself',
|
| 509 |
+
fillcolor='rgba(46, 204, 113, 0.2)',
|
| 510 |
+
line=dict(color='rgba(255,255,255,0)'),
|
| 511 |
+
showlegend=True,
|
| 512 |
+
name='IC 90%',
|
| 513 |
+
hoverinfo='skip'
|
| 514 |
+
))
|
| 515 |
+
|
| 516 |
+
fig_over.add_trace(go.Scatter(
|
| 517 |
+
x=[f"Over {l}" for l in lineas_sorted],
|
| 518 |
+
y=probs_medias,
|
| 519 |
+
mode='lines+markers',
|
| 520 |
+
name='Probabilidad Media',
|
| 521 |
+
line=dict(color='#2ecc71', width=3),
|
| 522 |
+
marker=dict(size=10)
|
| 523 |
+
))
|
| 524 |
+
|
| 525 |
+
fig_over.update_layout(
|
| 526 |
+
title="Probabilidades Over con Banda de Incertidumbre (Monte Carlo)",
|
| 527 |
+
xaxis_title="Línea",
|
| 528 |
+
yaxis_title="Probabilidad (%)",
|
| 529 |
+
height=500,
|
| 530 |
+
hovermode='x unified'
|
| 531 |
+
)
|
| 532 |
+
|
| 533 |
+
st.plotly_chart(fig_over, use_container_width=True)
|
| 534 |
+
|
| 535 |
+
# TAB UNDER
|
| 536 |
+
with tab_under:
|
| 537 |
+
probs_under = resultado['probabilidades_under']
|
| 538 |
+
|
| 539 |
+
st.markdown("### 📉 Probabilidades Under (con Intervalos de Confianza 90%)")
|
| 540 |
+
|
| 541 |
+
df_under_incertidumbre = []
|
| 542 |
+
|
| 543 |
+
with st.spinner('Calculando incertidumbres Under...'):
|
| 544 |
+
for linea_str in sorted(probs_under.keys(), key=float, reverse=True):
|
| 545 |
+
linea = float(linea_str)
|
| 546 |
+
|
| 547 |
+
resultado_inc = calcular_probabilidades_con_incertidumbre(
|
| 548 |
+
lambda_pred, linea, tipo='under'
|
| 549 |
+
)
|
| 550 |
+
|
| 551 |
+
prob_media = resultado_inc['prob_media']
|
| 552 |
+
prob_low = resultado_inc['prob_low']
|
| 553 |
+
prob_high = resultado_inc['prob_high']
|
| 554 |
+
|
| 555 |
+
momio_medio = probabilidad_a_momio(prob_media)
|
| 556 |
+
momio_low = probabilidad_a_momio(prob_high)
|
| 557 |
+
momio_high = probabilidad_a_momio(prob_low)
|
| 558 |
+
|
| 559 |
+
df_under_incertidumbre.append({
|
| 560 |
+
'Línea': f"Under {linea_str}",
|
| 561 |
+
'Prob. Media': f"{prob_media:.1f}%",
|
| 562 |
+
'IC 90%': f"[{prob_low:.1f}%, {prob_high:.1f}%]",
|
| 563 |
+
'Momio Justo': f"@{momio_medio:.2f}",
|
| 564 |
+
'Rango Momio': f"[@{momio_low:.2f} - @{momio_high:.2f}]",
|
| 565 |
+
'linea_num': linea,
|
| 566 |
+
'prob_media_raw': prob_media,
|
| 567 |
+
'prob_low_raw': prob_low,
|
| 568 |
+
'prob_high_raw': prob_high,
|
| 569 |
+
'tipo': 'Under'
|
| 570 |
+
})
|
| 571 |
+
|
| 572 |
+
df_under_display = pd.DataFrame(df_under_incertidumbre)
|
| 573 |
+
|
| 574 |
+
st.dataframe(
|
| 575 |
+
df_under_display[['Línea', 'Prob. Media', 'IC 90%', 'Momio Justo', 'Rango Momio']],
|
| 576 |
+
hide_index=True,
|
| 577 |
+
use_container_width=True,
|
| 578 |
+
column_config={
|
| 579 |
+
'Línea': st.column_config.TextColumn('🎯 Línea', width='small'),
|
| 580 |
+
'Prob. Media': st.column_config.TextColumn('📊 Probabilidad', width='small'),
|
| 581 |
+
'IC 90%': st.column_config.TextColumn('📉 Intervalo 90%', width='medium'),
|
| 582 |
+
'Momio Justo': st.column_config.TextColumn('💰 Momio', width='small'),
|
| 583 |
+
'Rango Momio': st.column_config.TextColumn('📈 Rango Momios', width='medium')
|
| 584 |
+
}
|
| 585 |
+
)
|
| 586 |
+
|
| 587 |
+
st.write("")
|
| 588 |
+
|
| 589 |
+
# Gráfico
|
| 590 |
+
fig_under = go.Figure()
|
| 591 |
+
|
| 592 |
+
lineas_sorted_under = sorted([x['linea_num'] for x in df_under_incertidumbre])
|
| 593 |
+
probs_medias_under = [x['prob_media_raw'] for x in sorted(df_under_incertidumbre, key=lambda x: x['linea_num'])]
|
| 594 |
+
probs_low_under = [x['prob_low_raw'] for x in sorted(df_under_incertidumbre, key=lambda x: x['linea_num'])]
|
| 595 |
+
probs_high_under = [x['prob_high_raw'] for x in sorted(df_under_incertidumbre, key=lambda x: x['linea_num'])]
|
| 596 |
+
|
| 597 |
+
fig_under.add_trace(go.Scatter(
|
| 598 |
+
x=[f"Under {l}" for l in lineas_sorted_under] + [f"Under {l}" for l in lineas_sorted_under[::-1]],
|
| 599 |
+
y=probs_high_under + probs_low_under[::-1],
|
| 600 |
+
fill='toself',
|
| 601 |
+
fillcolor='rgba(231, 76, 60, 0.2)',
|
| 602 |
+
line=dict(color='rgba(255,255,255,0)'),
|
| 603 |
+
showlegend=True,
|
| 604 |
+
name='IC 90%',
|
| 605 |
+
hoverinfo='skip'
|
| 606 |
+
))
|
| 607 |
+
|
| 608 |
+
fig_under.add_trace(go.Scatter(
|
| 609 |
+
x=[f"Under {l}" for l in lineas_sorted_under],
|
| 610 |
+
y=probs_medias_under,
|
| 611 |
+
mode='lines+markers',
|
| 612 |
+
name='Probabilidad Media',
|
| 613 |
+
line=dict(color='#e74c3c', width=3),
|
| 614 |
+
marker=dict(size=10)
|
| 615 |
+
))
|
| 616 |
+
|
| 617 |
+
fig_under.update_layout(
|
| 618 |
+
title="Probabilidades Under con Banda de Incertidumbre (Monte Carlo)",
|
| 619 |
+
xaxis_title="Línea",
|
| 620 |
+
yaxis_title="Probabilidad (%)",
|
| 621 |
+
height=500,
|
| 622 |
+
hovermode='x unified'
|
| 623 |
+
)
|
| 624 |
+
|
| 625 |
+
st.plotly_chart(fig_under, use_container_width=True)
|
| 626 |
+
|
| 627 |
+
st.write("")
|
| 628 |
+
st.write("")
|
| 629 |
+
st.markdown("---")
|
| 630 |
+
st.write("")
|
| 631 |
+
st.write("")
|
| 632 |
+
|
| 633 |
+
# ============================================
|
| 634 |
+
# 4. CALCULADORA AVANZADA
|
| 635 |
+
# ============================================
|
| 636 |
+
st.markdown("## 💰 Calculadora de Valor")
|
| 637 |
+
|
| 638 |
+
st.write("")
|
| 639 |
+
|
| 640 |
+
# Combinar datos
|
| 641 |
+
todas_lineas_datos = {}
|
| 642 |
+
|
| 643 |
+
for item in df_over_incertidumbre:
|
| 644 |
+
todas_lineas_datos[item['Línea']] = item
|
| 645 |
+
|
| 646 |
+
for item in df_under_incertidumbre:
|
| 647 |
+
todas_lineas_datos[item['Línea']] = item
|
| 648 |
+
|
| 649 |
+
todas_lineas_ordenadas = sorted(
|
| 650 |
+
todas_lineas_datos.keys(),
|
| 651 |
+
key=lambda x: (0 if 'Over' in x else 1, float(x.split()[1])),
|
| 652 |
+
reverse=True
|
| 653 |
+
)
|
| 654 |
+
|
| 655 |
+
col_calc1, col_calc2 = st.columns(2)
|
| 656 |
+
|
| 657 |
+
with col_calc1:
|
| 658 |
+
linea_calc = st.selectbox(
|
| 659 |
+
"🎯 Selecciona línea",
|
| 660 |
+
todas_lineas_ordenadas,
|
| 661 |
+
key="calc_linea"
|
| 662 |
+
)
|
| 663 |
+
|
| 664 |
+
with col_calc2:
|
| 665 |
+
momio_casa = st.number_input(
|
| 666 |
+
"💰 Momio del casino",
|
| 667 |
+
min_value=1.01,
|
| 668 |
+
max_value=20.0,
|
| 669 |
+
value=2.0,
|
| 670 |
+
step=0.01,
|
| 671 |
+
key="calc_momio",
|
| 672 |
+
help="Ingresa el momio decimal que ofrece la casa de apuestas"
|
| 673 |
+
)
|
| 674 |
+
|
| 675 |
+
st.write("")
|
| 676 |
+
|
| 677 |
+
datos_linea = todas_lineas_datos[linea_calc]
|
| 678 |
+
|
| 679 |
+
prob_media = datos_linea['prob_media_raw']
|
| 680 |
+
prob_low = datos_linea['prob_low_raw']
|
| 681 |
+
prob_high = datos_linea['prob_high_raw']
|
| 682 |
+
|
| 683 |
+
recomendacion = recomendar_apuesta_avanzada(
|
| 684 |
+
prob_media, prob_low, prob_high, momio_casa
|
| 685 |
+
)
|
| 686 |
+
|
| 687 |
+
st.markdown("### 📊 Métricas de la Apuesta")
|
| 688 |
+
|
| 689 |
+
col_m1, col_m2, col_m3, col_m4 = st.columns(4)
|
| 690 |
+
|
| 691 |
+
with col_m1:
|
| 692 |
+
st.metric(
|
| 693 |
+
"Prob. Media",
|
| 694 |
+
f"{prob_media:.1f}%",
|
| 695 |
+
help="Probabilidad media según Monte Carlo"
|
| 696 |
+
)
|
| 697 |
+
|
| 698 |
+
with col_m2:
|
| 699 |
+
momio_justo = probabilidad_a_momio(prob_media)
|
| 700 |
+
st.metric(
|
| 701 |
+
"Momio Justo",
|
| 702 |
+
f"@{momio_justo:.2f}",
|
| 703 |
+
help="Momio que refleja la probabilidad real"
|
| 704 |
+
)
|
| 705 |
+
|
| 706 |
+
with col_m3:
|
| 707 |
+
delta_ev = "📈 Positivo" if recomendacion['ev'] > 0 else "📉 Negativo"
|
| 708 |
+
st.metric(
|
| 709 |
+
"Expected Value",
|
| 710 |
+
f"{recomendacion['ev']:+.2f}%",
|
| 711 |
+
delta=delta_ev,
|
| 712 |
+
help="Ganancia esperada por cada $1 apostado"
|
| 713 |
+
)
|
| 714 |
+
|
| 715 |
+
with col_m4:
|
| 716 |
+
st.metric(
|
| 717 |
+
"Prob. Casino",
|
| 718 |
+
f"{recomendacion['prob_casa']:.1f}%",
|
| 719 |
+
help="Probabilidad implícita del momio del casino"
|
| 720 |
+
)
|
| 721 |
+
|
| 722 |
+
# Vertical spacing before the bankroll section.
st.write("")
st.write("")

st.markdown("### 💵 Gestión de Bankroll (Kelly Criterion)")

col_kelly1, col_kelly2 = st.columns(2)

# Left column: Kelly stake sizes plus a worked example on a $1,000 bankroll.
with col_kelly1:
    kelly_pct = recomendacion['kelly']
    if not kelly_pct > 0:
        # No positive Kelly fraction: advise against betting.
        st.error("❌ Kelly = 0 - No apostar")
    else:
        st.write(f"**Kelly Completo:** {kelly_pct:.2f}% del bankroll")
        kelly_cons_pct = recomendacion['kelly_conservador']
        st.write(f"**Kelly Conservador (1/4):** {kelly_cons_pct:.2f}% del bankroll ⭐")

        st.write("")
        st.markdown("**Ejemplo con Bankroll de $1,000:**")

        # Percentages converted to stake amounts on the example bankroll.
        apuesta_kelly = (kelly_pct / 100) * 1000
        apuesta_conservador = (kelly_cons_pct / 100) * 1000
        # Net profit of the conservative stake at the entered odds.
        ganancia_potencial = apuesta_conservador * (momio_casa - 1)

        lineas_ejemplo = (
            f"- Kelly Completo: **${apuesta_kelly:.2f}**",
            f"- Conservador: **${apuesta_conservador:.2f}**",
            f"- Ganancia potencial: **${ganancia_potencial:.2f}**",
        )
        for linea_ejemplo in lineas_ejemplo:
            st.write(linea_ejemplo)
|
| 746 |
+
|
| 747 |
+
# Right column: EV, safety margin, interval bounds and two status banners.
with col_kelly2:
    ev_pct = recomendacion['ev']
    st.write(f"**EV:** {ev_pct:+.2f}%")
    st.write(f"**Margen de Seguridad:** {recomendacion['margen_seguridad']:+.1f}%")
    st.write(f"**IC 90%:** [{prob_low:.1f}%, {prob_high:.1f}%]")

    st.write("")

    # Confidence banner, driven by the 'confianza_alta' flag.
    mostrar_confianza, texto_confianza = (
        (st.success, "✅ Alta confianza: IC inferior supera prob. casino")
        if recomendacion['confianza_alta']
        else (st.warning, "⚠️ Baja confianza: IC inferior NO supera prob. casino")
    )
    mostrar_confianza(texto_confianza)

    # EV tiers, checked from the highest threshold down; the first match wins.
    niveles_ev = (
        (10, st.success, "🟢 EV excelente (>10%)"),
        (5, st.info, "🟡 EV bueno (5-10%)"),
        (0, st.warning, "🟠 EV positivo pero bajo (<5%)"),
    )
    for umbral_ev, mostrar_ev, texto_ev in niveles_ev:
        if ev_pct > umbral_ev:
            mostrar_ev(texto_ev)
            break
    else:
        # No threshold exceeded.
        st.error("🔴 EV negativo")
|
| 767 |
+
|
| 768 |
+
# Footer: spacing, a divider, and a one-line caption with model/run details.
st.write("")
st.write("")
st.markdown("---")

partes_pie = (
    "🤖 XGBoost v4.2 + Monte Carlo",
    f"🎲 {N_SIMULACIONES:,} simulaciones",
    f"📊 RMSE: {RMSE_MODELO}",
    f"⏰ {datetime.now():%Y-%m-%d %H:%M:%S}",
)
st.caption(" | ".join(partes_pie))
|
| 773 |
+
|
| 774 |
+
else:
|
| 775 |
+
# Guide the user through the selections that are still missing.
if not option:
    st.info("👆 Selecciona una liga para comenzar")
elif not (option_local and option_away):
    st.info("👆 Selecciona ambos equipos")
# Otherwise both teams are chosen and nothing is shown: waiting for the button.
|
| 782 |
+
|
| 783 |
+
# Sidebar
|
| 784 |
+
# Sidebar: app title, project links, supported leagues and a cache-reset button.
# NOTE(review): nesting reconstructed from a flattened listing; everything below
# is assumed to live inside the sidebar — confirm against the real file.
with st.sidebar:
    st.markdown("## Corners Forecast")

    st.markdown("---")

    st.markdown("### 🔗 Enlaces")
    # A Markdown link with an empty label renders nothing visible, so each
    # link gets explicit text.
    st.markdown(
        "[Repositorio en GitHub](https://github.com/danielsaed/futbol_corners_forecast)\n\n"
        "[API en Hugging Face Spaces](https://huggingface.co/spaces/daniel-saed/futbol-corners-forecast-api)"
    )

    st.markdown("---")

    st.markdown("### Ligas")
    for league in LEAGUES_DICT:
        st.write(f"• {league}")

    # Cache-reset button.
    if st.button("🗑️ Limpiar Cache", use_container_width=True):
        st.cache_data.clear()
        st.session_state.prediccion_realizada = False
        st.session_state.resultado_api = None
        # st.rerun() halts this run immediately, so a message drawn here would
        # be wiped before the user sees it. Queue it for the next run instead.
        st.session_state.cache_limpiado = True
        st.rerun()

    # Show the confirmation queued by the previous run, exactly once.
    if st.session_state.get("cache_limpiado", False):
        st.session_state.cache_limpiado = False
        st.success("✅ Cache limpiado")

    st.markdown("---")
|