# epl-predictions / full_app.py
# gnosisx's picture
# Fix Pydantic validation error
# b4bdac6 verified
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import numpy as np
import pandas as pd
import json
import traceback
# FastAPI application object; the route decorators in this file register on it.
app = FastAPI(title="EPL Match Predictions - Full Model")

# Start from a known "nothing loaded" state so that every model-related
# global exists even when loading fails part-way through.
rf_model = None
gb_model = None
lr_model = None
scaler = None
metadata = {}
feature_names = []
models_loaded = False

# Load models
# NOTE: joblib artifacts are pickle-based, and unpickling can execute
# arbitrary code. These files must only ever come from a trusted source.
try:
    print("Loading models...")
    rf_model = joblib.load('calibrated_rf_model.joblib')
    gb_model = joblib.load('calibrated_gb_model.joblib')
    lr_model = joblib.load('calibrated_lr_model.joblib')
    scaler = joblib.load('calibrated_scaler.joblib')
    # Load metadata for feature names. The encoding is explicit so the
    # result does not depend on the host's locale settings.
    with open('calibrated_model_metadata.json', 'r', encoding='utf-8') as f:
        metadata = json.load(f)
    feature_names = metadata['features']
    print(f"Models loaded. Features: {len(feature_names)}")
    models_loaded = True
except Exception as e:
    # Deliberately broad: the app must still start (and report
    # models_loaded=False) when any artifact is missing or unreadable.
    print(f"Failed to load models: {e}")
    traceback.print_exc()
    # Discard anything that did load so no half-initialised state leaks out.
    models_loaded = False
    rf_model = gb_model = lr_model = scaler = None
    metadata = {}
    feature_names = []
class MatchRequest(BaseModel):
    """Request body accepted by the prediction endpoint."""

    # Team names as supplied by the caller.
    home_team: str
    away_team: str

    # Expected-goals inputs; both are optional and fall back to these
    # defaults when the caller omits them.
    home_xg: float = 1.5
    away_xg: float = 1.3
class FullPredictionResponse(BaseModel):
    """Response body returned by the prediction endpoint."""

    # Team names for the match.
    home_team: str
    away_team: str

    # Outcome probabilities.
    home_win: float
    draw: float
    away_win: float

    # Predicted outcome label and the probability attached to it.
    prediction: str
    confidence: float

    # Goal-market estimates (over 2.5 goals, both teams to score).
    over_2_5: float
    btts: float

    # Suggested bet text; defaults to an empty string.
    recommended_bet: str = ""
@app.get("/")
def root():
    """Return a short status summary of the API.

    The accuracy string is a hard-coded label, not a value computed at
    runtime. The feature count is 0 when no feature names are available.
    """
    feature_count = 0
    if feature_names:
        feature_count = len(feature_names)

    summary = {
        "api": "EPL Full Model Predictions",
        "status": "online",
        "models_loaded": models_loaded,
        "accuracy": "75% overall, 80% at >70% confidence",
        "features": feature_count,
    }
    return summary
@app.get("/health")
def health():
    """Report service health and which model types are available.

    The status is always "healthy"; model availability is reported
    separately through ``models_loaded`` and ``model_types``.
    """
    available_types = []
    if models_loaded:
        available_types = [
            "random_forest",
            "gradient_boosting",
            "logistic_regression",
        ]

    return {
        "status": "healthy",
        "models_loaded": models_loaded,
        "model_types": available_types,
    }
@app.get("/model-info")
def model_info():
    """Describe the loaded model: features, accuracy labels and training size.

    Returns an ``{"error": ...}`` payload instead when the models are not
    loaded. All accuracy figures, the sample count and the calibration note
    are hard-coded values, not read from the model artifacts.
    """
    if not models_loaded:
        return {"error": "Models not loaded"}

    # Accuracy labels keyed by overall / confidence threshold.
    accuracy_labels = {
        "overall": "75.0%",
        "confidence_65": "77.1%",
        "confidence_70": "80.0%",
        "confidence_75": "83.3%",
    }

    info = {
        "model_version": "Calibrated xG Model",
        "features": feature_names,
        "accuracy": accuracy_labels,
        "training_samples": 1560,
        "xg_calibration": "0.82 factor applied",
    }
    return info
def create_full_features(home_team, away_team, home_xg, away_xg):
    """Build the one-row feature frame passed to the scaler and model.

    Every feature is derived from the two xG inputs using fixed heuristic
    formulas. ``home_team`` and ``away_team`` are accepted but do not
    influence any feature value.

    Args:
        home_team: Home team name (unused in the feature calculation).
        away_team: Away team name (unused in the feature calculation).
        home_xg: Raw expected goals for the home side.
        away_xg: Raw expected goals for the away side.

    Returns:
        A single-row ``pandas.DataFrame`` whose columns are exactly the
        module-level ``feature_names``, in that order. Names in
        ``feature_names`` that are not computed here are filled with 0.
    """
    # Apply xG calibration
    home_xg = home_xg * 0.82
    away_xg = away_xg * 0.82

    # Quantities that feed more than one feature are computed once.
    home_def = 1.35 - (home_xg - 1.5) * 0.3  # Estimated defensive xG
    away_def = 1.35 - (away_xg - 1.5) * 0.3
    home_form = 0.5 + (home_xg - 1.5) * 0.2  # Form based on xG
    away_form = 0.5 + (away_xg - 1.5) * 0.2
    home_shots = home_xg * 7  # Approximate shots from xG
    away_shots = away_xg * 7

    row = {
        'xg_home': home_xg,
        'xg_away': away_xg,
        'xg_home_def': home_def,
        'xg_away_def': away_def,
        'xg_diff': home_xg - away_xg,
        # The denominator floor avoids division by zero for tiny away xG.
        'xg_ratio': home_xg / max(away_xg, 0.1),
        'xg_def_diff': away_def - home_def,
        'expected_total_goals': home_xg + away_xg,
        'expected_home_goals': home_xg,
        'expected_away_goals': away_xg,
        'shots_home': home_shots,
        'shots_away': away_shots,
        'sot_home': home_xg * 3,  # Shots on target approximation
        'sot_away': away_xg * 3,
        # The shot count is floored at 1 so the ratio stays finite.
        'conversion_home': home_xg / max(home_shots, 1),
        'conversion_away': away_xg / max(away_shots, 1),
        'form_home': home_form,
        'form_away': away_form,
        'form_diff': home_form - away_form,
        'home_matches': 19,  # Default for mid-season
        'away_matches': 19,
    }
    feature_df = pd.DataFrame([row])

    # Any training feature not computed above gets a neutral 0 column.
    absent = [name for name in feature_names if name not in feature_df.columns]
    for name in absent:
        feature_df[name] = 0

    # Keep only the training features, in training order.
    return feature_df[feature_names]
# Calibration factor applied to the raw xG inputs before the goal-market
# estimates are derived from them.
_XG_CALIBRATION = 0.82


def _outcome_probabilities(probs):
    """Map raw class probabilities to normalised (home, draw, away) floats."""
    n_classes = len(probs)
    if n_classes == 3:
        # Assuming order: [away, draw, home]
        # NOTE(review): this order is an assumption about how the training
        # labels were encoded; confirm against the model's `classes_`.
        away_prob, draw_prob, home_prob = (float(p) for p in probs)
    elif n_classes == 2:
        # Binary case, no draw: inject a fixed draw share, then renormalise.
        away_prob, home_prob = (float(p) for p in probs)
        draw_prob = 0.25
    else:
        # Fail loudly rather than guessing what the columns mean.
        raise ValueError(
            f"Unexpected number of model classes: {n_classes} (expected 2 or 3)"
        )

    # Normalize probabilities
    total = home_prob + draw_prob + away_prob
    return home_prob / total, draw_prob / total, away_prob / total


def _pick_outcome(home_prob, draw_prob, away_prob):
    """Return (label, probability) for the predicted outcome.

    Home wins only when strictly highest; otherwise Away wins when it
    strictly beats Draw; everything else is reported as Draw.
    """
    if home_prob > draw_prob and home_prob > away_prob:
        return "Home", home_prob
    if away_prob > draw_prob:
        return "Away", away_prob
    return "Draw", draw_prob


def _goal_market_estimates(home_xg, away_xg):
    """Return heuristic (over_2_5, btts) estimates, each clamped to [0.05, 0.95]."""
    calibrated_home = home_xg * _XG_CALIBRATION
    calibrated_away = away_xg * _XG_CALIBRATION
    total_xg = calibrated_home + calibrated_away
    over_2_5 = min(0.95, max(0.05, (total_xg - 1.5) / 2))
    btts = min(0.95, max(0.05, min(calibrated_home, calibrated_away) * 0.7))
    return over_2_5, btts


def _recommend_bet(prediction, confidence, over_2_5, home_team, away_team):
    """Return the bet suggestion text, or "" when nothing qualifies."""
    if confidence > 0.70:
        if prediction == "Home":
            return f"Back {home_team} to win"
        if prediction == "Away":
            return f"Back {away_team} to win"
        # Only reachable for a confident Draw prediction.
        # NOTE(review): nesting reconstructed from unindented source; confirm.
        if over_2_5 > 0.70:
            return "Back Over 2.5 goals"
    elif confidence > 0.65:
        if prediction == "Home":
            return f"Consider {home_team} to win"
        if prediction == "Away":
            return f"Consider {away_team} to win"
    return ""


@app.post("/predict", response_model=FullPredictionResponse)
def predict(match: MatchRequest):
    """Predict the outcome of a match from the supplied xG figures.

    Builds the feature row, scales it, takes class probabilities from the
    random forest model, and derives the predicted outcome, heuristic goal
    market estimates and an optional bet suggestion.

    Args:
        match: Team names and (optionally) expected goals for each side.

    Returns:
        A ``FullPredictionResponse`` with the probabilities and derived fields.

    Raises:
        HTTPException: 503 when the models (or scaler) are not loaded;
            500 when any step of the prediction fails.
    """
    if not models_loaded or rf_model is None or scaler is None:
        raise HTTPException(status_code=503, detail="Models not loaded")

    try:
        # Create features
        features = create_full_features(
            match.home_team,
            match.away_team,
            match.home_xg,
            match.away_xg,
        )
        # Scale features
        features_scaled = scaler.transform(features)
        # Get predictions from Random Forest (best model)
        probs = rf_model.predict_proba(features_scaled)[0]

        home_prob, draw_prob, away_prob = _outcome_probabilities(probs)
        prediction, confidence = _pick_outcome(home_prob, draw_prob, away_prob)
        over_2_5, btts = _goal_market_estimates(match.home_xg, match.away_xg)
        recommended_bet = _recommend_bet(
            prediction,
            confidence,
            over_2_5,
            match.home_team,
            match.away_team,
        )

        return FullPredictionResponse(
            home_team=match.home_team,
            away_team=match.away_team,
            home_win=home_prob,
            draw=draw_prob,
            away_win=away_prob,
            prediction=prediction,
            confidence=confidence,
            over_2_5=over_2_5,
            btts=btts,
            recommended_bet=recommended_bet,
        )
    except HTTPException:
        # Never rewrap an HTTP error that already carries its own status.
        raise
    except Exception as e:
        print(f"Prediction error: {e}")
        traceback.print_exc()
        # Chain the cause so the original traceback stays attached.
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    import uvicorn

    # Listen on every network interface, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)