Spaces:

gnosisx
/

epl-predictions

Sleeping

App Files Files Community

epl-predictions / full_app.py

gnosisx

Fix Pydantic validation error

b4bdac6 verified 7 months ago

raw

history blame contribute delete

7.14 kB

	from fastapi import FastAPI, HTTPException
	from pydantic import BaseModel
	import joblib
	import numpy as np
	import pandas as pd
	import json
	import traceback

	app = FastAPI(title="EPL Match Predictions - Full Model")

	# Load models
	# All artifacts are loaded once at import time. Any failure leaves the app
	# running in a degraded state (models_loaded = False) instead of crashing,
	# so the status endpoints can still report what happened.
	# NOTE: joblib.load unpickles arbitrary objects; these paths must only ever
	# point at trusted, locally produced artifacts.
	try:
	    print("Loading models...")
	    rf_model = joblib.load('calibrated_rf_model.joblib')
	    gb_model = joblib.load('calibrated_gb_model.joblib')
	    lr_model = joblib.load('calibrated_lr_model.joblib')
	    scaler = joblib.load('calibrated_scaler.joblib')

	    # Load metadata for feature names
	    # JSON is read as UTF-8 explicitly so the result does not depend on the
	    # platform's default text encoding.
	    with open('calibrated_model_metadata.json', 'r', encoding='utf-8') as f:
	        metadata = json.load(f)
	    feature_names = metadata['features']

	    print(f"Models loaded. Features: {len(feature_names)}")
	    models_loaded = True
	except Exception as e:
	    print(f"Failed to load models: {e}")
	    traceback.print_exc()
	    models_loaded = False
	    # Reset every artifact, including the ones that may have loaded before
	    # the failure, so no module-level name is left unbound or half-valid.
	    rf_model = None
	    gb_model = None
	    lr_model = None
	    scaler = None
	    feature_names = []

	class MatchRequest(BaseModel):
	    # Request body accepted by POST /predict.
	    # Team names are echoed back in the response and used in the bet text.
	    home_team: str
	    away_team: str
	    # Expected-goals inputs for each side; optional, with the defaults below.
	    # The 0.82 calibration factor is applied later, not by the caller.
	    home_xg: float = 1.5
	    away_xg: float = 1.3

	class FullPredictionResponse(BaseModel):
	    # Response body returned by POST /predict.
	    # Team names copied from the request.
	    home_team: str
	    away_team: str
	    # Outcome probabilities after normalisation (they sum to 1).
	    home_win: float
	    draw: float
	    away_win: float
	    # "Home", "Away" or "Draw", and the probability of that outcome.
	    prediction: str
	    confidence: float
	    # Heuristic side-market scores, each clamped to [0.05, 0.95].
	    over_2_5: float
	    btts: float
	    # Empty string when no bet is suggested.
	    recommended_bet: str = ""

	@app.get("/")
	def root():
	    """Return a short summary of the API and its model-loading state."""
	    # An empty feature list (e.g. after a failed load) is reported as 0.
	    if feature_names:
	        feature_count = len(feature_names)
	    else:
	        feature_count = 0

	    summary = {
	        "api": "EPL Full Model Predictions",
	        "status": "online",
	        "models_loaded": models_loaded,
	        "accuracy": "75% overall, 80% at >70% confidence",
	        "features": feature_count,
	    }
	    return summary

	@app.get("/health")
	def health():
	    """Return a liveness report including which model types are available."""
	    # The model list is only advertised when loading succeeded.
	    available_models = []
	    if models_loaded:
	        available_models = ["random_forest", "gradient_boosting", "logistic_regression"]

	    return {
	        "status": "healthy",
	        "models_loaded": models_loaded,
	        "model_types": available_models,
	    }

	@app.get("/model-info")
	def model_info():
	    """Return static model metadata, or an error payload if nothing is loaded."""
	    if models_loaded:
	        # Accuracy figures are fixed strings, not computed at runtime.
	        accuracy_by_threshold = {
	            "overall": "75.0%",
	            "confidence_65": "77.1%",
	            "confidence_70": "80.0%",
	            "confidence_75": "83.3%",
	        }
	        return {
	            "model_version": "Calibrated xG Model",
	            "features": feature_names,
	            "accuracy": accuracy_by_threshold,
	            "training_samples": 1560,
	            "xg_calibration": "0.82 factor applied",
	        }

	    return {"error": "Models not loaded"}

	def create_full_features(home_team, away_team, home_xg, away_xg, *, required_features=None):
	    """Build the single-row feature frame fed to the scaler and the model.

	    Every feature is derived heuristically from the two xG inputs after they
	    are multiplied by the 0.82 calibration factor. No per-team statistics are
	    looked up, so ``home_team`` and ``away_team`` are accepted for interface
	    compatibility but do not influence the result.

	    Args:
	        home_team: Home team name (currently unused).
	        away_team: Away team name (currently unused).
	        home_xg: Uncalibrated expected goals for the home side.
	        away_xg: Uncalibrated expected goals for the away side.
	        required_features: Optional ordered column names for the returned
	            frame. When omitted, the module-level ``feature_names`` list
	            loaded from the model metadata is used.

	    Returns:
	        A one-row ``pandas.DataFrame`` whose columns are exactly the requested
	        names, in that order. Requested names that this function does not
	        compute are filled with 0.
	    """
	    # Apply xG calibration
	    xg_calibration = 0.82
	    home_xg = home_xg * xg_calibration
	    away_xg = away_xg * xg_calibration

	    # Intermediate estimates are computed once and reused, so the *_diff
	    # features can never drift from the values they are differences of.
	    home_def = 1.35 - (home_xg - 1.5) * 0.3  # Estimated defensive xG
	    away_def = 1.35 - (away_xg - 1.5) * 0.3
	    home_form = 0.5 + (home_xg - 1.5) * 0.2  # Form based on xG
	    away_form = 0.5 + (away_xg - 1.5) * 0.2
	    home_shots = home_xg * 7  # Approximate shots from xG
	    away_shots = away_xg * 7

	    # Calculate all required features
	    features = {
	        'xg_home': home_xg,
	        'xg_away': away_xg,
	        'xg_home_def': home_def,
	        'xg_away_def': away_def,
	        'xg_diff': home_xg - away_xg,
	        # Denominators are floored to avoid division by (near) zero.
	        'xg_ratio': home_xg / max(away_xg, 0.1),
	        'xg_def_diff': away_def - home_def,
	        'expected_total_goals': home_xg + away_xg,
	        'expected_home_goals': home_xg,
	        'expected_away_goals': away_xg,
	        'shots_home': home_shots,
	        'shots_away': away_shots,
	        'sot_home': home_xg * 3,  # Shots on target approximation
	        'sot_away': away_xg * 3,
	        'conversion_home': home_xg / max(home_shots, 1),
	        'conversion_away': away_xg / max(away_shots, 1),
	        'form_home': home_form,
	        'form_away': away_form,
	        'form_diff': home_form - away_form,
	        'home_matches': 19,  # Default for mid-season
	        'away_matches': 19,
	    }

	    columns = list(feature_names if required_features is None else required_features)

	    # One reindex both adds any missing training features (as 0) and puts
	    # the columns in the exact order the model was trained with.
	    return pd.DataFrame([features]).reindex(columns=columns, fill_value=0)

	@app.post("/predict", response_model=FullPredictionResponse)
	def predict(match: MatchRequest):
	    """Predict outcome probabilities, side markets and a bet hint for a match.

	    Responds with 503 when the models are not loaded and with 500 (carrying
	    the error text) when anything fails while computing the prediction.
	    """
	    models_ready = models_loaded and rf_model is not None
	    if not models_ready:
	        raise HTTPException(status_code=503, detail="Models not loaded")

	    try:
	        # Build the model input row and put it on the training scale.
	        feature_row = create_full_features(
	            match.home_team, match.away_team, match.home_xg, match.away_xg
	        )
	        scaled_row = scaler.transform(feature_row)

	        # Only the Random Forest (best model) is consulted.
	        class_probs = rf_model.predict_proba(scaled_row)[0]

	        if len(class_probs) == 3:
	            # Assuming order: [away, draw, home]
	            # NOTE(review): this order is not checked against
	            # rf_model.classes_ -- confirm it matches the training labels.
	            away_prob, draw_prob, home_prob = (float(p) for p in class_probs)
	        else:
	            # Binary case, no draw: a fixed draw share is injected and the
	            # three values are renormalised below.
	            away_prob = float(class_probs[0])
	            home_prob = float(class_probs[1])
	            draw_prob = 0.25

	        # Normalize probabilities so they sum to 1.
	        prob_sum = home_prob + draw_prob + away_prob
	        home_prob, draw_prob, away_prob = (
	            home_prob / prob_sum,
	            draw_prob / prob_sum,
	            away_prob / prob_sum,
	        )

	        # Home must beat both others strictly; otherwise away wins if it
	        # strictly beats the draw; everything else is called a draw.
	        if home_prob > draw_prob and home_prob > away_prob:
	            prediction, confidence = "Home", home_prob
	        elif away_prob > draw_prob:
	            prediction, confidence = "Away", away_prob
	        else:
	            prediction, confidence = "Draw", draw_prob

	        # Over 2.5 and BTTS are simple clamped functions of calibrated xG.
	        home_xg_cal = match.home_xg * 0.82
	        away_xg_cal = match.away_xg * 0.82
	        total_xg = home_xg_cal + away_xg_cal
	        over_2_5 = min(0.95, max(0.05, (total_xg - 1.5) / 2))
	        btts = min(0.95, max(0.05, min(home_xg_cal, away_xg_cal) * 0.7))

	        # "Back ..." above 70% confidence, "Consider ..." above 65%.
	        # NOTE(review): the source's indentation was lost; the over-2.5
	        # branch is reconstructed as part of the >70% block (i.e. it applies
	        # to confident Draw predictions). Confirm against the original file.
	        recommended_bet = ""
	        if confidence > 0.70:
	            if prediction == "Home":
	                recommended_bet = f"Back {match.home_team} to win"
	            elif prediction == "Away":
	                recommended_bet = f"Back {match.away_team} to win"
	            elif over_2_5 > 0.70:
	                recommended_bet = "Back Over 2.5 goals"
	        elif confidence > 0.65:
	            if prediction == "Home":
	                recommended_bet = f"Consider {match.home_team} to win"
	            elif prediction == "Away":
	                recommended_bet = f"Consider {match.away_team} to win"

	        response_fields = dict(
	            home_team=match.home_team,
	            away_team=match.away_team,
	            home_win=home_prob,
	            draw=draw_prob,
	            away_win=away_prob,
	            prediction=prediction,
	            confidence=confidence,
	            over_2_5=over_2_5,
	            btts=btts,
	            recommended_bet=recommended_bet,
	        )
	        return FullPredictionResponse(**response_fields)

	    except Exception as e:
	        # Log the full traceback server-side, then surface the message.
	        print(f"Prediction error: {e}")
	        traceback.print_exc()
	        raise HTTPException(status_code=500, detail=str(e))

	if __name__ == "__main__":
	    # Direct execution starts a uvicorn server for this app, listening on
	    # all interfaces.
	    import uvicorn

	    bind_host = "0.0.0.0"
	    listen_port = 7860
	    uvicorn.run(app, host=bind_host, port=listen_port)