# Spaces: gnosisx / epl-predictor (Sleeping)
# File size: 7,774 Bytes
# 4bbca57

"""
Model predictor that loads from Hugging Face and makes predictions
"""
import joblib
import numpy as np
from typing import Dict, List
import requests
import os
from math import factorial

class EPLPredictor:
    """Ensemble match-outcome predictor built from three joblib artifacts.

    Two regressors ("poisson_home" / "poisson_away") supply the expected
    goals for each side, and a classifier ("xgboost") supplies 1X2
    probabilities directly. `predict` blends both sources into one set of
    home/draw/away probabilities.
    """

    # Maps the key used in ``self.models`` (and read by `predict`) to the
    # artifact file name. Both loading paths share this table so the keys
    # cannot drift apart from what `predict` looks up.
    _MODEL_FILES = {
        'poisson_home': 'poisson_home.joblib',
        'poisson_away': 'poisson_away.joblib',
        'xgboost': 'xgb_1x2.joblib',
    }

    # Seconds to wait on each download before giving up instead of hanging
    _DOWNLOAD_TIMEOUT = 30

    def __init__(self, use_local=False):
        """Initialize predictor with models from HF or local.

        Args:
            use_local: When true, load the artifacts from the current
                working directory; otherwise download them from the
                Hugging Face repo named in ``self.model_repo``.
        """
        self.models = {}
        self.model_repo = "gnosisx/epl-ensemble-1x2"
        self.use_local = use_local

        # Feature names for reference (same order as the vector produced by
        # `build_features_from_odds`)
        self.feature_names = [
            "xg_h_l5", "xga_h_l5", "xg_a_l5", "xga_a_l5",
            "elo_diff", "home_adv", "rest_h", "rest_a",
            "h2h_h_wins", "h2h_draws", "form_h", "form_a"
        ]

        self.load_models()

    def load_models(self):
        """Load models from Hugging Face or local files into ``self.models``.

        Raises:
            RuntimeError: If a download does not answer with HTTP 200.
        """
        # SECURITY NOTE: joblib.load unpickles its input, which can execute
        # arbitrary code. Only load artifacts from a repo/path you trust.
        if self.use_local:
            for key, filename in self._MODEL_FILES.items():
                self.models[key] = joblib.load(filename)
            return

        # Local import: only the download path needs a scratch directory
        import tempfile

        # A private temporary directory avoids a predictable /tmp file name
        # and is removed once every artifact has been loaded into memory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for key, filename in self._MODEL_FILES.items():
                url = f"https://huggingface.co/{self.model_repo}/resolve/main/{filename}"
                response = requests.get(url, timeout=self._DOWNLOAD_TIMEOUT)
                if response.status_code != 200:
                    raise RuntimeError(f"Failed to download {filename} from Hugging Face")

                # Save temporarily and load
                temp_path = os.path.join(tmp_dir, filename)
                with open(temp_path, 'wb') as f:
                    f.write(response.content)

                self.models[key] = joblib.load(temp_path)

    def build_features_from_odds(self, home_team: str, away_team: str,
                                 best_odds: Dict) -> np.ndarray:
        """Build features from current odds and team names.

        Args:
            home_team: Home team name (not used in the computation).
            away_team: Away team name (not used in the computation).
            best_odds: Mapping with optional 'H', 'D', 'A' entries, each a
                dict holding decimal odds under 'odds'. Missing entries
                fall back to 2.0 / 3.5 / 3.0.

        Returns:
            Array of shape (1, 12) ordered like ``self.feature_names``.
        """
        # Extract decimal odds, falling back to defaults when absent
        h_odds = best_odds.get('H', {}).get('odds', 2.0)
        d_odds = best_odds.get('D', {}).get('odds', 3.5)
        a_odds = best_odds.get('A', {}).get('odds', 3.0)

        # Implied probabilities, rescaled so the three outcomes sum to 1
        total = 1/h_odds + 1/d_odds + 1/a_odds
        h_prob = (1/h_odds) / total
        a_prob = (1/a_odds) / total

        # Estimate features from odds
        # These are approximations based on market sentiment
        features = [
            1.8 * h_prob + 0.8,  # xg_h_l5 - home expected goals
            1.2 * (1 - h_prob) + 0.5,  # xga_h_l5 - home expected goals against
            1.5 * a_prob + 0.7,  # xg_a_l5 - away expected goals
            1.3 * (1 - a_prob) + 0.6,  # xga_a_l5 - away expected goals against
            (h_prob - a_prob) * 200,  # elo_diff - estimated from odds
            1.0,  # home_adv - always 1 for home team
            6,  # rest_h - default rest days
            6,  # rest_a - default rest days
            2,  # h2h_h_wins - default
            2,  # h2h_draws - default
            h_prob * 3,  # form_h - estimated from odds
            a_prob * 3   # form_a - estimated from odds
        ]

        return np.array(features).reshape(1, -1)

    def poisson_to_outcome_probs(self, lambda_h: float, lambda_a: float,
                                 max_goals: int = 10) -> Dict[str, float]:
        """Convert Poisson parameters to outcome probabilities.

        Home and away goals are treated as independent Poisson variables
        and the score grid is truncated at ``max_goals`` per side, so the
        H/D/A values sum to slightly less than 1.

        Args:
            lambda_h: Expected goals for the home side.
            lambda_a: Expected goals for the away side.
            max_goals: Largest goal count per side included in the grid.

        Returns:
            Dict with 'H', 'D', 'A', 'over25' and 'btts' probabilities.
        """
        goal_counts = range(max_goals + 1)
        pmf_h = np.array([np.exp(-lambda_h) * (lambda_h ** k) / factorial(k)
                          for k in goal_counts])
        pmf_a = np.array([np.exp(-lambda_a) * (lambda_a ** k) / factorial(k)
                          for k in goal_counts])

        # prob_matrix[i, j] = P(home scores i, away scores j)
        prob_matrix = np.outer(pmf_h, pmf_a)

        # Rows are home goals and columns are away goals, so a home win
        # (i > j) sits BELOW the diagonal and an away win (j > i) above it.
        p_home = np.sum(np.tril(prob_matrix, -1))
        p_draw = np.sum(np.diag(prob_matrix))
        p_away = np.sum(np.triu(prob_matrix, 1))

        # Over/under 2.5: sum every cell whose total goals exceed 2.5
        home_goals, away_goals = np.indices(prob_matrix.shape)
        over_25 = prob_matrix[(home_goals + away_goals) > 2.5].sum()

        # BTTS probability: complement of "at least one side scores zero"
        btts = 1 - (prob_matrix[0, :].sum() + prob_matrix[:, 0].sum() - prob_matrix[0, 0])

        return {
            'H': p_home,
            'D': p_draw,
            'A': p_away,
            'over25': over_25,
            'btts': btts
        }

    def predict(self, home_team: str, away_team: str, best_odds: Dict = None,
                features: np.ndarray = None) -> Dict:
        """Make predictions for a match.

        Args:
            home_team: Home team name, forwarded to the feature builder.
            away_team: Away team name, forwarded to the feature builder.
            best_odds: Odds mapping used to derive features when
                ``features`` is not given; ``None`` means default odds.
            features: Pre-built feature array; when given, odds are ignored.

        Returns:
            Dict with 'ensemble', 'poisson' and 'xgboost' probability dicts
            plus 'expected_goals' ({'home', 'away'}).
        """
        # Build or use provided features
        if features is None:
            features = self.build_features_from_odds(home_team, away_team, best_odds or {})

        # 1. Poisson predictions
        lambda_h = self.models['poisson_home'].predict(features)[0]
        lambda_a = self.models['poisson_away'].predict(features)[0]
        poisson_probs = self.poisson_to_outcome_probs(lambda_h, lambda_a)

        # 2. XGBoost predictions
        # NOTE(review): assumes the classifier's class order is H, D, A —
        # confirm against the label encoding used in training.
        xgb_probs_array = self.models['xgboost'].predict_proba(features)[0]
        xgb_probs = {
            'H': xgb_probs_array[0],
            'D': xgb_probs_array[1],
            'A': xgb_probs_array[2]
        }

        # 3. Ensemble (weighted average)
        weights = {'poisson': 0.4, 'xgboost': 0.6}

        ensemble_probs = {
            outcome: (
                weights['poisson'] * poisson_probs[outcome] +
                weights['xgboost'] * xgb_probs[outcome]
            )
            for outcome in ('H', 'D', 'A')
        }

        # Normalize: the truncated Poisson grid sums to slightly under 1
        total = sum(ensemble_probs.values())
        for k in ensemble_probs:
            ensemble_probs[k] /= total

        # Add other markets from Poisson (added after normalizing so they
        # are not rescaled along with the 1X2 outcomes)
        ensemble_probs['over25'] = poisson_probs['over25']
        ensemble_probs['btts'] = poisson_probs['btts']

        return {
            'ensemble': ensemble_probs,
            'poisson': poisson_probs,
            'xgboost': xgb_probs,
            'expected_goals': {
                'home': lambda_h,
                'away': lambda_a
            }
        }

    def calculate_value(self, model_prob: float, odds: float,
                       kelly_fraction: float = 0.25) -> Dict:
        """Calculate value bet metrics.

        Args:
            model_prob: Model probability of the outcome.
            odds: Decimal odds offered for the outcome.
            kelly_fraction: Multiplier applied to the full Kelly stake.

        Returns:
            Dict with 'has_value', 'edge' (percent relative to the implied
            probability), 'kelly_pct' (percent of bankroll, 0 when there is
            no value), 'implied_prob' and 'model_prob'.
        """
        implied_prob = 1 / odds
        edge = ((model_prob - implied_prob) / implied_prob) * 100

        if edge > 0:
            # Kelly criterion
            kelly = (model_prob * odds - 1) / (odds - 1)
            adjusted_kelly = max(0, kelly * kelly_fraction)

            return {
                'has_value': True,
                'edge': edge,
                'kelly_pct': adjusted_kelly * 100,
                'implied_prob': implied_prob,
                'model_prob': model_prob
            }

        return {
            'has_value': False,
            'edge': edge,
            'kelly_pct': 0,
            'implied_prob': implied_prob,
            'model_prob': model_prob
        }


# Demo entry point: score one fixture with locally stored models
if __name__ == "__main__":
    # Best available decimal odds for the sample fixture
    sample_odds = {
        'H': {'odds': 1.48},
        'D': {'odds': 5.0},
        'A': {'odds': 8.0}
    }

    # Models are read from the working directory rather than downloaded
    model = EPLPredictor(use_local=True)
    result = model.predict(home_team="Liverpool", away_team="Everton",
                           best_odds=sample_odds)

    # Blended probabilities for every market in the ensemble dict
    print("Ensemble probabilities:")
    blended = result['ensemble']
    for outcome, prob in blended.items():
        print(f"  {outcome}: {prob:.1%}")

    # Expected goals reported for each side
    print(f"\nExpected goals:")
    print(f"  Home: {result['expected_goals']['home']:.2f}")
    print(f"  Away: {result['expected_goals']['away']:.2f}")