File size: 7,774 Bytes
4bbca57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""
Model predictor that loads from Hugging Face and makes predictions
"""
import joblib
import numpy as np
from typing import Dict, List
import requests
import os
from math import factorial

class EPLPredictor:
    """Ensemble 1X2 (home / draw / away) predictor for football matches.

    Combines two Poisson goal-rate models (home and away) with a classifier
    exposing ``predict_proba`` (stored under the ``'xgboost'`` key). Models
    are loaded with joblib, either from the current working directory or
    from the Hugging Face repository named in ``self.model_repo``.
    """

    # Single source of truth for the key used in ``self.models`` and the
    # serialized file behind it. Both the local and the remote loader use
    # this mapping so that ``predict`` always finds the keys it expects.
    MODEL_FILES = {
        'poisson_home': 'poisson_home.joblib',
        'poisson_away': 'poisson_away.joblib',
        'xgboost': 'xgb_1x2.joblib',
    }

    # Seconds to wait on the Hugging Face server before giving up.
    DOWNLOAD_TIMEOUT = 30

    def __init__(self, use_local=False):
        """Initialize the predictor and eagerly load all models.

        Args:
            use_local: When true, load the ``.joblib`` files from the current
                working directory; otherwise download them from Hugging Face.
        """
        self.models = {}
        self.model_repo = "gnosisx/epl-ensemble-1x2"
        self.use_local = use_local

        # Feature names for reference; the order matches the vector built by
        # ``build_features_from_odds``.
        self.feature_names = [
            "xg_h_l5", "xga_h_l5", "xg_a_l5", "xga_a_l5",
            "elo_diff", "home_adv", "rest_h", "rest_a",
            "h2h_h_wins", "h2h_draws", "form_h", "form_a"
        ]

        self.load_models()

    def load_models(self):
        """Populate ``self.models`` from local files or from Hugging Face.

        Raises:
            RuntimeError: If a download answers with a status other than 200.
            requests.exceptions.RequestException: On network errors/timeouts.
        """
        # NOTE(review): joblib.load unpickles its input, which can execute
        # arbitrary code. Only point ``model_repo`` at a repository you trust.
        if self.use_local:
            for key, file_name in self.MODEL_FILES.items():
                self.models[key] = joblib.load(file_name)
            return

        import tempfile

        # A private temporary directory avoids predictable paths in a shared
        # temp dir and is removed once the models are loaded in memory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            for key, file_name in self.MODEL_FILES.items():
                url = f"https://huggingface.co/{self.model_repo}/resolve/main/{file_name}"
                response = requests.get(url, timeout=self.DOWNLOAD_TIMEOUT)
                if response.status_code != 200:
                    raise RuntimeError(f"Failed to download {file_name} from Hugging Face")

                temp_path = os.path.join(tmp_dir, file_name)
                with open(temp_path, 'wb') as f:
                    f.write(response.content)

                self.models[key] = joblib.load(temp_path)

    def build_features_from_odds(self, home_team: str, away_team: str,
                                 best_odds: Dict) -> np.ndarray:
        """Approximate the model's feature vector from 1X2 market odds.

        Args:
            home_team: Home team name (currently not used in the estimate).
            away_team: Away team name (currently not used in the estimate).
            best_odds: Mapping with optional ``'H'``, ``'D'`` and ``'A'``
                entries, each a dict holding decimal odds under ``'odds'``.
                Missing entries fall back to 2.0 / 3.5 / 3.0.

        Returns:
            Array of shape ``(1, 12)`` ordered like ``self.feature_names``.

        Raises:
            ZeroDivisionError: If any supplied odds value is 0.
        """
        h_odds = best_odds.get('H', {}).get('odds', 2.0)
        d_odds = best_odds.get('D', {}).get('odds', 3.5)
        a_odds = best_odds.get('A', {}).get('odds', 3.0)

        # Normalizing by the sum of inverse odds removes the bookmaker margin.
        total = 1/h_odds + 1/d_odds + 1/a_odds
        h_prob = (1/h_odds) / total
        a_prob = (1/a_odds) / total

        # Heuristic approximations based on market sentiment.
        features = [
            1.8 * h_prob + 0.8,  # xg_h_l5 - home expected goals
            1.2 * (1 - h_prob) + 0.5,  # xga_h_l5 - home expected goals against
            1.5 * a_prob + 0.7,  # xg_a_l5 - away expected goals
            1.3 * (1 - a_prob) + 0.6,  # xga_a_l5 - away expected goals against
            (h_prob - a_prob) * 200,  # elo_diff - estimated from odds
            1.0,  # home_adv - always 1 for home team
            6,  # rest_h - default rest days
            6,  # rest_a - default rest days
            2,  # h2h_h_wins - default
            2,  # h2h_draws - default
            h_prob * 3,  # form_h - estimated from odds
            a_prob * 3   # form_a - estimated from odds
        ]

        return np.array(features).reshape(1, -1)

    def poisson_to_outcome_probs(self, lambda_h: float, lambda_a: float,
                                 max_goals: int = 10) -> Dict[str, float]:
        """Convert two independent Poisson goal rates to market probabilities.

        Args:
            lambda_h: Expected goals of the home side.
            lambda_a: Expected goals of the away side.
            max_goals: Largest per-team goal count included in the score grid.

        Returns:
            Dict with ``'H'``, ``'D'``, ``'A'`` (match result), ``'over25'``
            (more than 2.5 total goals) and ``'btts'`` (both teams score).
            Scores beyond ``max_goals`` are truncated, so H + D + A can be
            marginally below 1.
        """
        goal_counts = range(max_goals + 1)
        pmf_h = np.array([np.exp(-lambda_h) * (lambda_h ** k) / factorial(k)
                          for k in goal_counts])
        pmf_a = np.array([np.exp(-lambda_a) * (lambda_a ** k) / factorial(k)
                          for k in goal_counts])

        # prob_matrix[i, j] = P(home scores i, away scores j)
        prob_matrix = np.outer(pmf_h, pmf_a)

        # Rows are home goals, columns are away goals: the strictly lower
        # triangle (i > j) is a home win, the strictly upper one an away win.
        p_home = np.tril(prob_matrix, -1).sum()
        p_draw = np.trace(prob_matrix)
        p_away = np.triu(prob_matrix, 1).sum()

        # Over 2.5 goals: every score line with at least three goals in total.
        idx = np.arange(max_goals + 1)
        total_goals = np.add.outer(idx, idx)
        over_25 = prob_matrix[total_goals > 2].sum()

        # BTTS: complement of "at least one side fails to score".
        btts = 1 - (prob_matrix[0, :].sum() + prob_matrix[:, 0].sum() - prob_matrix[0, 0])

        return {
            'H': float(p_home),
            'D': float(p_draw),
            'A': float(p_away),
            'over25': float(over_25),
            'btts': float(btts)
        }

    def predict(self, home_team: str, away_team: str, best_odds: Dict = None,
                features: np.ndarray = None) -> Dict:
        """Predict outcome probabilities for a single match.

        Args:
            home_team: Home team name.
            away_team: Away team name.
            best_odds: Odds mapping used to estimate features when
                ``features`` is not given (see ``build_features_from_odds``).
            features: Optional pre-built feature array; takes precedence
                over ``best_odds``.

        Returns:
            Dict with ``'ensemble'``, ``'poisson'`` and ``'xgboost'``
            probability dicts plus ``'expected_goals'`` (home/away rates).
        """
        if features is None:
            features = self.build_features_from_odds(home_team, away_team, best_odds or {})

        # 1. Poisson predictions
        lambda_h = float(self.models['poisson_home'].predict(features)[0])
        lambda_a = float(self.models['poisson_away'].predict(features)[0])
        poisson_probs = self.poisson_to_outcome_probs(lambda_h, lambda_a)

        # 2. Classifier predictions
        # NOTE(review): assumes predict_proba columns are ordered H, D, A -
        # confirm against the label encoding used at training time.
        xgb_probs_array = self.models['xgboost'].predict_proba(features)[0]
        xgb_probs = {
            outcome: float(xgb_probs_array[col])
            for col, outcome in enumerate(('H', 'D', 'A'))
        }

        # 3. Ensemble (weighted average)
        weights = {'poisson': 0.4, 'xgboost': 0.6}
        ensemble_probs = {
            outcome: (
                weights['poisson'] * poisson_probs[outcome] +
                weights['xgboost'] * xgb_probs[outcome]
            )
            for outcome in ('H', 'D', 'A')
        }

        # Renormalize: the truncated Poisson grid can sum to slightly under 1.
        total = sum(ensemble_probs.values())
        for k in ensemble_probs:
            ensemble_probs[k] /= total

        # The secondary markets are only available from the Poisson model.
        ensemble_probs['over25'] = poisson_probs['over25']
        ensemble_probs['btts'] = poisson_probs['btts']

        return {
            'ensemble': ensemble_probs,
            'poisson': poisson_probs,
            'xgboost': xgb_probs,
            'expected_goals': {
                'home': lambda_h,
                'away': lambda_a
            }
        }

    def calculate_value(self, model_prob: float, odds: float,
                       kelly_fraction: float = 0.25) -> Dict:
        """Compare a model probability with decimal odds.

        Args:
            model_prob: Model probability of the outcome.
            odds: Decimal odds offered for the outcome.
            kelly_fraction: Multiplier applied to the full Kelly stake.

        Returns:
            Dict with ``'has_value'``, ``'edge'`` (percent relative to the
            implied probability), ``'kelly_pct'`` (fractional Kelly stake in
            percent, 0 when there is no edge), ``'implied_prob'`` and
            ``'model_prob'``.

        Raises:
            ZeroDivisionError: If ``odds`` is 0.
        """
        implied_prob = 1 / odds
        edge = ((model_prob - implied_prob) / implied_prob) * 100
        has_value = bool(edge > 0)

        kelly_pct = 0
        if has_value:
            # Kelly criterion: f* = (p * o - 1) / (o - 1), floored at zero.
            kelly = (model_prob * odds - 1) / (odds - 1)
            kelly_pct = max(0, kelly * kelly_fraction) * 100

        return {
            'has_value': has_value,
            'edge': edge,
            'kelly_pct': kelly_pct,
            'implied_prob': implied_prob,
            'model_prob': model_prob
        }


# Example usage
if __name__ == "__main__":
    # Demo run: score one fixture using the locally stored model files.
    demo_odds = {
        'H': {'odds': 1.48},
        'D': {'odds': 5.0},
        'A': {'odds': 8.0}
    }
    epl = EPLPredictor(use_local=True)
    report = epl.predict(home_team="Liverpool", away_team="Everton",
                         best_odds=demo_odds)

    # Blended probabilities for every market in the ensemble dict.
    print("Ensemble probabilities:")
    for market in report['ensemble']:
        share = report['ensemble'][market]
        print(f"  {market}: {share:.1%}")

    # Goal rates produced by the two Poisson models.
    goals = report['expected_goals']
    home_xg = goals['home']
    away_xg = goals['away']
    print("\nExpected goals:")
    print(f"  Home: {home_xg:.2f}")
    print(f"  Away: {away_xg:.2f}")