Upload 8 files
- feature_columns.json +33 -0
- gradio_app.py +359 -0
- inference_utils.py +193 -0
- ingest_data.py +67 -0
- model_lgb_isotonic.joblib +3 -0
- model_xgb_isotonic.joblib +3 -0
- requirements.txt +10 -0
- team name.txt +91 -0
feature_columns.json
ADDED
@@ -0,0 +1,33 @@
{
  "X_cols": [
    "home_roll_gf",
    "home_roll_ga",
    "home_roll_shots_f",
    "home_roll_shots_a",
    "home_roll_sot_f",
    "home_roll_sot_a",
    "home_roll_corn_f",
    "home_roll_corn_a",
    "home_roll_y_f",
    "home_roll_r_f",
    "home_roll_points",
    "away_roll_gf",
    "away_roll_ga",
    "away_roll_shots_f",
    "away_roll_shots_a",
    "away_roll_sot_f",
    "away_roll_sot_a",
    "away_roll_corn_f",
    "away_roll_corn_a",
    "away_roll_y_f",
    "away_roll_r_f",
    "away_roll_points",
    "elo_home",
    "elo_away",
    "elo_diff",
    "p_odds_H",
    "p_odds_D",
    "p_odds_A"
  ],
  "WINDOW": 7
}

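These 28 column names pin the training-time feature order, and WINDOW the rolling-window length, so inference builds vectors in the same order. A minimal sketch of reading the file (it mirrors the _load_feature_meta helper in gradio_app.py below; the print line is illustrative, not part of the upload):

import json
from pathlib import Path

# Load the saved training-time feature order and rolling window, falling back
# to defaults if the file is absent (same behaviour as _load_feature_meta).
meta_path = Path("feature_columns.json")
if meta_path.exists():
    meta = json.loads(meta_path.read_text(encoding="utf-8"))
    X_cols, window = meta.get("X_cols", []), int(meta.get("WINDOW", 7))
else:
    X_cols, window = [], 7
print(len(X_cols), "features, window =", window)
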
gradio_app.py
ADDED
@@ -0,0 +1,359 @@
import os
from pathlib import Path
from datetime import date, timedelta, datetime as dt
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import gradio as gr
import requests
from bs4 import BeautifulSoup
import json
import joblib

from ingest_data import load_epl_data
from preprocess_data import prepare_features
from inference_utils import (
    clean_team,
    implied_from_odds,
    build_features_for_fixture,
)

# --------- Load pipeline artifacts once ---------

def _next_saturday_str(today: Optional[date] = None) -> str:
    if today is None:
        today = date.today()
    days_ahead = 5 - today.weekday()  # 5=Saturday
    if days_ahead <= 0:
        days_ahead += 7
    return (today + timedelta(days=days_ahead)).strftime("%Y-%m-%d")


def _read_team_list(path: Path) -> List[str]:
    if not path.exists():
        return []
    names: List[str] = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            name = line.strip()
            if not name:
                continue
            names.append(name)
    return names


def _load_feature_meta() -> Tuple[List[str], int]:
    candidates = [Path("feature_columns.json"), Path("data") / "feature_columns.json"]
    for p in candidates:
        if p.exists():
            with open(p, "r", encoding="utf-8") as f:
                meta = json.load(f)
            return meta.get("X_cols", []), int(meta.get("WINDOW", 7))
    return [], 7


def init_pipeline():
    # Data
    data_raw = load_epl_data(start_y1=2010, end_y1=None, verbose=False)
    feat_df, X_cols_generated, WINDOW_generated, base_df = prepare_features(data_raw, window=7, verbose=False)

    # Features meta (prefer saved training order)
    X_cols_saved, WINDOW_saved = _load_feature_meta()
    X_cols = X_cols_saved if X_cols_saved else X_cols_generated
    window = WINDOW_saved if X_cols_saved else WINDOW_generated

    # Model
    model = None
    for mp in [Path("model") / "model_xgb_isotonic.joblib", Path("model_xgb_isotonic.joblib")]:
        if mp.exists():
            model = joblib.load(str(mp))
            break
    if model is None:
        raise FileNotFoundError("Model not found at ./model/model_xgb_isotonic.joblib")

    # Team list (for UI)
    team_list = _read_team_list(Path("data") / "team name.txt")
    if not team_list:
        # fallback to unique teams from data
        team_list = sorted(set(base_df["home"]).union(set(base_df["away"])))

    return {
        "feat_df": feat_df,
        "df": base_df,
        "X_cols": X_cols,
        "window": window,
        "model": model,
        "team_list": team_list,
    }


PIPE = init_pipeline()


# --------- Inference helpers for UI ---------

def manual_predict(home_team: str, away_team: str, match_date: str,
                   home_odds: str = "", draw_odds: str = "", away_odds: str = ""):
    if not home_team or not away_team or not match_date:
        return "Please select Home, Away and Date.", None

    odds_tuple: Optional[Tuple[float, float, float]] = None
    if home_odds and draw_odds and away_odds:
        try:
            odds_tuple = (float(home_odds), float(draw_odds), float(away_odds))
        except Exception:
            return "Invalid odds input. Leave blank or enter numeric decimals.", None

    try:
        X_new, ctx = build_features_for_fixture(
            home_team, away_team, match_date,
            df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
            odds_tuple=odds_tuple, feat_df_for_medians=PIPE["feat_df"],
        )
        proba = PIPE["model"].predict_proba(X_new)[0]
        labels = ["H (Home Win)", "D (Draw)", "A (Away Win)"]
        res = pd.DataFrame({"Outcome": labels, "Probability": [float(p) for p in proba]})
        return res, ctx
    except Exception as e:
        return f"Error: {e}", None


def fetch_next_week_fixtures_and_predict(api_key: Optional[str] = None):
    # Use football-data.org if API key provided, else return message
    if not api_key:
        return "Set FOOTBALL_DATA_API_KEY env or provide API key in the textbox to auto-fetch fixtures.", None

    base_url = "https://api.football-data.org/v4/competitions/PL/matches"
    d_from = date.today()
    d_to = d_from + timedelta(days=7)
    params = {
        "status": "SCHEDULED",
        "dateFrom": d_from.strftime("%Y-%m-%d"),
        "dateTo": d_to.strftime("%Y-%m-%d"),
    }
    headers = {"X-Auth-Token": api_key}
    r = requests.get(base_url, headers=headers, params=params, timeout=30)
    if r.status_code != 200:
        return f"API error {r.status_code}: {r.text}", None
    data = r.json()
    matches = data.get("matches", [])
    if not matches:
        return "No scheduled PL matches in the next 7 days.", None

    rows = []
    for m in matches:
        home = clean_team(m.get("homeTeam", {}).get("name", ""))
        away = clean_team(m.get("awayTeam", {}).get("name", ""))
        when = m.get("utcDate", "")
        try:
            match_date = dt.fromisoformat(when.replace("Z", "+00:00")).date().strftime("%Y-%m-%d")
        except Exception:
            match_date = _next_saturday_str()

        try:
            X_new, ctx = build_features_for_fixture(
                home, away, match_date,
                df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
                odds_tuple=None, feat_df_for_medians=PIPE["feat_df"],
            )
            proba = PIPE["model"].predict_proba(X_new)[0]
            rows.append({
                "Date": match_date,
                "Home": home,
                "Away": away,
                "P(Home)": float(proba[0]),
                "P(Draw)": float(proba[1]),
                "P(Away)": float(proba[2]),
            })
        except Exception as e:
            rows.append({
                "Date": match_date,
                "Home": home,
                "Away": away,
                "Error": str(e),
            })

    df_out = pd.DataFrame(rows)
    if not df_out.empty:
        df_out = df_out.sort_values(["Date", "Home"]).reset_index(drop=True)
    return df_out, None


def _alias_team_name(name: str) -> str:
    """Map scraped team names to our canonical names when obvious.
    Add common aliases here. Fallback to cleaned name.
    """
    aliases = {
        "Man City": "Manchester City",
        "Man Utd": "Manchester United",
        "Nott'm Forest": "Nottingham Forest",
        "Newcastle Utd": "Newcastle",
        "Spurs": "Tottenham",
        "Brighton & Hove Albion": "Brighton",
        "Sheff Utd": "Sheffield United",
        "Sheff Wed": "Sheffield Wednesday",
        "West Bromwich Albion": "West Brom",
        "West Brom": "West Brom",
        "Wolverhampton Wanderers": "Wolves",
        "Queens Park Rangers": "QPR",
    }
    n = clean_team(name)
    return aliases.get(n, n)


def fetch_next_week_fixtures_and_predict_free():
    """Scrape BBC Sport fixtures for the next 7 days (Premier League) and predict all.
    No API key required. BBC structure may change over time; this parser is best-effort.
    """
    def _scrape_bbc_for_date(day: date):
        """Return list of (home, away) for given date from BBC."""
        urls = [
            f"https://www.bbc.com/sport/football/premier-league/scores-fixtures/{day:%Y-%m-%d}",
            f"https://www.bbc.com/sport/football/scores-fixtures/{day:%Y-%m-%d}?competition=premier-league",
            f"https://www.bbc.co.uk/sport/football/premier-league/scores-fixtures/{day:%Y-%m-%d}",
        ]
        pairs = []
        headers = {"User-Agent": "Mozilla/5.0"}
        for url in urls:
            try:
                r = requests.get(url, timeout=30, headers=headers)
                if r.status_code != 200 or not r.text:
                    continue
                soup = BeautifulSoup(r.text, "html.parser")
                # Several selector strategies
                # 1) sp-c-fixture blocks
                for fx in soup.select(".sp-c-fixture"):
                    tnames = fx.select(".sp-c-fixture__team-name, .sp-c-fixture__team-name-trunc, [data-testid='team-name']")
                    if len(tnames) >= 2:
                        home = _alias_team_name(tnames[0].get_text(strip=True))
                        away = _alias_team_name(tnames[1].get_text(strip=True))
                        if home and away:
                            pairs.append((home, away))
                # 2) generic match-block containers
                for blk in soup.select('[data-component="match-block"], [data-testid="match-block"]'):
                    teams = blk.select('[itemprop="name"], .sp-c-fixture__team-name, [data-testid="team-name"]')
                    # If page bundles many team names, take pairs sequentially
                    buf = [t.get_text(strip=True) for t in teams]
                    for i in range(0, len(buf) - 1, 2):
                        home = _alias_team_name(buf[i])
                        away = _alias_team_name(buf[i + 1])
                        if home and away:
                            pairs.append((home, away))
                if pairs:
                    break
            except Exception:
                continue
        # de-duplicate
        uniq = []
        seen = set()
        for h, a in pairs:
            key = (h, a)
            if key not in seen:
                seen.add(key)
                uniq.append((h, a))
        return uniq

    rows = []
    today = date.today()
    for d in range(0, 7):
        day = today + timedelta(days=d)
        pairs = _scrape_bbc_for_date(day)
        for home, away in pairs:
            match_date = day.strftime("%Y-%m-%d")
            try:
                X_new, ctx = build_features_for_fixture(
                    home, away, match_date,
                    df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
                    odds_tuple=None, feat_df_for_medians=PIPE["feat_df"],
                )
                proba = PIPE["model"].predict_proba(X_new)[0]
                rows.append({
                    "Date": match_date,
                    "Home": home,
                    "Away": away,
                    "P(Home)": float(proba[0]),
                    "P(Draw)": float(proba[1]),
                    "P(Away)": float(proba[2]),
                })
            except Exception as e:
                rows.append({
                    "Date": match_date,
                    "Home": home,
                    "Away": away,
                    "Error": str(e),
                })

    if not rows:
        return "Could not find PL fixtures from BBC for the next 7 days.", None
    df_out = pd.DataFrame(rows)
    df_out = df_out.sort_values(["Date", "Home"]).reset_index(drop=True)
    return df_out, None


# --------- Build Gradio UI ---------

def make_app():
    with gr.Blocks(title="EPL Match Prediction") as demo:
        gr.Markdown("""
# EPL Match Prediction
- Manual mode: pick teams and a date (optionally odds) and get predicted probabilities.
- Auto mode: fetch next week's Premier League fixtures (requires football-data.org API key) and predict all.
""")

        with gr.Tab("Manual"):
            with gr.Row():
                home_dd = gr.Dropdown(choices=PIPE["team_list"], label="Home Team", value=PIPE["team_list"][0] if PIPE["team_list"] else None)
                away_dd = gr.Dropdown(choices=PIPE["team_list"], label="Away Team", value=PIPE["team_list"][1] if len(PIPE["team_list"]) > 1 else None)
                date_tb = gr.Textbox(label="Match Date (YYYY-MM-DD)", value=_next_saturday_str())
            with gr.Accordion("Optional: Odds (decimal)", open=False):
                home_od = gr.Textbox(label="Home Odds")
                draw_od = gr.Textbox(label="Draw Odds")
                away_od = gr.Textbox(label="Away Odds")
            btn = gr.Button("Predict")
            out_tbl = gr.Dataframe(label="Probabilities", interactive=False)
            out_json = gr.JSON(label="Context")

            def _on_predict(h, a, d, ho, do, ao):
                res, ctx = manual_predict(h, a, d, ho, do, ao)
                if isinstance(res, str):
                    return pd.DataFrame({"Message": [res]}), ctx
                return res, ctx

            btn.click(_on_predict, inputs=[home_dd, away_dd, date_tb, home_od, draw_od, away_od], outputs=[out_tbl, out_json])

        with gr.Tab("Next Week Fixtures"):
            gr.Markdown("Fetch next week's Premier League fixtures via API or scraping (no API key).")
            api_key_tb = gr.Textbox(label="FOOTBALL_DATA_API_KEY", value=os.getenv("FOOTBALL_DATA_API_KEY", ""), type="password")
            with gr.Row():
                btn2 = gr.Button("Fetch via API and Predict")
                btn3 = gr.Button("Fetch via Scraping (No API Key)")
            out_tbl2 = gr.Dataframe(label="Next 7 days fixtures predictions", interactive=False)
            msg = gr.Markdown(visible=True)

            def _on_fetch(k):
                res, _ = fetch_next_week_fixtures_and_predict(k.strip() or None)
                if isinstance(res, str):
                    return pd.DataFrame(), res
                return res, f"Found {len(res)} fixtures."

            btn2.click(_on_fetch, inputs=[api_key_tb], outputs=[out_tbl2, msg])

            def _on_scrape():
                res, _ = fetch_next_week_fixtures_and_predict_free()
                if isinstance(res, str):
                    return pd.DataFrame(), res
                return res, f"Found {len(res)} fixtures (scraped)."

            btn3.click(_on_scrape, inputs=[], outputs=[out_tbl2, msg])

    return demo


def main():
    app = make_app()
    app.launch()


if __name__ == "__main__":
    main()

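A hedged local smoke test of the manual path: importing gradio_app runs init_pipeline, which downloads the seasons and loads the model, so this is slow on first run; the team names, date, and odds below are illustrative only and must match entries in "team name.txt".

from gradio_app import manual_predict

# Illustrative inputs (hypothetical fixture); odds are optional decimal strings.
res, ctx = manual_predict("Arsenal", "Chelsea", "2025-05-03", "1.80", "3.90", "4.50")
print(res)  # DataFrame with H/D/A probabilities
print(ctx)  # Elo snapshot and implied-odds context used for the feature row
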
inference_utils.py
ADDED
@@ -0,0 +1,193 @@
import numpy as np
import pandas as pd
from collections import defaultdict
from datetime import datetime
from typing import Optional, Tuple, Dict, List
from unidecode import unidecode

# --- Team name cleaner (shared) ---
def clean_team(s: str) -> str:
    if pd.isna(s):
        return s
    s = unidecode(str(s)).strip()
    s = " ".join(s.split())
    return s

# --- Odds -> implied probabilities (normalized) ---
def implied_from_odds(odds_tuple: Tuple[float, float, float]) -> Optional[Tuple[float, float, float]]:
    h, d, a = odds_tuple
    if min(h, d, a) <= 1.0:
        return None
    inv = np.array([1.0/h, 1.0/d, 1.0/a], dtype=float)
    s = inv.sum()
    if s <= 0:
        return None
    p = inv / s
    return float(p[0]), float(p[1]), float(p[2])

# --- Elo snapshot from historical df (up to date-1) ---
def compute_elo_snapshot(df_hist: pd.DataFrame, base_elo: float = 1500.0, K: float = 20.0, home_adv: float = 60.0) -> Dict[str, float]:
    elo = defaultdict(lambda: base_elo)
    dfh = df_hist.sort_values("date").reset_index(drop=True)
    for _, r in dfh.iterrows():
        h, a = r["home"], r["away"]
        eh, ea = elo[h], elo[a]
        ph = 1.0/(1.0 + 10**(-((eh+home_adv)-ea)/400))
        if r["ftr"] == "H":
            oh, oa = 1.0, 0.0
        elif r["ftr"] == "D":
            oh, oa = 0.5, 0.5
        else:
            oh, oa = 0.0, 1.0
        elo[h] = eh + K*(oh - ph)
        elo[a] = ea + K*((1.0-oh) - (1.0-ph))
    return dict(elo)

# --- Build rolling features for a single team from history ---
def team_rolling_features(df_hist: pd.DataFrame, team_name: str, window: int = 6):
    rows: List[dict] = []
    for _, r in df_hist.iterrows():
        rows.append({
            "date": r["date"], "team": r["home"], "is_home": 1,
            "gf": r["fthg"], "ga": r["ftag"],
            "shots_f": r.get("hs", np.nan), "shots_a": r.get("as", np.nan),
            "sot_f": r.get("hst", np.nan), "sot_a": r.get("ast", np.nan),
            "corn_f": r.get("hc", np.nan), "corn_a": r.get("ac", np.nan),
            "y_f": r.get("hy", np.nan), "y_a": r.get("ay", np.nan),
            "r_f": r.get("hr", np.nan), "r_a": r.get("ar", np.nan),
            "points": 3 if r["ftr"] == "H" else (1 if r["ftr"] == "D" else 0),
        })
        rows.append({
            "date": r["date"], "team": r["away"], "is_home": 0,
            "gf": r["ftag"], "ga": r["fthg"],
            "shots_f": r.get("as", np.nan), "shots_a": r.get("hs", np.nan),
            "sot_f": r.get("ast", np.nan), "sot_a": r.get("hst", np.nan),
            "corn_f": r.get("ac", np.nan), "corn_a": r.get("hc", np.nan),
            "y_f": r.get("ay", np.nan), "y_a": r.get("hy", np.nan),
            "r_f": r.get("ar", np.nan), "r_a": r.get("hr", np.nan),
            "points": 3 if r["ftr"] == "A" else (1 if r["ftr"] == "D" else 0),
        })
    tm = pd.DataFrame(rows)
    if tm.empty:
        return None, None

    tm = tm.sort_values(["team", "date"]).reset_index(drop=True)
    agg_cols = ["gf", "ga", "shots_f", "shots_a", "sot_f", "sot_a", "corn_f", "corn_a", "y_f", "r_f", "points"]

    feats_home = {}
    feats_away = {}

    if (tm["team"] == team_name).any():
        tdf = tm[tm["team"] == team_name]
        if (tdf["is_home"] == 1).any():
            t_home = tdf[tdf["is_home"] == 1]
            for col in agg_cols:
                feats_home[f"roll_{col}"] = t_home[col].tail(window).mean()
        else:
            for col in agg_cols:
                feats_home[f"roll_{col}"] = np.nan

        if (tdf["is_home"] == 0).any():
            t_away = tdf[tdf["is_home"] == 0]
            for col in agg_cols:
                feats_away[f"roll_{col}"] = t_away[col].tail(window).mean()
        else:
            for col in agg_cols:
                feats_away[f"roll_{col}"] = np.nan
    else:
        for col in agg_cols:
            feats_home[f"roll_{col}"] = np.nan
            feats_away[f"roll_{col}"] = np.nan

    return feats_home, feats_away

# --- Build full feature vector for a fixture ---
def build_features_for_fixture(
    home_team: str,
    away_team: str,
    date_str: str,
    df_all: pd.DataFrame,
    X_cols: List[str],
    window: int = 6,
    odds_tuple: Optional[Tuple[float, float, float]] = None,
    feat_df_for_medians: Optional[pd.DataFrame] = None,
):
    home = clean_team(home_team)
    away = clean_team(away_team)
    match_date = datetime.strptime(date_str, "%Y-%m-%d")

    df_hist = df_all[df_all["date"] < match_date].copy()
    if df_hist.empty:
        raise ValueError("No historical data found before match date. Try a later date or load more seasons.")

    # Elo snapshot
    elo_map = compute_elo_snapshot(df_hist)
    elo_home = float(elo_map.get(home, 1500.0))
    elo_away = float(elo_map.get(away, 1500.0))
    elo_diff = elo_home - elo_away

    # Rolling features by role
    feats_home_homeRole, _ = team_rolling_features(df_hist, home, window)
    _, feats_away_awayRole = team_rolling_features(df_hist, away, window)

    if feats_home_homeRole is None:
        feats_home_homeRole = {f"roll_{c}": np.nan for c in ["gf","ga","shots_f","shots_a","sot_f","sot_a","corn_f","corn_a","y_f","r_f","points"]}
    if feats_away_awayRole is None:
        feats_away_awayRole = {f"roll_{c}": np.nan for c in ["gf","ga","shots_f","shots_a","sot_f","sot_a","corn_f","corn_a","y_f","r_f","points"]}

    # Odds -> probs
    if odds_tuple is not None:
        probs = implied_from_odds(odds_tuple)
        if probs is None:
            raise ValueError("Invalid odds provided. Use decimal odds > 1.0")
        pH, pD, pA = probs
    else:
        # Proxy from Elo + average draw rate
        draws = (df_hist["ftr"] == "D").mean()
        draws = float(draws) if not np.isnan(draws) and draws > 0 else 0.25
        k = 400.0
        ph_nodraw = 1.0/(1.0 + 10**(-(elo_diff)/k))
        pa_nodraw = 1.0 - ph_nodraw
        pH = ph_nodraw * (1.0 - draws)
        pA = pa_nodraw * (1.0 - draws)
        pD = draws
        s = pH + pD + pA
        pH, pD, pA = pH/s, pD/s, pA/s

    # Assemble features in X_cols order
    feat_row: Dict[str, float] = {}

    for k, v in feats_home_homeRole.items():
        feat_row[f"home_{k}"] = v
    for k, v in feats_away_awayRole.items():
        feat_row[f"away_{k}"] = v

    feat_row["elo_home"] = elo_home
    feat_row["elo_away"] = elo_away
    feat_row["elo_diff"] = elo_diff

    feat_row["p_odds_H"] = float(pH)
    feat_row["p_odds_D"] = float(pD)
    feat_row["p_odds_A"] = float(pA)

    x_vals = [feat_row.get(c, np.nan) for c in X_cols]
    x = np.array(x_vals, dtype=float).reshape(1, -1)

    if feat_df_for_medians is not None:
        medians = {c: float(feat_df_for_medians[c].median()) if c in feat_df_for_medians.columns else 0.0 for c in X_cols}
    else:
        # fallback zeros
        medians = {c: 0.0 for c in X_cols}

    for j, c in enumerate(X_cols):
        if np.isnan(x[0, j]):
            x[0, j] = medians[c]

    context = {
        "p_odds_H": pH,
        "p_odds_D": pD,
        "p_odds_A": pA,
        "elo_home": elo_home,
        "elo_away": elo_away,
    }
    return x, context

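A quick worked example of the odds normalisation in implied_from_odds, using illustrative decimal odds that are not from the source: inverses 1/2.10 + 1/3.40 + 1/3.60 = 0.476 + 0.294 + 0.278 = 1.048 (the bookmaker overround), so dividing through gives roughly (0.454, 0.281, 0.265), which sums to 1.

from inference_utils import implied_from_odds

# Hypothetical decimal odds: 2.10 home, 3.40 draw, 3.60 away.
print(implied_from_odds((2.10, 3.40, 3.60)))  # ~ (0.454, 0.281, 0.265)
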
ingest_data.py
ADDED
@@ -0,0 +1,67 @@
import io
from collections import defaultdict
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from unidecode import unidecode
from tqdm import tqdm


# -----------------------------
# 1) Download EPL seasons
# -----------------------------
BASE_URL = "https://www.football-data.co.uk/mmz4281/{scode}/E0.csv"

def season_code(y1):
    # 1993 -> '9394', 2024 -> '2425'
    return f"{str(y1)[-2:]}{(y1+1)%100:02d}"

def try_read_csv_bytes(content):
    for enc in ("utf-8", "latin-1", "ISO-8859-1"):
        try:
            return pd.read_csv(io.BytesIO(content), encoding=enc)
        except Exception:
            pass
    return pd.DataFrame()

def fetch_season(y1, verbose=True):
    scode = season_code(y1)
    url = BASE_URL.format(scode=scode)
    r = requests.get(url, timeout=30)
    if r.status_code != 200 or len(r.content) < 100:
        if verbose: print(f"[skip] {y1}-{(y1+1)%100:02d} ({scode}) not available")
        return pd.DataFrame()
    df = try_read_csv_bytes(r.content)
    if df.empty:
        if verbose: print(f"[warn] parse error {scode}")
        return pd.DataFrame()
    df["SeasonFirstYear"] = y1
    df["Season"] = f"{y1}-{str(y1+1)[-2:]}"
    df["SeasonCode"] = scode
    return df

def load_epl_data(start_y1: int = 1993, end_y1: int | None = None, verbose: bool = False) -> pd.DataFrame:
    """Download and concatenate EPL seasons into a single DataFrame.

    - start_y1: first season starting year (inclusive), e.g., 1993
    - end_y1: last season starting year (inclusive). Defaults to current year + 1 to include latest.
    - verbose: print per-season logs
    """
    if end_y1 is None:
        end_y1 = datetime.now().year + 1
    frames = []
    for y in tqdm(range(start_y1, end_y1 + 1)):
        df = fetch_season(y, verbose=verbose)
        if not df.empty:
            frames.append(df)
    data_raw = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if data_raw.empty:
        raise RuntimeError("No data downloaded. Try expanding start_y1 backward or check internet.")
    return data_raw

if __name__ == "__main__":
    # Example usage when running this file directly
    data_raw = load_epl_data(start_y1=1993, end_y1=datetime.now().year + 1, verbose=True)
    print("Seasons loaded:", sorted(data_raw["Season"].unique()))

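A hedged usage sketch for the loader: the season range below is an arbitrary example to keep a local run fast (the Space itself calls load_epl_data(start_y1=2010, ...) in init_pipeline), and it needs internet access since each season's E0.csv is downloaded from football-data.co.uk.

from ingest_data import load_epl_data

# Arbitrary shorter range; prints how much history was pulled.
data_raw = load_epl_data(start_y1=2018, verbose=True)
print(data_raw["Season"].nunique(), "seasons,", len(data_raw), "rows")
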
model_lgb_isotonic.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2ade3ab60965b439f8ef7ca79ba9ab29c6ecaa437d32c79eebb6e757ae15df4c
size 24789872

model_xgb_isotonic.joblib
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:da45dfb1d875ccca9f825c363169b81ea8a6aff71333b8f330bc6d1d0ff91939
size 22864683

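Both .joblib entries are Git LFS pointer files, so the actual binaries must be fetched (e.g. with git lfs pull) before loading. A hedged sketch of the expected interface: the zero-filled row is a placeholder standing in for the 28 features listed in feature_columns.json, and real rows come from build_features_for_fixture; gradio_app.py reads the predict_proba columns as H, D, A in that order.

import joblib
import numpy as np

# Placeholder input: 1 row x 28 features in the X_cols order from feature_columns.json.
model = joblib.load("model_xgb_isotonic.joblib")
x = np.zeros((1, 28))
print(model.predict_proba(x))  # -> [[P(H), P(D), P(A)]]
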
requirements.txt
ADDED
@@ -0,0 +1,10 @@
pandas
numpy
scikit-learn
xgboost
lightgbm
unidecode
requests
tqdm
gradio
beautifulsoup4

team name.txt
ADDED
@@ -0,0 +1,91 @@
Arsenal

Aston Villa

Birmingham

Blackburn

Blackpool

Bolton

Bournemouth

Brentford

Brighton

Burnley

Cardiff

Charlton

Chelsea

Coventry

Crystal Palace

Derby

Everton

Fulham

Huddersfield

Hull

Ipswich

Leeds

Leicester

Liverpool

Luton

Manchester City

Manchester United

Middlesbrough

Newcastle

Norwich

Nottingham Forest

Portsmouth

QPR

Reading

Sheffield United

Sheffield Wednesday

Southampton

Stoke

Sunderland

Swansea

Tottenham

Watford

West Brom

West Ham

Wigan

Wolves