Teera committed on
Commit
46b7fa3
·
verified ·
1 Parent(s): e6f6cf2

Upload 8 files

Browse files
feature_columns.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "X_cols": [
3
+ "home_roll_gf",
4
+ "home_roll_ga",
5
+ "home_roll_shots_f",
6
+ "home_roll_shots_a",
7
+ "home_roll_sot_f",
8
+ "home_roll_sot_a",
9
+ "home_roll_corn_f",
10
+ "home_roll_corn_a",
11
+ "home_roll_y_f",
12
+ "home_roll_r_f",
13
+ "home_roll_points",
14
+ "away_roll_gf",
15
+ "away_roll_ga",
16
+ "away_roll_shots_f",
17
+ "away_roll_shots_a",
18
+ "away_roll_sot_f",
19
+ "away_roll_sot_a",
20
+ "away_roll_corn_f",
21
+ "away_roll_corn_a",
22
+ "away_roll_y_f",
23
+ "away_roll_r_f",
24
+ "away_roll_points",
25
+ "elo_home",
26
+ "elo_away",
27
+ "elo_diff",
28
+ "p_odds_H",
29
+ "p_odds_D",
30
+ "p_odds_A"
31
+ ],
32
+ "WINDOW": 7
33
+ }
gradio_app.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from datetime import date, timedelta, datetime as dt
4
+ from typing import List, Optional, Tuple
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import gradio as gr
9
+ import requests
10
+ from bs4 import BeautifulSoup
11
+ import json
12
+ import joblib
13
+
14
+ from ingest_data import load_epl_data
15
+ from preprocess_data import prepare_features
16
+ from inference_utils import (
17
+ clean_team,
18
+ implied_from_odds,
19
+ build_features_for_fixture,
20
+ )
21
+
22
+ # --------- Load pipeline artifacts once ---------
23
+
24
+ def _next_saturday_str(today: Optional[date] = None) -> str:
25
+ if today is None:
26
+ today = date.today()
27
+ days_ahead = 5 - today.weekday() # 5=Saturday
28
+ if days_ahead <= 0:
29
+ days_ahead += 7
30
+ return (today + timedelta(days=days_ahead)).strftime("%Y-%m-%d")
31
+
32
+
33
+ def _read_team_list(path: Path) -> List[str]:
34
+ if not path.exists():
35
+ return []
36
+ names: List[str] = []
37
+ with open(path, "r", encoding="utf-8") as f:
38
+ for line in f:
39
+ name = line.strip()
40
+ if not name:
41
+ continue
42
+ names.append(name)
43
+ return names
44
+
45
+
46
+ def _load_feature_meta() -> Tuple[List[str], int]:
47
+ candidates = [Path("feature_columns.json"), Path("data") / "feature_columns.json"]
48
+ for p in candidates:
49
+ if p.exists():
50
+ with open(p, "r", encoding="utf-8") as f:
51
+ meta = json.load(f)
52
+ return meta.get("X_cols", []), int(meta.get("WINDOW", 7))
53
+ return [], 7
54
+
55
+
56
def init_pipeline():
    """Load data, features, model and team list once at startup.

    Returns a dict with keys: ``feat_df`` (feature frame), ``df`` (raw match
    frame), ``X_cols`` (feature column order), ``window`` (rolling window),
    ``model`` (fitted classifier), ``team_list`` (UI dropdown choices).

    Raises:
        FileNotFoundError: when no serialized model is found on disk.
    """
    # Data: download/parse seasons from 2010 onward.
    data_raw = load_epl_data(start_y1=2010, end_y1=None, verbose=False)
    feat_df, X_cols_generated, WINDOW_generated, base_df = prepare_features(data_raw, window=7, verbose=False)

    # Features meta (prefer saved training order so inference matches training).
    X_cols_saved, WINDOW_saved = _load_feature_meta()
    X_cols = X_cols_saved if X_cols_saved else X_cols_generated
    window = WINDOW_saved if X_cols_saved else WINDOW_generated

    # Model: first matching path wins; ./model/ is the packaged location.
    model = None
    for mp in [Path("model") / "model_xgb_isotonic.joblib", Path("model_xgb_isotonic.joblib")]:
        if mp.exists():
            model = joblib.load(str(mp))
            break
    if model is None:
        raise FileNotFoundError("Model not found at ./model/model_xgb_isotonic.joblib")

    # Team list (for UI)
    team_list = _read_team_list(Path("data") / "team name.txt")
    if not team_list:
        # fallback to unique teams from data
        team_list = sorted(set(base_df["home"]).union(set(base_df["away"])))

    return {
        "feat_df": feat_df,
        "df": base_df,
        "X_cols": X_cols,
        "window": window,
        "model": model,
        "team_list": team_list,
    }
89
+
90
+
91
# Module-level singleton: pipeline state (data, model, feature metadata)
# loaded once at import time and shared by all UI callbacks.
PIPE = init_pipeline()
92
+
93
+
94
+ # --------- Inference helpers for UI ---------
95
+
96
def manual_predict(home_team: str, away_team: str, match_date: str,
                   home_odds: str = "", draw_odds: str = "", away_odds: str = ""):
    """Predict H/D/A probabilities for a user-specified fixture.

    Odds are optional: all three must be provided (decimal strings) to be
    used; otherwise the feature builder derives probabilities itself.

    Returns:
        (DataFrame of outcome probabilities, context dict) on success,
        (error message string, None) on any failure.
    """
    if not home_team or not away_team or not match_date:
        return "Please select Home, Away and Date.", None

    odds_tuple: Optional[Tuple[float, float, float]] = None
    if home_odds and draw_odds and away_odds:
        try:
            odds_tuple = (float(home_odds), float(draw_odds), float(away_odds))
        except Exception:
            return "Invalid odds input. Leave blank or enter numeric decimals.", None

    try:
        X_new, ctx = build_features_for_fixture(
            home_team, away_team, match_date,
            df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
            odds_tuple=odds_tuple, feat_df_for_medians=PIPE["feat_df"],
        )
        # NOTE(review): assumes the model's class order is [H, D, A] —
        # confirm against the training-time label encoding.
        proba = PIPE["model"].predict_proba(X_new)[0]
        labels = ["H (Home Win)", "D (Draw)", "A (Away Win)"]
        res = pd.DataFrame({"Outcome": labels, "Probability": [float(p) for p in proba]})
        return res, ctx
    except Exception as e:
        return f"Error: {e}", None
120
+
121
+
122
def fetch_next_week_fixtures_and_predict(api_key: Optional[str] = None):
    """Fetch next week's PL fixtures from football-data.org and predict each.

    Requires an API key; without one a help message is returned instead.

    Returns:
        (DataFrame of per-fixture predictions, None) on success,
        (message string, None) otherwise.
    """
    # Use football-data.org if API key provided, else return message
    if not api_key:
        return "Set FOOTBALL_DATA_API_KEY env or provide API key in the textbox to auto-fetch fixtures.", None

    base_url = "https://api.football-data.org/v4/competitions/PL/matches"
    d_from = date.today()
    d_to = d_from + timedelta(days=7)
    params = {
        "status": "SCHEDULED",
        "dateFrom": d_from.strftime("%Y-%m-%d"),
        "dateTo": d_to.strftime("%Y-%m-%d"),
    }
    headers = {"X-Auth-Token": api_key}
    r = requests.get(base_url, headers=headers, params=params, timeout=30)
    if r.status_code != 200:
        return f"API error {r.status_code}: {r.text}", None
    data = r.json()
    matches = data.get("matches", [])
    if not matches:
        return "No scheduled PL matches in the next 7 days.", None

    rows = []
    for m in matches:
        home = clean_team(m.get("homeTeam", {}).get("name", ""))
        away = clean_team(m.get("awayTeam", {}).get("name", ""))
        when = m.get("utcDate", "")
        try:
            # utcDate is ISO-8601 with a trailing 'Z'; fromisoformat needs +00:00.
            match_date = dt.fromisoformat(when.replace("Z", "+00:00")).date().strftime("%Y-%m-%d")
        except Exception:
            match_date = _next_saturday_str()

        try:
            X_new, ctx = build_features_for_fixture(
                home, away, match_date,
                df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
                odds_tuple=None, feat_df_for_medians=PIPE["feat_df"],
            )
            # NOTE(review): assumes predict_proba class order is [H, D, A].
            proba = PIPE["model"].predict_proba(X_new)[0]
            rows.append({
                "Date": match_date,
                "Home": home,
                "Away": away,
                "P(Home)": float(proba[0]),
                "P(Draw)": float(proba[1]),
                "P(Away)": float(proba[2]),
            })
        except Exception as e:
            # Per-fixture failures are reported inline instead of aborting the batch.
            rows.append({
                "Date": match_date,
                "Home": home,
                "Away": away,
                "Error": str(e),
            })

    df_out = pd.DataFrame(rows)
    if not df_out.empty:
        df_out = df_out.sort_values(["Date", "Home"]).reset_index(drop=True)
    return df_out, None
181
+
182
+
183
def _alias_team_name(name: str) -> str:
    """Translate a scraped club name into the canonical name used by the data.

    Unknown names fall through unchanged after whitespace/accent cleanup.
    Extend the alias table as new discrepancies show up.
    """
    canonical = clean_team(name)
    alias_map = {
        "Man City": "Manchester City",
        "Man Utd": "Manchester United",
        "Nott'm Forest": "Nottingham Forest",
        "Newcastle Utd": "Newcastle",
        "Spurs": "Tottenham",
        "Brighton & Hove Albion": "Brighton",
        "Sheff Utd": "Sheffield United",
        "Sheff Wed": "Sheffield Wednesday",
        "West Bromwich Albion": "West Brom",
        "West Brom": "West Brom",
        "Wolverhampton Wanderers": "Wolves",
        "Queens Park Rangers": "QPR",
    }
    return alias_map.get(canonical, canonical)
203
+
204
+
205
def fetch_next_week_fixtures_and_predict_free():
    """Scrape BBC Sport fixtures for the next 7 days (Premier League) and predict all.
    No API key required. BBC structure may change over time; this parser is best-effort.

    Returns (DataFrame of predictions, None) or (message string, None) when
    nothing could be scraped.
    """
    def _scrape_bbc_for_date(day: date):
        """Return list of (home, away) for given date from BBC."""
        # Multiple URL shapes are tried because BBC has changed paths before.
        urls = [
            f"https://www.bbc.com/sport/football/premier-league/scores-fixtures/{day:%Y-%m-%d}",
            f"https://www.bbc.com/sport/football/scores-fixtures/{day:%Y-%m-%d}?competition=premier-league",
            f"https://www.bbc.co.uk/sport/football/premier-league/scores-fixtures/{day:%Y-%m-%d}",
        ]
        pairs = []
        headers = {"User-Agent": "Mozilla/5.0"}
        for url in urls:
            try:
                r = requests.get(url, timeout=30, headers=headers)
                if r.status_code != 200 or not r.text:
                    continue
                soup = BeautifulSoup(r.text, "html.parser")
                # Several selector strategies
                # 1) sp-c-fixture blocks
                for fx in soup.select(".sp-c-fixture"):
                    tnames = fx.select(".sp-c-fixture__team-name, .sp-c-fixture__team-name-trunc, [data-testid='team-name']")
                    if len(tnames) >= 2:
                        home = _alias_team_name(tnames[0].get_text(strip=True))
                        away = _alias_team_name(tnames[1].get_text(strip=True))
                        if home and away:
                            pairs.append((home, away))
                # 2) generic match-block containers
                for blk in soup.select('[data-component="match-block"], [data-testid="match-block"]'):
                    teams = blk.select('[itemprop="name"], .sp-c-fixture__team-name, [data-testid="team-name"]')
                    # If page bundles many team names, take pairs sequentially
                    buf = [t.get_text(strip=True) for t in teams]
                    for i in range(0, len(buf) - 1, 2):
                        home = _alias_team_name(buf[i])
                        away = _alias_team_name(buf[i+1])
                        if home and away:
                            pairs.append((home, away))
                if pairs:
                    break
            except Exception:
                # Best-effort scraping: any network/parse error just moves on
                # to the next candidate URL.
                continue
        # de-duplicate while preserving first-seen order
        uniq = []
        seen = set()
        for h, a in pairs:
            key = (h, a)
            if key not in seen:
                seen.add(key)
                uniq.append((h, a))
        return uniq

    rows = []
    today = date.today()
    for d in range(0, 7):
        day = today + timedelta(days=d)
        pairs = _scrape_bbc_for_date(day)
        for home, away in pairs:
            match_date = day.strftime("%Y-%m-%d")
            try:
                X_new, ctx = build_features_for_fixture(
                    home, away, match_date,
                    df_all=PIPE["df"], X_cols=PIPE["X_cols"], window=PIPE["window"],
                    odds_tuple=None, feat_df_for_medians=PIPE["feat_df"],
                )
                # NOTE(review): assumes predict_proba class order is [H, D, A].
                proba = PIPE["model"].predict_proba(X_new)[0]
                rows.append({
                    "Date": match_date,
                    "Home": home,
                    "Away": away,
                    "P(Home)": float(proba[0]),
                    "P(Draw)": float(proba[1]),
                    "P(Away)": float(proba[2]),
                })
            except Exception as e:
                # Report per-fixture failures inline rather than aborting.
                rows.append({
                    "Date": match_date,
                    "Home": home,
                    "Away": away,
                    "Error": str(e),
                })

    if not rows:
        return "Could not find PL fixtures from BBC for the next 7 days.", None
    df_out = pd.DataFrame(rows)
    df_out = df_out.sort_values(["Date", "Home"]).reset_index(drop=True)
    return df_out, None
292
+
293
+
294
+ # --------- Build Gradio UI ---------
295
+
296
def make_app():
    """Build and return the Gradio Blocks UI (Manual + Next Week Fixtures tabs)."""
    with gr.Blocks(title="EPL Match Prediction") as demo:
        gr.Markdown("""
# EPL Match Prediction
- Manual mode: pick teams and a date (optionally odds) and get predicted probabilities.
- Auto mode: fetch next week's Premier League fixtures (requires football-data.org API key) and predict all.
""")

        with gr.Tab("Manual"):
            with gr.Row():
                home_dd = gr.Dropdown(choices=PIPE["team_list"], label="Home Team", value=PIPE["team_list"][0] if PIPE["team_list"] else None)
                away_dd = gr.Dropdown(choices=PIPE["team_list"], label="Away Team", value=PIPE["team_list"][1] if len(PIPE["team_list"])>1 else None)
                date_tb = gr.Textbox(label="Match Date (YYYY-MM-DD)", value=_next_saturday_str())
            with gr.Accordion("Optional: Odds (decimal)", open=False):
                home_od = gr.Textbox(label="Home Odds")
                draw_od = gr.Textbox(label="Draw Odds")
                away_od = gr.Textbox(label="Away Odds")
            btn = gr.Button("Predict")
            out_tbl = gr.Dataframe(label="Probabilities", interactive=False)
            out_json = gr.JSON(label="Context")

            # Wrap manual_predict: string results are error messages and are
            # surfaced as a one-cell DataFrame so the output component renders.
            def _on_predict(h, a, d, ho, do, ao):
                res, ctx = manual_predict(h, a, d, ho, do, ao)
                if isinstance(res, str):
                    return pd.DataFrame({"Message":[res]}), ctx
                return res, ctx

            btn.click(_on_predict, inputs=[home_dd, away_dd, date_tb, home_od, draw_od, away_od], outputs=[out_tbl, out_json])

        with gr.Tab("Next Week Fixtures"):
            gr.Markdown("Fetch next week's Premier League fixtures via API or scraping (no API key).")
            api_key_tb = gr.Textbox(label="FOOTBALL_DATA_API_KEY", value=os.getenv("FOOTBALL_DATA_API_KEY", ""), type="password")
            with gr.Row():
                btn2 = gr.Button("Fetch via API and Predict")
                btn3 = gr.Button("Fetch via Scraping (No API Key)")
            out_tbl2 = gr.Dataframe(label="Next 7 days fixtures predictions", interactive=False)
            msg = gr.Markdown(visible=True)

            def _on_fetch(k):
                res, _ = fetch_next_week_fixtures_and_predict(k.strip() or None)
                if isinstance(res, str):
                    return pd.DataFrame(), res
                return res, f"Found {len(res)} fixtures."

            btn2.click(_on_fetch, inputs=[api_key_tb], outputs=[out_tbl2, msg])

            def _on_scrape():
                res, _ = fetch_next_week_fixtures_and_predict_free()
                if isinstance(res, str):
                    return pd.DataFrame(), res
                return res, f"Found {len(res)} fixtures (scraped)."

            btn3.click(_on_scrape, inputs=[], outputs=[out_tbl2, msg])

    return demo
351
+
352
+
353
def main():
    """Build the Gradio UI and start the local web server (blocks until exit)."""
    app = make_app()
    app.launch()
356
+
357
+
358
+ if __name__ == "__main__":
359
+ main()
inference_utils.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from collections import defaultdict
4
+ from datetime import datetime
5
+ from typing import Optional, Tuple, Dict, List
6
+ from unidecode import unidecode
7
+
8
+ # --- Team name cleaner (shared) ---
9
def clean_team(s: str) -> str:
    """Normalize a team name: transliterate accents to ASCII and collapse whitespace.

    NaN/None inputs are passed through untouched so callers can handle them.
    """
    if pd.isna(s):
        return s
    ascii_name = unidecode(str(s))
    # split() with no arguments removes all runs of whitespace, including
    # leading/trailing, so a single join fully normalizes spacing.
    return " ".join(ascii_name.split())
15
+
16
+ # --- Odds -> implied probabilities (normalized) ---
17
def implied_from_odds(odds_tuple: Tuple[float, float, float]) -> Optional[Tuple[float, float, float]]:
    """Convert decimal (H, D, A) odds into normalized implied probabilities.

    Normalization removes the bookmaker overround so the three values sum
    to 1.  Returns None for invalid odds (any price <= 1.0).
    """
    home, draw, away = odds_tuple
    if min(home, draw, away) <= 1.0:
        return None
    raw = np.array([1.0 / home, 1.0 / draw, 1.0 / away], dtype=float)
    total = raw.sum()
    if total <= 0:
        return None
    norm = raw / total
    return float(norm[0]), float(norm[1]), float(norm[2])
27
+
28
+ # --- Elo snapshot from historical df (up to date-1) ---
29
def compute_elo_snapshot(df_hist: pd.DataFrame, base_elo: float = 1500.0, K: float = 20.0, home_adv: float = 60.0) -> Dict[str, float]:
    """Replay matches in chronological order and return the final Elo per team.

    The home side receives a fixed *home_adv* rating bonus when computing its
    expected score; each update is zero-sum between the two teams.  Teams
    start at *base_elo* the first time they appear.
    """
    ratings: Dict[str, float] = {}
    # Home-team actual score by full-time result; any non-H/D result ("A") is 0.
    home_score = {"H": 1.0, "D": 0.5}
    for _, row in df_hist.sort_values("date").reset_index(drop=True).iterrows():
        home_name, away_name = row["home"], row["away"]
        r_home = ratings.get(home_name, base_elo)
        r_away = ratings.get(away_name, base_elo)
        # Standard logistic expectation with the home-advantage offset applied.
        expected_home = 1.0 / (1.0 + 10 ** (-((r_home + home_adv) - r_away) / 400))
        actual_home = home_score.get(row["ftr"], 0.0)
        delta = K * (actual_home - expected_home)
        ratings[home_name] = r_home + delta
        ratings[away_name] = r_away - delta
    return ratings
45
+
46
+ # --- Build rolling features for a single team from history ---
47
def team_rolling_features(df_hist: pd.DataFrame, team_name: str, window: int = 6):
    """Compute rolling-mean form features for *team_name* from match history.

    Args:
        df_hist: historical matches with home/away stat columns (fthg, ftag,
            ftr, and optional hs/as, hst/ast, hc/ac, hy/ay, hr/ar).
        team_name: team whose form is summarized.
        window: number of most recent matches per role to average over.

    Returns:
        (feats_home, feats_away): dicts of ``roll_<stat>`` means over the
        team's last *window* HOME matches and last *window* AWAY matches
        respectively.  A role with no matches (or an unknown team) yields
        NaN entries.  An empty *df_hist* yields (None, None), matching the
        historical contract of this function.
    """
    agg_cols = ["gf", "ga", "shots_f", "shots_a", "sot_f", "sot_a", "corn_f", "corn_a", "y_f", "r_f", "points"]

    if df_hist.empty:
        # No history at all -> (None, None), as before.
        return None, None

    # Only matches involving the requested team matter for its rolling form.
    # Filtering first avoids building a two-rows-per-match table for every
    # club in history on each call (this function runs twice per fixture).
    involved = df_hist[(df_hist["home"] == team_name) | (df_hist["away"] == team_name)]

    rows: List[dict] = []
    for _, r in involved.iterrows():
        if r["home"] == team_name:
            # Team played at home: "for" stats are the home columns.
            rows.append({
                "date": r["date"], "is_home": 1,
                "gf": r["fthg"], "ga": r["ftag"],
                "shots_f": r.get("hs", np.nan), "shots_a": r.get("as", np.nan),
                "sot_f": r.get("hst", np.nan), "sot_a": r.get("ast", np.nan),
                "corn_f": r.get("hc", np.nan), "corn_a": r.get("ac", np.nan),
                "y_f": r.get("hy", np.nan), "r_f": r.get("hr", np.nan),
                "points": 3 if r["ftr"] == "H" else (1 if r["ftr"] == "D" else 0),
            })
        else:
            # Team played away: "for" stats are the away columns.
            rows.append({
                "date": r["date"], "is_home": 0,
                "gf": r["ftag"], "ga": r["fthg"],
                "shots_f": r.get("as", np.nan), "shots_a": r.get("hs", np.nan),
                "sot_f": r.get("ast", np.nan), "sot_a": r.get("hst", np.nan),
                "corn_f": r.get("ac", np.nan), "corn_a": r.get("hc", np.nan),
                "y_f": r.get("ay", np.nan), "r_f": r.get("ar", np.nan),
                "points": 3 if r["ftr"] == "A" else (1 if r["ftr"] == "D" else 0),
            })

    # Default everything to NaN; fill in per role below when data exists.
    feats_home = {f"roll_{c}": np.nan for c in agg_cols}
    feats_away = {f"roll_{c}": np.nan for c in agg_cols}

    if rows:
        tm = pd.DataFrame(rows).sort_values("date").reset_index(drop=True)
        t_home = tm[tm["is_home"] == 1]
        if not t_home.empty:
            for col in agg_cols:
                feats_home[f"roll_{col}"] = t_home[col].tail(window).mean()
        t_away = tm[tm["is_home"] == 0]
        if not t_away.empty:
            for col in agg_cols:
                feats_away[f"roll_{col}"] = t_away[col].tail(window).mean()

    return feats_home, feats_away
103
+
104
+ # --- Build full feature vector for a fixture ---
105
def build_features_for_fixture(
    home_team: str,
    away_team: str,
    date_str: str,
    df_all: pd.DataFrame,
    X_cols: List[str],
    window: int = 6,
    odds_tuple: Optional[Tuple[float, float, float]] = None,
    feat_df_for_medians: Optional[pd.DataFrame] = None,
):
    """Build the model-ready feature vector for one future fixture.

    Only matches strictly BEFORE *date_str* are used, so there is no leakage
    from the match being predicted.

    Args:
        home_team / away_team: club names (cleaned internally).
        date_str: fixture date, ``YYYY-MM-DD``.
        df_all: full historical match frame.
        X_cols: training-time feature column order; the output array follows it.
        window: rolling-form window size.
        odds_tuple: optional decimal (H, D, A) odds; when absent, implied
            probabilities are proxied from Elo plus the historical draw rate.
        feat_df_for_medians: frame used to impute NaNs with per-column medians
            (zeros are used when not provided).

    Returns:
        (x, context): x is a (1, len(X_cols)) float array; context is a dict
        with the odds probabilities and Elo ratings used.

    Raises:
        ValueError: when no history precedes the date, or the odds are invalid.
    """
    home = clean_team(home_team)
    away = clean_team(away_team)
    match_date = datetime.strptime(date_str, "%Y-%m-%d")

    df_hist = df_all[df_all["date"] < match_date].copy()
    if df_hist.empty:
        raise ValueError("No historical data found before match date. Try a later date or load more seasons.")

    # Elo snapshot replayed over all prior matches; unseen teams get 1500.
    elo_map = compute_elo_snapshot(df_hist)
    elo_home = float(elo_map.get(home, 1500.0))
    elo_away = float(elo_map.get(away, 1500.0))
    elo_diff = elo_home - elo_away

    # Rolling features by role: home team's HOME form, away team's AWAY form.
    feats_home_homeRole, _ = team_rolling_features(df_hist, home, window)
    _, feats_away_awayRole = team_rolling_features(df_hist, away, window)

    if feats_home_homeRole is None:
        feats_home_homeRole = {f"roll_{c}": np.nan for c in ["gf","ga","shots_f","shots_a","sot_f","sot_a","corn_f","corn_a","y_f","r_f","points"]}
    if feats_away_awayRole is None:
        feats_away_awayRole = {f"roll_{c}": np.nan for c in ["gf","ga","shots_f","shots_a","sot_f","sot_a","corn_f","corn_a","y_f","r_f","points"]}

    # Odds -> probs
    if odds_tuple is not None:
        probs = implied_from_odds(odds_tuple)
        if probs is None:
            raise ValueError("Invalid odds provided. Use decimal odds > 1.0")
        pH, pD, pA = probs
    else:
        # Proxy from Elo + average draw rate: split the non-draw mass by the
        # Elo logistic expectation, then renormalize.
        draws = (df_hist["ftr"] == "D").mean()
        draws = float(draws) if not np.isnan(draws) and draws > 0 else 0.25
        k = 400.0
        ph_nodraw = 1.0/(1.0 + 10**(-(elo_diff)/k))
        pa_nodraw = 1.0 - ph_nodraw
        pH = ph_nodraw * (1.0 - draws)
        pA = pa_nodraw * (1.0 - draws)
        pD = draws
        s = pH + pD + pA
        pH, pD, pA = pH/s, pD/s, pA/s

    # Assemble features in X_cols order
    feat_row: Dict[str, float] = {}

    for k, v in feats_home_homeRole.items():
        feat_row[f"home_{k}"] = v
    for k, v in feats_away_awayRole.items():
        feat_row[f"away_{k}"] = v

    feat_row["elo_home"] = elo_home
    feat_row["elo_away"] = elo_away
    feat_row["elo_diff"] = elo_diff

    feat_row["p_odds_H"] = float(pH)
    feat_row["p_odds_D"] = float(pD)
    feat_row["p_odds_A"] = float(pA)

    # Columns the fixture lacks come through as NaN and are imputed below.
    x_vals = [feat_row.get(c, np.nan) for c in X_cols]
    x = np.array(x_vals, dtype=float).reshape(1, -1)

    if feat_df_for_medians is not None:
        medians = {c: float(feat_df_for_medians[c].median()) if c in feat_df_for_medians.columns else 0.0 for c in X_cols}
    else:
        # fallback zeros
        medians = {c: 0.0 for c in X_cols}

    for j, c in enumerate(X_cols):
        if np.isnan(x[0, j]):
            x[0, j] = medians[c]

    context = {
        "p_odds_H": pH,
        "p_odds_D": pD,
        "p_odds_A": pA,
        "elo_home": elo_home,
        "elo_away": elo_away,
    }
    return x, context
ingest_data.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ from collections import defaultdict
3
+ from datetime import datetime
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import requests
8
+ from unidecode import unidecode
9
+ from tqdm import tqdm
10
+
11
+
12
+ # -----------------------------
13
+ # 1) Download EPL seasons
14
+ # -----------------------------
15
+ BASE_URL = "https://www.football-data.co.uk/mmz4281/{scode}/E0.csv"
16
+
17
def season_code(y1):
    """Two-digit season code used by football-data.co.uk, e.g. 1993 -> '9394'."""
    start = str(y1)[-2:]
    end = f"{(y1 + 1) % 100:02d}"
    return start + end
20
+
21
def try_read_csv_bytes(content):
    """Parse CSV bytes, trying a few common encodings in order.

    Returns an empty DataFrame when every encoding fails to parse.
    """
    for encoding in ("utf-8", "latin-1", "ISO-8859-1"):
        try:
            return pd.read_csv(io.BytesIO(content), encoding=encoding)
        except Exception:
            continue
    return pd.DataFrame()
28
+
29
def fetch_season(y1, verbose=True):
    """Download one EPL season CSV from football-data.co.uk.

    Args:
        y1: season starting year (e.g. 2023 for 2023-24).
        verbose: print skip/parse-error messages.

    Returns:
        Parsed DataFrame tagged with SeasonFirstYear/Season/SeasonCode
        columns, or an empty DataFrame when unavailable or unparseable.
    """
    scode = season_code(y1)
    url = BASE_URL.format(scode=scode)
    r = requests.get(url, timeout=30)
    # Tiny bodies are treated as "season not published yet", not data.
    if r.status_code != 200 or len(r.content) < 100:
        if verbose: print(f"[skip] {y1}-{(y1+1)%100:02d} ({scode}) not available")
        return pd.DataFrame()
    df = try_read_csv_bytes(r.content)
    if df.empty:
        if verbose: print(f"[warn] parse error {scode}")
        return pd.DataFrame()
    df["SeasonFirstYear"] = y1
    df["Season"] = f"{y1}-{str(y1+1)[-2:]}"
    df["SeasonCode"] = scode
    return df
44
+
45
def load_epl_data(start_y1: int = 1993, end_y1: int | None = None, verbose: bool = False) -> pd.DataFrame:
    """Download and concatenate EPL seasons into a single DataFrame.

    - start_y1: first season starting year (inclusive), e.g., 1993
    - end_y1: last season starting year (inclusive). Defaults to current year + 1 to include latest.
    - verbose: print per-season logs

    Raises:
        RuntimeError: when no season could be downloaded at all.
    """
    if end_y1 is None:
        end_y1 = datetime.now().year + 1
    frames = []
    # Seasons that are missing/unparseable come back empty and are skipped.
    for y in tqdm(range(start_y1, end_y1 + 1)):
        df = fetch_season(y, verbose=verbose)
        if not df.empty:
            frames.append(df)
    data_raw = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if data_raw.empty:
        raise RuntimeError("No data downloaded. Try expanding start_y1 backward or check internet.")
    return data_raw
63
+
64
+ if __name__ == "__main__":
65
+ # Example usage when running this file directly
66
+ data_raw = load_epl_data(start_y1=1993, end_y1=datetime.now().year + 1, verbose=True)
67
+ print("Seasons loaded:", sorted(data_raw["Season"].unique()))
model_lgb_isotonic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ade3ab60965b439f8ef7ca79ba9ab29c6ecaa437d32c79eebb6e757ae15df4c
3
+ size 24789872
model_xgb_isotonic.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da45dfb1d875ccca9f825c363169b81ea8a6aff71333b8f330bc6d1d0ff91939
3
+ size 22864683
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ scikit-learn
4
+ xgboost
5
+ lightgbm
6
+ unidecode
7
+ requests
8
+ tqdm
9
+ gradio
10
+ beautifulsoup4
team name.txt ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Arsenal
2
+
3
+ Aston Villa
4
+
5
+ Birmingham
6
+
7
+ Blackburn
8
+
9
+ Blackpool
10
+
11
+ Bolton
12
+
13
+ Bournemouth
14
+
15
+ Brentford
16
+
17
+ Brighton
18
+
19
+ Burnley
20
+
21
+ Cardiff
22
+
23
+ Charlton
24
+
25
+ Chelsea
26
+
27
+ Coventry
28
+
29
+ Crystal Palace
30
+
31
+ Derby
32
+
33
+ Everton
34
+
35
+ Fulham
36
+
37
+ Huddersfield
38
+
39
+ Hull
40
+
41
+ Ipswich
42
+
43
+ Leeds
44
+
45
+ Leicester
46
+
47
+ Liverpool
48
+
49
+ Luton
50
+
51
+ Manchester City
52
+
53
+ Manchester United
54
+
55
+ Middlesbrough
56
+
57
+ Newcastle
58
+
59
+ Norwich
60
+
61
+ Nottingham Forest
62
+
63
+ Portsmouth
64
+
65
+ QPR
66
+
67
+ Reading
68
+
69
+ Sheffield United
70
+
71
+ Sheffield Wednesday
72
+
73
+ Southampton
74
+
75
+ Stoke
76
+
77
+ Sunderland
78
+
79
+ Swansea
80
+
81
+ Tottenham
82
+
83
+ Watford
84
+
85
+ West Brom
86
+
87
+ West Ham
88
+
89
+ Wigan
90
+
91
+ Wolves