Spaces:

allenborochin
/

fpl-predictor

Sleeping

App Files Files Community

allenborochin committed 20 days ago

Commit

ac4d749

verified ·

1 Parent(s): b42197e

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -50

app.py CHANGED Viewed

@@ -3,14 +3,13 @@ FPL Player Predictor - Streamlit app for HuggingFace Space.
 Loads trained models + engineered dataset from companion model repo.
 """
-import io
 import urllib.parse
-from pathlib import Path
 import numpy as np
 import pandas as pd
 import pickle
 import streamlit as st
 from huggingface_hub import hf_hub_download
 # ============================================================
@@ -22,7 +21,6 @@ REGRESSION_FILE = "fpl_regression_model.pkl"
 CLASSIFICATION_FILE = "fpl_classification_model.pkl"
 DATASET_FILE = "df_fe.parquet"
-# PL brand palette
 PL_PURPLE = "#37003C"
 PL_CYAN = "#00FF87"
 PL_LIME = "#04F5FF"
@@ -34,7 +32,6 @@ CLASS_LABELS = {0: "Blank", 1: "Decent", 2: "Good", 3: "Haul"}
 CLASS_RANGES = {0: "0-1 pts", 1: "2-4 pts", 2: "5-9 pts", 3: "10+ pts"}
 CLASS_COLORS = {0: "#888888", 1: "#04F5FF", 2: "#00FF87", 3: "#E90052"}
-# Features that are 0/1 booleans - display as Yes/No
 BOOLEAN_FEATURES = {"was_home", "had_haul_last_3", "has_std_history"}
 CLUSTER_FEATURES = {"cluster_0", "cluster_1", "cluster_2", "cluster_3", "cluster_4"}
@@ -66,15 +63,6 @@ st.markdown(
             padding: 20px;
             margin-bottom: 16px;
         }}
-        .pl-pill {{
-            display: inline-block;
-            padding: 4px 12px;
-            border-radius: 999px;
-            font-size: 11px;
-            font-weight: 700;
-            letter-spacing: 0.05em;
-            text-transform: uppercase;
-        }}
         .stSelectbox label, .stRadio label {{
             color: {PL_CYAN} !important;
             font-size: 12px !important;
@@ -125,7 +113,6 @@ st.markdown(
 @st.cache_resource(show_spinner="Loading models from HuggingFace...")
 def load_artifacts():
-    """Pull all 3 artifacts from the model repo on first run, then cache."""
     reg_path = hf_hub_download(repo_id=MODEL_REPO, filename=REGRESSION_FILE)
     cls_path = hf_hub_download(repo_id=MODEL_REPO, filename=CLASSIFICATION_FILE)
     data_path = hf_hub_download(repo_id=MODEL_REPO, filename=DATASET_FILE, repo_type="model")
@@ -144,7 +131,6 @@ def load_artifacts():
 # ============================================================
 def get_player_row(df, name, position, season, gameweek):
-    """Find the exact row matching the user's selection."""
     mask = (
         (df["name"] == name)
         & (df["position"] == position)
@@ -158,37 +144,27 @@ def get_player_row(df, name, position, season, gameweek):
 def predict_regression(row, bundle):
-    """Run regression prediction. Returns predicted points (float)."""
     model = bundle["model"]
     scaler = bundle["scaler"]
     features = bundle["feature_names"]
     X = pd.DataFrame([row[features].values], columns=features)
     X_scaled = scaler.transform(X)
     return float(model.predict(X_scaled)[0])
 def predict_classification(row, bundle):
-    """Run classification prediction. Returns (predicted_class_int, probabilities_dict)."""
     model = bundle["model"]
     scaler = bundle["scaler"]
     features = bundle["feature_names"]
     X = pd.DataFrame([row[features].values], columns=features)
     X_scaled = scaler.transform(X)
     pred_class = int(model.predict(X_scaled)[0])
     probs = model.predict_proba(X_scaled)[0]
     probs_dict = {i: float(p) for i, p in enumerate(probs)}
     return pred_class, probs_dict
-def feature_contributions(row, bundle, top_n_up=8, top_n_down=6):
-    """
-    For LogReg classifier: compute per-feature contribution to the predicted Haul class.
-    Contribution = scaled_feature_value * coefficient_for_class.
-    Returns DataFrame sorted by signed contribution.
-    """
     model = bundle["model"]
     scaler = bundle["scaler"]
     features = bundle["feature_names"]
@@ -196,10 +172,8 @@ def feature_contributions(row, bundle, top_n_up=8, top_n_down=6):
     X = pd.DataFrame([row[features].values], columns=features)
     X_scaled = scaler.transform(X)[0]
-    # Use Haul class (3) coefficients - what's pushing toward "is this a haul"
     haul_class_idx = list(model.classes_).index(3)
     coefs = model.coef_[haul_class_idx]
     contributions = X_scaled * coefs
     df_contrib = pd.DataFrame({
@@ -214,7 +188,6 @@ def feature_contributions(row, bundle, top_n_up=8, top_n_down=6):
 def feature_friendly_name(feature):
-    """Convert internal feature names to human-readable labels."""
     mapping = {
         "minutes_lag_1": "Minutes played last gameweek",
         "points_lag_1": "Points scored last gameweek",
@@ -251,35 +224,137 @@ def feature_friendly_name(feature):
 def format_value_display(feature, value):
-    """Format the raw value for display in the feature panel."""
-    # Booleans, cluster membership, position dummies, season dummies -> Yes/No
     if feature in BOOLEAN_FEATURES or feature in CLUSTER_FEATURES:
         return "Yes" if value >= 0.5 else "No"
     if feature.startswith("position_") or feature.startswith("season_"):
         return "Yes" if value >= 0.5 else "No"
-    # Numeric -> 1 decimal
     if isinstance(value, (int, float, np.floating, np.integer)) and not isinstance(value, bool):
         return f"{value:.1f}"
     return str(value)
 def should_skip_feature(feature, value):
-    """Hide cluster/position/season rows where the player doesn't belong - they aren't informative."""
     if feature in CLUSTER_FEATURES and value < 0.5:
         return True
     if feature.startswith("position_") and value < 0.5:
         return True
-    if feature.startswith("season_") and value < 0.5:
         return True
     return False
 # ============================================================
-# PLAIN-ENGLISH SUMMARY GENERATOR
 # ============================================================
 def generate_summary(row, reg_pred, cls_pred, probs):
-    """Build a rule-based plain-English match preview."""
     parts = []
     venue = "at home" if row.get("was_home", 0) == 1 else "away"
@@ -328,11 +403,58 @@ def generate_summary(row, reg_pred, cls_pred, probs):
 # ============================================================
-# YOUTUBE SEARCH LINK
 # ============================================================
 def youtube_highlights_url(row):
-    """Build a smart YouTube search link for this fixture's highlights."""
     home_team = row["team"] if row.get("was_home", 0) == 1 else row["opponent"]
     away_team = row["opponent"] if row.get("was_home", 0) == 1 else row["team"]
     season = row["season"]
@@ -532,17 +654,10 @@ elif st.session_state.step == "results":
     st.markdown("<br>", unsafe_allow_html=True)
-    # CLASS PROBABILITY BAR
     st.markdown("#### Class probabilities")
-    prob_df = pd.DataFrame({
-        "Tier": [CLASS_LABELS[i] for i in range(4)],
-        "Probability": [probs.get(i, 0) * 100 for i in range(4)],
-    })
-    st.bar_chart(
-        prob_df.set_index("Tier"),
-        height=160,
-        color=PL_CYAN,
-    )
     # MAIN CONTENT - 2 COLS
     col_left, col_right = st.columns([1, 1])
@@ -556,12 +671,15 @@ elif st.session_state.step == "results":
     with col_right:
         st.markdown('<div class="pl-card">', unsafe_allow_html=True)
         st.markdown("#### Why the model is predicting this")
-        top_up, top_down = feature_contributions(row, cls_bundle, top_n_up=8, top_n_down=6)
         st.markdown(f"<div style='color: {PL_CYAN}; font-weight: 700; font-size: 12px; letter-spacing: 0.05em; margin-bottom: 8px;'>PUSHING TOWARD HAUL</div>", unsafe_allow_html=True)
         for _, r in top_up.iterrows():
             if should_skip_feature(r["feature"], r["raw_value"]):
                 continue
             label = feature_friendly_name(r["feature"])
             val_str = format_value_display(r["feature"], r["raw_value"])
             st.markdown(
@@ -571,11 +689,15 @@ elif st.session_state.step == "results":
                 f"</div>",
                 unsafe_allow_html=True,
             )
         st.markdown(f"<div style='color: {PL_PINK}; font-weight: 700; font-size: 12px; letter-spacing: 0.05em; margin-top: 16px; margin-bottom: 8px;'>PUSHING AWAY FROM HAUL</div>", unsafe_allow_html=True)
         for _, r in top_down.iterrows():
             if should_skip_feature(r["feature"], r["raw_value"]):
                 continue
             label = feature_friendly_name(r["feature"])
             val_str = format_value_display(r["feature"], r["raw_value"])
             st.markdown(
@@ -585,10 +707,11 @@ elif st.session_state.step == "results":
                 f"</div>",
                 unsafe_allow_html=True,
             )
         st.markdown("</div>", unsafe_allow_html=True)
-    # YOUTUBE LINK
     yt_url = youtube_highlights_url(row)
     st.markdown(
         f"""

 Loads trained models + engineered dataset from companion model repo.
 """
 import urllib.parse
 import numpy as np
 import pandas as pd
 import pickle
 import streamlit as st
+import plotly.graph_objects as go
 from huggingface_hub import hf_hub_download
 # ============================================================
 CLASSIFICATION_FILE = "fpl_classification_model.pkl"
 DATASET_FILE = "df_fe.parquet"
 PL_PURPLE = "#37003C"
 PL_CYAN = "#00FF87"
 PL_LIME = "#04F5FF"
 CLASS_RANGES = {0: "0-1 pts", 1: "2-4 pts", 2: "5-9 pts", 3: "10+ pts"}
 CLASS_COLORS = {0: "#888888", 1: "#04F5FF", 2: "#00FF87", 3: "#E90052"}
 BOOLEAN_FEATURES = {"was_home", "had_haul_last_3", "has_std_history"}
 CLUSTER_FEATURES = {"cluster_0", "cluster_1", "cluster_2", "cluster_3", "cluster_4"}
             padding: 20px;
             margin-bottom: 16px;
         }}
         .stSelectbox label, .stRadio label {{
             color: {PL_CYAN} !important;
             font-size: 12px !important;
 @st.cache_resource(show_spinner="Loading models from HuggingFace...")
 def load_artifacts():
     reg_path = hf_hub_download(repo_id=MODEL_REPO, filename=REGRESSION_FILE)
     cls_path = hf_hub_download(repo_id=MODEL_REPO, filename=CLASSIFICATION_FILE)
     data_path = hf_hub_download(repo_id=MODEL_REPO, filename=DATASET_FILE, repo_type="model")
 # ============================================================
 def get_player_row(df, name, position, season, gameweek):
     mask = (
         (df["name"] == name)
         & (df["position"] == position)
 def predict_regression(row, bundle):
     """Return the regression model's prediction for one row as a float.

     ``bundle`` supplies the fitted ``"model"`` and ``"scaler"`` together with
     the ordered ``"feature_names"``. The named values are read from ``row``,
     passed through ``scaler.transform`` and then through ``model.predict``.
     """
     estimator, transformer, columns = (
         bundle[key] for key in ("model", "scaler", "feature_names")
     )
     # Single-row frame so the scaler receives the values with their column names.
     frame = pd.DataFrame([row[columns].values], columns=columns)
     scaled = transformer.transform(frame)
     prediction = estimator.predict(scaled)
     return float(prediction[0])
 def predict_classification(row, bundle):
     """Predict the class for one row and the probability of every class.

     Args:
         row: Record holding a value for every name in
             ``bundle["feature_names"]``.
         bundle: Dict with the fitted ``"model"`` and ``"scaler"`` and the
             ordered ``"feature_names"`` list.

     Returns:
         ``(predicted_class, probabilities)`` where ``predicted_class`` is an
         int and ``probabilities`` maps each class label (int) to its
         probability (float).
     """
     model = bundle["model"]
     scaler = bundle["scaler"]
     features = bundle["feature_names"]
     X = pd.DataFrame([row[features].values], columns=features)
     X_scaled = scaler.transform(X)
     pred_class = int(model.predict(X_scaled)[0])
     probs = model.predict_proba(X_scaled)[0]
     # predict_proba columns follow model.classes_, not 0..n-1. Key by the real
     # label so a model fitted without one class does not shift every later
     # probability onto the wrong label. Fall back to positions only when the
     # model exposes no classes_ attribute.
     labels = getattr(model, "classes_", None)
     if labels is None:
         labels = range(len(probs))
     probs_dict = {int(label): float(p) for label, p in zip(labels, probs)}
     return pred_class, probs_dict
+def feature_contributions(row, bundle, top_n_up=10, top_n_down=8):
     model = bundle["model"]
     scaler = bundle["scaler"]
     features = bundle["feature_names"]
     X = pd.DataFrame([row[features].values], columns=features)
     X_scaled = scaler.transform(X)[0]
     haul_class_idx = list(model.classes_).index(3)
     coefs = model.coef_[haul_class_idx]
     contributions = X_scaled * coefs
     df_contrib = pd.DataFrame({
 def feature_friendly_name(feature):
     mapping = {
         "minutes_lag_1": "Minutes played last gameweek",
         "points_lag_1": "Points scored last gameweek",
 def format_value_display(feature, value):
     """Format a feature's raw value for display, with a hint where useful.

     Args:
         feature: Internal feature name (e.g. ``"minutes_lag_1"``).
         value: Raw value of that feature.

     Returns:
         A display string: ``"Yes"``/``"No"`` for flag-style features, the
         number followed by a bracketed hint for the recognised numeric
         features, one decimal place for any other number, and ``str(value)``
         for anything else.
     """
     # Boolean / cluster / position / season -> Yes / No
     if feature in BOOLEAN_FEATURES or feature in CLUSTER_FEATURES:
         if feature == "had_haul_last_3":
             return "Yes (in form)" if value >= 0.5 else "No"
         return "Yes" if value >= 0.5 else "No"
     if feature.startswith("position_") or feature.startswith("season_"):
         return "Yes" if value >= 0.5 else "No"
     # Minutes - rounded int + context
     if feature == "minutes_lag_1":
         v = int(round(value))
         if v >= 75:
             return f"{v} min (full match)"
         elif v >= 30:
             return f"{v} min (sub)"
         elif v > 0:
             return f"{v} min (cameo)"
         else:
             return f"{v} min (didn't play)"
     if feature.startswith("minutes_played_rolling_"):
         v = int(round(value))
         if v >= 75:
             return f"{v} min (regular starter)"
         elif v >= 45:
             return f"{v} min (rotation regular)"
         elif v >= 15:
             return f"{v} min (fringe role)"
         else:
             return f"{v} min (rarely featured)"
     # Points lag
     if feature == "points_lag_1":
         v = int(round(value))
         if v >= 10:
             return f"{v} (hauled)"
         elif v >= 5:
             return f"{v} (good return)"
         elif v >= 2:
             return f"{v} (decent)"
         else:
             return f"{v} (blank)"
     # Volatility. This exact-name check has to run before the generic
     # "points_rolling_" prefix check below: the prefix also matches this
     # name, which previously made this branch unreachable and labelled a
     # standard deviation as "form".
     if feature == "points_rolling_std_10":
         if value >= 4:
             return f"{value:.1f} (volatile)"
         elif value >= 2:
             return f"{value:.1f} (moderate)"
         else:
             return f"{value:.1f} (consistent)"
     # Rolling points
     if feature.startswith("points_rolling_"):
         if value >= 6:
             return f"{value:.1f} (excellent form)"
         elif value >= 4:
             return f"{value:.1f} (decent form)"
         elif value >= 2:
             return f"{value:.1f} (modest form)"
         else:
             return f"{value:.1f} (out of form)"
     # Strengths
     if feature == "opponent_strength":
         if value <= 2.5:
             return f"{value:.1f} (weak)"
         elif value >= 4:
             return f"{value:.1f} (strong)"
         else:
             return f"{value:.1f} (average)"
     if feature == "team_strength":
         if value <= 2.5:
             return f"{value:.1f} (struggling)"
         elif value >= 4:
             return f"{value:.1f} (in form)"
         else:
             return f"{value:.1f} (average)"
     # Price
     if feature == "value":
         if value >= 10:
             return f"£{value:.1f}m (premium)"
         elif value >= 7:
             return f"£{value:.1f}m (mid-price)"
         else:
             return f"£{value:.1f}m (budget)"
     # BPS
     if feature.startswith("bps_rolling_"):
         if value >= 25:
             return f"{value:.1f} (high)"
         elif value >= 15:
             return f"{value:.1f} (decent)"
         else:
             return f"{value:.1f} (low)"
     # Gameweek number - shown without a hint, it's not interpretable
     if feature == "gameweek":
         return f"GW{int(round(value))}"
     # Default numeric
     if isinstance(value, (int, float, np.floating, np.integer)) and not isinstance(value, bool):
         return f"{value:.1f}"
     return str(value)
 def should_skip_feature(feature, value):
     """Tell the caller whether a feature row should be left out of the display.

     Rows are hidden for cluster and ``position_*`` indicators whose value is
     below 0.5, for every ``season_*`` feature, and for ``gameweek``.
     """
     # Cluster / position indicators are only worth showing when switched on.
     is_indicator = feature in CLUSTER_FEATURES or feature.startswith("position_")
     if is_indicator and value < 0.5:
         return True
     # Season membership and the gameweek number are always hidden.
     return feature.startswith("season_") or feature == "gameweek"
 # ============================================================
+# PLAIN-ENGLISH SUMMARY
 # ============================================================
 def generate_summary(row, reg_pred, cls_pred, probs):
     parts = []
     venue = "at home" if row.get("was_home", 0) == 1 else "away"
 # ============================================================
+# CLASS PROBABILITY CHART (Plotly, on-brand)
+# ============================================================
def plot_class_probabilities(probs, predicted_class):
    """Build the Plotly bar chart of class probabilities.

    Args:
        probs: Mapping of class index (0-3) to probability in the 0-1 range;
            a missing class is drawn as 0%.
        predicted_class: Class index whose bar is drawn at full opacity; the
            other bars are drawn at 0.45 opacity.

    Returns:
        A ``go.Figure`` with one bar per class, values shown as percentages.
    """
    tiers = range(4)
    labels = [
        f"{CLASS_LABELS[i]}<br><span style='font-size:10px;color:rgba(255,255,255,0.5)'>{CLASS_RANGES[i]}</span>"
        for i in tiers
    ]
    values = [probs.get(i, 0) * 100 for i in tiers]
    # Every bar keeps its own tier colour; the predicted tier is emphasised
    # through opacity only (the earlier per-bar conditional picked the same
    # colour in both branches).
    colors = [CLASS_COLORS[i] for i in tiers]
    opacities = [1.0 if i == predicted_class else 0.45 for i in tiers]
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=labels,
        y=values,
        marker=dict(
            color=colors,
            opacity=opacities,
            line=dict(width=0),
        ),
        text=[f"{v:.1f}%" for v in values],
        textposition="outside",
        textfont=dict(color=PL_WHITE, size=14, family="Helvetica Neue"),
        hovertemplate="<b>%{x}</b><br>Probability: %{y:.1f}%<extra></extra>",
    ))
    fig.update_layout(
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(color=PL_WHITE, family="Helvetica Neue"),
        height=280,
        margin=dict(l=20, r=20, t=40, b=20),
        yaxis=dict(
            # 25% headroom above the tallest bar for the outside text labels,
            # with a floor of 20 so near-empty charts keep a usable scale.
            range=[0, max(max(values) * 1.25, 20)],
            showgrid=True,
            gridcolor="rgba(255,255,255,0.08)",
            tickformat=".0f",
            ticksuffix="%",
            zeroline=False,
        ),
        xaxis=dict(
            showgrid=False,
            zeroline=False,
        ),
        showlegend=False,
    )
    return fig
+# ============================================================
+# YOUTUBE
 # ============================================================
 def youtube_highlights_url(row):
     home_team = row["team"] if row.get("was_home", 0) == 1 else row["opponent"]
     away_team = row["opponent"] if row.get("was_home", 0) == 1 else row["team"]
     season = row["season"]
     st.markdown("<br>", unsafe_allow_html=True)
+    # CLASS PROBABILITY BAR (Plotly)
     st.markdown("#### Class probabilities")
+    fig = plot_class_probabilities(probs, cls_pred)
+    st.plotly_chart(fig, use_container_width=True, config={"displayModeBar": False})
     # MAIN CONTENT - 2 COLS
     col_left, col_right = st.columns([1, 1])
     with col_right:
         st.markdown('<div class="pl-card">', unsafe_allow_html=True)
         st.markdown("#### Why the model is predicting this")
+        top_up, top_down = feature_contributions(row, cls_bundle, top_n_up=10, top_n_down=8)
         st.markdown(f"<div style='color: {PL_CYAN}; font-weight: 700; font-size: 12px; letter-spacing: 0.05em; margin-bottom: 8px;'>PUSHING TOWARD HAUL</div>", unsafe_allow_html=True)
+        shown_up = 0
         for _, r in top_up.iterrows():
             if should_skip_feature(r["feature"], r["raw_value"]):
                 continue
+            if shown_up >= 6:
+                break
             label = feature_friendly_name(r["feature"])
             val_str = format_value_display(r["feature"], r["raw_value"])
             st.markdown(
                 f"</div>",
                 unsafe_allow_html=True,
             )
+            shown_up += 1
         st.markdown(f"<div style='color: {PL_PINK}; font-weight: 700; font-size: 12px; letter-spacing: 0.05em; margin-top: 16px; margin-bottom: 8px;'>PUSHING AWAY FROM HAUL</div>", unsafe_allow_html=True)
+        shown_down = 0
         for _, r in top_down.iterrows():
             if should_skip_feature(r["feature"], r["raw_value"]):
                 continue
+            if shown_down >= 5:
+                break
             label = feature_friendly_name(r["feature"])
             val_str = format_value_display(r["feature"], r["raw_value"])
             st.markdown(
                 f"</div>",
                 unsafe_allow_html=True,
             )
+            shown_down += 1
         st.markdown("</div>", unsafe_allow_html=True)
+    # YOUTUBE
     yt_url = youtube_highlights_url(row)
     st.markdown(
         f"""