"""
╔══════════════════════════════════════════════════════════════════════════════╗
║ CopaVision AI | Match Outcome Predictor | Phase 1 ║
║ Streamlit Dashboard — Production Ready ║
╚══════════════════════════════════════════════════════════════════════════════╝
Run:
streamlit run app.py
Dependencies:
pip install streamlit pandas numpy scikit-learn plotly joblib matplotlib
(matplotlib is required by the pandas Styler gradient used on the Team Explorer table)
File structure expected:
app.py ← this file
models/
copavision_rf.pkl
copavision_lr.pkl
team_stats.json ← pre-computed Elo + form stats per team
"""
# ─────────────────────────────────────────────────────────────────────────────
# IMPORTS
# ─────────────────────────────────────────────────────────────────────────────
import json
import time
from pathlib import Path
import joblib
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import streamlit as st
# ─────────────────────────────────────────────────────────────────────────────
# PAGE CONFIG — must be first Streamlit call
# ─────────────────────────────────────────────────────────────────────────────
# Browser-tab title, icon and layout, gathered in one mapping and splatted
# into the call. Per the banner above, this has to be the first st.* call.
_PAGE_SETTINGS = dict(
    page_title="CopaVision AI",
    page_icon="⚽",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_SETTINGS)
# ─────────────────────────────────────────────────────────────────────────────
# CUSTOM CSS — dark football analytics theme
# ─────────────────────────────────────────────────────────────────────────────
# The stylesheet body currently holds a single newline, so the injection below
# adds no rules of its own.
CUSTOM_CSS = "\n"
st.markdown(body=CUSTOM_CSS, unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# CONSTANTS & CONFIG
# ─────────────────────────────────────────────────────────────────────────────
# Column order of the model input; build_feature_vector emits values in
# exactly this sequence.
FEATURE_COLS = [
    # recent form
    "home_recent_points",
    "away_recent_points",
    # attack
    "home_avg_goals_scored",
    "away_avg_goals_scored",
    # defence
    "home_avg_goals_conceded",
    "away_avg_goals_conceded",
    # rolling goal difference
    "home_rolling_gd",
    "away_rolling_gd",
    # Elo
    "elo_diff",
    "home_elo",
    "away_elo",
    # match context
    "neutral_venue",
    "tournament_importance",
]
# Competitions grouped by importance tier (5 = highest). The tuple order is
# the insertion order of the resulting dict, which the tournament dropdown
# shows as-is.
_TOURNAMENTS_BY_TIER = (
    (5, ("FIFA World Cup", "UEFA Euro (EURO)", "Copa America")),
    (4, ("AFC Asian Cup", "African Cup of Nations", "Gold Cup (CONCACAF)")),
    (3, ("FIFA World Cup Qualification", "UEFA Euro Qualification",
         "UEFA Nations League", "CONMEBOL Qualifying")),
    (2, ("CAF Qualification", "Regional Tournament")),
    (1, ("International Friendly",)),
)
TOURNAMENT_IMPORTANCE_MAP = {
    competition: tier
    for tier, competitions in _TOURNAMENTS_BY_TIER
    for competition in competitions
}
# Base Plotly template applied to every chart.
PLOTLY_TEMPLATE = "plotly_dark"
# Shared palette: one accent per outcome plus neutral UI tones.
COLORS = dict(
    home="#00ff9d",
    away="#ff6b35",
    draw="#00d4ff",
    bg="#0e1621",
    panel="#121d2e",
    border="#1e3050",
    muted="#5a7a9a",
    text="#e8f1ff",
)
# ─────────────────────────────────────────────────────────────────────────────
# DATA & MODEL LOADING
# ─────────────────────────────────────────────────────────────────────────────
# Directory containing this script; model and stats paths are built from it.
BASE_DIR = Path(__file__).parents[0]
@st.cache_resource(show_spinner=False)
def load_models():
    """Load the trained estimators found under ``BASE_DIR / "models"``.

    Returns:
        dict mapping display name ("Random Forest", "Logistic Regression") to
        the object returned by ``joblib.load``. A missing file is reported
        with ``st.error`` and left out of the mapping.

    The function is wrapped in ``st.cache_resource``, so the result is reused
    instead of being reloaded on every rerun.
    """
    model_files = {
        "Random Forest": "copavision_rf.pkl",
        "Logistic Regression": "copavision_lr.pkl",
    }
    loaded = {}
    for label, filename in model_files.items():
        pickle_path = BASE_DIR / "models" / filename
        if not pickle_path.exists():
            st.error(f"Model not found: {pickle_path}")
            continue
        # NOTE(review): joblib.load unpickles the file; only ship model files
        # from a trusted source.
        loaded[label] = joblib.load(pickle_path)
    return loaded
@st.cache_data(show_spinner=False)
def load_team_stats() -> dict:
    """Load the pre-computed per-team stats from ``BASE_DIR / "team_stats.json"``.

    Returns:
        The parsed JSON content, or an empty dict (after showing ``st.error``)
        when the file does not exist.
    """
    path = BASE_DIR / "team_stats.json"
    if not path.exists():
        st.error("team_stats.json not found. Run the Phase 1 pipeline first.")
        return {}
    # Decode explicitly as UTF-8: without it open() uses the platform default
    # encoding, which can mangle or reject non-ASCII team names.
    with open(path, encoding="utf-8") as f:
        return json.load(f)
# ─────────────────────────────────────────────────────────────────────────────
# FEATURE GENERATION
# ─────────────────────────────────────────────────────────────────────────────
def get_team_features(team: str, stats: dict) -> dict:
    """Look up one team's feature record, falling back to neutral defaults.

    Args:
        team: Team name used as the key into ``stats``.
        stats: Mapping of team name to its stats record.

    Returns:
        dict with keys ``elo``, ``recent_points``, ``avg_scored``,
        ``avg_conceded``, ``rolling_gd`` and ``matches_played``. For a team
        absent from ``stats`` the defaults are Elo 1500, one point and one
        goal scored/conceded per match, zero goal difference and no matches.

    Raises:
        KeyError: if a known team's record lacks one of the five required
            fields (``matches_played`` alone is optional and defaults to 0).
    """
    if team not in stats:
        # No history for this team: use neutral placeholder values.
        return dict(
            elo=1500.0,
            recent_points=1.0,
            avg_scored=1.0,
            avg_conceded=1.0,
            rolling_gd=0.0,
            matches_played=0,
        )

    record = stats[team]
    required = ("elo", "recent_points", "avg_scored", "avg_conceded", "rolling_gd")
    features = {field: record[field] for field in required}
    features["matches_played"] = record.get("matches_played", 0)
    return features
def build_feature_vector(home_team: str, away_team: str,
                         neutral: bool, tournament: str,
                         stats: dict) -> np.ndarray:
    """Assemble the single-row model input for one fixture.

    Values are computed by name and then emitted in ``FEATURE_COLS`` order, so
    the column sequence always follows that list.

    Args:
        home_team: Name of the home side.
        away_team: Name of the away side.
        neutral: Whether the match is on neutral ground (encoded as 0/1).
        tournament: Key into ``TOURNAMENT_IMPORTANCE_MAP``; unknown names get
            importance 2.
        stats: Mapping of team name to its stats record.

    Returns:
        Float array of shape ``(1, len(FEATURE_COLS))``.
    """
    home = get_team_features(home_team, stats)
    away = get_team_features(away_team, stats)

    by_name = {
        "home_recent_points": home["recent_points"],
        "away_recent_points": away["recent_points"],
        "home_avg_goals_scored": home["avg_scored"],
        "away_avg_goals_scored": away["avg_scored"],
        "home_avg_goals_conceded": home["avg_conceded"],
        "away_avg_goals_conceded": away["avg_conceded"],
        "home_rolling_gd": home["rolling_gd"],
        "away_rolling_gd": away["rolling_gd"],
        "elo_diff": home["elo"] - away["elo"],
        "home_elo": home["elo"],
        "away_elo": away["elo"],
        "neutral_venue": int(neutral),
        "tournament_importance": TOURNAMENT_IMPORTANCE_MAP.get(tournament, 2),
    }
    row = [by_name[column] for column in FEATURE_COLS]
    return np.asarray(row, dtype=float)[np.newaxis, :]
# ─────────────────────────────────────────────────────────────────────────────
# PLOTLY CHART HELPERS
# ─────────────────────────────────────────────────────────────────────────────
def make_probability_donut(home_prob: float, draw_prob: float,
                           away_prob: float, home_team: str,
                           away_team: str) -> go.Figure:
    """Build a donut chart of the home-win / draw / away-win probabilities.

    Slices are drawn clockwise in the fixed order home, draw, away
    (``sort=False``). The largest of the three probabilities is printed as a
    whole-number percentage in the centre of the ring.

    Args:
        home_prob, draw_prob, away_prob: Outcome probabilities (fractions).
        home_team, away_team: Team names used in the slice labels.

    Returns:
        The configured ``go.Figure``.
    """
    fig = go.Figure(go.Pie(
        labels=[f"{home_team} Win", "Draw", f"{away_team} Win"],
        values=[home_prob, draw_prob, away_prob],
        hole=0.62,
        marker_colors=[COLORS["home"], COLORS["draw"], COLORS["away"]],
        textinfo="label+percent",
        textfont=dict(family="Barlow Condensed", size=14, color=COLORS["text"]),
        # Hover text uses <br> for the line break; a raw newline inside a
        # single-line string literal is a SyntaxError.
        hovertemplate="%{label}<br>Probability: %{percent}",
        direction="clockwise",
        sort=False,
    ))
    top_prob = max(home_prob, draw_prob, away_prob)
    fig.update_layout(
        template=PLOTLY_TEMPLATE,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        margin=dict(t=20, b=20, l=20, r=20),
        height=300,
        showlegend=False,
        annotations=[dict(
            text=f"{top_prob*100:.0f}%",
            x=0.5, y=0.5,
            font=dict(size=30, family="Barlow Condensed", color=COLORS["text"]),
            showarrow=False,
        )],
    )
    return fig
def make_probability_bars(home_prob: float, draw_prob: float,
                          away_prob: float, home_team: str,
                          away_team: str) -> go.Figure:
    """Build a horizontal bar chart of the three outcome probabilities.

    Each probability is converted to a percentage and printed outside its
    bar with one decimal place. The x-axis is hidden and fixed to 0–105 so
    the outside labels have room.

    Args:
        home_prob, draw_prob, away_prob: Outcome probabilities (fractions).
        home_team, away_team: Team names used in the bar labels.

    Returns:
        The configured ``go.Figure``.
    """
    labels = [f"{home_team} Win", "Draw", f"{away_team} Win"]
    values = [home_prob * 100, draw_prob * 100, away_prob * 100]
    bar_colors = [COLORS["home"], COLORS["draw"], COLORS["away"]]
    fig = go.Figure(go.Bar(
        x=values,
        y=labels,
        orientation="h",
        marker=dict(
            color=bar_colors,
            line=dict(color="rgba(0,0,0,0)", width=0),
        ),
        text=[f"{v:.1f}%" for v in values],
        textposition="outside",
        textfont=dict(family="Barlow Condensed", size=16, color=COLORS["text"]),
        # <br> is the hover line break; the literal previously contained a raw
        # newline and did not parse.
        hovertemplate="%{y}<br>%{x:.1f}%",
    ))
    fig.update_layout(
        template=PLOTLY_TEMPLATE,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        xaxis=dict(range=[0, 105], showgrid=False, visible=False),
        yaxis=dict(showgrid=False,
                   tickfont=dict(family="Barlow Condensed", size=14,
                                 color=COLORS["text"])),
        margin=dict(t=10, b=10, l=10, r=70),
        height=200,
        bargap=0.35,
    )
    return fig
def _rgba_with_alpha(color: str, alpha: float) -> str:
    """Return ``color`` ("#rrggbb" or "rgb(r, g, b)") as an rgba() string."""
    if color.startswith("rgb("):
        return "rgba(" + color[len("rgb("):-1] + f", {alpha})"
    digits = color.lstrip("#")
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({red},{green},{blue},{alpha})"


def make_team_radar(h_feats: dict, a_feats: dict,
                    home_team: str, away_team: str) -> go.Figure:
    """Build a radar chart comparing two teams on five normalised metrics.

    Each metric is min-max scaled against a fixed range and clipped to
    [0, 1]: Elo (1200–2200), recent points (0–3), goals scored (0–4),
    inverted goals conceded ``1 / (conceded + 0.5)`` (0–2) and rolling goal
    difference (-3 to 3).

    Args:
        h_feats, a_feats: Feature dicts as returned by ``get_team_features``.
        home_team, away_team: Names used for the legend and hover text.

    Returns:
        The configured ``go.Figure`` with one closed polygon per team.
    """
    categories = ["Elo Rating", "Recent Form", "Attack", "Defence\n(inverted)", "Goal Diff"]

    def norm(val, lo, hi):
        # The epsilon keeps the division defined if lo == hi.
        return max(0.0, min(1.0, (val - lo) / (hi - lo + 1e-9)))

    def profile(feats):
        return [
            norm(feats["elo"], 1200, 2200),
            norm(feats["recent_points"], 0, 3),
            norm(feats["avg_scored"], 0, 4),
            norm(1 / (feats["avg_conceded"] + 0.5), 0, 2),  # lower conceded → better
            norm(feats["rolling_gd"], -3, 3),
        ]

    fig = go.Figure()
    for name, vals, color in [
        (home_team, profile(h_feats), COLORS["home"]),
        (away_team, profile(a_feats), COLORS["away"]),
    ]:
        fig.add_trace(go.Scatterpolar(
            # Repeat the first point so the polygon closes.
            r=vals + [vals[0]],
            theta=categories + [categories[0]],
            fill="toself",
            # Translucent fill derived from the line colour instead of
            # hard-coded per-team rgba duplicates.
            fillcolor=_rgba_with_alpha(color, 0.15),
            line=dict(color=color, width=2),
            name=name,
            # <br> is the hover line break; the literal previously contained a
            # raw newline and did not parse.
            hovertemplate=f"{name}<br>%{{theta}}: %{{r:.2f}}",
        ))
    fig.update_layout(
        template=PLOTLY_TEMPLATE,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        polar=dict(
            bgcolor="rgba(14,22,33,0.6)",
            radialaxis=dict(visible=True, range=[0, 1],
                            showticklabels=False, gridcolor=COLORS["border"]),
            angularaxis=dict(
                tickfont=dict(family="Barlow Condensed", size=12, color=COLORS["text"]),
                gridcolor=COLORS["border"],
            ),
        ),
        legend=dict(
            font=dict(family="Barlow Condensed", color=COLORS["text"], size=13),
            bgcolor="rgba(0,0,0,0)",
        ),
        height=380,
        margin=dict(t=30, b=30, l=30, r=30),
    )
    return fig
def _feature_group_color(column: str) -> str:
    """Pick the bar colour for a feature column by its name group."""
    if "elo" in column:
        return COLORS["draw"]
    if "points" in column:
        return COLORS["home"]
    if "goal" in column or "gd" in column:
        return "#d2a8ff"
    return COLORS["muted"]


def make_feature_importance_chart(model) -> "go.Figure | None":
    """Build a horizontal bar chart of ``model.feature_importances_``.

    Bars are sorted ascending by importance and coloured by feature group
    (Elo, points, goals / goal difference, other).

    Args:
        model: Fitted estimator. Its importances are labelled by position
            from ``FEATURE_COLS``.

    Returns:
        The figure, or ``None`` when the model exposes no
        ``feature_importances_`` attribute.
    """
    if not hasattr(model, "feature_importances_"):
        return None
    importances = model.feature_importances_
    order = np.argsort(importances)
    ordered_cols = [FEATURE_COLS[i] for i in order]
    labels = [col.replace("_", " ").title() for col in ordered_cols]
    vals = importances[order]
    palette = [_feature_group_color(col) for col in ordered_cols]
    fig = go.Figure(go.Bar(
        x=vals,
        y=labels,
        orientation="h",
        marker=dict(color=palette),
        text=[f"{v:.3f}" for v in vals],
        textposition="outside",
        textfont=dict(size=11, color=COLORS["text"], family="DM Sans"),
        # <br> is the hover line break; the literal previously contained a raw
        # newline and did not parse.
        hovertemplate="%{y}<br>Importance: %{x:.4f}",
    ))
    fig.update_layout(
        template=PLOTLY_TEMPLATE,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        # 20% headroom so the outside text labels are not clipped.
        xaxis=dict(showgrid=False, visible=False, range=[0, vals.max() * 1.2]),
        yaxis=dict(tickfont=dict(family="DM Sans", size=11, color=COLORS["text"])),
        margin=dict(t=10, b=10, l=10, r=70),
        height=420,
    )
    return fig
def make_elo_gauge(elo: float, team: str, color: str) -> go.Figure:
    """Build a gauge indicator for one team's Elo rating.

    The dial spans 1000–2300 with three shaded bands (1000–1400, 1400–1700,
    1700–2300). Both the bar and the threshold marker sit at ``elo`` and use
    ``color``; ``team`` is shown as the title.
    """
    dial_axis = dict(
        range=[1000, 2300],
        tickwidth=1,
        tickcolor=COLORS["muted"],
        tickfont=dict(color=COLORS["muted"], size=10),
    )
    bands = [
        dict(range=[1000, 1400], color="rgba(90,122,154,0.1)"),
        dict(range=[1400, 1700], color="rgba(90,122,154,0.15)"),
        dict(range=[1700, 2300], color="rgba(90,122,154,0.2)"),
    ]
    marker = dict(line=dict(color=color, width=3), thickness=0.7, value=elo)
    dial = dict(
        axis=dial_axis,
        bar=dict(color=color, thickness=0.3),
        bgcolor=COLORS["bg"],
        borderwidth=0,
        steps=bands,
        threshold=marker,
    )
    indicator = go.Indicator(
        mode="gauge+number",
        value=elo,
        number=dict(font=dict(family="Barlow Condensed", size=36,
                              color=COLORS["text"])),
        gauge=dial,
        title=dict(text=team,
                   font=dict(family="Barlow Condensed", size=16,
                             color=COLORS["muted"])),
        domain=dict(x=[0, 1], y=[0, 1]),
    )
    gauge_fig = go.Figure(indicator)
    gauge_fig.update_layout(
        paper_bgcolor="rgba(0,0,0,0)",
        height=220,
        margin=dict(t=40, b=10, l=30, r=30),
    )
    return gauge_fig
# ─────────────────────────────────────────────────────────────────────────────
# RESULT FORMATTING HELPERS
# ─────────────────────────────────────────────────────────────────────────────
def get_result_badge(prediction: int, home_team: str, away_team: str) -> str:
    """Return the headline text for a predicted class.

    Class 0 is a home win and class 1 an away win; any other value is
    reported as a draw.
    """
    if prediction == 0:
        return f'⚽ {home_team} Win'
    if prediction == 1:
        return f'⚽ {away_team} Win'
    return '🤝 Draw'
def confidence_label(prob: float) -> str:
    """Map a probability to a confidence label.

    Thresholds are checked from highest to lowest (0.70, 0.50, 0.35);
    anything below the last one is "Uncertain".
    """
    tiers = (
        (0.70, "🔥 High Confidence"),
        (0.50, "📊 Moderate Confidence"),
        (0.35, "⚖️ Low Confidence"),
    )
    for floor, label in tiers:
        if prob >= floor:
            return label
    return "🎲 Uncertain"
def form_bar(recent_points: float) -> str:
    """Render average recent points as a five-symbol form strip.

    ``recent_points`` is scaled from the 0–3 points-per-match range to 0–5
    filled symbols. The count is clamped to that range, so an out-of-range
    average still gives a strip of exactly five symbols.

    Args:
        recent_points: Average points per match (3 = win, 1 = draw, 0 = loss).

    Returns:
        A string of "🟢" (filled) followed by "⚫" (empty), five in total.
    """
    slots = 5
    filled = int(round(recent_points / 3 * slots))
    # Without the clamp a value above 3 or below 0 produced a strip that was
    # longer than five symbols.
    filled = max(0, min(slots, filled))
    return "🟢" * filled + "⚫" * (slots - filled)
# ─────────────────────────────────────────────────────────────────────────────
# SIDEBAR
# ─────────────────────────────────────────────────────────────────────────────
def render_sidebar(team_stats: dict, models: dict):
    """Draw the sidebar and return the user's page and model selections.

    Args:
        team_stats: Accepted for interface compatibility; not read here.
        models: Mapping whose keys populate the model dropdown.

    Returns:
        ``(page, model_choice)``: the selected navigation label (including
        its leading emoji) and the selected key of ``models``.
    """
    with st.sidebar:
        # Logo / brand
        st.markdown("""
COPAVISION
A I · P H A S E 1
""", unsafe_allow_html=True)

        # Navigation. The heading literals below previously contained raw
        # newlines, which is a SyntaxError in a single-quoted string; the
        # newline is now written as an escape.
        st.markdown('NAVIGATION\n', unsafe_allow_html=True)
        page = st.radio(
            label="page",
            options=["⚽ Match Predictor", "📊 Team Explorer",
                     "🧠 Model Insights", "ℹ️ About"],
            label_visibility="collapsed",
        )
        st.markdown("\n", unsafe_allow_html=True)

        # Model selector
        st.markdown('ML MODEL\n', unsafe_allow_html=True)
        model_choice = st.selectbox(
            "Select Model",
            options=list(models.keys()),
            label_visibility="collapsed",
        )
        st.markdown("\n", unsafe_allow_html=True)
        st.markdown("""
Select a page and model to start exploring the prediction dashboard.
""", unsafe_allow_html=True)
    return page, model_choice
# ─────────────────────────────────────────────────────────────────────────────
# PAGE 1 — MATCH PREDICTOR
# ─────────────────────────────────────────────────────────────────────────────
def page_match_predictor(models: dict, team_stats: dict, model_choice: str):
    """Render the Match Predictor page: inputs, prediction and result tabs.

    Before the button is pressed a stats preview of both teams is shown.
    After it, the selected model's ``predict_proba`` output is displayed as a
    headline, probability charts, a radar comparison and narrative insights.

    Args:
        models: Mapping of model display name to a fitted estimator exposing
            ``predict_proba``.
        team_stats: Mapping of team name to its stats record.
        model_choice: Key into ``models`` selecting the estimator to use.

    Returns:
        None. Returns early, after a warning, when the same team is chosen
        for both sides.
    """
    # ── Header ────────────────────────────────────────────────────────────────
    st.markdown("""
⚽ Match Outcome Predictor
Select two international teams and get AI-powered match predictions powered by Elo ratings and ML
""", unsafe_allow_html=True)
    st.markdown('\n', unsafe_allow_html=True)

    # ── Input Section ─────────────────────────────────────────────────────────
    all_teams = sorted(team_stats.keys())
    popular = ["Brazil", "Argentina", "France", "Germany", "Spain",
               "England", "Portugal", "Italy", "Netherlands", "Belgium",
               "Croatia", "Uruguay", "Mexico", "USA", "Japan",
               "South Korea", "Senegal", "Morocco", "Australia", "Poland"]
    known = set(all_teams)
    popular_set = set(popular)
    # Popular teams first (in the order above), then everything else A–Z.
    ordered_teams = [t for t in popular if t in known] + \
                    [t for t in all_teams if t not in popular_set]

    # Default selections must tolerate stats files without these teams;
    # list.index() raises ValueError for a missing entry.
    home_default = ordered_teams.index("Brazil") if "Brazil" in ordered_teams else 0
    away_default = (ordered_teams.index("Argentina") if "Argentina" in ordered_teams
                    else min(1, len(ordered_teams) - 1))

    col_l, col_mid, col_r = st.columns([5, 1, 5])
    with col_l:
        st.markdown('HOME TEAM\n', unsafe_allow_html=True)
        home_team = st.selectbox("Home Team", ordered_teams,
                                 index=home_default,
                                 label_visibility="collapsed", key="home")
    with col_mid:
        st.markdown("\n", unsafe_allow_html=True)
        st.markdown('VS\n', unsafe_allow_html=True)
    with col_r:
        st.markdown('AWAY TEAM\n', unsafe_allow_html=True)
        away_team = st.selectbox("Away Team", ordered_teams,
                                 index=away_default,
                                 label_visibility="collapsed", key="away")

    # Same-team guard
    if home_team == away_team:
        st.warning("⚠️ Home and Away teams must be different. Please select two distinct teams.")
        return

    # Tournament & venue row
    col_t, col_n = st.columns([3, 1])
    with col_t:
        st.markdown('TOURNAMENT\n', unsafe_allow_html=True)
        tournament = st.selectbox(
            "Tournament",
            list(TOURNAMENT_IMPORTANCE_MAP.keys()),
            index=0,
            label_visibility="collapsed",
        )
    with col_n:
        st.markdown('NEUTRAL VENUE\n', unsafe_allow_html=True)
        neutral = st.toggle("Neutral Ground", value=False)
    st.markdown("\n", unsafe_allow_html=True)

    # ── Predict Button ─────────────────────────────────────────────────────────
    col_btn, _ = st.columns([2, 5])
    with col_btn:
        predict_clicked = st.button("🔮 Generate Prediction", width="stretch")
    st.markdown('\n', unsafe_allow_html=True)

    if not predict_clicked:
        # Pre-prediction state: show team quick-stats if both selected
        if home_team and away_team and home_team != away_team:
            _render_team_preview(home_team, away_team, team_stats)
        return

    # ── Prediction Output ─────────────────────────────────────────────────────
    model = models[model_choice]
    h_feats = get_team_features(home_team, team_stats)
    a_feats = get_team_features(away_team, team_stats)
    with st.spinner("⚙️ Running prediction engine…"):
        time.sleep(0.6)  # Small delay for UX polish
        X = build_feature_vector(home_team, away_team, neutral, tournament, team_stats)
        probs = model.predict_proba(X)[0]
    prediction = int(np.argmax(probs))
    # Column order is taken to be home, away, draw — the same encoding
    # get_result_badge uses. NOTE(review): confirm against model.classes_.
    home_prob, away_prob, draw_prob = probs[0], probs[1], probs[2]
    max_prob = max(home_prob, draw_prob, away_prob)

    # ── Result headline ────────────────────────────────────────────────────
    badge = get_result_badge(prediction, home_team, away_team)
    conf = confidence_label(max_prob)
    venue_label = "🌐 Neutral Ground" if neutral else "🏟️ Home Advantage"
    imp = TOURNAMENT_IMPORTANCE_MAP.get(tournament, 2)
    st.markdown(f"""
PREDICTED OUTCOME
{badge}
{conf} ·
{max_prob*100:.1f}% confidence
{venue_label} · Tournament Weight: {'⭐' * imp}
""", unsafe_allow_html=True)

    # ── Probability breakdown ──────────────────────────────────────────────
    st.markdown("\n", unsafe_allow_html=True)
    tab_prob, tab_radar, tab_insights = st.tabs(
        ["📊 Probabilities", "🕸️ Team Radar", "🔍 Match Insights"]
    )

    def vs_base(prob: float) -> str:
        # Signed difference to the 33% baseline, formatted the same way for
        # all three outcomes.
        return f"{(prob - 0.33) * 100:+.1f}% vs base"

    with tab_prob:
        c1, c2 = st.columns([1, 1])
        with c1:
            st.plotly_chart(
                make_probability_donut(home_prob, draw_prob, away_prob,
                                       home_team, away_team),
                width="stretch", config={"displayModeBar": False},
            )
        with c2:
            st.markdown("\n", unsafe_allow_html=True)
            st.plotly_chart(
                make_probability_bars(home_prob, draw_prob, away_prob,
                                      home_team, away_team),
                width="stretch", config={"displayModeBar": False},
            )
        # Metric cards row
        m1, m2, m3 = st.columns(3)
        m1.metric(f"⚽ {home_team} Win", f"{home_prob*100:.1f}%",
                  delta=vs_base(home_prob))
        m2.metric("🤝 Draw", f"{draw_prob*100:.1f}%",
                  delta=vs_base(draw_prob))
        m3.metric(f"⚽ {away_team} Win", f"{away_prob*100:.1f}%",
                  delta=vs_base(away_prob))

    with tab_radar:
        st.plotly_chart(
            make_team_radar(h_feats, a_feats, home_team, away_team),
            width="stretch", config={"displayModeBar": False},
        )
        # Elo gauges
        g1, g2 = st.columns(2)
        with g1:
            st.plotly_chart(make_elo_gauge(h_feats["elo"], home_team, COLORS["home"]),
                            width="stretch", config={"displayModeBar": False})
        with g2:
            st.plotly_chart(make_elo_gauge(a_feats["elo"], away_team, COLORS["away"]),
                            width="stretch", config={"displayModeBar": False})

    with tab_insights:
        _render_match_insights(home_team, away_team, h_feats, a_feats,
                               neutral, tournament, home_prob, away_prob, draw_prob)
def _render_team_preview(home_team: str, away_team: str, team_stats: dict):
    """Show a side-by-side stats card for both teams before a prediction runs.

    Args:
        home_team, away_team: Selected team names.
        team_stats: Mapping of team name to its stats record; teams missing
            from it are shown with the defaults from ``get_team_features``.
    """
    h = get_team_features(home_team, team_stats)
    a = get_team_features(away_team, team_stats)
    # The heading literal previously contained a raw newline, which is a
    # SyntaxError; the newline is now written as an escape.
    st.markdown('Team Overview — click Generate Prediction to run the model\n',
                unsafe_allow_html=True)
    c1, c2 = st.columns(2)
    for col, team, feats, color in [
        (c1, home_team, h, COLORS["home"]),
        (c2, away_team, a, COLORS["away"]),
    ]:
        with col:
            st.markdown(f"""
{team}
{feats['matches_played']} international matches on record
| 🏆 Elo Rating |
{feats['elo']:.0f} |
| 📈 Recent Form (avg pts) |
{feats['recent_points']:.2f} / 3.00 |
| ⚽ Avg Goals Scored |
{feats['avg_scored']:.2f} |
| 🛡️ Avg Goals Conceded |
{feats['avg_conceded']:.2f} |
| 📊 Rolling Goal Diff |
{feats['rolling_gd']:+.2f} |
| ⚡ Form Strip (last 5) |
{form_bar(feats['recent_points'])} |
""", unsafe_allow_html=True)
def _render_match_insights(home_team, away_team, h, a,
                           neutral, tournament,
                           home_prob, away_prob, draw_prob):
    """Render narrative insights about the match-up as markdown.

    Covers relative Elo strength, recent form, venue, tournament tier and
    how uncertain the predicted distribution is.

    Args:
        home_team, away_team: Team names.
        h, a: Feature dicts for the home and away side (``elo`` and
            ``recent_points`` are read).
        neutral: Whether the match is on neutral ground.
        tournament: Key into ``TOURNAMENT_IMPORTANCE_MAP`` (unknown → tier 2).
        home_prob, away_prob, draw_prob: Predicted outcome probabilities.
    """
    elo_diff = h["elo"] - a["elo"]
    elo_gap = abs(elo_diff)
    imp = TOURNAMENT_IMPORTANCE_MAP.get(tournament, 2)

    # Strength insight. The favourite is whichever side has the higher Elo;
    # the >200 branch used to name the home team even when it was the weaker.
    stronger = home_team if elo_diff > 0 else away_team
    if elo_gap > 200:
        strength = (f"**{stronger}** holds a commanding Elo advantage "
                    f"of **{elo_gap:.0f}** points — a significant gap "
                    f"that historically translates to a win probability above 65%.")
    elif elo_gap > 80:
        strength = (f"**{stronger}** edges the Elo comparison by "
                    f"**{elo_gap:.0f}** points — a moderate advantage "
                    f"that typically gives them a 10–15% edge.")
    else:
        strength = (f"The teams are **evenly matched** on Elo "
                    f"(difference: {elo_gap:.0f} pts). This fixture is a genuine "
                    f"coin-toss — expect a competitive, tight contest.")

    # Form insight
    form_diff = h["recent_points"] - a["recent_points"]
    hotter = home_team if form_diff > 0 else away_team
    if abs(form_diff) > 0.8:
        form_msg = f"**{hotter}** is in significantly better recent form."
    elif abs(form_diff) > 0.3:
        form_msg = f"**{hotter}** has a slight recent form edge."
    else:
        form_msg = "Both teams are in comparable recent form."

    # Venue insight
    if neutral:
        venue_msg = ("On a **neutral ground**, home advantage is eliminated — "
                     "this levels the playing field slightly for the away side.")
    else:
        venue_msg = (f"**{home_team}** benefits from home advantage, "
                     f"historically worth ~0.5 goals and a 5–8% boost in win probability.")

    # Tournament context
    imp_text = {1: "low-stakes friendly", 2: "regional tournament",
                3: "competitive qualifier", 4: "major continental cup",
                5: "elite world-stage competition"}
    tourn_msg = (f"This is a **{imp_text.get(imp, 'competitive')}** "
                 f"(importance tier {imp}/5). Higher-stakes matches "
                 f"tend to be more conservative — draw probability often "
                 f"rises slightly as teams protect leads.")

    # Uncertainty: Shannon entropy of the three probabilities as a share of
    # the maximum (uniform) entropy. The epsilon keeps log() finite at p == 0.
    entropy = -sum(p * np.log(p + 1e-9) for p in [home_prob, away_prob, draw_prob])
    max_entropy = np.log(3)
    uncertainty_pct = (entropy / max_entropy) * 100
    if uncertainty_pct < 50:
        verdict = 'The model is confident in this prediction.'
    else:
        verdict = ('The model sees this as a genuinely open contest — '
                   'treat probabilities as a guide, not a guarantee.')

    # form_msg was previously built and then dropped; it is now shown with
    # the strength analysis.
    st.markdown(f"""
⚡ STRENGTH ANALYSIS
{strength}
{form_msg}
🏟️ VENUE CONTEXT
{venue_msg}
🏆 TOURNAMENT CONTEXT
{tourn_msg}
🎲 MODEL UNCERTAINTY
Prediction entropy is {uncertainty_pct:.0f}% of maximum uncertainty.
{verdict}
""", unsafe_allow_html=True)
# ─────────────────────────────────────────────────────────────────────────────
# PAGE 2 — TEAM EXPLORER
# ─────────────────────────────────────────────────────────────────────────────
def page_team_explorer(team_stats: dict):
    """Render the Team Explorer page: Elo ranking chart, scatter and table.

    Args:
        team_stats: Mapping of team name to a record with ``elo``,
            ``recent_points``, ``avg_scored``, ``avg_conceded`` and
            ``rolling_gd`` (``matches_played`` is optional, default 0).
    """
    # The team count comes from the data rather than a hard-coded figure.
    st.markdown(f"""
📊 Team Explorer
Browse Elo ratings, form, and stats for all {len(team_stats)} international teams
""", unsafe_allow_html=True)
    st.markdown('\n', unsafe_allow_html=True)

    if not team_stats:
        # An empty frame has no "Elo Rating" column to sort by.
        st.info("No team statistics available.")
        return

    # Build dataframe
    rows = [
        {
            "Team": team,
            "Elo Rating": s["elo"],
            "Recent Form (avg pts)": s["recent_points"],
            "Goals Scored": s["avg_scored"],
            "Goals Conceded": s["avg_conceded"],
            "Goal Difference": s["rolling_gd"],
            "Matches Played": s.get("matches_played", 0),
        }
        for team, s in team_stats.items()
    ]
    df = pd.DataFrame(rows).sort_values("Elo Rating", ascending=False).reset_index(drop=True)
    df.index += 1  # 1-indexed ranking

    # Top 15 Elo chart
    top15 = df.head(15)
    fig_top = go.Figure(go.Bar(
        x=top15["Team"],
        y=top15["Elo Rating"],
        marker=dict(
            color=top15["Elo Rating"],
            colorscale=[[0, "#1e3050"], [0.5, "#00d4ff"], [1.0, "#00ff9d"]],
            showscale=False,
        ),
        text=top15["Elo Rating"].round(0).astype(int),
        textposition="outside",
        textfont=dict(family="Barlow Condensed", size=12, color=COLORS["text"]),
        # <br> is the hover line break; the literal previously contained a raw
        # newline and did not parse.
        hovertemplate="%{x}<br>Elo: %{y:.0f}",
    ))
    fig_top.update_layout(
        template=PLOTLY_TEMPLATE, paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)", height=380,
        xaxis=dict(tickfont=dict(family="Barlow Condensed", size=12, color=COLORS["text"]),
                   gridcolor="rgba(0,0,0,0)"),
        yaxis=dict(range=[df["Elo Rating"].min() * 0.95, df["Elo Rating"].max() * 1.05],
                   gridcolor=COLORS["border"],
                   tickfont=dict(color=COLORS["muted"])),
        margin=dict(t=20, b=20, l=20, r=20),
        title=dict(text="Top 15 Teams by Elo Rating", font=dict(
            family="Barlow Condensed", size=16, color=COLORS["text"])),
    )
    st.plotly_chart(fig_top, width="stretch", config={"displayModeBar": False})

    # Scatter: Elo vs Goals Scored
    with st.expander("🔍 Elo vs. Attack Strength (scatter)", expanded=False):
        fig_scatter = px.scatter(
            df, x="Elo Rating", y="Goals Scored",
            hover_name="Team",
            size="Matches Played", size_max=18,
            color="Goal Difference",
            color_continuous_scale=[[0, "#ff4560"], [0.5, "#5a7a9a"], [1, "#00ff9d"]],
            template=PLOTLY_TEMPLATE,
            labels={"Goals Scored": "Avg Goals Scored (last 5)", "Elo Rating": "Elo Rating"},
        )
        fig_scatter.update_layout(
            paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
            height=420, margin=dict(t=20, b=20),
            coloraxis_colorbar=dict(tickfont=dict(color=COLORS["muted"])),
        )
        st.plotly_chart(fig_scatter, width="stretch", config={"displayModeBar": False})

    # Filterable table
    st.markdown("\n", unsafe_allow_html=True)
    search = st.text_input("🔎 Search team", placeholder="e.g. Brazil, Germany, Japan…")
    if search:
        # Match the typed text literally: with the default regex=True, input
        # such as "(" is compiled as a pattern and raises.
        filtered = df[df["Team"].str.contains(search, case=False, regex=False)]
    else:
        filtered = df
    # NOTE: Styler.background_gradient needs matplotlib installed at render
    # time.
    st.dataframe(
        filtered.style.background_gradient(
            subset=["Elo Rating"], cmap="Blues"
        ).format({
            "Elo Rating": "{:.0f}",
            "Recent Form (avg pts)": "{:.2f}",
            "Goals Scored": "{:.2f}",
            "Goals Conceded": "{:.2f}",
            "Goal Difference": "{:+.2f}",
        }),
        width="stretch",
        height=420,
    )
# ─────────────────────────────────────────────────────────────────────────────
# PAGE 3 — MODEL INSIGHTS
# ─────────────────────────────────────────────────────────────────────────────
def page_model_insights(models: dict, model_choice: str):
    """Render the Model Insights page for the selected model.

    Shows the performance figures for ``model_choice``, then either a
    feature-importance chart (models exposing ``feature_importances_``) or a
    grouped bar chart of ``coef_`` per class, followed by a training-details
    expander.

    Args:
        models: Mapping of model display name to fitted estimator.
        model_choice: Key into ``models``.
    """
    st.markdown("""
🧠 Model Insights
Feature importance, model architecture, and performance metrics
""", unsafe_allow_html=True)
    st.markdown('\n', unsafe_allow_html=True)
    model = models[model_choice]

    # Performance cards
    st.markdown('Model Performance on 2017–2020 Test Set\n',
                unsafe_allow_html=True)
    perf_data = {
        "Random Forest": {"accuracy": 54.5, "f1": 0.510, "params": "300 trees, depth 8"},
        "Logistic Regression": {"accuracy": 57.7, "f1": 0.433, "params": "C=0.1, L2, lbfgs"},
    }
    m1, m2, m3, m4 = st.columns(4)
    d = perf_data.get(model_choice, {"accuracy": 0, "f1": 0, "params": "N/A"})
    m1.metric("Accuracy", f"{d['accuracy']:.1f}%")
    m2.metric("Macro F1", f"{d['f1']:.3f}")
    m3.metric("Train Size", "16,113 matches")
    m4.metric("Test Size", "3,025 matches")
    st.markdown("\n", unsafe_allow_html=True)

    # Feature importance (RF only)
    if hasattr(model, "feature_importances_"):
        st.markdown('Feature Importance — Random Forest (Gini)\n',
                    unsafe_allow_html=True)
        fig = make_feature_importance_chart(model)
        # Explicit None check instead of relying on the figure's truthiness.
        if fig is not None:
            st.plotly_chart(fig, width="stretch", config={"displayModeBar": False})
        # Feature legend
        with st.expander("📖 Why These Features?"):
            st.markdown("""
| Feature | Why It Matters |
|---|---|
| **elo_diff / home_elo / away_elo** | Elo is the gold standard for continuous team strength. It adjusts for opponent quality and recency. The single most predictive signal. |
| **home/away_recent_points** | Recent form (last 5 matches, pts basis) captures momentum and current squad fitness. |
| **home/away_avg_goals_scored** | Offensive firepower. Teams that score more tend to win more — obvious but quantifiably significant. |
| **home/away_avg_goals_conceded** | Defensive solidity. Conceding fewer goals is as important as scoring them. |
| **home/away_rolling_gd** | Net goal difference over 5 games — combined attacking + defensive signal. |
| **neutral_venue** | Eliminates home advantage, which is worth roughly 0.3–0.5 Elo points of expected improvement. |
| **tournament_importance** | World Cup games are played differently from friendlies. Stakes affect tactics and effort. |
""")
    else:
        # Logistic Regression — show coefficients
        st.markdown('Logistic Regression Coefficients\n',
                    unsafe_allow_html=True)
        # Assumes coef_ has one row per class in the order home, away, draw
        # and one column per FEATURE_COLS entry — NOTE(review): confirm
        # against model.classes_.
        coef_df = pd.DataFrame(
            model.coef_,
            columns=FEATURE_COLS,
            index=["Home Win", "Away Win", "Draw"],
        ).T
        fig_coef = go.Figure()
        for cls, color in [("Home Win", COLORS["home"]),
                           ("Away Win", COLORS["away"]),
                           ("Draw", COLORS["draw"])]:
            fig_coef.add_trace(go.Bar(
                name=cls, x=coef_df.index, y=coef_df[cls], marker_color=color,
                # <br> is the hover line break; the literal previously
                # contained a raw newline and did not parse.
                hovertemplate=f"%{{x}}<br>{cls}: %{{y:.3f}}",
            ))
        fig_coef.update_layout(
            template=PLOTLY_TEMPLATE, barmode="group",
            paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)",
            height=400,
            xaxis=dict(tickangle=-35, tickfont=dict(size=10, color=COLORS["text"])),
            yaxis=dict(gridcolor=COLORS["border"]),
            legend=dict(font=dict(color=COLORS["text"])),
            margin=dict(t=20, b=80),
        )
        st.plotly_chart(fig_coef, width="stretch", config={"displayModeBar": False})

    # Architecture explainer
    with st.expander("🏗️ Model Architecture & Training Details"):
        st.markdown(f"""
**Model selected:** `{model_choice}`
**Training period:** 2000–2016 (16,113 matches)
**Test period:** 2017–2020 (3,025 matches)
**Split strategy:** Chronological — never random (prevents data leakage)
**Why time-based split?**
A random split would let the model see 2019 matches during training and then predict
2010 matches — a form of temporal leakage. In sports analytics, we always train on the
past and test on the future, exactly mirroring deployment conditions.
**No-leakage guarantee:**
Every feature is computed using only matches *before* the current match date.
The Elo engine processes matches sequentially, updating ratings only after
feature extraction. This is the most common mistake in sports ML pipelines.
**Class balance:**
Home Win ~48% | Away Win ~28% | Draw ~24%.
Random Forest uses `class_weight='balanced'` to compensate for the draw minority.
""")
# ─────────────────────────────────────────────────────────────────────────────
# PAGE 4 — ABOUT
# ─────────────────────────────────────────────────────────────────────────────
def page_about():
    """Render the static About page.

    Left column: project overview, run and deployment instructions. Right
    column: file structure, requirements and roadmap. An expander at the
    bottom holds accuracy benchmarks and known limitations.
    """
    st.markdown("""
ℹ️ About CopaVision AI
Phase 1 — International Football Match Predictor
""", unsafe_allow_html=True)
    # The one-line spacer literals on this page previously contained raw
    # newlines (a SyntaxError); they are now written with an escape.
    st.markdown('\n', unsafe_allow_html=True)
    c1, c2 = st.columns([3, 2])
    with c1:
        st.markdown("""
PROJECT OVERVIEW
CopaVision AI is a football analytics platform that uses machine learning
to predict international match outcomes. Phase 1 covers the core prediction
engine trained on 41,500+ historical matches from 1872–2020.
The model combines Elo ratings,
rolling form metrics, and
tournament context
to generate probabilistic win/draw/loss predictions for any pair of
international teams.
""", unsafe_allow_html=True)
        st.markdown("""
HOW TO RUN
# Install dependencies
pip install streamlit pandas numpy \\
scikit-learn plotly joblib
# Launch app
streamlit run app.py
""", unsafe_allow_html=True)
        st.markdown("""
DEPLOY ON STREAMLIT CLOUD
- Push your project to a public GitHub repo
- Go to share.streamlit.io and sign in
- Click New App → select your repo and branch
- Set Main file path to
app.py
- Add a
requirements.txt (see right column)
- Click Deploy — live in ~2 minutes 🚀
""", unsafe_allow_html=True)
    with c2:
        st.markdown("""
FILE STRUCTURE
copavision-ai/
│
├── app.py ← this file
├── requirements.txt
├── team_stats.json ← Elo + form data
│
├── models/
│ ├── copavision_rf.pkl
│ └── copavision_lr.pkl
│
├── data/
│ └── results.csv ← raw dataset
│
└── utils/ ← (Phase 2+)
├── features.py
├── elo.py
└── viz.py
""", unsafe_allow_html=True)
        st.markdown("""
REQUIREMENTS.TXT
streamlit>=1.32
pandas>=2.0
numpy>=1.26
scikit-learn>=1.4
plotly>=5.20
joblib>=1.3
""", unsafe_allow_html=True)
        st.markdown("""
PHASE ROADMAP
✓ Phase 1 Match Outcome Predictor
→ Phase 2 Player Dashboard
Add a pages/02_players.py file with
FIFA/Transfermarkt player stats
○ Phase 3 Sentiment Tracker
Twitter/Reddit API + VADER/BERT
sentiment pre-match signals
○ Phase 4 Live Match Feed
Rapid API football → real-time
score updates & live Elo shifts
""", unsafe_allow_html=True)

    # Accuracy expectations
    st.markdown("\n", unsafe_allow_html=True)
    with st.expander("📉 Realistic Accuracy Expectations & Limitations"):
        st.markdown("""
| Benchmark | Accuracy |
|---|---|
| Random guessing (3 classes) | 33.3% |
| Always predict Home Win | ~48% |
| **CopaVision AI Phase 1** | **54–58%** |
| State-of-the-art (squad data + deep learning) | 60–65% |
| Human football experts | ~60% |
**Why football is hard to predict:**
- Individual brilliance and errors are genuinely stochastic
- Injuries and suspensions aren't in historical data
- Tactics adapt match-to-match (chess, not poker)
- Home crowd effects vary enormously by stadium
- VAR and referee decisions introduce randomness
**Phase 1 limitations:**
- No player-level data (squad strength, injury list)
- No head-to-head historical records
- Friendly matches dilute the training signal
- Model trained up to 2020 — club-level Elo not factored in
""")
# ─────────────────────────────────────────────────────────────────────────────
# MAIN APP
# ─────────────────────────────────────────────────────────────────────────────
def main():
    """Load models and stats, draw the sidebar, and route to the chosen page.

    Stops the script with an error message when either the models or the
    team stats could not be loaded.
    """
    # Load data
    with st.spinner("Loading CopaVision AI…"):
        models = load_models()
        team_stats = load_team_stats()
    if not models or not team_stats:
        st.error("Critical files missing. Ensure models/ and team_stats.json exist.")
        st.stop()

    # Sidebar (returns page & model choice)
    page, model_choice = render_sidebar(team_stats, models)

    # Page routing. Labels look like "<emoji> <title>", so split off only the
    # first token. Splitting on every space and taking the last piece turned
    # "⚽ Match Predictor" into "Predictor", which matched no route.
    page_key = page.split(" ", 1)[-1].strip()
    if page_key == "Match Predictor":
        page_match_predictor(models, team_stats, model_choice)
    elif page_key == "Team Explorer":
        page_team_explorer(team_stats)
    elif page_key == "Model Insights":
        page_model_insights(models, model_choice)
    elif page_key == "About":
        page_about()


if __name__ == "__main__":
    main()