Update src/streamlit_app.py
Browse files- src/streamlit_app.py +134 -29
src/streamlit_app.py
CHANGED
|
@@ -6,6 +6,8 @@ import os
|
|
| 6 |
from datetime import datetime
|
| 7 |
from bs4 import BeautifulSoup
|
| 8 |
import re # New import for regex operations
|
|
|
|
|
|
|
| 9 |
|
| 10 |
# --- IMPORTANT: Addressing PermissionError in Containerized Environments ---
|
| 11 |
# The error "PermissionError: [Errno 13] Permission denied: '/.streamlit'"
|
|
@@ -19,7 +21,7 @@ import re # New import for regex operations
|
|
| 19 |
#
|
| 20 |
# Option 2: Disable Streamlit's usage statistics gathering.
|
| 21 |
# In your Dockerfile: ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=False
|
| 22 |
-
# Or when running: docker run -e
|
| 23 |
#
|
| 24 |
# Option 1 is generally more robust as it provides a writable home directory
|
| 25 |
# for any application that might need it.
|
|
@@ -159,7 +161,17 @@ def player_season_stats(bbr_url):
|
|
| 159 |
return pd.DataFrame()
|
| 160 |
|
| 161 |
df = parse_table(html, table_id="per_game")
|
| 162 |
-
if df.empty
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
return pd.DataFrame()
|
| 164 |
|
| 165 |
# drop repeated header rows (e.g., rows where 'Season' is literally 'Season')
|
|
@@ -201,7 +213,17 @@ def team_per_game(year):
|
|
| 201 |
return pd.DataFrame()
|
| 202 |
|
| 203 |
df = parse_table(html, table_id="per_game-team") # Correct table ID for team stats
|
| 204 |
-
if df.empty
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
return pd.DataFrame()
|
| 206 |
|
| 207 |
# drop repeated headers & rename
|
|
@@ -221,7 +243,7 @@ def team_per_game(year):
|
|
| 221 |
})
|
| 222 |
|
| 223 |
# coerce numeric columns
|
| 224 |
-
non_numeric_cols = {"Tm", "RANK"}
|
| 225 |
for col in df.columns:
|
| 226 |
if col not in non_numeric_cols:
|
| 227 |
df[col] = pd.to_numeric(df[col], errors="coerce")
|
|
@@ -283,6 +305,62 @@ def get_available_seasons(num_seasons=6):
|
|
| 283 |
seasons_list.append(f"{start_year}β{end_year}")
|
| 284 |
return sorted(seasons_list, reverse=True) # Sort to show most recent first
|
| 285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 287 |
# Main App Structure
|
| 288 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
@@ -324,27 +402,44 @@ def player_vs_player():
|
|
| 324 |
|
| 325 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 326 |
all_player_season_data = [] # To store individual season rows for each player
|
|
|
|
|
|
|
| 327 |
|
| 328 |
with st.spinner("Fetching player data..."):
|
| 329 |
for player_name in selected_players:
|
| 330 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
df_player_career = player_season_stats(player_url)
|
| 332 |
|
| 333 |
if not df_player_career.empty:
|
| 334 |
# Filter for selected seasons and ensure 'Season' column is consistent
|
| 335 |
-
|
| 336 |
filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
|
| 337 |
|
| 338 |
if not filtered_df.empty:
|
| 339 |
filtered_df['Player'] = player_name # Add player name for identification
|
| 340 |
all_player_season_data.append(filtered_df)
|
| 341 |
else:
|
| 342 |
-
|
|
|
|
| 343 |
else:
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
if not all_player_season_data:
|
| 347 |
-
st.
|
| 348 |
return
|
| 349 |
|
| 350 |
# Concatenate all collected season data into one DataFrame
|
|
@@ -416,7 +511,7 @@ def player_vs_player():
|
|
| 416 |
)
|
| 417 |
else:
|
| 418 |
# Average over selected seasons for multiple players for bar chart
|
| 419 |
-
avg_comparison_df = comparison_df_raw.groupby('Player')[available_metrics].mean().reset_index()
|
| 420 |
fig = px.bar(
|
| 421 |
avg_comparison_df,
|
| 422 |
x='Player',
|
|
@@ -469,16 +564,16 @@ def player_vs_player():
|
|
| 469 |
|
| 470 |
def team_vs_team():
|
| 471 |
st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
|
| 472 |
-
|
| 473 |
available_seasons = get_available_seasons()
|
| 474 |
selected_season_str = st.selectbox("Select Season", available_seasons, index=0)
|
| 475 |
-
|
| 476 |
# Extract the end year from the season string (e.g., "2024β25" -> 2025)
|
| 477 |
year_for_team_stats = int(selected_season_str.split('β')[1])
|
| 478 |
|
| 479 |
tm_df = team_per_game(year_for_team_stats)
|
| 480 |
if tm_df.empty:
|
| 481 |
-
st.info(f"No team data available for the {selected_season_str} season.")
|
| 482 |
return
|
| 483 |
|
| 484 |
teams = tm_df['Tm'].unique().tolist()
|
|
@@ -490,19 +585,25 @@ def team_vs_team():
|
|
| 490 |
return
|
| 491 |
|
| 492 |
stats = []
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
if not stats:
|
| 505 |
-
st.
|
| 506 |
return
|
| 507 |
|
| 508 |
comp = pd.DataFrame(stats)
|
|
@@ -611,10 +712,14 @@ def young_projections():
|
|
| 611 |
ppg = st.number_input("PPG",0.0,40.0,15.0)
|
| 612 |
rpg = st.number_input("RPG",0.0,20.0,5.0)
|
| 613 |
apg = st.number_input("APG",0.0,15.0,3.0)
|
| 614 |
-
if st.button("
|
| 615 |
prompt = (
|
| 616 |
-
f"
|
| 617 |
-
f"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 618 |
)
|
| 619 |
out = ask_perp(prompt, system="You are an NBA projection expert AI.", max_tokens=800)
|
| 620 |
st.markdown("### Projection Analysis")
|
|
@@ -633,13 +738,13 @@ def similar_players():
|
|
| 633 |
tp = st.selectbox("Target Player", all_p)
|
| 634 |
crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
|
| 635 |
if tp and crit and st.button("Find Similar"):
|
| 636 |
-
prompt = f"Find top 5 current and top 3 historical similar to {tp}
|
| 637 |
st.write(ask_perp(prompt, system="You are a similarity expert AI.", max_tokens=800))
|
| 638 |
st.subheader("Manual Compare")
|
| 639 |
p1 = st.selectbox("Player 1", all_p, key="p1")
|
| 640 |
p2 = st.selectbox("Player 2", all_p, key="p2")
|
| 641 |
if p1 and p2 and p1!=p2 and st.button("Compare Players"):
|
| 642 |
-
prompt = f"Compare {p1} vs {p2}
|
| 643 |
st.write(ask_perp(prompt, system="You are a comparison expert AI.", max_tokens=700))
|
| 644 |
|
| 645 |
def roster_builder():
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
from bs4 import BeautifulSoup
|
| 8 |
import re # New import for regex operations
|
| 9 |
+
import plotly.express as px # Ensure plotly imports are present
|
| 10 |
+
import plotly.graph_objects as go # Ensure plotly imports are present
|
| 11 |
|
| 12 |
# --- IMPORTANT: Addressing PermissionError in Containerized Environments ---
|
| 13 |
# The error "PermissionError: [Errno 13] Permission denied: '/.streamlit'"
|
|
|
|
| 21 |
#
|
| 22 |
# Option 2: Disable Streamlit's usage statistics gathering.
|
| 23 |
# In your Dockerfile: ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=False
|
| 24 |
+
# Or when running: docker run -e STREAMLIT_BROWSER_GATHER_USAGE_STATS=False your_image_name
|
| 25 |
#
|
| 26 |
# Option 1 is generally more robust as it provides a writable home directory
|
| 27 |
# for any application that might need it.
|
|
|
|
| 161 |
return pd.DataFrame()
|
| 162 |
|
| 163 |
df = parse_table(html, table_id="per_game")
|
| 164 |
+
if df.empty: # Check if df is empty first
|
| 165 |
+
return pd.DataFrame()
|
| 166 |
+
|
| 167 |
+
# Flatten multi-index columns if they exist (common with pd.read_html)
|
| 168 |
+
if isinstance(df.columns, pd.MultiIndex):
|
| 169 |
+
df.columns = ['_'.join(col).strip() for col in df.columns.values]
|
| 170 |
+
else:
|
| 171 |
+
df.columns = [col.strip() for col in df.columns.values]
|
| 172 |
+
|
| 173 |
+
# Now check for 'Season' column after flattening
|
| 174 |
+
if "Season" not in df.columns:
|
| 175 |
return pd.DataFrame()
|
| 176 |
|
| 177 |
# drop repeated header rows (e.g., rows where 'Season' is literally 'Season')
|
|
|
|
| 213 |
return pd.DataFrame()
|
| 214 |
|
| 215 |
df = parse_table(html, table_id="per_game-team") # Correct table ID for team stats
|
| 216 |
+
if df.empty: # Check if df is empty first
|
| 217 |
+
return pd.DataFrame()
|
| 218 |
+
|
| 219 |
+
# Flatten multi-index columns if they exist
|
| 220 |
+
if isinstance(df.columns, pd.MultiIndex):
|
| 221 |
+
df.columns = ['_'.join(col).strip() for col in df.columns.values]
|
| 222 |
+
else:
|
| 223 |
+
df.columns = [col.strip() for col in df.columns.values]
|
| 224 |
+
|
| 225 |
+
# Now check for 'Team' column after flattening
|
| 226 |
+
if "Team" not in df.columns:
|
| 227 |
return pd.DataFrame()
|
| 228 |
|
| 229 |
# drop repeated headers & rename
|
|
|
|
| 243 |
})
|
| 244 |
|
| 245 |
# coerce numeric columns
|
| 246 |
+
non_numeric_cols = {"Tm", "RANK"}
|
| 247 |
for col in df.columns:
|
| 248 |
if col not in non_numeric_cols:
|
| 249 |
df[col] = pd.to_numeric(df[col], errors="coerce")
|
|
|
|
| 305 |
seasons_list.append(f"{start_year}β{end_year}")
|
| 306 |
return sorted(seasons_list, reverse=True) # Sort to show most recent first
|
| 307 |
|
| 308 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 309 |
+
# Plotting functions (retained from previous version)
|
| 310 |
+
def create_comparison_chart(data, players_names, metric):
    """Build a line chart comparing one metric across seasons for several players.

    Args:
        data: DataFrame indexed by column name; must provide 'Player' and
            'Season' columns plus the column named by ``metric``.
        players_names: Iterable of player names. A name with no matching
            rows in ``data['Player']`` is skipped (no trace is added).
        metric: Name of the column plotted on the y-axis; also used for the
            chart title and the y-axis label.

    Returns:
        A ``go.Figure`` holding one 'lines+markers' trace per player found.
    """
    fig = go.Figure()

    for player in players_names:
        # Filter once; an empty result means the player is absent, which is
        # the same condition the membership test on .values used to check.
        player_data = data[data['Player'] == player]
        if player_data.empty:
            continue

        fig.add_trace(go.Scatter(
            x=player_data['Season'],
            y=player_data[metric],
            mode='lines+markers',
            name=player,
            line=dict(width=3),
        ))

    fig.update_layout(
        title=f"{metric} Comparison",
        xaxis_title="Season",
        yaxis_title=metric,
        hovermode='x unified',
        height=500,
    )

    return fig
|
| 334 |
+
|
| 335 |
+
def create_radar_chart(player_stats, categories):
    """Render a filled radar (polar) chart with one trace per player.

    Args:
        player_stats: Mapping of player name to a mapping of category name
            to value. Categories missing for a player are plotted as 0.
        categories: Ordered list of category names used as the angular axis.

    Returns:
        A ``go.Figure`` with one ``Scatterpolar`` trace per entry in
        ``player_stats``.
    """
    radar = go.Figure()

    for name, values in player_stats.items():
        # Missing categories fall back to 0 so every trace has a full ring.
        trace = go.Scatterpolar(
            r=[values.get(category, 0) for category in categories],
            theta=categories,
            fill='toself',
            name=name,
            opacity=0.7,
        )
        radar.add_trace(trace)

    # The radial axis is pinned to 0-100; values are presumably pre-scaled
    # to that range by the caller -- TODO confirm against call sites.
    radial_axis = dict(visible=True, range=[0, 100])
    radar.update_layout(
        polar=dict(radialaxis=radial_axis),
        showlegend=True,
        title="Player Comparison Radar Chart",
    )

    return radar
|
| 363 |
+
|
| 364 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 365 |
# Main App Structure
|
| 366 |
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
| 402 |
|
| 403 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 404 |
all_player_season_data = [] # To store individual season rows for each player
|
| 405 |
+
players_not_found_in_index = []
|
| 406 |
+
players_with_no_season_data = []
|
| 407 |
|
| 408 |
with st.spinner("Fetching player data..."):
|
| 409 |
for player_name in selected_players:
|
| 410 |
+
player_url_row = idx.loc[idx.name == player_name, 'url']
|
| 411 |
+
if player_url_row.empty:
|
| 412 |
+
players_not_found_in_index.append(player_name)
|
| 413 |
+
continue
|
| 414 |
+
|
| 415 |
+
player_url = player_url_row.iat[0]
|
| 416 |
df_player_career = player_season_stats(player_url)
|
| 417 |
|
| 418 |
if not df_player_career.empty:
|
| 419 |
# Filter for selected seasons and ensure 'Season' column is consistent
|
| 420 |
+
# The player_season_stats already handles the hyphen to en-dash replacement
|
| 421 |
filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
|
| 422 |
|
| 423 |
if not filtered_df.empty:
|
| 424 |
filtered_df['Player'] = player_name # Add player name for identification
|
| 425 |
all_player_season_data.append(filtered_df)
|
| 426 |
else:
|
| 427 |
+
# This means player was found, but no data for selected seasons
|
| 428 |
+
players_with_no_season_data.append(player_name)
|
| 429 |
else:
|
| 430 |
+
# This means player_season_stats returned an empty DF (fetch/parse failed)
|
| 431 |
+
players_with_no_season_data.append(player_name) # Treat as no data for selected seasons
|
| 432 |
+
|
| 433 |
+
# Report on players not found in index
|
| 434 |
+
if players_not_found_in_index:
|
| 435 |
+
st.error(f"The following players were not found in the Basketball-Reference index: {', '.join(players_not_found_in_index)}. Please check spelling.")
|
| 436 |
+
|
| 437 |
+
# Report on players with no data for selected seasons
|
| 438 |
+
if players_with_no_season_data:
|
| 439 |
+
st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_season_data)}. This might be because the season hasn't started or data is not yet available.")
|
| 440 |
|
| 441 |
if not all_player_season_data:
|
| 442 |
+
st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
|
| 443 |
return
|
| 444 |
|
| 445 |
# Concatenate all collected season data into one DataFrame
|
|
|
|
| 511 |
)
|
| 512 |
else:
|
| 513 |
# Average over selected seasons for multiple players for bar chart
|
| 514 |
+
avg_comparison_df = comparison_df_raw.groupby('Player')[available_metrics].mean(numeric_only=True).reset_index()
|
| 515 |
fig = px.bar(
|
| 516 |
avg_comparison_df,
|
| 517 |
x='Player',
|
|
|
|
| 564 |
|
| 565 |
def team_vs_team():
|
| 566 |
st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
|
| 567 |
+
|
| 568 |
available_seasons = get_available_seasons()
|
| 569 |
selected_season_str = st.selectbox("Select Season", available_seasons, index=0)
|
| 570 |
+
|
| 571 |
# Extract the end year from the season string (e.g., "2024β25" -> 2025)
|
| 572 |
year_for_team_stats = int(selected_season_str.split('β')[1])
|
| 573 |
|
| 574 |
tm_df = team_per_game(year_for_team_stats)
|
| 575 |
if tm_df.empty:
|
| 576 |
+
st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available.")
|
| 577 |
return
|
| 578 |
|
| 579 |
teams = tm_df['Tm'].unique().tolist()
|
|
|
|
| 585 |
return
|
| 586 |
|
| 587 |
stats = []
|
| 588 |
+
teams_with_no_data = []
|
| 589 |
+
|
| 590 |
+
with st.spinner("Fetching team data..."):
|
| 591 |
+
for t in selected_teams:
|
| 592 |
+
df = tm_df[tm_df.Tm == t].copy() # Use .copy() to avoid SettingWithCopyWarning
|
| 593 |
+
if not df.empty:
|
| 594 |
+
# For team stats, we usually get one row per team per season from team_per_game
|
| 595 |
+
# So, no need for .mean() here, just take the row.
|
| 596 |
+
df['Team'] = t # Add 'Team' column for consistency
|
| 597 |
+
df['Season'] = selected_season_str # Add 'Season' column
|
| 598 |
+
stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
|
| 599 |
+
else:
|
| 600 |
+
teams_with_no_data.append(t)
|
| 601 |
+
|
| 602 |
+
if teams_with_no_data:
|
| 603 |
+
st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}. This might be because the season hasn't started or data is not yet available.")
|
| 604 |
|
| 605 |
if not stats:
|
| 606 |
+
st.error("No data available for the selected teams to display. Please adjust your selections.")
|
| 607 |
return
|
| 608 |
|
| 609 |
comp = pd.DataFrame(stats)
|
|
|
|
| 712 |
ppg = st.number_input("PPG",0.0,40.0,15.0)
|
| 713 |
rpg = st.number_input("RPG",0.0,20.0,5.0)
|
| 714 |
apg = st.number_input("APG",0.0,15.0,3.0)
|
| 715 |
+
if st.button("Generate AI Projection"):
|
| 716 |
prompt = (
|
| 717 |
+
f"Analyze and project the future potential of NBA player {sp}: "
|
| 718 |
+
f"Current Stats: Age={age}, Years in NBA={yrs}, PPG={ppg}, RPG={rpg}, APG={apg}. "
|
| 719 |
+
"Please provide: 1. 3-year projection of their stats. "
|
| 720 |
+
"2. Peak potential analysis. 3. Areas for improvement. "
|
| 721 |
+
"4. Comparison to similar players at the same age. 5. Career trajectory prediction. "
|
| 722 |
+
"Base your analysis on historical player development patterns and current NBA trends."
|
| 723 |
)
|
| 724 |
out = ask_perp(prompt, system="You are an NBA projection expert AI.", max_tokens=800)
|
| 725 |
st.markdown("### Projection Analysis")
|
|
|
|
| 738 |
tp = st.selectbox("Target Player", all_p)
|
| 739 |
crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
|
| 740 |
if tp and crit and st.button("Find Similar"):
|
| 741 |
+
prompt = f"Find top 5 current and top 3 historical similar to {tp} based on the following criteria: {', '.join(crit)}. Provide detailed reasoning."
|
| 742 |
st.write(ask_perp(prompt, system="You are a similarity expert AI.", max_tokens=800))
|
| 743 |
st.subheader("Manual Compare")
|
| 744 |
p1 = st.selectbox("Player 1", all_p, key="p1")
|
| 745 |
p2 = st.selectbox("Player 2", all_p, key="p2")
|
| 746 |
if p1 and p2 and p1!=p2 and st.button("Compare Players"):
|
| 747 |
+
prompt = f"Compare {p1} vs {p2} in detail: 1. Statistical comparison (current season). 2. Playing style similarities and differences. 3. Strengths and weaknesses of each. 4. Team impact and role. 5. Overall similarity score (1-10). Provide a comprehensive comparison with specific examples."
|
| 748 |
st.write(ask_perp(prompt, system="You are a comparison expert AI.", max_tokens=700))
|
| 749 |
|
| 750 |
def roster_builder():
|