rairo committed on
Commit
af0594e
·
verified ·
1 Parent(s): 666b966

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +26 -51
main.py CHANGED
@@ -946,92 +946,68 @@ def _scrape_player_index_brscraper():
946
  ]
947
  return pd.DataFrame({'name': common_players})
948
 
949
- def get_player_career_stats_brscraper(player_name, seasons_to_fetch: list[str], playoffs=False):
950
  if not BRSCRAPER_AVAILABLE:
951
  logging.error("BRScraper is not available. Cannot fetch player career stats.")
952
  return pd.DataFrame()
953
-
954
- normalized_player_name = normalize_string(player_name) # Normalize input player name once
955
  all_rows = []
 
 
 
956
 
957
- for season_str in seasons_to_fetch:
 
 
958
  end_year = int(season_str.split('–')[1])
959
 
960
- # Define cache key based on normalized name, year, and playoff status
961
- cache_key = f"{normalized_player_name}_{end_year}_{'playoffs' if playoffs else 'regular'}"
962
- db_ref = db.reference(f'scraped_data/player_season_stats/{cache_key}')
963
-
964
- if FIREBASE_INITIALIZED:
965
- cached_data = db_ref.get()
966
- if cached_data and not is_data_stale(cached_data.get('last_updated'), max_age_hours=24*7): # Cache for 7 days
967
- logging.info(f"Loading stats for {player_name} in {season_str} (playoffs: {playoffs}) from Firebase cache.")
968
- all_rows.append(pd.DataFrame.from_records(cached_data['data']))
969
- continue # Skip scraping for this season if found in cache
970
- else:
971
- logging.info(f"Stats for {player_name} in {season_str} cache stale or not found. Scraping...")
972
-
973
- # Retry mechanism for scraping
974
  for attempt in range(3): # Try up to 3 times
975
  try:
976
  logging.info(f"DEBUG: Attempt {attempt+1} for nba.get_stats for player '{player_name}' in season {season_str} (year: {end_year}, playoffs: {playoffs})...")
977
 
978
- # Fetch all player stats for the given season and type (regular/playoffs)
979
  df_season = nba.get_stats(end_year, info='per_game', playoffs=playoffs, rename=False)
980
 
981
  if df_season.empty:
982
  logging.warning(f"DEBUG: nba.get_stats returned empty DataFrame for {player_name} in {season_str} on attempt {attempt+1}. Retrying...")
983
- time.sleep(1) # Wait before retrying
984
- continue
985
 
986
  if 'Player' not in df_season.columns:
987
  logging.warning(f"DEBUG: DataFrame for {player_name} in {season_str} has no 'Player' column on attempt {attempt+1}. Columns: {df_season.columns.tolist()}. Retrying...")
988
  time.sleep(1)
989
  continue
990
 
991
- # Normalize player names from the scraped data for matching
992
  df_season['Player_Normalized'] = df_season['Player'].apply(normalize_string)
 
993
  row = df_season[df_season['Player_Normalized'] == normalized_player_name]
994
 
995
  if not row.empty:
996
- row = row.copy() # Avoid SettingWithCopyWarning
997
- row['Season'] = season_str # Add the season string
998
- row = row.drop(columns=['Player_Normalized'], errors='ignore') # Drop helper column
999
-
1000
- if FIREBASE_INITIALIZED:
1001
- df_cleaned_for_firebase = clean_df_for_firebase(row.copy())
1002
- db_ref.set({
1003
- 'last_updated': datetime.utcnow().isoformat(),
1004
- 'data': df_cleaned_for_firebase.to_dict(orient='records')
1005
- })
1006
- logging.info(f"Stats for {player_name} in {season_str} saved to Firebase cache.")
1007
-
1008
  all_rows.append(row)
1009
  logging.info(f"DEBUG: Found stats for {player_name} in {season_str} on attempt {attempt+1}. Appending row.")
1010
- break # Success, exit retry loop for this season
1011
  else:
1012
- # This case means the season data was fetched, but the specific player wasn't in it.
1013
  logging.info(f"DEBUG: Player {player_name} not found in {season_str} stats (after getting season data) on attempt {attempt+1}. Retrying...")
1014
  time.sleep(1)
1015
- # If player not found after fetching season data, retrying might not help unless BRScraper has intermittent issues.
1016
- # Consider breaking if player not found in a valid scrape. For now, let it retry.
1017
- continue
1018
 
1019
  except Exception as e:
1020
  logging.warning(f"DEBUG: Exception on attempt {attempt+1} when fetching {season_str} {'playoff' if playoffs else 'regular season'} stats for {player_name}: {e}")
1021
- time.sleep(1) # Wait before retrying
1022
- if attempt == 2: # Last attempt failed
1023
  logging.error(f"DEBUG: All 3 attempts failed for {player_name} in {season_str}. Giving up on this season.")
1024
- continue # Go to next attempt or next season if all attempts failed
1025
 
1026
- time.sleep(0.5) # Small delay between fetching different seasons to be polite to the server
1027
-
1028
  if not all_rows:
1029
- logging.warning(f"DEBUG: No stats found for {player_name} in the requested seasons: {seasons_to_fetch}. Returning empty DataFrame.")
1030
  return pd.DataFrame()
1031
 
1032
  df = pd.concat(all_rows, ignore_index=True)
1033
 
1034
- # Standardize column names
1035
  mapping = {
1036
  'G':'GP','GS':'GS','MP':'MIN', 'FG%':'FG_PCT','3P%':'FG3_PCT','FT%':'FT_PCT',
1037
  'TRB':'REB','AST':'AST','STL':'STL','BLK':'BLK','TOV':'TO',
@@ -1042,16 +1018,15 @@ def get_player_career_stats_brscraper(player_name, seasons_to_fetch: list[str],
1042
  }
1043
  df = df.rename(columns={o:n for o,n in mapping.items() if o in df.columns})
1044
 
1045
- # Convert stats to numeric, coercing errors
1046
- non_num = {'Season','Player','Tm','Lg','Pos'} # Columns that should remain non-numeric
1047
  for col in df.columns:
1048
  if col not in non_num:
1049
  df[col] = pd.to_numeric(df[col], errors='coerce')
1050
 
1051
- df['Player'] = player_name # Ensure original (non-normalized) player name is in the final DataFrame
1052
- df = df.replace({np.nan: None}) # Replace NaN with None for JSON compatibility
1053
  return df
1054
-
1055
  def get_dashboard_info_brscraper():
1056
  if not FIREBASE_INITIALIZED:
1057
  logging.error("Firebase not initialized. Dashboard info skipped.")
 
946
  ]
947
  return pd.DataFrame({'name': common_players})
948
 
949
+ def get_player_career_stats_brscraper(player_name, seasons_to_check=10, playoffs=False):
950
  if not BRSCRAPER_AVAILABLE:
951
  logging.error("BRScraper is not available. Cannot fetch player career stats.")
952
  return pd.DataFrame()
 
 
953
  all_rows = []
954
+
955
+ # Normalize the input player name for consistent lookup
956
+ normalized_player_name = normalize_string(player_name)
957
 
958
+ seasons_to_try = get_available_seasons_util(seasons_to_check)
959
+
960
+ for season_str in seasons_to_try:
961
  end_year = int(season_str.split('–')[1])
962
 
963
+ # Implement retry logic for each season fetch
 
 
 
 
 
 
 
 
 
 
 
 
 
964
  for attempt in range(3): # Try up to 3 times
965
  try:
966
  logging.info(f"DEBUG: Attempt {attempt+1} for nba.get_stats for player '{player_name}' in season {season_str} (year: {end_year}, playoffs: {playoffs})...")
967
 
 
968
  df_season = nba.get_stats(end_year, info='per_game', playoffs=playoffs, rename=False)
969
 
970
  if df_season.empty:
971
  logging.warning(f"DEBUG: nba.get_stats returned empty DataFrame for {player_name} in {season_str} on attempt {attempt+1}. Retrying...")
972
+ time.sleep(1) # Wait a bit before retrying
973
+ continue # Go to next attempt
974
 
975
  if 'Player' not in df_season.columns:
976
  logging.warning(f"DEBUG: DataFrame for {player_name} in {season_str} has no 'Player' column on attempt {attempt+1}. Columns: {df_season.columns.tolist()}. Retrying...")
977
  time.sleep(1)
978
  continue
979
 
980
+ # Normalize player names in the DataFrame for comparison
981
  df_season['Player_Normalized'] = df_season['Player'].apply(normalize_string)
982
+
983
  row = df_season[df_season['Player_Normalized'] == normalized_player_name]
984
 
985
  if not row.empty:
986
+ row = row.copy()
987
+ row['Season'] = season_str
988
+ # Remove the temporary normalized column before appending
989
+ row = row.drop(columns=['Player_Normalized'], errors='ignore')
 
 
 
 
 
 
 
 
990
  all_rows.append(row)
991
  logging.info(f"DEBUG: Found stats for {player_name} in {season_str} on attempt {attempt+1}. Appending row.")
992
+ break # Break retry loop if successful
993
  else:
 
994
  logging.info(f"DEBUG: Player {player_name} not found in {season_str} stats (after getting season data) on attempt {attempt+1}. Retrying...")
995
  time.sleep(1)
996
+ continue # Go to next attempt
 
 
997
 
998
  except Exception as e:
999
  logging.warning(f"DEBUG: Exception on attempt {attempt+1} when fetching {season_str} {'playoff' if playoffs else 'regular season'} stats for {player_name}: {e}")
1000
+ time.sleep(1) # Wait before next retry
1001
+ if attempt == 2: # If last attempt failed
1002
  logging.error(f"DEBUG: All 3 attempts failed for {player_name} in {season_str}. Giving up on this season.")
1003
+ continue # Go to next attempt
1004
 
 
 
1005
  if not all_rows:
1006
+ logging.warning(f"DEBUG: No stats found for {player_name} across all attempted seasons. Returning empty DataFrame.")
1007
  return pd.DataFrame()
1008
 
1009
  df = pd.concat(all_rows, ignore_index=True)
1010
 
 
1011
  mapping = {
1012
  'G':'GP','GS':'GS','MP':'MIN', 'FG%':'FG_PCT','3P%':'FG3_PCT','FT%':'FT_PCT',
1013
  'TRB':'REB','AST':'AST','STL':'STL','BLK':'BLK','TOV':'TO',
 
1018
  }
1019
  df = df.rename(columns={o:n for o,n in mapping.items() if o in df.columns})
1020
 
1021
+ non_num = {'Season','Player','Tm','Lg','Pos'}
 
1022
  for col in df.columns:
1023
  if col not in non_num:
1024
  df[col] = pd.to_numeric(df[col], errors='coerce')
1025
 
1026
+ df['Player'] = player_name # Ensure original player name is kept
1027
+ df = df.replace({np.nan: None})
1028
  return df
1029
+
1030
  def get_dashboard_info_brscraper():
1031
  if not FIREBASE_INITIALIZED:
1032
  logging.error("Firebase not initialized. Dashboard info skipped.")