Update main.py
Browse files
main.py
CHANGED
|
@@ -946,92 +946,68 @@ def _scrape_player_index_brscraper():
|
|
| 946 |
]
|
| 947 |
return pd.DataFrame({'name': common_players})
|
| 948 |
|
| 949 |
-
def get_player_career_stats_brscraper(player_name,
|
| 950 |
if not BRSCRAPER_AVAILABLE:
|
| 951 |
logging.error("BRScraper is not available. Cannot fetch player career stats.")
|
| 952 |
return pd.DataFrame()
|
| 953 |
-
|
| 954 |
-
normalized_player_name = normalize_string(player_name) # Normalize input player name once
|
| 955 |
all_rows = []
|
|
|
|
|
|
|
|
|
|
| 956 |
|
| 957 |
-
|
|
|
|
|
|
|
| 958 |
end_year = int(season_str.split('–')[1])
|
| 959 |
|
| 960 |
-
#
|
| 961 |
-
cache_key = f"{normalized_player_name}_{end_year}_{'playoffs' if playoffs else 'regular'}"
|
| 962 |
-
db_ref = db.reference(f'scraped_data/player_season_stats/{cache_key}')
|
| 963 |
-
|
| 964 |
-
if FIREBASE_INITIALIZED:
|
| 965 |
-
cached_data = db_ref.get()
|
| 966 |
-
if cached_data and not is_data_stale(cached_data.get('last_updated'), max_age_hours=24*7): # Cache for 7 days
|
| 967 |
-
logging.info(f"Loading stats for {player_name} in {season_str} (playoffs: {playoffs}) from Firebase cache.")
|
| 968 |
-
all_rows.append(pd.DataFrame.from_records(cached_data['data']))
|
| 969 |
-
continue # Skip scraping for this season if found in cache
|
| 970 |
-
else:
|
| 971 |
-
logging.info(f"Stats for {player_name} in {season_str} cache stale or not found. Scraping...")
|
| 972 |
-
|
| 973 |
-
# Retry mechanism for scraping
|
| 974 |
for attempt in range(3): # Try up to 3 times
|
| 975 |
try:
|
| 976 |
logging.info(f"DEBUG: Attempt {attempt+1} for nba.get_stats for player '{player_name}' in season {season_str} (year: {end_year}, playoffs: {playoffs})...")
|
| 977 |
|
| 978 |
-
# Fetch all player stats for the given season and type (regular/playoffs)
|
| 979 |
df_season = nba.get_stats(end_year, info='per_game', playoffs=playoffs, rename=False)
|
| 980 |
|
| 981 |
if df_season.empty:
|
| 982 |
logging.warning(f"DEBUG: nba.get_stats returned empty DataFrame for {player_name} in {season_str} on attempt {attempt+1}. Retrying...")
|
| 983 |
-
time.sleep(1) # Wait before retrying
|
| 984 |
-
continue
|
| 985 |
|
| 986 |
if 'Player' not in df_season.columns:
|
| 987 |
logging.warning(f"DEBUG: DataFrame for {player_name} in {season_str} has no 'Player' column on attempt {attempt+1}. Columns: {df_season.columns.tolist()}. Retrying...")
|
| 988 |
time.sleep(1)
|
| 989 |
continue
|
| 990 |
|
| 991 |
-
# Normalize player names
|
| 992 |
df_season['Player_Normalized'] = df_season['Player'].apply(normalize_string)
|
|
|
|
| 993 |
row = df_season[df_season['Player_Normalized'] == normalized_player_name]
|
| 994 |
|
| 995 |
if not row.empty:
|
| 996 |
-
row = row.copy()
|
| 997 |
-
row['Season'] = season_str
|
| 998 |
-
|
| 999 |
-
|
| 1000 |
-
if FIREBASE_INITIALIZED:
|
| 1001 |
-
df_cleaned_for_firebase = clean_df_for_firebase(row.copy())
|
| 1002 |
-
db_ref.set({
|
| 1003 |
-
'last_updated': datetime.utcnow().isoformat(),
|
| 1004 |
-
'data': df_cleaned_for_firebase.to_dict(orient='records')
|
| 1005 |
-
})
|
| 1006 |
-
logging.info(f"Stats for {player_name} in {season_str} saved to Firebase cache.")
|
| 1007 |
-
|
| 1008 |
all_rows.append(row)
|
| 1009 |
logging.info(f"DEBUG: Found stats for {player_name} in {season_str} on attempt {attempt+1}. Appending row.")
|
| 1010 |
-
break #
|
| 1011 |
else:
|
| 1012 |
-
# This case means the season data was fetched, but the specific player wasn't in it.
|
| 1013 |
logging.info(f"DEBUG: Player {player_name} not found in {season_str} stats (after getting season data) on attempt {attempt+1}. Retrying...")
|
| 1014 |
time.sleep(1)
|
| 1015 |
-
#
|
| 1016 |
-
# Consider breaking if player not found in a valid scrape. For now, let it retry.
|
| 1017 |
-
continue
|
| 1018 |
|
| 1019 |
except Exception as e:
|
| 1020 |
logging.warning(f"DEBUG: Exception on attempt {attempt+1} when fetching {season_str} {'playoff' if playoffs else 'regular season'} stats for {player_name}: {e}")
|
| 1021 |
-
time.sleep(1) # Wait before
|
| 1022 |
-
if attempt == 2: #
|
| 1023 |
logging.error(f"DEBUG: All 3 attempts failed for {player_name} in {season_str}. Giving up on this season.")
|
| 1024 |
-
continue # Go to next attempt
|
| 1025 |
|
| 1026 |
-
time.sleep(0.5) # Small delay between fetching different seasons to be polite to the server
|
| 1027 |
-
|
| 1028 |
if not all_rows:
|
| 1029 |
-
logging.warning(f"DEBUG: No stats found for {player_name}
|
| 1030 |
return pd.DataFrame()
|
| 1031 |
|
| 1032 |
df = pd.concat(all_rows, ignore_index=True)
|
| 1033 |
|
| 1034 |
-
# Standardize column names
|
| 1035 |
mapping = {
|
| 1036 |
'G':'GP','GS':'GS','MP':'MIN', 'FG%':'FG_PCT','3P%':'FG3_PCT','FT%':'FT_PCT',
|
| 1037 |
'TRB':'REB','AST':'AST','STL':'STL','BLK':'BLK','TOV':'TO',
|
|
@@ -1042,16 +1018,15 @@ def get_player_career_stats_brscraper(player_name, seasons_to_fetch: list[str],
|
|
| 1042 |
}
|
| 1043 |
df = df.rename(columns={o:n for o,n in mapping.items() if o in df.columns})
|
| 1044 |
|
| 1045 |
-
|
| 1046 |
-
non_num = {'Season','Player','Tm','Lg','Pos'} # Columns that should remain non-numeric
|
| 1047 |
for col in df.columns:
|
| 1048 |
if col not in non_num:
|
| 1049 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 1050 |
|
| 1051 |
-
df['Player'] = player_name # Ensure original
|
| 1052 |
-
df = df.replace({np.nan: None})
|
| 1053 |
return df
|
| 1054 |
-
|
| 1055 |
def get_dashboard_info_brscraper():
|
| 1056 |
if not FIREBASE_INITIALIZED:
|
| 1057 |
logging.error("Firebase not initialized. Dashboard info skipped.")
|
|
|
|
| 946 |
]
|
| 947 |
return pd.DataFrame({'name': common_players})
|
| 948 |
|
| 949 |
+
def get_player_career_stats_brscraper(player_name, seasons_to_check=10, playoffs=False):
|
| 950 |
if not BRSCRAPER_AVAILABLE:
|
| 951 |
logging.error("BRScraper is not available. Cannot fetch player career stats.")
|
| 952 |
return pd.DataFrame()
|
|
|
|
|
|
|
| 953 |
all_rows = []
|
| 954 |
+
|
| 955 |
+
# Normalize the input player name for consistent lookup
|
| 956 |
+
normalized_player_name = normalize_string(player_name)
|
| 957 |
|
| 958 |
+
seasons_to_try = get_available_seasons_util(seasons_to_check)
|
| 959 |
+
|
| 960 |
+
for season_str in seasons_to_try:
|
| 961 |
end_year = int(season_str.split('–')[1])
|
| 962 |
|
| 963 |
+
# Implement retry logic for each season fetch
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 964 |
for attempt in range(3): # Try up to 3 times
|
| 965 |
try:
|
| 966 |
logging.info(f"DEBUG: Attempt {attempt+1} for nba.get_stats for player '{player_name}' in season {season_str} (year: {end_year}, playoffs: {playoffs})...")
|
| 967 |
|
|
|
|
| 968 |
df_season = nba.get_stats(end_year, info='per_game', playoffs=playoffs, rename=False)
|
| 969 |
|
| 970 |
if df_season.empty:
|
| 971 |
logging.warning(f"DEBUG: nba.get_stats returned empty DataFrame for {player_name} in {season_str} on attempt {attempt+1}. Retrying...")
|
| 972 |
+
time.sleep(1) # Wait a bit before retrying
|
| 973 |
+
continue # Go to next attempt
|
| 974 |
|
| 975 |
if 'Player' not in df_season.columns:
|
| 976 |
logging.warning(f"DEBUG: DataFrame for {player_name} in {season_str} has no 'Player' column on attempt {attempt+1}. Columns: {df_season.columns.tolist()}. Retrying...")
|
| 977 |
time.sleep(1)
|
| 978 |
continue
|
| 979 |
|
| 980 |
+
# Normalize player names in the DataFrame for comparison
|
| 981 |
df_season['Player_Normalized'] = df_season['Player'].apply(normalize_string)
|
| 982 |
+
|
| 983 |
row = df_season[df_season['Player_Normalized'] == normalized_player_name]
|
| 984 |
|
| 985 |
if not row.empty:
|
| 986 |
+
row = row.copy()
|
| 987 |
+
row['Season'] = season_str
|
| 988 |
+
# Remove the temporary normalized column before appending
|
| 989 |
+
row = row.drop(columns=['Player_Normalized'], errors='ignore')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 990 |
all_rows.append(row)
|
| 991 |
logging.info(f"DEBUG: Found stats for {player_name} in {season_str} on attempt {attempt+1}. Appending row.")
|
| 992 |
+
break # Break retry loop if successful
|
| 993 |
else:
|
|
|
|
| 994 |
logging.info(f"DEBUG: Player {player_name} not found in {season_str} stats (after getting season data) on attempt {attempt+1}. Retrying...")
|
| 995 |
time.sleep(1)
|
| 996 |
+
continue # Go to next attempt
|
|
|
|
|
|
|
| 997 |
|
| 998 |
except Exception as e:
|
| 999 |
logging.warning(f"DEBUG: Exception on attempt {attempt+1} when fetching {season_str} {'playoff' if playoffs else 'regular season'} stats for {player_name}: {e}")
|
| 1000 |
+
time.sleep(1) # Wait before next retry
|
| 1001 |
+
if attempt == 2: # If last attempt failed
|
| 1002 |
logging.error(f"DEBUG: All 3 attempts failed for {player_name} in {season_str}. Giving up on this season.")
|
| 1003 |
+
continue # Go to next attempt
|
| 1004 |
|
|
|
|
|
|
|
| 1005 |
if not all_rows:
|
| 1006 |
+
logging.warning(f"DEBUG: No stats found for {player_name} across all attempted seasons. Returning empty DataFrame.")
|
| 1007 |
return pd.DataFrame()
|
| 1008 |
|
| 1009 |
df = pd.concat(all_rows, ignore_index=True)
|
| 1010 |
|
|
|
|
| 1011 |
mapping = {
|
| 1012 |
'G':'GP','GS':'GS','MP':'MIN', 'FG%':'FG_PCT','3P%':'FG3_PCT','FT%':'FT_PCT',
|
| 1013 |
'TRB':'REB','AST':'AST','STL':'STL','BLK':'BLK','TOV':'TO',
|
|
|
|
| 1018 |
}
|
| 1019 |
df = df.rename(columns={o:n for o,n in mapping.items() if o in df.columns})
|
| 1020 |
|
| 1021 |
+
non_num = {'Season','Player','Tm','Lg','Pos'}
|
|
|
|
| 1022 |
for col in df.columns:
|
| 1023 |
if col not in non_num:
|
| 1024 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
| 1025 |
|
| 1026 |
+
df['Player'] = player_name # Ensure original player name is kept
|
| 1027 |
+
df = df.replace({np.nan: None})
|
| 1028 |
return df
|
| 1029 |
+
|
| 1030 |
def get_dashboard_info_brscraper():
|
| 1031 |
if not FIREBASE_INITIALIZED:
|
| 1032 |
logging.error("Firebase not initialized. Dashboard info skipped.")
|