Upload api_scraper.py with huggingface_hub
Browse files- api_scraper.py +19 -14
api_scraper.py
CHANGED
|
@@ -100,20 +100,25 @@ class MLB_Scrape:
|
|
| 100 |
# Make API call to retrieve game schedule
|
| 101 |
game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
|
| 102 |
try:
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
|
| 118 |
# Create a Polars DataFrame with the extracted data
|
| 119 |
game_df = pl.DataFrame(data={'game_id': game_list,
|
|
|
|
| 100 |
# Make API call to retrieve game schedule
|
| 101 |
game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
|
| 102 |
try:
|
| 103 |
+
def safe_get(d, keys, default=np.nan):
    """Safely retrieve a value from nested dictionaries.

    Walks ``d`` by following ``keys`` in order and returns the value stored
    at the end of that path.

    Args:
        d: The outermost mapping to search (a game entry in the caller).
        keys: Ordered sequence of keys describing the path to the value.
        default: Value returned when the path cannot be followed.

    Returns:
        The value found at the end of the path, or ``default`` when
        ``keys`` is empty, a key is missing, or a non-dict value is met
        before the last key has been consumed.
    """
    # An empty path has nothing to look up; fall back to the default.
    if not keys:
        return default

    # Private sentinel so that a stored ``None`` or ``{}`` is not mistaken
    # for a missing key.
    missing = object()
    current = d
    for key in keys:
        # A non-dict here means the path is longer than the data is deep;
        # the partial value is not what the caller asked for.
        if not isinstance(current, dict):
            return default
        current = current.get(key, missing)
        if current is missing:
            return default
    return current
|
| 110 |
+
|
| 111 |
+
game_list = [item for sublist in [[y.get('gamePk', np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 112 |
+
time_list = [item for sublist in [[y.get('gameDate', np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 113 |
+
date_list = [item for sublist in [[y.get('officialDate', np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 114 |
+
away_team_list = [item for sublist in [[safe_get(y, ['teams', 'away', 'team', 'name'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 115 |
+
away_team_id_list = [item for sublist in [[safe_get(y, ['teams', 'away', 'team', 'id'], np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 116 |
+
home_team_list = [item for sublist in [[safe_get(y, ['teams', 'home', 'team', 'name'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 117 |
+
home_team_id_list = [item for sublist in [[safe_get(y, ['teams', 'home', 'team', 'id'], np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 118 |
+
state_list = [item for sublist in [[safe_get(y, ['status', 'codedGameState'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 119 |
+
venue_id = [item for sublist in [[safe_get(y, ['venue', 'id'], np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 120 |
+
venue_name = [item for sublist in [[safe_get(y, ['venue', 'name'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 121 |
+
gameday_type = [item for sublist in [[safe_get(y, ['gamedayType'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
|
| 122 |
|
| 123 |
# Create a Polars DataFrame with the extracted data
|
| 124 |
game_df = pl.DataFrame(data={'game_id': game_list,
|