nesticot committed on
Commit
d5f840e
·
verified ·
1 Parent(s): 3ef5ee4

Upload api_scraper.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. api_scraper.py +19 -14
api_scraper.py CHANGED
@@ -100,20 +100,25 @@ class MLB_Scrape:
100
  # Make API call to retrieve game schedule
101
  game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
102
  try:
103
- # Extract relevant data from the API response
104
- game_list = [item for sublist in [[y['gamePk'] for y in x['games']] for x in game_call['dates']] for item in sublist]
105
- time_list = [item for sublist in [[y['gameDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
106
- date_list = [item for sublist in [[y['officialDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
107
- away_team_list = [item for sublist in [[y['teams']['away']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
108
- away_team_id_list = [item for sublist in [[y['teams']['away']['team']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
109
- home_team_list = [item for sublist in [[y['teams']['home']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
110
- home_team_id_list = [item for sublist in [[y['teams']['home']['team']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
111
- state_list = [item for sublist in [[y['status']['codedGameState'] for y in x['games']] for x in game_call['dates']] for item in sublist]
112
- venue_id = [item for sublist in [[y['venue']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
113
- venue_name = [item for sublist in [[y['venue']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
114
- gameday_type = [item for sublist in [[y['gamedayType'] for y in x['games']] for x in game_call['dates']] for item in sublist]
115
- # Create a Polars DataFrame with the extracted data
116
-
 
 
 
 
 
117
 
118
  # Create a Polars DataFrame with the extracted data
119
  game_df = pl.DataFrame(data={'game_id': game_list,
 
100
  # Make API call to retrieve game schedule
101
  game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
102
  try:
103
def safe_get(d, keys, default=np.nan):
    """Safely retrieve a leaf value from nested dictionaries.

    Walks ``d`` along ``keys`` one level at a time and returns the value
    stored at the end of the path.

    Args:
        d: The outermost mapping to read from. Anything that is not a
            ``dict`` is treated as "nothing to look up".
        keys: Ordered sequence of keys describing the path to follow.
        default: Value returned when the path cannot be fully resolved.

    Returns:
        The value found at the end of ``keys``. ``default`` is returned when
        ``keys`` is empty, when any key along the path is absent, when a
        non-dict value is met before the path is exhausted, or when the path
        ends on a ``dict`` rather than a leaf value.
    """
    missing = object()  # Sentinel so a stored None is distinguishable from an absent key
    node = d
    value = missing
    for key in keys:
        # A scalar/None part-way down the path means the remaining keys
        # cannot be resolved; never hand that intermediate value back.
        if not isinstance(node, dict):
            return default
        value = node.get(key, missing)
        if value is missing:
            return default
        node = value
    # Empty key list, or a path that stops on a container instead of a leaf.
    if value is missing or isinstance(value, dict):
        return default
    return value
110
+
111
+ game_list = [item for sublist in [[y.get('gamePk', np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
112
+ time_list = [item for sublist in [[y.get('gameDate', np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
113
+ date_list = [item for sublist in [[y.get('officialDate', np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
114
+ away_team_list = [item for sublist in [[safe_get(y, ['teams', 'away', 'team', 'name'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
115
+ away_team_id_list = [item for sublist in [[safe_get(y, ['teams', 'away', 'team', 'id'], np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
116
+ home_team_list = [item for sublist in [[safe_get(y, ['teams', 'home', 'team', 'name'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
117
+ home_team_id_list = [item for sublist in [[safe_get(y, ['teams', 'home', 'team', 'id'], np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
118
+ state_list = [item for sublist in [[safe_get(y, ['status', 'codedGameState'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
119
+ venue_id = [item for sublist in [[safe_get(y, ['venue', 'id'], np.nan) for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
120
+ venue_name = [item for sublist in [[safe_get(y, ['venue', 'name'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
121
+ gameday_type = [item for sublist in [[safe_get(y, ['gamedayType'], "") for y in x.get('games', [])] for x in game_call.get('dates', [])] for item in sublist]
122
 
123
  # Create a Polars DataFrame with the extracted data
124
  game_df = pl.DataFrame(data={'game_id': game_list,