nesticot committed on
Commit
c951958
·
verified ·
1 Parent(s): 23623dd

Upload api_scraper.py

Browse files
Files changed (1) hide show
  1. api_scraper.py +912 -0
api_scraper.py ADDED
@@ -0,0 +1,912 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import polars as pl
3
+ import numpy as np
4
+ from datetime import datetime
5
+ from tqdm import tqdm
6
+ from pytz import timezone
7
+ import re
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+
10
+
11
+ class MLB_Scrape:
12
+
13
def __init__(self):
    """Create a scraper instance; there is no state to set up."""
    return None
16
+
17
def get_sport_id(self, timeout: float = 30.0):
    """
    Retrieves the list of sports from the MLB API and processes it into a Polars DataFrame.

    Parameters:
    - timeout (float): Seconds to wait for the server before giving up. Default is 30.0.

    Returns:
    - df (pl.DataFrame): A DataFrame built from the 'sports' entry of the JSON response.

    Raises:
    - requests.exceptions.Timeout: If the server does not respond within `timeout` seconds.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url='https://statsapi.mlb.com/api/v1/sports', timeout=timeout)
    payload = response.json()

    # Convert the 'sports' entry of the JSON response into a Polars DataFrame
    return pl.DataFrame(payload['sports'])
31
+
32
def get_sport_id_check(self, sport_id: int = 1):
    """
    Reports whether a sport ID is present in the sports list returned by get_sport_id().

    Parameters:
    - sport_id (int): The sport ID to look up. Default is 1.

    Returns:
    - bool: True when the ID is found in the 'id' column. Otherwise the full
      sports table is printed so a valid ID can be chosen, and False is returned.
    """
    # Pull the current sports table from the API
    available_sports = self.get_sport_id()

    # Guard clause: a known ID needs no further output
    if sport_id in available_sports['id']:
        return True

    print('Please Select a New Sport ID from the following')
    print(available_sports)
    return False
52
+
53
+
54
def get_game_types(self, timeout: float = 30.0):
    """
    Retrieves the different types of MLB games from the MLB API and processes them into a Polars DataFrame.

    Parameters:
    - timeout (float): Seconds to wait for the server before giving up. Default is 30.0.

    Returns:
    - df (pl.DataFrame): A DataFrame built directly from the JSON response.

    Raises:
    - requests.exceptions.Timeout: If the server does not respond within `timeout` seconds.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url='https://statsapi.mlb.com/api/v1/gameTypes', timeout=timeout)

    # The whole decoded payload is handed to Polars as-is
    return pl.DataFrame(response.json())
68
+
69
def get_schedule(self,
                 year_input: list = [2024],
                 sport_id: list = [1],
                 game_type: list = ['R'],
                 timeout: float = 30.0):
    """
    Retrieves the schedule of baseball games based on the specified parameters.

    Parameters:
    - year_input (list): A list of integer years to filter the schedule. Default is [2024].
    - sport_id (list): A list of integer sport IDs to filter the schedule. Default is [1].
    - game_type (list): A list of game type strings to filter the schedule. Default is ['R'].
    - timeout (float): Seconds to wait for the server before giving up. Default is 30.0.

    Returns:
    - game_df (pl.DataFrame | None): One row per unique game, sorted by date, with the
      columns game_id, time, date, away, away_id, home, home_id, state, venue_id,
      venue_name and gameday_type. 'time' is a US/Eastern "%I:%M %p" string.
      Returns None (after printing a message) when the schedule has no games.

    Raises:
    - ValueError: If any of the list arguments has the wrong type or element type.
    - requests.exceptions.Timeout: If the server does not respond within `timeout` seconds.
    """
    # NOTE: the list defaults are only read, never mutated, so sharing them across calls is safe.

    # Type checks
    if not isinstance(year_input, list) or not all(isinstance(year, int) for year in year_input):
        raise ValueError("year_input must be a list of integers.")
    if not isinstance(sport_id, list) or not all(isinstance(sid, int) for sid in sport_id):
        raise ValueError("sport_id must be a list of integers.")
    if not isinstance(game_type, list) or not all(isinstance(gt, str) for gt in game_type):
        raise ValueError("game_type must be a list of strings.")

    eastern = timezone('US/Eastern')

    # Convert input lists to comma-separated strings
    year_input_str = ','.join(str(x) for x in year_input)
    sport_id_str = ','.join(str(x) for x in sport_id)
    game_type_str = ','.join(str(x) for x in game_type)

    # Make API call to retrieve game schedule.
    # Without an explicit timeout, requests waits indefinitely on a stalled connection.
    game_call = requests.get(
        url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players',
        timeout=timeout).json()

    def safe_get(d, keys, default=None):
        """Walk nested dictionaries along `keys`; return `default` if the path breaks."""
        for key in keys:
            if not isinstance(d, dict) or key not in d:
                return default
            d = d[key]
        return d

    # Flatten dates -> games once instead of re-walking the payload per column
    games = [game
             for day in game_call.get('dates', [])
             for game in day.get('games', [])]

    if not games:
        print('Schedule Length of 0, please select different parameters.')
        return None

    # Missing numeric/date values are None (a Polars null) rather than NaN:
    # a float NaN inside an integer or string column is not a null and, depending
    # on the polars version, either fails construction or changes the column dtype.
    columns = {
        'game_id': [game.get('gamePk') for game in games],
        'time': [game.get('gameDate') for game in games],
        'date': [game.get('officialDate') for game in games],
        'away': [safe_get(game, ['teams', 'away', 'team', 'name'], "") for game in games],
        'away_id': [safe_get(game, ['teams', 'away', 'team', 'id']) for game in games],
        'home': [safe_get(game, ['teams', 'home', 'team', 'name'], "") for game in games],
        'home_id': [safe_get(game, ['teams', 'home', 'team', 'id']) for game in games],
        'state': [safe_get(game, ['status', 'codedGameState'], "") for game in games],
        'venue_id': [safe_get(game, ['venue', 'id']) for game in games],
        'venue_name': [safe_get(game, ['venue', 'name'], "") for game in games],
        'gameday_type': [safe_get(game, ['gamedayType'], "") for game in games],
    }

    try:
        # Create a Polars DataFrame with the extracted data
        game_df = pl.DataFrame(data=columns)

        # Convert date and time columns to appropriate formats
        game_df = game_df.with_columns(
            game_df['date'].str.to_date(),
            game_df['time'].str.to_datetime().dt.convert_time_zone(eastern.zone).dt.strftime("%I:%M %p"))

        # Remove duplicate games and sort by date. De-duplicating a non-empty
        # frame cannot empty it, so no second emptiness check is needed.
        game_df = game_df.unique(subset='game_id').sort('date')
    except KeyError:
        print('No Data for Selected Parameters')
        return None

    return game_df
160
+
161
+
162
def get_data(self, game_list_input: list, timeout: float = 30.0):
    """
    Retrieves live game data for a list of game IDs, one request at a time.

    Parameters:
    - game_list_input (list): A list of game IDs for which to retrieve live data.
    - timeout (float): Seconds to wait for the server on each request. Default is 30.0.

    Returns:
    - data_total (list): The decoded JSON response for each game ID, in input order.

    Raises:
    - requests.exceptions.Timeout: If the server does not respond within `timeout` seconds.
    """
    data_total = []
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')

    # Nothing to fetch: skip the progress bar entirely.
    # len() is used rather than truthiness so array-like inputs keep working.
    if len(game_list_input) == 0:
        return data_total

    # Iterate over the game IDs directly, with a progress bar
    for game_id in tqdm(game_list_input, desc="Processing", unit="iteration"):
        # Without an explicit timeout, requests waits indefinitely on a stalled connection.
        r = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live', timeout=timeout)
        # The status code is deliberately not checked; whatever JSON comes back is kept
        data_total.append(r.json())

    return data_total
183
+
184
def get_data_new(self, game_list_input: list, timeout: float = 30.0):
    """
    Retrieves live game data for a list of game IDs in parallel.

    Parameters:
    - game_list_input (list): A list of game IDs for which to retrieve live data.
    - timeout (float): Seconds to wait for the server on each request. Default is 30.0.

    Returns:
    - data_total (list): The decoded JSON response for each game ID, in the same
      order as game_list_input (not in completion order).

    Raises:
    - requests.exceptions.Timeout: If the server does not respond within `timeout` seconds.
    """
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')

    game_ids = list(game_list_input)
    if not game_ids:
        # Nothing to fetch: avoid starting a thread pool
        return []

    def fetch_data(game_id):
        # Without an explicit timeout, requests waits indefinitely on a stalled connection.
        r = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live', timeout=timeout)
        return r.json()

    # Pre-sized so each result lands at its input position as it completes
    data_total = [None] * len(game_ids)

    with ThreadPoolExecutor() as executor:
        # Map each future to the index of the game ID it was submitted for
        positions = {executor.submit(fetch_data, game_id): idx
                     for idx, game_id in enumerate(game_ids)}
        for future in tqdm(as_completed(positions), total=len(positions), desc="Processing", unit="iteration"):
            data_total[positions[future]] = future.result()

    return data_total
207
+
208
+ def get_data_df(self, data_list):
209
+ """
210
+ Converts a list of game data JSON objects into a Polars DataFrame.
211
+
212
+ Parameters:
213
+ - data_list (list): A list of JSON objects containing game data.
214
+
215
+ Returns:
216
+ - data_df (pl.DataFrame): A DataFrame containing the structured game data.
217
+ """
218
+ swing_list = ['X','F','S','D','E','T','W','L','M','Q','Z','R','O','J']
219
+ whiff_list = ['S','T','W','M','Q','O']
220
+ print('Converting Data to Dataframe.')
221
+ game_id = []
222
+ game_date = []
223
+ batter_id = []
224
+ batter_name = []
225
+ batter_hand = []
226
+ batter_team = []
227
+ batter_team_id = []
228
+ pitcher_id = []
229
+ pitcher_name = []
230
+ pitcher_hand = []
231
+ pitcher_team = []
232
+ pitcher_team_id = []
233
+
234
+ play_description = []
235
+ play_code = []
236
+ in_play = []
237
+ is_strike = []
238
+ is_swing = []
239
+ is_whiff = []
240
+ is_out = []
241
+ is_ball = []
242
+ is_review = []
243
+ pitch_type = []
244
+ pitch_description = []
245
+ strikes = []
246
+ balls = []
247
+ outs = []
248
+ strikes_after = []
249
+ balls_after = []
250
+ outs_after = []
251
+ inning = []
252
+
253
+ start_speed = []
254
+ end_speed = []
255
+ sz_top = []
256
+ sz_bot = []
257
+ x = []
258
+ y = []
259
+ ax = []
260
+ ay = []
261
+ az = []
262
+ pfxx = []
263
+ pfxz = []
264
+ px = []
265
+ pz = []
266
+ vx0 = []
267
+ vy0 = []
268
+ vz0 = []
269
+ x0 = []
270
+ y0 = []
271
+ z0 = []
272
+ zone = []
273
+ type_confidence = []
274
+ plate_time = []
275
+ extension = []
276
+ spin_rate = []
277
+ spin_direction = []
278
+ vb = []
279
+ ivb = []
280
+ hb = []
281
+
282
+ launch_speed = []
283
+ launch_angle = []
284
+ launch_distance = []
285
+ launch_location = []
286
+ trajectory = []
287
+ hardness = []
288
+ hit_x = []
289
+ hit_y = []
290
+
291
+ index_play = []
292
+ play_id = []
293
+ start_time = []
294
+ end_time = []
295
+ is_pitch = []
296
+ type_type = []
297
+
298
+
299
+ type_ab = []
300
+ ab_number = []
301
+ event = []
302
+ event_type = []
303
+ rbi = []
304
+ away_score = []
305
+ home_score = []
306
+
307
+ for data in data_list:
308
+ try:
309
+ for ab_id in range(len(data['liveData']['plays']['allPlays'])):
310
+ ab_list = data['liveData']['plays']['allPlays'][ab_id]
311
+ for n in range(len(ab_list['playEvents'])):
312
+
313
+
314
+ if ab_list['playEvents'][n]['isPitch'] == True or 'call' in ab_list['playEvents'][n]['details']:
315
+ ab_number.append(ab_list['atBatIndex'] if 'atBatIndex' in ab_list else None)
316
+
317
+ game_id.append(data['gamePk'])
318
+ game_date.append(data['gameData']['datetime']['officialDate'])
319
+ if 'matchup' in ab_list:
320
+ batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
321
+ if 'batter' in ab_list['matchup']:
322
+ batter_name.append(ab_list['matchup']['batter']['fullName'] if 'fullName' in ab_list['matchup']['batter'] else None)
323
+ else:
324
+ batter_name.append(None)
325
+
326
+ batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
327
+ pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
328
+ if 'pitcher' in ab_list['matchup']:
329
+ pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'fullName' in ab_list['matchup']['pitcher'] else None)
330
+ else:
331
+ pitcher_name.append(None)
332
+
333
+ pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
334
+
335
+
336
+ if ab_list['about']['isTopInning']:
337
+ batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
338
+ batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
339
+ pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
340
+ pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
341
+
342
+ else:
343
+ batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
344
+ batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
345
+ pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
346
+ pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
347
+
348
+ play_description.append(ab_list['playEvents'][n]['details']['description'] if 'description' in ab_list['playEvents'][n]['details'] else None)
349
+ play_code.append(ab_list['playEvents'][n]['details']['code'] if 'code' in ab_list['playEvents'][n]['details'] else None)
350
+ in_play.append(ab_list['playEvents'][n]['details']['isInPlay'] if 'isInPlay' in ab_list['playEvents'][n]['details'] else None)
351
+ is_strike.append(ab_list['playEvents'][n]['details']['isStrike'] if 'isStrike' in ab_list['playEvents'][n]['details'] else None)
352
+
353
+ if 'details' in ab_list['playEvents'][n]:
354
+ is_swing.append(True if ab_list['playEvents'][n]['details']['code'] in swing_list else None)
355
+ is_whiff.append(True if ab_list['playEvents'][n]['details']['code'] in whiff_list else None)
356
+ else:
357
+ is_swing.append(None)
358
+ is_whiff.append(None)
359
+
360
+ inning.append(ab_list['about']['inning'] if 'inning' in ab_list['about'] else None)
361
+ is_ball.append(ab_list['playEvents'][n]['details']['isOut'] if 'isOut' in ab_list['playEvents'][n]['details'] else None)
362
+ is_review.append(ab_list['playEvents'][n]['details']['hasReview'] if 'hasReview' in ab_list['playEvents'][n]['details'] else None)
363
+ pitch_type.append(ab_list['playEvents'][n]['details']['type']['code'] if 'type' in ab_list['playEvents'][n]['details'] else None)
364
+ pitch_description.append(ab_list['playEvents'][n]['details']['type']['description'] if 'type' in ab_list['playEvents'][n]['details'] else None)
365
+
366
+ if ab_list['playEvents'][n]['pitchNumber'] == 1:
367
+ strikes.append(0)
368
+ balls.append(0)
369
+ strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
370
+ balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
371
+ outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
372
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
373
+
374
+ else:
375
+ strikes.append(ab_list['playEvents'][n-1]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n-1]['count'] else None)
376
+ balls.append(ab_list['playEvents'][n-1]['count']['balls'] if 'balls' in ab_list['playEvents'][n-1]['count'] else None)
377
+ outs.append(ab_list['playEvents'][n-1]['count']['outs'] if 'outs' in ab_list['playEvents'][n-1]['count'] else None)
378
+
379
+ strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
380
+ balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
381
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
382
+
383
+
384
+ if 'pitchData' in ab_list['playEvents'][n]:
385
+
386
+ start_speed.append(ab_list['playEvents'][n]['pitchData']['startSpeed'] if 'startSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
387
+ end_speed.append(ab_list['playEvents'][n]['pitchData']['endSpeed'] if 'endSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
388
+
389
+ sz_top.append(ab_list['playEvents'][n]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in ab_list['playEvents'][n]['pitchData'] else None)
390
+ sz_bot.append(ab_list['playEvents'][n]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in ab_list['playEvents'][n]['pitchData'] else None)
391
+ x.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x'] if 'x' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
392
+ y.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y'] if 'y' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
393
+
394
+ ax.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aX'] if 'aX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
395
+ ay.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aY'] if 'aY' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
396
+ az.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aZ'] if 'aZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
397
+ pfxx.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxX'] if 'pfxX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
398
+ pfxz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
399
+ px.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pX'] if 'pX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
400
+ pz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pZ'] if 'pZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
401
+ vx0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vX0'] if 'vX0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
402
+ vy0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vY0'] if 'vY0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
403
+ vz0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vZ0'] if 'vZ0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
404
+ x0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x0'] if 'x0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
405
+ y0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y0'] if 'y0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
406
+ z0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['z0'] if 'z0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
407
+
408
+ zone.append(ab_list['playEvents'][n]['pitchData']['zone'] if 'zone' in ab_list['playEvents'][n]['pitchData'] else None)
409
+ type_confidence.append(ab_list['playEvents'][n]['pitchData']['typeConfidence'] if 'typeConfidence' in ab_list['playEvents'][n]['pitchData'] else None)
410
+ plate_time.append(ab_list['playEvents'][n]['pitchData']['plateTime'] if 'plateTime' in ab_list['playEvents'][n]['pitchData'] else None)
411
+ extension.append(ab_list['playEvents'][n]['pitchData']['extension'] if 'extension' in ab_list['playEvents'][n]['pitchData'] else None)
412
+
413
+ if 'breaks' in ab_list['playEvents'][n]['pitchData']:
414
+ spin_rate.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinRate'] if 'spinRate' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
415
+ spin_direction.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
416
+ vb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVertical'] if 'breakVertical' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
417
+ ivb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVerticalInduced'] if 'breakVerticalInduced' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
418
+ hb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakHorizontal'] if 'breakHorizontal' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
419
+
420
+ else:
421
+ start_speed.append(None)
422
+ end_speed.append(None)
423
+
424
+ sz_top.append(None)
425
+ sz_bot.append(None)
426
+ x.append(None)
427
+ y.append(None)
428
+
429
+ ax.append(None)
430
+ ay.append(None)
431
+ az.append(None)
432
+ pfxx.append(None)
433
+ pfxz.append(None)
434
+ px.append(None)
435
+ pz.append(None)
436
+ vx0.append(None)
437
+ vy0.append(None)
438
+ vz0.append(None)
439
+ x0.append(None)
440
+ y0.append(None)
441
+ z0.append(None)
442
+
443
+ zone.append(None)
444
+ type_confidence.append(None)
445
+ plate_time.append(None)
446
+ extension.append(None)
447
+ spin_rate.append(None)
448
+ spin_direction.append(None)
449
+ vb.append(None)
450
+ ivb.append(None)
451
+ hb.append(None)
452
+
453
+
454
+ if 'hitData' in ab_list['playEvents'][n]:
455
+ launch_speed.append(ab_list['playEvents'][n]['hitData']['launchSpeed'] if 'launchSpeed' in ab_list['playEvents'][n]['hitData'] else None)
456
+ launch_angle.append(ab_list['playEvents'][n]['hitData']['launchAngle'] if 'launchAngle' in ab_list['playEvents'][n]['hitData'] else None)
457
+ launch_distance.append(ab_list['playEvents'][n]['hitData']['totalDistance'] if 'totalDistance' in ab_list['playEvents'][n]['hitData'] else None)
458
+ launch_location.append(ab_list['playEvents'][n]['hitData']['location'] if 'location' in ab_list['playEvents'][n]['hitData'] else None)
459
+
460
+ trajectory.append(ab_list['playEvents'][n]['hitData']['trajectory'] if 'trajectory' in ab_list['playEvents'][n]['hitData'] else None)
461
+ hardness.append(ab_list['playEvents'][n]['hitData']['hardness'] if 'hardness' in ab_list['playEvents'][n]['hitData'] else None)
462
+ hit_x.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordX'] if 'coordX' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
463
+ hit_y.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordY'] if 'coordY' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
464
+ else:
465
+ launch_speed.append(None)
466
+ launch_angle.append(None)
467
+ launch_distance.append(None)
468
+ launch_location.append(None)
469
+ trajectory.append(None)
470
+ hardness.append(None)
471
+ hit_x.append(None)
472
+ hit_y.append(None)
473
+
474
+ index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
475
+ play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
476
+ start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
477
+ end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
478
+ is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
479
+ type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
480
+
481
+
482
+
483
+ if n == len(ab_list['playEvents']) - 1 :
484
+
485
+ type_ab.append(data['liveData']['plays']['allPlays'][ab_id]['result']['type'] if 'type' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
486
+ event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'] if 'event' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
487
+ event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'] if 'eventType' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
488
+ rbi.append(data['liveData']['plays']['allPlays'][ab_id]['result']['rbi'] if 'rbi' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
489
+ away_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['awayScore'] if 'awayScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
490
+ home_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['homeScore'] if 'homeScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
491
+ is_out.append(data['liveData']['plays']['allPlays'][ab_id]['result']['isOut'] if 'isOut' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
492
+
493
+ else:
494
+
495
+ type_ab.append(None)
496
+ event.append(None)
497
+ event_type.append(None)
498
+ rbi.append(None)
499
+ away_score.append(None)
500
+ home_score.append(None)
501
+ is_out.append(None)
502
+
503
+ elif ab_list['playEvents'][n]['count']['balls'] == 4:
504
+
505
+ event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'])
506
+ event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'])
507
+
508
+
509
+ game_id.append(data['gamePk'])
510
+ game_date.append(data['gameData']['datetime']['officialDate'])
511
+ batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
512
+ batter_name.append(ab_list['matchup']['batter']['fullName'] if 'batter' in ab_list['matchup'] else None)
513
+ batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
514
+ pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
515
+ pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else None)
516
+ pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
517
+ if ab_list['about']['isTopInning']:
518
+ batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
519
+ batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
520
+ pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
521
+ pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
522
+ else:
523
+ batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
524
+ batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
525
+ pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
526
+ pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
527
+
528
+ play_description.append(None)
529
+ play_code.append(None)
530
+ in_play.append(None)
531
+ is_strike.append(None)
532
+ is_ball.append(None)
533
+ is_review.append(None)
534
+ pitch_type.append(None)
535
+ pitch_description.append(None)
536
+ inning.append(None)
537
+ strikes.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
538
+ balls.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
539
+ outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
540
+ strikes_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
541
+ balls_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
542
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
543
+ index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
544
+ play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
545
+ start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
546
+ end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
547
+ is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
548
+ type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
549
+
550
+
551
+
552
+ is_swing.append(None)
553
+ is_whiff.append(None)
554
+ start_speed.append(None)
555
+ end_speed.append(None)
556
+ sz_top.append(None)
557
+ sz_bot.append(None)
558
+ x.append(None)
559
+ y.append(None)
560
+ ax.append(None)
561
+ ay.append(None)
562
+ az.append(None)
563
+ pfxx.append(None)
564
+ pfxz.append(None)
565
+ px.append(None)
566
+ pz.append(None)
567
+ vx0.append(None)
568
+ vy0.append(None)
569
+ vz0.append(None)
570
+ x0.append(None)
571
+ y0.append(None)
572
+ z0.append(None)
573
+ zone.append(None)
574
+ type_confidence.append(None)
575
+ plate_time.append(None)
576
+ extension.append(None)
577
+ spin_rate.append(None)
578
+ spin_direction.append(None)
579
+ vb.append(None)
580
+ ivb.append(None)
581
+ hb.append(None)
582
+ launch_speed.append(None)
583
+ launch_angle.append(None)
584
+ launch_distance.append(None)
585
+ launch_location.append(None)
586
+ trajectory.append(None)
587
+ hardness.append(None)
588
+ hit_x.append(None)
589
+ hit_y.append(None)
590
+ type_ab.append(None)
591
+ ab_number.append(None)
592
+
593
+ rbi.append(None)
594
+ away_score.append(None)
595
+ home_score.append(None)
596
+ is_out.append(None)
597
+
598
+ except KeyError:
599
+ print(f"No Data for Game")
600
+
601
+ df = pl.DataFrame(data={
602
+ 'game_id':game_id,
603
+ 'game_date':game_date,
604
+ 'batter_id':batter_id,
605
+ 'batter_name':batter_name,
606
+ 'batter_hand':batter_hand,
607
+ 'batter_team':batter_team,
608
+ 'batter_team_id':batter_team_id,
609
+ 'pitcher_id':pitcher_id,
610
+ 'pitcher_name':pitcher_name,
611
+ 'pitcher_hand':pitcher_hand,
612
+ 'pitcher_team':pitcher_team,
613
+ 'pitcher_team_id':pitcher_team_id,
614
+ 'ab_number':ab_number,
615
+ 'inning':inning,
616
+ 'play_description':play_description,
617
+ 'play_code':play_code,
618
+ 'in_play':in_play,
619
+ 'is_strike':is_strike,
620
+ 'is_swing':is_swing,
621
+ 'is_whiff':is_whiff,
622
+ 'is_out':is_out,
623
+ 'is_ball':is_ball,
624
+ 'is_review':is_review,
625
+ 'pitch_type':pitch_type,
626
+ 'pitch_description':pitch_description,
627
+ 'strikes':strikes,
628
+ 'balls':balls,
629
+ 'outs':outs,
630
+ 'strikes_after':strikes_after,
631
+ 'balls_after':balls_after,
632
+ 'outs_after':outs_after,
633
+ 'start_speed':start_speed,
634
+ 'end_speed':end_speed,
635
+ 'sz_top':sz_top,
636
+ 'sz_bot':sz_bot,
637
+ 'x':x,
638
+ 'y':y,
639
+ 'ax':ax,
640
+ 'ay':ay,
641
+ 'az':az,
642
+ 'pfxx':pfxx,
643
+ 'pfxz':pfxz,
644
+ 'px':px,
645
+ 'pz':pz,
646
+ 'vx0':vx0,
647
+ 'vy0':vy0,
648
+ 'vz0':vz0,
649
+ 'x0':x0,
650
+ 'y0':y0,
651
+ 'z0':z0,
652
+ 'zone':zone,
653
+ 'type_confidence':type_confidence,
654
+ 'plate_time':plate_time,
655
+ 'extension':extension,
656
+ 'spin_rate':spin_rate,
657
+ 'spin_direction':spin_direction,
658
+ 'vb':vb,
659
+ 'ivb':ivb,
660
+ 'hb':hb,
661
+ 'launch_speed':launch_speed,
662
+ 'launch_angle':launch_angle,
663
+ 'launch_distance':launch_distance,
664
+ 'launch_location':launch_location,
665
+ 'trajectory':trajectory,
666
+ 'hardness':hardness,
667
+ 'hit_x':hit_x,
668
+ 'hit_y':hit_y,
669
+ 'index_play':index_play,
670
+ 'play_id':play_id,
671
+ 'start_time':start_time,
672
+ 'end_time':end_time,
673
+ 'is_pitch':is_pitch,
674
+ 'type_type':type_type,
675
+ 'type_ab':type_ab,
676
+ 'event':event,
677
+ 'event_type':event_type,
678
+ 'rbi':rbi,
679
+ 'away_score':away_score,
680
+ 'home_score':home_score,
681
+
682
+ },strict=False
683
+ )
684
+
685
+ return df
686
+
687
def get_teams(self):
    """
    Retrieves information about MLB teams from the MLB API and processes it into a Polars DataFrame.

    Every field is read from the team record under its own key; a key that is absent
    yields a null in that column. Rows without a team ID are dropped. Teams that have
    no parent organization are treated as their own parent, so `parent_org_id` and
    `parent_org` fall back to the team's own ID and franchise name.

    Returns:
    - mlb_teams_df (pl.DataFrame): A DataFrame sorted by `team_id` with the columns
      team_id, city, name, franchise, abbreviation, parent_org_id, parent_org,
      league_id, league_name and parent_org_abbreviation.
    """
    # Make API call to retrieve team information
    teams = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()['teams']

    # A team record may lack the nested 'league' object entirely.
    leagues = [team.get('league') or {} for team in teams]

    # Create a Polars DataFrame with the extracted data.
    # 'city' comes from 'franchiseName' and 'franchise' from the full 'name'.
    mlb_teams_df = pl.DataFrame(data={
        'team_id': [team.get('id') for team in teams],
        'city': [team.get('franchiseName') for team in teams],
        'name': [team.get('teamName') for team in teams],
        'franchise': [team.get('name') for team in teams],
        'abbreviation': [team.get('abbreviation') for team in teams],
        'parent_org_id': [team.get('parentOrgId') for team in teams],
        'parent_org': [team.get('parentOrgName') for team in teams],
        'league_id': [league.get('id') for league in leagues],
        'league_name': [league.get('name') for league in leagues],
    }).unique().drop_nulls(subset=['team_id']).sort('team_id')

    # Fill missing parent organization IDs / names with the team's own ID / franchise name.
    # The two expressions read different columns, so they can be evaluated together.
    mlb_teams_df = mlb_teams_df.with_columns(
        pl.when(pl.col('parent_org_id').is_null())
        .then(pl.col('team_id'))
        .otherwise(pl.col('parent_org_id'))
        .alias('parent_org_id'),
        pl.when(pl.col('parent_org').is_null())
        .then(pl.col('franchise'))
        .otherwise(pl.col('parent_org'))
        .alias('parent_org'),
    )

    # Look up each parent organization's abbreviation by joining the table on itself:
    # (team_id, abbreviation) is re-labelled as (parent_org_id, parent_org_abbreviation).
    abbreviation_df = mlb_teams_df.select(['team_id', 'abbreviation']).rename(
        {'team_id': 'parent_org_id', 'abbreviation': 'parent_org_abbreviation'}
    )

    # Left join keeps teams whose parent organization is not itself in the team list.
    mlb_teams_df = mlb_teams_df.join(abbreviation_df, on='parent_org_id', how='left')

    return mlb_teams_df
747
+
748
def get_leagues(self):
    """
    Fetch the league list from the MLB API and return it as a Polars DataFrame.

    Returns:
    - leagues_df (pl.DataFrame): One row per league with the columns league_id,
      league_name, league_abbreviation and sport_id. A field that is absent from a
      league record is stored as null.
    """
    def _field(record, key):
        # Value stored under `key`, or None when the record does not carry it.
        return record[key] if key in record else None

    # Query the leagues endpoint and keep only the list of league records
    payload = requests.get(url='https://statsapi.mlb.com/api/v1/leagues/').json()
    records = payload['leagues']

    # Walk the records once per column, sport first (it is nested one level deeper)
    sport_ids = [rec['sport']['id'] if 'sport' in rec else None for rec in records]
    ids = [_field(rec, 'id') for rec in records]
    names = [_field(rec, 'name') for rec in records]
    abbreviations = [_field(rec, 'abbreviation') for rec in records]

    # Assemble the columns in the published order
    columns = {
        'league_id': ids,
        'league_name': names,
        'league_abbreviation': abbreviations,
        'sport_id': sport_ids,
    }
    leagues_df = pl.DataFrame(data=columns)

    return leagues_df
773
+
774
def get_player_games_list(self, player_id: int,
                          season: int,
                          start_date: str = None,
                          end_date: str = None,
                          sport_id: int = 1,
                          game_type: list = ['R'],
                          pitching: bool = True):
    """
    Retrieves a list of game IDs for a specific player in a given season.

    Parameters:
    - player_id (int): The ID of the player.
    - season (int): The season year for which to retrieve the game list.
    - start_date (str): The start date (YYYY-MM-DD) of the range (default is January 1st of the specified season).
    - end_date (str): The end date (YYYY-MM-DD) of the range (default is December 31st of the specified season).
    - sport_id (int): The ID of the sport for which to retrieve player data.
    - game_type (list): A list of game types to filter the schedule. Default is ['R']. The list is only read, never modified.
    - pitching (bool): Return pitching games when True, hitting games when False.

    Returns:
    - player_game_list (list): A list of game IDs in which the player participated during the specified season.
      An empty list is returned (and a message printed) when the response carries no player or no stats.

    Raises:
    - ValueError: If start_date or end_date is not exactly in YYYY-MM-DD format.
    """
    # Set default start and end dates if not provided
    if not start_date:
        start_date = f'{season}-01-01'
    if not end_date:
        end_date = f'{season}-12-31'

    # Determine the group based on the pitching flag
    group = 'pitching' if pitching else 'hitting'

    # Validate date format. fullmatch is used because `$` with match() would
    # also accept a value that ends in a newline.
    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
    if not date_pattern.fullmatch(start_date):
        raise ValueError(f"start_date {start_date} is not in YYYY-MM-DD format")
    if not date_pattern.fullmatch(end_date):
        raise ValueError(f"end_date {end_date} is not in YYYY-MM-DD format")

    # Convert game type list to a comma-separated string
    game_type_str = ','.join([str(x) for x in game_type])

    # Make API call to retrieve player game logs (https, like the other endpoints in this file)
    response = requests.get(url=f'https://statsapi.mlb.com/api/v1/people/{player_id}?hydrate=stats(group={group},type=gameLog,season={season},startDate={start_date},endDate={end_date},sportId={sport_id},gameType=[{game_type_str}]),hydrations').json()

    # Check if stats are available in the response; a missing/empty 'people' or
    # 'stats' list is treated the same as "no games" instead of raising.
    people = response.get('people') or []
    stats = people[0].get('stats') if people else None
    if not stats:
        print(f'No {group} games found for player {player_id} in season {season}')
        return []

    # Extract game IDs from the first stats entry of the API response
    player_game_list = [split['game']['gamePk'] for split in stats[0].get('splits', [])]

    return player_game_list
827
+
828
def get_players(self, sport_id: int, season: int, game_type: list = ['R']):
    """
    Retrieves data frame of players in a given league

    Parameters:
    - sport_id (int): The ID of the sport for which to retrieve player data. It is applied to both
      endpoints, including the spring-training ('S') one.
    - season (int): The season year for which to retrieve player data.
    - game_type (list): A list of game types to filter the players. Default is ['R']. The list is only read, never modified.

    Returns:
    - player_df (pl.DataFrame): A DataFrame containing player information.
      When game_type is exactly ['S'] the columns are player_id, first_name, last_name, name,
      position and team (de-duplicated and sorted by player_id). For every other game type the
      columns are player_id, first_name, last_name, name, position, team, weight, height, age
      and birthDate. Fields missing from a player record are stored as null.
    """
    game_type_str = ','.join([str(x) for x in game_type])

    # If game type is 'S', fetch data from a different endpoint
    if game_type_str == 'S':
        base_url = f'https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?&env=prod&season={season}&sportId={sport_id}&stats=season'
        stat_urls = [
            # Pitchers
            f'{base_url}&group=pitching&gameType=S&limit=1000000&offset=0&sortStat=inningsPitched&order=asc',
            # Batters
            f'{base_url}&group=hitting&gameType=S&limit=1000000&offset=0',
        ]

        # Collect pitcher and batter records into one list so a single DataFrame is built;
        # this avoids concatenating two frames whose inferred schemas could differ when
        # one of the responses is empty.
        records = []
        for stat_url in stat_urls:
            records.extend(requests.get(stat_url).json()['stats'])

        # Players who both pitched and hit appear twice and are collapsed by unique()
        df = pl.DataFrame(data={
            'player_id': [rec.get('playerId') for rec in records],
            'first_name': [rec.get('playerFirstName') for rec in records],
            'last_name': [rec.get('playerLastName') for rec in records],
            'name': [rec.get('playerFullName') for rec in records],
            'position': [rec.get('primaryPositionAbbrev') for rec in records],
            'team': [rec.get('teamId') for rec in records],
        }).unique().drop_nulls(subset=['player_id']).sort('player_id')

    else:
        # Fetch player data for other game types
        player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players?season={season}&gameType=[{game_type_str}]').json()['people']

        # Position and team are nested objects that may be absent from a record
        positions = [player.get('primaryPosition') or {} for player in player_data]
        current_teams = [player.get('currentTeam') or {} for player in player_data]

        df = pl.DataFrame(data={
            'player_id': [player.get('id') for player in player_data],
            'first_name': [player.get('firstName') for player in player_data],
            'last_name': [player.get('lastName') for player in player_data],
            'name': [player.get('fullName') for player in player_data],
            'position': [position.get('abbreviation') for position in positions],
            'team': [team.get('id') for team in current_teams],
            'weight': [player.get('weight') for player in player_data],
            'height': [player.get('height') for player in player_data],
            'age': [player.get('currentAge') for player in player_data],
            'birthDate': [player.get('birthDate') for player in player_data],
        })

    return df