nesticot committed on
Commit
8c90a6d
·
verified ·
1 Parent(s): add98af

Update api_scraper.py

Browse files
Files changed (1) hide show
  1. api_scraper.py +364 -425
api_scraper.py CHANGED
@@ -6,7 +6,6 @@ from tqdm import tqdm
6
  from pytz import timezone
7
  import re
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
- import time
10
 
11
 
12
  class MLB_Scrape:
@@ -100,48 +99,64 @@ class MLB_Scrape:
100
 
101
  # Make API call to retrieve game schedule
102
  game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- # Extract relevant data from the API response
105
- game_list = [item for sublist in [[y['gamePk'] for y in x['games']] for x in game_call['dates']] for item in sublist]
106
- time_list = [item for sublist in [[y['gameDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
107
- date_list = [item for sublist in [[y['officialDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
108
- away_team_list = [item for sublist in [[y['teams']['away']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
109
- home_team_list = [item for sublist in [[y['teams']['home']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
110
- state_list = [item for sublist in [[y['status']['codedGameState'] for y in x['games']] for x in game_call['dates']] for item in sublist]
111
- venue_id = [item for sublist in [[y['venue']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
112
- venue_name = [item for sublist in [[y['venue']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
113
-
114
- # Create a Polars DataFrame with the extracted data
115
- game_df = pl.DataFrame(data={'game_id': game_list,
116
- 'time': time_list,
117
- 'date': date_list,
118
- 'away': away_team_list,
119
- 'home': home_team_list,
120
- 'state': state_list,
121
- 'venue_id': venue_id,
122
- 'venue_name': venue_name})
123
-
124
- # Check if the DataFrame is empty
125
- if len(game_df) == 0:
126
- return 'Schedule Length of 0, please select different parameters.'
127
-
128
- # Convert date and time columns to appropriate formats
129
- game_df = game_df.with_columns(
130
- game_df['date'].str.to_date(),
131
- game_df['time'].str.to_datetime().dt.convert_time_zone(eastern.zone).dt.strftime("%I:%M %p"))
132
-
133
- # Remove duplicate games and sort by date
134
- game_df = game_df.unique(subset='game_id').sort('date')
135
-
136
- # Check again if the DataFrame is empty after processing
137
- if len(game_df) == 0:
138
- return 'Schedule Length of 0, please select different parameters.'
139
 
140
  return game_df
141
 
 
142
  def get_data(self, game_list_input: list):
143
  """
144
- Retrieves live game data for a list of game IDs.
145
 
146
  Parameters:
147
  - game_list_input (list): A list of game IDs for which to retrieve live data.
@@ -152,15 +167,14 @@ class MLB_Scrape:
152
  data_total = []
153
  print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')
154
 
155
- # Iterate over the list of game IDs with a progress bar
156
- for i in tqdm(range(len(game_list_input)), desc="Processing", unit="iteration"):
157
- # Make a GET request to the MLB API for each game ID
158
- r = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_list_input[i]}/feed/live')
159
- # Append the JSON response to the data_total list
160
- data_total.append(r.json())
161
 
162
- return data_total
163
-
 
 
164
 
165
  return data_total
166
 
@@ -263,125 +277,255 @@ class MLB_Scrape:
263
  home_score = []
264
 
265
  for data in data_list:
266
- for ab_id in range(len(data['liveData']['plays']['allPlays'])):
267
- ab_list = data['liveData']['plays']['allPlays'][ab_id]
268
- for n in range(len(ab_list['playEvents'])):
269
-
270
-
271
- if ab_list['playEvents'][n]['isPitch'] == True or 'call' in ab_list['playEvents'][n]['details']:
272
- ab_number.append(ab_list['atBatIndex'] if 'atBatIndex' in ab_list else None)
273
-
274
- game_id.append(data['gamePk'])
275
- game_date.append(data['gameData']['datetime']['officialDate'])
276
- if 'matchup' in ab_list:
277
- batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
278
- if 'batter' in ab_list['matchup']:
279
- batter_name.append(ab_list['matchup']['batter']['fullName'] if 'fullName' in ab_list['matchup']['batter'] else None)
280
- else:
281
- batter_name.append(None)
282
-
283
- batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
284
- pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
285
- if 'pitcher' in ab_list['matchup']:
286
- pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'fullName' in ab_list['matchup']['pitcher'] else None)
287
- else:
288
- pitcher_name.append(None)
289
 
290
- pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
291
-
292
-
293
- if ab_list['about']['isTopInning']:
294
- batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
295
- batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
296
- pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
297
- pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
298
-
299
- else:
300
- batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
301
- batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
302
- pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
303
- pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
304
-
305
- play_description.append(ab_list['playEvents'][n]['details']['description'] if 'description' in ab_list['playEvents'][n]['details'] else None)
306
- play_code.append(ab_list['playEvents'][n]['details']['code'] if 'code' in ab_list['playEvents'][n]['details'] else None)
307
- in_play.append(ab_list['playEvents'][n]['details']['isInPlay'] if 'isInPlay' in ab_list['playEvents'][n]['details'] else None)
308
- is_strike.append(ab_list['playEvents'][n]['details']['isStrike'] if 'isStrike' in ab_list['playEvents'][n]['details'] else None)
309
-
310
- if 'details' in ab_list['playEvents'][n]:
311
- is_swing.append(True if ab_list['playEvents'][n]['details']['code'] in swing_list else None)
312
- is_whiff.append(True if ab_list['playEvents'][n]['details']['code'] in whiff_list else None)
313
- else:
314
- is_swing.append(None)
315
- is_whiff.append(None)
316
-
317
- is_ball.append(ab_list['playEvents'][n]['details']['isOut'] if 'isOut' in ab_list['playEvents'][n]['details'] else None)
318
- is_review.append(ab_list['playEvents'][n]['details']['hasReview'] if 'hasReview' in ab_list['playEvents'][n]['details'] else None)
319
- pitch_type.append(ab_list['playEvents'][n]['details']['type']['code'] if 'type' in ab_list['playEvents'][n]['details'] else None)
320
- pitch_description.append(ab_list['playEvents'][n]['details']['type']['description'] if 'type' in ab_list['playEvents'][n]['details'] else None)
321
-
322
- if ab_list['playEvents'][n]['pitchNumber'] == 1:
323
- strikes.append(0)
324
- balls.append(0)
325
- strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
326
- balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
 
 
328
  outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
329
-
330
- else:
331
- strikes.append(ab_list['playEvents'][n-1]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n-1]['count'] else None)
332
- balls.append(ab_list['playEvents'][n-1]['count']['balls'] if 'balls' in ab_list['playEvents'][n-1]['count'] else None)
333
- outs.append(ab_list['playEvents'][n-1]['count']['outs'] if 'outs' in ab_list['playEvents'][n-1]['count'] else None)
334
-
335
- strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
336
- balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
337
- outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
338
-
339
-
340
- if 'pitchData' in ab_list['playEvents'][n]:
341
-
342
- start_speed.append(ab_list['playEvents'][n]['pitchData']['startSpeed'] if 'startSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
343
- end_speed.append(ab_list['playEvents'][n]['pitchData']['endSpeed'] if 'endSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
344
-
345
- sz_top.append(ab_list['playEvents'][n]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in ab_list['playEvents'][n]['pitchData'] else None)
346
- sz_bot.append(ab_list['playEvents'][n]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in ab_list['playEvents'][n]['pitchData'] else None)
347
- x.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x'] if 'x' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
348
- y.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y'] if 'y' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
349
-
350
- ax.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aX'] if 'aX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
351
- ay.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aY'] if 'aY' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
352
- az.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aZ'] if 'aZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
353
- pfxx.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxX'] if 'pfxX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
354
- pfxz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
355
- px.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pX'] if 'pX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
356
- pz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pZ'] if 'pZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
357
- vx0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vX0'] if 'vX0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
358
- vy0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vY0'] if 'vY0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
359
- vz0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vZ0'] if 'vZ0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
360
- x0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x0'] if 'x0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
361
- y0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y0'] if 'y0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
362
- z0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['z0'] if 'z0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
363
-
364
- zone.append(ab_list['playEvents'][n]['pitchData']['zone'] if 'zone' in ab_list['playEvents'][n]['pitchData'] else None)
365
- type_confidence.append(ab_list['playEvents'][n]['pitchData']['typeConfidence'] if 'typeConfidence' in ab_list['playEvents'][n]['pitchData'] else None)
366
- plate_time.append(ab_list['playEvents'][n]['pitchData']['plateTime'] if 'plateTime' in ab_list['playEvents'][n]['pitchData'] else None)
367
- extension.append(ab_list['playEvents'][n]['pitchData']['extension'] if 'extension' in ab_list['playEvents'][n]['pitchData'] else None)
368
-
369
- if 'breaks' in ab_list['playEvents'][n]['pitchData']:
370
- spin_rate.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinRate'] if 'spinRate' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
371
- spin_direction.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
372
- vb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVertical'] if 'breakVertical' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
373
- ivb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVerticalInduced'] if 'breakVerticalInduced' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
374
- hb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakHorizontal'] if 'breakHorizontal' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
375
-
376
- else:
377
  start_speed.append(None)
378
  end_speed.append(None)
379
-
380
  sz_top.append(None)
381
  sz_bot.append(None)
382
  x.append(None)
383
  y.append(None)
384
-
385
  ax.append(None)
386
  ay.append(None)
387
  az.append(None)
@@ -395,7 +539,6 @@ class MLB_Scrape:
395
  x0.append(None)
396
  y0.append(None)
397
  z0.append(None)
398
-
399
  zone.append(None)
400
  type_confidence.append(None)
401
  plate_time.append(None)
@@ -405,18 +548,6 @@ class MLB_Scrape:
405
  vb.append(None)
406
  ivb.append(None)
407
  hb.append(None)
408
-
409
- if 'hitData' in ab_list['playEvents'][n]:
410
- launch_speed.append(ab_list['playEvents'][n]['hitData']['launchSpeed'] if 'launchSpeed' in ab_list['playEvents'][n]['hitData'] else None)
411
- launch_angle.append(ab_list['playEvents'][n]['hitData']['launchAngle'] if 'launchAngle' in ab_list['playEvents'][n]['hitData'] else None)
412
- launch_distance.append(ab_list['playEvents'][n]['hitData']['totalDistance'] if 'totalDistance' in ab_list['playEvents'][n]['hitData'] else None)
413
- launch_location.append(ab_list['playEvents'][n]['hitData']['location'] if 'location' in ab_list['playEvents'][n]['hitData'] else None)
414
-
415
- trajectory.append(ab_list['playEvents'][n]['hitData']['trajectory'] if 'trajectory' in ab_list['playEvents'][n]['hitData'] else None)
416
- hardness.append(ab_list['playEvents'][n]['hitData']['hardness'] if 'hardness' in ab_list['playEvents'][n]['hitData'] else None)
417
- hit_x.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordX'] if 'coordX' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
418
- hit_y.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordY'] if 'coordY' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
419
- else:
420
  launch_speed.append(None)
421
  launch_angle.append(None)
422
  launch_distance.append(None)
@@ -425,213 +556,17 @@ class MLB_Scrape:
425
  hardness.append(None)
426
  hit_x.append(None)
427
  hit_y.append(None)
428
-
429
- index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
430
- play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
431
- start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
432
- end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
433
- is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
434
- type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
435
-
436
-
437
-
438
- if n == len(ab_list['playEvents']) - 1 :
439
-
440
- type_ab.append(data['liveData']['plays']['allPlays'][ab_id]['result']['type'] if 'type' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
441
- event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'] if 'event' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
442
- event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'] if 'eventType' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
443
- rbi.append(data['liveData']['plays']['allPlays'][ab_id]['result']['rbi'] if 'rbi' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
444
- away_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['awayScore'] if 'awayScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
445
- home_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['homeScore'] if 'homeScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
446
- is_out.append(data['liveData']['plays']['allPlays'][ab_id]['result']['isOut'] if 'isOut' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
447
-
448
- else:
449
-
450
  type_ab.append(None)
451
- event.append(None)
452
- event_type.append(None)
453
  rbi.append(None)
454
  away_score.append(None)
455
  home_score.append(None)
456
  is_out.append(None)
457
 
458
- elif ab_list['playEvents'][n]['count']['balls'] == 4:
459
-
460
- event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'])
461
- event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'])
462
-
463
-
464
- game_id.append(data['gamePk'])
465
- game_date.append(data['gameData']['datetime']['officialDate'])
466
- batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
467
- batter_name.append(ab_list['matchup']['batter']['fullName'] if 'batter' in ab_list['matchup'] else None)
468
- batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
469
- pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
470
- pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else None)
471
- pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
472
- if ab_list['about']['isTopInning']:
473
- batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
474
- batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
475
- pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
476
- pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
477
- else:
478
- batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
479
- batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
480
- pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
481
- pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
482
-
483
- play_description.append(None)
484
- play_code.append(None)
485
- in_play.append(None)
486
- is_strike.append(None)
487
- is_ball.append(None)
488
- is_review.append(None)
489
- pitch_type.append(None)
490
- pitch_description.append(None)
491
- strikes.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
492
- balls.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
493
- outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
494
- strikes_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
495
- balls_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
496
- outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
497
- index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
498
- play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
499
- start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
500
- end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
501
- is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
502
- type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
503
-
504
-
505
-
506
- is_swing.append(None)
507
- is_whiff.append(None)
508
- start_speed.append(None)
509
- end_speed.append(None)
510
- sz_top.append(None)
511
- sz_bot.append(None)
512
- x.append(None)
513
- y.append(None)
514
- ax.append(None)
515
- ay.append(None)
516
- az.append(None)
517
- pfxx.append(None)
518
- pfxz.append(None)
519
- px.append(None)
520
- pz.append(None)
521
- vx0.append(None)
522
- vy0.append(None)
523
- vz0.append(None)
524
- x0.append(None)
525
- y0.append(None)
526
- z0.append(None)
527
- zone.append(None)
528
- type_confidence.append(None)
529
- plate_time.append(None)
530
- extension.append(None)
531
- spin_rate.append(None)
532
- spin_direction.append(None)
533
- vb.append(None)
534
- ivb.append(None)
535
- hb.append(None)
536
- launch_speed.append(None)
537
- launch_angle.append(None)
538
- launch_distance.append(None)
539
- launch_location.append(None)
540
- trajectory.append(None)
541
- hardness.append(None)
542
- hit_x.append(None)
543
- hit_y.append(None)
544
- type_ab.append(None)
545
- ab_number.append(None)
546
-
547
- rbi.append(None)
548
- away_score.append(None)
549
- home_score.append(None)
550
- is_out.append(None)
551
-
552
- # print({
553
- # 'game_id':len(game_id),
554
- # 'game_date':len(game_date),
555
- # 'batter_id':len(batter_id),
556
- # 'batter_name':len(batter_name),
557
- # 'batter_hand':len(batter_hand),
558
- # 'batter_team':len(batter_team),
559
- # 'batter_team_id':len(batter_team_id),
560
- # 'pitcher_id':len(pitcher_id),
561
- # 'pitcher_name':len(pitcher_name),
562
- # 'pitcher_hand':len(pitcher_hand),
563
- # 'pitcher_team':len(pitcher_team),
564
- # 'pitcher_team_id':len(pitcher_team_id),
565
-
566
- # 'play_description':len(play_description),
567
- # 'play_code':len(play_code),
568
- # 'in_play':len(in_play),
569
- # 'is_strike':len(is_strike),
570
- # 'is_swing':len(is_swing),
571
- # 'is_whiff':len(is_whiff),
572
- # 'is_out':len(is_out),
573
- # 'is_ball':len(is_ball),
574
- # 'is_review':len(is_review),
575
- # 'pitch_type':len(pitch_type),
576
- # 'pitch_description':len(pitch_description),
577
- # 'strikes':len(strikes),
578
- # 'balls':len(balls),
579
- # 'outs':len(outs),
580
- # 'strikes_after':len(strikes_after),
581
- # 'balls_after':len(balls_after),
582
- # 'outs_after':len(outs_after),
583
- # 'start_speed':len(start_speed),
584
- # 'end_speed':len(end_speed),
585
- # 'sz_top':len(sz_top),
586
- # 'sz_bot':len(sz_bot),
587
- # 'x':len(x),
588
- # 'y':len(y),
589
- # 'ax':len(ax),
590
- # 'ay':len(ay),
591
- # 'az':len(az),
592
- # 'pfxx':len(pfxx),
593
- # 'pfxz':len(pfxz),
594
- # 'px':len(px),
595
- # 'pz':len(pz),
596
- # 'vx0':len(vx0),
597
- # 'vy0':len(vy0),
598
- # 'vz0':len(vz0),
599
- # 'x0':len(x0),
600
- # 'y0':len(y0),
601
- # 'z0':len(z0),
602
- # 'zone':len(zone),
603
- # 'type_confidence':len(type_confidence),
604
- # 'plate_time':len(plate_time),
605
- # 'extension':len(extension),
606
- # 'spin_rate':len(spin_rate),
607
- # 'spin_direction':len(spin_direction),
608
- # 'vb':len(vb),
609
- # 'ivb':len(ivb),
610
- # 'hb':len(hb),
611
- # 'launch_speed':len(launch_speed),
612
- # 'launch_angle':len(launch_angle),
613
- # 'launch_distance':len(launch_distance),
614
- # 'launch_location':len(launch_location),
615
- # 'trajectory':len(trajectory),
616
- # 'hardness':len(hardness),
617
- # 'hit_x':len(hit_x),
618
- # 'hit_y':len(hit_y),
619
- # 'index_play':len(index_play),
620
- # 'play_id':len(play_id),
621
- # 'start_time':len(start_time),
622
- # 'end_time':len(end_time),
623
- # 'is_pitch':len(is_pitch),
624
- # 'type_type':len(type_type),
625
- # 'type_ab':len(type_ab),
626
- # 'event':len(event),
627
- # 'event_type':len(event_type),
628
- # 'rbi':len(rbi),
629
- # 'away_score':len(away_score),
630
- # 'home_score':len(home_score),
631
- # }
632
-
633
-
634
- # )
635
  df = pl.DataFrame(data={
636
  'game_id':game_id,
637
  'game_date':game_date,
@@ -717,23 +652,6 @@ class MLB_Scrape:
717
 
718
  return df
719
 
720
- # def get_players(self,sport_id:int):
721
- # player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players').json()
722
-
723
- # #Select relevant data that will help distinguish players from one another
724
- # fullName_list = [x['fullName'] for x in player_data['people']]
725
- # id_list = [x['id'] for x in player_data['people']]
726
- # position_list = [x['primaryPosition']['abbreviation'] for x in player_data['people']]
727
- # team_list = [x['currentTeam']['id']for x in player_data['people']]
728
- # age_list = [x['currentAge']for x in player_data['people']]
729
-
730
- # player_df = pl.DataFrame(data={'player_id':id_list,
731
- # 'name':fullName_list,
732
- # 'position':position_list,
733
- # 'team':team_list,
734
- # 'age':age_list})
735
- # return player_df
736
-
737
  def get_teams(self):
738
  """
739
  Retrieves information about MLB teams from the MLB API and processes it into a Polars DataFrame.
@@ -875,7 +793,6 @@ class MLB_Scrape:
875
 
876
  return player_game_list
877
 
878
-
879
  def get_players(self, sport_id: int, season: int, game_type: list = ['R']):
880
  """
881
  Retrieves data frame of players in a given league
@@ -883,59 +800,81 @@ class MLB_Scrape:
883
  Parameters:
884
  - sport_id (int): The ID of the sport for which to retrieve player data.
885
  - season (int): The season year for which to retrieve player data.
 
886
 
887
  Returns:
888
  - player_df (pl.DataFrame): A DataFrame containing player information, including player ID, name, position, team, and age.
889
  """
890
-
891
  game_type_str = ','.join([str(x) for x in game_type])
892
 
 
893
  if game_type_str == 'S':
894
- player_data = requests.get(f'https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?&env=prod&season={season}&sportId=1&stats=season&group=pitching&gameType=S&limit=1000000&offset=0&sortStat=inningsPitched&order=asc').json()
895
- fullName_list = [x['playerFullName'] if 'playerFullName' in x else None for x in player_data['stats']]
896
- firstName_list = [x['playerFirstName'] if 'playerFirstName' in x else None for x in player_data['stats']]
897
- lastName_list = [x['playerLastName'] if 'playerLastName' in x else None for x in player_data['stats']]
898
- id_list = [x['playerId'] if 'playerId' in x else None for x in player_data['stats']]
899
- position_list = [x['primaryPositionAbbrev'] if 'primaryPositionAbbrev' in x else None for x in player_data['stats']]
900
- team_list = [x['teamId'] if 'teamId' in x else None for x in player_data['stats']]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
901
 
902
- df = pl.DataFrame(data={'player_id':id_list,
903
- 'first_name':firstName_list,
904
- 'last_name':lastName_list,
905
- 'name':fullName_list,
906
- 'position':position_list,
907
- 'team':team_list})
 
 
 
 
 
908
 
909
  else:
910
- print("DID NOT GET TO SPRING")
911
  player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players?season={season}&gameType=[{game_type_str}]').json()['people']
912
 
913
- #Select relevant data that will help distinguish players from one another
914
-
915
- fullName_list = [x['fullName'] if 'fullName' in x else None for x in player_data]
916
- firstName_list = [x['firstName'] if 'firstName' in x else None for x in player_data]
917
- lastName_list = [x['lastName'] if 'lastName' in x else None for x in player_data]
918
- id_list = [x['id'] if 'id' in x else None for x in player_data]
919
- position_list = [x['primaryPosition']['abbreviation'] if 'primaryPosition' in x and 'abbreviation' in x['primaryPosition'] else None for x in player_data]
920
- team_list = [x['currentTeam']['id'] if 'currentTeam' in x and 'id' in x['currentTeam'] else None for x in player_data]
921
  weight_list = [x['weight'] if 'weight' in x else None for x in player_data]
922
  height_list = [x['height'] if 'height' in x else None for x in player_data]
923
  age_list = [x['currentAge'] if 'currentAge' in x else None for x in player_data]
924
  birthDate_list = [x['birthDate'] if 'birthDate' in x else None for x in player_data]
925
 
926
-
927
-
928
- df = pl.DataFrame(data={'player_id':id_list,
929
- 'first_name':firstName_list,
930
- 'last_name':lastName_list,
931
- 'name':fullName_list,
932
- 'position':position_list,
933
- 'team':team_list,
934
- 'weight':weight_list,
935
- 'height':height_list,
936
- 'age':age_list,
937
- 'birthDate':birthDate_list})
938
-
939
  return df
940
-
941
-
 
6
  from pytz import timezone
7
  import re
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
 
9
 
10
 
11
  class MLB_Scrape:
 
99
 
100
  # Make API call to retrieve game schedule
101
  game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sport_id_str}&gameTypes={game_type_str}&season={year_input_str}&hydrate=lineup,players').json()
102
+ try:
103
+ # Extract relevant data from the API response
104
+ game_list = [item for sublist in [[y['gamePk'] for y in x['games']] for x in game_call['dates']] for item in sublist]
105
+ time_list = [item for sublist in [[y['gameDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
106
+ date_list = [item for sublist in [[y['officialDate'] for y in x['games']] for x in game_call['dates']] for item in sublist]
107
+ away_team_list = [item for sublist in [[y['teams']['away']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
108
+ away_team_id_list = [item for sublist in [[y['teams']['away']['team']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
109
+ home_team_list = [item for sublist in [[y['teams']['home']['team']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
110
+ home_team_id_list = [item for sublist in [[y['teams']['home']['team']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
111
+ state_list = [item for sublist in [[y['status']['codedGameState'] for y in x['games']] for x in game_call['dates']] for item in sublist]
112
+ venue_id = [item for sublist in [[y['venue']['id'] for y in x['games']] for x in game_call['dates']] for item in sublist]
113
+ venue_name = [item for sublist in [[y['venue']['name'] for y in x['games']] for x in game_call['dates']] for item in sublist]
114
+ gameday_type = [item for sublist in [[y['gamedayType'] for y in x['games']] for x in game_call['dates']] for item in sublist]
115
+ # Create a Polars DataFrame with the extracted data
116
+
117
+
118
+ # Create a Polars DataFrame with the extracted data
119
+ game_df = pl.DataFrame(data={'game_id': game_list,
120
+ 'time': time_list,
121
+ 'date': date_list,
122
+ 'away': away_team_list,
123
+ 'away_id': away_team_id_list,
124
+ 'home': home_team_list,
125
+ 'home_id': home_team_id_list,
126
+ 'state': state_list,
127
+ 'venue_id': venue_id,
128
+ 'venue_name': venue_name,
129
+ 'gameday_type':gameday_type})
130
 
131
+
132
+ # Check if the DataFrame is empty
133
+ if len(game_df) == 0:
134
+ print('Schedule Length of 0, please select different parameters.')
135
+ return None
136
+
137
+ # Convert date and time columns to appropriate formats
138
+ game_df = game_df.with_columns(
139
+ game_df['date'].str.to_date(),
140
+ game_df['time'].str.to_datetime().dt.convert_time_zone(eastern.zone).dt.strftime("%I:%M %p"))
141
+
142
+ # Remove duplicate games and sort by date
143
+ game_df = game_df.unique(subset='game_id').sort('date')
144
+
145
+ # Check again if the DataFrame is empty after processing
146
+ if len(game_df) == 0:
147
+ print('Schedule Length of 0, please select different parameters.')
148
+ return None
149
+ except KeyError:
150
+ print('No Data for Selected Parameters')
151
+ return None
152
+
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  return game_df
155
 
156
+
157
  def get_data(self, game_list_input: list):
158
  """
159
+ Retrieves live game data for a list of game IDs in parallel.
160
 
161
  Parameters:
162
  - game_list_input (list): A list of game IDs for which to retrieve live data.
 
167
  data_total = []
168
  print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')
169
 
170
+ def fetch_data(game_id):
171
+ r = requests.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_id}/feed/live')
172
+ return r.json()
 
 
 
173
 
174
+ with ThreadPoolExecutor() as executor:
175
+ futures = {executor.submit(fetch_data, game_id): game_id for game_id in game_list_input}
176
+ for future in tqdm(as_completed(futures), total=len(futures), desc="Processing", unit="iteration"):
177
+ data_total.append(future.result())
178
 
179
  return data_total
180
 
 
277
  home_score = []
278
 
279
  for data in data_list:
280
+ try:
281
+ for ab_id in range(len(data['liveData']['plays']['allPlays'])):
282
+ ab_list = data['liveData']['plays']['allPlays'][ab_id]
283
+ for n in range(len(ab_list['playEvents'])):
284
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
 
286
+ if ab_list['playEvents'][n]['isPitch'] == True or 'call' in ab_list['playEvents'][n]['details']:
287
+ ab_number.append(ab_list['atBatIndex'] if 'atBatIndex' in ab_list else None)
288
+
289
+ game_id.append(data['gamePk'])
290
+ game_date.append(data['gameData']['datetime']['officialDate'])
291
+ if 'matchup' in ab_list:
292
+ batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
293
+ if 'batter' in ab_list['matchup']:
294
+ batter_name.append(ab_list['matchup']['batter']['fullName'] if 'fullName' in ab_list['matchup']['batter'] else None)
295
+ else:
296
+ batter_name.append(None)
297
+
298
+ batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
299
+ pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
300
+ if 'pitcher' in ab_list['matchup']:
301
+ pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'fullName' in ab_list['matchup']['pitcher'] else None)
302
+ else:
303
+ pitcher_name.append(None)
304
+
305
+ pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
306
+
307
+
308
+ if ab_list['about']['isTopInning']:
309
+ batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
310
+ batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
311
+ pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
312
+ pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
313
+
314
+ else:
315
+ batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
316
+ batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
317
+ pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
318
+ pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
319
+
320
+ play_description.append(ab_list['playEvents'][n]['details']['description'] if 'description' in ab_list['playEvents'][n]['details'] else None)
321
+ play_code.append(ab_list['playEvents'][n]['details']['code'] if 'code' in ab_list['playEvents'][n]['details'] else None)
322
+ in_play.append(ab_list['playEvents'][n]['details']['isInPlay'] if 'isInPlay' in ab_list['playEvents'][n]['details'] else None)
323
+ is_strike.append(ab_list['playEvents'][n]['details']['isStrike'] if 'isStrike' in ab_list['playEvents'][n]['details'] else None)
324
+
325
+ if 'details' in ab_list['playEvents'][n]:
326
+ is_swing.append(True if ab_list['playEvents'][n]['details']['code'] in swing_list else None)
327
+ is_whiff.append(True if ab_list['playEvents'][n]['details']['code'] in whiff_list else None)
328
+ else:
329
+ is_swing.append(None)
330
+ is_whiff.append(None)
331
+
332
+ is_ball.append(ab_list['playEvents'][n]['details']['isOut'] if 'isOut' in ab_list['playEvents'][n]['details'] else None)
333
+ is_review.append(ab_list['playEvents'][n]['details']['hasReview'] if 'hasReview' in ab_list['playEvents'][n]['details'] else None)
334
+ pitch_type.append(ab_list['playEvents'][n]['details']['type']['code'] if 'type' in ab_list['playEvents'][n]['details'] else None)
335
+ pitch_description.append(ab_list['playEvents'][n]['details']['type']['description'] if 'type' in ab_list['playEvents'][n]['details'] else None)
336
+
337
+ if ab_list['playEvents'][n]['pitchNumber'] == 1:
338
+ strikes.append(0)
339
+ balls.append(0)
340
+ strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
341
+ balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
342
+ outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
343
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
344
+
345
+ else:
346
+ strikes.append(ab_list['playEvents'][n-1]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n-1]['count'] else None)
347
+ balls.append(ab_list['playEvents'][n-1]['count']['balls'] if 'balls' in ab_list['playEvents'][n-1]['count'] else None)
348
+ outs.append(ab_list['playEvents'][n-1]['count']['outs'] if 'outs' in ab_list['playEvents'][n-1]['count'] else None)
349
+
350
+ strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
351
+ balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
352
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
353
+
354
+
355
+ if 'pitchData' in ab_list['playEvents'][n]:
356
+
357
+ start_speed.append(ab_list['playEvents'][n]['pitchData']['startSpeed'] if 'startSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
358
+ end_speed.append(ab_list['playEvents'][n]['pitchData']['endSpeed'] if 'endSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
359
+
360
+ sz_top.append(ab_list['playEvents'][n]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in ab_list['playEvents'][n]['pitchData'] else None)
361
+ sz_bot.append(ab_list['playEvents'][n]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in ab_list['playEvents'][n]['pitchData'] else None)
362
+ x.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x'] if 'x' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
363
+ y.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y'] if 'y' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
364
+
365
+ ax.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aX'] if 'aX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
366
+ ay.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aY'] if 'aY' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
367
+ az.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aZ'] if 'aZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
368
+ pfxx.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxX'] if 'pfxX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
369
+ pfxz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
370
+ px.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pX'] if 'pX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
371
+ pz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pZ'] if 'pZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
372
+ vx0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vX0'] if 'vX0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
373
+ vy0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vY0'] if 'vY0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
374
+ vz0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vZ0'] if 'vZ0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
375
+ x0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x0'] if 'x0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
376
+ y0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y0'] if 'y0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
377
+ z0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['z0'] if 'z0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
378
+
379
+ zone.append(ab_list['playEvents'][n]['pitchData']['zone'] if 'zone' in ab_list['playEvents'][n]['pitchData'] else None)
380
+ type_confidence.append(ab_list['playEvents'][n]['pitchData']['typeConfidence'] if 'typeConfidence' in ab_list['playEvents'][n]['pitchData'] else None)
381
+ plate_time.append(ab_list['playEvents'][n]['pitchData']['plateTime'] if 'plateTime' in ab_list['playEvents'][n]['pitchData'] else None)
382
+ extension.append(ab_list['playEvents'][n]['pitchData']['extension'] if 'extension' in ab_list['playEvents'][n]['pitchData'] else None)
383
+
384
+ if 'breaks' in ab_list['playEvents'][n]['pitchData']:
385
+ spin_rate.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinRate'] if 'spinRate' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
386
+ spin_direction.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
387
+ vb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVertical'] if 'breakVertical' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
388
+ ivb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVerticalInduced'] if 'breakVerticalInduced' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
389
+ hb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakHorizontal'] if 'breakHorizontal' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
390
+
391
+ else:
392
+ start_speed.append(None)
393
+ end_speed.append(None)
394
+
395
+ sz_top.append(None)
396
+ sz_bot.append(None)
397
+ x.append(None)
398
+ y.append(None)
399
+
400
+ ax.append(None)
401
+ ay.append(None)
402
+ az.append(None)
403
+ pfxx.append(None)
404
+ pfxz.append(None)
405
+ px.append(None)
406
+ pz.append(None)
407
+ vx0.append(None)
408
+ vy0.append(None)
409
+ vz0.append(None)
410
+ x0.append(None)
411
+ y0.append(None)
412
+ z0.append(None)
413
+
414
+ zone.append(None)
415
+ type_confidence.append(None)
416
+ plate_time.append(None)
417
+ extension.append(None)
418
+ spin_rate.append(None)
419
+ spin_direction.append(None)
420
+ vb.append(None)
421
+ ivb.append(None)
422
+ hb.append(None)
423
+
424
+ if 'hitData' in ab_list['playEvents'][n]:
425
+ launch_speed.append(ab_list['playEvents'][n]['hitData']['launchSpeed'] if 'launchSpeed' in ab_list['playEvents'][n]['hitData'] else None)
426
+ launch_angle.append(ab_list['playEvents'][n]['hitData']['launchAngle'] if 'launchAngle' in ab_list['playEvents'][n]['hitData'] else None)
427
+ launch_distance.append(ab_list['playEvents'][n]['hitData']['totalDistance'] if 'totalDistance' in ab_list['playEvents'][n]['hitData'] else None)
428
+ launch_location.append(ab_list['playEvents'][n]['hitData']['location'] if 'location' in ab_list['playEvents'][n]['hitData'] else None)
429
+
430
+ trajectory.append(ab_list['playEvents'][n]['hitData']['trajectory'] if 'trajectory' in ab_list['playEvents'][n]['hitData'] else None)
431
+ hardness.append(ab_list['playEvents'][n]['hitData']['hardness'] if 'hardness' in ab_list['playEvents'][n]['hitData'] else None)
432
+ hit_x.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordX'] if 'coordX' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
433
+ hit_y.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordY'] if 'coordY' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
434
+ else:
435
+ launch_speed.append(None)
436
+ launch_angle.append(None)
437
+ launch_distance.append(None)
438
+ launch_location.append(None)
439
+ trajectory.append(None)
440
+ hardness.append(None)
441
+ hit_x.append(None)
442
+ hit_y.append(None)
443
+
444
+ index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
445
+ play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
446
+ start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
447
+ end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
448
+ is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
449
+ type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
450
+
451
+
452
+
453
+ if n == len(ab_list['playEvents']) - 1 :
454
+
455
+ type_ab.append(data['liveData']['plays']['allPlays'][ab_id]['result']['type'] if 'type' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
456
+ event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'] if 'event' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
457
+ event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'] if 'eventType' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
458
+ rbi.append(data['liveData']['plays']['allPlays'][ab_id]['result']['rbi'] if 'rbi' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
459
+ away_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['awayScore'] if 'awayScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
460
+ home_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['homeScore'] if 'homeScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
461
+ is_out.append(data['liveData']['plays']['allPlays'][ab_id]['result']['isOut'] if 'isOut' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
462
+
463
+ else:
464
+
465
+ type_ab.append(None)
466
+ event.append(None)
467
+ event_type.append(None)
468
+ rbi.append(None)
469
+ away_score.append(None)
470
+ home_score.append(None)
471
+ is_out.append(None)
472
+
473
+ elif ab_list['playEvents'][n]['count']['balls'] == 4:
474
+
475
+ event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'])
476
+ event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'])
477
+
478
+
479
+ game_id.append(data['gamePk'])
480
+ game_date.append(data['gameData']['datetime']['officialDate'])
481
+ batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
482
+ batter_name.append(ab_list['matchup']['batter']['fullName'] if 'batter' in ab_list['matchup'] else None)
483
+ batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
484
+ pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
485
+ pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else None)
486
+ pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
487
+ if ab_list['about']['isTopInning']:
488
+ batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
489
+ batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
490
+ pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
491
+ pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
492
+ else:
493
+ batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
494
+ batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
495
+ pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
496
+ pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
497
+
498
+ play_description.append(None)
499
+ play_code.append(None)
500
+ in_play.append(None)
501
+ is_strike.append(None)
502
+ is_ball.append(None)
503
+ is_review.append(None)
504
+ pitch_type.append(None)
505
+ pitch_description.append(None)
506
+ strikes.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
507
+ balls.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
508
  outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
509
+ strikes_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
510
+ balls_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
511
  outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
512
+ index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
513
+ play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
514
+ start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
515
+ end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
516
+ is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
517
+ type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
518
+
519
+
520
+
521
+ is_swing.append(None)
522
+ is_whiff.append(None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  start_speed.append(None)
524
  end_speed.append(None)
 
525
  sz_top.append(None)
526
  sz_bot.append(None)
527
  x.append(None)
528
  y.append(None)
 
529
  ax.append(None)
530
  ay.append(None)
531
  az.append(None)
 
539
  x0.append(None)
540
  y0.append(None)
541
  z0.append(None)
 
542
  zone.append(None)
543
  type_confidence.append(None)
544
  plate_time.append(None)
 
548
  vb.append(None)
549
  ivb.append(None)
550
  hb.append(None)
 
 
 
 
 
 
 
 
 
 
 
 
551
  launch_speed.append(None)
552
  launch_angle.append(None)
553
  launch_distance.append(None)
 
556
  hardness.append(None)
557
  hit_x.append(None)
558
  hit_y.append(None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
  type_ab.append(None)
560
+ ab_number.append(None)
561
+
562
  rbi.append(None)
563
  away_score.append(None)
564
  home_score.append(None)
565
  is_out.append(None)
566
 
567
+ except KeyError:
568
+ print(f"No Data for Game")
569
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  df = pl.DataFrame(data={
571
  'game_id':game_id,
572
  'game_date':game_date,
 
652
 
653
  return df
654
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  def get_teams(self):
656
  """
657
  Retrieves information about MLB teams from the MLB API and processes it into a Polars DataFrame.
 
793
 
794
  return player_game_list
795
 
 
796
  def get_players(self, sport_id: int, season: int, game_type: list = ['R']):
797
  """
798
  Retrieves data frame of players in a given league
 
800
  Parameters:
801
  - sport_id (int): The ID of the sport for which to retrieve player data.
802
  - season (int): The season year for which to retrieve player data.
803
+ - game_type (list): A list of game types to filter the players. Default is ['R'].
804
 
805
  Returns:
806
  - player_df (pl.DataFrame): A DataFrame containing player information, including player ID, name, position, team, and age.
807
  """
 
808
  game_type_str = ','.join([str(x) for x in game_type])
809
 
810
+ # If game type is 'S', fetch data from a different endpoint
811
  if game_type_str == 'S':
812
+ # Fetch pitcher data
813
+ pitcher_data = requests.get(f'https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?&env=prod&season={season}&sportId=1&stats=season&group=pitching&gameType=S&limit=1000000&offset=0&sortStat=inningsPitched&order=asc').json()
814
+ fullName_list = [x['playerFullName'] for x in pitcher_data['stats']]
815
+ firstName_list = [x['playerFirstName'] for x in pitcher_data['stats']]
816
+ lastName_list = [x['playerLastName'] for x in pitcher_data['stats']]
817
+ id_list = [x['playerId'] for x in pitcher_data['stats']]
818
+ position_list = [x['primaryPositionAbbrev'] for x in pitcher_data['stats']]
819
+ team_list = [x['teamId'] for x in pitcher_data['stats']]
820
+
821
+ df_pitcher = pl.DataFrame(data={
822
+ 'player_id': id_list,
823
+ 'first_name': firstName_list,
824
+ 'last_name': lastName_list,
825
+ 'name': fullName_list,
826
+ 'position': position_list,
827
+ 'team': team_list
828
+ })
829
+
830
+ # Fetch batter data
831
+ batter_data = requests.get(f'https://bdfed.stitch.mlbinfra.com/bdfed/stats/player?&env=prod&season={season}&sportId=1&stats=season&group=hitting&gameType=S&limit=1000000&offset=0').json()
832
+ fullName_list = [x['playerFullName'] for x in batter_data['stats']]
833
+ firstName_list = [x['playerFirstName'] for x in batter_data['stats']]
834
+ lastName_list = [x['playerLastName'] for x in batter_data['stats']]
835
+ id_list = [x['playerId'] for x in batter_data['stats']]
836
+ position_list = [x['primaryPositionAbbrev'] for x in batter_data['stats']]
837
+ team_list = [x['teamId'] for x in batter_data['stats']]
838
 
839
+ df_batter = pl.DataFrame(data={
840
+ 'player_id': id_list,
841
+ 'first_name': firstName_list,
842
+ 'last_name': lastName_list,
843
+ 'name': fullName_list,
844
+ 'position': position_list,
845
+ 'team': team_list
846
+ })
847
+
848
+ # Combine pitcher and batter data
849
+ df = pl.concat([df_pitcher, df_batter]).unique().drop_nulls(subset=['player_id']).sort('player_id')
850
 
851
  else:
852
+ # Fetch player data for other game types
853
  player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players?season={season}&gameType=[{game_type_str}]').json()['people']
854
 
855
+ # Extract relevant data
856
+ fullName_list = [x['fullName'] for x in player_data]
857
+ firstName_list = [x['firstName'] for x in player_data]
858
+ lastName_list = [x['lastName'] for x in player_data]
859
+ id_list = [x['id'] for x in player_data]
860
+ position_list = [x['primaryPosition']['abbreviation'] if 'primaryPosition' in x else None for x in player_data]
861
+ team_list = [x['currentTeam']['id'] if 'currentTeam' in x else None for x in player_data]
 
862
  weight_list = [x['weight'] if 'weight' in x else None for x in player_data]
863
  height_list = [x['height'] if 'height' in x else None for x in player_data]
864
  age_list = [x['currentAge'] if 'currentAge' in x else None for x in player_data]
865
  birthDate_list = [x['birthDate'] if 'birthDate' in x else None for x in player_data]
866
 
867
+ df = pl.DataFrame(data={
868
+ 'player_id': id_list,
869
+ 'first_name': firstName_list,
870
+ 'last_name': lastName_list,
871
+ 'name': fullName_list,
872
+ 'position': position_list,
873
+ 'team': team_list,
874
+ 'weight': weight_list,
875
+ 'height': height_list,
876
+ 'age': age_list,
877
+ 'birthDate': birthDate_list
878
+ })
879
+
880
  return df