nesticot committed on
Commit
075cdf1
·
verified ·
1 Parent(s): d565096

Upload 3 files

Browse files
Files changed (3) hide show
  1. api_scraper.py +902 -0
  2. app.py +293 -162
  3. requirements.txt +12 -12
api_scraper.py ADDED
@@ -0,0 +1,902 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import polars as pl
3
+ import numpy as np
4
+ from datetime import datetime
5
+ from tqdm import tqdm
6
+ from pytz import timezone
7
+ import re
8
+
9
+
10
+ class MLB_Scrape:
11
+
12
+ def __init__(self):
13
+ # Initialize your class here if needed
14
+ pass
15
+
16
def get_sport_id(self):
    """
    Fetches the sports listing from the MLB Stats API as a Polars DataFrame.

    Returns:
    - pl.DataFrame: Built from the 'sports' entry of the decoded JSON response.
    """
    sports_url = 'https://statsapi.mlb.com/api/v1/sports'

    # Decode the response body as JSON
    payload = requests.get(url=sports_url).json()

    # Only the 'sports' entry of the payload is turned into a DataFrame
    return pl.DataFrame(payload['sports'])
30
+
31
def get_sport_id_check(self, sport_id: int = 1):
    """
    Reports whether a sport ID appears in the 'id' column returned by get_sport_id().

    Parameters:
    - sport_id (int): The sport ID to look up. Default is 1.

    Returns:
    - bool: True when the ID is present. Otherwise prints a prompt followed by
      the full sports listing and returns False.
    """
    # Pull the current sports listing
    available_sports = self.get_sport_id()

    # Known ID: nothing to report
    if sport_id in available_sports['id']:
        return True

    # Unknown ID: show the caller what can be selected instead
    print('Please Select a New Sport ID from the following')
    print(available_sports)
    return False
51
+
52
+
53
def get_game_types(self):
    """
    Fetches the game type listing from the MLB Stats API as a Polars DataFrame.

    Returns:
    - pl.DataFrame: Built directly from the decoded JSON response.
    """
    game_types_url = 'https://statsapi.mlb.com/api/v1/gameTypes'

    # Decode the response body as JSON
    payload = requests.get(url=game_types_url).json()

    # The whole payload (not a sub-key) is handed to the DataFrame constructor
    return pl.DataFrame(payload)
67
+
68
def get_schedule(self,
                 year_input: list = [2024],
                 sport_id: list = [1],
                 game_type: list = ['R']):
    """
    Builds a game schedule table from the MLB Stats API schedule endpoint.

    Parameters:
    - year_input (list): Seasons to request, as integers. Default is [2024].
    - sport_id (list): Sport IDs to request, as integers. Default is [1].
    - game_type (list): Game type codes to request, as strings. Default is ['R'].

    Returns:
    - game_df (pl.DataFrame): One row per unique game_id, sorted by date, with the
      columns game_id, time, date, away, home, state, venue_id and venue_name.
      'date' is parsed to a date and 'time' is rendered as a US/Eastern
      "%I:%M %p" string.
    - str: A message asking for different parameters when no games are found.

    Raises:
    - ValueError: If an argument is not a list or holds items of the wrong type.
    """
    # Each filter must be a list whose items all have the expected type.
    # The default lists are only read here, never modified.
    expectations = (
        (year_input, int, "year_input must be a list of integers."),
        (sport_id, int, "sport_id must be a list of integers."),
        (game_type, str, "game_type must be a list of strings."),
    )
    for values, item_type, message in expectations:
        well_formed = isinstance(values, list) and all(isinstance(v, item_type) for v in values)
        if not well_formed:
            raise ValueError(message)

    eastern = timezone('US/Eastern')

    # The endpoint takes each filter as a comma-separated string
    seasons = ','.join(map(str, year_input))
    sports = ','.join(map(str, sport_id))
    types = ','.join(map(str, game_type))

    # Request the schedule and decode the JSON body
    game_call = requests.get(url=f'https://statsapi.mlb.com/api/v1/schedule/?sportId={sports}&gameTypes={types}&season={seasons}&hydrate=lineup,players').json()

    # Flatten the per-date game lists once, then read every field from that flat list
    games = [game for day in game_call['dates'] for game in day['games']]

    columns = {
        'game_id': [game['gamePk'] for game in games],
        'time': [game['gameDate'] for game in games],
        'date': [game['officialDate'] for game in games],
        'away': [game['teams']['away']['team']['name'] for game in games],
        'home': [game['teams']['home']['team']['name'] for game in games],
        'state': [game['status']['codedGameState'] for game in games],
        'venue_id': [game['venue']['id'] for game in games],
        'venue_name': [game['venue']['name'] for game in games],
    }
    game_df = pl.DataFrame(data=columns)

    # Nothing came back for these filters
    if not len(game_df):
        return 'Schedule Length of 0, please select different parameters.'

    # Parse the date strings, and render the timestamps as Eastern clock times
    parsed_date = game_df['date'].str.to_date()
    eastern_time = (
        game_df['time']
        .str.to_datetime()
        .dt.convert_time_zone(eastern.zone)
        .dt.strftime("%I:%M %p")
    )
    game_df = game_df.with_columns(parsed_date, eastern_time)

    # Keep one row per game, ordered by date
    game_df = game_df.unique(subset='game_id')
    game_df = game_df.sort('date')

    # Same emptiness guard again after de-duplication
    if not len(game_df):
        return 'Schedule Length of 0, please select different parameters.'

    return game_df
139
+
140
+
141
def get_data(self, game_list_input: list):
    """
    Retrieves live game data for a list of game IDs.

    Parameters:
    - game_list_input (list): Game IDs for which to retrieve the live feed.

    Returns:
    - data_total (list): The decoded JSON response for each game ID, in the
      same order as the input.
    """
    data_total = []
    print('This May Take a While. Progress Bar shows Completion of Data Retrieval.')

    # Every request goes to the same host, so share one Session for the whole
    # batch: the pooled connection is reused instead of reopened per game.
    with requests.Session() as session:
        # Iterate the IDs directly; tqdm still reads the total from the list length
        for game_pk in tqdm(game_list_input, desc="Processing", unit="iteration"):
            # Request the live feed for this game
            response = session.get(f'https://statsapi.mlb.com/api/v1.1/game/{game_pk}/feed/live')
            # Keep the decoded JSON body
            data_total.append(response.json())

    return data_total
162
+
163
+ def get_data_df(self, data_list):
164
+ """
165
+ Converts a list of game data JSON objects into a Polars DataFrame.
166
+
167
+ Parameters:
168
+ - data_list (list): A list of JSON objects containing game data.
169
+
170
+ Returns:
171
+ - data_df (pl.DataFrame): A DataFrame containing the structured game data.
172
+ """
173
+ swing_list = ['X','F','S','D','E','T','W']
174
+ whiff_list = ['S','T','W']
175
+ print('Converting Data to Dataframe.')
176
+ game_id = []
177
+ game_date = []
178
+ batter_id = []
179
+ batter_name = []
180
+ batter_hand = []
181
+ batter_team = []
182
+ batter_team_id = []
183
+ pitcher_id = []
184
+ pitcher_name = []
185
+ pitcher_hand = []
186
+ pitcher_team = []
187
+ pitcher_team_id = []
188
+
189
+ play_description = []
190
+ play_code = []
191
+ in_play = []
192
+ is_strike = []
193
+ is_swing = []
194
+ is_whiff = []
195
+ is_out = []
196
+ is_ball = []
197
+ is_review = []
198
+ pitch_type = []
199
+ pitch_description = []
200
+ strikes = []
201
+ balls = []
202
+ outs = []
203
+ strikes_after = []
204
+ balls_after = []
205
+ outs_after = []
206
+
207
+ start_speed = []
208
+ end_speed = []
209
+ sz_top = []
210
+ sz_bot = []
211
+ x = []
212
+ y = []
213
+ ax = []
214
+ ay = []
215
+ az = []
216
+ pfxx = []
217
+ pfxz = []
218
+ px = []
219
+ pz = []
220
+ vx0 = []
221
+ vy0 = []
222
+ vz0 = []
223
+ x0 = []
224
+ y0 = []
225
+ z0 = []
226
+ zone = []
227
+ type_confidence = []
228
+ plate_time = []
229
+ extension = []
230
+ spin_rate = []
231
+ spin_direction = []
232
+ vb = []
233
+ ivb = []
234
+ hb = []
235
+
236
+ launch_speed = []
237
+ launch_angle = []
238
+ launch_distance = []
239
+ launch_location = []
240
+ trajectory = []
241
+ hardness = []
242
+ hit_x = []
243
+ hit_y = []
244
+
245
+ index_play = []
246
+ play_id = []
247
+ start_time = []
248
+ end_time = []
249
+ is_pitch = []
250
+ type_type = []
251
+
252
+
253
+ type_ab = []
254
+ ab_number = []
255
+ event = []
256
+ event_type = []
257
+ rbi = []
258
+ away_score = []
259
+ home_score = []
260
+
261
+ for data in data_list:
262
+ for ab_id in range(len(data['liveData']['plays']['allPlays'])):
263
+ ab_list = data['liveData']['plays']['allPlays'][ab_id]
264
+ for n in range(len(ab_list['playEvents'])):
265
+
266
+
267
+ if ab_list['playEvents'][n]['isPitch'] == True or 'call' in ab_list['playEvents'][n]['details']:
268
+ ab_number.append(ab_list['atBatIndex'] if 'atBatIndex' in ab_list else None)
269
+
270
+ game_id.append(data['gamePk'])
271
+ game_date.append(data['gameData']['datetime']['officialDate'])
272
+ if 'matchup' in ab_list:
273
+ batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
274
+ if 'batter' in ab_list['matchup']:
275
+ batter_name.append(ab_list['matchup']['batter']['fullName'] if 'fullName' in ab_list['matchup']['batter'] else None)
276
+ else:
277
+ batter_name.append(None)
278
+
279
+ batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
280
+ pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
281
+ if 'pitcher' in ab_list['matchup']:
282
+ pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'fullName' in ab_list['matchup']['pitcher'] else None)
283
+ else:
284
+ pitcher_name.append(None)
285
+
286
+ pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
287
+
288
+
289
+ if ab_list['about']['isTopInning']:
290
+ batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
291
+ batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
292
+ pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
293
+ pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
294
+
295
+ else:
296
+ batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
297
+ batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
298
+ pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
299
+ pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
300
+
301
+ play_description.append(ab_list['playEvents'][n]['details']['description'] if 'description' in ab_list['playEvents'][n]['details'] else None)
302
+ play_code.append(ab_list['playEvents'][n]['details']['code'] if 'code' in ab_list['playEvents'][n]['details'] else None)
303
+ in_play.append(ab_list['playEvents'][n]['details']['isInPlay'] if 'isInPlay' in ab_list['playEvents'][n]['details'] else None)
304
+ is_strike.append(ab_list['playEvents'][n]['details']['isStrike'] if 'isStrike' in ab_list['playEvents'][n]['details'] else None)
305
+
306
+ if 'details' in ab_list['playEvents'][n]:
307
+ is_swing.append(True if ab_list['playEvents'][n]['details']['code'] in swing_list else None)
308
+ is_whiff.append(True if ab_list['playEvents'][n]['details']['code'] in whiff_list else None)
309
+ else:
310
+ is_swing.append(None)
311
+ is_whiff.append(None)
312
+
313
+ is_ball.append(ab_list['playEvents'][n]['details']['isOut'] if 'isOut' in ab_list['playEvents'][n]['details'] else None)
314
+ is_review.append(ab_list['playEvents'][n]['details']['hasReview'] if 'hasReview' in ab_list['playEvents'][n]['details'] else None)
315
+ pitch_type.append(ab_list['playEvents'][n]['details']['type']['code'] if 'type' in ab_list['playEvents'][n]['details'] else None)
316
+ pitch_description.append(ab_list['playEvents'][n]['details']['type']['description'] if 'type' in ab_list['playEvents'][n]['details'] else None)
317
+
318
+ if ab_list['playEvents'][n]['pitchNumber'] == 1:
319
+ strikes.append(0)
320
+ balls.append(0)
321
+ strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
322
+ balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
323
+ outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
324
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
325
+
326
+ else:
327
+ strikes.append(ab_list['playEvents'][n-1]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n-1]['count'] else None)
328
+ balls.append(ab_list['playEvents'][n-1]['count']['balls'] if 'balls' in ab_list['playEvents'][n-1]['count'] else None)
329
+ outs.append(ab_list['playEvents'][n-1]['count']['outs'] if 'outs' in ab_list['playEvents'][n-1]['count'] else None)
330
+
331
+ strikes_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
332
+ balls_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
333
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
334
+
335
+
336
+ if 'pitchData' in ab_list['playEvents'][n]:
337
+
338
+ start_speed.append(ab_list['playEvents'][n]['pitchData']['startSpeed'] if 'startSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
339
+ end_speed.append(ab_list['playEvents'][n]['pitchData']['endSpeed'] if 'endSpeed' in ab_list['playEvents'][n]['pitchData'] else None)
340
+
341
+ sz_top.append(ab_list['playEvents'][n]['pitchData']['strikeZoneTop'] if 'strikeZoneTop' in ab_list['playEvents'][n]['pitchData'] else None)
342
+ sz_bot.append(ab_list['playEvents'][n]['pitchData']['strikeZoneBottom'] if 'strikeZoneBottom' in ab_list['playEvents'][n]['pitchData'] else None)
343
+ x.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x'] if 'x' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
344
+ y.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y'] if 'y' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
345
+
346
+ ax.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aX'] if 'aX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
347
+ ay.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aY'] if 'aY' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
348
+ az.append(ab_list['playEvents'][n]['pitchData']['coordinates']['aZ'] if 'aZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
349
+ pfxx.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxX'] if 'pfxX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
350
+ pfxz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pfxZ'] if 'pfxZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
351
+ px.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pX'] if 'pX' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
352
+ pz.append(ab_list['playEvents'][n]['pitchData']['coordinates']['pZ'] if 'pZ' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
353
+ vx0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vX0'] if 'vX0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
354
+ vy0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vY0'] if 'vY0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
355
+ vz0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['vZ0'] if 'vZ0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
356
+ x0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['x0'] if 'x0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
357
+ y0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['y0'] if 'y0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
358
+ z0.append(ab_list['playEvents'][n]['pitchData']['coordinates']['z0'] if 'z0' in ab_list['playEvents'][n]['pitchData']['coordinates'] else None)
359
+
360
+ zone.append(ab_list['playEvents'][n]['pitchData']['zone'] if 'zone' in ab_list['playEvents'][n]['pitchData'] else None)
361
+ type_confidence.append(ab_list['playEvents'][n]['pitchData']['typeConfidence'] if 'typeConfidence' in ab_list['playEvents'][n]['pitchData'] else None)
362
+ plate_time.append(ab_list['playEvents'][n]['pitchData']['plateTime'] if 'plateTime' in ab_list['playEvents'][n]['pitchData'] else None)
363
+ extension.append(ab_list['playEvents'][n]['pitchData']['extension'] if 'extension' in ab_list['playEvents'][n]['pitchData'] else None)
364
+
365
+ if 'breaks' in ab_list['playEvents'][n]['pitchData']:
366
+ spin_rate.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinRate'] if 'spinRate' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
367
+ spin_direction.append(ab_list['playEvents'][n]['pitchData']['breaks']['spinDirection'] if 'spinDirection' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
368
+ vb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVertical'] if 'breakVertical' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
369
+ ivb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakVerticalInduced'] if 'breakVerticalInduced' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
370
+ hb.append(ab_list['playEvents'][n]['pitchData']['breaks']['breakHorizontal'] if 'breakHorizontal' in ab_list['playEvents'][n]['pitchData']['breaks'] else None)
371
+
372
+ else:
373
+ start_speed.append(None)
374
+ end_speed.append(None)
375
+
376
+ sz_top.append(None)
377
+ sz_bot.append(None)
378
+ x.append(None)
379
+ y.append(None)
380
+
381
+ ax.append(None)
382
+ ay.append(None)
383
+ az.append(None)
384
+ pfxx.append(None)
385
+ pfxz.append(None)
386
+ px.append(None)
387
+ pz.append(None)
388
+ vx0.append(None)
389
+ vy0.append(None)
390
+ vz0.append(None)
391
+ x0.append(None)
392
+ y0.append(None)
393
+ z0.append(None)
394
+
395
+ zone.append(None)
396
+ type_confidence.append(None)
397
+ plate_time.append(None)
398
+ extension.append(None)
399
+ spin_rate.append(None)
400
+ spin_direction.append(None)
401
+ vb.append(None)
402
+ ivb.append(None)
403
+ hb.append(None)
404
+
405
+ if 'hitData' in ab_list['playEvents'][n]:
406
+ launch_speed.append(ab_list['playEvents'][n]['hitData']['launchSpeed'] if 'launchSpeed' in ab_list['playEvents'][n]['hitData'] else None)
407
+ launch_angle.append(ab_list['playEvents'][n]['hitData']['launchAngle'] if 'launchAngle' in ab_list['playEvents'][n]['hitData'] else None)
408
+ launch_distance.append(ab_list['playEvents'][n]['hitData']['totalDistance'] if 'totalDistance' in ab_list['playEvents'][n]['hitData'] else None)
409
+ launch_location.append(ab_list['playEvents'][n]['hitData']['location'] if 'location' in ab_list['playEvents'][n]['hitData'] else None)
410
+
411
+ trajectory.append(ab_list['playEvents'][n]['hitData']['trajectory'] if 'trajectory' in ab_list['playEvents'][n]['hitData'] else None)
412
+ hardness.append(ab_list['playEvents'][n]['hitData']['hardness'] if 'hardness' in ab_list['playEvents'][n]['hitData'] else None)
413
+ hit_x.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordX'] if 'coordX' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
414
+ hit_y.append(ab_list['playEvents'][n]['hitData']['coordinates']['coordY'] if 'coordY' in ab_list['playEvents'][n]['hitData']['coordinates'] else None)
415
+ else:
416
+ launch_speed.append(None)
417
+ launch_angle.append(None)
418
+ launch_distance.append(None)
419
+ launch_location.append(None)
420
+ trajectory.append(None)
421
+ hardness.append(None)
422
+ hit_x.append(None)
423
+ hit_y.append(None)
424
+
425
+ index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
426
+ play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
427
+ start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
428
+ end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
429
+ is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
430
+ type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
431
+
432
+
433
+
434
+ if n == len(ab_list['playEvents']) - 1 :
435
+
436
+ type_ab.append(data['liveData']['plays']['allPlays'][ab_id]['result']['type'] if 'type' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
437
+ event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'] if 'event' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
438
+ event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'] if 'eventType' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
439
+ rbi.append(data['liveData']['plays']['allPlays'][ab_id]['result']['rbi'] if 'rbi' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
440
+ away_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['awayScore'] if 'awayScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
441
+ home_score.append(data['liveData']['plays']['allPlays'][ab_id]['result']['homeScore'] if 'homeScore' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
442
+ is_out.append(data['liveData']['plays']['allPlays'][ab_id]['result']['isOut'] if 'isOut' in data['liveData']['plays']['allPlays'][ab_id]['result'] else None)
443
+
444
+ else:
445
+
446
+ type_ab.append(None)
447
+ event.append(None)
448
+ event_type.append(None)
449
+ rbi.append(None)
450
+ away_score.append(None)
451
+ home_score.append(None)
452
+ is_out.append(None)
453
+
454
+ elif ab_list['playEvents'][n]['count']['balls'] == 4:
455
+
456
+ event.append(data['liveData']['plays']['allPlays'][ab_id]['result']['event'])
457
+ event_type.append(data['liveData']['plays']['allPlays'][ab_id]['result']['eventType'])
458
+
459
+
460
+ game_id.append(data['gamePk'])
461
+ game_date.append(data['gameData']['datetime']['officialDate'])
462
+ batter_id.append(ab_list['matchup']['batter']['id'] if 'batter' in ab_list['matchup'] else None)
463
+ batter_name.append(ab_list['matchup']['batter']['fullName'] if 'batter' in ab_list['matchup'] else None)
464
+ batter_hand.append(ab_list['matchup']['batSide']['code'] if 'batSide' in ab_list['matchup'] else None)
465
+ pitcher_id.append(ab_list['matchup']['pitcher']['id'] if 'pitcher' in ab_list['matchup'] else None)
466
+ pitcher_name.append(ab_list['matchup']['pitcher']['fullName'] if 'pitcher' in ab_list['matchup'] else None)
467
+ pitcher_hand.append(ab_list['matchup']['pitchHand']['code'] if 'pitchHand' in ab_list['matchup'] else None)
468
+ if ab_list['about']['isTopInning']:
469
+ batter_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
470
+ batter_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
471
+ pitcher_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
472
+ pitcher_team_id.append(data['gameData']['teams']['away']['id'] if 'away' in data['gameData']['teams'] else None)
473
+ else:
474
+ batter_team.append(data['gameData']['teams']['home']['abbreviation'] if 'home' in data['gameData']['teams'] else None)
475
+ batter_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
476
+ pitcher_team.append(data['gameData']['teams']['away']['abbreviation'] if 'away' in data['gameData']['teams'] else None)
477
+ pitcher_team_id.append(data['gameData']['teams']['home']['id'] if 'home' in data['gameData']['teams'] else None)
478
+
479
+ play_description.append(None)
480
+ play_code.append(None)
481
+ in_play.append(None)
482
+ is_strike.append(None)
483
+ is_ball.append(None)
484
+ is_review.append(None)
485
+ pitch_type.append(None)
486
+ pitch_description.append(None)
487
+ strikes.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
488
+ balls.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
489
+ outs.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
490
+ strikes_after.append(ab_list['playEvents'][n]['count']['balls'] if 'balls' in ab_list['playEvents'][n]['count'] else None)
491
+ balls_after.append(ab_list['playEvents'][n]['count']['strikes'] if 'strikes' in ab_list['playEvents'][n]['count'] else None)
492
+ outs_after.append(ab_list['playEvents'][n]['count']['outs'] if 'outs' in ab_list['playEvents'][n]['count'] else None)
493
+ index_play.append(ab_list['playEvents'][n]['index'] if 'index' in ab_list['playEvents'][n] else None)
494
+ play_id.append(ab_list['playEvents'][n]['playId'] if 'playId' in ab_list['playEvents'][n] else None)
495
+ start_time.append(ab_list['playEvents'][n]['startTime'] if 'startTime' in ab_list['playEvents'][n] else None)
496
+ end_time.append(ab_list['playEvents'][n]['endTime'] if 'endTime' in ab_list['playEvents'][n] else None)
497
+ is_pitch.append(ab_list['playEvents'][n]['isPitch'] if 'isPitch' in ab_list['playEvents'][n] else None)
498
+ type_type.append(ab_list['playEvents'][n]['type'] if 'type' in ab_list['playEvents'][n] else None)
499
+
500
+
501
+
502
+ is_swing.append(None)
503
+ is_whiff.append(None)
504
+ start_speed.append(None)
505
+ end_speed.append(None)
506
+ sz_top.append(None)
507
+ sz_bot.append(None)
508
+ x.append(None)
509
+ y.append(None)
510
+ ax.append(None)
511
+ ay.append(None)
512
+ az.append(None)
513
+ pfxx.append(None)
514
+ pfxz.append(None)
515
+ px.append(None)
516
+ pz.append(None)
517
+ vx0.append(None)
518
+ vy0.append(None)
519
+ vz0.append(None)
520
+ x0.append(None)
521
+ y0.append(None)
522
+ z0.append(None)
523
+ zone.append(None)
524
+ type_confidence.append(None)
525
+ plate_time.append(None)
526
+ extension.append(None)
527
+ spin_rate.append(None)
528
+ spin_direction.append(None)
529
+ vb.append(None)
530
+ ivb.append(None)
531
+ hb.append(None)
532
+ launch_speed.append(None)
533
+ launch_angle.append(None)
534
+ launch_distance.append(None)
535
+ launch_location.append(None)
536
+ trajectory.append(None)
537
+ hardness.append(None)
538
+ hit_x.append(None)
539
+ hit_y.append(None)
540
+ type_ab.append(None)
541
+ ab_number.append(None)
542
+
543
+ rbi.append(None)
544
+ away_score.append(None)
545
+ home_score.append(None)
546
+ is_out.append(None)
547
+
548
+ # print({
549
+ # 'game_id':len(game_id),
550
+ # 'game_date':len(game_date),
551
+ # 'batter_id':len(batter_id),
552
+ # 'batter_name':len(batter_name),
553
+ # 'batter_hand':len(batter_hand),
554
+ # 'batter_team':len(batter_team),
555
+ # 'batter_team_id':len(batter_team_id),
556
+ # 'pitcher_id':len(pitcher_id),
557
+ # 'pitcher_name':len(pitcher_name),
558
+ # 'pitcher_hand':len(pitcher_hand),
559
+ # 'pitcher_team':len(pitcher_team),
560
+ # 'pitcher_team_id':len(pitcher_team_id),
561
+
562
+ # 'play_description':len(play_description),
563
+ # 'play_code':len(play_code),
564
+ # 'in_play':len(in_play),
565
+ # 'is_strike':len(is_strike),
566
+ # 'is_swing':len(is_swing),
567
+ # 'is_whiff':len(is_whiff),
568
+ # 'is_out':len(is_out),
569
+ # 'is_ball':len(is_ball),
570
+ # 'is_review':len(is_review),
571
+ # 'pitch_type':len(pitch_type),
572
+ # 'pitch_description':len(pitch_description),
573
+ # 'strikes':len(strikes),
574
+ # 'balls':len(balls),
575
+ # 'outs':len(outs),
576
+ # 'strikes_after':len(strikes_after),
577
+ # 'balls_after':len(balls_after),
578
+ # 'outs_after':len(outs_after),
579
+ # 'start_speed':len(start_speed),
580
+ # 'end_speed':len(end_speed),
581
+ # 'sz_top':len(sz_top),
582
+ # 'sz_bot':len(sz_bot),
583
+ # 'x':len(x),
584
+ # 'y':len(y),
585
+ # 'ax':len(ax),
586
+ # 'ay':len(ay),
587
+ # 'az':len(az),
588
+ # 'pfxx':len(pfxx),
589
+ # 'pfxz':len(pfxz),
590
+ # 'px':len(px),
591
+ # 'pz':len(pz),
592
+ # 'vx0':len(vx0),
593
+ # 'vy0':len(vy0),
594
+ # 'vz0':len(vz0),
595
+ # 'x0':len(x0),
596
+ # 'y0':len(y0),
597
+ # 'z0':len(z0),
598
+ # 'zone':len(zone),
599
+ # 'type_confidence':len(type_confidence),
600
+ # 'plate_time':len(plate_time),
601
+ # 'extension':len(extension),
602
+ # 'spin_rate':len(spin_rate),
603
+ # 'spin_direction':len(spin_direction),
604
+ # 'vb':len(vb),
605
+ # 'ivb':len(ivb),
606
+ # 'hb':len(hb),
607
+ # 'launch_speed':len(launch_speed),
608
+ # 'launch_angle':len(launch_angle),
609
+ # 'launch_distance':len(launch_distance),
610
+ # 'launch_location':len(launch_location),
611
+ # 'trajectory':len(trajectory),
612
+ # 'hardness':len(hardness),
613
+ # 'hit_x':len(hit_x),
614
+ # 'hit_y':len(hit_y),
615
+ # 'index_play':len(index_play),
616
+ # 'play_id':len(play_id),
617
+ # 'start_time':len(start_time),
618
+ # 'end_time':len(end_time),
619
+ # 'is_pitch':len(is_pitch),
620
+ # 'type_type':len(type_type),
621
+ # 'type_ab':len(type_ab),
622
+ # 'event':len(event),
623
+ # 'event_type':len(event_type),
624
+ # 'rbi':len(rbi),
625
+ # 'away_score':len(away_score),
626
+ # 'home_score':len(home_score),
627
+ # }
628
+
629
+
630
+ # )
631
+ df = pl.DataFrame(data={
632
+ 'game_id':game_id,
633
+ 'game_date':game_date,
634
+ 'batter_id':batter_id,
635
+ 'batter_name':batter_name,
636
+ 'batter_hand':batter_hand,
637
+ 'batter_team':batter_team,
638
+ 'batter_team_id':batter_team_id,
639
+ 'pitcher_id':pitcher_id,
640
+ 'pitcher_name':pitcher_name,
641
+ 'pitcher_hand':pitcher_hand,
642
+ 'pitcher_team':pitcher_team,
643
+ 'pitcher_team_id':pitcher_team_id,
644
+ 'ab_number':ab_number,
645
+ 'play_description':play_description,
646
+ 'play_code':play_code,
647
+ 'in_play':in_play,
648
+ 'is_strike':is_strike,
649
+ 'is_swing':is_swing,
650
+ 'is_whiff':is_whiff,
651
+ 'is_out':is_out,
652
+ 'is_ball':is_ball,
653
+ 'is_review':is_review,
654
+ 'pitch_type':pitch_type,
655
+ 'pitch_description':pitch_description,
656
+ 'strikes':strikes,
657
+ 'balls':balls,
658
+ 'outs':outs,
659
+ 'strikes_after':strikes_after,
660
+ 'balls_after':balls_after,
661
+ 'outs_after':outs_after,
662
+ 'start_speed':start_speed,
663
+ 'end_speed':end_speed,
664
+ 'sz_top':sz_top,
665
+ 'sz_bot':sz_bot,
666
+ 'x':x,
667
+ 'y':y,
668
+ 'ax':ax,
669
+ 'ay':ay,
670
+ 'az':az,
671
+ 'pfxx':pfxx,
672
+ 'pfxz':pfxz,
673
+ 'px':px,
674
+ 'pz':pz,
675
+ 'vx0':vx0,
676
+ 'vy0':vy0,
677
+ 'vz0':vz0,
678
+ 'x0':x0,
679
+ 'y0':y0,
680
+ 'z0':z0,
681
+ 'zone':zone,
682
+ 'type_confidence':type_confidence,
683
+ 'plate_time':plate_time,
684
+ 'extension':extension,
685
+ 'spin_rate':spin_rate,
686
+ 'spin_direction':spin_direction,
687
+ 'vb':vb,
688
+ 'ivb':ivb,
689
+ 'hb':hb,
690
+ 'launch_speed':launch_speed,
691
+ 'launch_angle':launch_angle,
692
+ 'launch_distance':launch_distance,
693
+ 'launch_location':launch_location,
694
+ 'trajectory':trajectory,
695
+ 'hardness':hardness,
696
+ 'hit_x':hit_x,
697
+ 'hit_y':hit_y,
698
+ 'index_play':index_play,
699
+ 'play_id':play_id,
700
+ 'start_time':start_time,
701
+ 'end_time':end_time,
702
+ 'is_pitch':is_pitch,
703
+ 'type_type':type_type,
704
+ 'type_ab':type_ab,
705
+ 'event':event,
706
+ 'event_type':event_type,
707
+ 'rbi':rbi,
708
+ 'away_score':away_score,
709
+ 'home_score':home_score,
710
+
711
+ },strict=False
712
+ )
713
+
714
+ return df
715
+
716
+ # def get_players(self,sport_id:int):
717
+ # player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players').json()
718
+
719
+ # #Select relevant data that will help distinguish players from one another
720
+ # fullName_list = [x['fullName'] for x in player_data['people']]
721
+ # id_list = [x['id'] for x in player_data['people']]
722
+ # position_list = [x['primaryPosition']['abbreviation'] for x in player_data['people']]
723
+ # team_list = [x['currentTeam']['id']for x in player_data['people']]
724
+ # age_list = [x['currentAge']for x in player_data['people']]
725
+
726
+ # player_df = pl.DataFrame(data={'player_id':id_list,
727
+ # 'name':fullName_list,
728
+ # 'position':position_list,
729
+ # 'team':team_list,
730
+ # 'age':age_list})
731
+ # return player_df
732
+
733
def get_teams(self):
    """
    Retrieves information about MLB teams from the MLB API and processes it into a Polars DataFrame.

    Only API entries that carry a 'franchiseName' key are kept. Teams with no
    parent organization are treated as their own parent: their parent_org_id
    is filled with team_id and their parent_org with the franchise value.

    Returns:
    - mlb_teams_df (pl.DataFrame): One row per team, sorted by team_id, with the columns
      team_id, city, name, franchise, abbreviation, parent_org_id, parent_org,
      league_id, league_name and parent_org_abbreviation.
    """
    # Make API call to retrieve team information
    response = requests.get(url='https://statsapi.mlb.com/api/v1/teams/').json()

    # Keep only entries with a 'franchiseName'. The previous implementation
    # nulled the team_id of every other entry and then dropped those rows,
    # so filtering up front yields the same set of teams.
    teams = [team for team in response['teams'] if 'franchiseName' in team]

    def league_field(team, key):
        # 'league' may be missing or empty; both are treated as "no value"
        return (team.get('league') or {}).get(key)

    # Create a Polars DataFrame with the extracted data
    mlb_teams_df = pl.DataFrame(data={
        'team_id': [team.get('id') for team in teams],
        # Fix: 'city' now comes from 'franchiseName'. It used to be filled with
        # the same list as 'franchise', leaving the extracted city list unused.
        'city': [team['franchiseName'] for team in teams],
        'name': [team.get('teamName') for team in teams],
        'franchise': [team.get('name') for team in teams],
        'abbreviation': [team.get('abbreviation') for team in teams],
        'parent_org_id': [team.get('parentOrgId') for team in teams],
        'parent_org': [team.get('parentOrgName') for team in teams],
        'league_id': [league_field(team, 'id') for team in teams],
        'league_name': [league_field(team, 'name') for team in teams],
    }).unique().drop_nulls(subset=['team_id']).sort('team_id')

    # Fill missing parent organization IDs with team IDs and missing parent
    # organization names with franchise names
    mlb_teams_df = mlb_teams_df.with_columns(
        pl.when(pl.col('parent_org_id').is_null())
        .then(pl.col('team_id'))
        .otherwise(pl.col('parent_org_id'))
        .alias('parent_org_id'),
        pl.when(pl.col('parent_org').is_null())
        .then(pl.col('franchise'))
        .otherwise(pl.col('parent_org'))
        .alias('parent_org'),
    )

    # Look up each parent organization's abbreviation by joining the table
    # against its own (team_id, abbreviation) pairs
    abbreviation_df = mlb_teams_df.select(['team_id', 'abbreviation']).rename(
        {'team_id': 'parent_org_id', 'abbreviation': 'parent_org_abbreviation'}
    )

    return mlb_teams_df.join(abbreviation_df, on='parent_org_id', how='left')
793
+
794
def get_leagues(self):
    """
    Fetch the league list from the MLB API and return it as a Polars DataFrame.

    Returns:
    - leagues_df (pl.DataFrame): One row per league with the columns league_id,
      league_name, league_abbreviation and sport_id. A field missing from an
      API entry is stored as null.
    """
    # Query the leagues endpoint and keep the list of league entries
    payload = requests.get(url='https://statsapi.mlb.com/api/v1/leagues/').json()
    entries = payload['leagues']

    # Build the table column by column, in the published column order
    columns = {
        'league_id': [entry.get('id') for entry in entries],
        'league_name': [entry.get('name') for entry in entries],
        'league_abbreviation': [entry.get('abbreviation') for entry in entries],
        # The sport id is nested one level down; entries without 'sport' get null
        'sport_id': [entry['sport']['id'] if 'sport' in entry else None for entry in entries],
    }

    return pl.DataFrame(data=columns)
819
+
820
def get_player_games_list(self, player_id: int, season: int, start_date: str = None, end_date: str = None, sport_id: int = 1, game_type: list = None):
    """
    Retrieves a list of game IDs for a specific player in a given season.

    Parameters:
    - player_id (int): The ID of the player.
    - season (int): The season year for which to retrieve the game list.
    - start_date (str): The start date (YYYY-MM-DD) of the range (default is January 1st of the specified season).
    - end_date (str): The end date (YYYY-MM-DD) of the range (default is December 31st of the specified season).
    - sport_id (int): The ID of the sport for which to retrieve player data.
    - game_type (list): A list of game types to filter the schedule. Default is ['R'].

    Returns:
    - player_game_list (list): A list of game IDs in which the player participated during the specified
      range. Empty when the API response carries no game-log stats for the player.

    Raises:
    - ValueError: If start_date or end_date is not in YYYY-MM-DD format.
    """
    # None stands in for the default ['R'] so that no mutable list is shared between calls
    if game_type is None:
        game_type = ['R']

    # Set default start and end dates if not provided
    if not start_date:
        start_date = f'{season}-01-01'
    if not end_date:
        end_date = f'{season}-12-31'

    # Validate date format. fullmatch is used because match() with a trailing
    # '$' would also accept a date followed by a newline.
    date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')
    if not date_pattern.fullmatch(start_date):
        raise ValueError(f"start_date {start_date} is not in YYYY-MM-DD format")
    if not date_pattern.fullmatch(end_date):
        raise ValueError(f"end_date {end_date} is not in YYYY-MM-DD format")

    game_type_str = ','.join(str(x) for x in game_type)

    # Make API call to retrieve player game logs (https, like the other endpoints in this class)
    url = (
        f'https://statsapi.mlb.com/api/v1/people/{player_id}'
        f'?hydrate=stats(type=gameLog,season={season},startDate={start_date},'
        f'endDate={end_date},sportId={sport_id},gameType=[{game_type_str}]),hydrations'
    )
    response = requests.get(url=url).json()

    # Extract game IDs from the API response. A response without stats or
    # splits yields an empty list instead of a KeyError/IndexError.
    stats = response['people'][0].get('stats') or []
    if not stats:
        return []

    player_game_list = [split['game']['gamePk'] for split in stats[0].get('splits') or []]

    return player_game_list
860
+
861
+
862
def get_players(self, sport_id: int, season: int):
    """
    Build a data frame of the players listed for a sport in a given season.

    Parameters:
    - sport_id (int): The ID of the sport for which to retrieve player data.
    - season (int): The season year for which to retrieve player data.

    Returns:
    - df (pl.DataFrame): One row per player with the columns player_id, first_name,
      last_name, name, position, team, weight, height, age and birthDate.
      A field missing from the API entry is stored as null.
    """
    player_data = requests.get(url=f'https://statsapi.mlb.com/api/v1/sports/{sport_id}/players?season={season}').json()
    people = player_data['people']

    def top_level(key):
        # One column of values taken directly from each player entry
        return [person.get(key) for person in people]

    def nested(outer, inner):
        # One column of values stored one level down (e.g. primaryPosition -> abbreviation);
        # null when either level is missing
        return [
            person[outer][inner] if outer in person and inner in person[outer] else None
            for person in people
        ]

    # Select the fields that help distinguish players from one another
    columns = {
        'player_id': top_level('id'),
        'first_name': top_level('firstName'),
        'last_name': top_level('lastName'),
        'name': top_level('fullName'),
        'position': nested('primaryPosition', 'abbreviation'),
        'team': nested('currentTeam', 'id'),
        'weight': top_level('weight'),
        'height': top_level('height'),
        'age': top_level('currentAge'),
        'birthDate': top_level('birthDate'),
    }

    return pl.DataFrame(data=columns)
902
+
app.py CHANGED
@@ -1,162 +1,293 @@
1
- import faicons as fa
2
- import plotly.express as px
3
-
4
- # Load data and compute static values
5
- from shared import app_dir, tips
6
- from shinywidgets import render_plotly
7
-
8
- from shiny import reactive, render
9
- from shiny.express import input, ui
10
-
11
- bill_rng = (min(tips.total_bill), max(tips.total_bill))
12
-
13
- # Add page title and sidebar
14
- ui.page_opts(title="Restaurant tipping", fillable=True)
15
-
16
- with ui.sidebar(open="desktop"):
17
- ui.input_slider(
18
- "total_bill",
19
- "Bill amount",
20
- min=bill_rng[0],
21
- max=bill_rng[1],
22
- value=bill_rng,
23
- pre="$",
24
- )
25
- ui.input_checkbox_group(
26
- "time",
27
- "Food service",
28
- ["Lunch", "Dinner"],
29
- selected=["Lunch", "Dinner"],
30
- inline=True,
31
- )
32
- ui.input_action_button("reset", "Reset filter")
33
-
34
- # Add main content
35
- ICONS = {
36
- "user": fa.icon_svg("user", "regular"),
37
- "wallet": fa.icon_svg("wallet"),
38
- "currency-dollar": fa.icon_svg("dollar-sign"),
39
- "ellipsis": fa.icon_svg("ellipsis"),
40
- }
41
-
42
- with ui.layout_columns(fill=False):
43
- with ui.value_box(showcase=ICONS["user"]):
44
- "Total tippers"
45
-
46
- @render.express
47
- def total_tippers():
48
- tips_data().shape[0]
49
-
50
- with ui.value_box(showcase=ICONS["wallet"]):
51
- "Average tip"
52
-
53
- @render.express
54
- def average_tip():
55
- d = tips_data()
56
- if d.shape[0] > 0:
57
- perc = d.tip / d.total_bill
58
- f"{perc.mean():.1%}"
59
-
60
- with ui.value_box(showcase=ICONS["currency-dollar"]):
61
- "Average bill"
62
-
63
- @render.express
64
- def average_bill():
65
- d = tips_data()
66
- if d.shape[0] > 0:
67
- bill = d.total_bill.mean()
68
- f"${bill:.2f}"
69
-
70
-
71
- with ui.layout_columns(col_widths=[6, 6, 12]):
72
- with ui.card(full_screen=True):
73
- ui.card_header("Tips data")
74
-
75
- @render.data_frame
76
- def table():
77
- return render.DataGrid(tips_data())
78
-
79
- with ui.card(full_screen=True):
80
- with ui.card_header(class_="d-flex justify-content-between align-items-center"):
81
- "Total bill vs tip"
82
- with ui.popover(title="Add a color variable", placement="top"):
83
- ICONS["ellipsis"]
84
- ui.input_radio_buttons(
85
- "scatter_color",
86
- None,
87
- ["none", "sex", "smoker", "day", "time"],
88
- inline=True,
89
- )
90
-
91
- @render_plotly
92
- def scatterplot():
93
- color = input.scatter_color()
94
- return px.scatter(
95
- tips_data(),
96
- x="total_bill",
97
- y="tip",
98
- color=None if color == "none" else color,
99
- trendline="lowess",
100
- )
101
-
102
- with ui.card(full_screen=True):
103
- with ui.card_header(class_="d-flex justify-content-between align-items-center"):
104
- "Tip percentages"
105
- with ui.popover(title="Add a color variable"):
106
- ICONS["ellipsis"]
107
- ui.input_radio_buttons(
108
- "tip_perc_y",
109
- "Split by:",
110
- ["sex", "smoker", "day", "time"],
111
- selected="day",
112
- inline=True,
113
- )
114
-
115
- @render_plotly
116
- def tip_perc():
117
- from ridgeplot import ridgeplot
118
-
119
- dat = tips_data()
120
- dat["percent"] = dat.tip / dat.total_bill
121
- yvar = input.tip_perc_y()
122
- uvals = dat[yvar].unique()
123
-
124
- samples = [[dat.percent[dat[yvar] == val]] for val in uvals]
125
-
126
- plt = ridgeplot(
127
- samples=samples,
128
- labels=uvals,
129
- bandwidth=0.01,
130
- colorscale="viridis",
131
- colormode="row-index",
132
- )
133
-
134
- plt.update_layout(
135
- legend=dict(
136
- orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5
137
- )
138
- )
139
-
140
- return plt
141
-
142
-
143
- ui.include_css(app_dir / "styles.css")
144
-
145
- # --------------------------------------------------------
146
- # Reactive calculations and effects
147
- # --------------------------------------------------------
148
-
149
-
150
- @reactive.calc
151
- def tips_data():
152
- bill = input.total_bill()
153
- idx1 = tips.total_bill.between(bill[0], bill[1])
154
- idx2 = tips.time.isin(input.time())
155
- return tips[idx1 & idx2]
156
-
157
-
158
- @reactive.effect
159
- @reactive.event(input.reset)
160
- def _():
161
- ui.update_slider("total_bill", value=bill_rng)
162
- ui.update_checkbox_group("time", selected=["Lunch", "Dinner"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import pandas as pd
4
+ import api_scraper
5
+ scrape = api_scraper.MLB_Scrape()
6
+ from functions import df_update
7
+ from functions import pitch_summary_functions
8
+ update = df_update.df_update()
9
+ from stuff_model import feature_engineering as fe
10
+ from stuff_model import stuff_apply
11
+ import requests
12
+ import joblib
13
+ from matplotlib.gridspec import GridSpec
14
+ from shiny import App, reactive, ui, render
15
+ from shiny.ui import h2, tags
16
+ import matplotlib.pyplot as plt
17
+ import matplotlib.gridspec as gridspec
18
+ import seaborn as sns
19
+ from functions.pitch_summary_functions import *
20
+
21
# Colours handed to seaborn as the plotting palette
colour_palette = [
    '#FFB000', '#648FFF', '#785EF0', '#DC267F', '#FE6100',
    '#3D1EB2', '#894D80', '#16AA02', '#B5592B', '#A3C1ED',
]

# Seasons offered in the "Select Season" dropdown
year_list = list(range(2017, 2025))

# Sport id (as used by the level dropdown) -> display label, in dropdown order
level_dict = dict([
    ('1', 'MLB'),
    ('11', 'AAA'),
    ('12', 'AA'),
    ('13', 'A+'),
    ('14', 'A'),
    ('17', 'AFL'),
    ('22', 'College'),
    ('21', 'Prospects'),
    ('51', 'International'),
])

# Plot key -> label shown in the three plot-selection dropdowns
function_dict = {
    'velocity_kdes': 'Velocity Distributions',
    'break_plot': 'Pitch Movement',
    'tj_stuff_roling': 'Rolling tjStuff+ by Pitch',
    'tj_stuff_roling_game': 'Rolling tjStuff+ by Game',
    'location_plot_lhb': 'Locations vs LHB',
    'location_plot_rhb': 'Locations vs RHB',
}

# Split key -> label shown in the split dropdown
split_dict = {
    'all': 'All',
    'left': 'LHH',
    'right': 'RHH',
}

# Split key -> batter-hand codes used to filter the pitch data
split_dict_hand = {
    'all': ['L', 'R'],
    'left': ['L'],
    'right': ['R'],
}
56
+
57
+ from shiny import App, reactive, ui, render
58
+ from shiny.ui import h2, tags
59
+
60
+ # Define the UI layout for the app
61
# Page layout: a sidebar holding every control and a main panel with a single
# "Pitching Summary" tab that shows the status text and the generated figure.
app_ui = ui.page_fluid(
    ui.layout_sidebar(
        ui.panel_sidebar(
            # Row for selecting season and level
            ui.row(
                ui.column(6, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
                ui.column(6, ui.input_select('level_input', 'Select Level', level_dict)),
            ),
            # Row for the action button to get player list
            ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
            # Rows for the server-rendered pitcher selector and date-range picker.
            # output_ui takes no label: its second positional parameter is
            # `inline`, so the label strings previously passed here only
            # switched the containers to inline spans.
            ui.row(ui.column(12, ui.output_ui('player_select_ui'))),
            ui.row(ui.column(12, ui.output_ui('date_id'))),

            # Rows for selecting plots and split options
            ui.row(
                ui.column(4, ui.input_select('plot_id_1', 'Plot Left', function_dict, multiple=False, selected='velocity_kdes')),
                ui.column(4, ui.input_select('plot_id_2', 'Plot Middle', function_dict, multiple=False, selected='tj_stuff_roling')),
                ui.column(4, ui.input_select('plot_id_3', 'Plot Right', function_dict, multiple=False, selected='break_plot')),
            ),
            ui.row(
                ui.column(6, ui.input_select('split_id', 'Select Split', split_dict, multiple=False)),
                ui.column(6, ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)', min=1, value=50)),
            ),

            # Row for the action button to generate plot
            ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
        ),

        ui.panel_main(
            ui.navset_tab(
                # Tab for the pitching summary plot
                ui.nav("Pitching Summary",
                       ui.output_text("status"),
                       ui.output_plot('plot', width='2100px', height='2100px'),
                       ),
            )
        ),
    )
)
102
+
103
+
104
def server(input, output, session):
    """
    Shiny server function for the pitching summary app.

    Registers the reactive data source and the outputs declared in app_ui:
    the pitcher selector, the date-range picker, the status text and the
    summary figure.

    Parameters:
    - input: Shiny inputs object (sidebar controls and the two server-rendered inputs).
    - output: Shiny outputs object used to register the status text and the plot.
    - session: Shiny session object (not used directly here).
    """

    @reactive.calc
    @reactive.event(input.pitcher_id, input.date_id, input.split_id)
    def cached_data():
        # Pitch-level data for the selected pitcher, date range and batter-hand
        # split. Recomputed only when the pitcher, date range or split changes.
        year_input = int(input.year_input())
        sport_id = int(input.level_input())
        player_input = int(input.pitcher_id())
        date_range = input.date_id()
        start_date = str(date_range[0])
        end_date = str(date_range[1])

        # Games the pitcher appeared in, then the raw data for those games
        game_list = scrape.get_player_games_list(sport_id=sport_id,
                                                 season=year_input,
                                                 player_id=player_input,
                                                 start_date=start_date,
                                                 end_date=end_date)
        data_list = scrape.get_data(game_list_input=game_list[:])

        # Keep only this pitcher's pitches against the selected batter hand(s)
        pitches = scrape.get_data_df(data_list=data_list).filter(
            (pl.col("pitcher_id") == player_input)
            & (pl.col("is_pitch") == True)
            & (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
        )

        # Run the update / feature-engineering / stuff-model pipeline, then
        # attach the number of pitches thrown of each pitch type
        scored = stuff_apply.stuff_apply(fe.feature_engineering(update.update(pitches)))
        return scored.with_columns(
            pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
        )

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def player_select_ui():
        # Get the list of pitchers for the selected level and season
        df_pitcher_info = scrape.get_players(sport_id=int(input.level_input()),
                                             season=int(input.year_input())).filter(
            pl.col("position").is_in(['P'])).sort("name")

        # Create a dictionary of pitcher IDs and names
        pitcher_dict = dict(zip(df_pitcher_info['player_id'], df_pitcher_info['name']))

        # Return a select input for choosing a pitcher
        return ui.input_select("pitcher_id", "Select Pitcher", pitcher_dict, selectize=True)

    @render.ui
    @reactive.event(input.player_button, ignore_none=False)
    def date_id():
        # Create a date range input limited to the selected year
        year = int(input.year_input())
        return ui.input_date_range("date_id", "Select Date Range",
                                   start=f"{year}-01-01",
                                   end=f"{year}-12-31",
                                   min=f"{year}-01-01",
                                   max=f"{year}-12-31")

    @output
    @render.text
    def status():
        # Placeholder status line: it currently renders an empty string in
        # every case. Fix: the button id is 'generate_plot' and inputs must be
        # called to be read; the old code compared the uncalled, non-existent
        # 'input.generate' object to 0.
        input.generate_plot()
        return ""

    @output
    @render.plot
    @reactive.event(input.generate_plot, ignore_none=False)
    def plot():
        # Show progress/loading notification
        with ui.Progress(min=0, max=1) as p:
            p.set(message="Generating plot", detail="This may take a while...")

            p.set(0.3, "Gathering data...")
            year_input = int(input.year_input())
            sport_id = int(input.level_input())
            player_input = int(input.pitcher_id())

            # Work on a copy so the cached frame is never touched by the plot helpers
            df = cached_data().clone()

            p.set(0.6, "Creating plot...")

            fig = plt.figure(figsize=(26, 26))
            plt.rcParams.update({'figure.autolayout': True})
            fig.set_facecolor('white')
            sns.set_theme(style="whitegrid", palette=colour_palette)

            # 6 x 8 grid: header, bio row, season table, plot row, pitch table,
            # footer; the first and last columns are narrow side margins
            gs = gridspec.GridSpec(6, 8,
                                   height_ratios=[5, 20, 12, 36, 36, 7],
                                   width_ratios=[4, 18, 18, 18, 18, 18, 18, 4])
            gs.update(hspace=0.2, wspace=0.5)

            # Define the positions of each subplot in the grid
            ax_headshot = fig.add_subplot(gs[1, 1:3])
            ax_bio = fig.add_subplot(gs[1, 3:5])
            ax_logo = fig.add_subplot(gs[1, 5:7])

            ax_season_table = fig.add_subplot(gs[2, 1:7])

            ax_plot_1 = fig.add_subplot(gs[3, 1:3])
            ax_plot_2 = fig.add_subplot(gs[3, 3:5])
            ax_plot_3 = fig.add_subplot(gs[3, 5:7])

            ax_table = fig.add_subplot(gs[4, 1:7])

            ax_footer = fig.add_subplot(gs[-1, 1:7])
            ax_header = fig.add_subplot(gs[0, 1:7])
            ax_left = fig.add_subplot(gs[:, 0])
            ax_right = fig.add_subplot(gs[:, -1])

            # Hide axes for footer, header, left, and right
            for margin_ax in (ax_footer, ax_header, ax_left, ax_right):
                margin_ax.axis('off')

            df_teams = scrape.get_teams()

            player_headshot(player_input=player_input, ax=ax_headshot, sport_id=sport_id, season=year_input)
            player_bio(pitcher_id=player_input, ax=ax_bio, sport_id=sport_id, year_input=year_input)
            plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,
                      df_players=scrape.get_players(sport_id, year_input))

            stat_summary_table(df=df,
                               ax=ax_season_table,
                               player_input=player_input,
                               split=split_dict[input.split_id()],
                               sport_id=sport_id)

            # Draw the three user-selected plots. gs_col is the first grid
            # column of each slot; velocity_kdes needs it to place its own axes.
            selections = [input.plot_id_1(), input.plot_id_2(), input.plot_id_3()]
            slots = [ax_plot_1, ax_plot_2, ax_plot_3]
            for plot_name, axis, gs_col in zip(selections, slots, [1, 3, 5]):
                if plot_name == 'velocity_kdes':
                    velocity_kdes(df,
                                  ax=axis,
                                  gs=gs,
                                  gs_x=[3, 4],
                                  gs_y=[gs_col, gs_col + 2],
                                  fig=fig)
                elif plot_name == 'tj_stuff_roling':
                    tj_stuff_roling(df=df,
                                    window=int(input.rolling_window()),
                                    ax=axis)
                elif plot_name == 'tj_stuff_roling_game':
                    tj_stuff_roling_game(df=df,
                                         window=int(input.rolling_window()),
                                         ax=axis)
                elif plot_name == 'break_plot':
                    break_plot(df=df, ax=axis)
                elif plot_name == 'location_plot_lhb':
                    location_plot(df=df, ax=axis, hand='L')
                elif plot_name == 'location_plot_rhb':
                    location_plot(df=df, ax=axis, hand='R')

            summary_table(df=df,
                          ax=ax_table)

            plot_footer(ax_footer)

            fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)

            # Return the figure explicitly rather than relying on the
            # implicit "current pyplot figure" fallback
            return fig
288
+
289
+
290
+
291
+
292
+
293
+ app = App(app_ui, server)
requirements.txt CHANGED
@@ -1,12 +1,12 @@
1
- joblib==1.4.2
2
- matplotlib==3.5.1
3
- numpy==1.22.1
4
- pandas==2.0.3
5
- Pillow==11.0.0
6
- polars==1.12.0
7
- pytz==2022.7.1
8
- Requests==2.32.3
9
- seaborn==0.11.1
10
- shiny==0.7.1
11
- streamlit==1.37.1
12
- tqdm==4.62.3
 
1
+ joblib==1.4.2
2
+ matplotlib==3.5.1
3
+ numpy==1.22.1
4
+ pandas==2.0.3
5
+ Pillow==11.0.0
6
+ polars==1.12.0
7
+ pytz==2022.7.1
8
+ Requests==2.32.3
9
+ seaborn==0.11.1
10
+ shiny==0.7.1
11
+ streamlit==1.37.1
12
+ tqdm==4.62.3