nesticot committed on
Commit
baf9d65
·
verified ·
1 Parent(s): b19b6a7

Upload 2 files

Browse files
Files changed (2) hide show
  1. api_scraper.py +5 -1
  2. app.py +221 -152
api_scraper.py CHANGED
@@ -855,6 +855,8 @@ class MLB_Scrape:
855
  height_list = [x['height'] if 'height' in x else None for x in player_data]
856
  age_list = [x['currentAge'] if 'currentAge' in x else None for x in player_data]
857
  birthDate_list = [x['birthDate'] if 'birthDate' in x else None for x in player_data]
 
 
858
 
859
  df = pl.DataFrame(data={
860
  'player_id': id_list,
@@ -866,7 +868,9 @@ class MLB_Scrape:
866
  'weight': weight_list,
867
  'height': height_list,
868
  'age': age_list,
869
- 'birthDate': birthDate_list
 
 
870
  })
871
 
872
  return df
 
855
  height_list = [x['height'] if 'height' in x else None for x in player_data]
856
  age_list = [x['currentAge'] if 'currentAge' in x else None for x in player_data]
857
  birthDate_list = [x['birthDate'] if 'birthDate' in x else None for x in player_data]
858
+ pitchHand_list = [x['pitchHand']['code'] if 'pitchHand' in x else None for x in player_data]
859
+ batSide_list = [x['batSide']['code'] if 'batSide' in x else None for x in player_data]
860
 
861
  df = pl.DataFrame(data={
862
  'player_id': id_list,
 
868
  'weight': weight_list,
869
  'height': height_list,
870
  'age': age_list,
871
+ 'birthDate': birthDate_list,
872
+ 'pitchHand': pitchHand_list,
873
+ 'batSide': batSide_list
874
  })
875
 
876
  return df
app.py CHANGED
@@ -1,153 +1,222 @@
1
- import polars as pl
2
- import numpy as np
3
- import joblib
4
- from shiny import App, reactive, render, ui
5
- import matplotlib.pyplot as plt
6
- import matplotlib.ticker as tkr
7
- import seaborn as sns
8
- import adjustText
9
- sns.set_style('whitegrid')
10
-
11
-
12
- import matplotlib
13
- cmap_sum = matplotlib.colors.LinearSegmentedColormap.from_list("", ['#FFFFFF','#FFB000','#FE6100'])
14
-
15
# Fitted classifier loaded from disk. Its `predict_proba` output is multiplied
# by 5-element weight vectors below, so it yields five class probabilities per
# row (presumably out/1B/2B/3B/HR -- TODO confirm against the training code).
xwoba_model = joblib.load('joblib_model/xwoba_model.joblib')

# Evaluation grid: `x` spans launch angle (-30..90 in 0.5 steps) and `y` spans
# launch speed (0..120.4 in 0.1 steps).
x = np.arange(-30, 90.5, .5)
y = np.arange(0, 120.5, 0.1)

xx, yy = np.meshgrid(x, y)

df = pl.DataFrame({'launch_angle': xx.ravel(), 'launch_speed': yy.ravel()})

# Score the grid once and reuse the probabilities for both stats; the original
# ran the (expensive) model twice over the identical feature columns.
_grid_proba = xwoba_model.predict_proba(df.select(['launch_angle', 'launch_speed']))

df = df.with_columns([
    # Expected wOBA: probabilities weighted by the per-outcome wOBA values.
    pl.Series('xwoba', _grid_proba @ [0, 0.883, 1.244, 1.569, 2.004]),
    # Expected slugging: probabilities weighted by 0..4 (total bases).
    pl.Series('xslg', _grid_proba @ [0, 1, 2, 3, 4]),
])
31
-
32
# Sidebar: usage notes, the two numeric inputs that mirror the selected point,
# and the switch that flips the displayed stat.
_sidebar = ui.sidebar(
    ui.markdown("""
        ### How to use this app

        1. Click anywhere on the plot to select a point, or manually enter coordinates
        2. The selected point's coordinates will update automatically
        3. The xwOBA value will be calculated based on these coordinates
        """),
    ui.hr(),
    ui.input_numeric("x_select", "Launch Speed (mph)", value=110),
    ui.input_numeric("y_select", "Launch Angle (°)", value=30),
    ui.input_switch("flip_stat", "xwOBA", value=False),
)

# Main panel: a fixed-size, clickable plot output (clicks arrive as `plot_click`).
_main_plot = ui.output_plot("plot", width='900px', height='900px', click=True)

app_ui = ui.page_sidebar(_sidebar, _main_plot)
50
-
51
-
52
def server(input, output, session):
    """Shiny server: keep the numeric inputs in sync with plot clicks and draw the heat map.

    Reads the module-level `df` grid, `xwoba_model` and `cmap_sum`.
    """
    # Local import keeps this block self-contained; `shiny` is already a
    # dependency of this file.
    from shiny import req

    @reactive.effect
    @reactive.event(input.plot_click)
    def _sync_inputs_from_click():
        # Copy the clicked coordinates into the numeric inputs; the plot reads
        # those inputs, so it re-renders with the new point.
        click_data = input.plot_click()
        if click_data is not None:
            ui.update_numeric("x_select", value=round(click_data["x"], 1))
            ui.update_numeric("y_select", value=round(click_data["y"], 1))

    @render.plot
    def plot():
        show_xwoba = input.flip_stat()
        x_select = input.x_select()
        y_select = input.y_select()
        # A cleared numeric box yields None; wait for a value instead of
        # crashing inside the model call.
        req(x_select is not None, y_select is not None)

        # Everything that differs between the two stats lives here, so the
        # drawing code below exists only once.
        if show_xwoba:
            column, label = 'xwoba', 'xwOBA'
            weights = [0, 0.883, 1.244, 1.569, 2.004]
            vmax = 2.0
            ticks = [0.0, 0.4, 0.8, 1.2, 1.6, 2.0]
            title = 'Exit Velocity vs Launch Angle\nExpected Weighted On Base Average (xwOBA)\nBy: @TJStats, Data:MLB'
        else:
            column, label = 'xslg', 'xSLG'
            weights = [0, 1, 2, 3, 4]
            vmax = 4.0
            ticks = [0.0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4]
            title = 'Exit Velocity vs Launch Angle\nExpected Total Bases (xSLG)\nBy: @TJStats, Data:MLB'

        fig, ax = plt.subplots(1, 1, figsize=(9, 9))

        h = ax.hexbin(df['launch_speed'],
                      df['launch_angle'],
                      C=df[column],
                      gridsize=(40, 25),
                      cmap=cmap_sum,
                      vmin=0.0,
                      vmax=vmax)
        fig.colorbar(h, ax=ax, label=label, format=tkr.FormatStrFormatter('%.3f'),
                     shrink=0.5, ticks=ticks)

        ax.set_xlabel('Launch Speed')
        ax.set_ylabel('Launch Angle')
        ax.set_title(title)

        ax.grid(False)
        # 'square' must come before the explicit limits, which then pin the view.
        ax.axis('square')
        ax.set_xlim(0, 120)
        ax.set_ylim(-30, 90)

        # Marker for the selected point.
        sns.scatterplot(x=[x_select], y=[y_select], color='#648FFF', s=50, ax=ax,
                        edgecolor='k', zorder=100)

        # The model takes [launch_angle, launch_speed], i.e. (y, x) on this plot.
        stat_value = (xwoba_model.predict_proba([[y_select, x_select]]) @ weights)[0]
        texts = [ax.text(x_select + 3, y_select + 3, f'{label}: {stat_value:.3f}',
                         color='black', fontsize=12, weight='bold', zorder=1000,
                         bbox=dict(facecolor='white', alpha=0.5, edgecolor='black'))]

        adjustText.adjust_text(texts,
                               arrowprops=dict(arrowstyle='->', color='#DC267F'),
                               avoid_self=True,
                               min_arrow_len=5)

        # Crosshair through the selected point.
        ax.axhline(y=y_select, color='k', linestyle='--', linewidth=1, alpha=0.5)
        ax.axvline(x=x_select, color='k', linestyle='--', linewidth=1, alpha=0.5)
152
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  app = App(app_ui, server)
 
1
+ import polars as pl
2
+ import numpy as np
3
+ import pandas as pd
4
+ import api_scraper
5
+ scrape = api_scraper.MLB_Scrape()
6
+ import requests
7
+ import joblib
8
+ from matplotlib.gridspec import GridSpec
9
+ from shiny import App, reactive, ui, render
10
+ from shiny.ui import h2, tags
11
+ import matplotlib.pyplot as plt
12
+ import matplotlib.gridspec as gridspec
13
+ import seaborn as sns
14
+ from shiny import App, reactive, ui, render
15
+ from shiny.ui import h2, tags
16
+
17
+
18
+ from shiny import App, ui, render
19
+
20
+
21
+ # Import the MLB_Scrape class from the module
22
+ from api_scraper import MLB_Scrape
23
+
24
# Initialize the scraper
scraper = MLB_Scrape()

# Fetch the team table once at import time; it feeds the lookup dicts below.
teams = scraper.get_teams()
print(teams)

# Season and sport ids whose player lists are merged into `df_player`.
# NOTE(review): ids are passed straight to `get_players`; confirm which
# levels they denote against the scraper/API documentation.
_SEASON = 2025
_SPORT_IDS = (1, 11, 12, 13, 14, 22)

# One player table across all requested sports, de-duplicated on player_id
# because a player can be returned under more than one sport id.
df_player = pl.concat([
    scraper.get_players(sport_id=sport_id, season=_SEASON, game_type=['R'])
    for sport_id in _SPORT_IDS
]).unique(subset=['player_id'])

# Teams in league 103/104 only, alphabetical by abbreviation so the select
# box is ordered.
teams_mlb = teams.filter(pl.col("league_id").is_in([103, 104])).sort("abbreviation")

# team_id -> abbreviation (select-box choices and the table's Team column).
teams_dict = dict(zip(teams_mlb['team_id'], teams_mlb['abbreviation']))

# team_id -> franchise name (card title).
teams_name_dict = dict(zip(teams_mlb['team_id'], teams_mlb['franchise']))
47
+
48
# Legend explaining the green NRI row highlight used in the table.
_legend = ui.div(
    ui.div({"style": "font-size:1.2em;"}, ui.markdown("Legend")),
    ui.div(
        style="display: inline-block; width: 20px; height: 20px; background-color: #b7e1cd; margin-right: 10px;"
    ),
    ui.span("NRI", style="vertical-align: top;"),
    style="padding: 10px;"
)

# Sidebar controls: team picker, NRI-only filter, and the legend.
_controls = ui.sidebar(
    ui.input_select("team_id", "Select Team", choices=teams_dict),
    ui.input_switch("nri_only", "NRI Only"),
    _legend,
)

# Main card: large title above the roster table.
_roster_card = ui.card(
    ui.div({"style": "font-size:2em;"}, ui.output_text("card_title")),
    ui.output_table("team_stats"),
)

app_ui = ui.page_sidebar(_controls, _roster_card)
72
+
73
def _fetch_roster(team_id, roster_type):
    """Download one roster listing for `team_id` and flatten it into a pandas frame."""
    url = f'https://statsapi.mlb.com/api/v1/teams/{team_id}/roster/{roster_type}?season=2025'
    # Bound the wait and surface HTTP errors instead of failing later on a
    # missing key in an error payload.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    return pd.json_normalize(response.json()['roster'])


def _build_roster_frame(team_id, nri_only):
    """Combine the 40-man roster and non-roster invitees into the display table."""
    roster = _fetch_roster(team_id, '40man')
    roster['nri'] = False
    # 40-man statuses are blanked so only invitees carry a status code.
    roster['status.code'] = ''
    roster = roster.fillna('')

    invitees = _fetch_roster(team_id, 'nonRosterInvitees')
    invitees['nri'] = True
    invitees['parentTeamId'] = team_id
    invitees = invitees.fillna('')

    combined = (
        pd.concat([roster, invitees])
        .sort_values(by=['position.code', 'status.code'])
        .reset_index(drop=True)
    )
    if nri_only:
        combined = combined[combined['status.code'] == 'NRI']

    # Attach bio data; the left join keeps roster rows without a match.
    combined = combined.merge(df_player.to_pandas(), left_on='person.id',
                              right_on='player_id', how='left')

    # Label the first P / C / LF row in sort order; each label marks where a
    # section of the table starts. Computed before 'P' is rewritten below.
    position = combined['position.abbreviation']
    first_at_position = ~combined.duplicated(subset=['position.abbreviation'], keep='first')
    group_labels = {'P': 'Pitchers', 'C': 'Infielders', 'LF': 'Outfielders'}
    combined['position_group'] = position.where(first_at_position).map(group_labels).fillna('')

    combined['team'] = combined['parentTeamId'].map(teams_dict)

    # 'P' -> 'RHP' / 'LHP', but only where the throwing hand is known so an
    # unmatched pitcher keeps 'P' instead of turning into NaN.
    hand_known = (position == 'P') & combined['pitchHand'].notna()
    combined.loc[hand_known, 'position.abbreviation'] = combined.loc[hand_known, 'pitchHand'] + 'HP'
    combined['bat_throw'] = combined['batSide'] + '/' + combined['pitchHand']

    # Copy so the assignments below modify an independent frame, not a view.
    table = combined[['position_group', 'person.id', 'person.fullName',
                      'position.abbreviation', 'team', 'status.code',
                      'age', 'weight', 'height', 'bat_throw']].copy()

    # Nullable integers: players missing from `df_player` have NaN here, which
    # a plain `astype(int)` cannot represent.
    for column in ('age', 'weight'):
        table[column] = np.trunc(pd.to_numeric(table[column], errors='coerce')).astype('Int64')

    table.columns = ['Group', 'Player ID', 'Name', 'Pos', 'Team', 'Status',
                     'Age', 'Weight', 'Height', 'B/T']
    return table


def _style_roster(table, highlight_invitees):
    """Return a pandas Styler for `table` with borders, striping and optional NRI highlight."""
    styler = table.style

    # `set_precision` / `hide_index` were removed in pandas 2.0; prefer the
    # current API and fall back for older installations.
    try:
        styler = styler.format(precision=1, na_rep='')
    except TypeError:
        styler = styler.set_precision(1)
    if hasattr(styler, 'hide'):
        styler = styler.hide(axis='index')
    else:
        styler = styler.hide_index()

    styler = styler.set_table_styles([
        {'selector': 'caption',
         'props': [('font-family', 'Century Gothic'), ('font-size', '16px'),
                   ('font-style', 'italic'), ('text-align', 'center')]},
        {'selector': 'th',
         'props': [('font-size', '16px'), ('text-align', 'center'), ('color', 'black'),
                   ('border', '1px black solid !important')]},
        {'selector': 'td',
         'props': [('text-align', 'center'), ('font-size', '16px'), ('color', 'black')]},
        {'selector': 'th:first-child', 'props': [('background-color', 'white')]},
        {'selector': 'tr:first-child', 'props': [('background-color', 'white')]},
        {'selector': 'tr', 'props': [('line-height', '20px')]},
        {'selector': 'thead th:nth-child(1)', 'props': [('min-width', '150px')]},
        {'selector': 'thead th:nth-child(2)', 'props': [('min-width', '150px')]},
        {'selector': 'thead th:nth-child(3)', 'props': [('min-width', '250px')]},
        {'selector': 'thead th', 'props': [('height', '30px')]},
    ])

    # Base cell look. Order matters from here on: for a given CSS property the
    # declaration applied last wins.
    styler = styler.set_properties(**{
        'background-color': 'White',
        'min-width': '72px',
        'height': '8px',
        'text-align': 'center',
        'border': '1px black solid',
    })

    group_column = table.columns[0]
    data_columns = table.columns[1:]

    # Zebra striping on every column except the group label column.
    styler = styler.apply(
        lambda col: ['background-color: #ebebeb' if i % 2 == 0 else '' for i in range(len(col))],
        axis=0, subset=data_columns)

    # The group column is a solid grey band without inner horizontal rules.
    styler = styler.set_properties(
        subset=[group_column],
        **{'background-color': '#bdbdbd', 'border-top': 'none', 'border-bottom': 'none'})

    if highlight_invitees:
        # Green rows for non-roster invitees (matches the sidebar legend).
        styler = styler.apply(
            lambda row: ['background-color: #b7e1cd' if row['Status'] == 'NRI' else ''] * len(row),
            axis=1, subset=data_columns)

    # Heavy rule above the first row of each position group...
    styler = styler.apply(
        lambda row: ['border-top: 3px solid black' if row['Group'] != '' else ''] * len(row),
        axis=1)

    # ...and below the final row of the table.
    last_row = table.index[-1] if len(table) else None
    styler = styler.apply(
        lambda row: ['border-bottom: 3px solid black' if row.name == last_row else ''] * len(row),
        axis=1)

    return styler


def server(input, output, session):
    """Shiny server: render the card title and the styled roster table for the selected team."""

    @render.text
    def card_title():
        franchise = teams_name_dict[int(input.team_id())]
        if input.nri_only():
            return f"{franchise} — Spring Training Roster Non-Roster Invitees"
        return f"{franchise} Spring Training Roster"

    @render.table
    def team_stats():
        nri_only = input.nri_only()
        table = _build_roster_frame(int(input.team_id()), nri_only)
        # With only invitees shown, the green highlight would cover every row.
        return _style_roster(table, highlight_invitees=not nri_only)
222
  app = App(app_ui, server)