nesticot committed on
Commit
fcb7c74
·
verified ·
1 Parent(s): fa175ae

Upload 27 files

Browse files
Dockerfile CHANGED
@@ -1,13 +1,13 @@
1
- FROM python:3.9
2
-
3
- WORKDIR /code
4
-
5
- COPY ./requirements.txt /code/requirements.txt
6
-
7
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
-
9
- COPY . .
10
-
11
- EXPOSE 7860
12
-
13
  CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ EXPOSE 7860
12
+
13
  CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,20 +1,20 @@
1
- ---
2
- title: Pitching Summary
3
- emoji: 🌍
4
- colorFrom: yellow
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- This is a templated Space for [Shiny for Python](https://shiny.rstudio.com/py/).
12
-
13
-
14
- To get started with a new app do the following:
15
-
16
- 1) Install Shiny with `pip install shiny`
17
- 2) Create a new app with `shiny create`
18
- 3) Then run the app with `shiny run --reload`
19
-
20
- To learn more about this framework please see the [Documentation](https://shiny.rstudio.com/py/docs/overview.html).
 
1
+ ---
2
+ title: Pitching Summary
3
+ emoji: 🌍
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ This is a templated Space for [Shiny for Python](https://shiny.rstudio.com/py/).
12
+
13
+
14
+ To get started with a new app do the following:
15
+
16
+ 1) Install Shiny with `pip install shiny`
17
+ 2) Create a new app with `shiny create`
18
+ 3) Then run the app with `shiny run --reload`
19
+
20
+ To learn more about this framework please see the [Documentation](https://shiny.rstudio.com/py/docs/overview.html).
api_scraper.py CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,4 +1,3 @@
1
- from shiny import App, ui, render, reactive
2
  import polars as pl
3
  import numpy as np
4
  import pandas as pd
@@ -12,477 +11,390 @@ from stuff_model import stuff_apply
12
  import requests
13
  import joblib
14
  from matplotlib.gridspec import GridSpec
15
- import math
16
- from pytabulator import TableOptions, Tabulator, output_tabulator, render_tabulator, theme
17
- theme.tabulator_site()
 
 
 
 
 
18
 
19
  colour_palette = ['#FFB000','#648FFF','#785EF0',
20
  '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
21
 
22
- # df = pl.read_csv("data.csv")
23
- # df = pl.read_parquet("data_small.parquet")[:]
24
- # df = pl.read_parquet("data.parquet")[:]
25
- # print('df')
26
- season = 2024
27
 
28
- df_mlb = pl.read_parquet("data/data_mlb_2024.parquet")[:]
29
- df_aaa = pl.read_parquet("data/data_aaa_2024.parquet")[:]
30
- df_a = pl.read_parquet("data/data_a_2024.parquet")[:]
31
 
32
 
33
 
34
- def df_final(df:pl.dataframe,year_input:int,sport_id:int):
 
 
 
 
 
 
 
 
35
 
36
- df_schedule = scrape.get_schedule(year_input=[year_input],sport_id=[sport_id])
37
- df = df.join(df_schedule, on='game_id', how='left')
 
 
 
 
 
 
38
 
39
- df = df.with_columns(
40
- pl.when((pl.col('batter_team_id') == pl.col('away_id')))
41
- .then(pl.lit('Away'))
42
- .when((pl.col('batter_team_id') == pl.col('home_id')))
43
- .then(pl.lit('Home'))
44
- .otherwise(None)
45
- .alias('home_away')
46
- )
47
 
48
- df = df.with_columns(
49
- pl.when((pl.col('pitcher_team_id') == pl.col('away_id')))
50
- .then(pl.lit('Away'))
51
- .when((pl.col('pitcher_team_id') == pl.col('home_id')))
52
- .then(pl.lit('Home'))
53
- .otherwise(None)
54
- .alias('home_away_pitcher')
55
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
 
58
- print('schedule')
59
-
60
- df_stuff = stuff_apply.stuff_apply(fe.feature_engineering(df))
61
- print('stuff')
62
- df_up = update.update(df)
63
- print('update')
64
- df_total = df_up.join(df_stuff[['play_id','tj_stuff_plus']], on='play_id', how='left')
65
- print('total')
66
- return df_total
67
-
68
-
69
- df_mlb_total = df_final(df=df_mlb,year_input=season,sport_id=1)
70
- df_aaa_total = df_final(df=df_aaa,year_input=season,sport_id=11)
71
- df_a_total = df_final(df=df_a.drop_nulls(subset=['start_speed']),year_input=season,sport_id=14)
72
-
73
- rounding_dict = {
74
- 'pa': 0,
75
- 'bip': 0,
76
- 'hits': 0,
77
- 'k': 0,
78
- 'bb': 0,
79
- 'max_launch_speed': 1,
80
- 'launch_speed_90': 1,
81
- 'launch_speed': 1,
82
- 'pitches': 0,
83
- 'tj_stuff_plus_avg': 0,
84
- 'avg': 3,
85
- 'obp': 3,
86
- 'slg': 3,
87
- 'ops': 3,
88
- 'k_percent': 3,
89
- 'bb_percent': 3,
90
- 'k_minus_bb_percent': 3,
91
- 'sweet_spot_percent': 3,
92
- 'woba_percent': 3,
93
- 'xwoba_percent': 3,
94
- 'woba_percent_contact': 3,
95
- 'xwoba_percent_contact': 3,
96
- 'hard_hit_percent': 3,
97
- 'barrel_percent': 3,
98
- 'zone_contact_percent': 3,
99
- 'zone_swing_percent': 3,
100
- 'zone_percent': 3,
101
- 'chase_percent': 3,
102
- 'chase_contact': 3,
103
- 'swing_percent': 3,
104
- 'whiff_rate': 3,
105
- 'swstr_rate': 3,
106
- 'ground_ball_percent': 3,
107
- 'line_drive_percent': 3,
108
- 'fly_ball_percent': 3,
109
- 'pop_up_percent': 3,
110
- 'heart_zone_swing_percent': 3,
111
- 'shadow_zone_swing_percent': 3,
112
- 'chase_zone_swing_percent': 3,
113
- 'waste_zone_swing_percent': 3,
114
- 'heart_zone_whiff_percent': 3,
115
- 'shadow_zone_whiff_percent': 3,
116
- 'chase_zone_whiff_percent': 3,
117
- 'waste_zone_whiff_percent': 3,
118
- 'start_speed_avg': 1,
119
- 'vb_avg': 1,
120
- 'ivb_avg': 1,
121
- 'hb_avg': 1,
122
- 'z0_avg': 1,
123
- 'x0_avg': 1,
124
- 'vaa_avg': 1,
125
- 'haa_avg': 1,
126
- 'spin_rate_avg': 0,
127
- 'extension_avg': 1
128
- }
129
 
130
- columns = [
131
- { "title": "PA", "field": "pa", "width": 150},
132
- { "title": "BBE", "field": "bip", "width": 150 },
133
- { "title": "H", "field": "hits", "width": 150 },
134
- { "title": "K", "field": "k", "width": 150 },
135
- { "title": "BB", "field": "bb", "width": 150 },
136
- { "title": "Max EV", "field": "max_launch_speed", "width": 150 },
137
- { "title": "90th% EV", "field": "launch_speed_90", "width": 150 },
138
- { "title": "EV", "field": "launch_speed", "width": 150 },
139
- { "title": "Pitches", "field": "pitches", "width": 150 },
140
- { "title": "AVG", "field": "avg", "width": 150 },
141
- { "title": "OBP", "field": "obp", "width": 150 },
142
- { "title": "SLG", "field": "slg", "width": 150 },
143
- { "title": "OPS", "field": "ops", "width": 150 },
144
- { "title": "K%", "field": "k_percent", "width": 150,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
145
- { "title": "BB%", "field": "bb_percent", "width": 150,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
146
- { "title": "K-BB%", "field": "k_minus_bb_percent", "width": 150,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
147
- { "title": "SwSpot%", "field": "sweet_spot_percent", "width": 150,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
148
- { "title": "wOBA", "field": "woba_percent", "width": 150 },
149
- { "title": "xwOBA", "field": "xwoba_percent", "width": 150 },
150
- { "title": "wOBACON", "field": "woba_percent_contact", "width": 150 },
151
- { "title": "xwOBACON", "field": "xwoba_percent_contact", "width": 150 },
152
- { "title": "HardHit%", "field": "hard_hit_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
153
- { "title": "Barrel%", "field": "barrel_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
154
- { "title": "Z-Contact%", "field": "zone_contact_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
155
- { "title": "Z-Swing%", "field": "zone_swing_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
156
- { "title": "Zone%", "field": "zone_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
157
- { "title": "O-Swing%", "field": "chase_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
158
- { "title": "O-Contact%", "field": "chase_contact", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
159
- { "title": "Swing%", "field": "swing_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
160
- { "title": "Whiff%", "field": "whiff_rate", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
161
- { "title": "SwStr%", "field": "swstr_rate", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
162
- { "title": "GB%", "field": "ground_ball_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
163
- { "title": "LD%", "field": "line_drive_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
164
- { "title": "FB%", "field": "fly_ball_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
165
- { "title": "PU%", "field": "pop_up_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
166
- { "title": "Heart Swing%", "field": "heart_zone_swing_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
167
- { "title": "Shadow Swing%", "field": "shadow_zone_swing_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
168
- { "title": "Chase Swing%", "field": "chase_zone_swing_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
169
- { "title": "Waste Swing%", "field": "waste_zone_swing_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
170
- { "title": "Heart Whiff%", "field": "heart_zone_whiff_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
171
- { "title": "Shadow Whiff%", "field": "shadow_zone_whiff_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
172
- { "title": "Chase Whiff%", "field": "chase_zone_whiff_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
173
- { "title": "Waste Whiff%", "field": "waste_zone_whiff_percent", "width": 150 ,"formatter": "money", "formatterParams":{"decimal":".","thousand":".","symbol":"%","symbolAfter":"%","negativeSign":True,"precision":1}},
174
- { "title": "tjStuff+", "field": "tj_stuff_plus_avg", "width": 150 },
175
- { "title": "Velocity", "field": "start_speed_avg", "width": 150 },
176
- { "title": "Extension", "field": "extension_avg", "width": 150 },
177
- { "title": "VB", "field": "vb_avg", "width": 150 },
178
- { "title": "iVB", "field": "ivb_avg", "width": 150 },
179
- { "title": "HB", "field": "hb_avg", "width": 150 },
180
- { "title": "vRel", "field": "z0_avg", "width": 150 },
181
- { "title": "hRel", "field": "x0_avg", "width": 150 },
182
- { "title": "VAA", "field": "vaa_avg", "width": 150 },
183
- { "title": "HAA", "field": "haa_avg", "width": 150 },
184
- { "title": "Spin Rate", "field": "spin_rate_avg", "width": 150 },
185
- { "title": "Extension", "field": "extension_avg", "width": 150 },
186
 
187
- ]
188
 
189
- stat_titles = dict(zip([col["field"] for col in columns],[col["title"] for col in columns]))
190
-
191
- stat_selection = [key for key in stat_titles.keys()]
192
-
193
- agg_titles = {'batter_id':'Batter ID',
194
- 'batter_name':'Batter Name',
195
- 'batter_team':'Batter Team',
196
- 'batter_hand':'Batter Hand',
197
- 'pitcher_id':'Pitcher ID',
198
- 'pitcher_name':'Pitcher Name',
199
- 'pitcher_team':'Pitcher Team',
200
- 'pitcher_hand':'Pitcher Hand',
201
- 'pitch_type':'Pitch Type',
202
- 'pitch_group':'Pitch Group',
203
- 'home_away_batter':'Home/Away Batter',
204
- 'home_away_pitcher':'Home/Away Pitcher',
205
- 'is_swing':'Is Swing?',
206
- 'is_bip':'Is BIP?',
207
- 'in_zone_final':'In Zone?',
208
- 'attack_zone_final':'Attack Zone'}
209
-
210
-
211
- columns_group = [
212
- { "title": "Batter ID", "field": "batter_id", "width": 150, "headerFilter":"input","frozen":True,},
213
- { "title": "Batter Name", "field": "batter_name", "width": 200,"frozen":True, "headerFilter":"input" },
214
- { "title": "Batter Team", "field": "batter_team", "width": 150,"frozen":True, "headerFilter":"input" },
215
- { "title": "Batter Hand", "field": "batter_hand", "width": 150,"frozen":True, "headerFilter":"input" },
216
- { "title": "Pitcher ID", "field": "pitcher_id", "width": 150,"frozen":True, "headerFilter":"input" },
217
- { "title": "Pitcher Name", "field": "pitcher_name", "width": 200,"frozen":True, "headerFilter":"input" },
218
- { "title": "Pitcher Team", "field": "pitcher_team", "width": 150,"frozen":True, "headerFilter":"input" },
219
- { "title": "Pitcher Hand", "field": "pitcher_hand", "width": 150,"frozen":True, "headerFilter":"input" },
220
- { "title": "Pitch Type", "field": "pitch_type", "width": 150,"frozen":True, "headerFilter":"input" },
221
- { "title": "Pitch Group", "field": "pitch_group", "width": 150,"frozen":True, "headerFilter":"input" },
222
- { "title": "Home/Away Batter", "field": "home_away_batter", "width": 150,"frozen":True, "headerFilter":"input" },
223
- { "title": "Home/Away Pitcher", "field": "home_away_pitcher", "width": 150,"frozen":True, "headerFilter":"input" },
224
- { "title": "Is Swing?", "field": "is_swing", "width": 150,"frozen":True, "headerFilter":"input" },
225
- { "title": "Is BIP?", "field": "is_bip", "width": 150,"frozen":True, "headerFilter":"input" },
226
- { "title": "In Zone?", "field": "in_zone_final", "width": 150,"frozen":True, "headerFilter":"input" },
227
- { "title": "Attack Zone", "field": "attack_zone_final", "width": 150,"frozen":True, "headerFilter":"input" }
228
- ]
229
 
 
 
230
 
 
231
  app_ui = ui.page_sidebar(
232
  ui.sidebar(
233
- ui.input_selectize(
234
- "level_input",
235
- "Select Level:",
236
- choices=['MLB','AAA','A'],
237
- multiple=False,
238
- selected=['MLB']
239
  ),
240
- ui.input_selectize(
241
- "list_input",
242
- "Select Aggregation:",
243
- choices=agg_titles,
244
- multiple=True,
245
- selected=['batter_id', 'batter_name']
 
 
 
 
 
 
246
  ),
247
- ui.input_selectize(
248
- "list_stats",
249
- "Select Stats:",
250
- choices=stat_titles,
251
- multiple=True,
252
- selected=['pa']
253
  ),
254
- ui.input_date_range(
255
- "date_id",
256
- "Select Date Range",
257
- start=f'{season}-01-01',
258
- end=f'{season}-12-01',
259
- min=f'{season}-01-01',
260
- max=f'{season}-12-01',
261
  ),
262
- ui.hr(),
263
- ui.h4("Filters"),
264
- ui.div(
265
- {"id": "filter-container"},
266
- ui.div(
267
- {"class": "filter-row", "id": "filter_row_1"}, # Add id for deletion
268
- ui.row(
269
- ui.column(5, # Adjusted column widths to make room for delete button
270
- ui.input_select(
271
- "filter_column_1",
272
- "Metric",
273
- choices={}
274
- )
275
- ),
276
- ui.column(3,
277
- ui.input_select(
278
- "filter_operator_1",
279
- "Operator",
280
- choices=[">=", "<="]
281
- ),
282
- ),
283
- ui.column(3,
284
- ui.input_numeric(
285
- "filter_value_1",
286
- "Value",
287
- value=0
288
- )
289
- ),
290
- ui.column(1,
291
- ui.markdown("&nbsp;"),
292
-
293
-
294
- ui.input_action_button(
295
- f"delete_filter_1",
296
- "",
297
- class_="btn-danger btn-sm",
298
- style="padding: 3px 6px;",
299
- icon='✖'
300
-
301
- )
302
- )
303
- )
304
- )
305
- ),
306
- ui.input_action_button(
307
- "add_filter",
308
- "Add Filter",
309
- class_="btn-secondary"
310
- ),
311
- ui.br(),
312
- ui.br(),
313
- ui.input_action_button(
314
- "generate_table",
315
- "Generate Table",
316
- class_="btn-primary"
317
- ),
318
- width="400px"
319
  ),
 
 
320
  ui.navset_tab(
321
- ui.nav_panel("Leaderboard",
322
- ui.card(
323
- #ui.card_header("Leaderboard"),
324
- output_tabulator("tabulator")
325
- )
326
  ),
327
-
 
328
  )
329
  )
330
 
 
331
  def server(input, output, session):
332
- # Store the number of active filters
333
- filter_count = reactive.value(1)
334
- # Store active filter IDs
335
- active_filters = reactive.value([1])
336
-
337
- @reactive.effect
338
- @reactive.event(input.list_stats)
339
- def _():
340
- stat_choices = {k: k for k in input.list_stats()}
341
- filtered_stat_choices = {key: stat_titles[key] for key in stat_choices}
342
- ui.update_select("filter_column_1", choices=filtered_stat_choices)
343
-
344
- @reactive.effect
345
- @reactive.event(input.add_filter)
346
- def _():
347
- current_count = filter_count.get()
348
- new_count = current_count + 1
349
-
350
- stat_choices = {k: k for k in input.list_stats()}
351
- filtered_stat_choices = {key: stat_titles[key] for key in stat_choices}
352
-
353
- ui.insert_ui(
354
- selector="#filter-container",
355
- where="beforeEnd",
356
- ui=ui.div(
357
- {"class": "filter-row", "id": f"filter_row_{new_count}"},
358
- ui.row(
359
- ui.column(5,
360
- ui.input_select(
361
- f"filter_column_{new_count}",
362
- "Metric",
363
- choices=filtered_stat_choices
364
- ),
365
- ),
366
- ui.column(3,
367
- ui.input_select(
368
- f"filter_operator_{new_count}",
369
- "Operator",
370
- choices=[">=", "<="]
371
- ),
372
- ),
373
- ui.column(3,
374
- ui.input_numeric(
375
- f"filter_value_{new_count}",
376
- "Value",
377
- value=0
378
- )
379
- ),
380
- ui.column(1,
381
- ui.markdown("&nbsp;"),
382
-
383
-
384
- ui.input_action_button(
385
- f"delete_filter_{new_count}",
386
- "",
387
- class_="btn-danger btn-sm",
388
- style="padding: 3px 6px;",
389
- icon='✖'
390
-
391
- )
392
- )
393
- )
394
- )
395
- )
396
- filter_count.set(new_count)
397
- current_filters = active_filters.get()
398
- current_filters.append(new_count)
399
- active_filters.set(current_filters)
400
-
401
- @reactive.effect
402
- def _():
403
- # Monitor all possible delete buttons
404
- for i in range(1, filter_count.get() + 1):
405
- try:
406
- if getattr(input, f"delete_filter_{i}")() > 0:
407
- # Remove the filter row
408
- ui.remove_ui(f"#filter_row_{i}")
409
- # Update active filters
410
- current_filters = active_filters.get()
411
- if i in current_filters:
412
- current_filters.remove(i)
413
- active_filters.set(current_filters)
414
- except:
415
- continue
416
 
417
- @output
418
- @render_tabulator
419
- @reactive.event(input.generate_table, ignore_none=False)
420
- def tabulator():
421
- columns_c = columns.copy()
422
- selection_list = list(input.list_input())
 
423
  start_date = str(input.date_id()[0])
424
  end_date = str(input.date_id()[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
- if input.level_input() == "MLB":
428
- df_agg = update.update_summary_select(df=df_mlb_total.filter((pl.col('game_date')>=start_date)&(pl.col('game_date')<=end_date)),
429
- selection=selection_list)
430
 
431
- elif input.level_input() == "AAA":
432
- df_agg = update.update_summary_select(df=df_aaa_total.filter((pl.col('game_date')>=start_date)&(pl.col('game_date')<=end_date)),
433
- selection=selection_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
 
435
- elif input.level_input() == "A":
436
- df_agg = update.update_summary_select(df=df_a_total.filter((pl.col('game_date')>=start_date)&(pl.col('game_date')<=end_date)),
437
- selection=selection_list)
438
-
439
 
440
- df_agg = df_agg.select(selection_list + list(input.list_stats()))#.sort('pa', descending=True)
 
441
 
442
- # Apply filters - only for active filters
443
- for i in active_filters.get():
444
- try:
445
- col_name = getattr(input, f"filter_column_{i}")()
446
- if col_name: # Only apply filter if column is selected
447
- operator = getattr(input, f"filter_operator_{i}")()
448
- if col_name in [col["field"] for col in columns_c if col.get("formatter") == "money"]:
449
- value = getattr(input, f"filter_value_{i}")()/100
450
- else:
451
- value = getattr(input, f"filter_value_{i}")()
452
-
453
- if operator == ">=":
454
- df_agg = df_agg.filter(pl.col(col_name) >= value)
455
- elif operator == "<=":
456
- df_agg = df_agg.filter(pl.col(col_name) <= value)
457
- except:
458
- continue
459
 
460
- for col in df_agg.columns[len(selection_list):]:
461
- if col in rounding_dict:
462
- df_agg = df_agg.with_columns(pl.col(col).round(rounding_dict[col]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
 
464
- for column in columns_c:
465
- if column.get("formatter") == "money" and column.get("field") in df_agg.columns:
466
- df_agg = df_agg.with_columns(pl.col(column.get("field"))*100)
467
 
468
- col_group = []
469
- for column in columns_group:
470
- if column.get("field") in df_agg.columns:
471
- col_group.append(column)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
- col_group_stats = []
474
- for column in columns_c:
475
- if column.get("field") in df_agg.columns:
476
- col_group_stats.append(column)
477
 
478
- columns_c = col_group + col_group_stats
479
-
480
- return Tabulator(
481
- df_agg.to_pandas(),
482
- table_options=TableOptions(
483
- height=800,
484
- columns=columns_c,
485
  )
486
- )
487
 
488
  app = App(app_ui, server)
 
 
 
 
 
 
1
  import polars as pl
2
  import numpy as np
3
  import pandas as pd
 
11
  import requests
12
  import joblib
13
  from matplotlib.gridspec import GridSpec
14
+ from shiny import App, reactive, ui, render
15
+ from shiny.ui import h2, tags
16
+ import matplotlib.pyplot as plt
17
+ import matplotlib.gridspec as gridspec
18
+ import seaborn as sns
19
+ from functions.pitch_summary_functions import *
20
+ from shiny import App, reactive, ui, render
21
+ from shiny.ui import h2, tags
22
 
23
  colour_palette = ['#FFB000','#648FFF','#785EF0',
24
  '#DC267F','#FE6100','#3D1EB2','#894D80','#16AA02','#B5592B','#A3C1ED']
25
 
 
 
 
 
 
26
 
27
+ year_list = [2017,2018,2019,2020,2021,2022,2023,2024]
 
 
28
 
29
 
30
 
31
+ level_dict = {'1':'MLB',
32
+ '11':'AAA',
33
+ # '12':'AA',
34
+ #'13':'A+',
35
+ '14':'A',
36
+ '17':'AFL',
37
+ '22':'College',
38
+ '21':'Prospects',
39
+ '51':'International' }
40
 
41
+ function_dict={
42
+ 'velocity_kdes':'Velocity Distributions',
43
+ 'break_plot':'Pitch Movement',
44
+ 'tj_stuff_roling':'Rolling tjStuff+ by Pitch',
45
+ 'tj_stuff_roling_game':'Rolling tjStuff+ by Game',
46
+ 'location_plot_lhb':'Locations vs LHB',
47
+ 'location_plot_rhb':'Locations vs RHB',
48
+ }
49
 
 
 
 
 
 
 
 
 
50
 
51
+ split_dict = {'all':'All',
52
+ 'left':'LHH',
53
+ 'right':'RHH'}
54
+
55
+ split_dict_hand = {'all':['L','R'],
56
+ 'left':['L'],
57
+ 'right':['R']}
58
+
59
+
60
+ type_dict = {'R':'Regular Season',
61
+ 'S':'Spring',
62
+ 'P':'Playoffs' }
63
+
64
+
65
+
66
+ # List of MLB teams and their corresponding ESPN logo URLs
67
+ mlb_teams = [
68
+ {"team": "AZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/ari.png&h=500&w=500"},
69
+ {"team": "ATH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/oak.png&h=500&w=500"},
70
+ {"team": "ATL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/atl.png&h=500&w=500"},
71
+ {"team": "BAL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bal.png&h=500&w=500"},
72
+ {"team": "BOS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/bos.png&h=500&w=500"},
73
+ {"team": "CHC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chc.png&h=500&w=500"},
74
+ {"team": "CWS", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/chw.png&h=500&w=500"},
75
+ {"team": "CIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cin.png&h=500&w=500"},
76
+ {"team": "CLE", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/cle.png&h=500&w=500"},
77
+ {"team": "COL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/col.png&h=500&w=500"},
78
+ {"team": "DET", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/det.png&h=500&w=500"},
79
+ {"team": "HOU", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/hou.png&h=500&w=500"},
80
+ {"team": "KC", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/kc.png&h=500&w=500"},
81
+ {"team": "LAA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/laa.png&h=500&w=500"},
82
+ {"team": "LAD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/lad.png&h=500&w=500"},
83
+ {"team": "MIA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mia.png&h=500&w=500"},
84
+ {"team": "MIL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/mil.png&h=500&w=500"},
85
+ {"team": "MIN", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/min.png&h=500&w=500"},
86
+ {"team": "NYM", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nym.png&h=500&w=500"},
87
+ {"team": "NYY", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/nyy.png&h=500&w=500"},
88
+ {"team": "PHI", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/phi.png&h=500&w=500"},
89
+ {"team": "PIT", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/pit.png&h=500&w=500"},
90
+ {"team": "SD", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sd.png&h=500&w=500"},
91
+ {"team": "SF", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sf.png&h=500&w=500"},
92
+ {"team": "SEA", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/sea.png&h=500&w=500"},
93
+ {"team": "STL", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/stl.png&h=500&w=500"},
94
+ {"team": "TB", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tb.png&h=500&w=500"},
95
+ {"team": "TEX", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tex.png&h=500&w=500"},
96
+ {"team": "TOR", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/tor.png&h=500&w=500"},
97
+ {"team": "WSH", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/mlb/500/scoreboard/wsh.png&h=500&w=500"},
98
+ {"team": "ZZZ", "logo_url": "https://a.espncdn.com/combiner/i?img=/i/teamlogos/leagues/500/mlb.png&w=500&h=500"}
99
+ ]
100
 
101
 
102
+ df_image = pd.DataFrame(mlb_teams)
103
+ image_dict = df_image.set_index('team')['logo_url'].to_dict()
104
+ image_dict_flip = df_image.set_index('logo_url')['team'].to_dict()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
 
107
 
108
+ # # Define the features to be used for training
109
+ # features_table = ['start_speed',
110
+ # 'spin_rate',
111
+ # 'extension',
112
+ # 'ivb',
113
+ # 'hb',
114
+ # 'x0',
115
+ # 'z0',
116
+ # 'tj_stuff_plus']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
+ from shiny import App, reactive, ui, render
119
+ from shiny.ui import h2, tags
120
 
121
+ # Define the UI layout for the app
122
  app_ui = ui.page_sidebar(
123
  ui.sidebar(
124
+ # Row for selecting season and level
125
+ ui.row(
126
+ ui.column(4, ui.input_select('year_input', 'Select Season', year_list, selected=2024)),
127
+ ui.column(4, ui.input_select('level_input', 'Select Level', level_dict)),
128
+ ui.column(4, ui.input_select('type_input', 'Select Type', type_dict,selected='R'))
 
129
  ),
130
+ # Row for the action button to get player list
131
+ ui.row(ui.input_action_button("player_button", "Get Player List", class_="btn-primary")),
132
+ # Row for selecting the player
133
+ ui.row(ui.column(12, ui.output_ui('player_select_ui', 'Select Player'))),
134
+ # Row for selecting the date range
135
+ ui.row(ui.column(12, ui.output_ui('date_id', 'Select Date'))),
136
+
137
+ # Rows for selecting plots and split options
138
+ ui.row(
139
+ ui.column(4, ui.input_select('plot_id_1', 'Plot Left', function_dict, multiple=False, selected='velocity_kdes')),
140
+ ui.column(4, ui.input_select('plot_id_2', 'Plot Middle', function_dict, multiple=False, selected='tj_stuff_roling')),
141
+ ui.column(4, ui.input_select('plot_id_3', 'Plot Right', function_dict, multiple=False, selected='break_plot'))
142
  ),
143
+ ui.row(
144
+ ui.column(6, ui.input_select('split_id', 'Select Split', split_dict, multiple=False)),
145
+ ui.column(6, ui.input_numeric('rolling_window', 'Rolling Window (for tjStuff+ Plot)', min=1, value=50))
 
 
 
146
  ),
147
+ ui.row(
148
+ ui.column(6, ui.input_switch("switch", "Custom Team?", False)),
149
+ ui.column(6, ui.input_select('logo_select', 'Select Custom Logo', image_dict_flip, multiple=False))
 
 
 
 
150
  ),
151
+
152
+ # Row for the action button to generate plot
153
+ ui.row(ui.input_action_button("generate_plot", "Generate Plot", class_="btn-primary")),
154
+ width="400px" # Added this parameter to control sidebar width
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  ),
156
+
157
+ # Main content area with tabs (placed directly in page_sidebar)
158
  ui.navset_tab(
159
+ ui.nav_panel("Pitching Summary",
160
+ ui.output_text("status"),
161
+ ui.output_plot('plot', width='2100px', height='2100px')
 
 
162
  ),
163
+ ui.nav_panel("Summary Table",
164
+ ui.output_data_frame("grid"))
165
  )
166
  )
167
 
168
+
169
  def server(input, output, session):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
@reactive.calc
@reactive.event(
    input.pitcher_id,
    input.date_id,
    input.split_id,
    # These three are also read in the body. Because reactive.event isolates
    # the body, they must be listed here or the cached frame goes stale when
    # only the season, level or game type changes.
    input.year_input,
    input.level_input,
    input.type_input,
)
def cached_data():
    """Fetch, filter and score the selected pitcher's pitch-level data.

    Reads the season, level, game type, pitcher, date range and batter-hand
    split from the session `input` (this function is meant to live inside
    `server`). The result is cached by `reactive.calc` and only invalidated
    when one of the inputs listed in `reactive.event` changes, so the plot
    and the table can share a single scrape.

    Returns:
        The frame produced by `stuff_apply.stuff_apply(...)` with one extra
        column, `pitch_count`: the number of rows sharing each `pitch_type`.
    """
    year_input = int(input.year_input())
    sport_id = int(input.level_input())
    player_input = int(input.pitcher_id())
    start_date = str(input.date_id()[0])
    end_date = str(input.date_id()[1])

    # List the games the player appeared in within the window, then pull the
    # data for each of those games.
    game_list = scrape.get_player_games_list(sport_id=sport_id,
                                             season=year_input,
                                             player_id=player_input,
                                             start_date=start_date,
                                             end_date=end_date,
                                             game_type=[input.type_input()])

    # A shallow copy is passed on, as in the original call.
    data_list = scrape.get_data(game_list_input=game_list[:])

    # Keep only actual pitches thrown by the selected pitcher, at 50+ start
    # speed, against the batter hands allowed by the chosen split.
    pitches = scrape.get_data_df(data_list=data_list).filter(
        (pl.col("pitcher_id") == player_input)
        & (pl.col("is_pitch") == True)  # noqa: E712 - column comparison, not identity
        & (pl.col("start_speed") >= 50)
        & (pl.col('batter_hand').is_in(split_dict_hand[input.split_id()]))
    )

    # update -> feature engineering -> stuff model, then tag per-type counts.
    df = stuff_apply.stuff_apply(
        fe.feature_engineering(update.update(pitches))
    ).with_columns(
        pl.col('pitch_type').count().over('pitch_type').alias('pitch_count')
    )
    return df
199
+
200
@render.ui
@reactive.event(input.player_button, input.year_input, input.level_input, input.type_input, ignore_none=False)
def player_select_ui():
    """Render the pitcher drop-down for the chosen level, season and game type.

    Re-rendered when the "Get Player List" button is pressed or when the
    year, level or game-type input changes.
    """
    level = int(input.level_input())
    season = int(input.year_input())
    game_types = [input.type_input()]

    # Only pitchers and two-way players are offered, ordered by name.
    players = scrape.get_players(sport_id=level, season=season, game_type=game_types)
    pitchers = players.filter(pl.col("position").is_in(['P', 'TWP'])).sort("name")

    # Map player id -> display name for the select input.
    choices = {
        player_id: name
        for player_id, name in zip(pitchers['player_id'], pitchers['name'])
    }

    return ui.input_select("pitcher_id", "Select Pitcher", choices, selectize=True)
212
+
213
@render.ui
@reactive.event(input.player_button, input.year_input, input.level_input, input.type_input, ignore_none=False)
def date_id():
    """Render a date-range picker limited to the selected season's calendar year.

    The picker defaults to, and is bounded by, January 1 through December 31
    of the year chosen in `year_input`.
    """
    # NOTE(review): this output and the input it renders share the id
    # "date_id" - confirm this is intentional.
    season = int(input.year_input())
    first_day = f"{season}-01-01"
    last_day = f"{season}-12-31"

    return ui.input_date_range("date_id", "Select Date Range",
                               start=first_day,
                               end=last_day,
                               min=first_day,
                               max=last_day)
222
@output
@render.text
def status():
    """Placeholder status line shown above the plot; currently always empty.

    The previous body tested `input.generate == 0`, but the visible layout
    only defines a button with id `generate_plot`, and the reactive value
    was never called. The comparison could never be true and both branches
    returned the empty string. The dead check is removed; the rendered
    output is unchanged.
    """
    return ""
229
+
230
@output
@render.plot
@reactive.event(input.generate_plot, ignore_none=False)
def plot():
    """Draw the one-page pitching summary figure for the selected pitcher.

    Runs when the "Generate Plot" button fires. The figure is laid out on a
    6x8 grid: header, headshot / bio / logo, season stat table, three
    user-selected plots, pitch summary table and footer, with narrow
    spacer columns on the left and right.

    Nothing is returned; the figure is created through pyplot and left as
    the current figure.

    Raises:
        requests.RequestException: if a custom logo is requested and cannot
            be downloaded (timeout, connection failure or HTTP error status).
    """
    # NOTE(review): the extent of this `with` block is reconstructed. The
    # progress handle is still updated after the data step, so the whole
    # body is kept inside it. Confirm against the real file.
    with ui.Progress(min=0, max=1) as p:
        p.set(message="Generating plot", detail="This may take a while...")

        p.set(0.3, "Gathering data...")
        year_input = int(input.year_input())
        sport_id = int(input.level_input())
        player_input = int(input.pitcher_id())
        start_date = str(input.date_id()[0])
        end_date = str(input.date_id()[1])

        print(year_input, sport_id, player_input, start_date, end_date)

        # Work on a copy so the cached frame is never modified by plotting code.
        df = cached_data()
        df = df.clone()

        p.set(0.6, "Creating plot...")

        fig = plt.figure(figsize=(26,26))
        plt.rcParams.update({'figure.autolayout': True})
        fig.set_facecolor('white')
        sns.set_theme(style="whitegrid", palette=colour_palette)
        print('this is the one plot')

        gs = gridspec.GridSpec(6, 8,
                               height_ratios=[5,20,12,36,36,7],
                               width_ratios=[4,18,18,18,18,18,18,4])

        gs.update(hspace=0.2, wspace=0.5)

        # Define the positions of each subplot in the grid
        ax_headshot = fig.add_subplot(gs[1,1:3])
        ax_bio = fig.add_subplot(gs[1,3:5])
        ax_logo = fig.add_subplot(gs[1,5:7])

        ax_season_table = fig.add_subplot(gs[2,1:7])

        ax_plot_1 = fig.add_subplot(gs[3,1:3])
        ax_plot_2 = fig.add_subplot(gs[3,3:5])
        ax_plot_3 = fig.add_subplot(gs[3,5:7])

        ax_table = fig.add_subplot(gs[4,1:7])

        ax_footer = fig.add_subplot(gs[-1,1:7])
        ax_header = fig.add_subplot(gs[0,1:7])
        ax_left = fig.add_subplot(gs[:,0])
        ax_right = fig.add_subplot(gs[:,-1])

        # Hide axes for footer, header, left, and right
        ax_footer.axis('off')
        ax_header.axis('off')
        ax_left.axis('off')
        ax_right.axis('off')

        sns.set_theme(style="whitegrid", palette=colour_palette)
        fig.set_facecolor('white')

        df_teams = scrape.get_teams()

        player_headshot(player_input=player_input, ax=ax_headshot,sport_id=sport_id,season=year_input)
        player_bio(pitcher_id=player_input, ax=ax_bio,sport_id=sport_id,year_input=year_input)

        if input.switch():
            # "Custom Team?" is on: the logo select input supplies the image URL.
            logo_url = input.logo_select()

            # Bound the request and fail on HTTP errors, so a slow or broken
            # logo host neither hangs the render nor feeds an error page to
            # the image decoder.
            response = requests.get(logo_url, timeout=10)
            response.raise_for_status()

            # Open the image from the response content
            img = Image.open(BytesIO(response.content))

            # Display the image on the right-hand part of the axis
            ax_logo.set_xlim(0, 1.3)
            ax_logo.set_ylim(0, 1)
            ax_logo.imshow(img, extent=[0.3, 1.3, 0, 1], origin='upper')

            # Turn off the axis
            ax_logo.axis('off')

        else:
            plot_logo(pitcher_id=player_input, ax=ax_logo, df_team=df_teams,df_players=scrape.get_players(sport_id,year_input))

        stat_summary_table(df=df,
                           ax=ax_season_table,
                           player_input=player_input,
                           split=input.split_id(),
                           sport_id=sport_id,
                           game_type=[input.type_input()])

        # Fill the three plot slots. `z` is the first grid column of the slot
        # and is only used by velocity_kdes.
        for x,y,z in zip([input.plot_id_1(),input.plot_id_2(),input.plot_id_3()],[ax_plot_1,ax_plot_2,ax_plot_3],[1,3,5]):
            if x == 'velocity_kdes':
                velocity_kdes(df,
                              ax=y,
                              gs=gs,
                              gs_x=[3,4],
                              gs_y=[z,z+2],
                              fig=fig)
            elif x == 'tj_stuff_roling':
                tj_stuff_roling(df=df,
                                window=int(input.rolling_window()),
                                ax=y)
            elif x == 'tj_stuff_roling_game':
                tj_stuff_roling_game(df=df,
                                     window=int(input.rolling_window()),
                                     ax=y)
            elif x == 'break_plot':
                break_plot(df = df,ax=y)
            elif x == 'location_plot_lhb':
                location_plot(df = df,ax=y,hand='L')
            elif x == 'location_plot_rhb':
                location_plot(df = df,ax=y,hand='R')

        summary_table(df=df,
                      ax=ax_table)

        plot_footer(ax_footer)

        fig.subplots_adjust(left=0.01, right=0.99, top=0.99, bottom=0.01)
363
 
 
 
 
364
 
365
@output
@render.data_frame
@reactive.event(input.generate_plot, ignore_none=False)
def grid():
    """Render the pitch-by-pitch summary table for the cached data.

    Runs when the "Generate Plot" button fires. Selects the identifying
    columns, the pitch metrics and `tj_stuff_plus` from a copy of the cached
    frame, converts to pandas, rounds to one decimal and returns a filterable
    DataGrid.
    """
    pitch_df = cached_data().clone()

    # Game, player and count context for each pitch, plus approach angles.
    context_columns = ['game_id','pitcher_id','pitcher_name','batter_id','batter_name','pitcher_hand',
                       'batter_hand','balls','strikes','play_code','event_type','pitch_type','vaa','haa']

    # Pitch characteristics shown after the context columns.
    features_table = [
        'start_speed',
        'spin_rate',
        'extension',
        'ivb',
        'hb',
        'x0',
        'z0',
    ]

    selection = context_columns + features_table + ['tj_stuff_plus']
    table = pitch_df.select(selection).to_pandas().round(1)

    return render.DataGrid(
        table,
        row_selection_mode='multiple',
        height='700px',
        width='fit-content',
        filters=True,
    )
394
+
395
 
396
# Build the Shiny application once. The file previously constructed it twice
# in a row with identical arguments; the second assignment simply replaced
# the first.
app = App(app_ui, server)
functions/__pycache__/df_update.cpython-39.pyc CHANGED
Binary files a/functions/__pycache__/df_update.cpython-39.pyc and b/functions/__pycache__/df_update.cpython-39.pyc differ
 
functions/__pycache__/pitch_summary_functions.cpython-39.pyc CHANGED
Binary files a/functions/__pycache__/pitch_summary_functions.cpython-39.pyc and b/functions/__pycache__/pitch_summary_functions.cpython-39.pyc differ
 
functions/df_update.py CHANGED
@@ -138,28 +138,23 @@ class df_update:
138
 
139
  ])
140
 
141
-
142
  df = df.with_columns([
143
  pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
144
  pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
145
  pl.when((pl.col('tb') >= 0)).then(df['woba']).otherwise(None).alias('woba_contact'),
146
  pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
147
  pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
148
- pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone_final'),
149
-
150
- ])
151
-
152
- df = df.with_columns([
153
  pl.when(df['launch_speed'].is_null()).then(None).otherwise(df['barrel']).alias('barrel'),
154
  pl.lit('average').alias('average'),
155
- pl.when(pl.col('in_zone_final') == False).then(True).otherwise(False).alias('out_zone'),
156
- pl.when((pl.col('in_zone_final') == True) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('zone_swing'),
157
- pl.when((pl.col('in_zone_final') == True) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('zone_contact'),
158
- pl.when((pl.col('in_zone_final') == False) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('ozone_swing'),
159
- pl.when((pl.col('in_zone_final') == False) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('ozone_contact'),
160
  pl.when(pl.col('event_type').str.contains('strikeout')).then(True).otherwise(False).alias('k'),
161
  pl.when(pl.col('event_type').is_in(['walk', 'intent_walk'])).then(True).otherwise(False).alias('bb'),
162
- pl.when(pl.col('attack_zone').is_null()).then(pl.col('attack_zone_predict')).otherwise(pl.col('attack_zone')).alias('attack_zone_final'),
163
 
164
 
165
  ])
@@ -168,18 +163,18 @@ class df_update:
168
  (df['k'].cast(pl.Float32) - df['bb'].cast(pl.Float32)).alias('k_minus_bb'),
169
  (df['bb'].cast(pl.Float32) - df['k'].cast(pl.Float32)).alias('bb_minus_k'),
170
  (df['launch_speed'] > 0).alias('bip_div'),
171
- (df['attack_zone_final'] == 0).alias('heart'),
172
- (df['attack_zone_final'] == 1).alias('shadow'),
173
- (df['attack_zone_final'] == 2).alias('chase'),
174
- (df['attack_zone_final'] == 3).alias('waste'),
175
- ((df['attack_zone_final'] == 0) & (df['swings'] == 1)).alias('heart_swing'),
176
- ((df['attack_zone_final'] == 1) & (df['swings'] == 1)).alias('shadow_swing'),
177
- ((df['attack_zone_final'] == 2) & (df['swings'] == 1)).alias('chase_swing'),
178
- ((df['attack_zone_final'] == 3) & (df['swings'] == 1)).alias('waste_swing'),
179
- ((df['attack_zone_final'] == 0) & (df['whiffs'] == 1)).alias('heart_whiff'),
180
- ((df['attack_zone_final'] == 1) & (df['whiffs'] == 1)).alias('shadow_whiff'),
181
- ((df['attack_zone_final'] == 2) & (df['whiffs'] == 1)).alias('chase_whiff'),
182
- ((df['attack_zone_final'] == 3) & (df['whiffs'] == 1)).alias('waste_whiff')
183
  ])
184
 
185
 
@@ -234,84 +229,7 @@ class df_update:
234
  # Check if 'trajectory_null' column exists and drop it
235
  if 'trajectory_null' in df.columns:
236
  df = df.drop('trajectory_null')
237
-
238
-
239
- pitch_cat = {'FA': None,
240
- 'FF': 'Fastball',
241
- 'FT': 'Fastball',
242
- 'FC': 'Fastball',
243
- 'FS': 'Off-Speed',
244
- 'FO': 'Off-Speed',
245
- 'SI': 'Fastball',
246
- 'ST': 'Breaking',
247
- 'SL': 'Breaking',
248
- 'CU': 'Breaking',
249
- 'KC': 'Breaking',
250
- 'SC': 'Off-Speed',
251
- 'GY': 'Off-Speed',
252
- 'SV': 'Breaking',
253
- 'CS': 'Breaking',
254
- 'CH': 'Off-Speed',
255
- 'KN': 'Off-Speed',
256
- 'EP': 'Breaking',
257
- 'UN': None,
258
- 'IN': None,
259
- 'PO': None,
260
- 'AB': None,
261
- 'AS': None,
262
- 'NP': None}
263
- df = df.with_columns(
264
- df["pitch_type"].map_elements(lambda x: pitch_cat.get(x, x)).alias("pitch_group")
265
- )
266
-
267
- df = df.with_columns([
268
 
269
- (-(pl.col('vy0')**2 - (2 * pl.col('ay') * (pl.col('y0') - 17/12)))**0.5).alias('vy_f'),
270
- ])
271
-
272
- df = df.with_columns([
273
- ((pl.col('vy_f') - pl.col('vy0')) / pl.col('ay')).alias('t'),
274
- ])
275
-
276
- df = df.with_columns([
277
- (pl.col('vz0') + (pl.col('az') * pl.col('t'))).alias('vz_f'),
278
- (pl.col('vx0') + (pl.col('ax') * pl.col('t'))).alias('vx_f')
279
- ])
280
-
281
- df = df.with_columns([
282
- (-np.arctan(pl.col('vz_f') / pl.col('vy_f')) * (180 / np.pi)).alias('vaa'),
283
- (-np.arctan(pl.col('vx_f') / pl.col('vy_f')) * (180 / np.pi)).alias('haa')
284
- ])
285
-
286
- # Mirror horizontal break for left-handed pitchers
287
- df = df.with_columns(
288
- pl.when(pl.col('pitcher_hand') == 'L')
289
- .then(-pl.col('ax'))
290
- .otherwise(pl.col('ax'))
291
- .alias('ax')
292
- )
293
-
294
- # Mirror horizontal break for left-handed pitchers
295
- df = df.with_columns(
296
- pl.when(pl.col('pitcher_hand') == 'L')
297
- .then(-pl.col('hb'))
298
- .otherwise(pl.col('hb'))
299
- .alias('hb')
300
- )
301
-
302
- # Mirror horizontal release point for left-handed pitchers
303
- df = df.with_columns(
304
- pl.when(pl.col('pitcher_hand') == 'L')
305
- .then(pl.col('x0'))
306
- .otherwise(-pl.col('x0'))
307
- .alias('x0')
308
- )
309
-
310
- df = df.with_columns([
311
- pl.when(df['swings'].is_null()).then(None).otherwise(df['swings']).alias('is_swing'),
312
- pl.when(df['bip'].is_null()).then(None).otherwise(df['bip']).alias('is_bip')])
313
-
314
-
315
  return df
316
 
317
  # Assuming df is your Polars DataFrame
@@ -462,7 +380,6 @@ class df_update:
462
  pl.col('k').sum().alias('k'),
463
  pl.col('bb').sum().alias('bb'),
464
  pl.col('bb_minus_k').sum().alias('bb_minus_k'),
465
- pl.col('k_minus_bb').sum().alias('k_minus_bb'),
466
  pl.col('csw').sum().alias('csw'),
467
  pl.col('bip').sum().alias('bip'),
468
  pl.col('bip_div').sum().alias('bip_div'),
@@ -506,17 +423,7 @@ class df_update:
506
  pl.col('shadow_whiff').sum().alias('shadow_whiff'),
507
  pl.col('chase_whiff').sum().alias('chase_whiff'),
508
  pl.col('waste_whiff').sum().alias('waste_whiff'),
509
- pl.col('tj_stuff_plus').sum().alias('tj_stuff_plus'),
510
- pl.col('start_speed').sum(),
511
- pl.col('vb').sum(),
512
- pl.col('ivb').sum(),
513
- pl.col('hb').sum(),
514
- pl.col('x0').sum(),
515
- pl.col('z0').sum(),
516
- pl.col('vaa').sum(),
517
- pl.col('haa').sum(),
518
- pl.col('spin_rate').sum(),
519
- pl.col('extension').sum(),
520
  ])
521
 
522
  # Add calculated columns to the summary DataFrame
@@ -528,7 +435,6 @@ class df_update:
528
  (pl.col('k') / pl.col('pa')).alias('k_percent'),
529
  (pl.col('bb') / pl.col('pa')).alias('bb_percent'),
530
  (pl.col('bb_minus_k') / pl.col('pa')).alias('bb_minus_k_percent'),
531
- (pl.col('k_minus_bb') / pl.col('pa')).alias('k_minus_bb_percent'),
532
  (pl.col('bb') / pl.col('k')).alias('bb_over_k_percent'),
533
  (pl.col('csw') / pl.col('pitches')).alias('csw_percent'),
534
  (pl.col('sweet_spot') / pl.col('bip_div')).alias('sweet_spot_percent'),
@@ -563,16 +469,6 @@ class df_update:
563
  (pl.col('xwoba') / pl.col('xwoba_codes')).alias('xwoba_percent'),
564
  (pl.col('xwoba_contact') / pl.col('bip')).alias('xwoba_percent_contact'),
565
  (pl.col('tj_stuff_plus') / pl.col('pitches')).alias('tj_stuff_plus_avg'),
566
- (pl.col('start_speed')/ pl.col('pitches')).alias('start_speed_avg'),
567
- (pl.col('vb')/ pl.col('pitches')).alias('vb_avg'),
568
- (pl.col('ivb')/ pl.col('pitches')).alias('ivb_avg'),
569
- (pl.col('hb')/ pl.col('pitches')).alias('hb_avg'),
570
- (pl.col('x0')/ pl.col('pitches')).alias('x0_avg'),
571
- (pl.col('z0')/ pl.col('pitches')).alias('z0_avg'),
572
- (pl.col('vaa')/ pl.col('pitches')).alias('vaa_avg'),
573
- (pl.col('haa')/ pl.col('pitches')).alias('haa_avg'),
574
- (pl.col('spin_rate')/ pl.col('pitches')).alias('spin_rate_avg'),
575
- (pl.col('extension')/ pl.col('pitches')).alias('extension_avg'),
576
 
577
  ])
578
 
 
138
 
139
  ])
140
 
 
141
  df = df.with_columns([
142
  pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('woba_codes'),
143
  pl.when(df['event_type'].is_in(woba_codes)).then(1).otherwise(None).alias('xwoba_codes'),
144
  pl.when((pl.col('tb') >= 0)).then(df['woba']).otherwise(None).alias('woba_contact'),
145
  pl.when(pl.col('px').is_null()).then(pl.col('px_predict')).otherwise(pl.col('px')).alias('px'),
146
  pl.when(pl.col('pz').is_null()).then(pl.col('pz_predict')).otherwise(pl.col('pz')).alias('pz'),
147
+ pl.when(pl.col('in_zone').is_null()).then(pl.col('in_zone_predict')).otherwise(pl.col('in_zone')).alias('in_zone'),
 
 
 
 
148
  pl.when(df['launch_speed'].is_null()).then(None).otherwise(df['barrel']).alias('barrel'),
149
  pl.lit('average').alias('average'),
150
+ pl.when(pl.col('in_zone') == False).then(True).otherwise(False).alias('out_zone'),
151
+ pl.when((pl.col('in_zone') == True) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('zone_swing'),
152
+ pl.when((pl.col('in_zone') == True) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('zone_contact'),
153
+ pl.when((pl.col('in_zone') == False) & (pl.col('swings') == 1)).then(True).otherwise(False).alias('ozone_swing'),
154
+ pl.when((pl.col('in_zone') == False) & (pl.col('swings') == 1) & (pl.col('whiffs') == 0)).then(True).otherwise(False).alias('ozone_contact'),
155
  pl.when(pl.col('event_type').str.contains('strikeout')).then(True).otherwise(False).alias('k'),
156
  pl.when(pl.col('event_type').is_in(['walk', 'intent_walk'])).then(True).otherwise(False).alias('bb'),
157
+ pl.when(pl.col('attack_zone').is_null()).then(pl.col('attack_zone_predict')).otherwise(pl.col('attack_zone')).alias('attack_zone'),
158
 
159
 
160
  ])
 
163
  (df['k'].cast(pl.Float32) - df['bb'].cast(pl.Float32)).alias('k_minus_bb'),
164
  (df['bb'].cast(pl.Float32) - df['k'].cast(pl.Float32)).alias('bb_minus_k'),
165
  (df['launch_speed'] > 0).alias('bip_div'),
166
+ (df['attack_zone'] == 0).alias('heart'),
167
+ (df['attack_zone'] == 1).alias('shadow'),
168
+ (df['attack_zone'] == 2).alias('chase'),
169
+ (df['attack_zone'] == 3).alias('waste'),
170
+ ((df['attack_zone'] == 0) & (df['swings'] == 1)).alias('heart_swing'),
171
+ ((df['attack_zone'] == 1) & (df['swings'] == 1)).alias('shadow_swing'),
172
+ ((df['attack_zone'] == 2) & (df['swings'] == 1)).alias('chase_swing'),
173
+ ((df['attack_zone'] == 3) & (df['swings'] == 1)).alias('waste_swing'),
174
+ ((df['attack_zone'] == 0) & (df['whiffs'] == 1)).alias('heart_whiff'),
175
+ ((df['attack_zone'] == 1) & (df['whiffs'] == 1)).alias('shadow_whiff'),
176
+ ((df['attack_zone'] == 2) & (df['whiffs'] == 1)).alias('chase_whiff'),
177
+ ((df['attack_zone'] == 3) & (df['whiffs'] == 1)).alias('waste_whiff')
178
  ])
179
 
180
 
 
229
  # Check if 'trajectory_null' column exists and drop it
230
  if 'trajectory_null' in df.columns:
231
  df = df.drop('trajectory_null')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  return df
234
 
235
  # Assuming df is your Polars DataFrame
 
380
  pl.col('k').sum().alias('k'),
381
  pl.col('bb').sum().alias('bb'),
382
  pl.col('bb_minus_k').sum().alias('bb_minus_k'),
 
383
  pl.col('csw').sum().alias('csw'),
384
  pl.col('bip').sum().alias('bip'),
385
  pl.col('bip_div').sum().alias('bip_div'),
 
423
  pl.col('shadow_whiff').sum().alias('shadow_whiff'),
424
  pl.col('chase_whiff').sum().alias('chase_whiff'),
425
  pl.col('waste_whiff').sum().alias('waste_whiff'),
426
+ pl.col('tj_stuff_plus').sum().alias('tj_stuff_plus')
 
 
 
 
 
 
 
 
 
 
427
  ])
428
 
429
  # Add calculated columns to the summary DataFrame
 
435
  (pl.col('k') / pl.col('pa')).alias('k_percent'),
436
  (pl.col('bb') / pl.col('pa')).alias('bb_percent'),
437
  (pl.col('bb_minus_k') / pl.col('pa')).alias('bb_minus_k_percent'),
 
438
  (pl.col('bb') / pl.col('k')).alias('bb_over_k_percent'),
439
  (pl.col('csw') / pl.col('pitches')).alias('csw_percent'),
440
  (pl.col('sweet_spot') / pl.col('bip_div')).alias('sweet_spot_percent'),
 
469
  (pl.col('xwoba') / pl.col('xwoba_codes')).alias('xwoba_percent'),
470
  (pl.col('xwoba_contact') / pl.col('bip')).alias('xwoba_percent_contact'),
471
  (pl.col('tj_stuff_plus') / pl.col('pitches')).alias('tj_stuff_plus_avg'),
 
 
 
 
 
 
 
 
 
 
472
 
473
  ])
474
 
functions/pitch_summary_functions.py CHANGED
The diff for this file is too large to render. See raw diff
 
joblib_model/barrel_model.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9428e89f2a408148377efb3cd169dc8790bcc89df9495cb895b9db5a955e8fb7
3
- size 11447
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0ddc8cb10b3b8b52fbae3039e17b470967dae57564567848403f7ce7c54d6b
3
+ size 130
joblib_model/in_zone.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5300b15a6ccfb1dd1e79c85bd9ea478a1945c454845e6be31cd8815e4063a3e
3
- size 54459064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93cbdcde8fffdc81b5817b979ebf8eda4375f8e6cdd7d9d652b17caf4af7c5ff
3
+ size 133
joblib_model/in_zone_model_knn_20240410.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82d6d95be88b006bea7efd4bbf0464a0a50f261f6f65f060bf022114300721ed
3
- size 46782024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:584aacfffca94352d0305db4975aeb63d5513cf6adaa773af785e44b5deaaa9f
3
+ size 133
joblib_model/linear_reg_model_x.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:179663ae0fa65c626b9a941b6934bda1ce58bdf02a69c0daefc28abd28154201
3
- size 579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e65ec79a8f003432d2c991e40f564eaba695d7b9557f560a37caa0234158fe
3
+ size 128
joblib_model/linear_reg_model_z.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ceabc302949cdbe5515b428f900bce98d6f6bedf99153c8d8a645cb0240ef8b
3
- size 579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9e6629bbfeffaba8b37bb30f5b04ef948f013d96c20d1971240300beeef8409
3
+ size 128
joblib_model/model_attack_zone.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2671d4db2606cfee299dcffba2a94138fce77c1b7ef6ad14695a972a38dda3c8
3
- size 50570139
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b895a99631588abf80042e50ffc87320c76b81a05ef43bfc86542ca31967bc10
3
+ size 133
joblib_model/no_swing.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3da3e7ab2b513b87d05e90ae30c788ac819dfcaa7cc1cd9943fc13d2958a00f
3
- size 279409
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dfb7cd67c8415fb8c14c5bfcdcfcd618524a5311486fac04fe5b715696af412
3
+ size 131
joblib_model/swing.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fef4a66363e5f3fdc70ae45c5382bd986c800ff8bf9296a1f9b334461e70fd4
3
- size 262137
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5cba3138b84208d41355bd3ce810eec7faa46ec36bd22210beeba1501c61ff1
3
+ size 131
joblib_model/xwoba_model.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05bade9c0420657d3f0dfe35f0b1adbd2d5ae25c87a07bdf6629987f29926438
3
- size 10684246
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1747435104c4dc0e5a8778a992138ced3e4ff70f71e569547007ca5a8ceb351d
3
+ size 133
requirements.txt CHANGED
@@ -1,16 +1,15 @@
1
- joblib==1.3.2
2
- lightgbm
3
- matplotlib==3.5.1
4
- numpy==1.23.5
5
- pandas==1.5.2
6
- polars==1.12.0
7
- Requests==2.31.0
8
- scipy==1.11.1
9
- seaborn==0.11.1
10
- scikit-learn==1.0.1
11
- shiny==0.6.1
12
- Jinja2==3.1.4
13
- tqdm==4.62.3
14
- pyarrow
15
- tabulator
16
-
 
1
+ joblib==1.3.2
2
+ lightgbm
3
+ matplotlib==3.5.1
4
+ numpy==1.23.5
5
+ pandas==1.5.2
6
+ polars==1.12.0
7
+ Requests==2.31.0
8
+ scipy==1.11.1
9
+ seaborn==0.11.1
10
+ scikit-learn==1.0.1
11
+ shiny==0.6.1
12
+ Jinja2==3.1.4
13
+ tqdm==4.62.3
14
+ pyarrow
15
+
 
stuff_model/__pycache__/feature_engineering.cpython-39.pyc CHANGED
Binary files a/stuff_model/__pycache__/feature_engineering.cpython-39.pyc and b/stuff_model/__pycache__/feature_engineering.cpython-39.pyc differ
 
stuff_model/lgbm_model_2020_2023.joblib CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41001a1acf6ce7dbe247f1b8b7e68a1bb1b112f39d080b7e95a83479e56cb7c1
3
- size 3092328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:975989d422d2a2a5882eb0c296c811575b7b48ad0fbe6b22a901dfad76ea4a88
3
+ size 132