OwenStOnge committed on
Commit
0eb12ce
·
verified ·
1 Parent(s): da6ef78

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +240 -1
app.R CHANGED
@@ -10,7 +10,11 @@ library(httr)
10
  library(progressr)
11
  library(RCurl)
12
  library(curl)
 
 
13
 
 
 
14
 
15
  # Define columns to remove if they exist
16
  columns_to_remove <- c(
@@ -215,6 +219,231 @@ merge_with_bat_tracking <- function(csv_data, bat_tracking_data) {
215
  ))
216
  }
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
 
220
  # UI
@@ -1652,8 +1881,18 @@ observe({
1652
  NULL
1653
  }
1654
  }, error = function(e) { NULL })
1655
-
1656
  if (!is.null(data) && nrow(data) > 0) {
 
 
 
 
 
 
 
 
 
 
 
1657
  scraped_data(data)
1658
  scrape_status_msg(paste0("Done! ", nrow(data), " rows × ", ncol(data), " columns."))
1659
  } else {
 
10
  library(progressr)
11
  library(RCurl)
12
  library(curl)
13
+ library(xgboost)
14
+ library(recipes)
15
 
16
# Model artifacts loaded at the top level of the script. Both paths are
# relative, so the files are resolved against the current working directory.
# predict_stuffplus() passes stuffplus_recipe to bake() and stuffplus_model to
# predict().
+ stuffplus_model <- xgb.load("stuffplus_xgb.json")
17
+ stuffplus_recipe <- readRDS("stuffplus_recipe.rds")
18
 
19
  # Define columns to remove if they exist
20
  columns_to_remove <- c(
 
219
  ))
220
  }
221
 
222
+
223
# Clean pitch-level tracking data and derive per-pitch outcome flags.
#
# Args:
#   data:  data frame of pitches. The code below reads (at least) the columns
#          PlayResult, Batter, Pitcher, Catcher, TaggedPitchType, PitchCall,
#          KorBB, PlateLocHeight, PlateLocSide, ExitSpeed, Strikes and
#          OutsOnPlay.
#   teams: currently unused; kept so existing callers keep working.
#
# Returns:
#   `data` with names reordered to "First Last", numeric columns coerced,
#   plate location multiplied by 12, pitch types normalised, and the derived
#   columns is_csw, is_swing, is_whiff, in_zone, chase, in_zone_whiff, is_hit,
#   slg, on_base, is_hard_hit, woba, wobacon, is_plate_appearance, is_at_bat,
#   is_walk, is_k, is_put_away and event_type added.
clean_college_data <- function(data, teams = NA) {

  hit_results <- c("Single", "Double", "Triple", "Homerun", "HomeRun")
  strikeout_results <- c("StrikeoutSwinging", "StrikeoutLooking")

  # Normalise the home-run label and turn "Last, First" into "First Last".
  data <- data %>%
    mutate(
      PlayResult = ifelse(PlayResult %in% c("HomeRun", "homerun"), "Homerun", PlayResult),
      Batter = sub("(.*),\\s*(.*)", "\\2 \\1", Batter),
      Pitcher = sub("(.*),\\s*(.*)", "\\2 \\1", Pitcher),
      Catcher = sub("(.*),\\s*(.*)", "\\2 \\1", Catcher)
    )

  if ("Top/Bottom" %in% colnames(data)) {
    data <- data %>%
      rename(`Top.Bottom` = `Top/Bottom`)
  }

  numeric_columns <- c(
    "PitchNo", "PAofInning", "PitchofPA", "PitcherId", "BatterId", "Inning", "Outs", "Balls",
    "Strikes", "OutsOnPlay", "RunsScored", "RelSpeed", "VertRelAngle", "HorzRelAngle", "SpinRate",
    "SpinAxis", "RelHeight", "RelSide", "Extension", "VertBreak", "InducedVertBreak", "HorzBreak",
    "PlateLocHeight", "PlateLocSide", "ZoneSpeed", "VertApprAngle", "HorzApprAngle", "ZoneTime",
    "ExitSpeed", "Angle", "Direction", "HitSpinRate", "Distance", "Bearing", "HangTime",
    "LastTrackedDistance", "pfxx", "pfxz", "x0", "y0", "z0", "vx0", "vz0", "vy0", "ax0", "ay0",
    "az0", "EffectiveVelo", "MaxHeight", "SpeedDrop", "ContactPositionX", "ContactPositionY",
    "ContactPositionZ", "HomeTeamForeignID", "AwayTeamForeignID", "CatcherId", "ThrowSpeed",
    "PopTime", "ExchangeTime", "TimeToBase"
  )

  # Coerce first, then scale plate location by 12 (presumably feet -> inches;
  # the in_zone thresholds below are expressed on the scaled values).
  # any_of() makes the scaling a no-op when a column is absent; the previous
  # `if (...) ... else PlateLocHeight` guard still referenced the missing
  # column in its else branch.
  data <- data %>%
    mutate(across(any_of(numeric_columns), as.numeric)) %>%
    mutate(across(any_of(c("PlateLocHeight", "PlateLocSide")), function(v) 12 * v))

  data <- data %>%
    mutate(TaggedPitchType = case_when(
      TaggedPitchType == "FourSeamFastBall" ~ "Fastball",
      TaggedPitchType %in% c("TwoSeamFastBall", "OneSeamFastBall") ~ "Sinker",
      TaggedPitchType == "ChangeUp" ~ "Changeup",
      TaggedPitchType == "Undefined" ~ "Other",
      TRUE ~ TaggedPitchType
    ))

  data <- data %>%
    mutate(
      is_csw = case_when(
        PitchCall %in% c("StrikeSwinging", "StrikeCalled") ~ 1,
        TRUE ~ 0
      ),
      is_swing = case_when(
        PitchCall %in% c("StrikeSwinging", "FoulBallNotFieldable", "InPlay",
                         "FoulBallFieldable", "FoulBall") ~ 1,
        TRUE ~ 0
      ),
      # Whiff is only defined on swings; takes stay NA.
      is_whiff = case_when(
        PitchCall == "StrikeSwinging" & is_swing == 1 ~ 1,
        PitchCall != "StrikeSwinging" & is_swing == 1 ~ 0,
        TRUE ~ NA_real_
      ),
      # NOTE(review): a pitch with a missing plate location matches no
      # out-of-zone condition and therefore falls through to in_zone = 1.
      in_zone = case_when(
        PlateLocSide > 9.975 | PlateLocSide < -9.975 |
          PlateLocHeight > 40 | PlateLocHeight < 20 ~ 0,
        TRUE ~ 1
      ),
      # Chase is only defined for pitches outside the zone.
      chase = case_when(
        is_swing == 1 & in_zone == 0 ~ 1,
        is_swing == 0 & in_zone == 0 ~ 0,
        TRUE ~ NA_real_
      ),
      in_zone_whiff = case_when(
        is_swing == 1 & in_zone == 1 & is_whiff == 1 ~ 1,
        is_swing == 1 & in_zone == 1 & is_whiff == 0 ~ 0,
        TRUE ~ NA_real_
      ),
      is_hit = case_when(
        PlayResult %in% hit_results & PitchCall == "InPlay" ~ 1,
        !PlayResult %in% hit_results & PitchCall == "InPlay" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        TRUE ~ NA_real_
      ),
      slg = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 1,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 2,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 3,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 4,
        !PlayResult %in% hit_results & PitchCall == "InPlay" ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        TRUE ~ NA_real_
      ),
      on_base = case_when(
        PitchCall == "InPlay" & PlayResult %in% hit_results ~ 1,
        PitchCall %in% c("HitByPitch") | KorBB == "Walk" ~ 1,
        # Sacrifices are not in this set, so they stay NA.
        PitchCall == "InPlay" & PlayResult %in% c("Out", "Error", "FieldersChoice") ~ 0,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        TRUE ~ NA_real_
      ),
      is_hard_hit = case_when(
        ExitSpeed >= 95 & PitchCall == "InPlay" ~ 1,
        ExitSpeed < 95 & PitchCall == "InPlay" ~ 0,
        TRUE ~ NA_real_
      ),
      # Hard-coded linear weights per event.
      woba = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 0.95,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 1.24,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 1.47,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 1.71,
        KorBB == "Walk" ~ 0.82,
        PitchCall %in% c("HitByPitch") ~ 0.85,
        KorBB == "Strikeout" ~ 0,
        PlayResult %in% strikeout_results ~ 0,
        PitchCall == "InPlay" & !PlayResult %in% hit_results ~ 0,
        TRUE ~ NA_real_
      ),
      # Same weights as woba, restricted to balls in play.
      wobacon = case_when(
        PitchCall == "InPlay" & PlayResult == "Single" ~ 0.95,
        PitchCall == "InPlay" & PlayResult == "Double" ~ 1.24,
        PitchCall == "InPlay" & PlayResult == "Triple" ~ 1.47,
        PitchCall == "InPlay" & PlayResult %in% c("Homerun", "HomeRun") ~ 1.71,
        PitchCall == "InPlay" & !PlayResult %in% hit_results ~ 0,
        TRUE ~ NA_real_
      ),
      is_plate_appearance = ifelse(
        PitchCall %in% c("InPlay", "HitByPitch") |
          KorBB %in% c("Strikeout", "Walk") |
          PlayResult %in% strikeout_results,
        1, 0
      ),
      is_at_bat = case_when(
        PitchCall == "InPlay" &
          !PlayResult %in% c("StolenBase", "Sacrifice", "CaughtStealing", "Undefined") ~ 1,
        KorBB == "Strikeout" ~ 1,
        PlayResult %in% strikeout_results ~ 1,
        TRUE ~ 0
      ),
      is_walk = case_when(
        is_plate_appearance == 1 & KorBB == "Walk" ~ 1,
        is_plate_appearance == 1 & KorBB != "Walk" ~ 0,
        TRUE ~ NA_real_
      ),
      # The PlayResult strikeout branch must precede the `KorBB != "Strikeout"`
      # branch: case_when takes the first match, and a strikeout recorded only
      # in PlayResult would otherwise be scored 0.
      is_k = case_when(
        is_at_bat == 1 & KorBB == "Strikeout" ~ 1,
        PlayResult %in% strikeout_results ~ 1,
        is_at_bat == 1 & KorBB != "Strikeout" ~ 0,
        TRUE ~ NA_real_
      ),
      # Same ordering concern as is_k; only defined on two-strike pitches.
      is_put_away = case_when(
        Strikes == 2 & KorBB == "Strikeout" ~ 1,
        Strikes == 2 & PlayResult %in% strikeout_results ~ 1,
        Strikes == 2 & KorBB != "Strikeout" ~ 0,
        TRUE ~ NA_real_
      ),
      # %in% never yields NA, so a missing KorBB no longer turns OutsOnPlay
      # into NA for non-strikeout rows.
      OutsOnPlay = ifelse(
        KorBB %in% "Strikeout" | PlayResult %in% strikeout_results,
        OutsOnPlay + 1,
        OutsOnPlay
      )
    )

  # Human-readable event label. The misspelled PitchCall variants are matched
  # on purpose (presumably they occur in the source data -- verify).
  data <- data %>%
    mutate(event_type = case_when(
      PitchCall %in% c("StrikeSwinging", "StrkeSwinging") ~ "Whiff",
      PitchCall %in% c("StriekC", "StrikeCalled") ~ "Called Strike",
      PitchCall %in% c("FoulBallFieldable", "FoulBall", "FoulBallNotFieldable",
                       "FouldBallNotFieldable") ~ "Foul Ball",
      PitchCall %in% c("BallCalled", "BallinDirt", "BallIntentional", "BalIntentional") ~ "Ball",
      PitchCall == "HitByPitch" ~ "HBP",
      PitchCall == "InPlay" & PlayResult %in% c("Out", "FieldersChoice",
                                                "Error", "error",
                                                "Sacrifice") ~ "Field Out",
      PitchCall == "InPlay" & PlayResult == "Single" ~ "Single",
      PitchCall == "InPlay" & PlayResult == "Double" ~ "Double",
      PitchCall == "InPlay" & PlayResult == "Triple" ~ "Triple",
      PitchCall == "InPlay" & PlayResult == "Homerun" ~ "Home Run",
      TRUE ~ NA_character_
    ))

  data
}
397
+
398
+
399
+
400
# Append a `raw_stuff` model prediction to every row of `data`.
#
# A transformed copy of `data` is built purely as model input; the frame that
# is returned is the original `data` plus the new `raw_stuff` column.
# Relies on the file-level objects `stuffplus_recipe` and `stuffplus_model`.
#
# Args:
#   data: data frame with (at least) PitcherThrows, RelSide, ax0, az0,
#         RelSpeed, PlateLocHeight, PlateLocSide, Pitcher, GameID and
#         TaggedPitchType.
#
# Returns:
#   `data` with the numeric column `raw_stuff` added.
predict_stuffplus <- function(data) {

  # Step 1: mirror left-handed release side / horizontal acceleration, scale
  # plate location, then flip the sign of ax0 once more.
  # NOTE(review): clean_college_data() also multiplies PlateLocHeight and
  # PlateLocSide by 12, so calling both in sequence scales them twice --
  # confirm which units the recipe expects.
  oriented <- mutate(
    data,
    RelSide = case_when(
      PitcherThrows == "Right" ~ RelSide,
      PitcherThrows == "Left" ~ -RelSide,
      PitcherThrows %in% c("Both", "Undefined") & RelSide > 0 ~ RelSide,
      PitcherThrows %in% c("Both", "Undefined") & RelSide < 0 ~ -RelSide
    ),
    ax0 = case_when(
      PitcherThrows == "Right" ~ ax0,
      PitcherThrows == "Left" ~ -ax0,
      PitcherThrows %in% c("Both", "Undefined") & ax0 > 0 ~ ax0,
      PitcherThrows %in% c("Both", "Undefined") & ax0 < 0 ~ -ax0
    )
  )
  oriented <- mutate(
    oriented,
    PlateLocHeight = PlateLocHeight * 12,
    PlateLocSide = PlateLocSide * 12,
    ax0 = -ax0
  )

  # Step 2: per pitcher and game, pick the primary pitch (Fastball, else
  # Sinker, else the most frequent tagged type).
  with_primary <- oriented %>%
    group_by(Pitcher, GameID) %>%
    mutate(
      primary_pitch = case_when(
        any(TaggedPitchType == "Fastball") ~ "Fastball",
        any(TaggedPitchType == "Sinker") ~ "Sinker",
        TRUE ~ names(sort(table(TaggedPitchType), decreasing = TRUE))[1]
      )
    )

  # Step 3: average az0 and velocity of that primary pitch, then express each
  # pitch as a difference from those baselines.
  with_baseline <- with_primary %>%
    group_by(Pitcher, GameID, primary_pitch) %>%
    mutate(
      primary_az0 = mean(az0[TaggedPitchType == primary_pitch], na.rm = TRUE),
      primary_velo = mean(RelSpeed[TaggedPitchType == primary_pitch], na.rm = TRUE)
    ) %>%
    ungroup()

  model_input <- mutate(
    with_baseline,
    az0_diff = az0 - primary_az0,
    velo_diff = RelSpeed - primary_velo
  )

  # Step 4: apply the stored preprocessing recipe and score with the model.
  baked <- bake(stuffplus_recipe, new_data = model_input)
  feature_matrix <- as.matrix(baked)

  data$raw_stuff <- predict(stuffplus_model, feature_matrix)

  data
}
445
+
446
+
447
 
448
 
449
  # UI
 
1881
  NULL
1882
  }
1883
  }, error = function(e) { NULL })
 
1884
  if (!is.null(data) && nrow(data) > 0) {
1885
+ scrape_status_msg("Processing data...")
1886
+
1887
+ data <- tryCatch({
1888
+ d <- clean_college_data(data)
1889
+ d <- predict_stuffplus(d)
1890
+ d
1891
+ }, error = function(e) {
1892
+ scrape_status_msg(paste("Processing error:", e$message))
1893
+ data # return unprocessed data if it fails
1894
+ })
1895
+
1896
  scraped_data(data)
1897
  scrape_status_msg(paste0("Done! ", nrow(data), " rows × ", ncol(data), " columns."))
1898
  } else {