igroffman committed on
Commit
69a3f1b
·
verified ·
1 Parent(s): 8722bc6

Update app.R

Browse files
Files changed (1) hide show
  1. app.R +102 -11
app.R CHANGED
@@ -1967,6 +1967,7 @@ has_col_index <- function(idx) {
1967
  is.numeric(idx) && length(idx) == 1 && !is.na(idx) && is.finite(idx) && idx >= 1
1968
  }
1969
 
 
1970
  stuffplus_model <- tryCatch({
1971
  readr::read_rds("college_stuffplusCC.rds")
1972
  }, error = function(e) {
@@ -2191,24 +2192,114 @@ advanced_normalize_columns <- function(df) {
2191
 
2192
  normalize_columns <- advanced_normalize_columns
2193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2194
  create_advanced_pitcher_summary <- function(data, player_name) {
2195
  data <- normalize_columns(data)
 
 
2196
  pitcher_data <- data %>% dplyr::filter(Pitcher == player_name)
 
 
 
 
 
 
 
 
2197
  summary_stats <- pitcher_data %>%
2198
  dplyr::summarise(
2199
- BF = dplyr::n_distinct(paste(Inning, Batter, PAofInning)),
2200
- P = dplyr::n(),
 
2201
  K = sum(KorBB == "Strikeout", na.rm = TRUE),
2202
  BB = sum(WalkIndicator, na.rm = TRUE),
2203
- H = sum(PlayResult %in% c("Single","Double","Triple","HomeRun"), na.rm = TRUE),
2204
- XBH = sum(PlayResult %in% c("Double","Triple","HomeRun"), na.rm = TRUE),
2205
- HR = sum(PlayResult == "HomeRun", na.rm = TRUE),
2206
- HBP = sum(HBPIndicator, na.rm = TRUE),
2207
- `Strike%` = 100 * sum(!PitchCall %in% c("BallCalled","HitByPitch","BallinDirt","BallIntentional")) / dplyr::n(),
2208
- `CSW%` = 100 * sum(PitchCall %in% c("StrikeCalled","StrikeSwinging")) / dplyr::n(),
2209
- `Whiff%` = ifelse(sum(SwingIndicator, na.rm = TRUE) > 0,
2210
- 100 * sum(WhiffIndicator, na.rm = TRUE) / sum(SwingIndicator, na.rm = TRUE), 0),
2211
- `Zone%` = 100 * sum(StrikeZoneIndicator, na.rm = TRUE) / dplyr::n(),
2212
  .groups = "drop"
2213
  )
2214
 
 
1967
  is.numeric(idx) && length(idx) == 1 && !is.na(idx) && is.finite(idx) && idx >= 1
1968
  }
1969
 
1970
+
1971
  stuffplus_model <- tryCatch({
1972
  readr::read_rds("college_stuffplusCC.rds")
1973
  }, error = function(e) {
 
2192
 
2193
  normalize_columns <- advanced_normalize_columns
2194
 
2195
#' Add per-pitch outcome indicator columns used by the pitcher summaries
#'
#' Adds (or overwrites, if already present) the columns `OutsOnPlay`,
#' `is_hit`, `on_base`, `total_bases`, `slg`, `is_k`, `is_walk`, `is_csw`,
#' `chase`, `in_zone`, `is_whiff`, `is_put_away` and `PAindicator`, and
#' creates `RunsScored` (all zeros) when the input does not already have it.
#'
#' Categorical comparisons use `%in%` instead of `==` so that a missing
#' `PlayResult`, `KorBB`, `PitchCall` or `Strikes` value produces 0 rather
#' than NA in the derived indicator. `in_zone` and `chase` can still be NA
#' when the pitch location is missing, because an unknown location is
#' neither inside nor outside the zone.
#'
#' @param df Pitch-level data frame. Must contain `PlayResult`, `KorBB`,
#'   `PitchCall`, `PlateLocSide`, `PlateLocHeight` and `Strikes`;
#'   `RunsScored` is optional.
#' @return `df` with the indicator columns described above.
process_pitcher_indicators <- function(df) {
  # Fail early with a readable message instead of an "object not found"
  # error raised from inside mutate().
  required_cols <- c(
    "PlayResult", "KorBB", "PitchCall",
    "PlateLocSide", "PlateLocHeight", "Strikes"
  )
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "process_pitcher_indicators(): missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Outcome vocabularies, defined once so every indicator agrees on them.
  hit_results <- c("Single", "Double", "Triple", "HomeRun")
  out_results <- c("Out", "FieldersChoice", "Sacrifice", "SacrificeFly")
  pa_end_results <- c(hit_results, out_results, "Error")
  swing_calls <- c(
    "StrikeSwinging", "FoulBall", "FoulBallNotFieldable",
    "FoulBallFieldable", "InPlay"
  )

  # Strike-zone box shared by `chase` and `in_zone`.
  # NOTE(review): presumably feet in the PlateLoc* coordinate system - confirm.
  zone_half_width <- 0.83
  zone_bottom <- 1.5
  zone_top <- 3.38

  has_runs_scored <- "RunsScored" %in% names(df)

  df <- df %>%
    dplyr::mutate(
      # One out per out-type play result or strikeout. Double plays are
      # therefore counted as a single out; adjust if that info is available.
      OutsOnPlay = as.numeric(
        PlayResult %in% out_results | KorBB %in% "Strikeout"
      ),

      # Keep an existing RunsScored column untouched; otherwise default to 0.
      RunsScored = if (has_runs_scored) RunsScored else 0,

      # Hit indicator
      is_hit = as.integer(PlayResult %in% hit_results),

      # On base indicator (hits + walks + HBP)
      on_base = as.integer(
        PlayResult %in% hit_results |
          KorBB %in% "Walk" |
          PitchCall %in% "HitByPitch"
      ),

      # Total bases for SLG (case_when treats an NA condition as no match)
      total_bases = dplyr::case_when(
        PlayResult == "Single" ~ 1,
        PlayResult == "Double" ~ 2,
        PlayResult == "Triple" ~ 3,
        PlayResult == "HomeRun" ~ 4,
        TRUE ~ 0
      ),

      # Per-row SLG contribution is just total bases; the denominator is
      # chosen by whoever aggregates it (walks/HBP should be excluded for
      # a true SLG).
      slg = total_bases,

      # Strikeout / walk indicators
      is_k = as.integer(KorBB %in% "Strikeout"),
      is_walk = as.integer(KorBB %in% "Walk"),

      # CSW (Called Strike + Whiff) indicator
      is_csw = as.integer(PitchCall %in% c("StrikeCalled", "StrikeSwinging")),

      # Chase indicator: a swing at a pitch located outside the zone box.
      # NA when the batter swung but the location is missing.
      chase = as.integer(
        PitchCall %in% swing_calls &
          (PlateLocSide < -zone_half_width | PlateLocSide > zone_half_width |
            PlateLocHeight < zone_bottom | PlateLocHeight > zone_top)
      ),

      # In zone indicator (NA when the location is missing)
      in_zone = as.integer(
        PlateLocSide >= -zone_half_width & PlateLocSide <= zone_half_width &
          PlateLocHeight >= zone_bottom & PlateLocHeight <= zone_top
      ),

      # Whiff indicator
      is_whiff = as.integer(PitchCall %in% "StrikeSwinging"),

      # Put away indicator (strikeout recorded on a 2-strike pitch)
      is_put_away = as.integer(KorBB %in% "Strikeout" & Strikes %in% 2),

      # PA-ending pitch indicator for rate calculations.
      # NOTE(review): any non-NA KorBB counts as PA-ending. If the feed marks
      # ordinary pitches with a placeholder such as "Undefined" instead of NA,
      # every pitch will be flagged - confirm against the data.
      PAindicator = as.integer(
        !is.na(KorBB) |
          PlayResult %in% pa_end_results |
          PitchCall %in% "HitByPitch"
      )
    )

  df
}
2276
+
2277
  create_advanced_pitcher_summary <- function(data, player_name) {
2278
  data <- normalize_columns(data)
2279
+ data <- process_pitcher_indicators(data)
2280
+
2281
  pitcher_data <- data %>% dplyr::filter(Pitcher == player_name)
2282
+
2283
+ # Calculate PA-level stats
2284
+ pa_data <- pitcher_data %>%
2285
+ filter(PAindicator == 1) %>%
2286
+ group_by(Inning, Batter, PAofInning) %>%
2287
+ slice_tail(n = 1) %>% # Get final pitch of each PA
2288
+ ungroup()
2289
+
2290
  summary_stats <- pitcher_data %>%
2291
  dplyr::summarise(
2292
+ IP = round(sum(OutsOnPlay, na.rm = TRUE) / 3, 1),
2293
+ R = sum(RunsScored, na.rm = TRUE),
2294
+ BF = n_distinct(paste(Inning, Batter, PAofInning)),
2295
  K = sum(KorBB == "Strikeout", na.rm = TRUE),
2296
  BB = sum(WalkIndicator, na.rm = TRUE),
2297
+ H = sum(PlayResult %in% c("Single", "Double", "Triple", "HomeRun"), na.rm = TRUE),
2298
+ `Strike%` = round(100 * mean(is_csw | PitchCall %in% c("FoulBall", "FoulBallNotFieldable", "InPlay"), na.rm = TRUE), 1),
2299
+ `CSW%` = round(100 * mean(is_csw, na.rm = TRUE), 1),
2300
+ `Whiff%` = ifelse(sum(SwingIndicator, na.rm = TRUE) > 0,
2301
+ round(100 * sum(is_whiff, na.rm = TRUE) / sum(SwingIndicator, na.rm = TRUE), 1), 0),
2302
+ `Zone%` = round(100 * mean(in_zone, na.rm = TRUE), 1),
 
 
 
2303
  .groups = "drop"
2304
  )
2305