Spaces:
Running
Running
Update app.R
Browse files
app.R
CHANGED
|
@@ -1967,6 +1967,7 @@ has_col_index <- function(idx) {
|
|
| 1967 |
is.numeric(idx) && length(idx) == 1 && !is.na(idx) && is.finite(idx) && idx >= 1
|
| 1968 |
}
|
| 1969 |
|
|
|
|
| 1970 |
stuffplus_model <- tryCatch({
|
| 1971 |
readr::read_rds("college_stuffplusCC.rds")
|
| 1972 |
}, error = function(e) {
|
|
@@ -2191,24 +2192,114 @@ advanced_normalize_columns <- function(df) {
|
|
| 2191 |
|
| 2192 |
# Alias: normalize_columns() is bound to advanced_normalize_columns(), so
# callers such as create_advanced_pitcher_summary() pick up that implementation.
normalize_columns <- advanced_normalize_columns
|
| 2193 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2194 |
create_advanced_pitcher_summary <- function(data, player_name) {
|
| 2195 |
data <- normalize_columns(data)
|
|
|
|
|
|
|
| 2196 |
pitcher_data <- data %>% dplyr::filter(Pitcher == player_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2197 |
summary_stats <- pitcher_data %>%
|
| 2198 |
dplyr::summarise(
|
| 2199 |
-
|
| 2200 |
-
|
|
|
|
| 2201 |
K = sum(KorBB == "Strikeout", na.rm = TRUE),
|
| 2202 |
BB = sum(WalkIndicator, na.rm = TRUE),
|
| 2203 |
-
H = sum(PlayResult %in% c("Single","Double","Triple","HomeRun"), na.rm = TRUE),
|
| 2204 |
-
|
| 2205 |
-
|
| 2206 |
-
|
| 2207 |
-
|
| 2208 |
-
`
|
| 2209 |
-
`Whiff%` = ifelse(sum(SwingIndicator, na.rm = TRUE) > 0,
|
| 2210 |
-
100 * sum(WhiffIndicator, na.rm = TRUE) / sum(SwingIndicator, na.rm = TRUE), 0),
|
| 2211 |
-
`Zone%` = 100 * sum(StrikeZoneIndicator, na.rm = TRUE) / dplyr::n(),
|
| 2212 |
.groups = "drop"
|
| 2213 |
)
|
| 2214 |
|
|
|
|
| 1967 |
is.numeric(idx) && length(idx) == 1 && !is.na(idx) && is.finite(idx) && idx >= 1
|
| 1968 |
}
|
| 1969 |
|
| 1970 |
+
|
| 1971 |
stuffplus_model <- tryCatch({
|
| 1972 |
readr::read_rds("college_stuffplusCC.rds")
|
| 1973 |
}, error = function(e) {
|
|
|
|
| 2192 |
|
| 2193 |
# Alias: normalize_columns() is bound to advanced_normalize_columns(), so
# callers such as create_advanced_pitcher_summary() pick up that implementation.
normalize_columns <- advanced_normalize_columns
|
| 2194 |
|
| 2195 |
+
#' Add per-pitch indicator columns used by the pitcher summary
#'
#' Derives 0/1 indicator columns (hits, on-base events, strikeouts, walks,
#' CSW, chase, in-zone, whiff, put-away, plate-appearance end) plus
#' `OutsOnPlay`, `RunsScored`, `total_bases` and `slg` from pitch-level data.
#'
#' Categorical comparisons use `%in%`, so a missing `KorBB`, `PlayResult` or
#' `PitchCall` yields 0 rather than `NA`. Location-based indicators
#' (`in_zone`, and `chase` for swings) stay `NA` when the pitch location is
#' missing.
#'
#' @param df Pitch-level data frame. Must contain the columns `PlayResult`,
#'   `KorBB`, `PitchCall`, `PlateLocSide`, `PlateLocHeight` and `Strikes`.
#'   An existing `RunsScored` column is kept as is; otherwise it is created
#'   and filled with 0.
#' @param zone_side Half-width of the strike zone; a pitch is in the zone
#'   when `abs(PlateLocSide) <= zone_side`.
#' @param zone_bottom Lower edge of the strike zone (`PlateLocHeight`).
#' @param zone_top Upper edge of the strike zone (`PlateLocHeight`).
#' @return `df` with the indicator columns added (or overwritten).
process_pitcher_indicators <- function(df,
                                       zone_side = 0.83,
                                       zone_bottom = 1.5,
                                       zone_top = 3.38) {
  # Fail early with a readable message instead of an "object not found"
  # error from inside mutate().
  required_cols <- c(
    "PlayResult", "KorBB", "PitchCall",
    "PlateLocSide", "PlateLocHeight", "Strikes"
  )
  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "process_pitcher_indicators(): missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  hit_results <- c("Single", "Double", "Triple", "HomeRun")
  out_results <- c("Out", "FieldersChoice", "Sacrifice", "SacrificeFly")
  pa_results <- c(hit_results, out_results, "Error")
  swing_calls <- c(
    "StrikeSwinging", "FoulBall", "FoulBallNotFieldable",
    "FoulBallFieldable", "InPlay"
  )
  has_runs <- "RunsScored" %in% names(df)

  # `.env$` pins these helpers to the function environment so a data column
  # with the same name cannot shadow them inside mutate().
  dplyr::mutate(
    df,
    # NOTE(review): this overwrites any existing OutsOnPlay column and credits
    # at most one out per pitch (double plays are not counted) -- confirm
    # against the data feed.
    OutsOnPlay = dplyr::case_when(
      PlayResult %in% .env$out_results ~ 1,
      KorBB %in% "Strikeout" ~ 1,
      TRUE ~ 0
    ),

    # Keep the feed's runs when present, otherwise default to 0.
    RunsScored = if (.env$has_runs) .data$RunsScored else 0,

    is_hit = as.integer(PlayResult %in% .env$hit_results),

    # On base: hits + walks + hit-by-pitch.
    on_base = as.integer(
      PlayResult %in% .env$hit_results |
        KorBB %in% "Walk" |
        PitchCall %in% "HitByPitch"
    ),

    total_bases = dplyr::case_when(
      PlayResult %in% "Single" ~ 1,
      PlayResult %in% "Double" ~ 2,
      PlayResult %in% "Triple" ~ 3,
      PlayResult %in% "HomeRun" ~ 4,
      TRUE ~ 0
    ),

    # Per-pitch total bases; dividing by at-bats (excluding walks/HBP) to get
    # a true SLG is left to the summarising code.
    slg = total_bases,

    is_k = as.integer(KorBB %in% "Strikeout"),
    is_walk = as.integer(KorBB %in% "Walk"),

    # CSW = called strike + whiff.
    is_csw = as.integer(PitchCall %in% c("StrikeCalled", "StrikeSwinging")),

    # Chase = swing at a pitch outside the zone.
    chase = as.integer(
      PitchCall %in% .env$swing_calls &
        (PlateLocSide < -.env$zone_side | PlateLocSide > .env$zone_side |
          PlateLocHeight < .env$zone_bottom | PlateLocHeight > .env$zone_top)
    ),

    in_zone = as.integer(
      PlateLocSide >= -.env$zone_side & PlateLocSide <= .env$zone_side &
        PlateLocHeight >= .env$zone_bottom & PlateLocHeight <= .env$zone_top
    ),

    is_whiff = as.integer(PitchCall %in% "StrikeSwinging"),

    # Put away = strikeout recorded on a two-strike pitch.
    is_put_away = as.integer(KorBB %in% "Strikeout" & Strikes == 2),

    # Marks the pitch that ends a plate appearance. Testing for the explicit
    # terminal outcomes (instead of `!is.na(KorBB)`) keeps pitches whose KorBB
    # holds a non-NA placeholder from being counted as PA-ending.
    # NOTE(review): assumes "Strikeout"/"Walk" are the only terminal KorBB
    # values -- confirm against the data feed.
    PAindicator = as.integer(
      KorBB %in% c("Strikeout", "Walk") |
        PlayResult %in% .env$pa_results |
        PitchCall %in% "HitByPitch"
    )
  )
}
|
| 2276 |
+
|
| 2277 |
create_advanced_pitcher_summary <- function(data, player_name) {
|
| 2278 |
data <- normalize_columns(data)
|
| 2279 |
+
data <- process_pitcher_indicators(data)
|
| 2280 |
+
|
| 2281 |
pitcher_data <- data %>% dplyr::filter(Pitcher == player_name)
|
| 2282 |
+
|
| 2283 |
+
# Calculate PA-level stats
|
| 2284 |
+
pa_data <- pitcher_data %>%
|
| 2285 |
+
filter(PAindicator == 1) %>%
|
| 2286 |
+
group_by(Inning, Batter, PAofInning) %>%
|
| 2287 |
+
slice_tail(n = 1) %>% # Get final pitch of each PA
|
| 2288 |
+
ungroup()
|
| 2289 |
+
|
| 2290 |
summary_stats <- pitcher_data %>%
|
| 2291 |
dplyr::summarise(
|
| 2292 |
+
IP = round(sum(OutsOnPlay, na.rm = TRUE) / 3, 1),
|
| 2293 |
+
R = sum(RunsScored, na.rm = TRUE),
|
| 2294 |
+
BF = n_distinct(paste(Inning, Batter, PAofInning)),
|
| 2295 |
K = sum(KorBB == "Strikeout", na.rm = TRUE),
|
| 2296 |
BB = sum(WalkIndicator, na.rm = TRUE),
|
| 2297 |
+
H = sum(PlayResult %in% c("Single", "Double", "Triple", "HomeRun"), na.rm = TRUE),
|
| 2298 |
+
`Strike%` = round(100 * mean(is_csw | PitchCall %in% c("FoulBall", "FoulBallNotFieldable", "InPlay"), na.rm = TRUE), 1),
|
| 2299 |
+
`CSW%` = round(100 * mean(is_csw, na.rm = TRUE), 1),
|
| 2300 |
+
`Whiff%` = ifelse(sum(SwingIndicator, na.rm = TRUE) > 0,
|
| 2301 |
+
round(100 * sum(is_whiff, na.rm = TRUE) / sum(SwingIndicator, na.rm = TRUE), 1), 0),
|
| 2302 |
+
`Zone%` = round(100 * mean(in_zone, na.rm = TRUE), 1),
|
|
|
|
|
|
|
|
|
|
| 2303 |
.groups = "drop"
|
| 2304 |
)
|
| 2305 |
|