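# sabr3evaluation/data/data_processing.R
# (Residue from the web file viewer, kept as comments so the script parses.)
# rkarthur's picture
# Commit message: fixing my previous fix which made an error
# Commit: d64c0d2
# raw
# history blame
# 2.51 kB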
library(tidyverse)
# Load every season CSV found in base_path, reduce each to one row per player
# (total PA, PA-weighted OPS, age), tag it with its season, and stack the
# seasons into the single table `a`.
base_path <- "C:/Users/rober/Documents/Github/sabr3evaluation/data/"
base_files <- grep("20[0-9]*.csv", list.files(base_path), value = TRUE)

# Fail with a clear message when nothing matches; `1:length(x)` would
# otherwise iterate over c(1, 0) and die inside read_csv() on an NA file name.
if (length(base_files) == 0) {
  stop("No season CSV files found in ", base_path, call. = FALSE)
}

files <- vector("list", length(base_files))
for (i in seq_along(base_files)) {
  files[[i]] <- read_csv(paste0(base_path, base_files[i])) %>%
    # NOTE(review): this threshold is applied per team row, before the
    # per-player sum, so a traded player's stints of <= 99 PA are dropped
    # rather than counted toward PAtot -- confirm that is intended.
    filter(PA > 99) %>%
    # Drop the combined "TOT" rows so PA is not double counted in the sum.
    filter(Tm != "TOT") %>%
    group_by(`Name-additional`) %>%
    summarize(
      PAtot = sum(PA),
      OPS = weighted.mean(OPS, PA),
      # first() always yields exactly one row per player; unique() would
      # emit several rows if Age ever differed between a player's rows.
      Age = first(Age)
    )
  # Season label is positional: the i-th file (in list.files() order) is
  # labelled 2016 + i. Presumably the files are 2017, 2018, ... with no gaps
  # -- TODO confirm against the data directory.
  files[[i]]$year <- 2016 + i
}
a <- bind_rows(files)
# Split the stacked table by season. The 2020 rows form the base of the
# projection table; the other seasons serve as lookup tables keyed by the
# player id column `Name-additional`.
marc_pred <- filter(a, year == 2020)
r21 <- filter(a, year == 2021)
r19 <- filter(a, year == 2019)
r18 <- filter(a, year == 2018)
r17 <- filter(a, year == 2017)

# Positional rename of the five columns of the 2020 table.
names(marc_pred) <- c("bbref_id", "PA1", "OPSY1", "Age", "Year")

# Row position of each 2020 player in each earlier season (NA when the
# player has no row that season, which yields NA in the looked-up columns).
idx19 <- match(marc_pred$bbref_id, r19$`Name-additional`)
idx18 <- match(marc_pred$bbref_id, r18$`Name-additional`)
idx17 <- match(marc_pred$bbref_id, r17$`Name-additional`)

# Suffix 2 = 2019, 3 = 2018, 4 = 2017.
marc_pred$OPSY2 <- r19$OPS[idx19]
marc_pred$PA2 <- r19$PAtot[idx19]
marc_pred$OPSY3 <- r18$OPS[idx18]
marc_pred$PA3 <- r18$PAtot[idx18]
marc_pred$OPSY4 <- r17$OPS[idx17]
marc_pred$PA4 <- r17$PAtot[idx17]
# Build the Marcel-style OPS projection for players who have more than 99 PA
# in each of the four seasons 2017-2020, then attach the observed 2021 OPS.
final_pred <- marc_pred %>%
  select(
    bbref_id, Age, PA1, PA2, PA3, PA4,
    OPSY1, OPSY2, OPSY3, OPSY4
  ) %>%
  # Comparisons against NA are dropped by filter(), so this also removes
  # players missing from any of the four seasons.
  filter(PA1 > 99 & PA2 > 99 & PA3 > 99 & PA4 > 99) %>%
  mutate(
    # 5/4/3 weighting of the three most recent seasons (2020, 2019, 2018).
    weighted_avg = (OPSY1 * 5 + OPSY2 * 4 + OPSY3 * 3) / 12,
    # Share of the projection taken from the player's own record; the rest
    # comes from the .720 baseline below.
    reliability = (PA1 + PA2 + PA3) / (PA1 + PA2 + PA3 + 1200),
    # Regressed rate = (Player reliability * Player Rate) +
    # ((1 - Player reliability) * .720)
    regressed_ops = reliability * weighted_avg + (1 - reliability) * .720,
    # Age adjustment around 29: players over 29 lose 0.3% per year, players
    # under 29 gain 0.6% per year. The under-29 branch was previously
    # (Age - 29) * .006, which is negative and pushed young players DOWN.
    # NOTE(review): Age here is the 2020 value taken from the 2020 rows;
    # confirm whether the projection-season age (Age + 1) is intended.
    age_adjusted_ops = regressed_ops *
      (1 + ifelse(Age > 29, (29 - Age) * .003, (29 - Age) * .006)),
    ops_pred = age_adjusted_ops
  )

# Observed 2021 OPS for each projected player (NA when absent from 2021).
final_pred$ops_real <-
  r21$OPS[match(final_pred$bbref_id, r21$`Name-additional`)]
# Full table, including the intermediate projection columns, the projection
# itself and the observed 2021 OPS.
write_csv(
  final_pred,
  "C:/Users/rober/Documents/Github/sabr3evaluation/data/MarcelData_wproj.csv"
)

# Release table: inputs only, with the projection columns and the observed
# 2021 OPS removed.
data_for_release <- final_pred %>%
  select(
    -weighted_avg, -age_adjusted_ops, -reliability,
    -ops_pred, -ops_real
  )

# Write the stripped table. This call previously wrote `final_pred`, which
# left `data_for_release` unused and put ops_pred / ops_real in the release
# file.
write_csv(
  data_for_release,
  "C:/Users/rober/Documents/Github/sabr3evaluation/data/SABR3_FinalAssignment_data.csv"
)
#