# File: bankchurn/scripts/r_analysis.R
# (Hugging Face file-viewer residue, kept as a comment so the script parses:
#  "XRachel's picture", "Upload 14 files", "7f9c8dd verified")
# Lightweight R analysis for the HF demo pipeline.
# Usage: Rscript scripts/r_analysis.R <processed_csv> <out_dir>
# Read the two required positional arguments: the processed CSV to analyse
# and the directory that will receive the outputs.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  stop("Usage: Rscript scripts/r_analysis.R <processed_csv> <out_dir>")
}
processed_csv <- args[[1L]]
out_dir <- args[[2L]]

# Load the data and fail fast when the target column is absent.
df <- read.csv(file = processed_csv)
if (!is.element("Exited", names(df))) {
  stop("Expected target column 'Exited' in processed CSV")
}
# Fit a logistic regression (binomial glm) of the target on every other
# column of the data frame. The target is coerced to integer first.
df[["Exited"]] <- as.integer(df[["Exited"]])

# Treat the known categorical predictors as factors when they are present.
for (cat_col in intersect(c("Geography", "Gender"), names(df))) {
  df[[cat_col]] <- as.factor(df[[cat_col]])
}

formula <- Exited ~ .
model <- glm(formula = formula, family = binomial(), data = df)
# Flatten the coefficient matrix of the model summary into a data frame with
# one row per term; columns 1-4 of the matrix become estimate, std_error,
# z_value and p_value.
model_summary <- summary(model)
coefs <- model_summary[["coefficients"]]
coef_df <- data.frame(
  term = rownames(coefs),
  estimate = coefs[, 1L],
  std_error = coefs[, 2L],
  z_value = coefs[, 3L],
  p_value = coefs[, 4L],
  row.names = NULL
)

# Write the table to <out_dir>/tables, creating the directory (and any
# missing parents) if needed.
tables_dir <- file.path(out_dir, "tables")
dir.create(tables_dir, recursive = TRUE, showWarnings = FALSE)
out_csv <- file.path(tables_dir, "r_glm_coefficients.csv")
write.csv(coef_df, file = out_csv, row.names = FALSE)
# Collect run metadata: row count, number of non-target columns, model AIC.
# Note that `p` counts the columns of `df` other than the target; it is not
# the number of fitted coefficients (a factor column can yield several).
meta <- list(
  n = nrow(df),
  p = ncol(df) - 1,
  aic = AIC(model)
)

# Render a single value as a JSON number. JSON has no NA/NaN/Inf literals,
# so a missing or non-finite value is written as null to keep the file
# parseable; finite values are formatted exactly as paste0() would.
json_number <- function(x) {
  if (length(x) == 1L && is.finite(x)) {
    as.character(x)
  } else {
    "null"
  }
}

# Make sure the output directory exists instead of relying on an earlier
# step having created it as a side effect.
dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
out_json <- file.path(out_dir, "r_meta.json")
json <- paste0(
  "{\n",
  " \"n\": ", json_number(meta$n), ",\n",
  " \"p\": ", json_number(meta$p), ",\n",
  " \"aic\": ", json_number(meta$aic), "\n",
  "}\n"
)
writeLines(json, out_json)
# Report where the outputs were actually written. The paths are taken from
# the variables used for writing, so they are correct for any <out_dir>
# rather than assuming a fixed "outputs/" directory.
cat("Tables: ", out_csv, "\n", sep = "")
# NOTE(review): label changed from a second "Tables: " because this line
# reports the metadata JSON; confirm nothing parses the old label.
cat("Meta: ", out_json, "\n", sep = "")