# Fit a logistic regression (binomial GLM) of `Exited` on every other column
# of a processed CSV, then write the coefficient table and basic fit metadata.
#
# Usage: Rscript scripts/r_analysis.R <processed_csv> <out_dir>
#
# Outputs:
#   <out_dir>/tables/r_glm_coefficients.csv  one row per fitted coefficient
#   <out_dir>/r_meta.json                    n, p and AIC of the fit

# Render a single number as a JSON value. JSON has no literal for
# NA/NaN/Inf, so anything non-finite is written as `null`.
json_number <- function(x) {
  if (length(x) != 1L || !is.finite(x)) {
    return("null")
  }
  as.character(x)
}

# Command-line arguments ----
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "Usage: Rscript scripts/r_analysis.R <processed_csv> <out_dir>",
    call. = FALSE
  )
}

processed_csv <- args[1]
out_dir <- args[2]

# Load data ----
df <- read.csv(processed_csv)
if (!("Exited" %in% names(df))) {
  stop("Expected target column 'Exited' in processed CSV", call. = FALSE)
}

# The binomial family needs a numeric response; coerce the target to integer.
df$Exited <- as.integer(df$Exited)

# Treat the known categorical predictors as factors when they are present.
if ("Geography" %in% names(df)) {
  df$Geography <- as.factor(df$Geography)
}
if ("Gender" %in% names(df)) {
  df$Gender <- as.factor(df$Gender)
}

# Fit model ----
# `Exited ~ .` regresses the target on every remaining column of `df`.
model_formula <- as.formula("Exited ~ .")
model <- glm(model_formula, data = df, family = binomial())

# Coefficient table ----
# Columns of the summary matrix, in order: estimate, standard error,
# z statistic, p-value.
coefs <- summary(model)$coefficients
coef_df <- data.frame(
  term = rownames(coefs),
  estimate = coefs[, 1],
  std_error = coefs[, 2],
  z_value = coefs[, 3],
  p_value = coefs[, 4],
  row.names = NULL
)

# recursive = TRUE also creates `out_dir` itself when it does not exist yet.
dir.create(file.path(out_dir, "tables"), showWarnings = FALSE, recursive = TRUE)
out_csv <- file.path(out_dir, "tables", "r_glm_coefficients.csv")
write.csv(coef_df, out_csv, row.names = FALSE)

# Fit metadata ----
# n: rows read from the CSV.
# p: columns other than the target (not the number of fitted coefficients;
#    a factor column expands into several).
out_json <- file.path(out_dir, "r_meta.json")
json_lines <- c(
  "{",
  paste0("  \"n\": ", json_number(nrow(df)), ","),
  paste0("  \"p\": ", json_number(ncol(df) - 1), ","),
  paste0("  \"aic\": ", json_number(AIC(model))),
  "}"
)
writeLines(json_lines, out_json)

# Report the paths that were actually written, not a fixed "outputs/" prefix.
cat("Tables: ", out_csv, "\n", sep = "")
cat("Meta: ", out_json, "\n", sep = "")