Spaces:

cjerzak
/

fastrerandomize

Running

App Files Files Community

cjerzak committed on Apr 10, 2025

Commit

8ea2821

verified ·

1 Parent(s): da997e8

Update app.R

Browse files

Files changed (1) hide show

app.R +79 -289

app.R CHANGED Viewed

@@ -1,4 +1,4 @@
-#
 # ============================================================
 #  app.R  |  Shiny App for Rerandomization with fastrerandomize
 # ============================================================
@@ -25,248 +25,6 @@ library(parallel)         # For detecting CPU cores
 # install.packages("devtools")
 # devtools::install_github("cjerzak/fastrerandomize-software/fastrerandomize")
-# ---------------------------------------------------------
-# HELPER FUNCTIONS (BASE R)
-# ---------------------------------------------------------
-# 1) Compute Hotelling's T^2 in base R
-baseR_hotellingT2 <- function(X, W) {
-  # For a single assignment W:
-  # T^2 = (n0 * n1 / (n0 + n1)) * (xbar1 - xbar0)^T * S_inv * (xbar1 - xbar0)
-  n <- length(W)
-  n1 <- sum(W)
-  n0 <- n - n1
-  if (n1 == 0 || n0 == 0) return(NA_real_)  # invalid scenario
-  xbar_treat <- colMeans(X[W == 1, , drop = FALSE])
-  xbar_control <- colMeans(X[W == 0, , drop = FALSE])
-  diff_vec <- (xbar_treat - xbar_control)
-  # covariance (pooled) – we just use cov(X)
-  S <- cov(X)
-  Sinv <- tryCatch(solve(S), error = function(e) NULL)
-  if (is.null(Sinv)) {
-    # fallback: diagonal approximation if solve fails
-    Sinv <- diag(1 / diag(S), ncol(S))
-  }
-  out <- (n0 * n1 / (n0 + n1)) * c(t(diff_vec) %*% Sinv %*% diff_vec)
-  out
-}
-# 2) Generate randomizations in base R, filtering by acceptance probability
-#    using T^2 and keep the best (lowest) fraction.
-baseR_generate_randomizations <- function(n_units, n_treated, X, accept_prob, random_type,
-                                          max_draws, batch_size) {
-  # For safety, check if exact enumerations will explode:
-  if (random_type == "exact") {
-    n_comb_total <- choose(n_units, n_treated)
-    if (n_comb_total > 1e6) {
-      warning(
-        sprintf("Exact randomization is requested, but that is %s combinations.
-                 This may be infeasible in terms of memory/time.
-                 Consider Monte Carlo instead.", format(n_comb_total, big.mark=",")),
-        immediate. = TRUE
-      )
-    }
-  }
-  if (random_type == "exact") {
-    # -------------- EXACT RANDOMIZATIONS --------------
-    cidx <- combn(n_units, n_treated)
-    # Build assignment matrix
-    n_comb <- ncol(cidx)
-    assignment_mat <- matrix(0, nrow = n_comb, ncol = n_units)
-    for (i in seq_len(n_comb)) {
-      assignment_mat[i, cidx[, i]] <- 1
-    }
-    # Compute T^2 for each row
-    T2vals <- apply(assignment_mat, 1, function(w) baseR_hotellingT2(X, w))
-    # Drop any NA (in pathological cases)
-    keep_idx <- which(!is.na(T2vals))
-    assignment_mat <- assignment_mat[keep_idx, , drop = FALSE]
-    T2vals <- T2vals[keep_idx]
-    # acceptance threshold
-    cutoff <- quantile(T2vals, probs = accept_prob)
-    keep_final <- (T2vals < cutoff)
-    assignment_mat_accepted <- assignment_mat[keep_final, , drop = FALSE]
-    T2vals_accepted <- T2vals[keep_final]
-  } else {
-    # -------------- MONTE CARLO RANDOMIZATIONS --------------
-    # We'll sample max_draws permutations
-    base_assign <- c(rep(1, n_treated), rep(0, n_units - n_treated))
-    # We'll store T^2's in chunks to reduce memory overhead
-    batch_count <- ceiling(max_draws / batch_size)
-    all_assign <- list()
-    all_T2 <- numeric(0)
-    cur_draw <- 0
-    for (b in seq_len(batch_count)) {
-      ndraws_here <- min(batch_size, max_draws - cur_draw)
-      cur_draw <- cur_draw + ndraws_here
-      # sample permutations
-      perms <- matrix(nrow = ndraws_here, ncol = n_units)
-      for (j in seq_len(ndraws_here)) {
-        perms[j, ] <- sample(base_assign)
-      }
-      # T^2 for each
-      T2vals_batch <- apply(perms, 1, function(w) baseR_hotellingT2(X, w))
-      # collect
-      all_assign[[b]] <- perms
-      all_T2 <- c(all_T2, T2vals_batch)
-    }
-    assignment_mat <- do.call(rbind, all_assign)
-    # remove any NA
-    keep_idx <- which(!is.na(all_T2))
-    assignment_mat <- assignment_mat[keep_idx, , drop = FALSE]
-    all_T2 <- all_T2[keep_idx]
-    # acceptance threshold
-    cutoff <- quantile(all_T2, probs = accept_prob)
-    keep_final <- (all_T2 < cutoff)
-    assignment_mat_accepted <- assignment_mat[keep_final, , drop = FALSE]
-    T2vals_accepted <- all_T2[keep_final]
-  }
-  list(randomizations = assignment_mat_accepted, balance = T2vals_accepted)
-}
-# Helper: compute difference in means quickly
-diff_in_means <- function(Y, W) {
-  mean(Y[W == 1]) - mean(Y[W == 0])
-}
-# Helper: for a given tau, relabel outcomes and compute the difference in means for a single permutation
-compute_diff_at_tau_for_oneW <- function(Wprime, obsY, obsW, tau) {
-  # Y0_under_null = obsY - obsW * tau
-  Y0 <- obsY - obsW * tau
-  # Y1_under_null = Y0 + tau
-  # But in practice, for assignment Wprime, the observed outcome is:
-  #   Y'(i) = Y0(i) if Wprime(i) = 0, or Y0(i) + tau if Wprime(i)=1
-  Yprime <- Y0
-  Yprime[Wprime == 1] <- Y0[Wprime == 1] + tau
-  diff_in_means(Yprime, Wprime)
-}
-# 3a) Base R randomization test: observed difference in means and a two-sided
-#     p-value; a fiducial interval is added only when findFI = TRUE
-# (The FI logic itself lives in baseR_find_fiducial_interval, defined below.)
-baseR_randomization_test <- function(obsW, obsY, allW, findFI = FALSE, alpha = 0.05) {
-  # Observed diff in means
-  tau_obs <- diff_in_means(obsY, obsW)
-  # for each candidate assignment, compute diff in means on obsY
-  diffs <- apply(allW, 1, function(w) diff_in_means(obsY, w))
-  # p-value = fraction whose absolute diff >= observed
-  pval <- mean(abs(diffs) >= abs(tau_obs))
-  # optionally compute a fiducial interval
-  FI <- NULL
-  if (findFI) {
-    FI <- baseR_find_fiducial_interval(obsW, obsY, allW, tau_obs, alpha = alpha)
-  }
-  list(p_value = pval, tau_obs = tau_obs, FI = FI)
-}
-# 3b) The fiducial interval logic for base R, mirroring the approach in fastrerandomize:
-#     1) Attempt to find a wide lower and upper bracket via random updates
-#     2) Then a grid search in [lowerBound-1, upperBound*2] for which tau are accepted.
-baseR_find_fiducial_interval <- function(obsW, obsY, allW, tau_obs, alpha = 0.05, c_initial = 2,
-                                         n_search_attempts = 500) {
-  # random bracket approach
-  lowerBound_est <- tau_obs - 3*tau_obs
-  upperBound_est <- tau_obs + 3*tau_obs
-  z_alpha <- qnorm(1 - alpha)
-  k <- 2 / (z_alpha * (2 * pi)^(-1/2) * exp(-z_alpha^2 / 2))
-  # For each iteration, pick one random assignment from allW
-  # then see how the implied difference changes, and update the bracket
-  n_allW <- nrow(allW)
-  for (step_t in seq_len(n_search_attempts)) {
-    # pick random assignment
-    idx <- sample.int(n_allW, 1)
-    Wprime <- allW[idx, ]
-    # ~~~~~ update lowerBound ~~~~~
-    # Y0 = obsY - obsW * lowerBound_est
-    # Y'(Wprime) = ...
-    lowerY0 <- obsY - obsW * lowerBound_est
-    Yprime_lower <- lowerY0
-    Yprime_lower[Wprime == 1] <- lowerY0[Wprime == 1] + lowerBound_est
-    tau_at_step_lower <- diff_in_means(Yprime_lower, Wprime)
-    c_step <- c_initial
-    # difference from obs
-    delta <- tau_obs - tau_at_step_lower
-    if (tau_at_step_lower < tau_obs) {
-      # move lowerBound up
-      lowerBound_est <- lowerBound_est + k * delta * (alpha/2) / step_t
-    } else {
-      # move it down
-      lowerBound_est <- lowerBound_est - k * (-delta) * (1 - alpha/2) / step_t
-    }
-    # ~~~~~ update upperBound ~~~~~
-    upperY0 <- obsY - obsW * upperBound_est
-    Yprime_upper <- upperY0
-    Yprime_upper[Wprime == 1] <- upperY0[Wprime == 1] + upperBound_est
-    tau_at_step_upper <- diff_in_means(Yprime_upper, Wprime)
-    delta2 <- tau_at_step_upper - tau_obs
-    if (tau_at_step_upper > tau_obs) {
-      # move upperBound down
-      upperBound_est <- upperBound_est - k * delta2 * (alpha/2) / step_t
-    } else {
-      # move it up
-      upperBound_est <- upperBound_est + k * (-delta2) * (1 - alpha/2) / step_t
-    }
-  }
-  # Now we do a grid search from (lowerBound_est - 1) to (upperBound_est * 2)
-  # in e.g. 100 steps, seeing which tau is "accepted".
-  # We'll define "accepted" if the min of:
-  #    fraction(tau_obs >= distribution_of(tau_pseudo))
-  #    fraction(tau_obs <= distribution_of(tau_pseudo))
-  # is > alpha, i.e. do not reject
-  grid_lower <- lowerBound_est - 1
-  grid_upper <- upperBound_est * 2
-  tau_seq <- seq(grid_lower, grid_upper, length.out = 100)
-  accepted <- logical(length(tau_seq))
-  for (i in seq_along(tau_seq)) {
-    tau_pseudo <- tau_seq[i]
-    # for each row in allW, compute the diff in means if the true effect = tau_pseudo
-    # distribution_of(tau_pseudo)
-    diffs_pseudo <- apply(allW, 1, function(wp) compute_diff_at_tau_for_oneW(wp, obsY, obsW, tau_pseudo))
-    # Then see how often diffs_pseudo >= tau_obs (or <= tau_obs)
-    frac_ge <- mean(diffs_pseudo >= tau_obs)
-    frac_le <- mean(diffs_pseudo <= tau_obs)
-    # min(...) is the typical "two-sided" approach
-    accepted[i] <- (min(frac_ge, frac_le) > alpha / 2) # or 0.05 if we want 5% test
-  }
-  if (!any(accepted)) {
-    # no values accepted => degenerate?
-    # We'll return the bracket we found, or NA.
-    return(c(NA, NA))
-  }
-  c(min(tau_seq[accepted]), max(tau_seq[accepted]))
-}
 # ---------------------------------------------------------
 # UI Section
 # ---------------------------------------------------------
@@ -481,7 +239,7 @@ ui <- dashboardPage(
                 numericInput("max_draws", "Max Draws (MC)", value = 1e5, min = 1e3),
                 numericInput("batch_size", "Batch Size (MC)", value = 1e3, min = 1e2)
               ),
-              actionButton("generate_btn", "Generate Randomizations")
           ),
           box(width = 8, title = "Summary of Accepted Randomizations",
@@ -516,32 +274,49 @@ ui <- dashboardPage(
         tabName = "randtest",
         fluidRow(
-          box(width = 4, title = "Randomization Test Setup",
-              status = "primary", solidHeader = TRUE,
-              radioButtons("outcome_source", "Outcome Data (Y):",
-                           choices = c("Simulate Y" = "simulate",
-                                       "Upload CSV" = "uploadY"),
-                           selected = "simulate"),
-              conditionalPanel(
-                condition = "input.outcome_source == 'simulate'",
-                numericInput("true_tau", "True Effect (simulate)", 1, step = 0.5),
-                numericInput("noise_sd", "Noise SD for Y", 0.5, step = 0.1),
-                actionButton("simulateY_btn", "Simulate Y")
-              ),
-              conditionalPanel(
-                condition = "input.outcome_source == 'uploadY'",
-                fileInput("file_outcomes", "Choose CSV File with outcome vector Y",
-                          accept = c(".csv")),
-                helpText("Single column with length = #units.")
-              ),
-              br(),
-              actionButton("run_randtest_btn", "Run Randomization Test"),
-              checkboxInput("findFI", "Compute Fiducial Interval?", value = TRUE)
           ),
           box(width = 8, title = "Test Results", status = "info", solidHeader = TRUE,
               # First row: p-value and observed effect (fastrerandomize)
@@ -671,7 +446,7 @@ server <- function(input, output, session) {
       # =========== 2) base R generation timing ===========
       t0_base <- Sys.time()
       out_base <- tryCatch({
-        baseR_generate_randomizations(
           n_units    = nrow(X_data()),
           n_treated  = input$n_treated,
           X          = X_data(),
@@ -709,7 +484,7 @@ server <- function(input, output, session) {
     if (is.null(rr) || is.null(rr$balance)) {
       valueBox("---", "Min Balance Measure", icon = icon("question"), color = "orange")
     } else {
-      minBal <- round(min(rr$balance), 4)
       valueBox(minBal, "Min Balance Measure", icon = icon("thumbs-up"), color = "blue")
     }
   })
@@ -742,8 +517,9 @@ server <- function(input, output, session) {
     df <- data.frame(balance = rr$balance)
     ggplot(df, aes(x = balance)) +
       geom_histogram(binwidth = diff(range(df$balance))/30, fill = "darkblue", alpha = 0.7) +
-      labs(title = "Distribution of Balance Measure",
-           x = "Balance (e.g. T^2)",
            y = "Frequency") +
       theme_minimal(base_size = 14)
   })
@@ -804,6 +580,24 @@ server <- function(input, output, session) {
     }
   })
   # The randomization test result:
   RandTestResult <- reactiveVal(NULL)
   RandTestResult_base <- reactiveVal(NULL)
@@ -857,7 +651,7 @@ server <- function(input, output, session) {
       t0_testbase <- Sys.time()
       outTestBase <- tryCatch({
-        baseR_randomization_test(
           obsW    = obsW,
           obsY    = obsY,
           allW    = rr_base$randomizations,
@@ -889,9 +683,9 @@ server <- function(input, output, session) {
   output$tauobs_box <- renderValueBox({
     rt <- RandTestResult()
     if (is.null(rt)) {
-      valueBox("---", "Observed Effect (fastrerandomize)", icon = icon("question"), color = "maroon")
     } else {
-      valueBox(round(rt$tau_obs, 4), "Observed Effect (fastrerandomize)", icon = icon("bullseye"), color = "maroon")
     }
   })
@@ -917,19 +711,14 @@ server <- function(input, output, session) {
   })
   # If we have a fiducial interval from fastrerandomize, display it
-  output$fi_text <- renderUI({
-    rt <- RandTestResult()
-    if (is.null(rt) || is.null(rt$FI)) {
-      return(NULL)
-    }
-    fi_lower <- round(rt$FI[1], 4)
-    fi_upper <- round(rt$FI[2], 4)
-    tagList(
-      strong("Fiducial Interval (fastrerandomize, 95%):"),
-      p(sprintf("[%.4f, %.4f]", fi_lower, fi_upper))
-    )
-  })
   # If we have a fiducial interval from base R, display it
   output$fi_text_baseR <- renderUI({
@@ -941,7 +730,7 @@ server <- function(input, output, session) {
     fi_upper <- round(rt$FI[2], 4)
     tagList(
-      strong("Fiducial Interval (base R, 95%):"),
       p(sprintf("[%.4f, %.4f]", fi_lower, fi_upper))
     )
   })
@@ -973,3 +762,4 @@ server <- function(input, output, session) {
 # Run the Application
 # ---------------------------------------------------------
 shinyApp(ui = ui, server = server)

+#  install.packages("~/Documents/fastrerandomize-software/fastrerandomize",repos = NULL, type = "source",force = F)
 # ============================================================
 #  app.R  |  Shiny App for Rerandomization with fastrerandomize
 # ============================================================
 # install.packages("devtools")
 # devtools::install_github("cjerzak/fastrerandomize-software/fastrerandomize")
 # ---------------------------------------------------------
 # UI Section
 # ---------------------------------------------------------
                 numericInput("max_draws", "Max Draws (MC)", value = 1e5, min = 1e3),
                 numericInput("batch_size", "Batch Size (MC)", value = 1e3, min = 1e2)
               ),
+              actionButton("generate_btn", "Generate")
           ),
           box(width = 8, title = "Summary of Accepted Randomizations",
         tabName = "randtest",
         fluidRow(
+          box(
+            width = 4, title = "Randomization Test Setup",
+            status = "primary", solidHeader = TRUE,
+            # Outcome (Y) source: simulate Y or upload it as a CSV
+            radioButtons("outcome_source", "Outcome Data (Y):",
+                         choices = c("Simulate Y" = "simulate",
+                                     "Upload CSV" = "uploadY"),
+                         selected = "simulate"),
+            conditionalPanel(
+              condition = "input.outcome_source == 'simulate'",
+              numericInput("true_tau", "True Effect (simulate)", 1, step = 0.5),
+              numericInput("noise_sd", "Noise SD for Y", 0.5, step = 0.1),
+              actionButton("simulateY_btn", "Simulate Y")
+            ),
+            conditionalPanel(
+              condition = "input.outcome_source == 'uploadY'",
+              fileInput("file_outcomes", "Choose CSV File with outcome vector Y",
+                        accept = c(".csv")),
+              helpText("Single column with length = #units.")
+            ),
+            br(),
+            actionButton("run_randtest_btn", "Run Test"),
+            checkboxInput("findFI", "Compute Fiducial Interval?", value = TRUE)
           ),
+          box(
+            width = 8, title = "Preview of Outcomes (Y)",
+            status = "info", solidHeader = TRUE,
+            DTOutput("outcomes_table")
+          )
+        ),
+        fluidRow(
+          box(
+            width = 4, title = NULL, status = NULL,
+            background = NULL, solidHeader = FALSE, collapsible = FALSE,
+            tags$p("Note: Relative speedups greatest for large number of accepted randomizations.",
+                   style = "color:#555; font-size:90%; margin:0;")
+          ),
           box(width = 8, title = "Test Results", status = "info", solidHeader = TRUE,
               # First row: p-value and observed effect (fastrerandomize)
       # =========== 2) base R generation timing ===========
       t0_base <- Sys.time()
       out_base <- tryCatch({
+        generate_randomizations_R(
           n_units    = nrow(X_data()),
           n_treated  = input$n_treated,
           X          = X_data(),
     if (is.null(rr) || is.null(rr$balance)) {
       valueBox("---", "Min Balance Measure", icon = icon("question"), color = "orange")
     } else {
+      minBal <- round(min(rr$balance), 3)
       valueBox(minBal, "Min Balance Measure", icon = icon("thumbs-up"), color = "blue")
     }
   })
     df <- data.frame(balance = rr$balance)
     ggplot(df, aes(x = balance)) +
       geom_histogram(binwidth = diff(range(df$balance))/30, fill = "darkblue", alpha = 0.7) +
+      labs(title = "Distribution of Balance Statistic",
+           subtitle = "Among Accepted Randomizations",
+           x = "Balance (i.e., T^2)",
            y = "Frequency") +
       theme_minimal(base_size = 14)
   })
     }
   })
+  # Render a preview of Y
+  output$outcomes_table <- renderDT({
+    req(Y_data())  # Make sure Y_data is not NULL
+    # Convert to data frame for DT
+    dfy <- data.frame(Y = Y_data())
+    # Optionally round numeric data
+    dfy[] <- lapply(dfy, function(col) {
+      if (is.numeric(col)) signif(col, 3) else col
+    })
+    datatable(
+      dfy,
+      options = list(scrollX = TRUE, pageLength = 5)
+    )
+  })
   # The randomization test result:
   RandTestResult <- reactiveVal(NULL)
   RandTestResult_base <- reactiveVal(NULL)
       t0_testbase <- Sys.time()
       outTestBase <- tryCatch({
+        randomization_test_R(
           obsW    = obsW,
           obsY    = obsY,
           allW    = rr_base$randomizations,
   output$tauobs_box <- renderValueBox({
     rt <- RandTestResult()
     if (is.null(rt)) {
+      valueBox("---", "Observed Effect", icon = icon("question"), color = "maroon")
     } else {
+      valueBox(round(rt$tau_obs, 4), "Observed Effect", icon = icon("bullseye"), color = "maroon")
     }
   })
   })
   # Fiducial interval display for fastrerandomize (currently disabled; kept commented out below)
+  #output$fi_text <- renderUI({
+  #  rt <- RandTestResult()
+  #  if (is.null(rt) || is.null(rt$FI)) {
+  #    return(NULL)
+  #  }
+  #  fi_lower <- round(rt$FI[1], 4)
+  #  fi_upper <- round(rt$FI[2], 4)
+  #})
   # If we have a fiducial interval from base R, display it
   output$fi_text_baseR <- renderUI({
     fi_upper <- round(rt$FI[2], 4)
     tagList(
+      strong("Fiducial Interval (95%):"),
       p(sprintf("[%.4f, %.4f]", fi_lower, fi_upper))
     )
   })
 # Run the Application
 # ---------------------------------------------------------
 shinyApp(ui = ui, server = server)