WLenhard committed on
Commit
567ae09
·
verified ·
1 Parent(s): 3267b56

Upload app.R

Browse files
Files changed (1) hide show
  1. app.R +237 -48
app.R CHANGED
@@ -2,8 +2,7 @@ library(plumber)
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
- # Your functions here
6
- d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
7
 
8
  # Input validation
9
  if (!is.numeric(x1) || !is.numeric(x2)) {
@@ -35,9 +34,11 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
35
  stop("Cannot compute effect size with empty groups after removing NAs.")
36
  }
37
 
 
38
  model1 <- fit_quantile_function(x1, degree)
39
  model2 <- fit_quantile_function(x2, degree)
40
-
 
41
  tie1 <- attr(model1, "tie_proportion")
42
  tie2 <- attr(model2, "tie_proportion")
43
 
@@ -46,16 +47,22 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
46
  "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
47
  tie1 * 100, tie2 * 100
48
  ))
 
 
49
  }
50
 
51
- moments1 <- get_moments(model1, group_label = "Group 1")
52
- moments2 <- get_moments(model2, group_label = "Group 2")
 
53
 
54
- weighted_pooled_variance <- ((n1 -1) * moments1$variance + (n2 - 1) * moments2$variance) / (n1 + n2 - 2)
 
55
  pooled_sd <- sqrt(weighted_pooled_variance)
56
 
 
57
  mean_diff <- moments2$mean - moments1$mean
58
 
 
59
  if (pooled_sd == 0) {
60
  if (mean_diff == 0) {
61
  d_q <- 0
@@ -67,6 +74,7 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
67
  d_q <- mean_diff / pooled_sd
68
  }
69
 
 
70
  result <- list(
71
  d_q = d_q,
72
  group1_mean = moments1$mean,
@@ -78,18 +86,47 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
78
  pooled_sd = pooled_sd,
79
  n1 = n1,
80
  n2 = n2,
81
- degree = degree
 
 
82
  )
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  result$tie_proportion_1 <- tie1
85
  result$tie_proportion_2 <- tie2
 
 
86
 
 
87
  return(result)
88
  }
89
 
90
  fit_quantile_function <- function(x, poly_degree,
91
  check_monotonicity = FALSE,
92
- min_degree = 2) {
 
 
 
 
93
 
94
  n <- length(x)
95
 
@@ -97,99 +134,251 @@ fit_quantile_function <- function(x, poly_degree,
97
  stop("Need at least 3 observations to fit a polynomial quantile function.")
98
  }
99
 
 
100
  n_unique <- length(unique(x))
101
  tie_proportion <- 1 - (n_unique / n)
102
 
 
 
 
 
 
103
  max_possible_degree <- n_unique - 1
104
 
105
  if (poly_degree > max_possible_degree) {
 
 
 
 
 
106
  poly_degree <- max_possible_degree
107
  }
108
 
 
109
  if (tie_proportion > 0.3 && poly_degree > 3) {
 
110
  recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
111
  if (recommended_degree < poly_degree) {
 
 
 
 
112
  poly_degree <- recommended_degree
113
  }
114
  }
115
 
 
116
  if (poly_degree < min_degree) {
117
  stop(sprintf(
118
- "Insufficient unique values (%d) to fit minimum polynomial degree (%d).",
119
- n_unique, min_degree
120
- ))
 
121
  }
122
 
 
 
 
 
 
 
123
  avg_ranks <- rank(x, ties.method = "average")
 
 
124
  p <- (avg_ranks - 0.5) / n
 
 
125
  z <- qnorm(p)
126
 
 
 
 
 
127
  current_degree <- poly_degree
128
- model <- lm(x ~ poly(z, current_degree, raw = TRUE))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  attr(model, "sample_size") <- n
131
  attr(model, "n_unique") <- n_unique
132
  attr(model, "tie_proportion") <- tie_proportion
133
  attr(model, "poly_degree") <- current_degree
 
 
 
 
134
 
135
  return(model)
136
  }
137
 
138
- get_moments <- function(model, group_label = "Unknown") {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  coeffs <- coef(model)
140
- poly_degree <- length(coeffs) - 1
 
 
 
 
 
 
141
 
142
- f <- function(z) {
143
- val <- coeffs[poly_degree + 1]
144
- for (i in poly_degree:1) {
145
- val <- val * z + coeffs[i]
 
 
 
 
 
146
  }
147
- return(val)
148
  }
149
 
150
- mean_integrand <- function(z) {
151
- f(z) * dnorm(z)
152
- }
153
 
154
- mean_result <- integrate(
155
- mean_integrand,
156
- lower = -Inf,
157
- upper = Inf,
158
- subdivisions = 2000L,
159
- rel.tol = 1e-8,
160
- abs.tol = 1e-10,
161
- stop.on.error = FALSE
162
- )
163
 
164
- mu <- mean_result$value
 
 
 
165
 
166
- variance_integrand <- function(z) {
167
- deviation <- f(z) - mu
168
- deviation^2 * dnorm(z)
 
 
 
 
 
 
 
 
 
 
169
  }
170
 
171
- variance_result <- integrate(
172
- variance_integrand,
173
- lower = -Inf,
174
- upper = Inf,
175
- subdivisions = 2000L,
176
- rel.tol = 1e-8,
177
- abs.tol = 1e-10,
178
- stop.on.error = FALSE
179
- )
180
 
181
- var <- variance_result$value
 
 
 
 
182
 
183
- if (var < 0 && abs(var) < 1e-10) {
184
- var <- 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  }
186
 
 
 
 
 
187
  return(list(
188
  mean = mu,
189
- variance = var
190
  ))
191
  }
192
 
 
193
  # API endpoint
194
  #* Calculate effect size from two groups
195
  #* @param group1 Comma-separated numeric values for group 1
 
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
+ d.quantile <- function(x1, x2, degree = 4, CI = .95, silent = T) {
 
6
 
7
  # Input validation
8
  if (!is.numeric(x1) || !is.numeric(x2)) {
 
34
  stop("Cannot compute effect size with empty groups after removing NAs.")
35
  }
36
 
37
+ # Step 1: Fit the polynomial models for each group
38
  model1 <- fit_quantile_function(x1, degree)
39
  model2 <- fit_quantile_function(x2, degree)
40
+
41
+ # Check for ties and warn user
42
  tie1 <- attr(model1, "tie_proportion")
43
  tie2 <- attr(model2, "tie_proportion")
44
 
 
47
  "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
48
  tie1 * 100, tie2 * 100
49
  ))
50
+ message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
51
+ message("Consider comparing multiple effect size measures for discrete data.")
52
  }
53
 
54
+ # Step 2: Get the moments from each fitted model
55
+ moments1 <- get_moments_analytical(model1, group_label = "Group 1")
56
+ moments2 <- get_moments_analytical(model2, group_label = "Group 2")
57
 
58
+ # Step 3: Calculate the pooled standard deviation
59
+ weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
60
  pooled_sd <- sqrt(weighted_pooled_variance)
61
 
62
+ # Step 4: Compute the effect size d_q
63
  mean_diff <- moments2$mean - moments1$mean
64
 
65
+ # Handle edge cases
66
  if (pooled_sd == 0) {
67
  if (mean_diff == 0) {
68
  d_q <- 0
 
74
  d_q <- mean_diff / pooled_sd
75
  }
76
 
77
+ # Return results
78
  result <- list(
79
  d_q = d_q,
80
  group1_mean = moments1$mean,
 
86
  pooled_sd = pooled_sd,
87
  n1 = n1,
88
  n2 = n2,
89
+ degree = degree,
90
+ model1 = model1,
91
+ model2 = model2
92
  )
93
 
94
+ if(!is.na(CI)) {
95
+ if(CI <= 0 || CI >= 1) {
96
+ stop("CI must be between 0 and 1 (exclusive).")
97
+ }
98
+
99
+ # Standard error for d_q
100
+ se_dq <- sqrt((n1 + n2) / (n1 * n2) + (d_q^2) / (2 * (n1 + n2)))
101
+
102
+ df <- n1 + n2 - 2
103
+ alpha <- 1 - CI
104
+ t_crit <- qt(1 - alpha / 2, df)
105
+
106
+ ci_lower <- d_q - t_crit * se_dq
107
+ ci_upper <- d_q + t_crit * se_dq
108
+
109
+ result$ci_lower <- ci_lower
110
+ result$ci_upper <- ci_upper
111
+ result$ci_level <- CI
112
+ }
113
+
114
  result$tie_proportion_1 <- tie1
115
  result$tie_proportion_2 <- tie2
116
+ result$n_unique_1 <- attr(model1, "n_unique")
117
+ result$n_unique_2 <- attr(model2, "n_unique")
118
 
119
+ class(result) <- "d_quantile"
120
  return(result)
121
  }
122
 
123
  fit_quantile_function <- function(x, poly_degree,
124
  check_monotonicity = FALSE,
125
+ min_degree = 1) {
126
+
127
+ # ============================================================================
128
+ # Step 1: Input validation and tie detection
129
+ # ============================================================================
130
 
131
  n <- length(x)
132
 
 
134
  stop("Need at least 3 observations to fit a polynomial quantile function.")
135
  }
136
 
137
+ # Count unique values to detect ties
138
  n_unique <- length(unique(x))
139
  tie_proportion <- 1 - (n_unique / n)
140
 
141
+ # ============================================================================
142
+ # Step 2: Adjust polynomial degree based on unique values
143
+ # ============================================================================
144
+
145
+ # Can't fit more parameters than unique data points
146
  max_possible_degree <- n_unique - 1
147
 
148
  if (poly_degree > max_possible_degree) {
149
+ warning(sprintf(
150
+ "Requested polynomial degree (%d) exceeds number of unique values (%d). ",
151
+ poly_degree, n_unique,
152
+ "Reducing to degree %d."
153
+ ), max_possible_degree)
154
  poly_degree <- max_possible_degree
155
  }
156
 
157
+ # Additional reduction for substantial ties
158
  if (tie_proportion > 0.3 && poly_degree > 3) {
159
+ # With >30% ties, be more conservative
160
  recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
161
  if (recommended_degree < poly_degree) {
162
+ warning(sprintf(
163
+ "High proportion of ties (%.1f%%). Reducing polynomial degree from %d to %d for stability.",
164
+ tie_proportion * 100, poly_degree, recommended_degree
165
+ ))
166
  poly_degree <- recommended_degree
167
  }
168
  }
169
 
170
+ # Ensure we stay above minimum
171
  if (poly_degree < min_degree) {
172
  stop(sprintf(
173
+ "Insufficient unique values (%d) to fit minimum polynomial degree (%d). ",
174
+ n_unique, min_degree,
175
+ "Need at least %d unique observations."
176
+ ), min_degree + 1)
177
  }
178
 
179
+ # ============================================================================
180
+ # Step 3: Compute ranks and z-scores (handles ties via midrank)
181
+ # ============================================================================
182
+
183
+ # Average ranks handle ties by assigning mean rank to tied observations
184
+ # Example: values [1, 2, 2, 3] get ranks [1, 2.5, 2.5, 4]
185
  avg_ranks <- rank(x, ties.method = "average")
186
+
187
+ # Convert ranks to plotting positions
188
  p <- (avg_ranks - 0.5) / n
189
+
190
+ # Transform to standard normal quantiles
191
  z <- qnorm(p)
192
 
193
+ # ============================================================================
194
+ # Step 4: Fit polynomial, with optional monotonicity enforcement
195
+ # ============================================================================
196
+
197
  current_degree <- poly_degree
198
+ degree_reduced <- FALSE
199
+ monotonic <- NULL # Will be checked if requested
200
+
201
+ if (check_monotonicity) {
202
+ # Iteratively reduce degree until monotonic or min_degree reached
203
+ while (current_degree >= min_degree) {
204
+
205
+ # Fit model at current degree
206
+ model <- lm(x ~ poly(z, current_degree, raw = TRUE))
207
+
208
+ # Check monotonicity
209
+ monotonicity_check <- check_monotonicity(model)
210
+ monotonic <- monotonicity_check$is_monotonic
211
+
212
+ if (monotonic) {
213
+ # Success - monotonic fit achieved
214
+ break
215
+ }
216
+
217
+ # Not monotonic - try lower degree
218
+ current_degree <- current_degree - 1
219
+ degree_reduced <- TRUE
220
+ }
221
+
222
+ if (current_degree < min_degree) {
223
+ stop(sprintf(
224
+ "Could not achieve monotonic fit even with minimum degree %d. ",
225
+ min_degree,
226
+ "Data may be too irregular or have insufficient unique values."
227
+ ))
228
+ }
229
+
230
+ if (degree_reduced) {
231
+ warning(sprintf(
232
+ "Polynomial degree reduced from %d to %d to achieve monotonicity.",
233
+ poly_degree, current_degree
234
+ ))
235
+ }
236
+
237
+ } else {
238
+ # Just fit at requested degree without monotonicity check
239
+ model <- lm(x ~ poly(z, current_degree, raw = TRUE))
240
+
241
+ # Optionally check monotonicity for diagnostic purposes (don't enforce)
242
+ if (exists("check_monotonicity", mode = "function")) {
243
+ monotonicity_check <- check_monotonicity(model)
244
+ monotonic <- monotonicity_check$is_monotonic
245
+ }
246
+ }
247
+
248
+ # ============================================================================
249
+ # Step 5: Store metadata as attributes
250
+ # ============================================================================
251
 
252
  attr(model, "sample_size") <- n
253
  attr(model, "n_unique") <- n_unique
254
  attr(model, "tie_proportion") <- tie_proportion
255
  attr(model, "poly_degree") <- current_degree
256
+ attr(model, "requested_degree") <- poly_degree
257
+ attr(model, "degree_reduced") <- degree_reduced
258
+ attr(model, "monotonic") <- monotonic
259
+ attr(model, "has_ties") <- tie_proportion > 0.01 # Flag if >1% ties
260
 
261
  return(model)
262
  }
263
 
264
+ check_monotonicity <- function(model, z_range = c(-3, 3), n_points = 100) {
265
+
266
+ z_seq <- seq(z_range[1], z_range[2], length.out = n_points)
267
+
268
+ # Get predictions
269
+ pred <- predict(model, newdata = data.frame(z = z_seq))
270
+
271
+ # Calculate finite differences (approximate derivatives)
272
+ derivatives <- diff(pred) / diff(z_seq)
273
+
274
+ # Check for violations (negative derivatives)
275
+ # Use small tolerance to avoid flagging numerical noise
276
+ tolerance <- -1e-6
277
+ violations <- sum(derivatives < tolerance)
278
+
279
+ min_deriv <- min(derivatives)
280
+ is_monotonic <- violations == 0
281
+
282
+ return(list(
283
+ is_monotonic = is_monotonic,
284
+ min_derivative = min_deriv,
285
+ violations = violations,
286
+ proportion_violations = violations / length(derivatives),
287
+ z_range_checked = z_range
288
+ ))
289
+ }
290
+
291
+ get_moments_analytical <- function(model, group_label = "Unknown") {
292
+
293
+ # Extract coefficients and determine polynomial degree
294
  coeffs <- coef(model)
295
+ k <- length(coeffs) - 1 # polynomial degree
296
+
297
+ # --------------------------------------------------------------------------
298
+ # Pre-compute standard normal raw moments: E[Z^j]
299
+ # --------------------------------------------------------------------------
300
+ # For j even: E[Z^j] = (j-1)!! = (j-1) × (j-3) × ... × 3 × 1
301
+ # For j odd: E[Z^j] = 0 (due to symmetry)
302
 
303
+ compute_moment <- function(j) {
304
+ if (j == 0) return(1) # E[Z^0] = 1 (total probability)
305
+ if (j %% 2 == 1) return(0) # Odd moments vanish
306
+
307
+ # Even moments: double factorial
308
+ # E[Z^2] = 1, E[Z^4] = 3, E[Z^6] = 15, E[Z^8] = 105, ...
309
+ result <- 1
310
+ for (i in seq(j - 1, 1, by = -2)) {
311
+ result <- result * i
312
  }
313
+ return(result)
314
  }
315
 
316
+ # We need moments up to degree 2k for computing E[X^2]
317
+ max_moment <- 2 * k
318
+ moments_z <- sapply(0:max_moment, compute_moment)
319
 
320
+ # --------------------------------------------------------------------------
321
+ # Compute mean: μ = E[X] = E[f(Z)] = Σ β_j E[Z^j]
322
+ # --------------------------------------------------------------------------
323
+ # Only even-powered terms contribute due to symmetry
 
 
 
 
 
324
 
325
+ mu <- 0
326
+ for (j in 0:k) {
327
+ mu <- mu + coeffs[j + 1] * moments_z[j + 1]
328
+ }
329
 
330
+ # --------------------------------------------------------------------------
331
+ # Compute variance: σ² = E[X²] - μ²
332
+ # First calculate E[X²] = E[(Σ β_i Z^i)²] = Σ_i Σ_j β_i β_j E[Z^(i+j)]
333
+ # --------------------------------------------------------------------------
334
+
335
+ E_X2 <- 0
336
+ for (i in 0:k) {
337
+ for (j in 0:k) {
338
+ power <- i + j
339
+ if (power <= max_moment) {
340
+ E_X2 <- E_X2 + coeffs[i + 1] * coeffs[j + 1] * moments_z[power + 1]
341
+ }
342
+ }
343
  }
344
 
345
+ variance <- E_X2 - mu^2
 
 
 
 
 
 
 
 
346
 
347
+ # --------------------------------------------------------------------------
348
+ # Handle numerical edge cases
349
+ # --------------------------------------------------------------------------
350
+ # Variance should always be non-negative, but numerical precision limits
351
+ # can occasionally produce tiny negative values
352
 
353
+ if (variance < 0) {
354
+ if (abs(variance) < 1e-10) {
355
+ # Likely just numerical noise - round to zero
356
+ variance <- 0
357
+ } else {
358
+ # Substantial negative variance indicates a real problem
359
+ warning(
360
+ "Variance for ", group_label, " is negative (",
361
+ format(variance, scientific = TRUE, digits = 3),
362
+ "). This indicates numerical instability in the polynomial fit. ",
363
+ "Consider reducing the polynomial degree or checking for data issues.",
364
+ call. = FALSE
365
+ )
366
+ # Set to zero to avoid downstream errors, but flag it
367
+ variance <- 0
368
+ }
369
  }
370
 
371
+ # --------------------------------------------------------------------------
372
+ # Return results
373
+ # --------------------------------------------------------------------------
374
+
375
  return(list(
376
  mean = mu,
377
+ variance = variance
378
  ))
379
  }
380
 
381
+
382
  # API endpoint
383
  #* Calculate effect size from two groups
384
  #* @param group1 Comma-separated numeric values for group 1