Spaces:
Running
Running
| library(plumber) | |
| #* @apiTitle Effect Size Calculator API | |
#' Calculate a Distribution-Free Effect Size (d_reg)
#'
#' This function computes a distribution-free effect size by modeling the
#' empirical distribution function (eCDF) of two groups via polynomial
#' regression. The effect size is computed as the standardized
#' difference between the means of the smoothed distributions.
#'
#' The method involves:
#' 1. Fitting polynomials to each group's quantile function: x = f(z)
#' 2. Computing moments (mean, variance) of the polynomials
#' 3. Calculating d(reg) using the pooled standard deviation
#'
#' @param x1 A numeric vector of data for the first group.
#' @param x2 A numeric vector of data for the second group.
#' @param degree The degree of the polynomial to fit (default = 5).
#'   Higher degrees capture more complex distributional shapes but
#'   may overfit with small samples.
#' @param CI Confidence level for confidence interval (default = NA, no CI
#'   computed). If specified (e.g., 0.95), uses asymptotic normal
#'   approximation. WARNING: CI formula assumes Cohen's d distribution and
#'   may not be accurate for d_reg.
#' @param silent Logical; if TRUE, suppresses warnings during fitting
#'   (default = TRUE).
#'
#' @return A list (S3 class "d_reg") containing:
#' \item{d_reg}{The distribution-free effect size (standardized mean difference).}
#' \item{group1_mean}{Mean of the smoothed distribution for group 1.}
#' \item{group1_variance}{Variance of the smoothed distribution for group 1.}
#' \item{group1_sd}{Standard deviation of the smoothed distribution for group 1.}
#' \item{group2_mean}{Mean of the smoothed distribution for group 2.}
#' \item{group2_variance}{Variance of the smoothed distribution for group 2.}
#' \item{group2_sd}{Standard deviation of the smoothed distribution for group 2.}
#' \item{pooled_sd}{Pooled standard deviation.}
#' \item{n1}{Sample size of group 1.}
#' \item{n2}{Sample size of group 2.}
#' \item{model1}{Fitted polynomial model for group 1.}
#' \item{model2}{Fitted polynomial model for group 2.}
#' \item{default_degree}{Polynomial degree used.}
#' \item{tie_proportion_1}{Proportion of tied values in group 1.}
#' \item{tie_proportion_2}{Proportion of tied values in group 2.}
#' \item{n_unique_1}{Number of unique values in group 1.}
#' \item{n_unique_2}{Number of unique values in group 2.}
#' \item{ci_lower}{Lower bound of confidence interval (if CI specified).}
#' \item{ci_upper}{Upper bound of confidence interval (if CI specified).}
#' \item{ci_level}{Confidence level (if CI specified).}
#'
#' @details
#' The method is distribution-free and converges to Cohen's d under normality with
#' increasing group size. It is robust to outliers and skewness compared to
#' classical parametric methods.
#'
#' Sample size requirements: At least (degree + 1) observations per group.
#' Recommended: n > 10 per group for stable polynomial fits.
#' For small samples (n < 20), consider using degree = 3 or lower.
#'
#' Confidence intervals use an asymptotic approximation based on Cohen's d
#' distribution and may not accurately reflect the true sampling distribution
#' of d_reg, especially in small samples or non-normal data.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' @references
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @examples
#' # Normal distributions
#' set.seed(123)
#' x1 <- rnorm(30, mean = 0, sd = 1)
#' x2 <- rnorm(30, mean = 0.5, sd = 1)
#' result <- d.reg(x1, x2)
#' print(result)
#'
#' # With confidence interval
#' result_ci <- d.reg(x1, x2, CI = 0.95)
#' print(result_ci)
#'
#' # Skewed distributions
#' x1 <- rexp(50, rate = 1)
#' x2 <- rexp(50, rate = 0.8)
#' result <- d.reg(x1, x2, degree = 4)
#' print(result)
#'
#' @export
d.reg <- function(x1, x2, degree = 5, CI = NA, silent = TRUE) {
  # ==========================================================================
  # Input Validation
  # ==========================================================================
  if (!is.numeric(x1) || !is.numeric(x2)) {
    stop("Both x1 and x2 must be numeric vectors.")
  }
  # degree is compared against sample sizes below, so it must be a usable
  # scalar; reject vectors, NA and degenerate values up front.
  if (!is.numeric(degree) || length(degree) != 1 || is.na(degree) ||
      degree < 1) {
    stop("degree must be a single numeric value >= 1.")
  }
  # Validate CI early. FIX: the length check must come before is.na(CI) --
  # previously a vector-valued CI made `if (!is.na(CI))` itself fail with an
  # opaque "condition has length > 1" error instead of the intended message.
  if (length(CI) != 1 || (!is.na(CI) && !is.numeric(CI))) {
    stop("CI must be a single numeric value or NA.")
  }
  if (!is.na(CI) && (CI <= 0 || CI >= 1)) {
    stop("CI must be between 0 and 1 (exclusive).")
  }
  # Handle missing values (drop them before any sample-size checks)
  if (any(is.na(x1)) || any(is.na(x2))) {
    if (!silent) {
      warning("Missing values detected and will be removed.")
    }
    x1 <- x1[!is.na(x1)]
    x2 <- x2[!is.na(x2)]
  }
  n1 <- length(x1)
  n2 <- length(x2)
  # Check for empty groups
  if (n1 == 0 || n2 == 0) {
    stop("Cannot compute effect size with empty groups after removing NAs.")
  }
  # A degree-d polynomial has d + 1 coefficients; fewer points cannot
  # identify the fit.
  if (n1 < degree + 1) {
    stop("Group 1 has insufficient data: need at least ", degree + 1,
         " observations for degree ", degree, " polynomial (got ", n1, ").")
  }
  if (n2 < degree + 1) {
    stop("Group 2 has insufficient data: need at least ", degree + 1,
         " observations for degree ", degree, " polynomial (got ", n2, ").")
  }
  # Fit the smoothed quantile function x = f(z) for each group.
  model1 <- fit_polynomial(x1, degree)
  model2 <- fit_polynomial(x2, degree)
  # Extract tie diagnostics attached by fit_polynomial()
  tie1 <- attr(model1, "tie_proportion")
  tie2 <- attr(model2, "tie_proportion")
  n_unique1 <- attr(model1, "n_unique")
  n_unique2 <- attr(model2, "n_unique")
  # Warn about substantial ties (suggests discrete/ordinal data)
  if (!silent && (tie1 > 0.3 || tie2 > 0.3)) {
    message(sprintf(
      "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
      tie1 * 100, tie2 * 100
    ))
    message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
    message("Consider comparing multiple effect size measures for discrete data.")
  }
  moments1 <- get_moments(model1, group_label = "Group 1")
  moments2 <- get_moments(model2, group_label = "Group 2")
  # Weighted pooled variance (population formula, not sample formula)
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
  pooled_sd <- sqrt(weighted_pooled_variance)
  mean_diff <- moments2$mean - moments1$mean
  # Degenerate SD: effect is 0 for equal means, signed Inf otherwise.
  if (pooled_sd == 0) {
    if (mean_diff == 0) {
      d_reg <- 0
    } else {
      d_reg <- sign(mean_diff) * Inf
      if (!silent) {
        warning("Pooled SD is zero but means differ. Returning Inf with appropriate sign.")
      }
    }
  } else {
    d_reg <- mean_diff / pooled_sd
  }
  result <- list(
    d_reg = d_reg,
    # Group 1 statistics
    group1_mean = moments1$mean,
    group1_variance = moments1$variance,
    group1_sd = sqrt(moments1$variance),
    # Group 2 statistics
    group2_mean = moments2$mean,
    group2_variance = moments2$variance,
    group2_sd = sqrt(moments2$variance),
    # Pooled statistics
    pooled_sd = pooled_sd,
    # Sample sizes
    n1 = n1,
    n2 = n2,
    # Models
    model1 = model1,
    model2 = model2,
    # Metadata
    default_degree = degree,
    tie_proportion_1 = tie1,
    tie_proportion_2 = tie2,
    n_unique_1 = n_unique1,
    n_unique_2 = n_unique2
  )
  if (!is.na(CI)) {
    # Standard error using asymptotic approximation.
    # NOTE: this formula assumes Cohen's d distribution and may not be
    # accurate for d_reg, especially in small samples or non-normal data.
    se_dreg <- sqrt((n1 + n2) / (n1 * n2) + (d_reg^2) / (2 * (n1 + n2)))
    # Degrees of freedom
    df <- n1 + n2 - 2
    # Critical value from t-distribution
    alpha <- 1 - CI
    t_crit <- qt(1 - alpha / 2, df)
    # Confidence interval bounds
    ci_lower <- d_reg - t_crit * se_dreg
    ci_upper <- d_reg + t_crit * se_dreg
    # Add to result
    result$ci_lower <- ci_lower
    result$ci_upper <- ci_upper
    result$ci_level <- CI
    result$ci_se <- se_dreg
    result$ci_df <- df
  }
  class(result) <- "d_reg"
  return(result)
}
#' Fit a Polynomial to eCDF
#'
#' This helper function fits a polynomial regression model to represent the
#' distribution. It models the relationship between standard-normal
#' z-scores and observed raw scores (a smoothed quantile function).
#'
#' @param x A numeric vector of observations.
#' @param poly_degree The degree of the polynomial to fit.
#' @param check_monotonicity Logical; should monotonicity be enforced by
#'   reducing polynomial degree if needed? (default = TRUE)
#' @param min_degree Minimum polynomial degree to try (default = 1, representing
#'   a linear fit to a normal distribution).
#'
#' @return An lm model object representing x = f(z), where z ~ N(0,1).
#'   Additional attributes:
#' \describe{
#'   \item{sample_size}{Original sample size}
#'   \item{n_unique}{Number of unique values (for tie detection)}
#'   \item{tie_proportion}{Proportion of tied observations}
#'   \item{poly_degree}{Actual polynomial degree used (may be reduced)}
#'   \item{monotonic}{Logical; is the fitted function monotonic?}
#'   \item{degree_reduced}{Logical; was degree reduced from requested?}
#'   \item{min_derivative}{Smallest slope found in the checked z-range}
#' }
#'
#' @details
#' The function uses average ranks (midrank method) to handle tied observations,
#' which is the standard approach in rank-based statistics. Plotting positions
#' (rank - 0.5)/n avoid infinite z-scores at boundaries.
#'
#' When substantial ties are present, the function may automatically reduce
#' the polynomial degree to avoid overfitting to a small number of unique
#' values.
#'
#' If check_monotonicity = TRUE, the function iteratively reduces the
#' polynomial degree until a monotonic fit is achieved or min_degree is
#' reached.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
fit_polynomial <- function(x, poly_degree,
                           check_monotonicity = TRUE,
                           min_degree = 1) {
  # Step 1: Input validation and tie detection
  n <- length(x)
  if (n < 3) {
    stop("Need at least 3 observations to fit a polynomial.")
  }
  # Count unique values to detect ties
  n_unique <- length(unique(x))
  tie_proportion <- 1 - (n_unique / n)
  # Step 2: Adjust polynomial degree based on unique values.
  # Can't fit more parameters than unique data points.
  max_possible_degree <- n_unique - 1
  if (poly_degree > max_possible_degree) {
    # FIX: the sprintf() call previously split the message across extra
    # arguments (and passed the last value to warning()), which garbled it.
    warning(sprintf(
      "Requested polynomial degree (%d) exceeds number of unique values (%d). Reducing to degree %d.",
      poly_degree, n_unique, max_possible_degree
    ))
    poly_degree <- max_possible_degree
  }
  # Additional reduction for substantial ties (> 30%): be more conservative.
  if (tie_proportion > 0.3 && poly_degree > 3) {
    recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
    if (recommended_degree < poly_degree) {
      warning(sprintf(
        "High proportion of ties (%.1f%%). Reducing polynomial degree from %d to %d for stability.",
        tie_proportion * 100, poly_degree, recommended_degree
      ))
      poly_degree <- recommended_degree
    }
  }
  # Ensure we stay above minimum
  if (poly_degree < min_degree) {
    # FIX: same malformed sprintf()/stop() argument split as above.
    stop(sprintf(
      "Insufficient unique values (%d) to fit minimum polynomial degree (%d). Need at least %d unique observations.",
      n_unique, min_degree, min_degree + 1
    ))
  }
  # Step 3: Compute ranks and z-scores (handles ties via midrank).
  # Average ranks assign the mean rank to tied observations; the plotting
  # positions (rank - 0.5)/n keep p strictly inside (0, 1) so qnorm is finite.
  avg_ranks <- rank(x, ties.method = "average")
  p <- (avg_ranks - 0.5) / n
  z <- qnorm(p)
  # Step 4: Fit polynomial, with optional monotonicity enforcement.
  # NOTE: the logical parameter shadows the helper function of the same name;
  # R resolves the *calls* below to the function (non-function bindings are
  # skipped in call lookup), so this works, but keep the names in mind.
  check_range <- range(z)
  current_degree <- poly_degree
  degree_reduced <- FALSE
  monotonic <- FALSE
  if (check_monotonicity) {
    while (current_degree >= min_degree) {
      model <- lm(x ~ poly(z, current_degree, raw = TRUE))
      check <- check_monotonicity(model, z_range = check_range)
      if (check$is_monotonic) {
        monotonic <- TRUE
        break
      }
      # Reduce degree and try again
      current_degree <- current_degree - 1
      degree_reduced <- TRUE
    }
    # Emergency fallback: accept the minimum-degree fit even if non-monotonic
    # (rare/impossible for degree 1 unless the correlation is negative).
    if (current_degree < min_degree) {
      current_degree <- min_degree
      model <- lm(x ~ poly(z, current_degree, raw = TRUE))
      check <- check_monotonicity(model, z_range = check_range)
      monotonic <- check$is_monotonic
    }
  } else {
    model <- lm(x ~ poly(z, current_degree, raw = TRUE))
    check <- check_monotonicity(model, z_range = check_range)
    monotonic <- check$is_monotonic
  }
  # Metadata consumed by d.reg() and useful for diagnostics
  attr(model, "sample_size") <- n
  attr(model, "n_unique") <- n_unique
  attr(model, "tie_proportion") <- tie_proportion
  attr(model, "poly_degree") <- current_degree
  attr(model, "monotonic") <- monotonic
  # FIX: degree_reduced was documented as a returned attribute but never set.
  attr(model, "degree_reduced") <- degree_reduced
  attr(model, "min_derivative") <- check$min_derivative
  return(model)
}
#' Check Monotonicity of Fitted Quantile Function
#'
#' Analytically checks whether a polynomial quantile function is monotonic
#' (non-decreasing, or strictly increasing) within a given z-range.
#'
#' @param model An lm model object fitted with poly(..., raw = TRUE).
#' @param z_range A numeric vector of length 2 defining the range [min, max]
#'   over which to check monotonicity (default c(-4, 4)).
#' @param strictly_positive Logical; if TRUE, the derivative must be > 0
#'   (strict). If FALSE, the derivative may be >= 0 (allows flat regions).
#'
#' @return A list containing:
#' \item{is_monotonic}{Logical; TRUE if monotonic in range.}
#' \item{min_derivative}{The lowest slope found in the range.}
#' \item{location_min}{The z-value where the minimum slope occurs.}
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
check_monotonicity <- function(model, z_range = c(-4, 4),
                               strictly_positive = FALSE) {
  # --- argument checks ------------------------------------------------------
  if (!inherits(model, "lm")) stop("model must be an lm object")
  if (length(z_range) != 2 || z_range[1] >= z_range[2]) {
    stop("z_range must be a vector [min, max] with min < max")
  }
  coeffs <- coef(model)
  # A fit with NA coefficients (failed/rank-deficient regression) cannot be
  # assessed analytically -- report it as non-monotonic.
  if (any(is.na(coeffs))) {
    return(list(
      is_monotonic = FALSE,
      min_derivative = -Inf,
      location_min = NA
    ))
  }
  degree <- length(coeffs) - 1
  # Constant polynomial: flat everywhere, trivially (weakly) monotonic.
  if (degree == 0) {
    return(list(
      is_monotonic = TRUE,
      min_derivative = 0,
      location_min = 0
    ))
  }
  # Linear polynomial: f(z) = b0 + b1*z, so the derivative is just b1.
  if (degree == 1) {
    slope <- coeffs[2]
    return(list(
      is_monotonic = if (strictly_positive) slope > 0 else slope >= 0,
      min_derivative = slope,
      location_min = 0
    ))
  }
  # Coefficients of f'(z), ascending powers: term c_i * z^i differentiates
  # to (i * c_i) * z^(i - 1).
  d1 <- unname(coeffs[-1]) * seq_len(degree)
  # Evaluate a polynomial (ascending coefficients) at a scalar z using
  # Horner's scheme, folding from the highest coefficient downwards.
  horner <- function(z, p) {
    Reduce(function(acc, cf) acc * z + cf, rev(p)[-1], init = rev(p)[1])
  }
  # The minimum of f' over [min, max] is attained at an interval endpoint or
  # at a root of f''(z); collect the real roots of f'' inside the interval.
  d2 <- d1[-1] * seq_len(degree - 1)
  candidate_roots <- polyroot(d2)
  real_roots <- Re(candidate_roots)[abs(Im(candidate_roots)) < 1e-9]
  inside <- real_roots[real_roots >= z_range[1] & real_roots <= z_range[2]]
  # Probe the endpoints plus every interior critical point of f'.
  probes <- unique(c(z_range[1], z_range[2], inside))
  slopes <- vapply(probes, horner, numeric(1), p = d1)
  min_slope <- min(slopes)
  # Small tolerance absorbs floating-point noise around zero.
  tol <- if (strictly_positive) 1e-9 else -1e-9
  list(
    is_monotonic = min_slope >= tol,
    min_derivative = min_slope,
    location_min = probes[which.min(slopes)]
  )
}
#' Calculate Moments from a Fitted Polynomial Function
#'
#' This function computes the mean and variance of the distribution represented
#' by a polynomial function using Isserlis' (1918) theorem (the Gaussian
#' moment factorization).
#'
#' @param model An lm model object from \code{\link{fit_polynomial}()}.
#'   The model should represent the relationship x = f(z) where z are standard
#'   normal quantiles and x are the observed values.
#' @param group_label Optional character string label for warning messages
#'   (default = "Unknown"). Used to identify which group produced warnings in
#'   multi-group comparisons.
#'
#' @return A list with elements:
#' \item{mean}{The expected value E[X] where X = f(Z), Z ~ N(0,1).}
#' \item{variance}{The variance Var(X) = E[X²] - (E[X])².}
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @seealso
#' \code{\link{fit_polynomial}} for fitting the polynomial model.
#' \code{\link{d.reg}} for the main effect size calculation.
#'
#' @examples
#' # Generate sample data
#' set.seed(123)
#' x <- rnorm(50, mean = 100, sd = 15)
#'
#' # Fit quantile function
#' n <- length(x)
#' avg_ranks <- rank(x, ties.method = "average")
#' p <- (avg_ranks - 0.5) / n
#' z <- qnorm(p)
#' model <- lm(x ~ poly(z, 5, raw = TRUE))
#'
#' # Compute moments
#' moments <- get_moments(model, group_label = "Test Group")
#'
#' cat("Mean:", moments$mean, "\n")
#' cat("Variance:", moments$variance, "\n")
#' cat("SD:", sqrt(moments$variance), "\n")
#'
#' # Compare with sample statistics
#' cat("\nSample mean:", mean(x), "\n")
#' cat("Sample variance:", var(x), "\n")
#'
#' @export
get_moments <- function(model, group_label = "Unknown") {
  # Extract coefficients; drop the "(Intercept)"/poly term names so the
  # returned mean/variance are clean, unnamed scalars.
  coeffs <- unname(coef(model))
  k <- length(coeffs) - 1  # polynomial degree
  # Raw moments of the standard normal: E[Z^j].
  # For j even: E[Z^j] = (j-1)!! = (j-1) * (j-3) * ... * 3 * 1
  # For j odd:  E[Z^j] = 0 (by symmetry)
  compute_moment <- function(j) {
    if (j == 0) return(1)       # E[Z^0] = 1 (total probability)
    if (j %% 2 == 1) return(0)  # odd moments vanish
    # Even moments via the double factorial:
    # E[Z^2] = 1, E[Z^4] = 3, E[Z^6] = 15, E[Z^8] = 105, ...
    prod(seq(j - 1, 1, by = -2))
  }
  # Moments up to degree 2k are needed for E[X^2].
  max_moment <- 2 * k
  moments_z <- vapply(0:max_moment, compute_moment, numeric(1))
  # Mean: mu = E[f(Z)] = sum_j beta_j E[Z^j]
  # (only even-powered terms contribute, by symmetry)
  mu <- sum(coeffs * moments_z[seq_len(k + 1)])
  # Variance: sigma^2 = E[X^2] - mu^2, with
  # E[X^2] = E[(sum_i beta_i Z^i)^2] = sum_i sum_j beta_i beta_j E[Z^(i+j)]
  E_X2 <- 0
  for (i in 0:k) {
    for (j in 0:k) {
      E_X2 <- E_X2 + coeffs[i + 1] * coeffs[j + 1] * moments_z[i + j + 1]
    }
  }
  variance <- E_X2 - mu^2
  # Handle numerical edge cases: analytically variance >= 0, so a negative
  # value is either rounding noise or a sign of an unstable fit.
  if (variance < 0) {
    if (abs(variance) < 1e-10) {
      # Likely just numerical noise - round to zero
      variance <- 0
    } else {
      # Substantial negative variance indicates a real problem
      warning(
        "Variance for ", group_label, " is negative (",
        format(variance, scientific = TRUE, digits = 3),
        "). This indicates numerical instability in the polynomial fit. ",
        "Consider reducing the polynomial degree or checking for data issues.",
        call. = FALSE
      )
      # Set to zero to avoid downstream errors, but flag it
      variance <- 0
    }
  }
  return(list(
    mean = mu,
    variance = variance
  ))
}
#' Print Method for d_reg Objects
#'
#' Prints a compact summary of the effect size, group statistics and, when
#' present, the confidence interval.
#'
#' @param x An object of class "d_reg"
#' @param ... Additional arguments (not used)
#'
#' @return Invisibly returns \code{x}, so the method pipes cleanly.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
print.d_reg <- function(x, ...) {
  cat("\nDistribution-Free Effect Size (d_reg)\n")
  cat("===================================\n\n")
  cat("Effect size d_reg:", round(x$d_reg, 4), "\n")
  # Display CI if available.
  # FIX: the sprintf already ends with "\n"; the previous extra ", \"\\n\""
  # argument made cat emit a stray space-only line after the CI.
  if (!is.null(x$ci_lower)) {
    cat(sprintf("%d%% CI: [%.4f, %.4f]\n",
                round(x$ci_level * 100),
                x$ci_lower,
                x$ci_upper))
  }
  cat("\nGroup 1: n =", x$n1, ", mean =", round(x$group1_mean, 4),
      ", SD =", round(x$group1_sd, 4), "\n")
  cat("Group 2: n =", x$n2, ", mean =", round(x$group2_mean, 4),
      ", SD =", round(x$group2_sd, 4), "\n")
  cat("Pooled SD:", round(x$pooled_sd, 4), "\n")
  cat("Polynomial degree:", x$default_degree, "\n")
  invisible(x)
}
#' Summary Method for d_reg Objects
#'
#' Provides detailed summary statistics and diagnostic information.
#'
#' @param object An object of class "d_reg"
#' @param ... Additional arguments (not used)
#'
#' @return Invisibly returns \code{object}.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
summary.d_reg <- function(object, ...) {
  # Verbal label for the magnitude, following Cohen's conventional cut-offs.
  abs_d <- abs(object$d_reg)
  verdict <-
    if (abs_d < 0.2) "negligible"
    else if (abs_d < 0.5) "small"
    else if (abs_d < 0.8) "medium"
    else "large"
  # R-squared of a fitted polynomial model, rounded for display.
  r2 <- function(m) round(summary(m)$r.squared, 4)
  cat("\n")
  cat("=======================================================\n")
  cat(" Distribution-Free Effect Size Analysis (d_reg)\n")
  cat("=======================================================\n\n")
  cat("Effect Size:\n")
  cat(" d_reg =", round(object$d_reg, 4), "\n")
  cat(" Interpretation:", verdict, "\n\n")
  cat("Group 1:\n")
  cat(" Sample size: ", object$n1, "\n")
  cat(" Mean (smoothed): ", round(object$group1_mean, 4), "\n")
  cat(" SD (smoothed): ", round(object$group1_sd, 4), "\n")
  cat(" Variance: ", round(object$group1_variance, 4), "\n\n")
  cat("Group 2:\n")
  cat(" Sample size: ", object$n2, "\n")
  cat(" Mean (smoothed): ", round(object$group2_mean, 4), "\n")
  cat(" SD (smoothed): ", round(object$group2_sd, 4), "\n")
  cat(" Variance: ", round(object$group2_variance, 4), "\n\n")
  cat("Pooled Statistics:\n")
  cat(" Pooled SD: ", round(object$pooled_sd, 4), "\n")
  cat(" Mean difference: ", round(object$group2_mean - object$group1_mean, 4), "\n\n")
  cat("Model Details:\n")
  cat(" Polynomial degree:", object$default_degree, "\n")
  cat(" Model 1 R²: ", r2(object$model1), "\n")
  cat(" Model 2 R²: ", r2(object$model2), "\n\n")
  if (!is.null(object$ci_lower) && !is.null(object$ci_upper)) {
    cat(sprintf("Confidence Interval (%.1f%%): [%.4f, %.4f]\n\n",
                object$ci_level * 100,
                round(object$ci_lower, 4),
                round(object$ci_upper, 4)))
  }
  cat("=======================================================\n\n")
  invisible(object)
}
# API endpoint
#* Calculate effect size from two groups
#* @param group1 Comma-separated numeric values for group 1
#* @param group2 Comma-separated numeric values for group 2
#* @param degree Polynomial degree for the fit (default 4)
#* @param ci Confidence interval level (default 0.95)
#* @post /calculate
#* @get /calculate
function(group1 = NULL, group2 = NULL, degree = 4, ci = 0.95) {
  tryCatch({
    # Reject missing/empty inputs up front with a clear message.
    # FIX: using nzchar(...[1]) instead of `== ""` keeps the scalar `||`
    # condition valid even if plumber delivers a repeated query parameter
    # as a vector (which previously crashed the `if`).
    if (is.null(group1) || is.null(group2) ||
        !nzchar(as.character(group1)[1]) || !nzchar(as.character(group2)[1])) {
      return(list(
        success = FALSE,
        error = "Missing required parameters: group1 and group2"
      ))
    }
    # Parse the comma-separated inputs. Coercion warnings are suppressed
    # because parse failures are reported explicitly below.
    x1 <- suppressWarnings(as.numeric(unlist(strsplit(as.character(group1), ","))))
    x2 <- suppressWarnings(as.numeric(unlist(strsplit(as.character(group2), ","))))
    ci_level <- suppressWarnings(as.numeric(ci))
    degree <- suppressWarnings(as.numeric(degree))
    # FIX: validate the numeric options before calling d.reg so the API
    # returns actionable messages (previously a non-numeric degree leaked
    # into d.reg and surfaced as an opaque R error).
    if (length(degree) != 1 || is.na(degree) || degree < 1) {
      return(list(
        success = FALSE,
        error = "Invalid 'degree': must be a single number >= 1."
      ))
    }
    if (length(ci_level) != 1 || is.na(ci_level) ||
        ci_level <= 0 || ci_level >= 1) {
      return(list(
        success = FALSE,
        error = "Invalid 'ci': must be a number strictly between 0 and 1."
      ))
    }
    # Remove any NA values produced by unparseable tokens
    x1 <- x1[!is.na(x1)]
    x2 <- x2[!is.na(x2)]
    if (length(x1) == 0 || length(x2) == 0) {
      return(list(
        success = FALSE,
        error = "Invalid input: Could not parse numeric values from input strings"
      ))
    }
    # Calculate effect size with CI
    result <- d.reg(x1, x2, degree, CI = ci_level, silent = TRUE)
    # Return clean result (classic mean/sd included for comparison)
    return(list(
      success = TRUE,
      d_reg = result$d_reg,
      ci_lower = result$ci_lower,
      ci_upper = result$ci_upper,
      ci_level = result$ci_level,
      group1 = list(
        n = result$n1,
        mean = result$group1_mean,
        sd = result$group1_sd,
        mean_classic = mean(x1),
        sd_classic = sd(x1)
      ),
      group2 = list(
        n = result$n2,
        mean = result$group2_mean,
        sd = result$group2_sd,
        mean_classic = mean(x2),
        sd_classic = sd(x2)
      ),
      pooled_sd = result$pooled_sd,
      degree = result$default_degree
    ))
  }, error = function(e) {
    # Convert any failure into a structured error payload.
    list(
      success = FALSE,
      error = conditionMessage(e)
    )
  })
}
#* Health check endpoint
#* @get /
function() {
  # Static status payload so monitors can confirm the service is up.
  endpoints <- list(
    calculate = "/calculate?group1=1,2,3&group2=4,5,6"
  )
  list(
    status = "running",
    message = "Effect Size Calculator API is ready",
    endpoints = endpoints
  )
}