# Effect Size Calculator API
# Plumber service exposing the distribution-free effect size d_reg
# (see d.reg() below). Scraped file-viewer residue removed from this header.
library(plumber)
#* @apiTitle Effect Size Calculator API
#' Calculate a Distribution-Free Effect Size (d_reg)
#'
#' This function computes a distribution-free effect size by modeling the
#' empirical distribution function (eCDF) of two groups via polynomial
#' regression. The effect size is computed as the standardized
#' difference between the means of the smoothed distributions.
#'
#' The method involves:
#' 1. Fitting polynomials to each group's quantile function: x = f(z)
#' 2. Computing moments (mean, variance) of the polynomials
#' 3. Calculating d(reg) using the pooled standard deviation
#'
#' @param x1 A numeric vector of data for the first group.
#' @param x2 A numeric vector of data for the second group.
#' @param degree The degree of the polynomial to fit (default = 5).
#' Higher degrees capture more complex distributional shapes but
#' may overfit with small samples.
#' @param CI Confidence level for confidence interval (default = NA, no CI computed).
#' If specified (e.g., 0.95), uses asymptotic normal approximation.
#' WARNING: CI formula assumes Cohen's d distribution and may not be accurate for d_reg.
#' @param silent Logical; if TRUE, suppresses warnings during fitting (default = TRUE).
#'
#' @return A list (S3 class "d_reg") containing:
#' \item{d_reg}{The distribution-free effect size (standardized mean difference).}
#' \item{group1_mean}{Mean of the smoothed distribution for group 1.}
#' \item{group1_variance}{Variance of the smoothed distribution for group 1.}
#' \item{group1_sd}{Standard deviation of the smoothed distribution for group 1.}
#' \item{group2_mean}{Mean of the smoothed distribution for group 2.}
#' \item{group2_variance}{Variance of the smoothed distribution for group 2.}
#' \item{group2_sd}{Standard deviation of the smoothed distribution for group 2.}
#' \item{pooled_sd}{Pooled standard deviation.}
#' \item{n1}{Sample size of group 1.}
#' \item{n2}{Sample size of group 2.}
#' \item{model1}{Fitted polynomial model for group 1.}
#' \item{model2}{Fitted polynomial model for group 2.}
#' \item{default_degree}{Polynomial degree used.}
#' \item{tie_proportion_1}{Proportion of tied values in group 1.}
#' \item{tie_proportion_2}{Proportion of tied values in group 2.}
#' \item{n_unique_1}{Number of unique values in group 1.}
#' \item{n_unique_2}{Number of unique values in group 2.}
#' \item{ci_lower}{Lower bound of confidence interval (if CI specified).}
#' \item{ci_upper}{Upper bound of confidence interval (if CI specified).}
#' \item{ci_level}{Confidence level (if CI specified).}
#'
#' @details
#' The method is distribution-free and converges to Cohen's d under normality with
#' increasing group size. It is robust to outliers and skewness compared to
#' classical parametric methods.
#'
#' Sample size requirements: At least (degree + 1) observations per group.
#' Recommended: n > 10 per group for stable polynomial fits.
#' For small samples (n < 20), consider using degree = 3 or lower.
#'
#' Confidence intervals use an asymptotic approximation based on Cohen's d
#' distribution and may not accurately reflect the true sampling distribution
#' of d_reg, especially in small samples or non-normal data.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' @references
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @examples
#' # Normal distributions
#' set.seed(123)
#' x1 <- rnorm(30, mean = 0, sd = 1)
#' x2 <- rnorm(30, mean = 0.5, sd = 1)
#' result <- d.reg(x1, x2)
#' print(result)
#'
#' # With confidence interval
#' result_ci <- d.reg(x1, x2, CI = 0.95)
#' print(result_ci)
#'
#' # Skewed distributions
#' x1 <- rexp(50, rate = 1)
#' x2 <- rexp(50, rate = 0.8)
#' result <- d.reg(x1, x2, degree = 4)
#' print(result)
#'
#' @export
d.reg <- function(x1, x2, degree = 5, CI = NA, silent = TRUE) {
  # ---- Input validation ------------------------------------------------------
  if (!is.numeric(x1) || !is.numeric(x2)) {
    stop("Both x1 and x2 must be numeric vectors.")
  }
  if (!is.numeric(degree) || length(degree) != 1 || is.na(degree) || degree < 1) {
    stop("degree must be a single numeric value >= 1.")
  }

  # Remove missing values up front; warn unless silenced.
  if (any(is.na(x1)) || any(is.na(x2))) {
    if (!silent) {
      warning("Missing values detected and will be removed.")
    }
    x1 <- x1[!is.na(x1)]
    x2 <- x2[!is.na(x2)]
  }

  n1 <- length(x1)
  n2 <- length(x2)

  if (n1 == 0 || n2 == 0) {
    stop("Cannot compute effect size with empty groups after removing NAs.")
  }

  # A degree-d polynomial has d + 1 coefficients, so each group needs at
  # least that many observations to be identifiable.
  if (n1 < degree + 1) {
    stop("Group 1 has insufficient data: need at least ", degree + 1,
         " observations for degree ", degree, " polynomial (got ", n1, ").")
  }
  if (n2 < degree + 1) {
    stop("Group 2 has insufficient data: need at least ", degree + 1,
         " observations for degree ", degree, " polynomial (got ", n2, ").")
  }

  # Validate CI. The length check must come BEFORE is.na(): the previous
  # `if (!is.na(CI))` raised "the condition has length > 1" (R >= 4.2) for
  # vector input instead of the intended error message.
  if (length(CI) != 1) {
    stop("CI must be a single numeric value or NA.")
  }
  compute_ci <- !is.na(CI)
  if (compute_ci) {
    if (!is.numeric(CI)) {
      stop("CI must be a single numeric value or NA.")
    }
    if (CI <= 0 || CI >= 1) {
      stop("CI must be between 0 and 1 (exclusive).")
    }
  }

  # ---- Fit smoothed quantile functions x = f(z) per group -------------------
  model1 <- fit_polynomial(x1, degree)
  model2 <- fit_polynomial(x2, degree)

  # Tie diagnostics recorded as attributes by fit_polynomial()
  tie1 <- attr(model1, "tie_proportion")
  tie2 <- attr(model2, "tie_proportion")
  n_unique1 <- attr(model1, "n_unique")
  n_unique2 <- attr(model2, "n_unique")

  # Flag heavily tied (likely discrete/ordinal) data when not silenced
  if (!silent && (tie1 > 0.3 || tie2 > 0.3)) {
    message(sprintf(
      "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
      tie1 * 100, tie2 * 100
    ))
    message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
    message("Consider comparing multiple effect size measures for discrete data.")
  }

  # ---- Moments and effect size ----------------------------------------------
  moments1 <- get_moments(model1, group_label = "Group 1")
  moments2 <- get_moments(model2, group_label = "Group 2")

  # Weighted pooled variance (population formula: weights n, not n - 1)
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
  pooled_sd <- sqrt(weighted_pooled_variance)
  mean_diff <- moments2$mean - moments1$mean

  # Degenerate spread: 0 when the means agree, signed Inf when they differ.
  if (pooled_sd == 0) {
    if (mean_diff == 0) {
      d_reg <- 0
    } else {
      d_reg <- sign(mean_diff) * Inf
      if (!silent) {
        warning("Pooled SD is zero but means differ. Returning Inf with appropriate sign.")
      }
    }
  } else {
    d_reg <- mean_diff / pooled_sd
  }

  result <- list(
    d_reg = d_reg,
    # Group 1 statistics
    group1_mean = moments1$mean,
    group1_variance = moments1$variance,
    group1_sd = sqrt(moments1$variance),
    # Group 2 statistics
    group2_mean = moments2$mean,
    group2_variance = moments2$variance,
    group2_sd = sqrt(moments2$variance),
    # Pooled statistics
    pooled_sd = pooled_sd,
    # Sample sizes
    n1 = n1,
    n2 = n2,
    # Fitted models (kept so callers can inspect fit quality)
    model1 = model1,
    model2 = model2,
    # Metadata
    default_degree = degree,
    tie_proportion_1 = tie1,
    tie_proportion_2 = tie2,
    n_unique_1 = n_unique1,
    n_unique_2 = n_unique2
  )

  if (compute_ci) {
    # Standard error using the asymptotic approximation for Cohen's d.
    # NOTE(review): this may not accurately reflect the sampling
    # distribution of d_reg, especially in small or non-normal samples.
    se_dreg <- sqrt((n1 + n2) / (n1 * n2) + (d_reg^2) / (2 * (n1 + n2)))
    df <- n1 + n2 - 2
    alpha <- 1 - CI
    t_crit <- qt(1 - alpha / 2, df)

    result$ci_lower <- d_reg - t_crit * se_dreg
    result$ci_upper <- d_reg + t_crit * se_dreg
    result$ci_level <- CI
    result$ci_se <- se_dreg
    result$ci_df <- df
  }

  class(result) <- "d_reg"
  return(result)
}
#' Fit a Polynomial to eCDF
#'
#' This helper function fits a polynomial regression model to represent the
#' distribution. It models the relationship between
#' z-scores and observed raw scores.
#'
#' @param x A numeric vector of observations.
#' @param poly_degree The degree of the polynomial to fit.
#' @param check_monotonicity Logical; should monotonicity be enforced by
#'   reducing polynomial degree if needed? (default = TRUE)
#' @param min_degree Minimum polynomial degree to try (default = 1, representing
#'   a linear fit to a normal distribution).
#'
#' @return An lm model object representing x = f(z), where z ~ N(0,1).
#' Additional attributes:
#' \describe{
#'   \item{sample_size}{Original sample size}
#'   \item{n_unique}{Number of unique values (for tie detection)}
#'   \item{tie_proportion}{Proportion of tied observations}
#'   \item{poly_degree}{Actual polynomial degree used (may be reduced)}
#'   \item{monotonic}{Logical; is the fitted function monotonic?}
#'   \item{min_derivative}{Smallest derivative found in the checked range}
#' }
#'
#' @details
#' The function uses average ranks (midrank method) to handle tied observations,
#' which is the standard approach in rank-based statistics. Plotting positions
#' (rank - 0.5)/n avoid infinite z-scores at boundaries.
#'
#' When substantial ties are present, the function may automatically reduce
#' the polynomial degree to avoid overfitting to a small number of unique
#' values.
#'
#' If check_monotonicity=TRUE, the function iteratively reduces the polynomial
#' degree until a monotonic fit is achieved or min_degree is reached.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
fit_polynomial <- function(x, poly_degree,
                           check_monotonicity = TRUE,
                           min_degree = 1) {
  # Step 1: Input validation and tie detection
  n <- length(x)
  if (n < 3) {
    stop("Need at least 3 observations to fit a polynomial.")
  }

  n_unique <- length(unique(x))
  tie_proportion <- 1 - (n_unique / n)

  # Step 2: Adjust polynomial degree based on unique values.
  # Can't fit more parameters than unique data points.
  max_possible_degree <- n_unique - 1
  if (poly_degree > max_possible_degree) {
    # BUGFIX: the original sprintf split the format string across arguments,
    # leaving "Reducing to degree %d." unformatted and the value dangling.
    warning(sprintf(
      "Requested polynomial degree (%d) exceeds number of unique values (%d). Reducing to degree %d.",
      poly_degree, n_unique, max_possible_degree
    ))
    poly_degree <- max_possible_degree
  }

  # Additional reduction for substantial ties: with >30% ties be conservative
  if (tie_proportion > 0.3 && poly_degree > 3) {
    recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
    if (recommended_degree < poly_degree) {
      warning(sprintf(
        "High proportion of ties (%.1f%%). Reducing polynomial degree from %d to %d for stability.",
        tie_proportion * 100, poly_degree, recommended_degree
      ))
      poly_degree <- recommended_degree
    }
  }

  # Ensure we stay above minimum (same sprintf-splitting bug fixed here too)
  if (poly_degree < min_degree) {
    stop(sprintf(
      "Insufficient unique values (%d) to fit minimum polynomial degree (%d). Need at least %d unique observations.",
      n_unique, min_degree, min_degree + 1
    ))
  }

  # Step 3: Ranks -> plotting positions -> standard normal quantiles.
  # Midranks handle ties; (rank - 0.5)/n keeps p strictly inside (0, 1)
  # so qnorm() never returns +/-Inf.
  avg_ranks <- rank(x, ties.method = "average")
  p <- (avg_ranks - 0.5) / n
  z <- qnorm(p)

  # Step 4: Fit polynomial, with optional monotonicity enforcement.
  # NOTE: the parameter `check_monotonicity` shadows the helper function of
  # the same name; R's call lookup skips the non-function local binding, so
  # check_monotonicity(model, ...) still resolves to the function.
  check_range <- range(z)
  current_degree <- poly_degree
  degree_reduced <- FALSE
  monotonic <- FALSE

  if (check_monotonicity) {
    # Reduce degree until the fitted quantile function is monotonic
    while (current_degree >= min_degree) {
      model <- lm(x ~ poly(z, current_degree, raw = TRUE))
      check <- check_monotonicity(model, z_range = check_range)
      if (check$is_monotonic) {
        monotonic <- TRUE
        break
      }
      current_degree <- current_degree - 1
      degree_reduced <- TRUE
    }
    # Emergency fallback: accept the minimum-degree fit even if non-monotonic
    if (current_degree < min_degree) {
      current_degree <- min_degree
      model <- lm(x ~ poly(z, current_degree, raw = TRUE))
      check <- check_monotonicity(model, z_range = check_range)
      monotonic <- check$is_monotonic
    }
  } else {
    model <- lm(x ~ poly(z, current_degree, raw = TRUE))
    check <- check_monotonicity(model, z_range = check_range)
    monotonic <- check$is_monotonic
  }

  # Metadata consumed by d.reg() and diagnostics
  attr(model, "sample_size") <- n
  attr(model, "n_unique") <- n_unique
  attr(model, "tie_proportion") <- tie_proportion
  attr(model, "poly_degree") <- current_degree
  attr(model, "monotonic") <- monotonic
  attr(model, "min_derivative") <- check$min_derivative
  return(model)
}
#' Check Monotonicity of Fitted Quantile Function
#'
#' Analytically determines whether a polynomial quantile function is
#' monotonically non-decreasing over a given z-range by examining its first
#' derivative at the range endpoints and at the critical points of the
#' derivative (roots of the second derivative).
#'
#' @param model An lm model object fitted with poly(..., raw=TRUE).
#' @param z_range A numeric vector of length 2 giving [min, max] over which
#'   monotonicity is checked (default c(-4, 4)).
#' @param strictly_positive Logical; if TRUE the derivative must exceed a
#'   small positive tolerance, if FALSE flat regions are tolerated.
#'
#' @return A list with:
#' \item{is_monotonic}{Logical; TRUE if monotonic in range.}
#' \item{min_derivative}{The smallest slope found in the range.}
#' \item{location_min}{The z-value at which that smallest slope occurs.}
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
check_monotonicity <- function(model, z_range = c(-4, 4),
                               strictly_positive = FALSE) {
  # Guard the inputs
  if (!inherits(model, "lm")) stop("model must be an lm object")
  if (length(z_range) != 2 || z_range[1] >= z_range[2]) {
    stop("z_range must be a vector [min, max] with min < max")
  }

  beta <- coef(model)

  # A rank-deficient fit leaves NA coefficients; treat as non-monotonic
  if (any(is.na(beta))) {
    return(list(
      is_monotonic = FALSE,
      min_derivative = -Inf,
      location_min = NA
    ))
  }

  deg <- length(beta) - 1

  # Degree 0: a constant is (weakly) monotonic by convention
  if (deg == 0) {
    return(list(
      is_monotonic = TRUE,
      min_derivative = 0,
      location_min = 0
    ))
  }

  # Degree 1: slope is simply the linear coefficient
  if (deg == 1) {
    b1 <- beta[2]
    ok <- if (strictly_positive) b1 > 0 else b1 >= 0
    return(list(
      is_monotonic = ok,
      min_derivative = b1,
      location_min = 0
    ))
  }

  # First derivative coefficients: d/dz sum(c_i z^i) = sum(i * c_i z^(i-1))
  d1 <- unname(beta[-1]) * seq_len(deg)

  # Evaluate the derivative at a point by Horner's scheme
  slope_at <- function(zv) {
    acc <- d1[length(d1)]
    idx <- length(d1) - 1
    while (idx >= 1) {
      acc <- acc * zv + d1[idx]
      idx <- idx - 1
    }
    acc
  }

  # Critical points of the derivative are the real roots of f''(z).
  # deg >= 2 here, so the second derivative always has at least one
  # coefficient.
  d2 <- d1[-1] * seq_len(deg - 1)
  cand <- polyroot(d2)
  real_roots <- Re(cand)[abs(Im(cand)) < 1e-9]
  inside <- real_roots[real_roots >= z_range[1] & real_roots <= z_range[2]]

  # The minimum slope occurs at an endpoint or at an interior extremum
  probes <- unique(c(z_range[1], z_range[2], inside))
  slopes <- vapply(probes, slope_at, numeric(1))
  worst <- min(slopes)
  where <- probes[which.min(slopes)]

  tol <- if (strictly_positive) 1e-9 else -1e-9
  return(list(
    is_monotonic = worst >= tol,
    min_derivative = worst,
    location_min = where
  ))
}
#' Calculate Moments from a Fitted Polynomial Function
#'
#' Computes the mean and variance of the distribution represented by a
#' polynomial quantile function x = f(z), z ~ N(0,1), using the closed-form
#' raw moments of the standard normal distribution (Isserlis, 1918).
#'
#' @param model An lm model object whose coefficients describe x = f(z) with
#'   z standard normal quantiles.
#' @param group_label Optional character label used in warning messages to
#'   identify the group (default = "Unknown").
#'
#' @return A list with elements:
#' \item{mean}{The expected value E[X] where X = f(Z), Z ~ N(0,1).}
#' \item{variance}{The variance Var(X) = E[X²] - (E[X])².}
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @seealso
#' \code{\link{fit_polynomial}} for fitting the polynomial model.
#' \code{\link{d.reg}} for the main effect size calculation.
#'
#' @examples
#' set.seed(123)
#' x <- rnorm(50, mean = 100, sd = 15)
#' z <- qnorm((rank(x, ties.method = "average") - 0.5) / length(x))
#' model <- lm(x ~ poly(z, 5, raw = TRUE))
#' moments <- get_moments(model, group_label = "Test Group")
#' c(mean = moments$mean, sd = sqrt(moments$variance))
#'
#' @export
get_moments <- function(model, group_label = "Unknown") {
  beta <- coef(model)
  deg <- length(beta) - 1  # polynomial degree

  # Raw moments of Z ~ N(0,1): E[Z^j] = 0 for odd j, (j-1)!! for even j
  # (e.g. E[Z^2] = 1, E[Z^4] = 3, E[Z^6] = 15, ...), and E[Z^0] = 1.
  normal_moment <- function(j) {
    if (j == 0) return(1)
    if (j %% 2 == 1) return(0)
    acc <- 1
    for (odd in seq(j - 1, 1, by = -2)) {
      acc <- acc * odd
    }
    acc
  }

  # E[X^2] needs moments up to z^(2 * deg)
  top <- 2 * deg
  zmom <- vapply(0:top, normal_moment, numeric(1))

  # Mean: E[f(Z)] = sum_j beta_j * E[Z^j]
  mu <- 0
  for (p in 0:deg) {
    mu <- mu + beta[p + 1] * zmom[p + 1]
  }

  # Second raw moment: E[f(Z)^2] = sum_{a,b} beta_a beta_b E[Z^(a+b)]
  second <- 0
  for (a in 0:deg) {
    for (b in 0:deg) {
      second <- second + beta[a + 1] * beta[b + 1] * zmom[a + b + 1]
    }
  }

  variance <- second - mu^2

  # A negative variance can only arise from floating-point error or a
  # badly-conditioned fit; clamp to zero either way, warning in the latter case.
  if (variance < 0) {
    if (abs(variance) < 1e-10) {
      variance <- 0
    } else {
      warning(
        "Variance for ", group_label, " is negative (",
        format(variance, scientific = TRUE, digits = 3),
        "). This indicates numerical instability in the polynomial fit. ",
        "Consider reducing the polynomial degree or checking for data issues.",
        call. = FALSE
      )
      variance <- 0
    }
  }

  return(list(
    mean = mu,
    variance = variance
  ))
}
#' Print Method for d_reg Objects
#'
#' Prints a compact summary of a d_reg result: the effect size, optional
#' confidence interval, per-group statistics, pooled SD, and polynomial degree.
#'
#' @param x An object of class "d_reg"
#' @param ... Additional arguments (not used)
#'
#' @return Invisibly returns \code{x}.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
print.d_reg <- function(x, ...) {
  cat("\nDistribution-Free Effect Size (d_reg)\n")
  cat("===================================\n\n")
  cat("Effect size d_reg:", round(x$d_reg, 4), "\n")
  # Display CI if available
  if (!is.null(x$ci_lower)) {
    # BUGFIX: the sprintf format already ends in "\n"; the original passed an
    # extra "\n" to cat(), printing a stray line containing a lone space.
    cat(sprintf("%d%% CI: [%.4f, %.4f]\n",
                round(x$ci_level * 100),
                x$ci_lower,
                x$ci_upper))
  }
  cat("\nGroup 1: n =", x$n1, ", mean =", round(x$group1_mean, 4),
      ", SD =", round(x$group1_sd, 4), "\n")
  cat("Group 2: n =", x$n2, ", mean =", round(x$group2_mean, 4),
      ", SD =", round(x$group2_sd, 4), "\n")
  cat("Pooled SD:", round(x$pooled_sd, 4), "\n")
  cat("Polynomial degree:", x$default_degree, "\n")
  invisible(x)
}
#' Summary Method for d_reg Objects
#'
#' Provides detailed summary statistics and diagnostic information: effect
#' size with a conventional magnitude label, per-group moments, pooled
#' statistics, model R-squared values, and the confidence interval if present.
#'
#' @param object An object of class "d_reg"
#' @param ... Additional arguments (not used)
#'
#' @return Invisibly returns \code{object}.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen's d.
#'
#' @export
summary.d_reg <- function(object, ...) {
  bar <- "=======================================================\n"
  cat("\n")
  cat(bar)
  cat(" Distribution-Free Effect Size Analysis (d_reg)\n")
  cat(bar)
  cat("\n")
  cat("Effect Size:\n")
  cat(" d_reg =", round(object$d_reg, 4), "\n")

  # Conventional magnitude labels at |d| cut points 0.2 / 0.5 / 0.8
  magnitude <- abs(object$d_reg)
  labels <- c("negligible", "small", "medium", "large")
  interpretation <- labels[findInterval(magnitude, c(0.2, 0.5, 0.8)) + 1]
  cat(" Interpretation:", interpretation, "\n\n")

  cat("Group 1:\n")
  cat(" Sample size: ", object$n1, "\n")
  cat(" Mean (smoothed): ", round(object$group1_mean, 4), "\n")
  cat(" SD (smoothed): ", round(object$group1_sd, 4), "\n")
  cat(" Variance: ", round(object$group1_variance, 4), "\n\n")

  cat("Group 2:\n")
  cat(" Sample size: ", object$n2, "\n")
  cat(" Mean (smoothed): ", round(object$group2_mean, 4), "\n")
  cat(" SD (smoothed): ", round(object$group2_sd, 4), "\n")
  cat(" Variance: ", round(object$group2_variance, 4), "\n\n")

  cat("Pooled Statistics:\n")
  cat(" Pooled SD: ", round(object$pooled_sd, 4), "\n")
  cat(" Mean difference: ", round(object$group2_mean - object$group1_mean, 4), "\n\n")

  cat("Model Details:\n")
  cat(" Polynomial degree:", object$default_degree, "\n")
  cat(" Model 1 R²: ", round(summary(object$model1)$r.squared, 4), "\n")
  cat(" Model 2 R²: ", round(summary(object$model2)$r.squared, 4), "\n\n")

  # Only printed when d.reg() was called with a CI level
  if (!is.null(object$ci_lower) && !is.null(object$ci_upper)) {
    cat(sprintf("Confidence Interval (%.1f%%): [%.4f, %.4f]\n\n",
                object$ci_level * 100,
                round(object$ci_lower, 4),
                round(object$ci_upper, 4)))
  }

  cat(bar)
  cat("\n")
  invisible(object)
}
# API endpoint
#* Calculate effect size from two groups
#* @param group1 Comma-separated numeric values for group 1
#* @param group2 Comma-separated numeric values for group 2
#* @param degree Polynomial degree passed to d.reg (default 4)
#* @param ci Confidence interval level (default 0.95)
#* @post /calculate
#* @get /calculate
function(group1 = NULL, group2 = NULL, degree = 4, ci = 0.95) {
tryCatch({
# Check if parameters are missing.
# NOTE(review): `group1 == ""` assumes scalar input; a repeated query
# parameter (length > 1) would make this || condition error — confirm
# how plumber delivers repeated params here.
if (is.null(group1) || is.null(group2) || group1 == "" || group2 == "") {
return(list(
success = FALSE,
error = "Missing required parameters: group1 and group2"
))
}
# Parse comma-separated strings into numeric vectors; non-numeric
# tokens become NA here and are dropped below.
x1 <- as.numeric(unlist(strsplit(as.character(group1), ",")))
x2 <- as.numeric(unlist(strsplit(as.character(group2), ",")))
ci_level <- as.numeric(ci)
degree <- as.numeric(degree)
# Remove any NA values produced by unparseable tokens
x1 <- x1[!is.na(x1)]
x2 <- x2[!is.na(x2)]
if (length(x1) == 0 || length(x2) == 0) {
return(list(
success = FALSE,
error = "Invalid input: Could not parse numeric values from input strings"
))
}
# Delegate to d.reg (defined above); silent = TRUE suppresses warnings
# so they do not leak into the HTTP response.
result <- d.reg(x1, x2, degree, CI = ci_level, silent = TRUE)
# Return a JSON-friendly list; smoothed (d_reg) and classic moments are
# both included so clients can compare the two.
return(list(
success = TRUE,
d_reg = result$d_reg,
ci_lower = result$ci_lower,
ci_upper = result$ci_upper,
ci_level = result$ci_level,
group1 = list(
n = result$n1,
mean = result$group1_mean,
sd = result$group1_sd,
mean_classic = mean(x1),
sd_classic = sd(x1)
),
group2 = list(
n = result$n2,
mean = result$group2_mean,
sd = result$group2_sd,
mean_classic = mean(x2),
sd_classic = sd(x2)
),
pooled_sd = result$pooled_sd,
degree = result$default_degree
))
}, error = function(e) {
# Any failure (validation, fitting) is reported as a structured error
# payload rather than a 500 with a bare traceback.
return(list(
success = FALSE,
error = as.character(e$message)
))
})
}
#* Health check endpoint
#* @get /
function() {
  # Static status payload; the endpoints entry documents the main route.
  list(
    status = "running",
    message = "Effect Size Calculator API is ready",
    endpoints = list(
      calculate = "/calculate?group1=1,2,3&group2=4,5,6"
    )
  )
}