WLenhard committed on
Commit
08854f9
·
verified ·
1 Parent(s): c7a9cec

Upload app.R

Browse files
Files changed (1) hide show
  1. app.R +194 -132
app.R CHANGED
@@ -2,16 +2,117 @@ library(plumber)
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
- d.quantile <- function(x1, x2, degree = 5, CI = NA, silent = T) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # Input validation
8
  if (!is.numeric(x1) || !is.numeric(x2)) {
9
  stop("Both x1 and x2 must be numeric vectors.")
10
  }
11
 
 
 
 
 
 
 
 
 
 
12
  n1 <- length(x1)
13
  n2 <- length(x2)
14
 
 
 
 
 
 
 
15
  if (n1 < degree + 1) {
16
  stop("Group 1 has insufficient data: need at least ", degree + 1,
17
  " observations for degree ", degree, " polynomial (got ", n1, ").")
@@ -22,26 +123,26 @@ d.quantile <- function(x1, x2, degree = 5, CI = NA, silent = T) {
22
  " observations for degree ", degree, " polynomial (got ", n2, ").")
23
  }
24
 
25
- if (any(is.na(x1)) || any(is.na(x2))) {
26
- warning("Missing values detected and will be removed.")
27
- x1 <- x1[!is.na(x1)]
28
- x2 <- x2[!is.na(x2)]
29
- n1 <- length(x1)
30
- n2 <- length(x2)
31
- }
32
-
33
- if (n1 == 0 || n2 == 0) {
34
- stop("Cannot compute effect size with empty groups after removing NAs.")
35
  }
36
-
37
- # Step 1: Fit the polynomial models for each group
38
- model1 <- fit_quantile_function(x1, degree)
39
- model2 <- fit_quantile_function(x2, degree)
40
 
41
- # Check for ties and warn user
 
 
 
42
  tie1 <- attr(model1, "tie_proportion")
43
  tie2 <- attr(model2, "tie_proportion")
 
 
44
 
 
45
  if (!silent && (tie1 > 0.3 || tie2 > 0.3)) {
46
  message(sprintf(
47
  "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
@@ -50,83 +151,97 @@ d.quantile <- function(x1, x2, degree = 5, CI = NA, silent = T) {
50
  message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
51
  message("Consider comparing multiple effect size measures for discrete data.")
52
  }
53
-
54
- # Step 2: Get the moments from each fitted model
55
  moments1 <- get_moments(model1, group_label = "Group 1")
56
  moments2 <- get_moments(model2, group_label = "Group 2")
57
-
58
- # Step 3: Calculate the pooled standard deviation
59
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
60
  pooled_sd <- sqrt(weighted_pooled_variance)
61
-
62
- # Step 4: Compute the effect size d_q
63
  mean_diff <- moments2$mean - moments1$mean
64
 
65
  # Handle edge cases
66
  if (pooled_sd == 0) {
67
  if (mean_diff == 0) {
68
- d_q <- 0
69
  } else {
70
- d_q <- sign(mean_diff) * Inf
71
- warning("Pooled SD is zero but means differ. Returning Inf with appropriate sign.")
 
 
72
  }
73
  } else {
74
- d_q <- mean_diff / pooled_sd
75
  }
76
-
77
- # Return results
78
  result <- list(
79
- d_q = d_q,
 
 
80
  group1_mean = moments1$mean,
81
  group1_variance = moments1$variance,
82
  group1_sd = sqrt(moments1$variance),
 
 
83
  group2_mean = moments2$mean,
84
  group2_variance = moments2$variance,
85
  group2_sd = sqrt(moments2$variance),
 
 
86
  pooled_sd = pooled_sd,
 
 
87
  n1 = n1,
88
  n2 = n2,
 
 
89
  model1 = model1,
90
  model2 = model2,
91
- default_degree = degree
 
 
 
 
 
 
92
  )
93
-
94
-
95
- if(!is.na(CI)) {
96
- if(CI <= 0 || CI >= 1) {
97
- stop("CI must be between 0 and 1 (exclusive).")
98
- }
99
 
100
- # Standard error for d_q
101
- se_dq <- sqrt((n1 + n2) / (n1 * n2) + (d_q^2) / (2 * (n1 + n2)))
 
 
102
 
 
103
  df <- n1 + n2 - 2
 
 
104
  alpha <- 1 - CI
105
  t_crit <- qt(1 - alpha / 2, df)
106
 
107
- ci_lower <- d_q - t_crit * se_dq
108
- ci_upper <- d_q + t_crit * se_dq
 
109
 
 
110
  result$ci_lower <- ci_lower
111
  result$ci_upper <- ci_upper
112
  result$ci_level <- CI
 
 
113
  }
114
-
115
- result$tie_proportion_1 <- tie1
116
- result$tie_proportion_2 <- tie2
117
- result$n_unique_1 <- attr(model1, "n_unique")
118
- result$n_unique_2 <- attr(model2, "n_unique")
119
-
120
- class(result) <- "d_quantile"
121
  return(result)
122
  }
123
 
124
 
125
- #' Fit a Polynomial to the Quantile Function
126
  #'
127
  #' This helper function fits a polynomial regression model to represent the
128
- #' quantile function of a distribution. It models the relationship between
129
- #' standard normal quantiles (z-scores) and observed raw scores.
130
  #'
131
  #' @param x A numeric vector of observations.
132
  #' @param poly_degree The degree of the polynomial to fit.
@@ -166,15 +281,15 @@ d.quantile <- function(x1, x2, degree = 5, CI = NA, silent = T) {
166
  #' A Robust Alternative to Cohen’s d.
167
  #'
168
  #' @export
169
- fit_quantile_function <- function(x, poly_degree,
170
- check_monotonicity = FALSE,
171
  min_degree = 1) {
172
 
173
  # Step 1: Input validation and tie detection
174
  n <- length(x)
175
 
176
  if (n < 3) {
177
- stop("Need at least 3 observations to fit a polynomial quantile function.")
178
  }
179
 
180
  # Count unique values to detect ties
@@ -237,9 +352,7 @@ fit_quantile_function <- function(x, poly_degree,
237
  while (current_degree >= min_degree) {
238
 
239
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
240
-
241
- # Use the NEW analytic check
242
- check <- check_monotonicity_analytic(model, z_range = check_range)
243
 
244
  if (check$is_monotonic) {
245
  monotonic <- TRUE
@@ -256,7 +369,7 @@ fit_quantile_function <- function(x, poly_degree,
256
  current_degree <- min_degree
257
  # Fit linear even if non-monotonic (rare/impossible for degree 1 unless negative correlation)
258
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
259
- check <- check_monotonicity_analytic(model, z_range = check_range)
260
  monotonic <- check$is_monotonic
261
  }
262
 
@@ -268,10 +381,12 @@ fit_quantile_function <- function(x, poly_degree,
268
 
269
  # Metadata
270
  attr(model, "sample_size") <- n
 
 
271
  attr(model, "poly_degree") <- current_degree
272
  attr(model, "monotonic") <- monotonic
273
  attr(model, "min_derivative") <- check$min_derivative
274
-
275
  return(model)
276
  }
277
 
@@ -411,11 +526,10 @@ check_monotonicity <- function(model, z_range = c(-4, 4),
411
 
412
 
413
 
414
- #' Calculate Moments from a Fitted Polynomial Quantile Function (Analytical)
415
  #'
416
  #' This function computes the mean and variance of the distribution represented
417
- #' by a polynomial quantile function using closed-form analytical formulas
418
- #' based on the raw moments of the standard normal distribution.
419
  #'
420
  #' @param model An lm model object from \code{\link{fit_quantile_function}()}.
421
  #' The model should represent the relationship x = f(z) where z are standard
@@ -428,53 +542,6 @@ check_monotonicity <- function(model, z_range = c(-4, 4),
428
  #' \item{mean}{The expected value E[X] where X = f(Z), Z ~ N(0,1).}
429
  #' \item{variance}{The variance Var(X) = E[X²] - (E[X])².}
430
  #'
431
- #' @details
432
- #' This function provides an analytical alternative to numerical integration
433
- #' for computing distributional moments. It exploits the fact that when the
434
- #' quantile function is represented as a polynomial:
435
- #'
436
- #' \deqn{f(z) = \sum_{j=0}^{k} \beta_j z^j}
437
- #'
438
- #' the moments can be computed in closed form using the known raw moments of
439
- #' the standard normal distribution.
440
- #'
441
- #' \strong{Mathematical Foundation:}
442
- #'
443
- #' The mean is computed as:
444
- #' \deqn{\mu = E[f(Z)] = \sum_{j=0}^{k} \beta_j E[Z^j]}
445
- #'
446
- #' where \eqn{E[Z^j]} are the raw moments of the standard normal distribution:
447
- #' \itemize{
448
- #' \item For odd j: \eqn{E[Z^j] = 0} (due to symmetry)
449
- #' \item For even j: \eqn{E[Z^j] = (j-1)!!} (double factorial)
450
- #' }
451
- #'
452
- #' The double factorial is defined as:
453
- #' \deqn{(j-1)!! = (j-1) \times (j-3) \times \ldots \times 3 \times 1}
454
- #'
455
- #' Examples: \eqn{E[Z^0] = 1}, \eqn{E[Z^2] = 1}, \eqn{E[Z^4] = 3},
456
- #' \eqn{E[Z^6] = 15}, \eqn{E[Z^8] = 105}.
457
- #'
458
- #' For a polynomial of degree k=5, the mean simplifies to:
459
- #' \deqn{\mu = \beta_0 + \beta_2 \cdot 1 + \beta_4 \cdot 3}
460
- #'
461
- #' The variance is computed as:
462
- #' \deqn{\sigma^2 = E[X^2] - \mu^2}
463
- #'
464
- #' where:
465
- #' \deqn{E[X^2] = E\left[\left(\sum_{i=0}^{k} \beta_i Z^i\right)^2\right]
466
- #' = \sum_{i=0}^{k} \sum_{j=0}^{k} \beta_i \beta_j E[Z^{i+j}]}
467
- #'
468
- #' @section Theoretical Background:
469
- #' This approach is based on the principle that any random variable X can be
470
- #' expressed as a transformation of a standard normal variable:
471
- #' \deqn{X = f(Z), \quad Z \sim N(0,1)}
472
- #'
473
- #' Expectations with respect to X can then be computed as:
474
- #' \deqn{E[g(X)] = E[g(f(Z))] = \int_{-\infty}^{\infty} g(f(z)) \phi(z) dz}
475
- #'
476
- #' When f is a polynomial, these integrals reduce to linear combinations of
477
- #' standard normal moments, which are known analytically.
478
  #'
479
  #' @author Wolfgang Lenhard and Alexandra Lenhard
480
  #' Licensed under the MIT License
@@ -484,12 +551,9 @@ check_monotonicity <- function(model, z_range = c(-4, 4),
484
  #' A Robust Alternative to Cohen’s d.
485
  #'
486
  #' @seealso
487
- #' \code{\link{get_moments}} for the numerical integration implementation.
488
- #'
489
- #' \code{\link{fit_quantile_function}} for fitting the polynomial model.
490
  #'
491
- #' \code{\link{d.quantile}} for the main effect size calculation.
492
- #'
493
  #' @examples
494
  #' # Generate sample data
495
  #' set.seed(123)
@@ -502,8 +566,8 @@ check_monotonicity <- function(model, z_range = c(-4, 4),
502
  #' z <- qnorm(p)
503
  #' model <- lm(x ~ poly(z, 5, raw = TRUE))
504
  #'
505
- #' # Compute moments analytically
506
- #' moments <- get_moments_analytical(model, group_label = "Test Group")
507
  #'
508
  #' cat("Mean:", moments$mean, "\n")
509
  #' cat("Variance:", moments$variance, "\n")
@@ -513,8 +577,6 @@ check_monotonicity <- function(model, z_range = c(-4, 4),
513
  #' cat("\nSample mean:", mean(x), "\n")
514
  #' cat("Sample variance:", var(x), "\n")
515
  #'
516
- #' # The smoothed estimates will be similar but not identical,
517
- #' # with the analytical method providing regularization
518
  #'
519
  #'
520
  #' @export
@@ -596,9 +658,9 @@ get_moments <- function(model, group_label = "Unknown") {
596
 
597
 
598
 
599
- #' Print Method for d_quantile Objects
600
  #'
601
- #' @param x An object of class "d_quantile"
602
  #' @param ... Additional arguments (not used)
603
  #'
604
  #' @author Wolfgang Lenhard and Alexandra Lenhard
@@ -609,10 +671,10 @@ get_moments <- function(model, group_label = "Unknown") {
609
  #' A Robust Alternative to Cohen’s d.
610
  #'
611
  #' @export
612
- print.d_quantile <- function(x, ...) {
613
- cat("\nDistribution-Free Effect Size (d_q)\n")
614
  cat("===================================\n\n")
615
- cat("Effect size d_q:", round(x$d_q, 4), "\n")
616
 
617
  # Display CI if available
618
  if (!is.null(x$ci_lower)) {
@@ -627,16 +689,15 @@ print.d_quantile <- function(x, ...) {
627
  cat("Group 2: n =", x$n2, ", mean =", round(x$group2_mean, 4),
628
  ", SD =", round(x$group2_sd, 4), "\n")
629
  cat("Pooled SD:", round(x$pooled_sd, 4), "\n")
630
- cat("Polynomial degree:", x$degree, "\n")
631
  invisible(x)
632
  }
633
 
634
- #' Summary Method for d_quantile Objects
635
  #'
636
  #' Provides detailed summary statistics and diagnostic information.
637
  #'
638
- #' @param object An object of class "d_quantile"
639
- #' @param ... Additional arguments (not used)
640
  #'
641
  #' @author Wolfgang Lenhard and Alexandra Lenhard
642
  #' Licensed under the MIT License
@@ -646,18 +707,18 @@ print.d_quantile <- function(x, ...) {
646
  #' A Robust Alternative to Cohen’s d.
647
  #'
648
  #' @export
649
- summary.d_quantile <- function(object, ...) {
650
 
651
  cat("\n")
652
  cat("=======================================================\n")
653
- cat(" Distribution-Free Effect Size Analysis (d_quantile)\n")
654
  cat("=======================================================\n\n")
655
 
656
  cat("Effect Size:\n")
657
- cat(" d_q =", round(object$d_q, 4), "\n")
658
 
659
  # Interpretation
660
- abs_d <- abs(object$d_q)
661
  interpretation <- if (abs_d < 0.2) {
662
  "negligible"
663
  } else if (abs_d < 0.5) {
@@ -686,7 +747,7 @@ summary.d_quantile <- function(object, ...) {
686
  cat(" Mean difference: ", round(object$group2_mean - object$group1_mean, 4), "\n\n")
687
 
688
  cat("Model Details:\n")
689
- cat(" Polynomial degree:", object$degree, "\n")
690
  cat(" Model 1 R²: ", round(summary(object$model1)$r.squared, 4), "\n")
691
  cat(" Model 2 R²: ", round(summary(object$model2)$r.squared, 4), "\n\n")
692
 
@@ -702,6 +763,7 @@ summary.d_quantile <- function(object, ...) {
702
  }
703
 
704
 
 
705
  # API endpoint
706
  #* Calculate effect size from two groups
707
  #* @param group1 Comma-separated numeric values for group 1
 
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
+
6
+
7
+ #' Calculate a Distribution-Free Effect Size (d_reg)
8
+ #'
9
+ #' This function computes a distribution-free effect size by modeling the
10
 + #' empirical quantile function of two groups via polynomial
11
+ #' regression. The effect size is computed as the standardized
12
+ #' difference between the means of the smoothed distributions.
13
+ #'
14
+ #' The method involves:
15
+ #' 1. Fitting polynomials to each group's quantile function: x = f(z)
16
+ #' 2. Computing moments (mean, variance) of the polynomials
17
+ #' 3. Calculating d(reg) using the pooled standard deviation
18
+ #'
19
+ #' @param x1 A numeric vector of data for the first group.
20
+ #' @param x2 A numeric vector of data for the second group.
21
+ #' @param degree The degree of the polynomial to fit (default = 5).
22
+ #' Higher degrees capture more complex distributional shapes but
23
+ #' may overfit with small samples.
24
+ #' @param CI Confidence level for confidence interval (default = NA, no CI computed).
25
+ #' If specified (e.g., 0.95), uses asymptotic normal approximation.
26
+ #' WARNING: CI formula assumes Cohen's d distribution and may not be accurate for d_reg.
27
+ #' @param silent Logical; if TRUE, suppresses warnings during fitting (default = TRUE).
28
+ #'
29
+ #' @return A list (S3 class "d_reg") containing:
30
+ #' \item{d_reg}{The distribution-free effect size (standardized mean difference).}
31
+ #' \item{group1_mean}{Mean of the smoothed distribution for group 1.}
32
+ #' \item{group1_variance}{Variance of the smoothed distribution for group 1.}
33
+ #' \item{group1_sd}{Standard deviation of the smoothed distribution for group 1.}
34
+ #' \item{group2_mean}{Mean of the smoothed distribution for group 2.}
35
+ #' \item{group2_variance}{Variance of the smoothed distribution for group 2.}
36
+ #' \item{group2_sd}{Standard deviation of the smoothed distribution for group 2.}
37
+ #' \item{pooled_sd}{Pooled standard deviation.}
38
+ #' \item{n1}{Sample size of group 1.}
39
+ #' \item{n2}{Sample size of group 2.}
40
+ #' \item{model1}{Fitted polynomial model for group 1.}
41
+ #' \item{model2}{Fitted polynomial model for group 2.}
42
+ #' \item{default_degree}{Polynomial degree used.}
43
+ #' \item{tie_proportion_1}{Proportion of tied values in group 1.}
44
+ #' \item{tie_proportion_2}{Proportion of tied values in group 2.}
45
+ #' \item{n_unique_1}{Number of unique values in group 1.}
46
+ #' \item{n_unique_2}{Number of unique values in group 2.}
47
+ #' \item{ci_lower}{Lower bound of confidence interval (if CI specified).}
48
+ #' \item{ci_upper}{Upper bound of confidence interval (if CI specified).}
49
+ #' \item{ci_level}{Confidence level (if CI specified).}
50
+ #'
51
+ #' @details
52
+ #' The method is distribution-free and converges to Cohen's d under normality with
53
+ #' increasing group size. It is robust to outliers and skewness compared to
54
+ #' classical parametric methods.
55
+ #'
56
+ #' Sample size requirements: At least (degree + 1) observations per group.
57
+ #' Recommended: n > 10 per group for stable polynomial fits.
58
+ #' For small samples (n < 20), consider using degree = 3 or lower.
59
+ #'
60
+ #' Confidence intervals use an asymptotic approximation based on Cohen's d
61
+ #' distribution and may not accurately reflect the true sampling distribution
62
+ #' of d_reg, especially in small samples or non-normal data.
63
+ #'
64
+ #' @author Wolfgang Lenhard and Alexandra Lenhard
65
+ #' @references
66
+ #' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
67
+ #' A Robust Alternative to Cohen's d.
68
+ #'
69
+ #' @examples
70
+ #' # Normal distributions
71
+ #' set.seed(123)
72
+ #' x1 <- rnorm(30, mean = 0, sd = 1)
73
+ #' x2 <- rnorm(30, mean = 0.5, sd = 1)
74
+ #' result <- d.reg(x1, x2)
75
+ #' print(result)
76
+ #'
77
+ #' # With confidence interval
78
+ #' result_ci <- d.reg(x1, x2, CI = 0.95)
79
+ #' print(result_ci)
80
+ #'
81
+ #' # Skewed distributions
82
+ #' x1 <- rexp(50, rate = 1)
83
+ #' x2 <- rexp(50, rate = 0.8)
84
+ #' result <- d.reg(x1, x2, degree = 4)
85
+ #' print(result)
86
+ #'
87
+ #' @export
88
+ d.reg <- function(x1, x2, degree = 5, CI = NA, silent = TRUE) {
89
+
90
+ # ============================================================================
91
+ # Input Validation
92
+ # ============================================================================
93
 
 
94
  if (!is.numeric(x1) || !is.numeric(x2)) {
95
  stop("Both x1 and x2 must be numeric vectors.")
96
  }
97
 
98
+ # Handle missing values
99
+ if (any(is.na(x1)) || any(is.na(x2))) {
100
+ if (!silent) {
101
+ warning("Missing values detected and will be removed.")
102
+ }
103
+ x1 <- x1[!is.na(x1)]
104
+ x2 <- x2[!is.na(x2)]
105
+ }
106
+
107
  n1 <- length(x1)
108
  n2 <- length(x2)
109
 
110
+ # Check for empty groups
111
+ if (n1 == 0 || n2 == 0) {
112
+ stop("Cannot compute effect size with empty groups after removing NAs.")
113
+ }
114
+
115
+ # Check sufficient sample size for polynomial degree
116
  if (n1 < degree + 1) {
117
  stop("Group 1 has insufficient data: need at least ", degree + 1,
118
  " observations for degree ", degree, " polynomial (got ", n1, ").")
 
123
  " observations for degree ", degree, " polynomial (got ", n2, ").")
124
  }
125
 
126
+ # Validate CI parameter if provided
127
+ if (!is.na(CI)) {
128
+ if (!is.numeric(CI) || length(CI) != 1) {
129
+ stop("CI must be a single numeric value or NA.")
130
+ }
131
+ if (CI <= 0 || CI >= 1) {
132
+ stop("CI must be between 0 and 1 (exclusive).")
133
+ }
 
 
134
  }
 
 
 
 
135
 
136
+ model1 <- fit_polynomial(x1, degree)
137
+ model2 <- fit_polynomial(x2, degree)
138
+
139
+ # Extract tie information
140
  tie1 <- attr(model1, "tie_proportion")
141
  tie2 <- attr(model2, "tie_proportion")
142
+ n_unique1 <- attr(model1, "n_unique")
143
+ n_unique2 <- attr(model2, "n_unique")
144
 
145
+ # Warn about substantial ties
146
  if (!silent && (tie1 > 0.3 || tie2 > 0.3)) {
147
  message(sprintf(
148
  "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
 
151
  message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
152
  message("Consider comparing multiple effect size measures for discrete data.")
153
  }
154
+
 
155
  moments1 <- get_moments(model1, group_label = "Group 1")
156
  moments2 <- get_moments(model2, group_label = "Group 2")
157
+
158
+ # Weighted pooled variance (population formula, not sample formula)
159
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
160
  pooled_sd <- sqrt(weighted_pooled_variance)
 
 
161
  mean_diff <- moments2$mean - moments1$mean
162
 
163
  # Handle edge cases
164
  if (pooled_sd == 0) {
165
  if (mean_diff == 0) {
166
+ d_reg <- 0
167
  } else {
168
+ d_reg <- sign(mean_diff) * Inf
169
+ if (!silent) {
170
+ warning("Pooled SD is zero but means differ. Returning Inf with appropriate sign.")
171
+ }
172
  }
173
  } else {
174
+ d_reg <- mean_diff / pooled_sd
175
  }
176
+
 
177
  result <- list(
178
+ d_reg = d_reg,
179
+
180
+ # Group 1 statistics
181
  group1_mean = moments1$mean,
182
  group1_variance = moments1$variance,
183
  group1_sd = sqrt(moments1$variance),
184
+
185
+ # Group 2 statistics
186
  group2_mean = moments2$mean,
187
  group2_variance = moments2$variance,
188
  group2_sd = sqrt(moments2$variance),
189
+
190
+ # Pooled statistics
191
  pooled_sd = pooled_sd,
192
+
193
+ # Sample sizes
194
  n1 = n1,
195
  n2 = n2,
196
+
197
+ # Models
198
  model1 = model1,
199
  model2 = model2,
200
+
201
+ # Metadata
202
+ default_degree = degree,
203
+ tie_proportion_1 = tie1,
204
+ tie_proportion_2 = tie2,
205
+ n_unique_1 = n_unique1,
206
+ n_unique_2 = n_unique2
207
  )
208
+
209
+ if (!is.na(CI)) {
 
 
 
 
210
 
211
+ # Standard error using asymptotic approximation
212
+ # NOTE: This formula assumes Cohen's d distribution and may not be
213
+ # accurate for d_reg, especially in small samples or non-normal data
214
+ se_dreg <- sqrt((n1 + n2) / (n1 * n2) + (d_reg^2) / (2 * (n1 + n2)))
215
 
216
+ # Degrees of freedom
217
  df <- n1 + n2 - 2
218
+
219
+ # Critical value from t-distribution
220
  alpha <- 1 - CI
221
  t_crit <- qt(1 - alpha / 2, df)
222
 
223
+ # Confidence interval bounds
224
+ ci_lower <- d_reg - t_crit * se_dreg
225
+ ci_upper <- d_reg + t_crit * se_dreg
226
 
227
+ # Add to result
228
  result$ci_lower <- ci_lower
229
  result$ci_upper <- ci_upper
230
  result$ci_level <- CI
231
+ result$ci_se <- se_dreg
232
+ result$ci_df <- df
233
  }
234
+
235
+ class(result) <- "d_reg"
 
 
 
 
 
236
  return(result)
237
  }
238
 
239
 
240
+ #' Fit a Polynomial to eCDF
241
  #'
242
  #' This helper function fits a polynomial regression model to represent the
243
+ #' distribution. It models the relationship between
244
+ #' z-scores and observed raw scores.
245
  #'
246
  #' @param x A numeric vector of observations.
247
  #' @param poly_degree The degree of the polynomial to fit.
 
281
  #' A Robust Alternative to Cohen’s d.
282
  #'
283
  #' @export
284
+ fit_polynomial <- function(x, poly_degree,
285
+ check_monotonicity = TRUE,
286
  min_degree = 1) {
287
 
288
  # Step 1: Input validation and tie detection
289
  n <- length(x)
290
 
291
  if (n < 3) {
292
+ stop("Need at least 3 observations to fit a polynomial.")
293
  }
294
 
295
  # Count unique values to detect ties
 
352
  while (current_degree >= min_degree) {
353
 
354
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
355
+ check <- check_monotonicity(model, z_range = check_range)
 
 
356
 
357
  if (check$is_monotonic) {
358
  monotonic <- TRUE
 
369
  current_degree <- min_degree
370
  # Fit linear even if non-monotonic (rare/impossible for degree 1 unless negative correlation)
371
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
372
+ check <- check_monotonicity(model, z_range = check_range)
373
  monotonic <- check$is_monotonic
374
  }
375
 
 
381
 
382
  # Metadata
383
  attr(model, "sample_size") <- n
384
 + attr(model, "n_unique") <- n_unique # tie diagnostic: count of distinct values
385
 + attr(model, "tie_proportion") <- tie_proportion # tie diagnostic: share of tied observations
386
  attr(model, "poly_degree") <- current_degree
387
  attr(model, "monotonic") <- monotonic
388
  attr(model, "min_derivative") <- check$min_derivative
389
+
390
  return(model)
391
  }
392
 
 
526
 
527
 
528
 
529
+ #' Calculate Moments from a Fitted Polynomial Function
530
  #'
531
  #' This function computes the mean and variance of the distribution represented
532
 + #' by a polynomial function using Isserlis' (1918) theorem.
 
533
  #'
534
  #' @param model An lm model object from \code{\link{fit_quantile_function}()}.
535
  #' The model should represent the relationship x = f(z) where z are standard
 
542
  #' \item{mean}{The expected value E[X] where X = f(Z), Z ~ N(0,1).}
543
  #' \item{variance}{The variance Var(X) = E[X²] - (E[X])².}
544
  #'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
  #'
546
  #' @author Wolfgang Lenhard and Alexandra Lenhard
547
  #' Licensed under the MIT License
 
551
  #' A Robust Alternative to Cohen’s d.
552
  #'
553
  #' @seealso
554
+ #' \code{\link{fit_polynomial}} for fitting the polynomial model.
555
+ #' \code{\link{d.reg}} for the main effect size calculation.
 
556
  #'
 
 
557
  #' @examples
558
  #' # Generate sample data
559
  #' set.seed(123)
 
566
  #' z <- qnorm(p)
567
  #' model <- lm(x ~ poly(z, 5, raw = TRUE))
568
  #'
569
+ #' # Compute moments
570
+ #' moments <- get_moments(model, group_label = "Test Group")
571
  #'
572
  #' cat("Mean:", moments$mean, "\n")
573
  #' cat("Variance:", moments$variance, "\n")
 
577
  #' cat("\nSample mean:", mean(x), "\n")
578
  #' cat("Sample variance:", var(x), "\n")
579
  #'
 
 
580
  #'
581
  #'
582
  #' @export
 
658
 
659
 
660
 
661
+ #' Print Method for d_reg Objects
662
  #'
663
+ #' @param x An object of class "d_reg"
664
  #' @param ... Additional arguments (not used)
665
  #'
666
  #' @author Wolfgang Lenhard and Alexandra Lenhard
 
671
  #' A Robust Alternative to Cohen’s d.
672
  #'
673
  #' @export
674
+ print.d_reg <- function(x, ...) {
675
+ cat("\nDistribution-Free Effect Size (d_reg)\n")
676
  cat("===================================\n\n")
677
+ cat("Effect size d_reg:", round(x$d_reg, 4), "\n")
678
 
679
  # Display CI if available
680
  if (!is.null(x$ci_lower)) {
 
689
  cat("Group 2: n =", x$n2, ", mean =", round(x$group2_mean, 4),
690
  ", SD =", round(x$group2_sd, 4), "\n")
691
  cat("Pooled SD:", round(x$pooled_sd, 4), "\n")
692
+ cat("Polynomial degree:", x$default_degree, "\n")
693
  invisible(x)
694
  }
695
 
696
+ #' Summary Method for d_reg Objects
697
  #'
698
  #' Provides detailed summary statistics and diagnostic information.
699
  #'
700
+ #' @param object An object of class "d_reg"
 
701
  #'
702
  #' @author Wolfgang Lenhard and Alexandra Lenhard
703
  #' Licensed under the MIT License
 
707
  #' A Robust Alternative to Cohen’s d.
708
  #'
709
  #' @export
710
+ summary.d_reg <- function(object, ...) {
711
 
712
  cat("\n")
713
  cat("=======================================================\n")
714
+ cat(" Distribution-Free Effect Size Analysis (d_reg)\n")
715
  cat("=======================================================\n\n")
716
 
717
  cat("Effect Size:\n")
718
+ cat(" d_reg =", round(object$d_reg, 4), "\n")
719
 
720
  # Interpretation
721
+ abs_d <- abs(object$d_reg)
722
  interpretation <- if (abs_d < 0.2) {
723
  "negligible"
724
  } else if (abs_d < 0.5) {
 
747
  cat(" Mean difference: ", round(object$group2_mean - object$group1_mean, 4), "\n\n")
748
 
749
  cat("Model Details:\n")
750
+ cat(" Polynomial degree:", object$default_degree, "\n")
751
  cat(" Model 1 R²: ", round(summary(object$model1)$r.squared, 4), "\n")
752
  cat(" Model 2 R²: ", round(summary(object$model2)$r.squared, 4), "\n\n")
753
 
 
763
  }
764
 
765
 
766
+
767
  # API endpoint
768
  #* Calculate effect size from two groups
769
  #* @param group1 Comma-separated numeric values for group 1