WLenhard committed on
Commit
adb521e
·
verified ·
1 Parent(s): 218b69c

Upload app.R

Browse files
Files changed (1) hide show
  1. app.R +410 -91
app.R CHANGED
@@ -2,7 +2,7 @@ library(plumber)
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
- d.quantile <- function(x1, x2, degree = 4, CI = .95, silent = T) {
6
 
7
  # Input validation
8
  if (!is.numeric(x1) || !is.numeric(x2)) {
@@ -52,8 +52,8 @@ d.quantile <- function(x1, x2, degree = 4, CI = .95, silent = T) {
52
  }
53
 
54
  # Step 2: Get the moments from each fitted model
55
- moments1 <- get_moments_analytical(model1, group_label = "Group 1")
56
- moments2 <- get_moments_analytical(model2, group_label = "Group 2")
57
 
58
  # Step 3: Calculate the pooled standard deviation
59
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
@@ -80,21 +80,18 @@ d.quantile <- function(x1, x2, degree = 4, CI = .95, silent = T) {
80
  group1_mean = moments1$mean,
81
  group1_variance = moments1$variance,
82
  group1_sd = sqrt(moments1$variance),
83
- group1_sd_classic = sd(x1),
84
- group1_m_classic = mean(x1),
85
  group2_mean = moments2$mean,
86
  group2_variance = moments2$variance,
87
  group2_sd = sqrt(moments2$variance),
88
- group2_sd_classic = sd(x2),
89
- group2_m_classic = mean(x2),
90
  pooled_sd = pooled_sd,
91
  n1 = n1,
92
  n2 = n2,
93
- degree = degree,
94
  model1 = model1,
95
- model2 = model2
 
96
  )
97
 
 
98
  if(!is.na(CI)) {
99
  if(CI <= 0 || CI >= 1) {
100
  stop("CI must be between 0 and 1 (exclusive).")
@@ -124,14 +121,56 @@ d.quantile <- function(x1, x2, degree = 4, CI = .95, silent = T) {
124
  return(result)
125
  }
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  fit_quantile_function <- function(x, poly_degree,
128
  check_monotonicity = FALSE,
129
  min_degree = 1) {
130
 
131
- # ============================================================================
132
  # Step 1: Input validation and tie detection
133
- # ============================================================================
134
-
135
  n <- length(x)
136
 
137
  if (n < 3) {
@@ -142,10 +181,7 @@ fit_quantile_function <- function(x, poly_degree,
142
  n_unique <- length(unique(x))
143
  tie_proportion <- 1 - (n_unique / n)
144
 
145
- # ============================================================================
146
  # Step 2: Adjust polynomial degree based on unique values
147
- # ============================================================================
148
-
149
  # Can't fit more parameters than unique data points
150
  max_possible_degree <- n_unique - 1
151
 
@@ -180,12 +216,8 @@ fit_quantile_function <- function(x, poly_degree,
180
  ), min_degree + 1)
181
  }
182
 
183
- # ============================================================================
184
  # Step 3: Compute ranks and z-scores (handles ties via midrank)
185
- # ============================================================================
186
-
187
  # Average ranks handle ties by assigning mean rank to tied observations
188
- # Example: values [1, 2, 2, 3] get ranks [1, 2.5, 2.5, 4]
189
  avg_ranks <- rank(x, ties.method = "average")
190
 
191
  # Convert ranks to plotting positions
@@ -194,113 +226,305 @@ fit_quantile_function <- function(x, poly_degree,
194
  # Transform to standard normal quantiles
195
  z <- qnorm(p)
196
 
197
- # ============================================================================
198
  # Step 4: Fit polynomial, with optional monotonicity enforcement
199
- # ============================================================================
200
 
201
  current_degree <- poly_degree
202
  degree_reduced <- FALSE
203
- monotonic <- NULL # Will be checked if requested
204
 
205
  if (check_monotonicity) {
206
- # Iteratively reduce degree until monotonic or min_degree reached
207
  while (current_degree >= min_degree) {
208
 
209
- # Fit model at current degree
210
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
211
 
212
- # Check monotonicity
213
- monotonicity_check <- check_monotonicity(model)
214
- monotonic <- monotonicity_check$is_monotonic
215
 
216
- if (monotonic) {
217
- # Success - monotonic fit achieved
218
  break
219
  }
220
 
221
- # Not monotonic - try lower degree
222
  current_degree <- current_degree - 1
223
  degree_reduced <- TRUE
224
  }
225
 
 
226
  if (current_degree < min_degree) {
227
- stop(sprintf(
228
- "Could not achieve monotonic fit even with minimum degree %d. ",
229
- min_degree,
230
- "Data may be too irregular or have insufficient unique values."
231
- ))
232
- }
233
-
234
- if (degree_reduced) {
235
- warning(sprintf(
236
- "Polynomial degree reduced from %d to %d to achieve monotonicity.",
237
- poly_degree, current_degree
238
- ))
239
  }
240
 
241
  } else {
242
- # Just fit at requested degree without monotonicity check
243
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
244
-
245
- # Optionally check monotonicity for diagnostic purposes (don't enforce)
246
- if (exists("check_monotonicity", mode = "function")) {
247
- monotonicity_check <- check_monotonicity(model)
248
- monotonic <- monotonicity_check$is_monotonic
249
- }
250
  }
251
 
252
- # ============================================================================
253
- # Step 5: Store metadata as attributes
254
- # ============================================================================
255
-
256
  attr(model, "sample_size") <- n
257
- attr(model, "n_unique") <- n_unique
258
- attr(model, "tie_proportion") <- tie_proportion
259
  attr(model, "poly_degree") <- current_degree
260
- attr(model, "requested_degree") <- poly_degree
261
- attr(model, "degree_reduced") <- degree_reduced
262
  attr(model, "monotonic") <- monotonic
263
- attr(model, "has_ties") <- tie_proportion > 0.01 # Flag if >1% ties
264
 
265
  return(model)
266
  }
267
 
268
- check_monotonicity <- function(model, z_range = c(-3, 3), n_points = 100) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
- z_seq <- seq(z_range[1], z_range[2], length.out = n_points)
 
 
 
 
 
 
 
 
 
 
271
 
272
- # Get predictions
273
- pred <- predict(model, newdata = data.frame(z = z_seq))
274
 
275
- # Calculate finite differences (approximate derivatives)
276
- derivatives <- diff(pred) / diff(z_seq)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
 
278
- # Check for violations (negative derivatives)
279
- # Use small tolerance to avoid flagging numerical noise
280
- tolerance <- -1e-6
281
- violations <- sum(derivatives < tolerance)
282
 
283
- min_deriv <- min(derivatives)
284
- is_monotonic <- violations == 0
285
 
286
  return(list(
287
- is_monotonic = is_monotonic,
288
- min_derivative = min_deriv,
289
- violations = violations,
290
- proportion_violations = violations / length(derivatives),
291
- z_range_checked = z_range
292
  ))
293
  }
294
 
295
- get_moments_analytical <- function(model, group_label = "Unknown") {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  # Extract coefficients and determine polynomial degree
298
  coeffs <- coef(model)
299
  k <- length(coeffs) - 1 # polynomial degree
300
 
301
- # --------------------------------------------------------------------------
302
  # Pre-compute standard normal raw moments: E[Z^j]
303
- # --------------------------------------------------------------------------
304
  # For j even: E[Z^j] = (j-1)!! = (j-1) × (j-3) × ... × 3 × 1
305
  # For j odd: E[Z^j] = 0 (due to symmetry)
306
 
@@ -321,9 +545,7 @@ get_moments_analytical <- function(model, group_label = "Unknown") {
321
  max_moment <- 2 * k
322
  moments_z <- sapply(0:max_moment, compute_moment)
323
 
324
- # --------------------------------------------------------------------------
325
  # Compute mean: μ = E[X] = E[f(Z)] = Σ β_j E[Z^j]
326
- # --------------------------------------------------------------------------
327
  # Only even-powered terms contribute due to symmetry
328
 
329
  mu <- 0
@@ -331,10 +553,8 @@ get_moments_analytical <- function(model, group_label = "Unknown") {
331
  mu <- mu + coeffs[j + 1] * moments_z[j + 1]
332
  }
333
 
334
- # --------------------------------------------------------------------------
335
  # Compute variance: σ² = E[X²] - μ²
336
  # First calculate E[X²] = E[(Σ β_i Z^i)²] = Σ_i Σ_j β_i β_j E[Z^(i+j)]
337
- # --------------------------------------------------------------------------
338
 
339
  E_X2 <- 0
340
  for (i in 0:k) {
@@ -348,12 +568,7 @@ get_moments_analytical <- function(model, group_label = "Unknown") {
348
 
349
  variance <- E_X2 - mu^2
350
 
351
- # --------------------------------------------------------------------------
352
  # Handle numerical edge cases
353
- # --------------------------------------------------------------------------
354
- # Variance should always be non-negative, but numerical precision limits
355
- # can occasionally produce tiny negative values
356
-
357
  if (variance < 0) {
358
  if (abs(variance) < 1e-10) {
359
  # Likely just numerical noise - round to zero
@@ -372,10 +587,6 @@ get_moments_analytical <- function(model, group_label = "Unknown") {
372
  }
373
  }
374
 
375
- # --------------------------------------------------------------------------
376
- # Return results
377
- # --------------------------------------------------------------------------
378
-
379
  return(list(
380
  mean = mu,
381
  variance = variance
@@ -383,6 +594,114 @@ get_moments_analytical <- function(model, group_label = "Unknown") {
383
  }
384
 
385
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  # API endpoint
387
  #* Calculate effect size from two groups
388
  #* @param group1 Comma-separated numeric values for group 1
 
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
+ d.quantile <- function(x1, x2, degree = 5, CI = NA, silent = T) {
6
 
7
  # Input validation
8
  if (!is.numeric(x1) || !is.numeric(x2)) {
 
52
  }
53
 
54
  # Step 2: Get the moments from each fitted model
55
+ moments1 <- get_moments(model1, group_label = "Group 1")
56
+ moments2 <- get_moments(model2, group_label = "Group 2")
57
 
58
  # Step 3: Calculate the pooled standard deviation
59
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
 
80
  group1_mean = moments1$mean,
81
  group1_variance = moments1$variance,
82
  group1_sd = sqrt(moments1$variance),
 
 
83
  group2_mean = moments2$mean,
84
  group2_variance = moments2$variance,
85
  group2_sd = sqrt(moments2$variance),
 
 
86
  pooled_sd = pooled_sd,
87
  n1 = n1,
88
  n2 = n2,
 
89
  model1 = model1,
90
+ model2 = model2,
91
+ default_degree = degree
92
  )
93
 
94
+
95
  if(!is.na(CI)) {
96
  if(CI <= 0 || CI >= 1) {
97
  stop("CI must be between 0 and 1 (exclusive).")
 
121
  return(result)
122
  }
123
 
124
+
125
+ #' Fit a Polynomial to the Quantile Function
126
+ #'
127
+ #' This helper function fits a polynomial regression model to represent the
128
+ #' quantile function of a distribution. It models the relationship between
129
+ #' standard normal quantiles (z-scores) and observed raw scores.
130
+ #'
131
+ #' @param x A numeric vector of observations.
132
+ #' @param poly_degree The degree of the polynomial to fit.
133
+ #' @param check_monotonicity Logical; should monotonicity be enforced by
134
+ #' reducing polynomial degree if needed? (default = FALSE for speed)
135
+ #' @param min_degree Minimum polynomial degree to try (default = 1, representing
136
+ #' a linear fit to a normal distribution).
137
+ #'
138
+ #' @return An lm model object representing x = f(z), where z ~ N(0,1).
139
+ #' Additional attributes:
140
+ #' \describe{
141
+ #' \item{sample_size}{Original sample size}
142
+ #' \item{n_unique}{Number of unique values (for tie detection)}
143
+ #' \item{tie_proportion}{Proportion of tied observations}
144
+ #' \item{poly_degree}{Actual polynomial degree used (may be reduced)}
145
+ #' \item{monotonic}{Logical; is the fitted function monotonic?}
146
+ #' \item{degree_reduced}{Logical; was degree reduced from requested?}
147
+ #' }
148
+ #'
149
+ #' @details
150
+ #' The function uses average ranks (midrank method) to handle tied observations,
151
+ #' which is the standard approach in rank-based statistics. Plotting positions
152
+ #' (rank - 0.5)/n avoid infinite z-scores at boundaries.
153
+ #'
154
+ #' When substantial ties are present (>10% of observations), the function may
155
+ #' automatically reduce the polynomial degree to avoid overfitting to a small
156
+ #' number of unique values.
157
+ #'
158
+ #' If check_monotonicity=TRUE, the function iteratively reduces the polynomial
159
+ #' degree until a monotonic fit is achieved or min_degree is reached.
160
+ #'
161
+ #' @author Wolfgang Lenhard and Alexandra Lenhard
162
+ #' Licensed under the MIT License
163
+ #'
164
+ #' Citation:
165
+ #' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
166
+ #' A Robust Alternative to Cohen’s d.
167
+ #'
168
+ #' @export
169
  fit_quantile_function <- function(x, poly_degree,
170
  check_monotonicity = FALSE,
171
  min_degree = 1) {
172
 
 
173
  # Step 1: Input validation and tie detection
 
 
174
  n <- length(x)
175
 
176
  if (n < 3) {
 
181
  n_unique <- length(unique(x))
182
  tie_proportion <- 1 - (n_unique / n)
183
 
 
184
  # Step 2: Adjust polynomial degree based on unique values
 
 
185
  # Can't fit more parameters than unique data points
186
  max_possible_degree <- n_unique - 1
187
 
 
216
  ), min_degree + 1)
217
  }
218
 
 
219
  # Step 3: Compute ranks and z-scores (handles ties via midrank)
 
 
220
  # Average ranks handle ties by assigning mean rank to tied observations
 
221
  avg_ranks <- rank(x, ties.method = "average")
222
 
223
  # Convert ranks to plotting positions
 
226
  # Transform to standard normal quantiles
227
  z <- qnorm(p)
228
 
 
229
  # Step 4: Fit polynomial, with optional monotonicity enforcement
230
+ check_range <- range(z)
231
 
232
  current_degree <- poly_degree
233
  degree_reduced <- FALSE
234
+ monotonic <- FALSE
235
 
236
  if (check_monotonicity) {
 
237
  while (current_degree >= min_degree) {
238
 
 
239
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
240
 
241
+ # Use the NEW analytic check
242
+ check <- check_monotonicity_analytic(model, z_range = check_range)
 
243
 
244
+ if (check$is_monotonic) {
245
+ monotonic <- TRUE
246
  break
247
  }
248
 
249
+ # Reduce degree and try again
250
  current_degree <- current_degree - 1
251
  degree_reduced <- TRUE
252
  }
253
 
254
+ # Emergency fallback
255
  if (current_degree < min_degree) {
256
+ current_degree <- min_degree
257
+ # Fit linear even if non-monotonic (rare/impossible for degree 1 unless negative correlation)
258
+ model <- lm(x ~ poly(z, current_degree, raw = TRUE))
259
+ check <- check_monotonicity_analytic(model, z_range = check_range)
260
+ monotonic <- check$is_monotonic
 
 
 
 
 
 
 
261
  }
262
 
263
  } else {
 
264
  model <- lm(x ~ poly(z, current_degree, raw = TRUE))
265
+ check <- check_monotonicity(model, z_range = check_range)
266
+ monotonic <- check$is_monotonic
 
 
 
 
267
  }
268
 
269
+ # Metadata
 
 
 
270
  attr(model, "sample_size") <- n
 
 
271
  attr(model, "poly_degree") <- current_degree
 
 
272
  attr(model, "monotonic") <- monotonic
273
+ attr(model, "min_derivative") <- check$min_derivative
274
 
275
  return(model)
276
  }
277
 
278
+
279
+
280
+ #' Check Monotonicity of Fitted Quantile Function
281
+ #'
282
+ #' Analytically checks if a polynomial quantile function is monotonic
283
+ #' within the observed range of the data.
284
+ #'
285
+ #' @param model An lm model object fitted with poly(..., raw=TRUE).
286
+ #' @param z_range A numeric vector of length 2 defining the range [min, max]
287
+ #' over which to check monotonicity. If NULL, checks reasonable defaults
288
+ #' based on N (-4 to 4).
289
+ #' @param strictly_positive Logical; if TRUE, derivative must be > 0 (strict).
290
+ #' If FALSE, derivative can be >= 0 (allows flat regions).
291
+ #'
292
+ #' @return A list containing:
293
+ #' \item{is_monotonic}{Logical; TRUE if monotonic in range.}
294
+ #' \item{min_derivative}{The lowest slope found in the range.}
295
+ #' \item{location_min}{The z-value where the minimum slope occurs.}
296
+ #'
297
+ #'
298
+ #' @author Wolfgang Lenhard and Alexandra Lenhard
299
+ #' Licensed under the MIT License
300
+ #'
301
+ #' Citation:
302
+ #' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
303
+ #' A Robust Alternative to Cohen’s d.
304
+ #'
305
+ #' @export
306
+ check_monotonicity <- function(model, z_range = c(-4, 4),
307
+ strictly_positive = FALSE) {
308
+
309
+ # Input handling
310
+ if (!inherits(model, "lm")) stop("model must be an lm object")
311
+ if (length(z_range) != 2 || z_range[1] >= z_range[2]) {
312
+ stop("z_range must be a vector [min, max] with min < max")
313
+ }
314
+
315
+ # Extract polynomial coefficients
316
+ coeffs <- coef(model)
317
+
318
+ # Handle NAs (rare cases where regression had failed)
319
+ if (any(is.na(coeffs))) return(list(
320
+ is_monotonic = FALSE,
321
+ min_derivative = -Inf,
322
+ location_min = NA
323
+ ))
324
+
325
+ degree <- length(coeffs) - 1
326
+
327
+ # Special cases for low-degree polynomials
328
+ if (degree == 0) {
329
+ # CASE: Constant (Degree 0)
330
+ return(list(
331
+ is_monotonic = TRUE,
332
+ min_derivative = 0,
333
+ location_min = 0
334
+ ))
335
+ } else if (degree == 1) {
336
+ # CASE: Linear (Degree 1)
337
+ # f(z) = b0 + b1*z -> f'(z) = b1
338
+ slope <- coeffs[2]
339
+ return(list(
340
+ is_monotonic = if(strictly_positive) slope > 0 else slope >= 0,
341
+ min_derivative = slope,
342
+ location_min = 0
343
+ ))
344
+ }
345
+
346
+ # 1. Calculate coefficients of First Derivative f'(z)
347
+ # f(z) = c0 + c1*z + c2*z^2 + c3*z^3 ...
348
+ # f'(z) = c1 + 2*c2*z + 3*c3*z^2 ...
349
+ deriv1_coeffs <- numeric(degree) # Degree drops by 1
350
+ for (i in 1:degree) {
351
+ # Coeff index i+1 corresponds to power z^i in original model
352
+ deriv1_coeffs[i] <- coeffs[i + 1] * i
353
+ }
354
 
355
+ # Define function to evaluate slope at specific z values
356
+ eval_deriv <- function(z, coefs) {
357
+ # Horner's method
358
+ val <- coefs[length(coefs)]
359
+ if (length(coefs) > 1) {
360
+ for (i in (length(coefs)-1):1) {
361
+ val <- val * z + coefs[i]
362
+ }
363
+ }
364
+ return(val)
365
+ }
366
 
367
+ # 2. Find Critical Points of the Derivative
368
+ # To find where slope is minimized, we look for roots of f''(z)
369
 
370
+ # Coefficients of f''(z)
371
+ degree_d1 <- degree - 1
372
+ if (degree_d1 >= 1) {
373
+ deriv2_coeffs <- numeric(degree_d1)
374
+ for (i in 1:degree_d1) {
375
+ deriv2_coeffs[i] <- deriv1_coeffs[i + 1] * i
376
+ }
377
+
378
+ # Find roots (complex) of f''(z)
379
+ roots_complex <- polyroot(deriv2_coeffs)
380
+
381
+ # Filter for real roots within the z_range
382
+ # Keep if imaginary part is negligible
383
+ real_indices <- abs(Im(roots_complex)) < 1e-9
384
+ roots_real <- Re(roots_complex)[real_indices]
385
+
386
+ # Filter roots strictly inside our check range
387
+ critical_z <- roots_real[roots_real >= z_range[1] & roots_real <= z_range[2]]
388
+
389
+ } else {
390
+ # If derivative is constant (should be handled by degree=1 check above)
391
+ critical_z <- numeric(0)
392
+ }
393
+
394
+ # 3. Check Constraints (Boundaries + Critical Points)
395
+ # The minimum slope MUST occur either at endpoints or at a local extremum
396
+
397
+ check_points <- unique(c(z_range[1], z_range[2], critical_z))
398
+ slopes <- sapply(check_points, eval_deriv, coefs = deriv1_coeffs)
399
 
400
+ min_slope <- min(slopes)
401
+ loc_min <- check_points[which.min(slopes)]
 
 
402
 
403
+ threshold <- if(strictly_positive) 1e-9 else -1e-9
 
404
 
405
  return(list(
406
+ is_monotonic = min_slope >= threshold,
407
+ min_derivative = min_slope,
408
+ location_min = loc_min
 
 
409
  ))
410
  }
411
 
412
+
413
+
414
+ #' Calculate Moments from a Fitted Polynomial Quantile Function (Analytical)
415
+ #'
416
+ #' This function computes the mean and variance of the distribution represented
417
+ #' by a polynomial quantile function using closed-form analytical formulas
418
+ #' based on the raw moments of the standard normal distribution.
419
+ #'
420
+ #' @param model An lm model object from \code{\link{fit_quantile_function}()}.
421
+ #' The model should represent the relationship x = f(z) where z are standard
422
+ #' normal quantiles and x are the observed values.
423
+ #' @param group_label Optional character string label for warning messages
424
+ #' (default = "Unknown"). Used to identify which group produced warnings in
425
+ #' multi-group comparisons.
426
+ #'
427
+ #' @return A list with elements:
428
+ #' \item{mean}{The expected value E[X] where X = f(Z), Z ~ N(0,1).}
429
+ #' \item{variance}{The variance Var(X) = E[X²] - (E[X])².}
430
+ #'
431
+ #' @details
432
+ #' This function provides an analytical alternative to numerical integration
433
+ #' for computing distributional moments. It exploits the fact that when the
434
+ #' quantile function is represented as a polynomial:
435
+ #'
436
+ #' \deqn{f(z) = \sum_{j=0}^{k} \beta_j z^j}
437
+ #'
438
+ #' the moments can be computed in closed form using the known raw moments of
439
+ #' the standard normal distribution.
440
+ #'
441
+ #' \strong{Mathematical Foundation:}
442
+ #'
443
+ #' The mean is computed as:
444
+ #' \deqn{\mu = E[f(Z)] = \sum_{j=0}^{k} \beta_j E[Z^j]}
445
+ #'
446
+ #' where \eqn{E[Z^j]} are the raw moments of the standard normal distribution:
447
+ #' \itemize{
448
+ #' \item For odd j: \eqn{E[Z^j] = 0} (due to symmetry)
449
+ #' \item For even j: \eqn{E[Z^j] = (j-1)!!} (double factorial)
450
+ #' }
451
+ #'
452
+ #' The double factorial is defined as:
453
+ #' \deqn{(j-1)!! = (j-1) \times (j-3) \times \ldots \times 3 \times 1}
454
+ #'
455
+ #' Examples: \eqn{E[Z^0] = 1}, \eqn{E[Z^2] = 1}, \eqn{E[Z^4] = 3},
456
+ #' \eqn{E[Z^6] = 15}, \eqn{E[Z^8] = 105}.
457
+ #'
458
+ #' For a polynomial of degree k=5, the mean simplifies to:
459
+ #' \deqn{\mu = \beta_0 + \beta_2 \cdot 1 + \beta_4 \cdot 3}
460
+ #'
461
+ #' The variance is computed as:
462
+ #' \deqn{\sigma^2 = E[X^2] - \mu^2}
463
+ #'
464
+ #' where:
465
+ #' \deqn{E[X^2] = E\left[\left(\sum_{i=0}^{k} \beta_i Z^i\right)^2\right]
466
+ #' = \sum_{i=0}^{k} \sum_{j=0}^{k} \beta_i \beta_j E[Z^{i+j}]}
467
+ #'
468
+ #' @section Theoretical Background:
469
+ #' This approach is based on the principle that any random variable X can be
470
+ #' expressed as a transformation of a standard normal variable:
471
+ #' \deqn{X = f(Z), \quad Z \sim N(0,1)}
472
+ #'
473
+ #' Expectations with respect to X can then be computed as:
474
+ #' \deqn{E[g(X)] = E[g(f(Z))] = \int_{-\infty}^{\infty} g(f(z)) \phi(z) dz}
475
+ #'
476
+ #' When f is a polynomial, these integrals reduce to linear combinations of
477
+ #' standard normal moments, which are known analytically.
478
+ #'
479
+ #' @author Wolfgang Lenhard and Alexandra Lenhard
480
+ #' Licensed under the MIT License
481
+ #'
482
+ #' Citation:
483
+ #' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
484
+ #' A Robust Alternative to Cohen’s d.
485
+ #'
486
+ #' @seealso
487
+ #' \code{\link{get_moments}} for the numerical integration implementation.
488
+ #'
489
+ #' \code{\link{fit_quantile_function}} for fitting the polynomial model.
490
+ #'
491
+ #' \code{\link{d.quantile}} for the main effect size calculation.
492
+ #'
493
+ #' @examples
494
+ #' # Generate sample data
495
+ #' set.seed(123)
496
+ #' x <- rnorm(50, mean = 100, sd = 15)
497
+ #'
498
+ #' # Fit quantile function
499
+ #' n <- length(x)
500
+ #' avg_ranks <- rank(x, ties.method = "average")
501
+ #' p <- (avg_ranks - 0.5) / n
502
+ #' z <- qnorm(p)
503
+ #' model <- lm(x ~ poly(z, 5, raw = TRUE))
504
+ #'
505
+ #' # Compute moments analytically
506
+ #' moments <- get_moments_analytical(model, group_label = "Test Group")
507
+ #'
508
+ #' cat("Mean:", moments$mean, "\n")
509
+ #' cat("Variance:", moments$variance, "\n")
510
+ #' cat("SD:", sqrt(moments$variance), "\n")
511
+ #'
512
+ #' # Compare with sample statistics
513
+ #' cat("\nSample mean:", mean(x), "\n")
514
+ #' cat("Sample variance:", var(x), "\n")
515
+ #'
516
+ #' # The smoothed estimates will be similar but not identical,
517
+ #' # with the analytical method providing regularization
518
+ #'
519
+ #'
520
+ #' @export
521
+ get_moments <- function(model, group_label = "Unknown") {
522
 
523
  # Extract coefficients and determine polynomial degree
524
  coeffs <- coef(model)
525
  k <- length(coeffs) - 1 # polynomial degree
526
 
 
527
  # Pre-compute standard normal raw moments: E[Z^j]
 
528
  # For j even: E[Z^j] = (j-1)!! = (j-1) × (j-3) × ... × 3 × 1
529
  # For j odd: E[Z^j] = 0 (due to symmetry)
530
 
 
545
  max_moment <- 2 * k
546
  moments_z <- sapply(0:max_moment, compute_moment)
547
 
 
548
  # Compute mean: μ = E[X] = E[f(Z)] = Σ β_j E[Z^j]
 
549
  # Only even-powered terms contribute due to symmetry
550
 
551
  mu <- 0
 
553
  mu <- mu + coeffs[j + 1] * moments_z[j + 1]
554
  }
555
 
 
556
  # Compute variance: σ² = E[X²] - μ²
557
  # First calculate E[X²] = E[(Σ β_i Z^i)²] = Σ_i Σ_j β_i β_j E[Z^(i+j)]
 
558
 
559
  E_X2 <- 0
560
  for (i in 0:k) {
 
568
 
569
  variance <- E_X2 - mu^2
570
 
 
571
  # Handle numerical edge cases
 
 
 
 
572
  if (variance < 0) {
573
  if (abs(variance) < 1e-10) {
574
  # Likely just numerical noise - round to zero
 
587
  }
588
  }
589
 
 
 
 
 
590
  return(list(
591
  mean = mu,
592
  variance = variance
 
594
  }
595
 
596
 
597
+
598
+
599
#' Print Method for d_quantile Objects
#'
#' Compact console display of a distribution-free effect size result.
#'
#' @param x An object of class "d_quantile"
#' @param ... Additional arguments (not used)
#'
#' @return Invisibly returns \code{x}.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen’s d.
#'
#' @export
print.d_quantile <- function(x, ...) {
  cat("\nDistribution-Free Effect Size (d_q)\n")
  cat("===================================\n\n")
  cat("Effect size d_q:", round(x$d_q, 4), "\n")

  # Display CI if available; require BOTH bounds (the previous check of
  # ci_lower alone could dereference a missing ci_upper)
  if (!is.null(x$ci_lower) && !is.null(x$ci_upper)) {
    # %.0f avoids the %d-with-double pitfall; newline inside sprintf avoids
    # the stray " \n" that a trailing cat() argument produced
    cat(sprintf("%.0f%% CI: [%.4f, %.4f]\n",
                x$ci_level * 100,
                x$ci_lower,
                x$ci_upper))
  }

  cat("\nGroup 1: n =", x$n1, ", mean =", round(x$group1_mean, 4),
      ", SD =", round(x$group1_sd, 4), "\n")
  cat("Group 2: n =", x$n2, ", mean =", round(x$group2_mean, 4),
      ", SD =", round(x$group2_sd, 4), "\n")
  cat("Pooled SD:", round(x$pooled_sd, 4), "\n")

  # d.quantile() stores the requested degree as 'default_degree'; fall back
  # so older objects carrying a plain 'degree' element still print it
  deg <- if (!is.null(x$degree)) x$degree else x$default_degree
  cat("Polynomial degree:", deg, "\n")
  invisible(x)
}
633
+
634
#' Summary Method for d_quantile Objects
#'
#' Provides detailed summary statistics and diagnostic information for a
#' distribution-free effect size analysis.
#'
#' @param object An object of class "d_quantile"
#' @param ... Additional arguments (not used)
#'
#' @return Invisibly returns \code{object}.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#' Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation:
#' A Robust Alternative to Cohen’s d.
#'
#' @export
summary.d_quantile <- function(object, ...) {

  cat("\n")
  cat("=======================================================\n")
  cat(" Distribution-Free Effect Size Analysis (d_quantile)\n")
  cat("=======================================================\n\n")

  cat("Effect Size:\n")
  cat(" d_q =", round(object$d_q, 4), "\n")

  # Cohen-style benchmarks applied to |d_q|
  abs_d <- abs(object$d_q)
  interpretation <- if (abs_d < 0.2) {
    "negligible"
  } else if (abs_d < 0.5) {
    "small"
  } else if (abs_d < 0.8) {
    "medium"
  } else {
    "large"
  }
  cat(" Interpretation:", interpretation, "\n\n")

  cat("Group 1:\n")
  cat(" Sample size: ", object$n1, "\n")
  cat(" Mean (smoothed): ", round(object$group1_mean, 4), "\n")
  cat(" SD (smoothed): ", round(object$group1_sd, 4), "\n")
  cat(" Variance: ", round(object$group1_variance, 4), "\n\n")

  cat("Group 2:\n")
  cat(" Sample size: ", object$n2, "\n")
  cat(" Mean (smoothed): ", round(object$group2_mean, 4), "\n")
  cat(" SD (smoothed): ", round(object$group2_sd, 4), "\n")
  cat(" Variance: ", round(object$group2_variance, 4), "\n\n")

  cat("Pooled Statistics:\n")
  cat(" Pooled SD: ", round(object$pooled_sd, 4), "\n")
  cat(" Mean difference: ",
      round(object$group2_mean - object$group1_mean, 4), "\n\n")

  cat("Model Details:\n")
  # d.quantile() stores the requested degree as 'default_degree'; fall back
  # so older objects carrying a plain 'degree' element still print it
  deg <- if (!is.null(object$degree)) object$degree else object$default_degree
  cat(" Polynomial degree:", deg, "\n")
  # Guard against stripped-down objects without the fitted models
  if (!is.null(object$model1) && !is.null(object$model2)) {
    cat(" Model 1 R²: ", round(summary(object$model1)$r.squared, 4), "\n")
    cat(" Model 2 R²: ", round(summary(object$model2)$r.squared, 4), "\n")
  }
  cat("\n")

  if (!is.null(object$ci_lower) && !is.null(object$ci_upper)) {
    # %.4f already formats to 4 decimals; pre-rounding was redundant
    cat(sprintf("Confidence Interval (%.1f%%): [%.4f, %.4f]\n\n",
                object$ci_level * 100,
                object$ci_lower,
                object$ci_upper))
  }
  cat("=======================================================\n\n")

  invisible(object)
}
703
+
704
+
705
  # API endpoint
706
  #* Calculate effect size from two groups
707
  #* @param group1 Comma-separated numeric values for group 1