File size: 28,017 Bytes
d711953
 
 
 
08854f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d711953
 
 
 
 
08854f9
 
 
 
 
 
 
 
 
d711953
 
 
08854f9
 
 
 
 
 
d711953
 
 
 
 
 
 
 
 
 
08854f9
 
 
 
 
 
 
 
d711953
567ae09
08854f9
 
 
 
d711953
 
08854f9
 
d711953
08854f9
d711953
 
 
 
 
567ae09
 
d711953
08854f9
adb521e
 
08854f9
 
567ae09
d711953
 
 
567ae09
d711953
 
08854f9
d711953
08854f9
 
 
 
d711953
 
08854f9
d711953
08854f9
d711953
08854f9
 
 
d711953
 
 
08854f9
 
d711953
 
 
08854f9
 
d711953
08854f9
 
d711953
 
08854f9
 
567ae09
adb521e
08854f9
 
 
 
 
 
 
d711953
08854f9
 
567ae09
08854f9
 
 
 
567ae09
08854f9
567ae09
08854f9
 
567ae09
 
 
08854f9
 
 
567ae09
08854f9
567ae09
 
 
08854f9
 
567ae09
08854f9
 
d711953
 
 
adb521e
08854f9
adb521e
 
08854f9
 
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08854f9
 
567ae09
 
 
d711953
 
 
08854f9
d711953
 
567ae09
d711953
 
 
567ae09
 
d711953
 
 
567ae09
 
 
 
 
d711953
 
 
567ae09
d711953
567ae09
d711953
 
567ae09
 
 
 
d711953
 
 
 
567ae09
d711953
 
567ae09
 
 
 
d711953
 
567ae09
 
d711953
567ae09
 
d711953
567ae09
 
d711953
 
567ae09
adb521e
567ae09
d711953
567ae09
adb521e
567ae09
 
 
 
 
08854f9
567ae09
adb521e
 
567ae09
 
 
adb521e
567ae09
 
 
 
adb521e
567ae09
adb521e
 
 
08854f9
adb521e
567ae09
 
 
 
adb521e
 
567ae09
 
adb521e
d711953
08854f9
 
d711953
567ae09
adb521e
08854f9
d711953
 
 
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567ae09
adb521e
 
567ae09
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567ae09
adb521e
 
567ae09
adb521e
 
567ae09
adb521e
 
 
 
567ae09
 
adb521e
 
 
567ae09
 
 
adb521e
 
08854f9
adb521e
 
08854f9
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08854f9
 
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
08854f9
 
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
567ae09
 
d711953
567ae09
d711953
567ae09
 
 
 
 
 
 
 
 
 
 
 
 
d711953
567ae09
d711953
 
567ae09
 
 
d711953
567ae09
 
d711953
567ae09
 
 
 
d711953
567ae09
 
 
 
 
 
 
 
 
 
 
d711953
 
567ae09
d711953
567ae09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d711953
 
 
 
567ae09
d711953
 
 
567ae09
adb521e
 
08854f9
adb521e
08854f9
adb521e
 
 
 
 
 
 
 
 
 
08854f9
 
adb521e
08854f9
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
08854f9
adb521e
 
 
08854f9
adb521e
 
 
08854f9
adb521e
 
 
 
 
 
 
 
 
08854f9
adb521e
 
 
08854f9
adb521e
 
 
08854f9
adb521e
 
08854f9
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08854f9
adb521e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08854f9
d711953
 
 
 
eacfed9
d711953
 
73cbd6b
d711953
 
ef6747a
 
a38c6dc
ef6747a
 
 
 
 
 
d711953
ef6747a
 
eacfed9
73cbd6b
d711953
 
 
 
 
 
 
eacfed9
d711953
 
 
 
eacfed9
8836743
d711953
 
 
 
5f09da6
eacfed9
 
 
d711953
 
 
218b69c
c7a9cec
 
d711953
 
 
 
218b69c
c7a9cec
 
d711953
 
c7a9cec
d711953
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
library(plumber)

#* @apiTitle Effect Size Calculator API



#' Calculate a Distribution-Free Effect Size (d_reg)
#'
#' This function computes a distribution-free effect size by modeling the 
#' empirical distribution function (eCDF) of two groups via polynomial 
#' regression. The effect size is computed as the standardized
#' difference between the means of the smoothed distributions.
#'
#' The method involves:
#' 1. Fitting polynomials to each group's quantile function: x = f(z)
#' 2. Computing moments (mean, variance) of the polynomials
#' 3. Calculating d(reg) using the pooled standard deviation
#'
#' @param x1 A numeric vector of data for the first group.
#' @param x2 A numeric vector of data for the second group.
#' @param degree The degree of the polynomial to fit (default = 5).
#'        Higher degrees capture more complex distributional shapes but
#'        may overfit with small samples.
#' @param CI Confidence level for confidence interval (default = NA, no CI computed).
#'        If specified (e.g., 0.95), uses asymptotic normal approximation.
#'        WARNING: CI formula assumes Cohen's d distribution and may not be accurate for d_reg.
#' @param silent Logical; if TRUE, suppresses warnings during fitting (default = TRUE).
#'
#' @return A list (S3 class "d_reg") containing:
#'   \item{d_reg}{The distribution-free effect size (standardized mean difference).}
#'   \item{group1_mean}{Mean of the smoothed distribution for group 1.}
#'   \item{group1_variance}{Variance of the smoothed distribution for group 1.}
#'   \item{group1_sd}{Standard deviation of the smoothed distribution for group 1.}
#'   \item{group2_mean}{Mean of the smoothed distribution for group 2.}
#'   \item{group2_variance}{Variance of the smoothed distribution for group 2.}
#'   \item{group2_sd}{Standard deviation of the smoothed distribution for group 2.}
#'   \item{pooled_sd}{Pooled standard deviation.}
#'   \item{n1}{Sample size of group 1.}
#'   \item{n2}{Sample size of group 2.}
#'   \item{model1}{Fitted polynomial model for group 1.}
#'   \item{model2}{Fitted polynomial model for group 2.}
#'   \item{default_degree}{Polynomial degree used.}
#'   \item{tie_proportion_1}{Proportion of tied values in group 1.}
#'   \item{tie_proportion_2}{Proportion of tied values in group 2.}
#'   \item{n_unique_1}{Number of unique values in group 1.}
#'   \item{n_unique_2}{Number of unique values in group 2.}
#'   \item{ci_lower}{Lower bound of confidence interval (if CI specified).}
#'   \item{ci_upper}{Upper bound of confidence interval (if CI specified).}
#'   \item{ci_level}{Confidence level (if CI specified).}
#'
#' @details
#' The method is distribution-free and converges to Cohen's d under normality with 
#' increasing group size. It is robust to outliers and skewness compared to 
#' classical parametric methods.
#' 
#' Sample size requirements: At least (degree + 1) observations per group.
#' Recommended: n > 10 per group for stable polynomial fits.
#' For small samples (n < 20), consider using degree = 3 or lower.
#'
#' Confidence intervals use an asymptotic approximation based on Cohen's d 
#' distribution and may not accurately reflect the true sampling distribution 
#' of d_reg, especially in small samples or non-normal data.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' @references
#'   Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation: 
#'   A Robust Alternative to Cohen's d.
#'
#' @examples
#' # Normal distributions
#' set.seed(123)
#' x1 <- rnorm(30, mean = 0, sd = 1)
#' x2 <- rnorm(30, mean = 0.5, sd = 1)
#' result <- d.reg(x1, x2)
#' print(result)
#'
#' # With confidence interval
#' result_ci <- d.reg(x1, x2, CI = 0.95)
#' print(result_ci)
#'
#' # Skewed distributions
#' x1 <- rexp(50, rate = 1)
#' x2 <- rexp(50, rate = 0.8)
#' result <- d.reg(x1, x2, degree = 4)
#' print(result)
#'
#' @export
d.reg <- function(x1, x2, degree = 5, CI = NA, silent = TRUE) {
  
  # ============================================================================
  # Input Validation
  # ============================================================================
  
  if (!is.numeric(x1) || !is.numeric(x2)) {
    stop("Both x1 and x2 must be numeric vectors.")
  }
  
  # Handle missing values (warning only surfaces when silent = FALSE)
  if (any(is.na(x1)) || any(is.na(x2))) {
    if (!silent) {
      warning("Missing values detected and will be removed.")
    }
    x1 <- x1[!is.na(x1)]
    x2 <- x2[!is.na(x2)]
  }
  
  n1 <- length(x1)
  n2 <- length(x2)
  
  # Check for empty groups
  if (n1 == 0 || n2 == 0) {
    stop("Cannot compute effect size with empty groups after removing NAs.")
  }
  
  # A degree-k polynomial needs at least k + 1 observations per group
  if (n1 < degree + 1) {
    stop("Group 1 has insufficient data: need at least ", degree + 1, 
         " observations for degree ", degree, " polynomial (got ", n1, ").")
  }
  
  if (n2 < degree + 1) {
    stop("Group 2 has insufficient data: need at least ", degree + 1, 
         " observations for degree ", degree, " polynomial (got ", n2, ").")
  }
  
  # Validate CI. FIX: check length BEFORE is.na() -- previously a vector CI
  # made `if (!is.na(CI))` fail with "condition has length > 1" instead of
  # producing the intended error message.
  if (length(CI) != 1) {
    stop("CI must be a single numeric value or NA.")
  }
  if (!is.na(CI)) {
    if (!is.numeric(CI)) {
      stop("CI must be a single numeric value or NA.")
    }
    if (CI <= 0 || CI >= 1) {
      stop("CI must be between 0 and 1 (exclusive).")
    }
  }

  # Fit a polynomial quantile function x = f(z) per group
  model1 <- fit_polynomial(x1, degree)
  model2 <- fit_polynomial(x2, degree)
  
  # Extract tie diagnostics attached by fit_polynomial()
  tie1 <- attr(model1, "tie_proportion")
  tie2 <- attr(model2, "tie_proportion")
  n_unique1 <- attr(model1, "n_unique")
  n_unique2 <- attr(model2, "n_unique")
  
  # Warn about substantial ties (suggests discrete/ordinal data)
  if (!silent && (tie1 > 0.3 || tie2 > 0.3)) {
    message(sprintf(
      "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
      tie1 * 100, tie2 * 100
    ))
    message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
    message("Consider comparing multiple effect size measures for discrete data.")
  }

  # Analytic moments of the smoothed distributions
  moments1 <- get_moments(model1, group_label = "Group 1")
  moments2 <- get_moments(model2, group_label = "Group 2")

  # Weighted pooled variance (population formula, not sample formula)
  weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
  pooled_sd <- sqrt(weighted_pooled_variance)
  mean_diff <- moments2$mean - moments1$mean
  
  # Degenerate-variance edge cases: 0/0 -> 0, x/0 -> signed Inf
  if (pooled_sd == 0) {
    if (mean_diff == 0) {
      d_reg <- 0
    } else {
      d_reg <- sign(mean_diff) * Inf
      if (!silent) {
        warning("Pooled SD is zero but means differ. Returning Inf with appropriate sign.")
      }
    }
  } else {
    d_reg <- mean_diff / pooled_sd
  }

  result <- list(
    d_reg = d_reg,
    
    # Group 1 statistics
    group1_mean = moments1$mean,
    group1_variance = moments1$variance,
    group1_sd = sqrt(moments1$variance),
    
    # Group 2 statistics
    group2_mean = moments2$mean,
    group2_variance = moments2$variance,
    group2_sd = sqrt(moments2$variance),
    
    # Pooled statistics
    pooled_sd = pooled_sd,
    
    # Sample sizes
    n1 = n1,
    n2 = n2,
    
    # Models
    model1 = model1,
    model2 = model2,
    
    # Metadata
    default_degree = degree,
    tie_proportion_1 = tie1,
    tie_proportion_2 = tie2,
    n_unique_1 = n_unique1,
    n_unique_2 = n_unique2
  )

  if (!is.na(CI)) {
    
    # Standard error using asymptotic approximation.
    # NOTE: this formula assumes Cohen's d distribution and may not be
    # accurate for d_reg, especially in small samples or non-normal data.
    se_dreg <- sqrt((n1 + n2) / (n1 * n2) + (d_reg^2) / (2 * (n1 + n2)))
    
    # Degrees of freedom
    df <- n1 + n2 - 2
    
    # Critical value from t-distribution
    alpha <- 1 - CI
    t_crit <- qt(1 - alpha / 2, df)
    
    # Confidence interval bounds
    result$ci_lower <- d_reg - t_crit * se_dreg
    result$ci_upper <- d_reg + t_crit * se_dreg
    result$ci_level <- CI
    result$ci_se <- se_dreg
    result$ci_df <- df
  }

  class(result) <- "d_reg"
  return(result)
}


#' Fit a Polynomial to eCDF
#'
#' This helper function fits a polynomial regression model to represent the
#' distribution. It models the relationship between
#' z-scores and observed raw scores.
#'
#' @param x A numeric vector of observations.
#' @param poly_degree The degree of the polynomial to fit.
#' @param check_monotonicity Logical; should monotonicity be enforced by 
#'   reducing polynomial degree if needed? (default = FALSE for speed)
#' @param min_degree Minimum polynomial degree to try (default = 1, representing
#'   a linear fit to a normal distribution).
#'
#' @return An lm model object representing x = f(z), where z ~ N(0,1).
#'   Additional attributes:
#'   \describe{
#'     \item{sample_size}{Original sample size}
#'     \item{n_unique}{Number of unique values (for tie detection)}
#'     \item{tie_proportion}{Proportion of tied observations}
#'     \item{poly_degree}{Actual polynomial degree used (may be reduced)}
#'     \item{monotonic}{Logical; is the fitted function monotonic?}
#'     \item{degree_reduced}{Logical; was degree reduced from requested?}
#'   }
#'
#' @details
#' The function uses average ranks (midrank method) to handle tied observations,
#' which is the standard approach in rank-based statistics. Plotting positions 
#' (rank - 0.5)/n avoid infinite z-scores at boundaries.
#'
#' When substantial ties are present (>10% of observations), the function may
#' automatically reduce the polynomial degree to avoid overfitting to a small
#' number of unique values.
#'
#' If check_monotonicity=TRUE, the function iteratively reduces the polynomial
#' degree until a monotonic fit is achieved or min_degree is reached.
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#'   Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation: 
#'   A Robust Alternative to Cohen’s d.
#'
#' @export
fit_polynomial <- function(x, poly_degree, 
                           check_monotonicity = TRUE,
                           min_degree = 1) {
  # Fits x ~ poly(z, degree, raw = TRUE) where z are standard-normal quantiles
  # of the midrank plotting positions, i.e. a smooth model of the quantile
  # function. Diagnostic attributes consumed by d.reg() are attached.
  #
  # NOTE: the `check_monotonicity` argument shadows the helper function of the
  # same name. R's function-call lookup skips non-function bindings, so the
  # calls below still resolve to the helper.
  
  # Step 1: Input validation and tie detection
  n <- length(x)
  
  if (n < 3) {
    stop("Need at least 3 observations to fit a polynomial.")
  }
  
  # Count unique values to detect ties
  n_unique <- length(unique(x))
  tie_proportion <- 1 - (n_unique / n)
  
  # Step 2: Adjust polynomial degree based on unique values.
  # Can't fit more parameters than unique data points.
  max_possible_degree <- n_unique - 1
  
  if (poly_degree > max_possible_degree) {
    # FIX: the format string was previously split across sprintf() arguments,
    # which garbled the warning text and dropped the "Reducing to degree" part
    warning(sprintf(
      paste0("Requested polynomial degree (%d) exceeds number of unique ",
             "values (%d). Reducing to degree %d."),
      poly_degree, n_unique, max_possible_degree
    ))
    poly_degree <- max_possible_degree
  }
  
  # Additional reduction for substantial ties (>30%): be more conservative
  if (tie_proportion > 0.3 && poly_degree > 3) {
    recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
    if (recommended_degree < poly_degree) {
      warning(sprintf(
        "High proportion of ties (%.1f%%). Reducing polynomial degree from %d to %d for stability.",
        tie_proportion * 100, poly_degree, recommended_degree
      ))
      poly_degree <- recommended_degree
    }
  }
  
  # Ensure we stay above minimum
  if (poly_degree < min_degree) {
    # FIX: same sprintf misuse as above -- message is now assembled correctly
    stop(sprintf(
      paste0("Insufficient unique values (%d) to fit minimum polynomial ",
             "degree (%d). Need at least %d unique observations."),
      n_unique, min_degree, min_degree + 1
    ))
  }
  
  # Step 3: Compute ranks and z-scores.
  # Midranks handle ties; plotting positions (rank - 0.5)/n keep p strictly
  # inside (0, 1), avoiding infinite z-scores at the boundaries.
  avg_ranks <- rank(x, ties.method = "average")
  p <- (avg_ranks - 0.5) / n
  z <- qnorm(p)
  
  # Step 4: Fit polynomial, with optional monotonicity enforcement
  check_range <- range(z)
  
  current_degree <- poly_degree
  degree_reduced <- FALSE
  monotonic <- FALSE
  
  if (check_monotonicity) {
    while (current_degree >= min_degree) {
      
      model <- lm(x ~ poly(z, current_degree, raw = TRUE))
      check <- check_monotonicity(model, z_range = check_range)
      
      if (check$is_monotonic) {
        monotonic <- TRUE
        break
      }
      
      # Reduce degree and try again
      current_degree <- current_degree - 1
      degree_reduced <- TRUE
    }
    
    # Emergency fallback: accept the minimum-degree fit even if non-monotonic
    if (current_degree < min_degree) {
      current_degree <- min_degree
      model <- lm(x ~ poly(z, current_degree, raw = TRUE))
      check <- check_monotonicity(model, z_range = check_range)
      monotonic <- check$is_monotonic
    }
    
  } else {
    model <- lm(x ~ poly(z, current_degree, raw = TRUE))
    check <- check_monotonicity(model, z_range = check_range)
    monotonic <- check$is_monotonic
  }
  
  # Metadata consumed by d.reg() and promised by the documentation
  attr(model, "sample_size") <- n
  attr(model, "n_unique") <- n_unique
  attr(model, "tie_proportion") <- tie_proportion
  attr(model, "poly_degree") <- current_degree
  attr(model, "monotonic") <- monotonic
  # FIX: documented attribute that was previously computed but never attached
  attr(model, "degree_reduced") <- degree_reduced
  attr(model, "min_derivative") <- check$min_derivative

  return(model)
}



#' Check Monotonicity of Fitted Quantile Function
#'
#' Analytically checks if a polynomial quantile function is monotonic 
#' within the observed range of the data.
#'
#' @param model An lm model object fitted with poly(..., raw=TRUE).
#' @param z_range A numeric vector of length 2 defining the range [min, max]
#'   over which to check monotonicity. If NULL, checks reasonable defaults 
#'   based on N (-4 to 4).
#' @param strictly_positive Logical; if TRUE, derivative must be > 0 (strict).
#'   If FALSE, derivative can be >= 0 (allows flat regions).
#'
#' @return A list containing:
#'   \item{is_monotonic}{Logical; TRUE if monotonic in range.}
#'   \item{min_derivative}{The lowest slope found in the range.}
#'   \item{location_min}{The z-value where the minimum slope occurs.}
#'
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#'   Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation: 
#'   A Robust Alternative to Cohen’s d.
#'   
#' @export
check_monotonicity <- function(model, z_range = c(-4, 4), 
                               strictly_positive = FALSE) {
  # Analytically checks whether the fitted polynomial x = f(z) is monotonic on
  # z_range by minimizing f'(z) over the interval (endpoints plus the real
  # roots of f''(z)).
  
  # FIX: the documentation promises z_range = NULL falls back to the default
  # range; previously NULL hit the length check below and errored.
  if (is.null(z_range)) {
    z_range <- c(-4, 4)
  }
  
  # Input handling
  if (!inherits(model, "lm")) stop("model must be an lm object")
  if (length(z_range) != 2 || z_range[1] >= z_range[2]) {
    stop("z_range must be a vector [min, max] with min < max")
  }
  
  # Extract polynomial coefficients
  coeffs <- coef(model)
  
  # NA coefficients indicate a rank-deficient fit; report non-monotonic so
  # callers reduce the polynomial degree
  if (any(is.na(coeffs))) return(list(
    is_monotonic = FALSE, 
    min_derivative = -Inf,
    location_min = NA
  ))
  
  degree <- length(coeffs) - 1
  
  # Special cases for low-degree polynomials
  if (degree == 0) {
    # Constant: trivially (weakly) monotonic
    return(list(
      is_monotonic = TRUE,
      min_derivative = 0,
      location_min = 0
    ))
  } else if (degree == 1) {
    # Linear: f(z) = b0 + b1*z -> f'(z) = b1, the derivative IS the slope
    slope <- coeffs[2]
    return(list(
      is_monotonic = if(strictly_positive) slope > 0 else slope >= 0,
      min_derivative = slope,
      location_min = 0
    ))
  }
  
  # 1. Coefficients of the first derivative f'(z):
  # f(z)  = c0 + c1*z + c2*z^2 + c3*z^3 ...
  # f'(z) = c1 + 2*c2*z + 3*c3*z^2 ...
  deriv1_coeffs <- numeric(degree) # Degree drops by 1
  for (i in 1:degree) {
    # Coeff index i+1 corresponds to power z^i in the original model
    deriv1_coeffs[i] <- coeffs[i + 1] * i 
  }
  
  # Evaluate a polynomial (given ascending coefficients) via Horner's method
  eval_deriv <- function(z, coefs) {
    val <- coefs[length(coefs)]
    if (length(coefs) > 1) {
      for (i in (length(coefs)-1):1) {
        val <- val * z + coefs[i]
      }
    }
    return(val)
  }
  
  # 2. Critical points of the derivative: roots of f''(z), where the slope
  # can attain an interior minimum
  degree_d1 <- degree - 1
  if (degree_d1 >= 1) {
    deriv2_coeffs <- numeric(degree_d1)
    for (i in 1:degree_d1) {
      deriv2_coeffs[i] <- deriv1_coeffs[i + 1] * i
    }
    
    # polyroot() returns complex roots; keep those with negligible imaginary
    # part and inside the check range
    roots_complex <- polyroot(deriv2_coeffs)
    real_indices <- abs(Im(roots_complex)) < 1e-9
    roots_real <- Re(roots_complex)[real_indices]
    critical_z <- roots_real[roots_real >= z_range[1] & roots_real <= z_range[2]]
    
  } else {
    # Constant derivative (already handled by the degree == 1 branch above)
    critical_z <- numeric(0)
  }
  
  # 3. The minimum slope must occur at an endpoint or an interior critical point
  check_points <- unique(c(z_range[1], z_range[2], critical_z))
  slopes <- sapply(check_points, eval_deriv, coefs = deriv1_coeffs)
  
  min_slope <- min(slopes)
  loc_min <- check_points[which.min(slopes)]
  
  # Small tolerance so numerical noise near zero does not flip the verdict
  threshold <- if(strictly_positive) 1e-9 else -1e-9
  
  return(list(
    is_monotonic = min_slope >= threshold,
    min_derivative = min_slope,
    location_min = loc_min
  ))
}



#' Calculate Moments from a Fitted Polynomial Function
#'
#' This function computes the mean and variance of the distribution represented 
#' by a polynomial function using Iserlis (1918) theorem.
#'
#' @param model An lm model object from \code{\link{fit_polynomial}()}.
#'   The model should represent the relationship x = f(z) where z are standard 
#'   normal quantiles and x are the observed values.
#' @param group_label Optional character string label for warning messages 
#'   (default = "Unknown"). Used to identify which group produced warnings in 
#'   multi-group comparisons.
#'
#' @return A list with elements:
#'   \item{mean}{The expected value E[X] where X = f(Z), Z ~ N(0,1).}
#'   \item{variance}{The variance Var(X) = E[X²] - (E[X])².}
#'
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#'   Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation: 
#'   A Robust Alternative to Cohen’s d.
#'
#' @seealso 
#' \code{\link{fit_polynomial}} for fitting the polynomial model.
#' \code{\link{d.reg}} for the main effect size calculation.
#' 
#' @examples
#' # Generate sample data
#' set.seed(123)
#' x <- rnorm(50, mean = 100, sd = 15)
#'
#' # Fit quantile function
#' n <- length(x)
#' avg_ranks <- rank(x, ties.method = "average")
#' p <- (avg_ranks - 0.5) / n
#' z <- qnorm(p)
#' model <- lm(x ~ poly(z, 5, raw = TRUE))
#'
#' # Compute moments
#' moments <- get_moments(model, group_label = "Test Group")
#' cat("Mean:", moments$mean, "\n")
#' cat("Variance:", moments$variance, "\n")
#' cat("SD:", sqrt(moments$variance), "\n")
#'
#' # Compare with sample statistics
#' cat("\nSample mean:", mean(x), "\n")
#' cat("Sample variance:", var(x), "\n")
#'
#' @export
get_moments <- function(model, group_label = "Unknown") {
  # Computes E[X] and Var(X) for X = f(Z), Z ~ N(0,1), where f is the fitted
  # polynomial, using the raw moments of the standard normal distribution.
  
  # FIX: unname() strips coefficient names (e.g. "(Intercept)") that
  # previously leaked into the returned mean/variance scalars
  beta <- unname(coef(model))
  k <- length(beta) - 1  # polynomial degree
  
  # Raw moments of Z ~ N(0,1):
  #   E[Z^j] = 0 for odd j (symmetry)
  #   E[Z^j] = (j-1)!! for even j, e.g. E[Z^2]=1, E[Z^4]=3, E[Z^6]=15
  zmom <- function(j) {
    if (j %% 2 == 1) return(0)
    if (j == 0) return(1)
    prod(seq(1, j - 1, by = 2))  # double factorial (j-1)!!
  }
  
  # Moments up to 2k are needed for E[X^2]
  moments_z <- vapply(0:(2 * k), zmom, numeric(1))
  
  # Mean: mu = E[f(Z)] = sum_j beta_j * E[Z^j]
  mu <- sum(beta * moments_z[seq_len(k + 1)])
  
  # E[X^2] = sum_{i,j} beta_i * beta_j * E[Z^(i+j)]; i + j <= 2k always holds,
  # so the previous `if (power <= max_moment)` guard was dead code
  powers <- outer(0:k, 0:k, "+")
  E_X2 <- sum(outer(beta, beta) * moments_z[powers + 1])
  
  variance <- E_X2 - mu^2
  
  # Guard against negative variance from numerical round-off or instability
  if (variance < 0) {
    if (abs(variance) < 1e-10) {
      # Just numerical noise -- clamp to zero silently
      variance <- 0
    } else {
      # Substantially negative variance indicates a real fitting problem
      warning(
        "Variance for ", group_label, " is negative (", 
        format(variance, scientific = TRUE, digits = 3), 
        "). This indicates numerical instability in the polynomial fit. ",
        "Consider reducing the polynomial degree or checking for data issues.",
        call. = FALSE
      )
      variance <- 0
    }
  }
  
  return(list(
    mean = mu, 
    variance = variance
  ))
}




#' Print Method for d_reg Objects
#'
#' @param x An object of class "d_reg"
#' @param ... Additional arguments (not used)
#' 
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#'   Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation: 
#'   A Robust Alternative to Cohen’s d.
#'   
#' @export
print.d_reg <- function(x, ...) {
  # Compact console display for a d_reg result; returns x invisibly so the
  # object can still be piped/assigned.
  cat("\nDistribution-Free Effect Size (d_reg)\n")
  cat("===================================\n\n")
  cat("Effect size d_reg:", round(x$d_reg, 4), "\n")
  
  # Confidence interval line (only present when d.reg() was called with CI)
  if (!is.null(x$ci_lower)) {
    # FIX: the sprintf format already ends in \n; the previous extra "\n"
    # argument to cat() printed a spurious " \n" line after the CI.
    # as.integer() makes the %d argument explicitly integral.
    cat(sprintf("%d%% CI: [%.4f, %.4f]\n", 
                as.integer(round(x$ci_level * 100)), 
                x$ci_lower, 
                x$ci_upper))
  }
  
  cat("\nGroup 1: n =", x$n1, ", mean =", round(x$group1_mean, 4), 
      ", SD =", round(x$group1_sd, 4), "\n")
  cat("Group 2: n =", x$n2, ", mean =", round(x$group2_mean, 4), 
      ", SD =", round(x$group2_sd, 4), "\n")
  cat("Pooled SD:", round(x$pooled_sd, 4), "\n")
  cat("Polynomial degree:", x$default_degree, "\n")
  invisible(x)
}

#' Summary Method for d_reg Objects
#'
#' Provides detailed summary statistics and diagnostic information.
#'
#' @param object An object of class "d_reg"
#'
#' @author Wolfgang Lenhard and Alexandra Lenhard
#' Licensed under the MIT License
#'
#' Citation:
#'   Lenhard, W. & Lenhard, A. (submitted). Distribution-Free Effect Size Estimation: 
#'   A Robust Alternative to Cohen’s d.
#'   
#' @export
summary.d_reg <- function(object, ...) {
  # Detailed console report for a d_reg result: effect size with a verbal
  # interpretation, per-group statistics, pooled statistics, model fit
  # diagnostics, and the confidence interval when present.
  # Returns `object` invisibly. `...` is accepted for S3 compatibility with
  # summary() but is not used.
  
  cat("\n")
  cat("=======================================================\n")
  cat("  Distribution-Free Effect Size Analysis (d_reg)\n")
  cat("=======================================================\n\n")
  
  cat("Effect Size:\n")
  cat("  d_reg =", round(object$d_reg, 4), "\n")
  
  # Interpretation: conventional |d| cutoffs 0.2 / 0.5 / 0.8
  abs_d <- abs(object$d_reg)
  interpretation <- if (abs_d < 0.2) {
    "negligible"
  } else if (abs_d < 0.5) {
    "small"
  } else if (abs_d < 0.8) {
    "medium"
  } else {
    "large"
  }
  cat("  Interpretation:", interpretation, "\n\n")
  
  cat("Group 1:\n")
  cat("  Sample size:      ", object$n1, "\n")
  cat("  Mean (smoothed):  ", round(object$group1_mean, 4), "\n")
  cat("  SD (smoothed):    ", round(object$group1_sd, 4), "\n")
  cat("  Variance:         ", round(object$group1_variance, 4), "\n\n")
  
  cat("Group 2:\n")
  cat("  Sample size:      ", object$n2, "\n")
  cat("  Mean (smoothed):  ", round(object$group2_mean, 4), "\n")
  cat("  SD (smoothed):    ", round(object$group2_sd, 4), "\n")
  cat("  Variance:         ", round(object$group2_variance, 4), "\n\n")
  
  cat("Pooled Statistics:\n")
  cat("  Pooled SD:        ", round(object$pooled_sd, 4), "\n")
  cat("  Mean difference:  ", round(object$group2_mean - object$group1_mean, 4), "\n\n")
  
  # R-squared of the polynomial quantile-function fits; requires model1 and
  # model2 to be lm objects as produced by d.reg()
  cat("Model Details:\n")
  cat("  Polynomial degree:", object$default_degree, "\n")
  cat("  Model 1 R²:       ", round(summary(object$model1)$r.squared, 4), "\n")
  cat("  Model 2 R²:       ", round(summary(object$model2)$r.squared, 4), "\n\n")
  
  # CI block only when d.reg() was called with CI specified.
  # NOTE(review): the round() calls are redundant here since %.4f already
  # formats to 4 decimal places.
  if(!is.null(object$ci_lower) && !is.null(object$ci_upper)) {
    cat(sprintf("Confidence Interval (%.1f%%): [%.4f, %.4f]\n\n", 
                object$ci_level * 100, 
                round(object$ci_lower, 4), 
                round(object$ci_upper, 4)))
  }
  cat("=======================================================\n\n")
  
  invisible(object)
}



# API endpoint
#* Calculate effect size from two groups
#* @param group1 Comma-separated numeric values for group 1
#* @param group2 Comma-separated numeric values for group 2
#* @param ci Confidence interval level (default 0.95)
#* @post /calculate
#* @get /calculate
function(group1 = NULL, group2 = NULL, degree = 4, ci = 0.95) {
  # Plumber handler for /calculate: parses comma-separated numeric strings,
  # runs d.reg(), and returns a JSON-serializable list. All failures are
  # converted to {success = FALSE, error = ...} responses.
  
  tryCatch({
    
    # Missing/empty parameters. FIX: length-safe check -- plumber can deliver
    # repeated query parameters as vectors, and scalar `||` on a length > 1
    # comparison errors in R >= 4.3.
    if (is.null(group1) || is.null(group2) ||
        !any(nzchar(as.character(group1))) ||
        !any(nzchar(as.character(group2)))) {
      return(list(
        success = FALSE,
        error = "Missing required parameters: group1 and group2"
      ))
    }
    
    # Parse comma-separated inputs into numeric vectors
    x1 <- as.numeric(unlist(strsplit(as.character(group1), ",")))
    x2 <- as.numeric(unlist(strsplit(as.character(group2), ",")))
    # FIX: take the first element so repeated ci/degree parameters cannot
    # produce length > 1 values downstream
    ci_level <- as.numeric(ci)[1]
    degree <- as.numeric(degree)[1]
    
    # Drop unparseable tokens (as.numeric coerces them to NA)
    x1 <- x1[!is.na(x1)]
    x2 <- x2[!is.na(x2)]
    
    if (length(x1) == 0 || length(x2) == 0) {
      return(list(
        success = FALSE,
        error = "Invalid input: Could not parse numeric values from input strings"
      ))
    }
    
    # Calculate effect size with CI
    result <- d.reg(x1, x2, degree, CI = ci_level, silent = TRUE)
    
    # Clean response; ci_* fields are NULL (and thus dropped) when no CI
    # was computed
    return(list(
      success = TRUE,
      d_reg = result$d_reg,
      ci_lower = result$ci_lower,
      ci_upper = result$ci_upper,
      ci_level = result$ci_level,
      group1 = list(
        n = result$n1,
        mean = result$group1_mean,
        sd = result$group1_sd,
        mean_classic = mean(x1),
        sd_classic = sd(x1)
      ),
      group2 = list(
        n = result$n2,
        mean = result$group2_mean,
        sd = result$group2_sd,
        mean_classic = mean(x2),
        sd_classic = sd(x2)
      ),
      pooled_sd = result$pooled_sd,
      degree = result$default_degree
    ))
    
  }, error = function(e) {
    return(list(
      success = FALSE,
      error = as.character(e$message)
    ))
  })
}

#* Health check endpoint
#* @get /
function() {
  # Liveness probe: returns a static payload describing the service and its
  # main endpoint.
  info <- list(
    status = "running",
    message = "Effect Size Calculator API is ready",
    endpoints = list(
      calculate = "/calculate?group1=1,2,3&group2=4,5,6"
    )
  )
  info
}