WLenhard committed on
Commit
567ae09
·
verified ·
1 Parent(s): 3267b56

Upload app.R

Browse files
Files changed (1) hide show
  1. app.R +237 -48
app.R CHANGED
@@ -2,8 +2,7 @@ library(plumber)
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
- # Your functions here
6
- d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
7
 
8
  # Input validation
9
  if (!is.numeric(x1) || !is.numeric(x2)) {
@@ -35,9 +34,11 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
35
  stop("Cannot compute effect size with empty groups after removing NAs.")
36
  }
37
 
 
38
  model1 <- fit_quantile_function(x1, degree)
39
  model2 <- fit_quantile_function(x2, degree)
40
-
 
41
  tie1 <- attr(model1, "tie_proportion")
42
  tie2 <- attr(model2, "tie_proportion")
43
 
@@ -46,16 +47,22 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
46
  "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
47
  tie1 * 100, tie2 * 100
48
  ))
 
 
49
  }
50
 
51
- moments1 <- get_moments(model1, group_label = "Group 1")
52
- moments2 <- get_moments(model2, group_label = "Group 2")
 
53
 
54
- weighted_pooled_variance <- ((n1 -1) * moments1$variance + (n2 - 1) * moments2$variance) / (n1 + n2 - 2)
 
55
  pooled_sd <- sqrt(weighted_pooled_variance)
56
 
 
57
  mean_diff <- moments2$mean - moments1$mean
58
 
 
59
  if (pooled_sd == 0) {
60
  if (mean_diff == 0) {
61
  d_q <- 0
@@ -67,6 +74,7 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
67
  d_q <- mean_diff / pooled_sd
68
  }
69
 
 
70
  result <- list(
71
  d_q = d_q,
72
  group1_mean = moments1$mean,
@@ -78,18 +86,47 @@ d.quantile <- function(x1, x2, degree = 5, silent = TRUE) {
78
  pooled_sd = pooled_sd,
79
  n1 = n1,
80
  n2 = n2,
81
- degree = degree
 
 
82
  )
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  result$tie_proportion_1 <- tie1
85
  result$tie_proportion_2 <- tie2
 
 
86
 
 
87
  return(result)
88
  }
89
 
90
  fit_quantile_function <- function(x, poly_degree,
91
  check_monotonicity = FALSE,
92
- min_degree = 2) {
 
 
 
 
93
 
94
  n <- length(x)
95
 
@@ -97,99 +134,251 @@ fit_quantile_function <- function(x, poly_degree,
97
  stop("Need at least 3 observations to fit a polynomial quantile function.")
98
  }
99
 
 
100
  n_unique <- length(unique(x))
101
  tie_proportion <- 1 - (n_unique / n)
102
 
 
 
 
 
 
103
  max_possible_degree <- n_unique - 1
104
 
105
  if (poly_degree > max_possible_degree) {
 
 
 
 
 
106
  poly_degree <- max_possible_degree
107
  }
108
 
 
109
  if (tie_proportion > 0.3 && poly_degree > 3) {
 
110
  recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
111
  if (recommended_degree < poly_degree) {
 
 
 
 
112
  poly_degree <- recommended_degree
113
  }
114
  }
115
 
 
116
  if (poly_degree < min_degree) {
117
  stop(sprintf(
118
- "Insufficient unique values (%d) to fit minimum polynomial degree (%d).",
119
- n_unique, min_degree
120
- ))
 
121
  }
122
 
 
 
 
 
 
 
123
  avg_ranks <- rank(x, ties.method = "average")
 
 
124
  p <- (avg_ranks - 0.5) / n
 
 
125
  z <- qnorm(p)
126
 
 
 
 
 
127
  current_degree <- poly_degree
128
- model <- lm(x ~ poly(z, current_degree, raw = TRUE))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
  attr(model, "sample_size") <- n
131
  attr(model, "n_unique") <- n_unique
132
  attr(model, "tie_proportion") <- tie_proportion
133
  attr(model, "poly_degree") <- current_degree
 
 
 
 
134
 
135
  return(model)
136
  }
137
 
138
- get_moments <- function(model, group_label = "Unknown") {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  coeffs <- coef(model)
140
- poly_degree <- length(coeffs) - 1
 
 
 
 
 
 
141
 
142
- f <- function(z) {
143
- val <- coeffs[poly_degree + 1]
144
- for (i in poly_degree:1) {
145
- val <- val * z + coeffs[i]
 
 
 
 
 
146
  }
147
- return(val)
148
  }
149
 
150
- mean_integrand <- function(z) {
151
- f(z) * dnorm(z)
152
- }
153
 
154
- mean_result <- integrate(
155
- mean_integrand,
156
- lower = -Inf,
157
- upper = Inf,
158
- subdivisions = 2000L,
159
- rel.tol = 1e-8,
160
- abs.tol = 1e-10,
161
- stop.on.error = FALSE
162
- )
163
 
164
- mu <- mean_result$value
 
 
 
165
 
166
- variance_integrand <- function(z) {
167
- deviation <- f(z) - mu
168
- deviation^2 * dnorm(z)
 
 
 
 
 
 
 
 
 
 
169
  }
170
 
171
- variance_result <- integrate(
172
- variance_integrand,
173
- lower = -Inf,
174
- upper = Inf,
175
- subdivisions = 2000L,
176
- rel.tol = 1e-8,
177
- abs.tol = 1e-10,
178
- stop.on.error = FALSE
179
- )
180
 
181
- var <- variance_result$value
 
 
 
 
182
 
183
- if (var < 0 && abs(var) < 1e-10) {
184
- var <- 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  }
186
 
 
 
 
 
187
  return(list(
188
  mean = mu,
189
- variance = var
190
  ))
191
  }
192
 
 
193
  # API endpoint
194
  #* Calculate effect size from two groups
195
  #* @param group1 Comma-separated numeric values for group 1
 
2
 
3
  #* @apiTitle Effect Size Calculator API
4
 
5
+ d.quantile <- function(x1, x2, degree = 4, CI = .95, silent = T) {
 
6
 
7
  # Input validation
8
  if (!is.numeric(x1) || !is.numeric(x2)) {
 
34
  stop("Cannot compute effect size with empty groups after removing NAs.")
35
  }
36
 
37
+ # Step 1: Fit the polynomial models for each group
38
  model1 <- fit_quantile_function(x1, degree)
39
  model2 <- fit_quantile_function(x2, degree)
40
+
41
+ # Check for ties and warn user
42
  tie1 <- attr(model1, "tie_proportion")
43
  tie2 <- attr(model2, "tie_proportion")
44
 
 
47
  "Note: Substantial ties detected (Group 1: %.1f%%, Group 2: %.1f%%).",
48
  tie1 * 100, tie2 * 100
49
  ))
50
+ message("This suggests discrete/ordinal data. Results should be interpreted cautiously.")
51
+ message("Consider comparing multiple effect size measures for discrete data.")
52
  }
53
 
54
+ # Step 2: Get the moments from each fitted model
55
+ moments1 <- get_moments_analytical(model1, group_label = "Group 1")
56
+ moments2 <- get_moments_analytical(model2, group_label = "Group 2")
57
 
58
+ # Step 3: Calculate the pooled standard deviation
59
+ weighted_pooled_variance <- (n1 * moments1$variance + n2 * moments2$variance) / (n1 + n2)
60
  pooled_sd <- sqrt(weighted_pooled_variance)
61
 
62
+ # Step 4: Compute the effect size d_q
63
  mean_diff <- moments2$mean - moments1$mean
64
 
65
+ # Handle edge cases
66
  if (pooled_sd == 0) {
67
  if (mean_diff == 0) {
68
  d_q <- 0
 
74
  d_q <- mean_diff / pooled_sd
75
  }
76
 
77
+ # Return results
78
  result <- list(
79
  d_q = d_q,
80
  group1_mean = moments1$mean,
 
86
  pooled_sd = pooled_sd,
87
  n1 = n1,
88
  n2 = n2,
89
+ degree = degree,
90
+ model1 = model1,
91
+ model2 = model2
92
  )
93
 
94
+ if(!is.na(CI)) {
95
+ if(CI <= 0 || CI >= 1) {
96
+ stop("CI must be between 0 and 1 (exclusive).")
97
+ }
98
+
99
+ # Standard error for d_q
100
+ se_dq <- sqrt((n1 + n2) / (n1 * n2) + (d_q^2) / (2 * (n1 + n2)))
101
+
102
+ df <- n1 + n2 - 2
103
+ alpha <- 1 - CI
104
+ t_crit <- qt(1 - alpha / 2, df)
105
+
106
+ ci_lower <- d_q - t_crit * se_dq
107
+ ci_upper <- d_q + t_crit * se_dq
108
+
109
+ result$ci_lower <- ci_lower
110
+ result$ci_upper <- ci_upper
111
+ result$ci_level <- CI
112
+ }
113
+
114
  result$tie_proportion_1 <- tie1
115
  result$tie_proportion_2 <- tie2
116
+ result$n_unique_1 <- attr(model1, "n_unique")
117
+ result$n_unique_2 <- attr(model2, "n_unique")
118
 
119
+ class(result) <- "d_quantile"
120
  return(result)
121
  }
122
 
123
  fit_quantile_function <- function(x, poly_degree,
124
  check_monotonicity = FALSE,
125
+ min_degree = 1) {
126
+
127
+ # ============================================================================
128
+ # Step 1: Input validation and tie detection
129
+ # ============================================================================
130
 
131
  n <- length(x)
132
 
 
134
  stop("Need at least 3 observations to fit a polynomial quantile function.")
135
  }
136
 
137
+ # Count unique values to detect ties
138
  n_unique <- length(unique(x))
139
  tie_proportion <- 1 - (n_unique / n)
140
 
141
+ # ============================================================================
142
+ # Step 2: Adjust polynomial degree based on unique values
143
+ # ============================================================================
144
+
145
+ # Can't fit more parameters than unique data points
146
  max_possible_degree <- n_unique - 1
147
 
148
  if (poly_degree > max_possible_degree) {
149
+ warning(sprintf(
150
+ "Requested polynomial degree (%d) exceeds number of unique values (%d). ",
151
+ poly_degree, n_unique,
152
+ "Reducing to degree %d."
153
+ ), max_possible_degree)
154
  poly_degree <- max_possible_degree
155
  }
156
 
157
+ # Additional reduction for substantial ties
158
  if (tie_proportion > 0.3 && poly_degree > 3) {
159
+ # With >30% ties, be more conservative
160
  recommended_degree <- min(poly_degree, max(3, floor(n_unique / 2)))
161
  if (recommended_degree < poly_degree) {
162
+ warning(sprintf(
163
+ "High proportion of ties (%.1f%%). Reducing polynomial degree from %d to %d for stability.",
164
+ tie_proportion * 100, poly_degree, recommended_degree
165
+ ))
166
  poly_degree <- recommended_degree
167
  }
168
  }
169
 
170
+ # Ensure we stay above minimum
171
  if (poly_degree < min_degree) {
172
  stop(sprintf(
173
+ "Insufficient unique values (%d) to fit minimum polynomial degree (%d). ",
174
+ n_unique, min_degree,
175
+ "Need at least %d unique observations."
176
+ ), min_degree + 1)
177
  }
178
 
179
+ # ============================================================================
180
+ # Step 3: Compute ranks and z-scores (handles ties via midrank)
181
+ # ============================================================================
182
+
183
+ # Average ranks handle ties by assigning mean rank to tied observations
184
+ # Example: values [1, 2, 2, 3] get ranks [1, 2.5, 2.5, 4]
185
  avg_ranks <- rank(x, ties.method = "average")
186
+
187
+ # Convert ranks to plotting positions
188
  p <- (avg_ranks - 0.5) / n
189
+
190
+ # Transform to standard normal quantiles
191
  z <- qnorm(p)
192
 
193
+ # ============================================================================
194
+ # Step 4: Fit polynomial, with optional monotonicity enforcement
195
+ # ============================================================================
196
+
197
  current_degree <- poly_degree
198
+ degree_reduced <- FALSE
199
+ monotonic <- NULL # Will be checked if requested
200
+
201
+ if (check_monotonicity) {
202
+ # Iteratively reduce degree until monotonic or min_degree reached
203
+ while (current_degree >= min_degree) {
204
+
205
+ # Fit model at current degree
206
+ model <- lm(x ~ poly(z, current_degree, raw = TRUE))
207
+
208
+ # Check monotonicity
209
+ monotonicity_check <- check_monotonicity(model)
210
+ monotonic <- monotonicity_check$is_monotonic
211
+
212
+ if (monotonic) {
213
+ # Success - monotonic fit achieved
214
+ break
215
+ }
216
+
217
+ # Not monotonic - try lower degree
218
+ current_degree <- current_degree - 1
219
+ degree_reduced <- TRUE
220
+ }
221
+
222
+ if (current_degree < min_degree) {
223
+ stop(sprintf(
224
+ "Could not achieve monotonic fit even with minimum degree %d. ",
225
+ min_degree,
226
+ "Data may be too irregular or have insufficient unique values."
227
+ ))
228
+ }
229
+
230
+ if (degree_reduced) {
231
+ warning(sprintf(
232
+ "Polynomial degree reduced from %d to %d to achieve monotonicity.",
233
+ poly_degree, current_degree
234
+ ))
235
+ }
236
+
237
+ } else {
238
+ # Just fit at requested degree without monotonicity check
239
+ model <- lm(x ~ poly(z, current_degree, raw = TRUE))
240
+
241
+ # Optionally check monotonicity for diagnostic purposes (don't enforce)
242
+ if (exists("check_monotonicity", mode = "function")) {
243
+ monotonicity_check <- check_monotonicity(model)
244
+ monotonic <- monotonicity_check$is_monotonic
245
+ }
246
+ }
247
+
248
+ # ============================================================================
249
+ # Step 5: Store metadata as attributes
250
+ # ============================================================================
251
 
252
  attr(model, "sample_size") <- n
253
  attr(model, "n_unique") <- n_unique
254
  attr(model, "tie_proportion") <- tie_proportion
255
  attr(model, "poly_degree") <- current_degree
256
+ attr(model, "requested_degree") <- poly_degree
257
+ attr(model, "degree_reduced") <- degree_reduced
258
+ attr(model, "monotonic") <- monotonic
259
+ attr(model, "has_ties") <- tie_proportion > 0.01 # Flag if >1% ties
260
 
261
  return(model)
262
  }
263
 
264
+ check_monotonicity <- function(model, z_range = c(-3, 3), n_points = 100) {
265
+
266
+ z_seq <- seq(z_range[1], z_range[2], length.out = n_points)
267
+
268
+ # Get predictions
269
+ pred <- predict(model, newdata = data.frame(z = z_seq))
270
+
271
+ # Calculate finite differences (approximate derivatives)
272
+ derivatives <- diff(pred) / diff(z_seq)
273
+
274
+ # Check for violations (negative derivatives)
275
+ # Use small tolerance to avoid flagging numerical noise
276
+ tolerance <- -1e-6
277
+ violations <- sum(derivatives < tolerance)
278
+
279
+ min_deriv <- min(derivatives)
280
+ is_monotonic <- violations == 0
281
+
282
+ return(list(
283
+ is_monotonic = is_monotonic,
284
+ min_derivative = min_deriv,
285
+ violations = violations,
286
+ proportion_violations = violations / length(derivatives),
287
+ z_range_checked = z_range
288
+ ))
289
+ }
290
+
291
+ get_moments_analytical <- function(model, group_label = "Unknown") {
292
+
293
+ # Extract coefficients and determine polynomial degree
294
  coeffs <- coef(model)
295
+ k <- length(coeffs) - 1 # polynomial degree
296
+
297
+ # --------------------------------------------------------------------------
298
+ # Pre-compute standard normal raw moments: E[Z^j]
299
+ # --------------------------------------------------------------------------
300
+ # For j even: E[Z^j] = (j-1)!! = (j-1) × (j-3) × ... × 3 × 1
301
+ # For j odd: E[Z^j] = 0 (due to symmetry)
302
 
303
+ compute_moment <- function(j) {
304
+ if (j == 0) return(1) # E[Z^0] = 1 (total probability)
305
+ if (j %% 2 == 1) return(0) # Odd moments vanish
306
+
307
+ # Even moments: double factorial
308
+ # E[Z^2] = 1, E[Z^4] = 3, E[Z^6] = 15, E[Z^8] = 105, ...
309
+ result <- 1
310
+ for (i in seq(j - 1, 1, by = -2)) {
311
+ result <- result * i
312
  }
313
+ return(result)
314
  }
315
 
316
+ # We need moments up to degree 2k for computing E[X^2]
317
+ max_moment <- 2 * k
318
+ moments_z <- sapply(0:max_moment, compute_moment)
319
 
320
+ # --------------------------------------------------------------------------
321
+ # Compute mean: μ = E[X] = E[f(Z)] = Σ β_j E[Z^j]
322
+ # --------------------------------------------------------------------------
323
+ # Only even-powered terms contribute due to symmetry
 
 
 
 
 
324
 
325
+ mu <- 0
326
+ for (j in 0:k) {
327
+ mu <- mu + coeffs[j + 1] * moments_z[j + 1]
328
+ }
329
 
330
+ # --------------------------------------------------------------------------
331
+ # Compute variance: σ² = E[X²] - μ²
332
+ # First calculate E[X²] = E[(Σ β_i Z^i)²] = Σ_i Σ_j β_i β_j E[Z^(i+j)]
333
+ # --------------------------------------------------------------------------
334
+
335
+ E_X2 <- 0
336
+ for (i in 0:k) {
337
+ for (j in 0:k) {
338
+ power <- i + j
339
+ if (power <= max_moment) {
340
+ E_X2 <- E_X2 + coeffs[i + 1] * coeffs[j + 1] * moments_z[power + 1]
341
+ }
342
+ }
343
  }
344
 
345
+ variance <- E_X2 - mu^2
 
 
 
 
 
 
 
 
346
 
347
+ # --------------------------------------------------------------------------
348
+ # Handle numerical edge cases
349
+ # --------------------------------------------------------------------------
350
+ # Variance should always be non-negative, but numerical precision limits
351
+ # can occasionally produce tiny negative values
352
 
353
+ if (variance < 0) {
354
+ if (abs(variance) < 1e-10) {
355
+ # Likely just numerical noise - round to zero
356
+ variance <- 0
357
+ } else {
358
+ # Substantial negative variance indicates a real problem
359
+ warning(
360
+ "Variance for ", group_label, " is negative (",
361
+ format(variance, scientific = TRUE, digits = 3),
362
+ "). This indicates numerical instability in the polynomial fit. ",
363
+ "Consider reducing the polynomial degree or checking for data issues.",
364
+ call. = FALSE
365
+ )
366
+ # Set to zero to avoid downstream errors, but flag it
367
+ variance <- 0
368
+ }
369
  }
370
 
371
+ # --------------------------------------------------------------------------
372
+ # Return results
373
+ # --------------------------------------------------------------------------
374
+
375
  return(list(
376
  mean = mu,
377
+ variance = variance
378
  ))
379
  }
380
 
381
+
382
  # API endpoint
383
  #* Calculate effect size from two groups
384
  #* @param group1 Comma-separated numeric values for group 1