JG1310 committed on
Commit
60af967
·
verified ·
1 Parent(s): 519b2a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +435 -6
app.py CHANGED
@@ -1,10 +1,439 @@
1
  import gradio as gr
 
 
 
 
2
 
3
- demo = gr.Interface(fn=greet,
4
- inputs="text",
5
- outputs="text"
6
- )
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  if __name__ == "__main__":
10
- demo.launch()
 
1
  import gradio as gr
2
+ import numpy as np
3
+ from scipy import stats
4
+ from typing import List, Dict, Any, Union, Tuple
5
+ import json
6
 
7
def independent_t_test(group1: str, group2: str, equal_var: bool = True, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform an independent samples t-test between two groups.

    Args:
        group1 (str): Comma-separated values for group 1 (e.g., "1.2,2.3,3.4,2.1")
        group2 (str): Comma-separated values for group 2 (e.g., "2.1,3.2,4.1,3.5")
        equal_var (bool): If True, standard t-test with pooled variance. If False, Welch's t-test
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: t-statistic, p-value, degrees of freedom, Cohen's d, per-group
              descriptive statistics, and a plain-language interpretation.
              On invalid input, a dict with a single "error" key.
    """
    try:
        # Parse comma-separated input, ignoring empty tokens.
        data1 = [float(x.strip()) for x in group1.split(',') if x.strip()]
        data2 = [float(x.strip()) for x in group2.split(',') if x.strip()]

        if len(data1) < 2 or len(data2) < 2:
            return {"error": "Each group must have at least 2 observations"}

        # Perform t-test
        t_stat, p_value = stats.ttest_ind(data1, data2, equal_var=equal_var, alternative=alternative)

        # Cast numpy scalars to plain floats so the result dict is JSON-serializable.
        desc1 = {"mean": float(np.mean(data1)), "std": float(np.std(data1, ddof=1)), "n": len(data1)}
        desc2 = {"mean": float(np.mean(data2)), "std": float(np.std(data2, ddof=1)), "n": len(data2)}

        # Degrees of freedom
        if equal_var:
            df = len(data1) + len(data2) - 2
        else:
            # Welch-Satterthwaite approximation for unequal variances.
            s1_sq, s2_sq = desc1["std"]**2, desc2["std"]**2
            n1, n2 = desc1["n"], desc2["n"]
            df = (s1_sq/n1 + s2_sq/n2)**2 / ((s1_sq/n1)**2/(n1-1) + (s2_sq/n2)**2/(n2-1))

        # Effect size (Cohen's d, pooled-SD form). Guard against zero variance,
        # which would otherwise produce inf/nan — not representable in JSON.
        pooled_std = float(np.sqrt(((len(data1)-1)*desc1["std"]**2 + (len(data2)-1)*desc2["std"]**2)
                                   / (len(data1)+len(data2)-2)))
        if pooled_std > 0:
            cohens_d = (desc1["mean"] - desc2["mean"]) / pooled_std
            effect_size_interp = "small" if abs(cohens_d) < 0.5 else "medium" if abs(cohens_d) < 0.8 else "large"
            cohens_d_out = round(cohens_d, 4)
            effect_size_msg = f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_size_interp}"
        else:
            cohens_d_out = None
            effect_size_msg = "Effect size undefined (both groups have zero variance)"

        # Interpretation (conventional alpha = 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"

        return {
            "test_type": f"Independent t-test ({'equal variances' if equal_var else 'unequal variances'})",
            "t_statistic": round(float(t_stat), 4),
            "p_value": round(float(p_value), 6),
            "degrees_of_freedom": round(df, 2),
            "cohens_d": cohens_d_out,
            "group1_stats": desc1,
            "group2_stats": desc2,
            "result": f"The difference between groups is {significance} (p = {p_value:.6f})",
            "effect_size": effect_size_msg,
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        # Surface parse/statistics errors to the UI instead of raising.
        return {"error": f"Error performing t-test: {str(e)}"}
66
+
67
def paired_t_test(before: str, after: str, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a paired samples t-test.

    Args:
        before (str): Comma-separated values for the before condition
        after (str): Comma-separated values for the after condition
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, and interpretation,
              or a dict with an "error" key on invalid input.
    """
    try:
        # Split on commas, drop blank tokens, coerce to floats.
        pre = [float(tok.strip()) for tok in before.split(',') if tok.strip()]
        post = [float(tok.strip()) for tok in after.split(',') if tok.strip()]

        if len(pre) != len(post):
            return {"error": "Before and after groups must have the same number of observations"}
        if len(pre) < 2:
            return {"error": "Need at least 2 paired observations"}

        # Paired t-test on the two conditions.
        t_stat, p_value = stats.ttest_rel(pre, post, alternative=alternative)

        # Per-pair differences (after minus before) drive the effect size.
        diffs = np.array(post) - np.array(pre)
        mean_diff = np.mean(diffs)
        std_diff = np.std(diffs, ddof=1)
        cohens_d = mean_diff / std_diff  # Cohen's d for paired samples
        df = len(pre) - 1

        # Interpretation (alpha = 0.05; conventional |d| cut-offs)
        significance = "significant" if p_value < 0.05 else "not significant"
        magnitude = abs(cohens_d)
        if magnitude < 0.5:
            effect_label = "small"
        elif magnitude < 0.8:
            effect_label = "medium"
        else:
            effect_label = "large"

        return {
            "test_type": "Paired t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "mean_difference": round(mean_diff, 4),
            "std_difference": round(std_diff, 4),
            "cohens_d": round(cohens_d, 4),
            "before_mean": round(np.mean(pre), 4),
            "after_mean": round(np.mean(post), 4),
            "result": f"The paired difference is {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_label}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing paired t-test: {str(e)}"}
124
+
125
def one_sample_t_test(sample: str, population_mean: float, alternative: str = "two-sided") -> Dict[str, Any]:
    """
    Perform a one-sample t-test against a population mean.

    Args:
        sample (str): Comma-separated sample values
        population_mean (float): Hypothesized population mean
        alternative (str): Alternative hypothesis - 'two-sided', 'less', or 'greater'

    Returns:
        dict: Test results including t-statistic, p-value, and interpretation,
              or a dict with an "error" key on invalid input.
    """
    try:
        # Split on commas, drop blank tokens, coerce to floats.
        values = [float(tok.strip()) for tok in sample.split(',') if tok.strip()]

        if len(values) < 2:
            return {"error": "Sample must have at least 2 observations"}

        # One-sample t-test against the hypothesized mean.
        t_stat, p_value = stats.ttest_1samp(values, population_mean, alternative=alternative)

        # Descriptives for the report.
        n = len(values)
        avg = np.mean(values)
        spread = np.std(values, ddof=1)

        # Cohen's d: standardized distance of sample mean from the hypothesized mean.
        cohens_d = (avg - population_mean) / spread
        df = n - 1

        # Interpretation (alpha = 0.05; conventional |d| cut-offs)
        significance = "significant" if p_value < 0.05 else "not significant"
        magnitude = abs(cohens_d)
        if magnitude < 0.5:
            effect_label = "small"
        elif magnitude < 0.8:
            effect_label = "medium"
        else:
            effect_label = "large"

        return {
            "test_type": "One-sample t-test",
            "t_statistic": round(t_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "sample_mean": round(avg, 4),
            "population_mean": population_mean,
            "sample_std": round(spread, 4),
            "sample_size": n,
            "cohens_d": round(cohens_d, 4),
            "result": f"Sample mean differs {significance}ly from population mean (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cohen's d = {cohens_d:.4f}) is {effect_label}",
            "alternative_hypothesis": alternative
        }
    except Exception as e:
        return {"error": f"Error performing one-sample t-test: {str(e)}"}
178
+
179
def one_way_anova(*groups: str) -> Dict[str, Any]:
    """
    Perform a one-way ANOVA test.

    Args:
        *groups: Variable number of comma-separated group value strings
                 (minimum 2 non-empty groups; empty/blank entries are skipped)

    Returns:
        dict: ANOVA results including F-statistic, p-value, eta-squared,
              per-group descriptive statistics, and interpretation, or a
              dict with a single "error" key on invalid input.
    """
    try:
        # Parse input data, remembering each group's original 1-based input
        # position so labels stay correct when intermediate boxes are empty.
        parsed_groups = []
        group_labels = []
        for i, group in enumerate(groups):
            # None-safe: a missing/blank textbox is simply skipped.
            if not group or not group.strip():
                continue
            data = [float(x.strip()) for x in group.split(',') if x.strip()]
            if len(data) < 2:
                return {"error": f"Group {i+1} must have at least 2 observations"}
            parsed_groups.append(data)
            group_labels.append(i + 1)

        if len(parsed_groups) < 2:
            return {"error": "Need at least 2 groups for ANOVA"}

        # Perform one-way ANOVA
        f_stat, p_value = stats.f_oneway(*parsed_groups)

        # Descriptive statistics per group (labelled by original input position,
        # fixing the mislabelling that occurred when empty groups were dropped).
        group_stats = []
        overall_data = []
        for label, group in zip(group_labels, parsed_groups):
            group_stats.append({
                "group": label,
                "n": len(group),
                "mean": round(float(np.mean(group)), 4),
                "std": round(float(np.std(group, ddof=1)), 4)
            })
            overall_data.extend(group)

        # Effect size: eta-squared = SS_between / SS_total
        overall_mean = np.mean(overall_data)
        ss_total = sum((x - overall_mean)**2 for x in overall_data)
        ss_between = sum(len(group) * (np.mean(group) - overall_mean)**2 for group in parsed_groups)
        eta_squared = float(ss_between / ss_total) if ss_total > 0 else 0

        # Degrees of freedom
        df_between = len(parsed_groups) - 1
        df_within = len(overall_data) - len(parsed_groups)

        # Interpretation (alpha = 0.05; conventional eta-squared cut-offs)
        significance = "significant" if p_value < 0.05 else "not significant"
        effect_size_interp = "small" if eta_squared < 0.06 else "medium" if eta_squared < 0.14 else "large"

        return {
            "test_type": "One-way ANOVA",
            "f_statistic": round(float(f_stat), 4),
            "p_value": round(float(p_value), 6),
            "df_between": df_between,
            "df_within": df_within,
            "eta_squared": round(eta_squared, 4),
            "group_statistics": group_stats,
            "result": f"Group differences are {significance} (p = {p_value:.6f})",
            "effect_size": f"Effect size (η² = {eta_squared:.4f}) is {effect_size_interp}",
            "note": "If significant, consider post-hoc tests to identify specific group differences"
        }
    except Exception as e:
        # Surface parse/statistics errors to the UI instead of raising.
        return {"error": f"Error performing ANOVA: {str(e)}"}
247
+
248
def chi_square_test(observed: str, expected: str = None) -> Dict[str, Any]:
    """
    Perform a chi-square goodness of fit test.

    Args:
        observed (str): Comma-separated observed frequencies
        expected (str): Comma-separated expected frequencies
            (optional; defaults to an equal distribution)

    Returns:
        dict: Chi-square test results, or a dict with an "error" key
              on invalid input.
    """
    try:
        # Parse observed frequencies.
        obs = [float(tok.strip()) for tok in observed.split(',') if tok.strip()]

        if expected and expected.strip():
            # Caller supplied expected frequencies; category counts must match.
            exp = [float(tok.strip()) for tok in expected.split(',') if tok.strip()]
            if len(obs) != len(exp):
                return {"error": "Observed and expected must have the same number of categories"}
        else:
            # Default: spread the observed total evenly across all categories.
            total = sum(obs)
            exp = [total / len(obs) for _ in obs]

        # Goodness-of-fit test.
        chi2_stat, p_value = stats.chisquare(obs, exp)

        df = len(obs) - 1
        # Cramér's V variant for goodness of fit: sqrt(chi2 / (n * (k - 1))).
        n = sum(obs)
        cramers_v = np.sqrt(chi2_stat / (n * (len(obs) - 1)))

        # Interpretation (alpha = 0.05; conventional V cut-offs)
        significance = "significant" if p_value < 0.05 else "not significant"
        if cramers_v < 0.3:
            effect_label = "small"
        elif cramers_v < 0.5:
            effect_label = "medium"
        else:
            effect_label = "large"

        return {
            "test_type": "Chi-square goodness of fit test",
            "chi_square_statistic": round(chi2_stat, 4),
            "p_value": round(p_value, 6),
            "degrees_of_freedom": df,
            "cramers_v": round(cramers_v, 4),
            "observed_frequencies": obs,
            "expected_frequencies": [round(e, 2) for e in exp],
            "result": f"Observed frequencies differ {significance}ly from expected (p = {p_value:.6f})",
            "effect_size": f"Effect size (Cramér's V = {cramers_v:.4f}) is {effect_label}"
        }
    except Exception as e:
        return {"error": f"Error performing chi-square test: {str(e)}"}
300
+
301
def correlation_test(x_values: str, y_values: str, method: str = "pearson") -> Dict[str, Any]:
    """
    Perform correlation analysis between two variables.

    Args:
        x_values (str): Comma-separated X variable values
        y_values (str): Comma-separated Y variable values
        method (str): Correlation method - 'pearson', 'spearman', or 'kendall'

    Returns:
        dict: Correlation results including coefficient and p-value, or a
              dict with a single "error" key on invalid input.
    """
    try:
        # Parse comma-separated input, ignoring empty tokens.
        x_data = [float(x.strip()) for x in x_values.split(',') if x.strip()]
        y_data = [float(y.strip()) for y in y_values.split(',') if y.strip()]

        if len(x_data) != len(y_data):
            return {"error": "X and Y variables must have the same number of observations"}

        if len(x_data) < 3:
            return {"error": "Need at least 3 observations for correlation"}

        # Dispatch on the requested method.
        method_key = method.lower()
        if method_key == "pearson":
            corr_coef, p_value = stats.pearsonr(x_data, y_data)
            test_name = "Pearson correlation"
        elif method_key == "spearman":
            corr_coef, p_value = stats.spearmanr(x_data, y_data)
            test_name = "Spearman rank correlation"
        elif method_key == "kendall":
            corr_coef, p_value = stats.kendalltau(x_data, y_data)
            test_name = "Kendall's tau correlation"
        else:
            return {"error": "Method must be 'pearson', 'spearman', or 'kendall'"}

        # Plain floats keep the result dict JSON-serializable.
        corr_coef = float(corr_coef)

        # Interpretation (alpha = 0.05)
        significance = "significant" if p_value < 0.05 else "not significant"

        # Correlation strength interpretation (conventional |r| cut-offs)
        abs_corr = abs(corr_coef)
        if abs_corr < 0.3:
            strength = "weak"
        elif abs_corr < 0.7:
            strength = "moderate"
        else:
            strength = "strong"

        # FIX: a coefficient of exactly 0 was previously labelled "negative";
        # report it as "no correlation" instead.
        if corr_coef == 0:
            interpretation = f"No correlation (r = {corr_coef:.4f})"
        else:
            direction = "positive" if corr_coef > 0 else "negative"
            interpretation = f"{strength.title()} {direction} correlation (r = {corr_coef:.4f})"

        return {
            "test_type": test_name,
            "correlation_coefficient": round(corr_coef, 4),
            "p_value": round(float(p_value), 6),
            "sample_size": len(x_data),
            "result": f"The correlation is {significance} (p = {p_value:.6f})",
            "interpretation": interpretation,
            "method": method_key
        }
    except Exception as e:
        # Surface parse/statistics errors to the UI instead of raising.
        return {"error": f"Error performing correlation test: {str(e)}"}
362
+
363
# Build one gr.Interface per statistical test, then expose them as tabs.
_independent_tab = gr.Interface(
    fn=independent_t_test,
    inputs=[
        gr.Textbox(placeholder="1.2,2.3,3.4,2.1", label="Group 1 (comma-separated)"),
        gr.Textbox(placeholder="2.1,3.2,4.1,3.5", label="Group 2 (comma-separated)"),
        gr.Checkbox(value=True, label="Equal variances"),
        gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis"),
    ],
    outputs=gr.JSON(),
    title="Independent T-Test",
    description="Compare means between two independent groups",
)

_paired_tab = gr.Interface(
    fn=paired_t_test,
    inputs=[
        gr.Textbox(placeholder="10,12,11,13", label="Before (comma-separated)"),
        gr.Textbox(placeholder="12,14,13,15", label="After (comma-separated)"),
        gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis"),
    ],
    outputs=gr.JSON(),
    title="Paired T-Test",
    description="Compare paired/matched samples",
)

_one_sample_tab = gr.Interface(
    fn=one_sample_t_test,
    inputs=[
        gr.Textbox(placeholder="10,12,11,13,9", label="Sample (comma-separated)"),
        gr.Number(value=10, label="Population mean"),
        gr.Dropdown(["two-sided", "less", "greater"], value="two-sided", label="Alternative hypothesis"),
    ],
    outputs=gr.JSON(),
    title="One-Sample T-Test",
    description="Test sample mean against population mean",
)

_anova_tab = gr.Interface(
    fn=one_way_anova,
    inputs=[
        gr.Textbox(placeholder="1,2,3,2", label="Group 1 (comma-separated)"),
        gr.Textbox(placeholder="4,5,6,5", label="Group 2 (comma-separated)"),
        gr.Textbox(placeholder="7,8,9,8", label="Group 3 (comma-separated)", info="Optional"),
        gr.Textbox(placeholder="", label="Group 4 (comma-separated)", info="Optional"),
        gr.Textbox(placeholder="", label="Group 5 (comma-separated)", info="Optional"),
    ],
    outputs=gr.JSON(),
    title="One-Way ANOVA",
    description="Compare means across multiple groups",
)

_chi_square_tab = gr.Interface(
    fn=chi_square_test,
    inputs=[
        gr.Textbox(placeholder="10,20,15,25", label="Observed frequencies (comma-separated)"),
        gr.Textbox(placeholder="", label="Expected frequencies (optional, comma-separated)"),
    ],
    outputs=gr.JSON(),
    title="Chi-Square Test",
    description="Test goodness of fit for categorical data",
)

_correlation_tab = gr.Interface(
    fn=correlation_test,
    inputs=[
        gr.Textbox(placeholder="1,2,3,4,5", label="X values (comma-separated)"),
        gr.Textbox(placeholder="2,4,6,8,10", label="Y values (comma-separated)"),
        gr.Dropdown(["pearson", "spearman", "kendall"], value="pearson", label="Correlation method"),
    ],
    outputs=gr.JSON(),
    title="Correlation Analysis",
    description="Test correlation between two variables",
)

demo = gr.TabbedInterface(
    [
        _independent_tab,
        _paired_tab,
        _one_sample_tab,
        _anova_tab,
        _chi_square_tab,
        _correlation_tab,
    ],
    tab_names=["Independent T-Test", "Paired T-Test", "One-Sample T-Test", "ANOVA", "Chi-Square", "Correlation"],
)
437
 
438
# Launch only when executed as a script (not when imported).
if __name__ == "__main__":
    # NOTE(review): mcp_server=True presumably exposes the app's functions as
    # MCP tools via Gradio's MCP integration — confirm against the installed
    # Gradio version's launch() documentation.
    demo.launch(mcp_server=True)