Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -250,19 +250,14 @@ tree_models = ["RandomForest", "DecisionTree"]
|
|
| 250 |
non_tree_models = ["KNN", "SVM", "LogisticRegression"]
|
| 251 |
#models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 252 |
|
| 253 |
-
|
| 254 |
-
print("REPLICATION: Uddin & Lu (2024) - Pairwise Model Comparisons")
|
| 255 |
-
print("="*80)
|
| 256 |
|
| 257 |
# Store all results
|
| 258 |
all_results = []
|
| 259 |
|
| 260 |
# For each metric
|
| 261 |
for metric in ["accuracy", "precision", "recall", "f1_score"]:
|
| 262 |
-
|
| 263 |
-
print("-"*80)
|
| 264 |
-
print(f"{'#':<3} {'Tree Model':<20} {'Non-Tree Model':<20} {'Mean 1':<10} {'Mean 2':<10} {'t':<8} {'p-value':<10} {'Sig.'}")
|
| 265 |
-
print("-"*80)
|
| 266 |
|
| 267 |
comparison_num = 1
|
| 268 |
|
|
@@ -320,19 +315,12 @@ for metric in ["accuracy", "precision", "recall", "f1_score"]:
|
|
| 320 |
comparison_num += 1
|
| 321 |
models = other_models.copy()
|
| 322 |
|
| 323 |
-
# Summary
|
| 324 |
-
print("\n" + "="*80)
|
| 325 |
-
print("SUMMARY")
|
| 326 |
-
print("="*80)
|
| 327 |
results_df = pd.DataFrame(all_results)
|
| 328 |
significant_count = (results_df['p_value'] < 0.05).sum()
|
| 329 |
total_count = len(results_df)
|
| 330 |
-
print(f"\nSignificant comparisons (p < 0.05): {significant_count}/{total_count}")
|
| 331 |
-
print(f"Tree models won in: {(results_df['tree_mean'] > results_df['non_tree_mean']).sum()} comparisons")
|
| 332 |
|
| 333 |
# Save detailed results
|
| 334 |
#results_df.to_csv('pairwise_comparison_results.csv', index=False)
|
| 335 |
-
print("\nDetailed results saved to: pairwise_comparison_results.csv")
|
| 336 |
import pandas as pd
|
| 337 |
from scipy.stats import ttest_rel
|
| 338 |
sig1 = {}
|
|
@@ -345,20 +333,12 @@ for key in list(DATASET_CATEGORIES.keys()):
|
|
| 345 |
non_tree_models = ["KNN", "SVM", "LogisticRegression"]
|
| 346 |
#models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 347 |
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 348 |
-
print("="*80)
|
| 349 |
-
print("REPLICATION: Uddin & Lu (2024) - Pairwise Model Comparisons")
|
| 350 |
-
print("="*80)
|
| 351 |
|
| 352 |
# Store all results
|
| 353 |
all_results = []
|
| 354 |
|
| 355 |
# For each metric
|
| 356 |
for metric in ["accuracy", "precision", "recall", "f1_score"]:
|
| 357 |
-
print(f"\n({metric.upper()})")
|
| 358 |
-
print("-"*80)
|
| 359 |
-
print(f"{'#':<3} {'Tree Model':<20} {'Non-Tree Model':<20} {'Mean 1':<10} {'Mean 2':<10} {'t':<8} {'p-value':<10} {'Sig.'}")
|
| 360 |
-
print("-"*80)
|
| 361 |
-
|
| 362 |
comparison_num = 1
|
| 363 |
|
| 364 |
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
|
@@ -397,7 +377,6 @@ for key in list(DATASET_CATEGORIES.keys()):
|
|
| 397 |
|
| 398 |
sig = "< 0.001" if p_val < 0.001 else f"{p_val:.3f}"
|
| 399 |
|
| 400 |
-
print(f"{comparison_num:<3} {tree_model:<20} {non_tree_model:<20} {mean1:<10.5f} {mean2:<10.5f} {t_stat:<8.2f} {sig:<10} {'True' if p_val < 0.05 else 'False'}")
|
| 401 |
|
| 402 |
all_results.append({
|
| 403 |
'metric': metric,
|
|
@@ -527,7 +506,7 @@ def compare_groups(data_choice, model1, model2):
|
|
| 527 |
return fig, "\n".join(p_values_text)
|
| 528 |
import gradio as gr
|
| 529 |
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 530 |
-
with gr.Blocks(
|
| 531 |
with gr.Tabs() as tabs:
|
| 532 |
with gr.TabItem("Compaes groups of datasets"):
|
| 533 |
with gr.Column():
|
|
|
|
| 250 |
non_tree_models = ["KNN", "SVM", "LogisticRegression"]
|
| 251 |
#models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 252 |
|
| 253 |
+
|
|
|
|
|
|
|
| 254 |
|
| 255 |
# Store all results
|
| 256 |
all_results = []
|
| 257 |
|
| 258 |
# For each metric
|
| 259 |
for metric in ["accuracy", "precision", "recall", "f1_score"]:
|
| 260 |
+
|
|
|
|
|
|
|
|
|
|
| 261 |
|
| 262 |
comparison_num = 1
|
| 263 |
|
|
|
|
| 315 |
comparison_num += 1
|
| 316 |
models = other_models.copy()
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
results_df = pd.DataFrame(all_results)
|
| 319 |
significant_count = (results_df['p_value'] < 0.05).sum()
|
| 320 |
total_count = len(results_df)
|
|
|
|
|
|
|
| 321 |
|
| 322 |
# Save detailed results
|
| 323 |
#results_df.to_csv('pairwise_comparison_results.csv', index=False)
|
|
|
|
| 324 |
import pandas as pd
|
| 325 |
from scipy.stats import ttest_rel
|
| 326 |
sig1 = {}
|
|
|
|
| 333 |
non_tree_models = ["KNN", "SVM", "LogisticRegression"]
|
| 334 |
#models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 335 |
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
|
|
|
|
|
|
|
|
|
| 336 |
|
| 337 |
# Store all results
|
| 338 |
all_results = []
|
| 339 |
|
| 340 |
# For each metric
|
| 341 |
for metric in ["accuracy", "precision", "recall", "f1_score"]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
comparison_num = 1
|
| 343 |
|
| 344 |
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
|
|
|
| 377 |
|
| 378 |
sig = "< 0.001" if p_val < 0.001 else f"{p_val:.3f}"
|
| 379 |
|
|
|
|
| 380 |
|
| 381 |
all_results.append({
|
| 382 |
'metric': metric,
|
|
|
|
| 506 |
return fig, "\n".join(p_values_text)
|
| 507 |
import gradio as gr
|
| 508 |
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
|
| 509 |
+
with gr.Blocks() as demo:
|
| 510 |
with gr.Tabs() as tabs:
|
| 511 |
with gr.TabItem("Compaes groups of datasets"):
|
| 512 |
with gr.Column():
|