Arun-AK committed on
Commit 44bb2d4 · verified · 1 Parent(s): 0476e37

Update app.py

Files changed (1)
  app.py +3 -24
app.py CHANGED
@@ -250,19 +250,14 @@ tree_models = ["RandomForest", "DecisionTree"]
 non_tree_models = ["KNN", "SVM", "LogisticRegression"]
 #models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
 
-print("="*80)
-print("REPLICATION: Uddin & Lu (2024) - Pairwise Model Comparisons")
-print("="*80)
+
 
 # Store all results
 all_results = []
 
 # For each metric
 for metric in ["accuracy", "precision", "recall", "f1_score"]:
-    print(f"\n({metric.upper()})")
-    print("-"*80)
-    print(f"{'#':<3} {'Tree Model':<20} {'Non-Tree Model':<20} {'Mean 1':<10} {'Mean 2':<10} {'t':<8} {'p-value':<10} {'Sig.'}")
-    print("-"*80)
+
 
     comparison_num = 1
 
@@ -320,19 +315,12 @@ for metric in ["accuracy", "precision", "recall", "f1_score"]:
     comparison_num += 1
     models = other_models.copy()
 
-# Summary
-print("\n" + "="*80)
-print("SUMMARY")
-print("="*80)
 results_df = pd.DataFrame(all_results)
 significant_count = (results_df['p_value'] < 0.05).sum()
 total_count = len(results_df)
-print(f"\nSignificant comparisons (p < 0.05): {significant_count}/{total_count}")
-print(f"Tree models won in: {(results_df['tree_mean'] > results_df['non_tree_mean']).sum()} comparisons")
 
 # Save detailed results
 #results_df.to_csv('pairwise_comparison_results.csv', index=False)
-print("\nDetailed results saved to: pairwise_comparison_results.csv")
 import pandas as pd
 from scipy.stats import ttest_rel
 sig1 = {}
@@ -345,20 +333,12 @@ for key in list(DATASET_CATEGORIES.keys()):
     non_tree_models = ["KNN", "SVM", "LogisticRegression"]
     #models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
     models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
-    print("="*80)
-    print("REPLICATION: Uddin & Lu (2024) - Pairwise Model Comparisons")
-    print("="*80)
 
     # Store all results
     all_results = []
 
     # For each metric
     for metric in ["accuracy", "precision", "recall", "f1_score"]:
-        print(f"\n({metric.upper()})")
-        print("-"*80)
-        print(f"{'#':<3} {'Tree Model':<20} {'Non-Tree Model':<20} {'Mean 1':<10} {'Mean 2':<10} {'t':<8} {'p-value':<10} {'Sig.'}")
-        print("-"*80)
-
         comparison_num = 1
 
         models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
@@ -397,7 +377,6 @@ for key in list(DATASET_CATEGORIES.keys()):
 
         sig = "< 0.001" if p_val < 0.001 else f"{p_val:.3f}"
 
-        print(f"{comparison_num:<3} {tree_model:<20} {non_tree_model:<20} {mean1:<10.5f} {mean2:<10.5f} {t_stat:<8.2f} {sig:<10} {'True' if p_val < 0.05 else 'False'}")
 
         all_results.append({
             'metric': metric,
@@ -527,7 +506,7 @@ def compare_groups(data_choice, model1, model2):
     return fig, "\n".join(p_values_text)
 import gradio as gr
 models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
+with gr.Blocks() as demo:
     with gr.Tabs() as tabs:
         with gr.TabItem("Compaes groups of datasets"):
             with gr.Column():
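Every deletion above except the last hunk removes console reporting; the statistics the script computes (paired t-tests between each tree-based and non-tree model) are unchanged. A minimal, self-contained sketch of that comparison pattern, with made-up score arrays standing in for the per-dataset metric values app.py actually collects:

import pandas as pd
from scipy.stats import ttest_rel

# Hypothetical per-dataset metric values, one entry per dataset.
# In app.py these come from evaluation runs; the numbers here are invented.
scores = {
    "RandomForest":       [0.91, 0.88, 0.93, 0.90],
    "DecisionTree":       [0.85, 0.82, 0.88, 0.84],
    "KNN":                [0.83, 0.80, 0.86, 0.81],
    "SVM":                [0.86, 0.84, 0.89, 0.85],
    "LogisticRegression": [0.84, 0.81, 0.87, 0.83],
}
tree_models = ["RandomForest", "DecisionTree"]
non_tree_models = ["KNN", "SVM", "LogisticRegression"]

all_results = []
for tree_model in tree_models:
    for non_tree_model in non_tree_models:
        # Paired t-test: the same datasets are scored by both models,
        # so the samples are dependent and ttest_rel applies.
        t_stat, p_val = ttest_rel(scores[tree_model], scores[non_tree_model])
        all_results.append({
            "tree_model": tree_model,
            "non_tree_model": non_tree_model,
            "t": t_stat,
            "p_value": p_val,
        })

results_df = pd.DataFrame(all_results)
print(f"Significant comparisons (p < 0.05): "
      f"{(results_df['p_value'] < 0.05).sum()}/{len(results_df)}")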
 
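The one functional change is in the final hunk: dropping theme=gr.themes.Soft() falls back to Gradio's default theme. A minimal sketch of that kwarg in isolation; the textbox components and greet handler below are placeholders for illustration, not code from app.py:

import gradio as gr

# theme=gr.themes.Soft() opts into one of Gradio's built-in themes;
# omitting the kwarg, as this commit does, uses the default theme.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    name = gr.Textbox(label="Name")
    out = gr.Textbox(label="Greeting")
    btn = gr.Button("Greet")
    btn.click(lambda n: f"Hello, {n}!", inputs=name, outputs=out)

# demo.launch()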