Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| import warnings | |
| from scipy.stats import ttest_rel | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import io | |
| import gradio as gr | |
def get_keys(d, values):
    """Return the keys of ``d`` whose value is contained in ``values``.

    Keys come back in the mapping's iteration order.  When several keys map
    to a matching value, every one of them is included.
    """
    matching_keys = []
    for key, value in d.items():
        if value in values:
            matching_keys.append(key)
    return matching_keys
# Model results are stored in two files that are merged into one table.
pickle_file_path = 'model_results1.pkl'
csv_file_path = 'the_model_results.csv'

# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
model_results = pd.read_pickle(pickle_file_path)
model_results_csv = pd.read_csv(csv_file_path)

# The CSV's "dataset_name" column is renamed to "dataset" so that both sources
# share one dataset column before their rows are stacked under a fresh index.
fmodel_results = pd.concat(
    [
        model_results,
        model_results_csv.rename(columns={"dataset_name": "dataset"}),
    ],
    ignore_index=True,
)
# Category name -> {dataset id -> display name}.
#
# The ids (e.g. "D1") are matched against the "dataset" column of
# fmodel_results; the display names are what the UI dropdowns list.  The
# insertion order of categories and of ids within a category is significant:
# cats1 and the dropdown choices are derived from it.
#
# NOTE: display names are not unique (e.g. "D9" and "D25" are both
# "Chronic Kidney Disease"), so a name -> id lookup through get_keys() returns
# every id that shares the name.
DATASET_CATEGORIES = {
    "Medical & Healthcare": {
        "D1": "Heart Disease (Comprehensive)",
        "D2": "Heart attack possibility",
        "D3": "Heart Disease Dataset",
        "D4": "Liver Disorders",
        "D5": "Diabetes Prediction",
        "D9": "Chronic Kidney Disease",
        "D10": "Breast Cancer Prediction",
        "D11": "Stroke Prediction",
        "D12": "Lung Cancer Prediction",
        "D13": "Hepatitis",
        "D15": "Thyroid Disease",
        "D16": "Heart Failure Prediction",
        "D17": "Parkinson's",
        "D18": "Indian Liver Patient",
        "D19": "COVID-19 Effect on Liver Cancer",
        "D20": "Liver Dataset",
        "D21": "Specht Heart",
        "D22": "Early-stage Diabetes",
        "D23": "Diabetic Retinopathy",
        "D24": "Breast Cancer Coimbra",
        "D25": "Chronic Kidney Disease",
        "D26": "Kidney Stone",
        "D28": "Echocardiogram",
        "D29": "Bladder Cancer Recurrence",
        "D31": "Prostate Cancer",
        "D46": "Real Breast Cancer Data",
        "D47": "Breast Cancer (Royston)",
        "D48": "Lung Cancer Dataset",
        "D52": "Cervical Cancer Risk",
        "D53": "Breast Cancer Wisconsin",
        "D61": "Breast Cancer Prediction",
        "D62": "Thyroid Disease",
        "D68": "Lung Cancer",
        "D69": "Cancer Patients Data",
        "D70": "Labor Relations",
        "D71": "Glioma Grading",
        "D74": "Post-Operative Patient",
        "D80": "Heart Rate Stress Monitoring",
        "D82": "Diabetes 2019",
        "D87": "Personal Heart Disease Indicators",
        "D92": "Heart Disease (Logistic)",
        "D95": "Diabetes Prediction",
        "D97": "Cardiovascular Disease",
        "D98": "Diabetes 130 US Hospitals",
        "D99": "Heart Disease Dataset",
        "D181": "HCV Data",
        "D184": "Cardiotocography",
        "D189": "Mammographic Mass",
        "D199": "Easiest Diabetes",
        "D200": "Monkey-Pox Patients",
        "D54": "Breast Cancer Wisconsin",
        "D63": "Sick-euthyroid",
        "D64": "Ann-test",
        "D65": "Ann-train",
        "D66": "Hypothyroid",
        "D67": "New-thyroid",
        "D72": "Glioma Grading"
    },
    "Gaming & Sports": {
        "D27": "Chess King-Rook",
        "D36": "Tic-Tac-Toe",
        "D40": "IPL 2022 Matches",
        "D41": "League of Legends",
        "D55": "League of Legends Diamond",
        "D56": "Chess Game Dataset",
        "D57": "Game of Thrones",
        "D73": "Connect-4",
        "D75": "FIFA 2018",
        "D76": "Dota 2 Matches",
        "D77": "IPL Match Analysis",
        "D78": "CS:GO Professional",
        "D79": "IPL 2008-2022",
        "D114": "Video Games",
        "D115": "Video Games Sales",
        "D117": "Sacred Games",
        "D118": "PC Games Sales",
        "D119": "Popular Video Games",
        "D120": "Olympic Games 2021",
        "D121": "Video Games ESRB",
        "D122": "Top Play Store Games",
        "D123": "Steam Games",
        "D124": "PS4 Games",
        "D116": "Video Games Sales"
    },
    "Education & Students": {
        "D43": "Student Marks",
        "D44": "Student 2nd Year Result",
        "D45": "Student Mat Pass/Fail",
        "D103": "Academic Performance",
        "D104": "Student Academic Analysis",
        "D105": "Student Dropout Prediction",
        "D106": "Electronic Gadgets Impact",
        "D107": "Campus Recruitment",
        "D108": "End-Semester Performance",
        "D109": "Fitbits and Grades",
        "D110": "Student Time Management",
        "D111": "Student Feedback",
        "D112": "Depression & Performance",
        "D113": "University Rankings",
        "D126": "University Ranking CWUR",
        "D127": "University Ranking CWUR 2013-2014",
        "D128": "University Ranking CWUR 2014-2015",
        "D129": "University Ranking CWUR 2015-2016",
        "D130": "University Ranking CWUR 2016-2017",
        "D131": "University Ranking CWUR 2017-2018",
        "D132": "University Ranking CWUR 2018-2019",
        "D133": "University Ranking CWUR 2019-2020",
        "D134": "University Ranking CWUR 2020-2021",
        "D135": "University Ranking CWUR 2021-2022",
        "D136": "University Ranking CWUR 2022-2023",
        "D137": "University Ranking GM 2016",
        "D138": "University Ranking GM 2017",
        "D139": "University Ranking GM 2018",
        "D140": "University Ranking GM 2019",
        "D141": "University Ranking GM 2020",
        "D142": "University Ranking GM 2021",
        "D143": "University Ranking GM 2022",
        "D144": "University Ranking Webometric 2012",
        "D145": "University Ranking Webometric 2013",
        "D146": "University Ranking Webometric 2014",
        "D147": "University Ranking Webometric 2015",
        "D148": "University Ranking Webometric 2016",
        "D149": "University Ranking Webometric 2017",
        "D150": "University Ranking Webometric 2018",
        "D151": "University Ranking Webometric 2019",
        "D152": "University Ranking Webometric 2020",
        "D153": "University Ranking Webometric 2021",
        "D154": "University Ranking Webometric 2022",
        "D155": "University Ranking Webometric 2023",
        "D156": "University Ranking URAP 2018-2019",
        "D157": "University Ranking URAP 2019-2020",
        "D158": "University Ranking URAP 2020-2021",
        "D159": "University Ranking URAP 2021-2022",
        "D160": "University Ranking URAP 2022-2023",
        "D161": "University Ranking THE 2011",
        "D162": "University Ranking THE 2012",
        "D163": "University Ranking THE 2013",
        "D164": "University Ranking THE 2014",
        "D165": "University Ranking THE 2015",
        "D166": "University Ranking THE 2016",
        "D167": "University Ranking THE 2017",
        "D168": "University Ranking THE 2018",
        "D169": "University Ranking THE 2019",
        "D170": "University Ranking THE 2020",
        "D171": "University Ranking THE 2021",
        "D172": "University Ranking THE 2022",
        "D173": "University Ranking THE 2023",
        "D174": "University Ranking QS 2022",
        "D190": "Student Academics Performance"
    },
    "Banking & Finance": {
        "D6": "Bank Marketing 1",
        "D7": "Bank Marketing 2",
        "D30": "Adult Income",
        "D32": "Telco Customer Churn",
        "D35": "Credit Approval",
        "D50": "Term Deposit Prediction",
        "D96": "Credit Card Fraud",
        "D188": "South German Credit",
        "D193": "Credit Risk Classification",
        "D195": "Credit Score Classification",
        "D196": "Banking Classification"
    },
    "Science & Engineering": {
        "D8": "Mushroom",
        "D14": "Ionosphere",
        "D33": "EEG Eye State",
        "D37": "Steel Plates Faults",
        "D39": "Fertility",
        "D51": "Darwin",
        "D58": "EEG Emotions",
        "D81": "Predictive Maintenance",
        "D84": "Oranges vs Grapefruit",
        "D90": "Crystal System Li-ion",
        "D183": "Drug Consumption",
        "D49": "Air Pressure System Failures",
        "D93": "Air Pressure System Failures",
        "D185": "Toxicity",
        "D186": "Toxicity"
    },
    "Social & Lifestyle": {
        "D38": "Online Shoppers",
        "D59": "Red Wine Quality",
        "D60": "White Wine Quality",
        "D88": "Airline Passenger Satisfaction",
        "D94": "Go Emotions Google",
        "D100": "Spotify East Asian",
        "D125": "Suicide Rates",
        "D182": "Obesity Levels",
        "D187": "Blood Transfusion",
        "D191": "Obesity Classification",
        "D192": "Gender Classification",
        "D194": "Happiness Classification",
        "D42": "Airline customer Holiday Booking dataset"
    },
    "ML Benchmarks & Synthetic": {
        "D34": "Spambase",
        "D85": "Synthetic Binary",
        "D89": "Naive Bayes Data",
        "D175": "Monk's Problems 1",
        "D176": "Monk's Problems 2",
        "D177": "Monk's Problems 3",
        "D178": "Monk's Problems 4",
        "D179": "Monk's Problems 5",
        "D180": "Monk's Problems 6"
    },
    "Other": {
        "D83": "Paris Housing",
        "D91": "Fake Bills",
        "D197": "Star Classification"
    }
}
# Category names in definition order; the UI and compare_ind() index into this
# list positionally, so it must stay aligned with DATASET_CATEGORIES.
cats1 = list(DATASET_CATEGORIES.keys())
from itertools import combinations

import pandas as pd
from scipy.stats import ttest_rel

# Metrics and models covered by the pairwise significance analysis.
_METRICS = ["accuracy", "precision", "recall", "f1_score"]
_MODEL_NAMES = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]

# Column layout of every comparison table.  The "tree"/"non_tree" prefixes are
# historical: they only denote the first and second model of a pair.  They are
# kept because compare_groups() reads these exact column names.
_RESULT_COLUMNS = [
    'metric',
    'tree_model',
    'non_tree_model',
    'tree_mean',
    'non_tree_mean',
    'tree_std',
    'non_tree_std',
    'n_datasets',
    't_statistic',
    'p_value',
]


def _pairwise_model_tests(results, models, metrics, verbose=False):
    """Run a paired t-test for every unordered pair of models on every metric.

    Args:
        results: DataFrame with a 'model' column, a 'dataset' column and one
            column per entry of ``metrics``.
        models: Model names; each unordered pair is tested once, in the order
            produced by ``itertools.combinations``.
        metrics: Names of the score columns to test.
        verbose: When true, print one formatted line per completed comparison.

    Returns:
        DataFrame with the columns in ``_RESULT_COLUMNS``, one row per
        (metric, model pair) that had at least two datasets scored by both
        models.  The columns are present even when no row qualifies, so
        callers can index them without a KeyError.
    """
    rows = []
    for metric in metrics:
        # One score Series per model, indexed by dataset id, built once per
        # metric instead of once per pair.
        scores = {
            name: results[results['model'] == name].set_index('dataset')[metric]
            for name in models
        }
        for number, (first, second) in enumerate(combinations(models, 2), start=1):
            # Building the frame from two Series aligns them on the dataset
            # index; dropna() then keeps only datasets scored by both models.
            paired = pd.DataFrame({
                'tree': scores[first],
                'non_tree': scores[second],
            }).dropna()

            if len(paired) < 2:
                print(f"{number:<3} {first:<20} {second:<20} Insufficient data")
                continue

            t_stat, p_val = ttest_rel(paired['tree'], paired['non_tree'])
            mean1 = paired['tree'].mean()
            mean2 = paired['non_tree'].mean()

            if verbose:
                sig = "< 0.001" if p_val < 0.001 else f"{p_val:.3f}"
                print(f"{number:<3} {first:<20} {second:<20} {mean1:<10.5f} {mean2:<10.5f} {t_stat:<8.2f} {sig:<10} {'True' if p_val < 0.05 else 'False'}")

            rows.append({
                'metric': metric,
                'tree_model': first,
                'non_tree_model': second,
                'tree_mean': mean1,
                'non_tree_mean': mean2,
                'tree_std': paired['tree'].std(),
                'non_tree_std': paired['non_tree'].std(),
                'n_datasets': len(paired),
                't_statistic': t_stat,
                'p_value': p_val,
            })
    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)


# Comparison across every dataset present in the results table.
results_df = _pairwise_model_tests(fmodel_results, _MODEL_NAMES, _METRICS, verbose=True)
significant_count = (results_df['p_value'] < 0.05).sum()
total_count = len(results_df)

# Per-category comparison tables keyed by category name, plus "AllDatasets";
# compare_groups() looks its data up in this dict.
sig1 = {}
for key, category_datasets in DATASET_CATEGORIES.items():
    category_results = fmodel_results[fmodel_results["dataset"].isin(list(category_datasets))]
    results_df4 = _pairwise_model_tests(category_results, _MODEL_NAMES, _METRICS)
    sig1[key] = results_df4

    significant_count = (results_df4['p_value'] < 0.05).sum()
    total_count = len(results_df4)
    print("\n" + "="*80)
    print(f"SUMMARY: {key}")
    print("="*80)
    print(f"\nSignificant comparisons (p < 0.05): {significant_count}/{total_count}")
    print(f"First-listed model scored higher in: {(results_df4['tree_mean'] > results_df4['non_tree_mean']).sum()} comparisons")

sig1["AllDatasets"] = results_df
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from PIL import Image | |
| import io | |
# Dropdown labels of the form "<category> (<number of dataset ids>)".
cats = [
    f"{category} ({len(members)})"
    for category, members in DATASET_CATEGORIES.items()
]
# Same labels plus the all-datasets entry.  compare_groups() uses only the
# text before " (" to look a group up.
datasel = [*cats, "AllDatasets (150)"]
def compare_ind(med, game, ed, bank, sci, social, ml, other, models_to_compare=None):
    """Draw one dataset-by-model heatmap per metric for the chosen datasets.

    Args:
        med, game, ed, bank, sci, social, ml, other: Dataset display names
            picked in each category dropdown, in the order of ``cats1``.
            Each may be empty or ``None``.
        models_to_compare: Model names to include.  When empty or ``None``,
            the module-level ``models`` list is used.

    Returns:
        A 5-tuple: four matplotlib figures (accuracy, precision, recall,
        f1_score, in that order) followed by a status message.  A metric
        with nothing to plot gets a blank, titled figure instead of raising.
    """
    metric_names = ["accuracy", "precision", "recall", "f1_score"]
    messages = []

    # Translate the selected display names back to dataset ids, per category.
    selected_datasets_keys = []
    dropdowns = [med, game, ed, bank, sci, social, ml, other]
    for cat_name, dropdown_values in zip(cats1, dropdowns):
        if dropdown_values:
            selected_datasets_keys.extend(
                get_keys(DATASET_CATEGORIES[cat_name], dropdown_values)
            )

    if not models_to_compare:
        messages.append("No models selected. Displaying results for all models.")
        models_to_compare = models

    dataset_id_to_name = {
        dataset_id: name
        for category_dict in DATASET_CATEGORIES.values()
        for dataset_id, name in category_dict.items()
    }

    filtered = fmodel_results[
        (fmodel_results["dataset"].isin(selected_datasets_keys)) &
        (fmodel_results["model"].isin(models_to_compare))
    ].copy()

    if not selected_datasets_keys:
        messages.append("No datasets selected. Pick at least one dataset.")
    elif filtered.empty:
        messages.append("No results found for the selected datasets and models.")

    figs = []
    for metric in metric_names:
        fig, ax = plt.subplots(figsize=(12, 8))

        table = None
        if not filtered.empty:
            table = filtered.pivot_table(
                index='dataset',
                columns='model',
                values=metric,
            ).rename(index=dataset_id_to_name)

        if table is None or table.empty:
            # sns.heatmap raises on an empty table, so show a blank panel.
            ax.set_axis_off()
            ax.set_title(f"{metric}: no data for the current selection")
        else:
            sns.heatmap(table, annot=True, cmap="crest", fmt=".3f", cbar=True, ax=ax)
            ax.set_title(f"{metric} per Dataset and Model ({len(selected_datasets_keys)} datasets)")
            ax.set_ylabel("Dataset")
            ax.set_xlabel("Model")
            fig.tight_layout()

        # Unregister the figure from pyplot so repeated requests do not pile
        # up open figures; the Figure object itself is still returned.
        plt.close(fig)
        figs.append(fig)

    status = "\n".join(messages) if messages else "Comparison complete."
    return figs[0], figs[1], figs[2], figs[3], status
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from PIL import Image | |
| import io | |
def compare_groups(data_choice, model1, model2):
    """Plot the mean scores of two models for one dataset group.

    Args:
        data_choice: Dropdown label such as "Other (3)"; the text before
            " (" is the key looked up in ``sig1``.
        model1, model2: Names of the two models to compare.

    Returns:
        A ``(figure, text)`` pair.  The figure is a grouped bar chart of the
        mean score per metric and the text lists one p-value line per metric.
        When the inputs are incomplete or no comparison exists, the figure is
        blank and the text explains why.
    """
    def _blank_figure():
        # Empty placeholder figure, already unregistered from pyplot.
        blank = plt.figure(figsize=(10, 6))
        plt.close(blank)
        return blank

    # Dropdowns without a selection deliver None.
    if not data_choice or not model1 or not model2:
        return _blank_figure(), "Select a dataset group and two models before comparing."

    data1 = sig1.get(data_choice.split(' (')[0])
    if data1 is None:
        return _blank_figure(), f"No results available for '{data_choice}'."

    # Each pair is stored once, in either order, so match both orientations.
    comparison_data = data1[
        ((data1['tree_model'] == model1) & (data1['non_tree_model'] == model2)) |
        ((data1['tree_model'] == model2) & (data1['non_tree_model'] == model1))
    ]

    if comparison_data.empty:
        return _blank_figure(), "No comparison data found for the selected models. \n Dont pick the same models."

    plot_data = []
    p_values_text = []
    for _, row in comparison_data.iterrows():
        metric = row['metric']
        # Map the stored first/second means back onto model1/model2.
        if row['tree_model'] == model1:
            score1, score2 = row['tree_mean'], row['non_tree_mean']
        else:
            score1, score2 = row['non_tree_mean'], row['tree_mean']
        plot_data.append({'Metric': metric, 'Model': model1, 'Mean Score': score1})
        plot_data.append({'Metric': metric, 'Model': model2, 'Mean Score': score2})
        p_values_text.append(f"{metric} p-value: {row['p_value']:.5f} (Significant (cutoff = 0.05): {'Yes' if row['p_value'] < 0.05 else 'No'})")

    df_plot = pd.DataFrame(plot_data)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='Metric', y='Mean Score', hue='Model', data=df_plot, ax=ax)
    ax.set_title(f'Comparison of {model1} vs {model2} Across Metrics')
    ax.set_ylabel('Mean Score')
    ax.set_xlabel('Metric')
    ax.set_ylim(0, 1)
    ax.legend(title='Model')
    fig.tight_layout()
    # Unregister from pyplot so figures do not accumulate across requests.
    plt.close(fig)

    return fig, "\n".join(p_values_text)
| import gradio as gr | |
# Model names offered in the UI; compare_ind() also falls back to this list.
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]


def _category_dropdown(category):
    """Build the multi-select dropdown listing one category's dataset names."""
    # dict.fromkeys drops repeated display names (several ids share a name)
    # while keeping first-seen order, and the dropdown gets a plain list.
    names = list(dict.fromkeys(DATASET_CATEGORIES[category].values()))
    return gr.Dropdown(names, label=category, multiselect=True)


with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        # Tab 1: paired comparison of two models over a dataset group.
        with gr.TabItem("Compare groups of datasets"):
            with gr.Column():
                with gr.Row():
                    gr.Markdown("# Comparing models")
                    # Defaults are set so the callback never receives None.
                    model1 = gr.Dropdown(
                        ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression", "AllModels(not for AllData)"],
                        value=models[0],
                        label="Model 1",
                    )
                    model2 = gr.Dropdown(models, value=models[1], label="Model 2")
                    dataset = gr.Dropdown(datasel, value=datasel[-1], label="Datasets")
                compare_button = gr.Button("Compare")
                with gr.Row():
                    gr.Markdown("## Results")
                    p_value = gr.Textbox(label="Model Output", lines=4)
                with gr.Row():
                    output_plot = gr.Plot(label="Results")
                compare_button.click(
                    fn=compare_groups,
                    inputs=[dataset, model1, model2],
                    outputs=[output_plot, p_value],
                )

        # Tab 2: per-dataset heatmaps for hand-picked datasets and models.
        with gr.TabItem("Compare Individual Tabs"):
            with gr.Column():
                with gr.Row():
                    gr.Markdown("# Comparing models")
                    model_choice = gr.Dropdown(models, label="Model 1", multiselect=True)
                    med, game, ed = (_category_dropdown(name) for name in cats1[:3])
                with gr.Row():
                    bank, sci, social, ml, other = (
                        _category_dropdown(name) for name in cats1[3:8]
                    )
                compare_button = gr.Button("Compare")
                with gr.Row():
                    p1 = gr.Plot(label="Results")
                with gr.Row():
                    p2 = gr.Plot(label="Results")
                with gr.Row():
                    p3 = gr.Plot(label="Results")
                with gr.Row():
                    p4 = gr.Plot(label="Results")
                with gr.Row():
                    p_value = gr.Textbox(label="Model Output", lines=4)
                compare_button.click(
                    fn=compare_ind,
                    inputs=[med, game, ed, bank, sci, social, ml, other, model_choice],
                    outputs=[p1, p2, p3, p4, p_value],
                )

demo.launch(share=True, show_error=True)