# Source: Hugging Face Space app.py by Arun-AK (commit 6e513c8, verified).
import pandas as pd
import numpy as np
import warnings
from scipy.stats import ttest_rel
import matplotlib.pyplot as plt
import seaborn as sns
import io
import gradio as gr
def get_keys(d, values):
    """Return the keys of mapping *d* whose value appears in *values*.

    Order follows the mapping's insertion order; a key is included once
    per occurrence in *d* (keys are unique, so at most once).
    """
    matched = []
    for key, value in d.items():
        if value in values:
            matched.append(key)
    return matched
# Load benchmark results from the two result artifacts and merge them into
# a single frame; downstream code reads only `fmodel_results`.
_pickle_results = pd.read_pickle('model_results1.pkl')
_csv_results = pd.read_csv('the_model_results.csv')
# The CSV names its dataset column "dataset_name"; align it with the pickle
# before concatenating so both sources share one "dataset" column.
fmodel_results = pd.concat(
    [_pickle_results, _csv_results.rename(columns={"dataset_name": "dataset"})],
    ignore_index=True,
)
# Mapping of category name -> {dataset id (as used in the results frames)
# -> human-readable dataset name}.  The ids ("D1", "D2", ...) match the
# "dataset" column of `fmodel_results`; the names are what the UI shows.
# NOTE(review): some display names repeat within a category (e.g. D9/D25
# "Chronic Kidney Disease", D53/D54 "Breast Cancer Wisconsin"), and D70
# "Labor Relations" looks miscategorized under Medical — verify upstream.
DATASET_CATEGORIES = {
    "Medical & Healthcare": {
        "D1": "Heart Disease (Comprehensive)",
        "D2": "Heart attack possibility",
        "D3": "Heart Disease Dataset",
        "D4": "Liver Disorders",
        "D5": "Diabetes Prediction",
        "D9": "Chronic Kidney Disease",
        "D10": "Breast Cancer Prediction",
        "D11": "Stroke Prediction",
        "D12": "Lung Cancer Prediction",
        "D13": "Hepatitis",
        "D15": "Thyroid Disease",
        "D16": "Heart Failure Prediction",
        "D17": "Parkinson's",
        "D18": "Indian Liver Patient",
        "D19": "COVID-19 Effect on Liver Cancer",
        "D20": "Liver Dataset",
        "D21": "Specht Heart",
        "D22": "Early-stage Diabetes",
        "D23": "Diabetic Retinopathy",
        "D24": "Breast Cancer Coimbra",
        "D25": "Chronic Kidney Disease",
        "D26": "Kidney Stone",
        "D28": "Echocardiogram",
        "D29": "Bladder Cancer Recurrence",
        "D31": "Prostate Cancer",
        "D46": "Real Breast Cancer Data",
        "D47": "Breast Cancer (Royston)",
        "D48": "Lung Cancer Dataset",
        "D52": "Cervical Cancer Risk",
        "D53": "Breast Cancer Wisconsin",
        "D61": "Breast Cancer Prediction",
        "D62": "Thyroid Disease",
        "D68": "Lung Cancer",
        "D69": "Cancer Patients Data",
        "D70": "Labor Relations",
        "D71": "Glioma Grading",
        "D74": "Post-Operative Patient",
        "D80": "Heart Rate Stress Monitoring",
        "D82": "Diabetes 2019",
        "D87": "Personal Heart Disease Indicators",
        "D92": "Heart Disease (Logistic)",
        "D95": "Diabetes Prediction",
        "D97": "Cardiovascular Disease",
        "D98": "Diabetes 130 US Hospitals",
        "D99": "Heart Disease Dataset",
        "D181": "HCV Data",
        "D184": "Cardiotocography",
        "D189": "Mammographic Mass",
        "D199": "Easiest Diabetes",
        "D200": "Monkey-Pox Patients",
        "D54": "Breast Cancer Wisconsin",
        "D63": "Sick-euthyroid",
        "D64": "Ann-test",
        "D65": "Ann-train",
        "D66": "Hypothyroid",
        "D67": "New-thyroid",
        "D72": "Glioma Grading"
    },
    "Gaming & Sports": {
        "D27": "Chess King-Rook",
        "D36": "Tic-Tac-Toe",
        "D40": "IPL 2022 Matches",
        "D41": "League of Legends",
        "D55": "League of Legends Diamond",
        "D56": "Chess Game Dataset",
        "D57": "Game of Thrones",
        "D73": "Connect-4",
        "D75": "FIFA 2018",
        "D76": "Dota 2 Matches",
        "D77": "IPL Match Analysis",
        "D78": "CS:GO Professional",
        "D79": "IPL 2008-2022",
        "D114": "Video Games",
        "D115": "Video Games Sales",
        "D117": "Sacred Games",
        "D118": "PC Games Sales",
        "D119": "Popular Video Games",
        "D120": "Olympic Games 2021",
        "D121": "Video Games ESRB",
        "D122": "Top Play Store Games",
        "D123": "Steam Games",
        "D124": "PS4 Games",
        "D116": "Video Games Sales"
    },
    "Education & Students": {
        "D43": "Student Marks",
        "D44": "Student 2nd Year Result",
        "D45": "Student Mat Pass/Fail",
        "D103": "Academic Performance",
        "D104": "Student Academic Analysis",
        "D105": "Student Dropout Prediction",
        "D106": "Electronic Gadgets Impact",
        "D107": "Campus Recruitment",
        "D108": "End-Semester Performance",
        "D109": "Fitbits and Grades",
        "D110": "Student Time Management",
        "D111": "Student Feedback",
        "D112": "Depression & Performance",
        "D113": "University Rankings",
        "D126": "University Ranking CWUR",
        "D127": "University Ranking CWUR 2013-2014",
        "D128": "University Ranking CWUR 2014-2015",
        "D129": "University Ranking CWUR 2015-2016",
        "D130": "University Ranking CWUR 2016-2017",
        "D131": "University Ranking CWUR 2017-2018",
        "D132": "University Ranking CWUR 2018-2019",
        "D133": "University Ranking CWUR 2019-2020",
        "D134": "University Ranking CWUR 2020-2021",
        "D135": "University Ranking CWUR 2021-2022",
        "D136": "University Ranking CWUR 2022-2023",
        "D137": "University Ranking GM 2016",
        "D138": "University Ranking GM 2017",
        "D139": "University Ranking GM 2018",
        "D140": "University Ranking GM 2019",
        "D141": "University Ranking GM 2020",
        "D142": "University Ranking GM 2021",
        "D143": "University Ranking GM 2022",
        "D144": "University Ranking Webometric 2012",
        "D145": "University Ranking Webometric 2013",
        "D146": "University Ranking Webometric 2014",
        "D147": "University Ranking Webometric 2015",
        "D148": "University Ranking Webometric 2016",
        "D149": "University Ranking Webometric 2017",
        "D150": "University Ranking Webometric 2018",
        "D151": "University Ranking Webometric 2019",
        "D152": "University Ranking Webometric 2020",
        "D153": "University Ranking Webometric 2021",
        "D154": "University Ranking Webometric 2022",
        "D155": "University Ranking Webometric 2023",
        "D156": "University Ranking URAP 2018-2019",
        "D157": "University Ranking URAP 2019-2020",
        "D158": "University Ranking URAP 2020-2021",
        "D159": "University Ranking URAP 2021-2022",
        "D160": "University Ranking URAP 2022-2023",
        "D161": "University Ranking THE 2011",
        "D162": "University Ranking THE 2012",
        "D163": "University Ranking THE 2013",
        "D164": "University Ranking THE 2014",
        "D165": "University Ranking THE 2015",
        "D166": "University Ranking THE 2016",
        "D167": "University Ranking THE 2017",
        "D168": "University Ranking THE 2018",
        "D169": "University Ranking THE 2019",
        "D170": "University Ranking THE 2020",
        "D171": "University Ranking THE 2021",
        "D172": "University Ranking THE 2022",
        "D173": "University Ranking THE 2023",
        "D174": "University Ranking QS 2022",
        "D190": "Student Academics Performance"
    },
    "Banking & Finance": {
        "D6": "Bank Marketing 1",
        "D7": "Bank Marketing 2",
        "D30": "Adult Income",
        "D32": "Telco Customer Churn",
        "D35": "Credit Approval",
        "D50": "Term Deposit Prediction",
        "D96": "Credit Card Fraud",
        "D188": "South German Credit",
        "D193": "Credit Risk Classification",
        "D195": "Credit Score Classification",
        "D196": "Banking Classification"
    },
    "Science & Engineering": {
        "D8": "Mushroom",
        "D14": "Ionosphere",
        "D33": "EEG Eye State",
        "D37": "Steel Plates Faults",
        "D39": "Fertility",
        "D51": "Darwin",
        "D58": "EEG Emotions",
        "D81": "Predictive Maintenance",
        "D84": "Oranges vs Grapefruit",
        "D90": "Crystal System Li-ion",
        "D183": "Drug Consumption",
        "D49": "Air Pressure System Failures",
        "D93": "Air Pressure System Failures",
        "D185": "Toxicity",
        "D186": "Toxicity"
    },
    "Social & Lifestyle": {
        "D38": "Online Shoppers",
        "D59": "Red Wine Quality",
        "D60": "White Wine Quality",
        "D88": "Airline Passenger Satisfaction",
        "D94": "Go Emotions Google",
        "D100": "Spotify East Asian",
        "D125": "Suicide Rates",
        "D182": "Obesity Levels",
        "D187": "Blood Transfusion",
        "D191": "Obesity Classification",
        "D192": "Gender Classification",
        "D194": "Happiness Classification",
        "D42": "Airline customer Holiday Booking dataset"
    },
    "ML Benchmarks & Synthetic": {
        "D34": "Spambase",
        "D85": "Synthetic Binary",
        "D89": "Naive Bayes Data",
        "D175": "Monk's Problems 1",
        "D176": "Monk's Problems 2",
        "D177": "Monk's Problems 3",
        "D178": "Monk's Problems 4",
        "D179": "Monk's Problems 5",
        "D180": "Monk's Problems 6"
    },
    "Other": {
        "D83": "Paris Housing",
        "D91": "Fake Bills",
        "D197": "Star Classification"
    }
}
# Category names in declaration order; the UI indexes into this list.
cats1 = list(DATASET_CATEGORIES.keys())
import pandas as pd
from scipy.stats import ttest_rel
from itertools import combinations

# --- Pairwise paired t-tests across ALL datasets ---------------------------
# For every unordered pair of models and every metric, compare per-dataset
# scores with a paired t-test.  The accumulated table `results_df` is later
# exposed to the UI as sig1["AllDatasets"].
#
# Fixes vs. the original: the dead `if other_models == None: break` guard is
# gone (a list copy can never be None, and identity should use `is` anyway),
# and the shrink-the-list pair enumeration is replaced by
# itertools.combinations, which yields the exact same pairs in the same order.
df_1 = fmodel_results  # one row per (model, dataset) with metric columns

all_results = []
for metric in ["accuracy", "precision", "recall", "f1_score"]:
    comparison_num = 1
    models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
    # Each unordered model pair is compared exactly once per metric.
    for tree_model, non_tree_model in combinations(models, 2):
        # Per-dataset scores for both models, indexed by dataset id.
        tree_data = df_1[df_1['model'] == tree_model].set_index('dataset')[metric]
        non_tree_data = df_1[df_1['model'] == non_tree_model].set_index('dataset')[metric]
        # Inner join: keep only datasets scored by BOTH models.
        combined = pd.DataFrame({
            'tree': tree_data,
            'non_tree': non_tree_data
        }).dropna()
        if len(combined) < 2:
            # ttest_rel needs at least two paired observations.
            print(f"{comparison_num:<3} {tree_model:<20} {non_tree_model:<20} Insufficient data")
            comparison_num += 1
            continue
        # Paired t-test over the aligned per-dataset scores.
        t_stat, p_val = ttest_rel(combined['tree'], combined['non_tree'])
        mean1 = combined['tree'].mean()
        mean2 = combined['non_tree'].mean()
        sig = "< 0.001" if p_val < 0.001 else f"{p_val:.3f}"
        print(f"{comparison_num:<3} {tree_model:<20} {non_tree_model:<20} {mean1:<10.5f} {mean2:<10.5f} {t_stat:<8.2f} {sig:<10} {'True' if p_val < 0.05 else 'False'}")
        all_results.append({
            'metric': metric,
            'tree_model': tree_model,
            'non_tree_model': non_tree_model,
            'tree_mean': mean1,
            'non_tree_mean': mean2,
            'tree_std': combined['tree'].std(),
            'non_tree_std': combined['non_tree'].std(),
            'n_datasets': len(combined),
            't_statistic': t_stat,
            'p_value': p_val
        })
        comparison_num += 1

results_df = pd.DataFrame(all_results)
significant_count = (results_df['p_value'] < 0.05).sum()
total_count = len(results_df)
# Persisting to CSV is intentionally disabled:
# results_df.to_csv('pairwise_comparison_results.csv', index=False)
import pandas as pd
from scipy.stats import ttest_rel
from itertools import combinations

# --- Same pairwise t-tests, restricted to each dataset CATEGORY ------------
# sig1 maps category name -> results table (same schema as `results_df`).
# The UI's group-comparison tab reads from sig1.
#
# Fixes vs. the original: dead `== None` guard removed, pair enumeration via
# itertools.combinations (identical pair order), and the misleading
# "Detailed results saved to ..." print removed — nothing is written to disk
# because the to_csv call is commented out.
sig1 = {}
for key in list(DATASET_CATEGORIES.keys()):
    # Restrict the results frame to this category's dataset ids.
    df_1 = fmodel_results[fmodel_results["dataset"].isin(list(DATASET_CATEGORIES[key].keys()))]
    all_results = []
    for metric in ["accuracy", "precision", "recall", "f1_score"]:
        comparison_num = 1
        models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]
        for tree_model, non_tree_model in combinations(models, 2):
            tree_data = df_1[df_1['model'] == tree_model].set_index('dataset')[metric]
            non_tree_data = df_1[df_1['model'] == non_tree_model].set_index('dataset')[metric]
            # Inner join: only datasets present for both models.
            combined = pd.DataFrame({
                'tree': tree_data,
                'non_tree': non_tree_data
            }).dropna()
            if len(combined) < 2:
                print(f"{comparison_num:<3} {tree_model:<20} {non_tree_model:<20} Insufficient data")
                comparison_num += 1
                continue
            t_stat, p_val = ttest_rel(combined['tree'], combined['non_tree'])
            mean1 = combined['tree'].mean()
            mean2 = combined['non_tree'].mean()
            all_results.append({
                'metric': metric,
                'tree_model': tree_model,
                'non_tree_model': non_tree_model,
                'tree_mean': mean1,
                'non_tree_mean': mean2,
                'tree_std': combined['tree'].std(),
                'non_tree_std': combined['non_tree'].std(),
                'n_datasets': len(combined),
                't_statistic': t_stat,
                'p_value': p_val
            })
            comparison_num += 1
    # Per-category console summary.
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    results_df4 = pd.DataFrame(all_results)
    sig1[key] = results_df4
    significant_count = (results_df4['p_value'] < 0.05).sum()
    total_count = len(results_df4)
    print(f"\nSignificant comparisons (p < 0.05): {significant_count}/{total_count}")
    print(f"Tree models won in: {(results_df4['tree_mean'] > results_df4['non_tree_mean']).sum()} comparisons")
    # Persisting to CSV is intentionally disabled:
    # results_df4.to_csv('pairwise_comparison_results.csv', index=False)

# The all-datasets table from the previous section joins the per-category map.
sig1["AllDatasets"] = results_df
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import io
# Dropdown labels: each category name annotated with its dataset count,
# plus one pseudo-group that selects the all-datasets results table.
cats = [f"{name} ({len(members)})" for name, members in DATASET_CATEGORIES.items()]
datasel = cats + ["AllDatasets (150)"]
def compare_ind(med, game, ed, bank, sci, social, ml, other, models_to_compare=None):
    """Build one heatmap per metric for individually selected datasets.

    Each of the eight positional arguments is the (possibly empty) list of
    dataset display names chosen in the corresponding category dropdown,
    in the same order as `cats1`.  `models_to_compare` is the multiselect
    of model names; when empty, all models are shown.

    Returns four matplotlib figures (accuracy, precision, recall, f1_score)
    followed by a status message string.
    """
    metri = ["accuracy", "precision", "recall", "f1_score"]
    figs = []
    messages = []
    dropdowns = [med, game, ed, bank, sci, social, ml, other]
    # Translate selected display names back to dataset ids ("D1", "D2", ...).
    selected_datasets_keys = []
    for cat_name, dropdown_values in zip(cats1, dropdowns):
        if dropdown_values:
            selected_datasets_keys.extend(get_keys(DATASET_CATEGORIES[cat_name], dropdown_values))
    if not models_to_compare:
        messages.append("No models selected. Displaying results for all models.")
        models_to_compare = models
    # id -> display name, flattened over every category (duplicates collapse
    # to whichever category declares the id last; ids are unique in practice).
    dataset_id_to_name = {ds_id: ds_name
                          for category_dict in DATASET_CATEGORIES.values()
                          for ds_id, ds_name in category_dict.items()}
    filtered_df_all_metrics = fmodel_results[
        (fmodel_results["dataset"].isin(selected_datasets_keys)) &
        (fmodel_results["model"].isin(models_to_compare))
    ].copy()
    if filtered_df_all_metrics.empty:
        messages.append("No data for the current dataset/model selection.")
    for metric in metri:
        fig = plt.figure(figsize=(12, 8))
        if filtered_df_all_metrics.empty:
            # Guard: pivot_table/heatmap raise on an empty frame; show an
            # empty placeholder figure instead of crashing the callback.
            plt.title(f"{metric}: no data for the current selection")
        else:
            heatmap_data_metric = filtered_df_all_metrics.pivot_table(
                index='dataset',
                columns='model',
                values=metric
            )
            heatmap_data_metric = heatmap_data_metric.rename(index=dataset_id_to_name)
            sns.heatmap(heatmap_data_metric, annot=True, cmap="crest", fmt=".3f", cbar=True)
            plt.title(f"{metric} per Dataset and Model ({len(selected_datasets_keys)} datasets)")
            plt.ylabel("Dataset")
            plt.xlabel("Model")
        plt.tight_layout()
        plt.close(fig)  # release from pyplot's registry; Gradio still renders it
        figs.append(fig)
    return figs[0], figs[1], figs[2], figs[3], "\n".join(messages) if messages else "Comparison complete."
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import io
def compare_groups(data_choice, model1, model2):
    """Bar-chart comparison of two models over one precomputed dataset group.

    `data_choice` is a dropdown label like "Banking & Finance (11)"; the text
    before " (" keys into the precomputed `sig1` results tables.  Returns a
    (figure, text) pair where the text lists the per-metric p-values.
    """
    data1 = sig1[data_choice.split(' (')[0]]
    # The t-test table stores each model pair only once, in either
    # orientation, so match both (model1, model2) and (model2, model1).
    comparison_data = data1[
        ((data1['tree_model'] == model1) & (data1['non_tree_model'] == model2)) |
        ((data1['tree_model'] == model2) & (data1['non_tree_model'] == model1))
    ]
    if comparison_data.empty:
        # Same model twice (or the AllModels placeholder) has no pair entry.
        fig = plt.figure(figsize=(10, 6))
        plt.close(fig)
        return fig, "No comparison data found for the selected models. \n Don't pick the same models."
    plot_data = []
    p_values_text = []
    for _, row in comparison_data.iterrows():
        metric = row['metric']
        # Orient the stored means so "Mean Score" matches the model labels.
        if row['tree_model'] == model1:
            mean_for_model1, mean_for_model2 = row['tree_mean'], row['non_tree_mean']
        else:
            mean_for_model1, mean_for_model2 = row['non_tree_mean'], row['tree_mean']
        plot_data.append({'Metric': metric, 'Model': model1, 'Mean Score': mean_for_model1})
        plot_data.append({'Metric': metric, 'Model': model2, 'Mean Score': mean_for_model2})
        p_values_text.append(f"{metric} p-value: {row['p_value']:.5f} (Significant (cutoff = 0.05): {'Yes' if row['p_value'] < 0.05 else 'No'})")
    df_plot = pd.DataFrame(plot_data)
    fig = plt.figure(figsize=(10, 6))
    sns.barplot(x='Metric', y='Mean Score', hue='Model', data=df_plot)
    plt.title(f'Comparison of {model1} vs {model2} Across Metrics')
    plt.ylabel('Mean Score')
    plt.xlabel('Metric')
    plt.ylim(0, 1)
    plt.legend(title='Model')
    plt.tight_layout()
    plt.close(fig)  # avoid leaking figures across Gradio callbacks
    return fig, "\n".join(p_values_text)
import gradio as gr

# Canonical model list used by both tabs.
models = ["RandomForest", "DecisionTree", "KNN", "SVM", "LogisticRegression"]

with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        # --- Tab 1: compare two models over a whole dataset group ----------
        # (tab label typo fixed: "Compaes" -> "Compares")
        with gr.TabItem("Compares groups of datasets"):
            with gr.Column():
                with gr.Row():
                    gr.Markdown("# Comparing models")
                    # "AllModels" is offered but has no entry in the pairwise
                    # tables, so compare_groups reports "no data" for it.
                    model1 = gr.Dropdown(
                        models + ["AllModels(not for AllData)"],
                        label="Model 1",
                    )
                    model2 = gr.Dropdown(models, value=models[1], label="Model 2")
                    dataset = gr.Dropdown(datasel, label="Datasets")
                    compare_button = gr.Button("Compare")
                with gr.Row():
                    gr.Markdown("## Results")
                    p_value = gr.Textbox(label="Model Output", lines=4)
                with gr.Row():
                    output_plot = gr.Plot(label="Results")
                compare_button.click(
                    fn=compare_groups,
                    inputs=[dataset, model1, model2],
                    outputs=[output_plot, p_value],
                )
        # --- Tab 2: per-dataset heatmaps for hand-picked datasets ----------
        # (tab label corrected: it compares datasets, not tabs)
        with gr.TabItem("Compare Individual Datasets"):
            with gr.Column():
                with gr.Row():
                    gr.Markdown("# Comparing models")
                    model_choice = gr.Dropdown(models, label="Model 1", multiselect=True)
                    med = gr.Dropdown(DATASET_CATEGORIES[cats1[0]].values(), label=cats1[0], multiselect=True)
                    game = gr.Dropdown(DATASET_CATEGORIES[cats1[1]].values(), label=cats1[1], multiselect=True)
                    ed = gr.Dropdown(DATASET_CATEGORIES[cats1[2]].values(), label=cats1[2], multiselect=True)
                with gr.Row():
                    bank = gr.Dropdown(DATASET_CATEGORIES[cats1[3]].values(), label=cats1[3], multiselect=True)
                    sci = gr.Dropdown(DATASET_CATEGORIES[cats1[4]].values(), label=cats1[4], multiselect=True)
                    social = gr.Dropdown(DATASET_CATEGORIES[cats1[5]].values(), label=cats1[5], multiselect=True)
                    ml = gr.Dropdown(DATASET_CATEGORIES[cats1[6]].values(), label=cats1[6], multiselect=True)
                    other = gr.Dropdown(DATASET_CATEGORIES[cats1[7]].values(), label=cats1[7], multiselect=True)
                    # Distinct names: the originals shadowed tab 1's widgets,
                    # which only worked because wiring happened first.
                    compare_button_ind = gr.Button("Compare")
                with gr.Row():
                    p1 = gr.Plot(label="Results")
                with gr.Row():
                    p2 = gr.Plot(label="Results")
                with gr.Row():
                    p3 = gr.Plot(label="Results")
                with gr.Row():
                    p4 = gr.Plot(label="Results")
                with gr.Row():
                    ind_status = gr.Textbox(label="Model Output", lines=4)
                compare_button_ind.click(
                    fn=compare_ind,
                    inputs=[med, game, ed, bank, sci, social, ml, other, model_choice],
                    outputs=[p1, p2, p3, p4, ind_status],
                )

demo.launch(share=True, show_error=True)