"""Gradio dashboard for the customer-churn models.

Provides four tabs: pre-rendered EDA graphs, model output graphs with
written inferences, performance metrics for each model, and live churn
prediction on an uploaded CSV file.
"""

import gradio as gr
import numpy as np
import pandas as pd

import main
from main import (
    clf_rf,
    clf_log,
    accuracy_score_rf,
    accuracy_score_lr,
    brier_score_rf,
    brier_score_lr,
    roc_rf,
    roc_lr,
    logistic,
    rf_clf,
    encoder,
    scaler,
)

# Radio label -> image path for the "EDA Graphs" tab.
# Insertion order is also the order of the radio choices.
_EDA_IMAGES = {
    "Customer Churn": "graphs/EDAGraphs/Churn.png",
    "Contract": "graphs/EDAGraphs/Contract.png",
    "Dependents": "graphs/EDAGraphs/Dependents.png",
    "Device Protection": "graphs/EDAGraphs/DeviceProtection.png",
    "Heatmap": "graphs/EDAGraphs/Heatmap.png",
    "Monthly Charges": "graphs/EDAGraphs/MonthlyCharges.png",
    "Online Backup": "graphs/EDAGraphs/OnlineBackup.png",
    "Online Security": "graphs/EDAGraphs/OnlineSecurity.png",
    "Paperless Billing": "graphs/EDAGraphs/PaperlessBilling.png",
    "Partner": "graphs/EDAGraphs/Partner.png",
    "Payment Method": "graphs/EDAGraphs/PaymentMethod.png",
    "Senior Citizen": "graphs/EDAGraphs/SeniorCitizen.png",
    "Tech Support": "graphs/EDAGraphs/TechSupport.png",
    "Tenure": "graphs/EDAGraphs/tenure.png",
}

# Radio label -> (image path, inference text) for the "Output Graphs" tab.
# The texts are kept as indented triple-quoted blocks, one bullet per line.
_RESULT_GRAPHS = {
    "Correlation": (
        "graphs/EDAGraphs/Correlation.png",
        """
        Inferences:
        - Churn is perfectly correlated with itself (1.0).
        - Tenure and Contract have strong negative correlation with Churn.
        - MonthlyCharges and PaperlessBilling have moderate positive correlation with Churn.
        - Gender, PhoneService, and MultipleLines have near-zero correlation.
        - OnlineSecurity, TechSupport, and DeviceProtection have moderate negative correlation.
        """,
    ),
    "Confusion Matrix for Random Forest": (
        "graphs/OutputGraphs/ConfusionMatrixRandomForest.png",
        """
        Inferences:
        - True Positives (181) are lower than Logistic Regression’s (215), indicating worse recall for churn.
        - False Negatives (192) are higher – more churners are missed.
        - False Positives (104) are slightly higher than logistic regression’s — marginally more false alarms.
        - True Negatives (932) are comparable – performs similarly for non-churn cases.
        - Overall: Slightly poorer at identifying churn compared to Logistic Regression in this case.
        """,
    ),
    "Confusion Matrix for Logistic regression": (
        "graphs/OutputGraphs/ConfusionMatrixLogistic.png",
        """
        Inferences:
        - True Negatives (936) and True Positives (215) indicate good performance in correctly identifying both classes.
        - False Negatives (158): A moderate number of actual churns are missed, which could be critical in business decisions.
        - False Positives (100): Some non-churning customers are predicted to churn — might lead to unnecessary retention efforts.
        - Model Bias: Slight bias towards predicting the majority class (non-churn).
        - Overall: Reasonable balance, but recall for churn could be improved.
        """,
    ),
    "SHAP analysis for Random Forest": (
        "graphs/OutputGraphs/SHAP_RandomForest_Summary.png",
        """
        Inferences:
        - Bimodal Distribution: Indicates two clear groups — likely senior citizens and non-seniors with distinct behavior patterns.
        - Near-Zero SHAP values: SeniorCitizen has limited standalone predictive power.
        - Interaction: The variable may influence predictions when combined with other features (e.g., internet service or contract).
        - Red vs Blue Dots: Represents SHAP values across classes — they’re symmetric, confirming weak influence.
        - Overall: SeniorCitizen is not a key predictor by itself in this model.
        """,
    ),
    "SHAP analysis for Logistic Regression": (
        "graphs/OutputGraphs/SHAP_Logistic_Summary.png",
        """
        Inferences:
        - Top Feature: tenure is the most influential in predicting churn — lower tenure likely increases churn risk.
        - MonthlyCharges & Contract also have strong effects — customers on monthly or expensive plans may churn more.
        - Security-related services (e.g., OnlineSecurity, TechSupport) have moderate influence — presence may reduce churn.
        - PaperlessBilling and OnlineBackup show notable contributions, possibly associated with digital-savvy customers.
        - Features like DeviceProtection, Partner, and PaymentMethod have minimal impact.
        """,
    ),
}

# Radio label -> (classification report, accuracy, Brier score, ROC score)
# for the "Performance Metrics" tab.
_METRICS_BY_ALGORITHM = {
    "Random Forest": (clf_rf, accuracy_score_rf, brier_score_rf, roc_rf),
    "Logistic Regression": (clf_log, accuracy_score_lr, brier_score_lr, roc_lr),
}

# Radio label -> estimator used for live prediction.
_MODELS = {
    "Logistic Regression": logistic,
    "Random Forest": rf_clf,
}

# Columns removed from the uploaded data before it is passed to the model.
_DROPPED_FEATURES = [
    'gender',
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'StreamingTV',
    'StreamingMovies',
    'TotalCharges',
]

# Prediction value -> label shown in the output table.
_CHURN_LABELS = {0: "No Churn", 1: "Churn"}


def eda(Graphs):
    """Return the EDA image component for the selected graph name.

    Args:
        Graphs: Label selected in the EDA radio group.

    Returns:
        A ``gr.Image`` pointing at the matching file, or ``None`` when the
        label is not a known graph.
    """
    image_path = _EDA_IMAGES.get(Graphs)
    if image_path is None:
        return None
    return gr.Image(image_path)


def result(Graphs):
    """Return the output graph and its written inferences.

    Args:
        Graphs: Label selected in the output-graph radio group.

    Returns:
        A two-item list ``[gr.Image, str]`` with the graph and the inference
        text, or ``None`` when the label is not a known graph.
    """
    entry = _RESULT_GRAPHS.get(Graphs)
    if entry is None:
        return None
    image_path, inferences = entry
    return [gr.Image(image_path), inferences]


def _metric_tables(report, accuracy, brier, roc):
    """Build the per-class report table and the summary score table."""
    per_class = pd.DataFrame(report)
    # Keep only the per-class columns, then turn classes into rows.
    per_class = per_class.drop(columns=['accuracy', 'macro avg', 'weighted avg']).T
    per_class = per_class.reset_index().rename(columns={'index': 'class'})
    report_table = gr.DataFrame(value=per_class)
    score_table = gr.DataFrame(
        headers=['Accuracy Score', 'Brier Score', 'ROC Score'],
        value=[[accuracy, brier, roc]],
    )
    return report_table, score_table


def metrics(Algorithms):
    """Return the metric tables for the selected algorithm.

    Args:
        Algorithms: Label selected in the algorithm radio group.

    Returns:
        A tuple ``(per-class report table, score table)`` of ``gr.DataFrame``
        components, or ``None`` when the label is not a known algorithm.
    """
    selected = _METRICS_BY_ALGORITHM.get(Algorithms)
    if selected is None:
        return None
    return _metric_tables(*selected)


def predictChurn(model, filename):
    """Predict churn for every customer in an uploaded CSV file.

    Args:
        model: Label of the model to use ("Logistic Regression" or
            "Random Forest").
        filename: The uploaded CSV, passed straight to ``pd.read_csv``. It
            must contain a ``customerID`` column.

    Returns:
        A ``gr.DataFrame`` with one row per customer and the columns
        "Customer ID" and "Output" ("Churn" / "No Churn").

    Raises:
        gr.Error: If no file was uploaded or no known model was selected.
    """
    if filename is None:
        raise gr.Error("Please upload a CSV dataset before predicting.")
    classifier = _MODELS.get(model)
    if classifier is None:
        raise gr.Error("Please select a model before predicting.")

    dataset = pd.read_csv(filename)
    customers = dataset["customerID"]
    dataset = dataset.drop(columns=['customerID'])

    # NOTE(review): the scaler and encoder are re-fitted on the uploaded data
    # for every column, so the transformation depends on this file rather
    # than on the data the models were trained with. Kept as-is because the
    # fitted per-column transformers are not visible here — confirm in main.
    for column in dataset.select_dtypes(include=['int64', 'float64']).columns:
        dataset[column] = scaler.fit_transform(dataset[column].values.reshape(-1, 1))
    for column in dataset.select_dtypes(include=['object']).columns:
        dataset[column] = encoder.fit_transform(dataset[column])

    dataset = dataset.drop(columns=_DROPPED_FEATURES)

    y_predictions = classifier.predict(dataset)

    # Build the labelled column in one pass instead of writing cell by cell
    # through chained indexing, which does not reliably update the frame.
    df = pd.DataFrame(
        {
            "Customer ID": list(customers),
            "Output": [_CHURN_LABELS[prediction] for prediction in y_predictions],
        }
    )
    return gr.DataFrame(value=df)


with gr.Blocks() as Output:
    gr.Markdown("View Exploratory data Analysis and Output")

    with gr.Tab("EDA Graphs"):
        eda_input = gr.Radio(list(_EDA_IMAGES), show_label=False)
        eda_output = gr.Image()
        eda_input.change(fn=eda, inputs=eda_input, outputs=eda_output)

    with gr.Tab("Output Graphs"):
        result_input = gr.Radio(list(_RESULT_GRAPHS), show_label=False)
        result_output = [gr.Image(), gr.Markdown()]
        result_input.change(fn=result, inputs=result_input, outputs=result_output)

    with gr.Tab("Performance Metrics"):
        algorithm = gr.Radio(list(_METRICS_BY_ALGORITHM), show_label=False)
        metrics_output = [gr.DataFrame(), gr.DataFrame()]
        algorithm.change(fn=metrics, inputs=algorithm, outputs=metrics_output)

    with gr.Tab("Predict Live"):
        gr.Markdown("# Predict Churn")
        model = gr.Radio(list(_MODELS), show_label=False)
        file = gr.File()
        dataset = gr.UploadButton("Upload Dataset(as CSV file)", file_count="single")
        predict = gr.Button("Predict")
        op_df = gr.DataFrame(headers=["Customer ID", "Output"])
        op_md = gr.Markdown("# Predicted Churns")
        clear = gr.ClearButton(components=[model, file, dataset, op_df])

        # Mirror the uploaded file into the File component so it is visible.
        dataset.upload(lambda file: file, dataset, file)
        predict.click(fn=predictChurn, inputs=[model, dataset], outputs=op_df)
        # NOTE(review): this registers a click event with no handler; the
        # ClearButton presumably already clears its components on its own —
        # confirm and remove.
        clear.click()

# NOTE(review): launching at import time means importing this module starts
# the server; consider an `if __name__ == "__main__":` guard if nothing
# relies on the current behaviour.
Output.launch()