Spaces:

AseemD
/

InterpretableML

Sleeping

App Files Files Community

AseemD committed on Jan 22, 2025

Commit

a3ad66b

verified ·

1 Parent(s): a6fde97

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -110

app.py CHANGED Viewed

@@ -31,10 +31,6 @@ def load_dataset(name):
         data['self_employed'] = data['self_employed'].map({'Yes': 1, 'No': 0})
         data['loan_status'] = data['loan_status'].map({'Approved': 1, 'Rejected': 0})
-    elif name == "NLP":
-        # Replace with your dataset and all the preprocessing steps
-        data = pd.read_csv("datasets/nlp_dataset.csv")
     elif name == "Healthcare":
         data = pd.read_csv("datasets/healthcare_dataset.csv")
         data.columns = data.columns.str.strip()
@@ -57,8 +53,6 @@ def load_dataset(name):
 def load_models(dataset_name):
     if dataset_name == "Financial":
         return joblib.load("models/loan_models.pkl")
-    elif dataset_name == "NLP":
-        return joblib.load("models/nlp_models.pkl")
     elif dataset_name == "Healthcare":
         model_path = "models/healthcare_models.pkl"
         model = joblib.load(model_path)
@@ -78,7 +72,7 @@ def main():
     if dataset == "Financial":
         # 1. Load the dataset
         X = load_dataset(dataset)
-        st.write(f"{dataset} Dataset Sample")
         st.write(X.head())
         # 2. Select interpretability method
@@ -99,14 +93,14 @@ def main():
             # Visualize SHAP values
             idx = st.slider("Select Test Instance", 0, len(X) - 1, 0)
             st.write("SHAP Force Plot for a Single Prediction")
-            shap.force_plot(explainer.expected_value, shap_values[idx].values, X.iloc[idx], matplotlib=True, show=False)
-            st.pyplot(bbox_inches='tight')
             st.write("SHAP Summary Plot")
-            shap.summary_plot(shap_values, X, show=False)
-            st.pyplot(bbox_inches='tight')
             st.write("SHAP Bar Plot")
-            shap.summary_plot(shap_values, X, plot_type="bar", show=False)
-            st.pyplot(bbox_inches='tight')
         elif method == "LIME":
             st.subheader("3. Interpretability using LIME")
@@ -146,103 +140,6 @@ def main():
             exp.as_pyplot_figure()
             st.pyplot(bbox_inches='tight')
-    # Perform different interpretability methods on the second dataset
-    elif dataset == "Healthcare":
-        data = load_dataset(dataset)
-        st.write(f"{dataset} Dataset Sample")
-        st.write(data.head())
-        st.subheader("2. Select an Interpretability Method")
-        method = st.selectbox("Choose an interpretability method:", ["LIME", "SHAP"])
-        loaded_models = load_models(dataset)
-        model = loaded_models.get('Random Forest')
-        idx = st.slider("Select Test Instance", 0, 24031, 0)
-        if method == "SHAP":
-            st.subheader("3. Interpretability using SHAP")
-            loaded_models = load_models(dataset)
-            model = loaded_models.get('Random Forest')
-            if model and isinstance(model, imbPipeline):
-                st.write("Model loaded and is a valid pipeline.")
-                try:
-                    if 'classifier' in model.named_steps:
-                        tree_model = model.named_steps['classifier']
-                        if isinstance(tree_model, RandomForestClassifier):
-                            explainer = shap.TreeExplainer(tree_model)
-                            X_shap = data.drop(columns=["diabetes"])
-                            st.write(f"Data shape for SHAP: {X_shap.shape}")
-                            sample_size = 1000
-                            X_sample = X_shap.sample(n=sample_size, random_state=42)
-                            st.write(f"Using a sample of {sample_size} instances for SHAP analysis.")
-                            shap_values = explainer.shap_values(X_sample)
-                            st.write(f"SHAP values shape: {np.array(shap_values).shape}")
-                            idx = st.slider("Select Test Instance", 0, len(X_sample) - 1, 0)
-                            st.write("SHAP Force Plot for a Single Prediction")
-                            shap.force_plot(explainer.expected_value[1], shap_values[1][idx, :], X_sample.iloc[idx, :], matplotlib=True, show=False)
-                            st.pyplot(bbox_inches='tight')
-                            st.write("SHAP Summary Plot")
-                            shap.summary_plot(shap_values[1], X_sample, show=False)
-                            st.pyplot(bbox_inches='tight')
-                            st.write("SHAP Bar Plot")
-                            shap.summary_plot(shap_values[1], X_sample, plot_type="bar", show=False)
-                            st.pyplot(bbox_inches='tight')
-                        else:
-                            st.error("The classifier in the pipeline is not a RandomForest.")
-                    else:
-                        st.error("RandomForest classifier not found in the pipeline.")
-                except Exception as e:
-                    st.error(f"Error during SHAP analysis: {e}")
-            else:
-                st.error("Model could not be loaded or is not a valid RandomForest pipeline.")
-        elif method == "LIME":
-            st.subheader("3. Interpretability using LIME")
-            model_choice = st.radio("Select Model", ["Random Forest"])
-            model = loaded_models.get('Random Forest')
-            sns.set_style('whitegrid')
-            x = data.drop(columns=["diabetes"])
-            y = data["diabetes"]
-            X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)
-            target = ['Non-Diabetic', 'Diabetic']
-            explainer = LimeTabularExplainer(
-                X_train.values,
-                feature_names=X_train.columns.tolist(),
-                class_names=target,
-                verbose=True,
-                mode='classification'
-                )
-            instance = X_test.iloc[idx].values.reshape(1, -1)
-            def model_predict(instance):
-                return model.predict_proba(pd.DataFrame(instance, columns=X_train.columns))
-            exp = explainer.explain_instance(
-                data_row=instance[0],
-                predict_fn=model_predict
-                )
-            st.write("LIME Explanation")
-            exp.save_to_file('lime_explanation.html')
-            HtmlFile = open('lime_explanation.html', 'r', encoding='utf-8')
-            components.html(HtmlFile.read(), height=600)
-            st.write('True label:', target[y_test.iloc[idx]])
-            st.write("Effect of Predictors")
-            exp.as_pyplot_figure()
-            st.pyplot(bbox_inches='tight')
 if __name__ == "__main__":
     main()

         data['self_employed'] = data['self_employed'].map({'Yes': 1, 'No': 0})
         data['loan_status'] = data['loan_status'].map({'Approved': 1, 'Rejected': 0})
     elif name == "Healthcare":
         data = pd.read_csv("datasets/healthcare_dataset.csv")
         data.columns = data.columns.str.strip()
 def load_models(dataset_name):
     if dataset_name == "Financial":
         return joblib.load("models/loan_models.pkl")
     elif dataset_name == "Healthcare":
         model_path = "models/healthcare_models.pkl"
         model = joblib.load(model_path)
     if dataset == "Financial":
         # 1. Load the dataset
         X = load_dataset(dataset)
+        st.write(f"Loan Approval Dataset Sample")
         st.write(X.head())
         # 2. Select interpretability method
             # Visualize SHAP values
             idx = st.slider("Select Test Instance", 0, len(X) - 1, 0)
             st.write("SHAP Force Plot for a Single Prediction")
+            fig = shap.force_plot(explainer.expected_value, shap_values[idx].values, X.iloc[idx], matplotlib=True, show=False)
+            st.pyplot(fig, bbox_inches='tight')
             st.write("SHAP Summary Plot")
+            fig =shap.summary_plot(shap_values, X, show=False)
+            st.pyplot(fig, bbox_inches='tight')
             st.write("SHAP Bar Plot")
+            fig =shap.summary_plot(shap_values, X, plot_type="bar", show=False)
+            st.pyplot(fig, bbox_inches='tight')
         elif method == "LIME":
             st.subheader("3. Interpretability using LIME")
             exp.as_pyplot_figure()
             st.pyplot(bbox_inches='tight')
 if __name__ == "__main__":
     main()