Spaces:

dperales
/

Fraud_Detection_Pycaret

Runtime error

App Files Files Community

dperales committed on Apr 8, 2023

Commit

39de30e

1 Parent(s): 1715696

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -1

app.py CHANGED Viewed

@@ -93,6 +93,7 @@ if page == "Clustering Analysis":
         else:
             insurance_claims = pd.read_csv(selected_csv)
         insurance_claims.describe().T
         cat_col = insurance_claims.select_dtypes(include=['object']).columns
@@ -104,6 +105,7 @@ if page == "Clustering Analysis":
         # Create a Matplotlib figure
         fig, ax = plt.subplots(figsize=(12, 8))
         # Create a heatmap using seaborn
         sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
         # Set the title for the heatmap
         ax.set_title('Correlation Heatmap')
@@ -133,15 +135,19 @@ if page == "Clustering Analysis":
                 cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
                                                                              'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
                                                                              ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
                 cluster_summary
                 cluster_model_2
                 # all_metrics = get_metrics()
                 # all_metrics
                 cluster_results = pull()
                 cluster_results
                 # plot pca cluster plot
                 plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
@@ -160,6 +166,20 @@ if page == "Clustering Analysis":
                 if selected_model != 'ap':
                     plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
 elif page == "Anomaly Detection":
     st.header('Anomaly Detection')
@@ -221,12 +241,29 @@ elif page == "Anomaly Detection":
                 # train model
                 anomaly_model = create_model(selected_model)
                 anomaly_model_2 = assign_model(anomaly_model)
                 anomaly_model_2
                 anomaly_results = pull()
                 anomaly_results
                 # plot
                 plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
-                plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')

         else:
             insurance_claims = pd.read_csv(selected_csv)
+        st.header("Inference Description")
         insurance_claims.describe().T
         cat_col = insurance_claims.select_dtypes(include=['object']).columns
         # Create a Matplotlib figure
         fig, ax = plt.subplots(figsize=(12, 8))
         # Create a heatmap using seaborn
+        st.header("Heat Map")
         sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
         # Set the title for the heatmap
         ax.set_title('Correlation Heatmap')
                 cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
                                                                              'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
                                                                              ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
+                st.header("Cluster Summary")
                 cluster_summary
+                st.header("Assign Model")
                 cluster_model_2
                 # all_metrics = get_metrics()
                 # all_metrics
+                st.header("Clustering Metrics")
                 cluster_results = pull()
                 cluster_results
+                st.header("Clustering Plots")
                 # plot pca cluster plot
                 plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
                 if selected_model != 'ap':
                     plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
+                # Create a Classification Model to extract feature importance
+                st.header("Feature Importance")
+                from pycaret.classification import *
+                s = setup(cluster_model_2, target = 'Cluster')
+                lr = create_model('lr')
+                # Build a feature-importance table from the absolute values of the logistic regression coefficients
+                feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
+                # sort by feature importance value and filter top 10
+                feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
+                # Display the filtered table in Streamlit
+                # st.dataframe(feat_imp)
+                # Display the filtered table as a bar chart in Streamlit
+                st.bar_chart(feat_imp.set_index('Feature'))
 elif page == "Anomaly Detection":
     st.header('Anomaly Detection')
                 # train model
                 anomaly_model = create_model(selected_model)
+                st.header("Assign Model")
                 anomaly_model_2 = assign_model(anomaly_model)
                 anomaly_model_2
+                st.header("Anomaly Metrics")
                 anomaly_results = pull()
                 anomaly_results
                 # plot
+                st.header("Anomaly Plots")
                 plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
+                plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')
+                # Create a Classification Model to extract feature importance
+                st.header("Feature Importance")
+                from pycaret.classification import *
+                s = setup(anomaly_model_2, target = 'Anomaly')
+                lr = create_model('lr')
+                # Build a feature-importance table from the absolute values of the logistic regression coefficients
+                feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
+                # sort by feature importance value and filter top 10
+                feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
+                # Display the filtered table in Streamlit
+                # st.dataframe(feat_imp)
+                # Display the filtered table as a bar chart in Streamlit
+                st.bar_chart(feat_imp.set_index('Feature'))