Spaces:

saherPervaiz
/

Depression

Sleeping

App Files Files Community

saherPervaiz committed on Jan 14, 2025

Commit

34b1335

verified ·

1 Parent(s): 43d6671

Update app.py

Browse files

Files changed (1) hide show

app.py +141 -3

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ import seaborn as sns
 from io import BytesIO
 # Streamlit app title
-st.title("Model Training with Metrics and Correlation Heatmap")
 # File uploader
 uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
@@ -52,10 +52,46 @@ if uploaded_file is not None:
             else:
                 df[col].fillna(df[col].mode()[0], inplace=True)
-    # Show cleaned dataset
-    st.write("Cleaned Dataset:")
     st.dataframe(df)
     # Correlation Heatmap
     st.subheader("Correlation Heatmap")
     corr = df.corr()
@@ -63,6 +99,32 @@ if uploaded_file is not None:
     sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", cbar=True)
     st.pyplot(plt)
     # Select target variable
     target = st.selectbox("Select Target Variable", df.columns)
     features = [col for col in df.columns if col != target]
@@ -102,6 +164,44 @@ if uploaded_file is not None:
             st.subheader("Classification Model Performance Metrics")
             st.dataframe(metrics_df)
         else:  # Regression
             st.subheader("Regression Model Training")
             regressors = {
@@ -131,5 +231,43 @@ if uploaded_file is not None:
             regression_metrics_df = pd.DataFrame(regression_metrics)
             st.subheader("Regression Model Performance Metrics")
             st.dataframe(regression_metrics_df)
     else:
         st.error("The target variable must contain at least two unique values for classification or regression. Please check your dataset.")

 from io import BytesIO
 # Streamlit app title
+st.title("Model Training with Outlier Removal, Metrics, and Correlation Heatmap")
 # File uploader
 uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
             else:
                 df[col].fillna(df[col].mode()[0], inplace=True)
+    # Remove outliers using the IQR method
+    st.write("Removing Outliers Using IQR:")
+    def remove_outliers_iqr(data, column):  # Return the rows of `data` whose `column` value lies inside the 1.5*IQR fences.
+        Q1 = data[column].quantile(0.25)  # first quartile of the column
+        Q3 = data[column].quantile(0.75)  # third quartile of the column
+        IQR = Q3 - Q1  # interquartile range
+        lower_bound = Q1 - 1.5 * IQR  # lower fence
+        upper_bound = Q3 + 1.5 * IQR  # upper fence
+        return data[(data[column] >= lower_bound) & (data[column] <= upper_bound)]  # both fences are inclusive; rows failing either test are dropped
+    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
+    for col in numeric_cols:
+        original_count = len(df)
+        df = remove_outliers_iqr(df, col)
+        st.write(f"Removed outliers from **{col}**: {original_count - len(df)} rows removed.")
+    # Capping Extreme Values (based on 5% and 95% percentiles)
+    st.write("Handling Extreme Values (Capping):")
+    def cap_extreme_values(dataframe):  # Clip each numeric column to its own 5th-95th percentile range and return the frame.
+        for col in dataframe.select_dtypes(include=[np.number]).columns:  # only columns selected as np.number are touched
+            lower_limit = dataframe[col].quantile(0.05)  # 5th percentile of this column
+            upper_limit = dataframe[col].quantile(0.95)  # 95th percentile of this column
+            dataframe[col] = np.clip(dataframe[col], lower_limit, upper_limit)  # NOTE: assigns back into the argument, so the frame passed by the caller is modified as well
+        return dataframe  # the same object that was passed in, not a copy
+    df = cap_extreme_values(df)
+    # Display dataset after cleaning
+    st.write("Dataset After Outlier Removal and Capping Extreme Values:")
     st.dataframe(df)
+    # Add clean data download option
+    st.subheader("Download Cleaned Dataset")
+    st.download_button(
+        label="Download Cleaned Dataset (CSV)",
+        data=df.to_csv(index=False),
+        file_name="cleaned_dataset.csv",
+        mime="text/csv"
+    )
     # Correlation Heatmap
     st.subheader("Correlation Heatmap")
     corr = df.corr()
     sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", cbar=True)
     st.pyplot(plt)
+    # Save heatmap as PNG
+    buf = BytesIO()
+    plt.savefig(buf, format="png")
+    buf.seek(0)
+    st.download_button(
+        label="Download Correlation Heatmap as PNG",
+        data=buf,
+        file_name="correlation_heatmap.png",
+        mime="image/png"
+    )
+    # Highlight highly correlated pairs
+    st.subheader("Highly Correlated Features")
+    high_corr = corr.abs().unstack().sort_values(ascending=False).drop_duplicates()
+    high_corr = high_corr[high_corr.index.get_level_values(0) != high_corr.index.get_level_values(1)]
+    high_corr_df = pd.DataFrame(high_corr, columns=["Correlation"])
+    st.dataframe(high_corr_df)
+    # Download correlation table as CSV
+    st.download_button(
+        label="Download Correlation Table (CSV)",
+        data=high_corr_df.to_csv(index=True),
+        file_name="correlation_table.csv",
+        mime="text/csv"
+    )
     # Select target variable
     target = st.selectbox("Select Target Variable", df.columns)
     features = [col for col in df.columns if col != target]
             st.subheader("Classification Model Performance Metrics")
             st.dataframe(metrics_df)
+            # Save metrics as PNG (table form)
+            fig, ax = plt.subplots(figsize=(8, 4))
+            ax.axis('tight')
+            ax.axis('off')
+            table = plt.table(cellText=metrics_df.values, colLabels=metrics_df.columns, cellLoc='center', loc='center')
+            table.auto_set_font_size(False)
+            table.set_fontsize(10)
+            table.auto_set_column_width(col=list(range(len(metrics_df.columns))))
+            buf = BytesIO()
+            fig.savefig(buf, format="png")
+            buf.seek(0)
+            st.download_button(
+                label="Download Classification Metrics Table as PNG",
+                data=buf,
+                file_name="classification_metrics_table.png",
+                mime="image/png"
+            )
+            # Visualization (Bar Graphs for Classification)
+            st.subheader("Classification Model Performance Metrics Graph")
+            metrics_df.set_index('Model', inplace=True)
+            ax = metrics_df.plot(kind='bar', figsize=(10, 6), colormap='coolwarm', rot=45)
+            plt.title("Classification Models - Performance Metrics")
+            plt.ylabel("Scores")
+            plt.xlabel("Models")
+            st.pyplot(plt)
+            # Download button for the bar graph
+            buf = BytesIO()
+            ax.figure.savefig(buf, format="png")
+            buf.seek(0)
+            st.download_button(
+                label="Download Classification Performance Graph as PNG",
+                data=buf,
+                file_name="classification_performance_graph.png",
+                mime="image/png"
+            )
         else:  # Regression
             st.subheader("Regression Model Training")
             regressors = {
             regression_metrics_df = pd.DataFrame(regression_metrics)
             st.subheader("Regression Model Performance Metrics")
             st.dataframe(regression_metrics_df)
+            # Save metrics as PNG (table form)
+            fig, ax = plt.subplots(figsize=(8, 4))
+            ax.axis('tight')
+            ax.axis('off')
+            table = plt.table(cellText=regression_metrics_df.values, colLabels=regression_metrics_df.columns, cellLoc='center', loc='center')
+            table.auto_set_font_size(False)
+            table.set_fontsize(10)
+            table.auto_set_column_width(col=list(range(len(regression_metrics_df.columns))))
+            buf = BytesIO()
+            fig.savefig(buf, format="png")
+            buf.seek(0)
+            st.download_button(
+                label="Download Regression Metrics Table as PNG",
+                data=buf,
+                file_name="regression_metrics_table.png",
+                mime="image/png"
+            )
+            # Visualization (Bar Graphs for Regression)
+            st.subheader("Regression Model Performance Metrics Graph")
+            regression_metrics_df.set_index('Model', inplace=True)
+            ax = regression_metrics_df.plot(kind='bar', figsize=(10, 6), colormap='coolwarm', rot=45)
+            plt.title("Regression Models - Performance Metrics")
+            plt.ylabel("Scores")
+            plt.xlabel("Models")
+            st.pyplot(plt)
+            # Download button for the bar graph
+            buf = BytesIO()
+            ax.figure.savefig(buf, format="png")
+            buf.seek(0)
+            st.download_button(
+                label="Download Regression Performance Graph as PNG",
+                data=buf,
+                file_name="regression_performance_graph.png",
+                mime="image/png"
+            )
     else:
         st.error("The target variable must contain at least two unique values for classification or regression. Please check your dataset.")