Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Community

saherPervaiz committed on Jan 12, 2025

Commit

c8b56c0

verified ·

1 Parent(s): edf0043

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -26

app.py CHANGED Viewed

@@ -31,11 +31,18 @@ if uploaded_file is not None:
                 df[column] = le.fit_transform(df[column].astype(str))
         # Handle missing values (impute numerical columns with median and categorical columns with mode)
-        imputer = SimpleImputer(strategy='most_frequent')
-        df[df.select_dtypes(include=['object']).columns] = imputer.fit_transform(df.select_dtypes(include=['object']))
-        imputer = SimpleImputer(strategy='median')
-        df[df.select_dtypes(include=['number']).columns] = imputer.fit_transform(df.select_dtypes(include=['number']))
         # Remove outliers (using z-score method)
         z_scores = np.abs(stats.zscore(df.select_dtypes(include=['number'])))
@@ -54,6 +61,7 @@ if uploaded_file is not None:
         return df
     df_cleaned = clean_dataset(df)
     # Show the cleaned dataset
@@ -126,20 +134,14 @@ if uploaded_file is not None:
             st.markdown(f"**Model Performance Results**")
             st.dataframe(results_df)
-            # Download Image for Model Accuracy Plot
-            fig, ax = plt.subplots(figsize=(6, 4))
-            sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
-            st.pyplot(fig)
-            # Save and provide download option
-            fig.savefig("/tmp/model_accuracy.png")
-            with open("/tmp/model_accuracy.png", "rb") as f:
-                st.download_button(
-                    label="Download Accuracy Plot",
-                    data=f,
-                    file_name="model_accuracy.png",
-                    mime="image/png"
-                )
             # Option to download the cleaned dataset
             st.download_button(
                 label="Download Cleaned Dataset",
@@ -148,14 +150,6 @@ if uploaded_file is not None:
                 mime="text/csv"
             )
-            # Option to download model performance metrics (Results Table)
-            st.download_button(
-                label="Download Model Report",
-                data=results_df.to_csv(index=False),
-                file_name="model_report.csv",
-                mime="text/csv"
-            )
             # Download correlation heatmap
             st.subheader("Correlation Heatmap")
             correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()

                 df[column] = le.fit_transform(df[column].astype(str))
         # Handle missing values (impute numerical columns with median and categorical columns with mode)
+        # Handle categorical columns
+        categorical_columns = df.select_dtypes(include=['object']).columns
+        if len(categorical_columns) > 0:
+            imputer = SimpleImputer(strategy='most_frequent')
+            df[categorical_columns] = imputer.fit_transform(df[categorical_columns])
+        # Handle numerical columns
+        numerical_columns = df.select_dtypes(include=['number']).columns
+        if len(numerical_columns) > 0:
+            imputer = SimpleImputer(strategy='median')
+            df[numerical_columns] = imputer.fit_transform(df[numerical_columns])
         # Remove outliers (using z-score method)
         z_scores = np.abs(stats.zscore(df.select_dtypes(include=['number'])))
         return df
+    # Apply the clean_dataset function
     df_cleaned = clean_dataset(df)
     # Show the cleaned dataset
             st.markdown(f"**Model Performance Results**")
             st.dataframe(results_df)
+            # Option to download the model performance metrics (Results Table)
+            st.download_button(
+                label="Download Model Report",
+                data=results_df.to_csv(index=False),
+                file_name="model_report.csv",
+                mime="text/csv"
+            )
             # Option to download the cleaned dataset
             st.download_button(
                 label="Download Cleaned Dataset",
                 mime="text/csv"
             )
             # Download correlation heatmap
             st.subheader("Correlation Heatmap")
             correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()