saherPervaiz committed on
Commit
c8b56c0
·
verified ·
1 Parent(s): edf0043

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -26
app.py CHANGED
@@ -31,11 +31,18 @@ if uploaded_file is not None:
31
  df[column] = le.fit_transform(df[column].astype(str))
32
 
33
  # Handle missing values (impute numerical columns with median and categorical columns with mode)
34
- imputer = SimpleImputer(strategy='most_frequent')
35
- df[df.select_dtypes(include=['object']).columns] = imputer.fit_transform(df.select_dtypes(include=['object']))
36
 
37
- imputer = SimpleImputer(strategy='median')
38
- df[df.select_dtypes(include=['number']).columns] = imputer.fit_transform(df.select_dtypes(include=['number']))
 
 
 
 
 
 
 
 
 
39
 
40
  # Remove outliers (using z-score method)
41
  z_scores = np.abs(stats.zscore(df.select_dtypes(include=['number'])))
@@ -54,6 +61,7 @@ if uploaded_file is not None:
54
 
55
  return df
56
 
 
57
  df_cleaned = clean_dataset(df)
58
 
59
  # Show the cleaned dataset
@@ -126,20 +134,14 @@ if uploaded_file is not None:
126
  st.markdown(f"**Model Performance Results**")
127
  st.dataframe(results_df)
128
 
129
- # Download Image for Model Accuracy Plot
130
- fig, ax = plt.subplots(figsize=(6, 4))
131
- sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
132
- st.pyplot(fig)
133
- # Save and provide download option
134
- fig.savefig("/tmp/model_accuracy.png")
135
- with open("/tmp/model_accuracy.png", "rb") as f:
136
- st.download_button(
137
- label="Download Accuracy Plot",
138
- data=f,
139
- file_name="model_accuracy.png",
140
- mime="image/png"
141
- )
142
-
143
  # Option to download the cleaned dataset
144
  st.download_button(
145
  label="Download Cleaned Dataset",
@@ -148,14 +150,6 @@ if uploaded_file is not None:
148
  mime="text/csv"
149
  )
150
 
151
- # Option to download model performance metrics (Results Table)
152
- st.download_button(
153
- label="Download Model Report",
154
- data=results_df.to_csv(index=False),
155
- file_name="model_report.csv",
156
- mime="text/csv"
157
- )
158
-
159
  # Download correlation heatmap
160
  st.subheader("Correlation Heatmap")
161
  correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()
 
31
  df[column] = le.fit_transform(df[column].astype(str))
32
 
33
  # Handle missing values (impute numerical columns with median and categorical columns with mode)
 
 
34
 
35
+ # Handle categorical columns
36
+ categorical_columns = df.select_dtypes(include=['object']).columns
37
+ if len(categorical_columns) > 0:
38
+ imputer = SimpleImputer(strategy='most_frequent')
39
+ df[categorical_columns] = imputer.fit_transform(df[categorical_columns])
40
+
41
+ # Handle numerical columns
42
+ numerical_columns = df.select_dtypes(include=['number']).columns
43
+ if len(numerical_columns) > 0:
44
+ imputer = SimpleImputer(strategy='median')
45
+ df[numerical_columns] = imputer.fit_transform(df[numerical_columns])
46
 
47
  # Remove outliers (using z-score method)
48
  z_scores = np.abs(stats.zscore(df.select_dtypes(include=['number'])))
 
61
 
62
  return df
63
 
64
+ # Apply the clean_dataset function
65
  df_cleaned = clean_dataset(df)
66
 
67
  # Show the cleaned dataset
 
134
  st.markdown(f"**Model Performance Results**")
135
  st.dataframe(results_df)
136
 
137
+ # Option to download the model performance metrics (Results Table)
138
+ st.download_button(
139
+ label="Download Model Report",
140
+ data=results_df.to_csv(index=False),
141
+ file_name="model_report.csv",
142
+ mime="text/csv"
143
+ )
144
+
 
 
 
 
 
 
145
  # Option to download the cleaned dataset
146
  st.download_button(
147
  label="Download Cleaned Dataset",
 
150
  mime="text/csv"
151
  )
152
 
 
 
 
 
 
 
 
 
153
  # Download correlation heatmap
154
  st.subheader("Correlation Heatmap")
155
  correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()