saherPervaiz committed on
Commit
d6bf5be
·
verified ·
1 Parent(s): d0e8c92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -17
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import dask.dataframe as dd
4
  from sklearn.model_selection import train_test_split
5
  from sklearn.preprocessing import LabelEncoder
6
  from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
@@ -16,18 +15,15 @@ import seaborn as sns
16
  from io import BytesIO
17
 
18
  # File uploader
19
- st.title("Model Training with Metrics and Correlation Heatmap (Optimized for Large Datasets)")
20
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
21
 
22
  if uploaded_file is not None:
23
- # Use Dask for large datasets
24
- st.write("Loading dataset...")
25
- df = dd.read_csv(uploaded_file).compute() # Convert Dask DataFrame to Pandas DataFrame for further processing
26
- st.success("Dataset loaded successfully!")
27
-
28
- # Show a preview of the dataset
29
- st.write("Dataset Preview:")
30
- st.dataframe(df.head(100)) # Display only the first 100 rows for better performance
31
 
32
  # Convert categorical (str) data to numerical
33
  st.write("Converting Categorical Columns to Numerical Values:")
@@ -36,12 +32,12 @@ if uploaded_file is not None:
36
  for col in df.columns:
37
  if df[col].dtype == 'object' or len(df[col].unique()) <= 10:
38
  st.write(f"Encoding Column: **{col}**")
39
- df[col] = label_encoder.fit_transform(df[col].astype(str))
40
 
41
  # Display the dataset after conversion
42
- st.write("Dataset After Conversion (Preview):")
43
- st.dataframe(df.head(100)) # Display a preview of the converted dataset
44
-
45
  # Handle Null Values (Missing Data)
46
  st.write("Handling Missing (Null) Values:")
47
  fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
@@ -76,8 +72,8 @@ if uploaded_file is not None:
76
  df = cap_extreme_values(df)
77
 
78
  # Show cleaned dataset
79
- st.write("Cleaned Dataset (Preview):")
80
- st.dataframe(df.head(100)) # Display a preview of the cleaned dataset
81
 
82
  # Add clean data download option
83
  st.subheader("Download Cleaned Dataset")
@@ -111,7 +107,7 @@ if uploaded_file is not None:
111
  high_corr = corr.abs().unstack().sort_values(ascending=False).drop_duplicates()
112
  high_corr = high_corr[high_corr.index.get_level_values(0) != high_corr.index.get_level_values(1)]
113
  high_corr_df = pd.DataFrame(high_corr)
114
- st.write(high_corr_df.head(10)) # Show top 10 highly correlated pairs
115
 
116
  target = st.selectbox("Select Target Variable", df.columns)
117
  features = [col for col in df.columns if col != target]
@@ -148,6 +144,44 @@ if uploaded_file is not None:
148
  st.subheader("Classification Model Performance Metrics")
149
  st.dataframe(metrics_df)
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  else: # Continuous target (regression)
152
  st.subheader("Regression Model Training")
153
  regressors = {
@@ -175,3 +209,41 @@ if uploaded_file is not None:
175
  regression_metrics_df = pd.DataFrame(regression_metrics)
176
  st.subheader("Regression Model Performance Metrics")
177
  st.dataframe(regression_metrics_df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.preprocessing import LabelEncoder
5
  from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 
15
  from io import BytesIO
16
 
17
  # File uploader
18
+ st.title("Model Training with Metrics and Correlation Heatmap")
19
  uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
20
 
21
  if uploaded_file is not None:
22
+ df = pd.read_csv(uploaded_file)
23
+
24
+ # Show the dataset
25
+ st.write("Dataset:")
26
+ st.dataframe(df)
 
 
 
27
 
28
  # Convert categorical (str) data to numerical
29
  st.write("Converting Categorical Columns to Numerical Values:")
 
32
  for col in df.columns:
33
  if df[col].dtype == 'object' or len(df[col].unique()) <= 10:
34
  st.write(f"Encoding Column: **{col}**")
35
+ df[col] = label_encoder.fit_transform(df[col])
36
 
37
  # Display the dataset after conversion
38
+ st.write("Dataset After Conversion:")
39
+ st.dataframe(df)
40
+
41
  # Handle Null Values (Missing Data)
42
  st.write("Handling Missing (Null) Values:")
43
  fill_method = st.selectbox("Choose how to handle missing values", ["Drop rows", "Fill with mean/median"])
 
72
  df = cap_extreme_values(df)
73
 
74
  # Show cleaned dataset
75
+ st.write("Cleaned Dataset:")
76
+ st.dataframe(df)
77
 
78
  # Add clean data download option
79
  st.subheader("Download Cleaned Dataset")
 
107
  high_corr = corr.abs().unstack().sort_values(ascending=False).drop_duplicates()
108
  high_corr = high_corr[high_corr.index.get_level_values(0) != high_corr.index.get_level_values(1)]
109
  high_corr_df = pd.DataFrame(high_corr)
110
+ st.write(high_corr_df)
111
 
112
  target = st.selectbox("Select Target Variable", df.columns)
113
  features = [col for col in df.columns if col != target]
 
144
  st.subheader("Classification Model Performance Metrics")
145
  st.dataframe(metrics_df)
146
 
147
+ # Save metrics as PNG (table form)
148
+ fig, ax = plt.subplots(figsize=(8, 4))
149
+ ax.axis('tight')
150
+ ax.axis('off')
151
+ table = plt.table(cellText=metrics_df.values, colLabels=metrics_df.columns, cellLoc='center', loc='center')
152
+ table.auto_set_font_size(False)
153
+ table.set_fontsize(10)
154
+ table.auto_set_column_width(col=list(range(len(metrics_df.columns))))
155
+ buf = BytesIO()
156
+ fig.savefig(buf, format="png")
157
+ buf.seek(0)
158
+ st.download_button(
159
+ label="Download Classification Metrics Table as PNG",
160
+ data=buf,
161
+ file_name="classification_metrics_table.png",
162
+ mime="image/png"
163
+ )
164
+
165
+ # Visualization (Bar Graphs for Classification)
166
+ st.subheader("Classification Model Performance Metrics Graph")
167
+ metrics_df.set_index('Model', inplace=True)
168
+ ax = metrics_df.plot(kind='bar', figsize=(10, 6), colormap='coolwarm', rot=45)
169
+ plt.title("Classification Models - Performance Metrics")
170
+ plt.ylabel("Scores")
171
+ plt.xlabel("Models")
172
+ st.pyplot(plt)
173
+
174
+ # Download button for the bar graph
175
+ buf = BytesIO()
176
+ ax.figure.savefig(buf, format="png")
177
+ buf.seek(0)
178
+ st.download_button(
179
+ label="Download Classification Performance Graph as PNG",
180
+ data=buf,
181
+ file_name="classification_performance_graph.png",
182
+ mime="image/png"
183
+ )
184
+
185
  else: # Continuous target (regression)
186
  st.subheader("Regression Model Training")
187
  regressors = {
 
209
  regression_metrics_df = pd.DataFrame(regression_metrics)
210
  st.subheader("Regression Model Performance Metrics")
211
  st.dataframe(regression_metrics_df)
212
+
213
+ # Save metrics as PNG (table form)
214
+ fig, ax = plt.subplots(figsize=(8, 4))
215
+ ax.axis('tight')
216
+ ax.axis('off')
217
+ table = plt.table(cellText=regression_metrics_df.values, colLabels=regression_metrics_df.columns, cellLoc='center', loc='center')
218
+ table.auto_set_font_size(False)
219
+ table.set_fontsize(10)
220
+ table.auto_set_column_width(col=list(range(len(regression_metrics_df.columns))))
221
+ buf = BytesIO()
222
+ fig.savefig(buf, format="png")
223
+ buf.seek(0)
224
+ st.download_button(
225
+ label="Download Regression Metrics Table as PNG",
226
+ data=buf,
227
+ file_name="regression_metrics_table.png",
228
+ mime="image/png"
229
+ )
230
+
231
+ # Visualization (Bar Graphs for Regression)
232
+ st.subheader("Regression Model Performance Metrics Graph")
233
+ regression_metrics_df.set_index('Model', inplace=True)
234
+ regression_metrics_df.plot(kind='bar', figsize=(10, 6), colormap='coolwarm', rot=45)
235
+ plt.title("Regression Models - Performance Metrics")
236
+ plt.ylabel("Scores")
237
+ plt.xlabel("Models")
238
+ st.pyplot(plt)
239
+
240
+ # Download button for the bar graph
241
+ buf = BytesIO()
242
+ plt.savefig(buf, format="png")
243
+ buf.seek(0)
244
+ st.download_button(
245
+ label="Download Regression Performance Graph as PNG",
246
+ data=buf,
247
+ file_name="regression_performance_graph.png",
248
+ mime="image/png"
249
+ )