saherPervaiz committed on
Commit
edf0043
·
verified ·
1 Parent(s): 05285ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -56
app.py CHANGED
@@ -7,8 +7,11 @@ from sklearn.model_selection import train_test_split
7
  from sklearn.impute import SimpleImputer
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
  from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
10
- from sklearn.linear_model import LogisticRegression, LinearRegression
11
  from sklearn.svm import SVC, SVR
 
 
 
12
  from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
13
  from scipy import stats
14
 
@@ -57,23 +60,6 @@ if uploaded_file is not None:
57
  st.write("Cleaned Dataset:")
58
  st.dataframe(df_cleaned)
59
 
60
- # Plot the correlation heatmap
61
- st.subheader("Correlation Heatmap")
62
-
63
- # Select only numeric columns for correlation matrix
64
- correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()
65
-
66
- fig, ax = plt.subplots(figsize=(8, 6)) # Small graph
67
- sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
68
- st.pyplot(fig)
69
-
70
- # Display Histograms of Numerical Columns
71
- st.subheader("Histograms of Numerical Columns")
72
- for column in df_cleaned.select_dtypes(include=['number']).columns:
73
- fig, ax = plt.subplots(figsize=(5, 4)) # Small graph
74
- df_cleaned[column].plot(kind="hist", bins=20, ax=ax, title=column)
75
- st.pyplot(fig)
76
-
77
  # Model Training Section
78
  st.subheader("Model Training")
79
  if df_cleaned.empty:
@@ -95,35 +81,64 @@ if uploaded_file is not None:
95
  train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
96
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
97
 
98
- # Model Selection
 
 
 
 
99
  if is_classification:
100
- model_type = st.selectbox("Choose Classification Model", ["Random Forest", "Logistic Regression", "SVM"])
101
- if model_type == "Random Forest":
102
- n_estimators = st.slider("Number of Trees", 10, 100, 50)
103
- model = RandomForestClassifier(n_estimators=n_estimators)
104
- elif model_type == "Logistic Regression":
105
- model = LogisticRegression(max_iter=1000)
106
- elif model_type == "SVM":
107
- model = SVC()
 
 
 
 
 
 
 
108
  else:
109
- model_type = st.selectbox("Choose Regression Model", ["Random Forest", "Linear Regression", "SVR"])
110
- if model_type == "Random Forest":
111
- n_estimators = st.slider("Number of Trees", 10, 100, 50)
112
- model = RandomForestRegressor(n_estimators=n_estimators)
113
- elif model_type == "Linear Regression":
114
- model = LinearRegression()
115
- elif model_type == "SVR":
116
- model = SVR()
 
 
 
 
 
 
117
 
118
- # Train and Evaluate Model
119
- model.fit(X_train, y_train)
120
- y_pred = model.predict(X_test)
121
 
122
- if is_classification:
123
- st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
124
- st.text(classification_report(y_test, y_pred))
125
- else:
126
- st.write(f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
  # Option to download the cleaned dataset
129
  st.download_button(
@@ -133,25 +148,41 @@ if uploaded_file is not None:
133
  mime="text/csv"
134
  )
135
 
136
- # Option to download model performance metrics
137
  st.download_button(
138
  label="Download Model Report",
139
- data=classification_report(y_test, y_pred) if is_classification else f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}",
140
- file_name="model_report.txt",
141
- mime="text/plain"
142
  )
143
 
144
- # Save and provide a download option for the model accuracy plot
145
- fig, ax = plt.subplots(figsize=(6, 4))
146
- sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
 
 
147
  st.pyplot(fig)
148
-
149
- # Option to download the accuracy plot
150
- fig.savefig("/tmp/model_accuracy.png")
151
- with open("/tmp/model_accuracy.png", "rb") as f:
152
  st.download_button(
153
- label="Download Accuracy Plot",
154
  data=f,
155
- file_name="model_accuracy.png",
156
  mime="image/png"
157
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  from sklearn.impute import SimpleImputer
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
  from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
10
+ from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge
11
  from sklearn.svm import SVC, SVR
12
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
13
+ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
14
+ from sklearn.naive_bayes import GaussianNB
15
  from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
16
  from scipy import stats
17
 
 
60
  st.write("Cleaned Dataset:")
61
  st.dataframe(df_cleaned)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # Model Training Section
64
  st.subheader("Model Training")
65
  if df_cleaned.empty:
 
81
  train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
82
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
83
 
84
+ # Store results as a list of [model name, accuracy, MSE] rows
85
+ results = []
86
+
87
+ # Model Selection and Evaluation
88
+ models = []
89
  if is_classification:
90
+ model_choices = [
91
+ ("Random Forest", RandomForestClassifier(n_estimators=50)),
92
+ ("Logistic Regression", LogisticRegression(max_iter=1000)),
93
+ ("SVM", SVC()),
94
+ ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
95
+ ("Decision Tree", DecisionTreeClassifier()),
96
+ ("Naive Bayes", GaussianNB())
97
+ ]
98
+
99
+ for name, model in model_choices:
100
+ model.fit(X_train, y_train)
101
+ y_pred = model.predict(X_test)
102
+ accuracy = accuracy_score(y_test, y_pred)
103
+ results.append([name, accuracy, None])
104
+
105
  else:
106
+ model_choices = [
107
+ ("Random Forest", RandomForestRegressor(n_estimators=50)),
108
+ ("Linear Regression", LinearRegression()),
109
+ ("SVR", SVR()),
110
+ ("K-Nearest Neighbors", KNeighborsRegressor(n_neighbors=5)),
111
+ ("Decision Tree", DecisionTreeRegressor()),
112
+ ("Ridge Regression", Ridge())
113
+ ]
114
+
115
+ for name, model in model_choices:
116
+ model.fit(X_train, y_train)
117
+ y_pred = model.predict(X_test)
118
+ mse = mean_squared_error(y_test, y_pred)
119
+ results.append([name, None, mse])
120
 
121
+ # Display results in a table
122
+ st.subheader("Model Performance Results")
123
+ results_df = pd.DataFrame(results, columns=["Model", "Accuracy" if is_classification else "Accuracy (N/A)", "Mean Squared Error" if not is_classification else "MSE (N/A)"])
124
 
125
+ # Show a bold title above the results table
126
+ st.markdown(f"**Model Performance Results**")
127
+ st.dataframe(results_df)
128
+
129
+ # Plot the metric of the last-trained model (y_pred comes from the final loop iteration) and offer the image for download
130
+ fig, ax = plt.subplots(figsize=(6, 4))
131
+ sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
132
+ st.pyplot(fig)
133
+ # Save and provide download option
134
+ fig.savefig("/tmp/model_accuracy.png")
135
+ with open("/tmp/model_accuracy.png", "rb") as f:
136
+ st.download_button(
137
+ label="Download Accuracy Plot",
138
+ data=f,
139
+ file_name="model_accuracy.png",
140
+ mime="image/png"
141
+ )
142
 
143
  # Option to download the cleaned dataset
144
  st.download_button(
 
148
  mime="text/csv"
149
  )
150
 
151
+ # Option to download model performance metrics (Results Table)
152
  st.download_button(
153
  label="Download Model Report",
154
+ data=results_df.to_csv(index=False),
155
+ file_name="model_report.csv",
156
+ mime="text/csv"
157
  )
158
 
159
+ # Download correlation heatmap
160
+ st.subheader("Correlation Heatmap")
161
+ correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()
162
+ fig, ax = plt.subplots(figsize=(8, 6))
163
+ sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
164
  st.pyplot(fig)
165
+ fig.savefig("/tmp/correlation_heatmap.png")
166
+ with open("/tmp/correlation_heatmap.png", "rb") as f:
 
 
167
  st.download_button(
168
+ label="Download Correlation Heatmap",
169
  data=f,
170
+ file_name="correlation_heatmap.png",
171
  mime="image/png"
172
  )
173
+
174
+ # Display Histograms of Numerical Columns
175
+ st.subheader("Histograms of Numerical Columns")
176
+ for column in df_cleaned.select_dtypes(include=['number']).columns:
177
+ fig, ax = plt.subplots(figsize=(5, 4)) # Small graph
178
+ df_cleaned[column].plot(kind="hist", bins=20, ax=ax, title=column)
179
+ st.pyplot(fig)
180
+ # Save and provide download option for histogram
181
+ fig.savefig(f"/tmp/{column}_histogram.png")
182
+ with open(f"/tmp/{column}_histogram.png", "rb") as f:
183
+ st.download_button(
184
+ label=f"Download {column} Histogram",
185
+ data=f,
186
+ file_name=f"{column}_histogram.png",
187
+ mime="image/png"
188
+ )