saherPervaiz committed on
Commit
1b7c47b
·
verified ·
1 Parent(s): 8ab0d02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -55
app.py CHANGED
@@ -6,13 +6,14 @@ import matplotlib.pyplot as plt
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.impute import SimpleImputer
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
10
- from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge
11
- from sklearn.svm import SVC, SVR
12
- from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
13
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
14
  from sklearn.naive_bayes import GaussianNB
15
- from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
 
16
  from scipy import stats
17
 
18
  # File uploader
@@ -97,64 +98,61 @@ if uploaded_file is not None:
97
  train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
98
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
99
 
100
- # Store results in a dictionary
101
- results = []
 
102
 
103
- # Model Selection and Evaluation
104
  if is_classification:
105
- model_choices = [
106
- ("Random Forest", RandomForestClassifier(n_estimators=50)),
107
- ("Logistic Regression", LogisticRegression(max_iter=1000)),
108
- ("SVM", SVC()),
109
- ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
110
- ("Decision Tree", DecisionTreeClassifier()),
111
- ("Naive Bayes", GaussianNB())
112
- ]
113
-
114
- for name, model in model_choices:
115
- model.fit(X_train, y_train)
116
- y_pred = model.predict(X_test)
 
117
 
118
- # Accuracy and Classification Report
119
  accuracy = accuracy_score(y_test, y_pred)
120
- classification_report_output = classification_report(y_test, y_pred)
121
-
122
- # Append results
123
- results.append([name, accuracy, classification_report_output])
124
-
125
- else: # Regression models
126
- model_choices = [
127
- ("Random Forest", RandomForestRegressor(n_estimators=50)),
128
- ("Linear Regression", LinearRegression()),
129
- ("SVR", SVR()),
130
- ("K-Nearest Neighbors", KNeighborsRegressor(n_neighbors=5)),
131
- ("Decision Tree", DecisionTreeRegressor()),
132
- ("Ridge Regression", Ridge())
133
- ]
134
-
135
- for name, model in model_choices:
136
- model.fit(X_train, y_train)
137
- y_pred = model.predict(X_test)
138
-
139
- # Mean Squared Error (MSE) for regression tasks
140
- mse = mean_squared_error(y_test, y_pred)
141
 
142
- # Append results
143
- results.append([name, None, mse])
 
 
 
 
 
144
 
145
- # Display results in a table
146
- st.subheader("Model Performance Results")
147
- results_df = pd.DataFrame(results, columns=["Model", "Accuracy" if is_classification else "Accuracy (N/A)",
148
- "Classification Report" if is_classification else "MSE (N/A)"])
149
-
150
- # Bold the headers
151
- st.markdown(f"**Model Performance Results**")
152
- st.dataframe(results_df)
 
 
 
 
 
 
 
 
153
 
154
  # Option to download the model performance metrics (Results Table)
155
  st.download_button(
156
  label="Download Model Report",
157
- data=results_df.to_csv(index=False),
158
  file_name="model_report.csv",
159
  mime="text/csv"
160
  )
@@ -166,7 +164,7 @@ if uploaded_file is not None:
166
  file_name="cleaned_dataset.csv",
167
  mime="text/csv"
168
  )
169
-
170
  # Download correlation heatmap
171
  st.subheader("Correlation Heatmap")
172
  correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()
 
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.impute import SimpleImputer
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
+ from sklearn.ensemble import RandomForestClassifier
10
+ from sklearn.linear_model import LogisticRegression
11
+ from sklearn.svm import SVC
12
+ from sklearn.neighbors import KNeighborsClassifier
13
+ from sklearn.tree import DecisionTreeClassifier
14
  from sklearn.naive_bayes import GaussianNB
15
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
16
+ from tabulate import tabulate
17
  from scipy import stats
18
 
19
  # File uploader
 
98
  train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
99
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
100
 
101
+ # Initialize results storage
102
+ predictions = pd.DataFrame()
103
+ metrics = []
104
 
105
+ # Evaluate classifiers (if classification)
106
  if is_classification:
107
+ classifiers = {
108
+ 'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
109
+ 'Decision Tree': DecisionTreeClassifier(),
110
+ 'Random Forest': RandomForestClassifier(),
111
+ 'Support Vector Machine (SVM)': SVC(),
112
+ 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
113
+ 'Naive Bayes': GaussianNB()
114
+ }
115
+
116
+ for name, classifier in classifiers.items():
117
+ classifier.fit(X_train, y_train)
118
+ y_pred = classifier.predict(X_test)
119
+ predictions[name] = y_pred # Store predictions
120
 
121
+ # Evaluate metrics
122
  accuracy = accuracy_score(y_test, y_pred)
123
+ precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
124
+ recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
125
+ f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
+ metrics.append({
128
+ 'Model': name,
129
+ 'Accuracy': round(accuracy, 2),
130
+ 'Precision': round(precision, 2),
131
+ 'Recall': round(recall, 2),
132
+ 'F1-Score': round(f1, 2)
133
+ })
134
 
135
+ # Create a metrics DataFrame
136
+ metrics_df = pd.DataFrame(metrics)
137
+
138
+ # Format table with tabulate
139
+ table = tabulate(
140
+ metrics_df,
141
+ headers="keys",
142
+ tablefmt="fancy_grid",
143
+ showindex=False,
144
+ numalign="center",
145
+ stralign="center"
146
+ )
147
+
148
+ # Display formatted table
149
+ st.markdown(f"**Model Performance Metrics**")
150
+ st.text(table)
151
 
152
  # Option to download the model performance metrics (Results Table)
153
  st.download_button(
154
  label="Download Model Report",
155
+ data=metrics_df.to_csv(index=False),
156
  file_name="model_report.csv",
157
  mime="text/csv"
158
  )
 
164
  file_name="cleaned_dataset.csv",
165
  mime="text/csv"
166
  )
167
+
168
  # Download correlation heatmap
169
  st.subheader("Correlation Heatmap")
170
  correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()