shimaa22 committed on
Commit
d4b1c85
·
verified ·
1 Parent(s): f388de4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -98
app.py CHANGED
@@ -14,25 +14,31 @@ from sklearn.metrics import (
14
  accuracy_score,
15
  precision_score,
16
  recall_score,
 
17
  confusion_matrix
18
  )
19
 
20
  from imblearn.over_sampling import SMOTE
21
 
22
- from reportlab.pdfgen import canvas
 
 
23
 
24
  # =========================
25
- # GLOBAL
26
  # =========================
27
  df_global = None
28
- best_model_obj = None
29
  best_model_name = None
30
- X_global = None
31
- y_global = None
 
 
 
 
32
 
33
 
34
  # =========================
35
- # UPLOAD + CLEAN
36
  # =========================
37
  def upload_and_clean(file):
38
 
@@ -58,12 +64,10 @@ def upload_and_clean(file):
58
 
59
 
60
  # =========================
61
- # VISUALIZATION
62
  # =========================
63
  def analyze_data(target):
64
 
65
- global df_global
66
-
67
  df = df_global.copy()
68
  images = []
69
 
@@ -100,11 +104,35 @@ def analyze_data(target):
100
 
101
 
102
  # =========================
103
- # ML WITH SMOTE + CLASS WEIGHT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # =========================
105
  def run_ml(target):
106
 
107
- global df_global, best_model_obj, best_model_name, X_global, y_global
 
108
 
109
  df = df_global.copy()
110
 
@@ -116,39 +144,28 @@ def run_ml(target):
116
  X = df.drop(columns=[target])
117
  y = df[target]
118
 
119
- X_global = X
120
- y_global = y
121
-
122
- # =========================
123
- # imbalance detection
124
- # =========================
125
- counts = np.bincount(y)
126
- imbalance_ratio = min(counts) / max(counts)
127
- is_imbalanced = imbalance_ratio < 0.5
128
-
129
- # split
130
  X_train, X_test, y_train, y_test = train_test_split(
131
  X, y, test_size=0.2, random_state=42
132
  )
133
 
 
 
 
 
134
  models = {
135
  "Decision Tree": DecisionTreeClassifier(),
136
  "Random Forest": RandomForestClassifier(),
137
  "XGBoost": XGBClassifier(eval_metric="logloss")
138
  }
139
 
140
- # =========================
141
- # RESULT TABLES
142
- # =========================
143
- no_results = []
144
- cw_results = []
145
- smote_results = []
146
 
147
  best_score = 0
148
 
149
- # =====================================================
150
- # 1️⃣ NO SAMPLING
151
- # =====================================================
152
  for name, model in models.items():
153
 
154
  model.fit(X_train, y_train)
@@ -156,36 +173,49 @@ def run_ml(target):
156
 
157
  acc = accuracy_score(y_test, pred)
158
 
159
- no_results.append({
160
  "Model": name,
161
- "Accuracy": acc
 
 
 
162
  })
163
 
 
 
164
  if acc > best_score:
165
  best_score = acc
166
- best_model_obj = model
167
- best_model_name = name + " (No Sampling)"
168
 
169
- # =====================================================
170
- # 2️⃣ CLASS WEIGHT
171
- # =====================================================
172
- for name, model in models.items():
173
 
174
- if name != "XGBoost":
175
- model = DecisionTreeClassifier(class_weight="balanced") if name=="Decision Tree" else RandomForestClassifier(class_weight="balanced")
 
 
 
 
176
 
177
  model.fit(X_train, y_train)
178
  pred = model.predict(X_test)
179
 
180
- cw_results.append({
181
  "Model": name,
182
- "Accuracy": accuracy_score(y_test, pred)
 
 
 
183
  })
184
 
185
- # =====================================================
186
- # 3️⃣ SMOTE
187
- # =====================================================
188
- if is_imbalanced:
 
 
189
  sm = SMOTE(random_state=42)
190
  X_res, y_res = sm.fit_resample(X_train, y_train)
191
  else:
@@ -196,16 +226,28 @@ def run_ml(target):
196
  model.fit(X_res, y_res)
197
  pred = model.predict(X_test)
198
 
199
- smote_results.append({
200
  "Model": name,
201
- "Accuracy": accuracy_score(y_test, pred)
 
 
 
202
  })
203
 
 
 
 
 
 
 
 
 
204
  return (
205
- f"Imbalanced Dataset: {is_imbalanced}",
206
- pd.DataFrame(no_results),
207
- pd.DataFrame(cw_results),
208
- pd.DataFrame(smote_results)
 
209
  )
210
 
211
 
@@ -214,12 +256,13 @@ def run_ml(target):
214
  # =========================
215
  def feature_importance():
216
 
217
- global best_model_obj, X_global
218
 
219
  if hasattr(best_model_obj, "feature_importances_"):
220
 
221
  plt.figure(figsize=(6,4))
222
- plt.barh(X_global.columns, best_model_obj.feature_importances_)
 
223
 
224
  path = "/tmp/feat.png"
225
  plt.savefig(path)
@@ -233,33 +276,58 @@ def feature_importance():
233
  # =========================
234
  # PDF REPORT
235
  # =========================
236
- def download_report():
237
 
238
- global best_model_name
239
 
240
  path = "/tmp/report.pdf"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- c = canvas.Canvas(path)
243
 
244
- c.drawString(100, 750, "Auto ML Report")
245
- c.drawString(100, 730, f"Best Model: {best_model_name}")
 
246
 
247
- c.drawString(100, 700, "Includes SMOTE + Class Weight Comparison")
 
248
 
249
- c.save()
 
 
 
 
250
 
251
  return path
252
 
253
 
254
  # =========================
255
- # FULL ANALYSIS
256
  # =========================
257
  def full_analysis(target):
258
 
259
- ml_status, no_df, cw_df, smote_df = run_ml(target)
260
- images = analyze_data(target)
261
 
262
- return ml_status, no_df, cw_df, smote_df, images
263
 
264
 
265
  # =========================
@@ -267,59 +335,41 @@ def full_analysis(target):
267
  # =========================
268
  with gr.Blocks() as demo:
269
 
270
- gr.Markdown("# 🚀 Advanced AutoML System (SMOTE + Class Weight)")
271
 
272
  file = gr.File()
273
 
274
- upload_btn = gr.Button("Upload Data")
275
 
276
  status = gr.Textbox()
277
  preview = gr.Dataframe()
278
 
279
- target = gr.Dropdown(label="Select Target")
280
 
281
  run_btn = gr.Button("Run Full Analysis")
282
 
283
  ml_status = gr.Textbox()
284
 
285
- no_table = gr.Dataframe(label="No Sampling")
286
- cw_table = gr.Dataframe(label="Class Weight")
287
- smote_table = gr.Dataframe(label="SMOTE")
288
 
289
- gallery = gr.Gallery(label="Visualizations", columns=2)
290
 
291
  feat_btn = gr.Button("Feature Importance")
292
  feat_img = gr.Image()
293
 
294
- pdf_btn = gr.Button("Download Report")
295
  pdf_file = gr.File()
296
 
297
- # upload
298
- upload_btn.click(
299
- upload_and_clean,
300
- file,
301
- [status, preview, target, target]
302
- )
303
 
304
- # full analysis
305
- run_btn.click(
306
- full_analysis,
307
- target,
308
- [ml_status, no_table, cw_table, smote_table, gallery]
309
- )
310
 
311
- # feature importance
312
- feat_btn.click(
313
- feature_importance,
314
- None,
315
- feat_img
316
- )
317
 
318
- # pdf
319
- pdf_btn.click(
320
- download_report,
321
- None,
322
- pdf_file
323
- )
324
 
325
  demo.launch(share=True)
 
14
  accuracy_score,
15
  precision_score,
16
  recall_score,
17
+ f1_score,
18
  confusion_matrix
19
  )
20
 
21
  from imblearn.over_sampling import SMOTE
22
 
23
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, Table, TableStyle
24
+ from reportlab.lib import colors
25
+ from reportlab.lib.styles import getSampleStyleSheet
26
 
27
  # =========================
28
# GLOBALS
# =========================
# Module-level state shared between the Gradio callbacks below.

# DataFrame that analyze_data() and run_ml() copy before working on it.
# NOTE(review): presumably assigned by upload_and_clean() -- confirm.
df_global = None

# Display name of the best-scoring model; written by run_ml() and printed
# in the PDF report.
best_model_name = None

# Estimator inspected by feature_importance().
# NOTE(review): no assignment to this name is visible in run_ml(); confirm it
# is set somewhere, otherwise feature_importance() always sees None.
best_model_obj = None

# Results of the most recent run_ml() call, reused by generate_pdf().
no_global = None      # metrics table for the "no sampling" run
cw_global = None      # metrics table for the class-weight run
smote_global = None   # metrics table for the SMOTE run
cm_global = None      # dict: label -> path of the confusion-matrix PNG
38
 
39
 
40
  # =========================
41
+ # UPLOAD
42
  # =========================
43
  def upload_and_clean(file):
44
 
 
64
 
65
 
66
  # =========================
67
+ # ANALYSIS VISUALIZATION
68
  # =========================
69
  def analyze_data(target):
70
 
 
 
71
  df = df_global.copy()
72
  images = []
73
 
 
104
 
105
 
106
  # =========================
107
+ # CONFUSION MATRIX
108
+ # =========================
109
def plot_cm(y_true, y_pred, title):
    """Render a confusion matrix as a PNG file and return its path.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        title: Plot title; a sanitised copy is used as the file name.

    Returns:
        Path of the saved image, ``/tmp/<sanitised title>.png``.
    """
    cm = confusion_matrix(y_true, y_pred)

    # Keep only characters that are safe inside a file name so a title
    # containing a path separator cannot write outside /tmp.
    safe_name = "".join(
        ch if ch.isalnum() or ch in " ._-" else "_" for ch in str(title)
    )
    path = f"/tmp/{safe_name}.png"

    # Work on an explicit figure rather than pyplot's implicit "current
    # figure", and always close it so a failed save does not leak it.
    fig, ax = plt.subplots(figsize=(4, 4))
    try:
        ax.imshow(cm, cmap="Blues")
        ax.set_title(title)

        # Write each cell's count at the centre of the cell.
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                ax.text(j, i, str(cm[i, j]), ha="center", va="center")

        fig.savefig(path)
    finally:
        plt.close(fig)

    return path
127
+
128
+
129
+ # =========================
130
+ # ML (NO / CW / SMOTE)
131
  # =========================
132
  def run_ml(target):
133
 
134
+ global df_global, best_model_name
135
+ global no_global, cw_global, smote_global, cm_global
136
 
137
  df = df_global.copy()
138
 
 
144
  X = df.drop(columns=[target])
145
  y = df[target]
146
 
 
 
 
 
 
 
 
 
 
 
 
147
  X_train, X_test, y_train, y_test = train_test_split(
148
  X, y, test_size=0.2, random_state=42
149
  )
150
 
151
+ # imbalance check
152
+ counts = np.bincount(y)
153
+ imbalance = min(counts) / max(counts) < 0.5
154
+
155
  models = {
156
  "Decision Tree": DecisionTreeClassifier(),
157
  "Random Forest": RandomForestClassifier(),
158
  "XGBoost": XGBClassifier(eval_metric="logloss")
159
  }
160
 
161
+ no_rows, cw_rows, smote_rows = [], [], []
162
+ cm_images = {}
 
 
 
 
163
 
164
  best_score = 0
165
 
166
+ # =========================
167
+ # NO SAMPLING
168
+ # =========================
169
  for name, model in models.items():
170
 
171
  model.fit(X_train, y_train)
 
173
 
174
  acc = accuracy_score(y_test, pred)
175
 
176
+ no_rows.append({
177
  "Model": name,
178
+ "Accuracy": acc,
179
+ "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
180
+ "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
181
+ "F1": f1_score(y_test, pred, average="weighted", zero_division=0)
182
  })
183
 
184
+ cm_images[f"{name}_no"] = plot_cm(y_test, pred, f"{name}_NO")
185
+
186
  if acc > best_score:
187
  best_score = acc
188
+ best_model_name = name + " (No)"
 
189
 
190
+ # =========================
191
+ # CLASS WEIGHT
192
+ # =========================
193
+ for name in models.keys():
194
 
195
+ if name == "Decision Tree":
196
+ model = DecisionTreeClassifier(class_weight="balanced")
197
+ elif name == "Random Forest":
198
+ model = RandomForestClassifier(class_weight="balanced")
199
+ else:
200
+ model = XGBClassifier(eval_metric="logloss")
201
 
202
  model.fit(X_train, y_train)
203
  pred = model.predict(X_test)
204
 
205
+ cw_rows.append({
206
  "Model": name,
207
+ "Accuracy": accuracy_score(y_test, pred),
208
+ "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
209
+ "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
210
+ "F1": f1_score(y_test, pred, average="weighted", zero_division=0)
211
  })
212
 
213
+ cm_images[f"{name}_cw"] = plot_cm(y_test, pred, f"{name}_CW")
214
+
215
+ # =========================
216
+ # SMOTE
217
+ # =========================
218
+ if imbalance:
219
  sm = SMOTE(random_state=42)
220
  X_res, y_res = sm.fit_resample(X_train, y_train)
221
  else:
 
226
  model.fit(X_res, y_res)
227
  pred = model.predict(X_test)
228
 
229
+ smote_rows.append({
230
  "Model": name,
231
+ "Accuracy": accuracy_score(y_test, pred),
232
+ "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
233
+ "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
234
+ "F1": f1_score(y_test, pred, average="weighted", zero_division=0)
235
  })
236
 
237
+ cm_images[f"{name}_smote"] = plot_cm(y_test, pred, f"{name}_SMOTE")
238
+
239
+ # store globally
240
+ no_global = pd.DataFrame(no_rows)
241
+ cw_global = pd.DataFrame(cw_rows)
242
+ smote_global = pd.DataFrame(smote_rows)
243
+ cm_global = cm_images
244
+
245
  return (
246
+ f"Imbalance: {imbalance}",
247
+ no_global,
248
+ cw_global,
249
+ smote_global,
250
+ list(cm_images.values())
251
  )
252
 
253
 
 
256
  # =========================
257
  def feature_importance():
258
 
259
+ global best_model_obj
260
 
261
  if hasattr(best_model_obj, "feature_importances_"):
262
 
263
  plt.figure(figsize=(6,4))
264
+ plt.barh(range(len(best_model_obj.feature_importances_)),
265
+ best_model_obj.feature_importances_)
266
 
267
  path = "/tmp/feat.png"
268
  plt.savefig(path)
 
276
  # =========================
277
  # PDF REPORT
278
  # =========================
279
def generate_pdf():
    """Build the PDF report for the most recent ``run_ml`` results.

    Reads the module-level ``no_global``, ``cw_global``, ``smote_global``,
    ``cm_global`` and ``best_model_name`` and writes a report containing the
    three metric tables followed by the confusion-matrix images.

    Returns:
        Path of the generated file, ``/tmp/report.pdf``.

    Raises:
        gr.Error: If no analysis results are stored yet.
    """
    sections = [
        (no_global, "No Sampling"),
        (cw_global, "Class Weight"),
        (smote_global, "SMOTE"),
    ]

    # The globals stay None until run_ml() has stored results; report that
    # to the user instead of failing with AttributeError on None.
    if cm_global is None or any(df is None for df, _ in sections):
        raise gr.Error("Run the analysis before generating the PDF report.")

    def fmt(value):
        # Four decimals keep the metric columns readable and narrow.
        return f"{value:.4f}" if isinstance(value, float) else value

    path = "/tmp/report.pdf"
    doc = SimpleDocTemplate(path)
    styles = getSampleStyleSheet()

    elements = [
        Paragraph("AutoML Full Report", styles["Title"]),
        Spacer(1, 10),
        Paragraph(f"Best Model: {best_model_name}", styles["Heading2"]),
    ]

    for df, title in sections:
        elements.append(Spacer(1, 10))
        elements.append(Paragraph(title, styles["Heading3"]))

        # Header row first, then one formatted row per model.
        rows = [df.columns.tolist()]
        rows.extend([fmt(v) for v in row] for row in df.values.tolist())

        table = Table(rows)
        table.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), colors.grey),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("GRID", (0, 0), (-1, -1), 0.5, colors.black),
        ]))
        elements.append(table)

    elements.append(Spacer(1, 10))
    elements.append(Paragraph("Confusion Matrices", styles["Heading2"]))

    for name, img in cm_global.items():
        elements.append(Paragraph(name, styles["Normal"]))
        elements.append(Image(img, width=200, height=200))

    doc.build(elements)

    return path
321
 
322
 
323
  # =========================
324
# ANALYSIS
# =========================
def full_analysis(target):
    """Run the model comparison for ``target`` and return its UI outputs.

    Args:
        target: Name of the target column chosen in the dropdown.

    Returns:
        The tuple produced by ``run_ml``: status text, the three metric
        tables (no sampling, class weight, SMOTE) and the list of
        confusion-matrix image paths.

    Raises:
        gr.Error: If no dataset is loaded or no target is selected.
    """
    # run_ml() starts with df_global.copy(), which fails on None; surface a
    # readable message in the UI instead of a traceback.
    if df_global is None:
        raise gr.Error("Upload a dataset before running the analysis.")
    if target is None or target == "":
        raise gr.Error("Select a target column before running the analysis.")

    return run_ml(target)
331
 
332
 
333
  # =========================
 
335
  # =========================
336
with gr.Blocks() as demo:

    # Page heading.
    gr.Markdown("# 🚀 Advanced AutoML System")

    # --- Dataset upload -------------------------------------------------
    file = gr.File()
    upload_btn = gr.Button("Upload")
    status = gr.Textbox()
    preview = gr.Dataframe()

    # --- Target selection and model comparison ---------------------------
    target = gr.Dropdown(label="Target")
    run_btn = gr.Button("Run Full Analysis")
    ml_status = gr.Textbox()

    # One metrics table per strategy, filled in the order run_ml returns
    # them: no sampling, class weight, SMOTE.
    no_table = gr.Dataframe()
    cw_table = gr.Dataframe()
    smote_table = gr.Dataframe()

    # Receives the list of confusion-matrix image paths.
    gallery = gr.Gallery(columns=2)

    # --- Feature importance ----------------------------------------------
    feat_btn = gr.Button("Feature Importance")
    feat_img = gr.Image()

    # --- PDF report --------------------------------------------------------
    pdf_btn = gr.Button("Download PDF")
    pdf_file = gr.File()

    # --- Event wiring ------------------------------------------------------
    # `target` is listed twice on purpose, as in the original wiring.
    # NOTE(review): presumably upload_and_clean returns two updates for the
    # dropdown (e.g. choices and value) -- confirm against that function.
    upload_btn.click(
        fn=upload_and_clean,
        inputs=file,
        outputs=[status, preview, target, target],
    )

    run_btn.click(
        fn=full_analysis,
        inputs=target,
        outputs=[ml_status, no_table, cw_table, smote_table, gallery],
    )

    feat_btn.click(fn=feature_importance, inputs=None, outputs=feat_img)

    pdf_btn.click(fn=generate_pdf, inputs=None, outputs=pdf_file)

demo.launch(share=True)