shimaa22 committed on
Commit
2967f48
·
verified ·
1 Parent(s): 060f18c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -47
app.py CHANGED
@@ -1,5 +1,3 @@
1
- # app.py
2
-
3
  import gradio as gr
4
  import pandas as pd
5
  import numpy as np
@@ -26,20 +24,20 @@ from sklearn.metrics import (
26
  r2_score
27
  )
28
 
29
- from reportlab.lib.pagesizes import letter
30
  from reportlab.pdfgen import canvas
31
 
32
  # =========================
33
  # GLOBAL
34
  # =========================
35
  df_global = None
36
- best_model_global = None
37
  best_model_obj = None
 
38
  X_global = None
39
  y_global = None
40
 
 
41
  # =========================
42
- # UPLOAD
43
  # =========================
44
  def upload_and_clean(file):
45
 
@@ -54,40 +52,70 @@ def upload_and_clean(file):
54
  else:
55
  df[col] = df[col].fillna(df[col].mode()[0])
56
 
57
- df_global = df.copy()
 
 
 
 
 
 
 
58
 
59
- return "Data Loaded", df.head(), gr.update(choices=list(df.columns)), gr.update(choices=list(df.columns))
60
 
61
  # =========================
62
- # FEATURE IMPORTANCE
63
  # =========================
64
def feature_importance_plot(model, X, title):
    """Save a horizontal bar chart of a model's feature importances.

    Args:
        model: Fitted estimator. Only its ``feature_importances_`` attribute
            is read; estimators without it are not plotted.
        X: Object whose ``columns`` supply the bar labels, in the same order
            as ``model.feature_importances_``.
        title: Text used as the chart title.

    Returns:
        The path of the saved PNG, or ``None`` when ``model`` does not expose
        ``feature_importances_``.
    """
    # Guard clause: models without importances have nothing to draw.
    if not hasattr(model, "feature_importances_"):
        return None

    fig = plt.figure(figsize=(6, 4))
    plt.barh(X.columns, model.feature_importances_)
    # Use the caller-supplied title; it was previously accepted but ignored
    # in favour of a hard-coded string.
    plt.title(title)

    path = "/tmp/feature_importance.png"
    plt.savefig(path)
    # Close this specific figure so repeated clicks do not accumulate figures.
    plt.close(fig)

    return path
81
 
82
  # =========================
83
- # ML
84
  # =========================
85
  def run_ml(target):
86
 
87
- global df_global, best_model_global, best_model_obj, X_global, y_global
88
 
89
  df = df_global.copy()
90
 
 
91
  for col in df.columns:
92
  if not pd.api.types.is_numeric_dtype(df[col]):
93
  df[col] = LabelEncoder().fit_transform(df[col].astype(str))
@@ -100,11 +128,14 @@ def run_ml(target):
100
 
101
  is_classification = len(np.unique(y)) <= 20
102
 
103
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
 
104
 
105
  results = []
106
- best_score = 0
107
 
 
108
  if is_classification:
109
 
110
  models = {
@@ -129,13 +160,29 @@ def run_ml(target):
129
 
130
  if acc > best_score:
131
  best_score = acc
132
- best_model_global = name
133
  best_model_obj = model
 
134
 
135
  leaderboard = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
136
 
137
- return "Classification", leaderboard
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
 
139
  else:
140
 
141
  models = {
@@ -158,42 +205,65 @@ def run_ml(target):
158
 
159
  leaderboard = pd.DataFrame(results).sort_values("R2", ascending=False)
160
 
161
- best_model_global = leaderboard.iloc[0]["Model"]
 
 
162
 
163
- return "Regression", leaderboard
164
 
165
  # =========================
166
- # FEATURE IMPORTANCE OUTPUT
167
  # =========================
168
def show_feature_importance():
    """Plot feature importances for the best model stored at module level.

    Reads the module-level ``best_model_obj`` and ``X_global`` and delegates
    the drawing to ``feature_importance_plot``.

    Returns:
        Whatever ``feature_importance_plot`` returns, or ``None`` when no best
        model has been stored yet.
    """
    model = best_model_obj
    return (
        None
        if model is None
        else feature_importance_plot(model, X_global, "Feature Importance")
    )
176
 
177
  # =========================
178
  # PDF REPORT
179
  # =========================
180
def download_report():
    """Write a three-line PDF summary and return its path.

    The report names the module-level ``best_model_global`` and is written to
    a fixed location, so each call overwrites the previous report.

    Returns:
        The path of the generated PDF file.
    """
    file_path = "/tmp/report.pdf"

    # (y position, text) pairs, drawn top to bottom at x = 100.
    report_lines = (
        (750, "Auto ML Report"),
        (730, f"Best Model: {best_model_global}"),
        (700, "Generated by Auto ML System"),
    )

    pdf = canvas.Canvas(file_path, pagesize=letter)
    for y_pos, text in report_lines:
        pdf.drawString(100, y_pos, text)
    pdf.save()

    return file_path
196
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  # =========================
198
  # UI
199
  # =========================
@@ -210,17 +280,22 @@ with gr.Blocks() as demo:
210
 
211
  target = gr.Dropdown(label="Target")
212
 
213
- run_btn = gr.Button("Run ML")
214
 
215
  ml_status = gr.Textbox()
216
  leaderboard = gr.Dataframe()
217
 
218
- cm = gr.Image()
 
 
 
 
 
219
 
220
- feature_btn = gr.Button("Show Feature Importance")
221
- feature_img = gr.Image()
222
 
223
- pdf_btn = gr.Button("Download Report PDF")
224
  pdf_file = gr.File()
225
 
226
  # upload
@@ -230,23 +305,23 @@ with gr.Blocks() as demo:
230
  [status, preview, target, target]
231
  )
232
 
233
- # ML
234
  run_btn.click(
235
- run_ml,
236
  target,
237
- [ml_status, leaderboard]
238
  )
239
 
240
  # feature importance
241
- feature_btn.click(
242
- show_feature_importance,
243
  None,
244
- feature_img
245
  )
246
 
247
  # pdf
248
  pdf_btn.click(
249
- download_report,
250
  None,
251
  pdf_file
252
  )
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
 
24
  r2_score
25
  )
26
 
 
27
  from reportlab.pdfgen import canvas
28
 
29
  # =========================
30
  # GLOBAL
31
  # =========================
32
  df_global = None
 
33
  best_model_obj = None
34
+ best_model_name = None
35
  X_global = None
36
  y_global = None
37
 
38
+
39
  # =========================
40
+ # UPLOAD + CLEAN
41
  # =========================
42
  def upload_and_clean(file):
43
 
 
52
  else:
53
  df[col] = df[col].fillna(df[col].mode()[0])
54
 
55
+ df_global = df
56
+
57
+ return (
58
+ "Data Loaded Successfully",
59
+ df.head(),
60
+ gr.update(choices=list(df.columns)),
61
+ gr.update(choices=list(df.columns))
62
+ )
63
 
 
64
 
65
  # =========================
66
+ # VISUALIZATION (BAR + PIE)
67
  # =========================
68
def analyze_data(target):
    """Render a bar chart and a pie chart for up to eight non-target columns.

    Each selected column is converted to strings and its value frequencies
    are plotted side by side: the ten most frequent values as a bar chart and
    the six most frequent values as a pie chart. One PNG is written per
    column.

    Args:
        target: Name of the target column; it is excluded from the charts.

    Returns:
        A list of paths to the saved PNG files, in column order. The list is
        empty when no dataset has been loaded into ``df_global``.
    """
    df = df_global
    if df is None:
        # Nothing uploaded yet: return an empty result instead of failing on
        # ``None.copy()``.
        return []

    images = []
    cols = [c for c in df.columns if c != target]

    for idx, col in enumerate(cols[:8]):
        # Count once and reuse for both charts.
        counts = df[col].astype(str).value_counts()
        if counts.empty:
            # No values to plot; skip before a figure is created.
            continue

        fig, axes = plt.subplots(1, 2, figsize=(12, 4))

        # BAR: ten most frequent values.
        counts.head(10).plot(kind="bar", ax=axes[0])
        axes[0].set_title(f"Bar - {col}")
        axes[0].tick_params(axis='x', rotation=45)

        # PIE: six most frequent values.
        counts.head(6).plot(kind="pie", ax=axes[1], autopct="%1.1f%%")
        axes[1].set_title(f"Pie - {col}")
        axes[1].set_ylabel("")

        fig.tight_layout()

        # Column names are arbitrary user data and may contain "/" or other
        # characters that are invalid or unsafe in a file name. Keep only
        # alphanumerics and prefix the index so sanitized names cannot collide.
        safe_name = "".join(ch if ch.isalnum() else "_" for ch in str(col))
        path = f"/tmp/chart_{idx}_{safe_name}.png"

        fig.savefig(path)
        plt.close(fig)

        images.append(path)

    return images
107
 
 
108
 
109
  # =========================
110
+ # ML TRAINING
111
  # =========================
112
  def run_ml(target):
113
 
114
+ global df_global, best_model_obj, best_model_name, X_global, y_global
115
 
116
  df = df_global.copy()
117
 
118
+ # encode all categorical
119
  for col in df.columns:
120
  if not pd.api.types.is_numeric_dtype(df[col]):
121
  df[col] = LabelEncoder().fit_transform(df[col].astype(str))
 
128
 
129
  is_classification = len(np.unique(y)) <= 20
130
 
131
+ X_train, X_test, y_train, y_test = train_test_split(
132
+ X, y, test_size=0.2, random_state=42
133
+ )
134
 
135
  results = []
136
+ best_score = -999
137
 
138
+ # ================= CLASSIFICATION =================
139
  if is_classification:
140
 
141
  models = {
 
160
 
161
  if acc > best_score:
162
  best_score = acc
 
163
  best_model_obj = model
164
+ best_model_name = name
165
 
166
  leaderboard = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
167
 
168
+ # confusion matrix
169
+ cm = confusion_matrix(y_test, best_model_obj.predict(X_test))
170
+
171
+ fig = plt.figure()
172
+ plt.imshow(cm, cmap="Blues")
173
+ plt.title(f"Best Model: {best_model_name}")
174
+
175
+ for i in range(cm.shape[0]):
176
+ for j in range(cm.shape[1]):
177
+ plt.text(j, i, cm[i, j], ha="center", va="center")
178
+
179
+ cm_path = "/tmp/cm.png"
180
+ plt.savefig(cm_path)
181
+ plt.close()
182
+
183
+ return "Classification", leaderboard, cm_path
184
 
185
+ # ================= REGRESSION =================
186
  else:
187
 
188
  models = {
 
205
 
206
  leaderboard = pd.DataFrame(results).sort_values("R2", ascending=False)
207
 
208
+ best_model_name = leaderboard.iloc[0]["Model"]
209
+
210
+ return "Regression", leaderboard, None
211
 
 
212
 
213
  # =========================
214
+ # FEATURE IMPORTANCE
215
  # =========================
216
def feature_importance():
    """Save a bar chart of the stored best model's feature importances.

    Reads the module-level ``best_model_obj`` and ``X_global``; bar labels
    come from ``X_global.columns``.

    Returns:
        The path of the saved PNG, or ``None`` when ``best_model_obj`` has no
        ``feature_importances_`` attribute (which includes the case where it
        is still ``None``).
    """
    model = best_model_obj

    # Guard clause: nothing to draw without importances.
    if not hasattr(model, "feature_importances_"):
        return None

    path = "/tmp/feature.png"

    plt.figure(figsize=(6,4))
    plt.barh(X_global.columns, model.feature_importances_)
    plt.savefig(path)
    plt.close()

    return path
233
 
 
234
 
235
  # =========================
236
  # PDF REPORT
237
  # =========================
238
def download_pdf():
    """Write a three-line PDF summary and return its path.

    The report names the module-level ``best_model_name`` and is written to a
    fixed location, so each call overwrites the previous report.

    Returns:
        The path of the generated PDF file.
    """
    file_path = "/tmp/report.pdf"

    # (y position, text) pairs, drawn top to bottom at x = 100.
    report_lines = (
        (750, "Auto ML Report"),
        (730, f"Best Model: {best_model_name}"),
        (700, "Generated Successfully"),
    )

    pdf = canvas.Canvas(file_path)
    for y_pos, text in report_lines:
        pdf.drawString(100, y_pos, text)
    pdf.save()

    return file_path
254
 
255
+
256
+ # =========================
257
+ # COMBINED RUN
258
+ # =========================
259
def full_run(target):
    """Train the models, then build the exploratory charts for one target.

    ``run_ml`` is called first and ``analyze_data`` second, both with the
    same ``target``.

    Args:
        target: Name of the target column selected in the UI.

    Returns:
        A 4-tuple: the three values returned by ``run_ml`` (status text,
        leaderboard, confusion-matrix image path or ``None``) followed by the
        list of chart paths from ``analyze_data``.
    """
    ml_status_text, ml_leaderboard, cm_path = run_ml(target)
    chart_paths = analyze_data(target)

    return ml_status_text, ml_leaderboard, cm_path, chart_paths
265
+
266
+
267
  # =========================
268
  # UI
269
  # =========================
 
280
 
281
  target = gr.Dropdown(label="Target")
282
 
283
+ run_btn = gr.Button("RUN FULL ANALYSIS")
284
 
285
  ml_status = gr.Textbox()
286
  leaderboard = gr.Dataframe()
287
 
288
+ cm_img = gr.Image()
289
+
290
+ gallery = gr.Gallery(
291
+ label="Analysis Charts (Click to Enlarge)",
292
+ columns=2
293
+ )
294
 
295
+ feat_btn = gr.Button("Feature Importance")
296
+ feat_img = gr.Image()
297
 
298
+ pdf_btn = gr.Button("Download Report")
299
  pdf_file = gr.File()
300
 
301
  # upload
 
305
  [status, preview, target, target]
306
  )
307
 
308
+ # full analysis
309
  run_btn.click(
310
+ full_run,
311
  target,
312
+ [ml_status, leaderboard, cm_img, gallery]
313
  )
314
 
315
  # feature importance
316
+ feat_btn.click(
317
+ feature_importance,
318
  None,
319
+ feat_img
320
  )
321
 
322
  # pdf
323
  pdf_btn.click(
324
+ download_pdf,
325
  None,
326
  pdf_file
327
  )