shimaa22 committed on
Commit
3c53f52
·
verified ·
1 Parent(s): d886a54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -90
app.py CHANGED
@@ -26,101 +26,68 @@ from sklearn.metrics import (
26
  r2_score
27
  )
28
 
 
 
 
29
  # =========================
30
  # GLOBAL
31
  # =========================
32
  df_global = None
 
 
 
 
33
 
34
  # =========================
35
- # UPLOAD + CLEAN
36
  # =========================
37
  def upload_and_clean(file):
38
- global df_global
39
 
40
- if file is None:
41
- return "Upload file first", None, gr.update(choices=[]), gr.update(choices=[])
42
 
43
  df = pd.read_csv(file.name)
44
-
45
  df = df.drop_duplicates()
46
 
47
- # Clean missing values safely
48
  for col in df.columns:
49
  if pd.api.types.is_numeric_dtype(df[col]):
50
  df[col] = df[col].fillna(df[col].median())
51
  else:
52
- df[col] = df[col].fillna(df[col].mode()[0] if not df[col].mode().empty else "Unknown")
53
 
54
  df_global = df.copy()
55
 
56
- return (
57
- "Data Loaded Successfully",
58
- df.head(),
59
- gr.update(choices=list(df.columns)),
60
- gr.update(choices=list(df.columns))
61
- )
62
 
63
  # =========================
64
- # VISUALIZATION (BAR + PIE)
65
  # =========================
66
- def generate_visualizations(target):
67
 
68
- global df_global
69
 
70
- if df_global is None:
71
- return []
72
 
73
- df = df_global.copy()
 
 
74
 
75
- plots = []
76
- cols = [c for c in df.columns if c != target]
77
-
78
- for col in cols[:10]:
79
-
80
- fig, axes = plt.subplots(1, 2, figsize=(12, 4))
81
-
82
- # BAR CHART
83
- try:
84
- df[col].astype(str).value_counts().head(10).plot(
85
- kind="bar",
86
- ax=axes[0]
87
- )
88
- axes[0].set_title(f"Bar - {col}")
89
- except:
90
- axes[0].set_visible(False)
91
-
92
- # PIE CHART
93
- try:
94
- df[col].astype(str).value_counts().head(6).plot(
95
- kind="pie",
96
- ax=axes[1],
97
- autopct="%1.1f%%"
98
- )
99
- axes[1].set_ylabel("")
100
- axes[1].set_title(f"Pie - {col}")
101
- except:
102
- axes[1].set_visible(False)
103
-
104
- plt.tight_layout()
105
-
106
- path = f"/tmp/{col}.png"
107
  plt.savefig(path)
108
- plt.close(fig)
109
 
110
- plots.append(path)
111
 
112
- return plots
113
 
114
  # =========================
115
- # MACHINE LEARNING
116
  # =========================
117
  def run_ml(target):
118
 
119
- global df_global
120
 
121
  df = df_global.copy()
122
 
123
- # Encode all categorical columns
124
  for col in df.columns:
125
  if not pd.api.types.is_numeric_dtype(df[col]):
126
  df[col] = LabelEncoder().fit_transform(df[col].astype(str))
@@ -128,15 +95,16 @@ def run_ml(target):
128
  X = df.drop(columns=[target])
129
  y = df[target]
130
 
 
 
 
131
  is_classification = len(np.unique(y)) <= 20
132
 
133
- X_train, X_test, y_train, y_test = train_test_split(
134
- X, y, test_size=0.2, random_state=42
135
- )
136
 
137
  results = []
 
138
 
139
- # CLASSIFICATION
140
  if is_classification:
141
 
142
  models = {
@@ -146,20 +114,28 @@ def run_ml(target):
146
  }
147
 
148
  for name, model in models.items():
 
149
  model.fit(X_train, y_train)
150
  pred = model.predict(X_test)
151
 
 
 
152
  results.append({
153
  "Model": name,
154
- "Accuracy": accuracy_score(y_test, pred),
155
  "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
156
- "Recall": recall_score(y_test, pred, average="weighted", zero_division=0),
157
- "Confusion Matrix": str(confusion_matrix(y_test, pred))
158
  })
159
 
160
- return "Classification", pd.DataFrame(results).sort_values("Accuracy", ascending=False)
 
 
 
 
 
 
 
161
 
162
- # REGRESSION
163
  else:
164
 
165
  models = {
@@ -169,6 +145,7 @@ def run_ml(target):
169
  }
170
 
171
  for name, model in models.items():
 
172
  model.fit(X_train, y_train)
173
  pred = model.predict(X_test)
174
 
@@ -179,7 +156,43 @@ def run_ml(target):
179
  "R2": r2_score(y_test, pred)
180
  })
181
 
182
- return "Regression", pd.DataFrame(results).sort_values("R2", ascending=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
  # =========================
185
  # UI
@@ -188,46 +201,54 @@ with gr.Blocks() as demo:
188
 
189
  gr.Markdown("# 🚀 Auto ML Dashboard")
190
 
191
- file_input = gr.File(label="Upload CSV")
192
 
193
- upload_btn = gr.Button("Upload & Clean")
194
 
195
  status = gr.Textbox()
196
  preview = gr.Dataframe()
197
 
198
- gr.Markdown("## 📊 Visualization")
199
 
200
- target_viz = gr.Dropdown(label="Target (Viz)")
201
- viz_btn = gr.Button("Run Visualization")
202
- gallery = gr.Gallery()
203
-
204
- gr.Markdown("## 🤖 Machine Learning")
205
-
206
- target_ml = gr.Dropdown(label="Target (ML)")
207
- ml_btn = gr.Button("Run ML")
208
 
209
  ml_status = gr.Textbox()
210
  leaderboard = gr.Dataframe()
211
 
212
- # Upload
 
 
 
 
 
 
 
 
213
  upload_btn.click(
214
  upload_and_clean,
215
- file_input,
216
- [status, preview, target_viz, target_ml]
217
- )
218
-
219
- # Viz
220
- viz_btn.click(
221
- generate_visualizations,
222
- target_viz,
223
- gallery
224
  )
225
 
226
  # ML
227
- ml_btn.click(
228
  run_ml,
229
- target_ml,
230
  [ml_status, leaderboard]
231
  )
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  demo.launch(share=True)
 
26
  r2_score
27
  )
28
 
29
+ from reportlab.lib.pagesizes import letter
30
+ from reportlab.pdfgen import canvas
31
+
32
  # =========================
33
  # GLOBAL
34
  # =========================
35
  df_global = None
36
+ best_model_global = None
37
+ best_model_obj = None
38
+ X_global = None
39
+ y_global = None
40
 
41
  # =========================
42
+ # UPLOAD
43
  # =========================
44
  def upload_and_clean(file):
 
45
 
46
+ global df_global
 
47
 
48
  df = pd.read_csv(file.name)
 
49
  df = df.drop_duplicates()
50
 
 
51
  for col in df.columns:
52
  if pd.api.types.is_numeric_dtype(df[col]):
53
  df[col] = df[col].fillna(df[col].median())
54
  else:
55
+ df[col] = df[col].fillna(df[col].mode()[0])
56
 
57
  df_global = df.copy()
58
 
59
+ return "Data Loaded", df.head(), gr.update(choices=list(df.columns)), gr.update(choices=list(df.columns))
 
 
 
 
 
60
 
61
  # =========================
62
+ # FEATURE IMPORTANCE
63
  # =========================
64
+ def feature_importance_plot(model, X, title):
65
 
66
+ if hasattr(model, "feature_importances_"):
67
 
68
+ imp = model.feature_importances_
 
69
 
70
+ plt.figure(figsize=(6,4))
71
+ plt.barh(X.columns, imp)
72
+ plt.title("Feature Importance")
73
 
74
+ path = "/tmp/feature_importance.png"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  plt.savefig(path)
76
+ plt.close()
77
 
78
+ return path
79
 
80
+ return None
81
 
82
  # =========================
83
+ # ML
84
  # =========================
85
  def run_ml(target):
86
 
87
+ global df_global, best_model_global, best_model_obj, X_global, y_global
88
 
89
  df = df_global.copy()
90
 
 
91
  for col in df.columns:
92
  if not pd.api.types.is_numeric_dtype(df[col]):
93
  df[col] = LabelEncoder().fit_transform(df[col].astype(str))
 
95
  X = df.drop(columns=[target])
96
  y = df[target]
97
 
98
+ X_global = X
99
+ y_global = y
100
+
101
  is_classification = len(np.unique(y)) <= 20
102
 
103
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
 
 
104
 
105
  results = []
106
+ best_score = 0
107
 
 
108
  if is_classification:
109
 
110
  models = {
 
114
  }
115
 
116
  for name, model in models.items():
117
+
118
  model.fit(X_train, y_train)
119
  pred = model.predict(X_test)
120
 
121
+ acc = accuracy_score(y_test, pred)
122
+
123
  results.append({
124
  "Model": name,
125
+ "Accuracy": acc,
126
  "Precision": precision_score(y_test, pred, average="weighted", zero_division=0),
127
+ "Recall": recall_score(y_test, pred, average="weighted", zero_division=0)
 
128
  })
129
 
130
+ if acc > best_score:
131
+ best_score = acc
132
+ best_model_global = name
133
+ best_model_obj = model
134
+
135
+ leaderboard = pd.DataFrame(results).sort_values("Accuracy", ascending=False)
136
+
137
+ return "Classification", leaderboard
138
 
 
139
  else:
140
 
141
  models = {
 
145
  }
146
 
147
  for name, model in models.items():
148
+
149
  model.fit(X_train, y_train)
150
  pred = model.predict(X_test)
151
 
 
156
  "R2": r2_score(y_test, pred)
157
  })
158
 
159
+ leaderboard = pd.DataFrame(results).sort_values("R2", ascending=False)
160
+
161
+ best_model_global = leaderboard.iloc[0]["Model"]
162
+
163
+ return "Regression", leaderboard
164
+
165
+ # =========================
166
+ # FEATURE IMPORTANCE OUTPUT
167
+ # =========================
168
+ def show_feature_importance():
169
+
170
+ global best_model_obj, X_global
171
+
172
+ if best_model_obj is None:
173
+ return None
174
+
175
+ return feature_importance_plot(best_model_obj, X_global, "Feature Importance")
176
+
177
+ # =========================
178
+ # PDF REPORT
179
+ # =========================
180
+ def download_report():
181
+
182
+ global best_model_global
183
+
184
+ file_path = "/tmp/report.pdf"
185
+
186
+ c = canvas.Canvas(file_path, pagesize=letter)
187
+
188
+ c.drawString(100, 750, "Auto ML Report")
189
+ c.drawString(100, 730, f"Best Model: {best_model_global}")
190
+
191
+ c.drawString(100, 700, "Generated by Auto ML System")
192
+
193
+ c.save()
194
+
195
+ return file_path
196
 
197
  # =========================
198
  # UI
 
201
 
202
  gr.Markdown("# 🚀 Auto ML Dashboard")
203
 
204
+ file = gr.File()
205
 
206
+ upload_btn = gr.Button("Upload")
207
 
208
  status = gr.Textbox()
209
  preview = gr.Dataframe()
210
 
211
+ target = gr.Dropdown(label="Target")
212
 
213
+ run_btn = gr.Button("Run ML")
 
 
 
 
 
 
 
214
 
215
  ml_status = gr.Textbox()
216
  leaderboard = gr.Dataframe()
217
 
218
+ cm = gr.Image()
219
+
220
+ feature_btn = gr.Button("Show Feature Importance")
221
+ feature_img = gr.Image()
222
+
223
+ pdf_btn = gr.Button("Download Report PDF")
224
+ pdf_file = gr.File()
225
+
226
+ # upload
227
  upload_btn.click(
228
  upload_and_clean,
229
+ file,
230
+ [status, preview, target, target]
 
 
 
 
 
 
 
231
  )
232
 
233
  # ML
234
+ run_btn.click(
235
  run_ml,
236
+ target,
237
  [ml_status, leaderboard]
238
  )
239
 
240
+ # feature importance
241
+ feature_btn.click(
242
+ show_feature_importance,
243
+ None,
244
+ feature_img
245
+ )
246
+
247
+ # pdf
248
+ pdf_btn.click(
249
+ download_report,
250
+ None,
251
+ pdf_file
252
+ )
253
+
254
  demo.launch(share=True)