Learnerbegginer committed on
Commit
c3b2831
·
1 Parent(s): 7ee670b

Fix Gradio schema error - simplify to Interface API to avoid complex schema generation

Browse files
Files changed (1) hide show
  1. app.py +37 -150
app.py CHANGED
@@ -174,39 +174,21 @@ def process_dataset(file, prompt):
174
  analysis = preprocessor.analysis
175
 
176
  summary = f"""
177
- ## βœ… **Processing Complete!**
178
-
179
- ### πŸ“Š **Dataset Information**
180
- - **Original Shape**: {df.shape}
181
- - **Processed Shape**: {processed_df.shape}
182
- - **Training Set**: {train_df.shape}
183
- - **Test Set**: {test_df.shape}
184
-
185
- ### πŸ” **Column Analysis**
186
- - **🎯 Identifiers Removed**: {len(analysis['identifiers'])} columns
187
- - **πŸ“ Text Features Removed**: {len(analysis['text_features'])} columns
188
- - **πŸ“… Date Columns Processed**: {len(analysis['dates'])} columns
189
- - **🏷️ Low Cardinality Encoded**: {len(analysis['categorical_low_cardinality'])} columns
190
- - **🎲 High Cardinality Dropped**: {len(analysis['categorical_high_cardinality'])} columns
191
- - **πŸ”’ Numeric Features**: {len(analysis['numeric'])} columns
192
-
193
- ### πŸ—‘οΈ **Dropped Columns**
194
- {', '.join(analysis['identifiers'] + analysis['text_features'] + analysis['categorical_high_cardinality']) if analysis['identifiers'] + analysis['text_features'] + analysis['categorical_high_cardinality'] else 'None'}
195
-
196
- ### πŸ“ˆ **Processing Steps Applied**
197
- 1. βœ… Identifier column detection and removal
198
- 2. βœ… Text feature detection and removal
199
- 3. βœ… Date feature extraction (year, month, day, weekday)
200
- 4. βœ… Missing value imputation
201
- 5. βœ… Categorical encoding (one-hot)
202
- 6. βœ… Numeric feature scaling
203
- 7. βœ… Low-variance feature removal
204
- 8. βœ… Train/test split (80/20)
205
-
206
- ### πŸš€ **Files Ready for Download**
207
- - Processed dataset (clean, ML-ready)
208
- - Training set (80% of data)
209
- - Test set (20% of data)
210
  """
211
 
212
  # Convert DataFrames to CSV for download
@@ -219,123 +201,28 @@ def process_dataset(file, prompt):
219
  except Exception as e:
220
  return f"❌ Error: {str(e)}", None, None, None, None, f"❌ Processing failed: {str(e)}"
221
 
222
- # Create Gradio interface
223
- with gr.Blocks(title="PromptPrepML", theme=gr.themes.Base(), css="""
224
- .gradio-container {
225
- max-width: 1200px !important;
226
- margin: auto !important;
227
- }
228
- .gr-button {
229
- background: linear-gradient(45deg, #667eea 0%, #764ba2 100%) !important;
230
- border: none !important;
231
- color: white !important;
232
- font-weight: bold !important;
233
- padding: 12px 24px !important;
234
- border-radius: 8px !important;
235
- transition: all 0.3s ease !important;
236
- }
237
- .gr-button:hover {
238
- transform: translateY(-2px) !important;
239
- box-shadow: 0 8px 25px rgba(0,0,0,0.15) !important;
240
- }
241
- .gr-file {
242
- border: 2px dashed #667eea !important;
243
- border-radius: 12px !important;
244
- background: #f8f9ff !important;
245
- transition: all 0.3s ease !important;
246
- }
247
- .gr-file:hover {
248
- border-color: #764ba2 !important;
249
- background: #f0f2ff !important;
250
- }
251
- .gr-textbox {
252
- border-radius: 8px !important;
253
- border: 1px solid #e1e5e9 !important;
254
- }
255
- .gr-textbox:focus {
256
- border-color: #667eea !important;
257
- box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
258
- }
259
- .gr-markdown {
260
- text-align: center !important;
261
- }
262
- .gr-dataframe {
263
- border-radius: 8px !important;
264
- overflow: hidden !important;
265
- }
266
- """) as demo:
267
- gr.Markdown("# πŸ€– PromptPrepML")
268
- gr.Markdown("**AI-Powered Machine Learning Data Preprocessing Assistant**")
269
- gr.Markdown("Upload your dataset and get ML-ready results in seconds! πŸš€")
270
-
271
- with gr.Row():
272
- with gr.Column(scale=1):
273
- gr.Markdown("### πŸ“ Upload Dataset")
274
- file_input = gr.File(label="Choose CSV file", file_types=[".csv"])
275
-
276
- gr.Markdown("### πŸ’¬ Processing Instructions")
277
- prompt_input = gr.Textbox(
278
- label="Describe your needs",
279
- value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
280
- lines=4
281
- )
282
- process_btn = gr.Button("πŸš€ Process Dataset", variant="primary", size="lg")
283
-
284
- with gr.Column(scale=2):
285
- gr.Markdown("### πŸ“Š Results")
286
- output_summary = gr.Markdown(label="Processing Summary")
287
- status_output = gr.Textbox(label="πŸ”” Status", interactive=False)
288
-
289
- gr.Markdown("---")
290
- gr.Markdown("### πŸ“‹ Dataset Preview")
291
- preview_output = gr.Dataframe(label="First 10 rows of processed dataset")
292
-
293
- gr.Markdown("---")
294
- gr.Markdown("### πŸ“₯ Download Files")
295
- with gr.Row():
296
- with gr.Column():
297
- processed_download = gr.File(label="πŸ“Š Processed Dataset")
298
- with gr.Column():
299
- train_download = gr.File(label="πŸš‚ Training Set")
300
- with gr.Column():
301
- test_download = gr.File(label="πŸ§ͺ Test Set")
302
-
303
- # Event handlers
304
- process_btn.click(
305
- fn=process_dataset,
306
- inputs=[file_input, prompt_input],
307
- outputs=[output_summary, processed_download, train_download, test_download, preview_output, status_output]
308
- )
309
-
310
- gr.Markdown("---")
311
- gr.Markdown("### πŸ“š How to Use")
312
- with gr.Accordion("πŸ“– Instructions", open=False):
313
- gr.Markdown("""
314
- 1. **Upload your CSV dataset** (any size)
315
- 2. **Describe your preprocessing needs** (or use default)
316
- 3. **Click "Process Dataset"**
317
- 4. **Download your ML-ready results**
318
- 5. **Use for machine learning!**
319
-
320
- ### 🧠 **Intelligent Features**
321
- - **Automatic identifier detection** and removal
322
- - **Smart date feature extraction**
323
- - **Text feature handling**
324
- - **Categorical encoding** for low-cardinality features
325
- - **High cardinality handling**
326
- - **Missing value imputation**
327
- - **Feature scaling**
328
- - **Train/test splitting**
329
- """)
330
-
331
- gr.Markdown("---")
332
- gr.Markdown("""
333
- <div style='text-align: center; color: #6b7280; margin-top: 2rem;'>
334
- <p><strong>πŸ€– PromptPrepML</strong> - Automated ML Data Preprocessing</p>
335
- <p><small>Convert natural language prompts into ML-ready datasets</small></p>
336
- </div>
337
- """)
338
 
339
  # Launch the app
340
  if __name__ == "__main__":
341
- demo.launch()
 
174
  analysis = preprocessor.analysis
175
 
176
  summary = f"""
177
+ **βœ… Processing Complete!**
178
+
179
+ **πŸ“Š Dataset Information**
180
+ - Original Shape: {df.shape}
181
+ - Processed Shape: {processed_df.shape}
182
+ - Training Set: {train_df.shape}
183
+ - Test Set: {test_df.shape}
184
+
185
+ **πŸ” Column Analysis**
186
+ - Identifiers Removed: {len(analysis['identifiers'])} columns
187
+ - Text Features Removed: {len(analysis['text_features'])} columns
188
+ - Date Columns Processed: {len(analysis['dates'])} columns
189
+ - Low Cardinality Encoded: {len(analysis['categorical_low_cardinality'])} columns
190
+ - High Cardinality Dropped: {len(analysis['categorical_high_cardinality'])} columns
191
+ - Numeric Features: {len(analysis['numeric'])} columns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  """
193
 
194
  # Convert DataFrames to CSV for download
 
201
  except Exception as e:
202
  return f"❌ Error: {str(e)}", None, None, None, None, f"❌ Processing failed: {str(e)}"
203
 
204
+ # Create simple Gradio interface
205
+ iface = gr.Interface(
206
+ fn=process_dataset,
207
+ inputs=[
208
+ gr.File(label="Upload CSV Dataset", file_types=[".csv"]),
209
+ gr.Textbox(label="Processing Instructions",
210
+ value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
211
+ lines=3)
212
+ ],
213
+ outputs=[
214
+ gr.Markdown(label="Results Summary"),
215
+ gr.File(label="Processed Dataset"),
216
+ gr.File(label="Training Set"),
217
+ gr.File(label="Test Set"),
218
+ gr.Dataframe(label="Dataset Preview"),
219
+ gr.Textbox(label="Status")
220
+ ],
221
+ title="πŸ€– PromptPrepML",
222
+ description="AI-Powered Machine Learning Data Preprocessing Assistant",
223
+ allow_flagging="never"
224
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  # Launch the app
227
  if __name__ == "__main__":
228
+ iface.launch()