Spaces:

Learnerbegginer
/

Auto-ML-Preprocessing

Running

App Files Community

Learnerbegginer committed 9 days ago

Commit

c3b2831

1 Parent(s): 7ee670b

Fix Gradio schema error - simplify to Interface API to avoid complex schema generation

Browse files

Files changed (1) hide show

app.py +37 -150

app.py CHANGED Viewed

@@ -174,39 +174,21 @@ def process_dataset(file, prompt):
         analysis = preprocessor.analysis
         summary = f"""
-        ## ✅ **Processing Complete!**
-        ### 📊 **Dataset Information**
-        - **Original Shape**: {df.shape}
-        - **Processed Shape**: {processed_df.shape}
-        - **Training Set**: {train_df.shape}
-        - **Test Set**: {test_df.shape}
-        ### 🔍 **Column Analysis**
-        - **🎯 Identifiers Removed**: {len(analysis['identifiers'])} columns
-        - **📝 Text Features Removed**: {len(analysis['text_features'])} columns
-        - **📅 Date Columns Processed**: {len(analysis['dates'])} columns
-        - **🏷️ Low Cardinality Encoded**: {len(analysis['categorical_low_cardinality'])} columns
-        - **🎲 High Cardinality Dropped**: {len(analysis['categorical_high_cardinality'])} columns
-        - **🔢 Numeric Features**: {len(analysis['numeric'])} columns
-        ### 🗑️ **Dropped Columns**
-        {', '.join(analysis['identifiers'] + analysis['text_features'] + analysis['categorical_high_cardinality']) if analysis['identifiers'] + analysis['text_features'] + analysis['categorical_high_cardinality'] else 'None'}
-        ### 📈 **Processing Steps Applied**
-        1. ✅ Identifier column detection and removal
-        2. ✅ Text feature detection and removal
-        3. ✅ Date feature extraction (year, month, day, weekday)
-        4. ✅ Missing value imputation
-        5. ✅ Categorical encoding (one-hot)
-        6. ✅ Numeric feature scaling
-        7. ✅ Low-variance feature removal
-        8. ✅ Train/test split (80/20)
-        ### 🚀 **Files Ready for Download**
-        - Processed dataset (clean, ML-ready)
-        - Training set (80% of data)
-        - Test set (20% of data)
         """
         # Convert DataFrames to CSV for download
@@ -219,123 +201,28 @@ def process_dataset(file, prompt):
     except Exception as e:
         return f"❌ Error: {str(e)}", None, None, None, None, f"❌ Processing failed: {str(e)}"
-# Create Gradio interface
-with gr.Blocks(title="PromptPrepML", theme=gr.themes.Base(), css="""
-    .gradio-container {
-        max-width: 1200px !important;
-        margin: auto !important;
-    }
-    .gr-button {
-        background: linear-gradient(45deg, #667eea 0%, #764ba2 100%) !important;
-        border: none !important;
-        color: white !important;
-        font-weight: bold !important;
-        padding: 12px 24px !important;
-        border-radius: 8px !important;
-        transition: all 0.3s ease !important;
-    }
-    .gr-button:hover {
-        transform: translateY(-2px) !important;
-        box-shadow: 0 8px 25px rgba(0,0,0,0.15) !important;
-    }
-    .gr-file {
-        border: 2px dashed #667eea !important;
-        border-radius: 12px !important;
-        background: #f8f9ff !important;
-        transition: all 0.3s ease !important;
-    }
-    .gr-file:hover {
-        border-color: #764ba2 !important;
-        background: #f0f2ff !important;
-    }
-    .gr-textbox {
-        border-radius: 8px !important;
-        border: 1px solid #e1e5e9 !important;
-    }
-    .gr-textbox:focus {
-        border-color: #667eea !important;
-        box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
-    }
-    .gr-markdown {
-        text-align: center !important;
-    }
-    .gr-dataframe {
-        border-radius: 8px !important;
-        overflow: hidden !important;
-    }
-""") as demo:
-    gr.Markdown("# 🤖 PromptPrepML")
-    gr.Markdown("**AI-Powered Machine Learning Data Preprocessing Assistant**")
-    gr.Markdown("Upload your dataset and get ML-ready results in seconds! 🚀")
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 📁 Upload Dataset")
-            file_input = gr.File(label="Choose CSV file", file_types=[".csv"])
-            gr.Markdown("### 💬 Processing Instructions")
-            prompt_input = gr.Textbox(
-                label="Describe your needs",
-                value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
-                lines=4
-            )
-            process_btn = gr.Button("🚀 Process Dataset", variant="primary", size="lg")
-        with gr.Column(scale=2):
-            gr.Markdown("### 📊 Results")
-            output_summary = gr.Markdown(label="Processing Summary")
-            status_output = gr.Textbox(label="🔔 Status", interactive=False)
-    gr.Markdown("---")
-    gr.Markdown("### 📋 Dataset Preview")
-    preview_output = gr.Dataframe(label="First 10 rows of processed dataset")
-    gr.Markdown("---")
-    gr.Markdown("### 📥 Download Files")
-    with gr.Row():
-        with gr.Column():
-            processed_download = gr.File(label="📊 Processed Dataset")
-        with gr.Column():
-            train_download = gr.File(label="🚂 Training Set")
-        with gr.Column():
-            test_download = gr.File(label="🧪 Test Set")
-    # Event handlers
-    process_btn.click(
-        fn=process_dataset,
-        inputs=[file_input, prompt_input],
-        outputs=[output_summary, processed_download, train_download, test_download, preview_output, status_output]
-    )
-    gr.Markdown("---")
-    gr.Markdown("### 📚 How to Use")
-    with gr.Accordion("📖 Instructions", open=False):
-        gr.Markdown("""
-        1. **Upload your CSV dataset** (any size)
-        2. **Describe your preprocessing needs** (or use default)
-        3. **Click "Process Dataset"**
-        4. **Download your ML-ready results**
-        5. **Use for machine learning!**
-        ### 🧠 **Intelligent Features**
-        - **Automatic identifier detection** and removal
-        - **Smart date feature extraction**
-        - **Text feature handling**
-        - **Categorical encoding** for low-cardinality features
-        - **High cardinality handling**
-        - **Missing value imputation**
-        - **Feature scaling**
-        - **Train/test splitting**
-        """)
-    gr.Markdown("---")
-    gr.Markdown("""
-    <div style='text-align: center; color: #6b7280; margin-top: 2rem;'>
-        <p><strong>🤖 PromptPrepML</strong> - Automated ML Data Preprocessing</p>
-        <p><small>Convert natural language prompts into ML-ready datasets</small></p>
-    </div>
-    """)
 # Launch the app
 if __name__ == "__main__":
-    demo.launch()

         analysis = preprocessor.analysis
         summary = f"""
+        **✅ Processing Complete!**
+        **📊 Dataset Information**
+        - Original Shape: {df.shape}
+        - Processed Shape: {processed_df.shape}
+        - Training Set: {train_df.shape}
+        - Test Set: {test_df.shape}
+        **🔍 Column Analysis**
+        - Identifiers Removed: {len(analysis['identifiers'])} columns
+        - Text Features Removed: {len(analysis['text_features'])} columns
+        - Date Columns Processed: {len(analysis['dates'])} columns
+        - Low Cardinality Encoded: {len(analysis['categorical_low_cardinality'])} columns
+        - High Cardinality Dropped: {len(analysis['categorical_high_cardinality'])} columns
+        - Numeric Features: {len(analysis['numeric'])} columns
         """
         # Convert DataFrames to CSV for download
     except Exception as e:
         return f"❌ Error: {str(e)}", None, None, None, None, f"❌ Processing failed: {str(e)}"
+# Create simple Gradio interface
+iface = gr.Interface(
+    fn=process_dataset,
+    inputs=[
+        gr.File(label="Upload CSV Dataset", file_types=[".csv"]),
+        gr.Textbox(label="Processing Instructions",
+                  value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
+                  lines=3)
+    ],
+    outputs=[
+        gr.Markdown(label="Results Summary"),
+        gr.File(label="Processed Dataset"),
+        gr.File(label="Training Set"),
+        gr.File(label="Test Set"),
+        gr.Dataframe(label="Dataset Preview"),
+        gr.Textbox(label="Status")
+    ],
+    title="🤖 PromptPrepML",
+    description="AI-Powered Machine Learning Data Preprocessing Assistant",
+    allow_flagging="never"
+)
 # Launch the app
 if __name__ == "__main__":
+    iface.launch()