Learnerbegginer committed on
Commit
c3c3195
Β·
1 Parent(s): c3b2831

Deploy working Streamlit version - exact local version that works perfectly

Browse files
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +355 -207
  3. requirements.txt +0 -0
README.md CHANGED
@@ -3,8 +3,8 @@ title: PromptPrepML
3
  emoji: πŸ€–
4
  colorFrom: blue
5
  colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 4.28.0
8
  python_version: 3.11
9
  app_file: app.py
10
  pinned: false
@@ -37,6 +37,6 @@ Upload your dataset and describe your preprocessing needs in natural language. O
37
 
38
  ## πŸ› οΈ Tech Stack
39
  - **Backend**: Python, FastAPI, scikit-learn
40
- - **Frontend**: Gradio
41
  - **EDA**: ydata-profiling
42
  - **ML**: pandas, numpy
 
3
  emoji: πŸ€–
4
  colorFrom: blue
5
  colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.28.0
8
  python_version: 3.11
9
  app_file: app.py
10
  pinned: false
 
37
 
38
  ## πŸ› οΈ Tech Stack
39
  - **Backend**: Python, FastAPI, scikit-learn
40
+ - **Frontend**: Streamlit
41
  - **EDA**: ydata-profiling
42
  - **ML**: pandas, numpy
app.py CHANGED
@@ -1,228 +1,376 @@
1
- import gradio as gr
 
2
  import pandas as pd
3
- import numpy as np
4
- from sklearn.impute import SimpleImputer
5
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
- from sklearn.feature_selection import VarianceThreshold
7
- from sklearn.compose import ColumnTransformer
8
- from sklearn.pipeline import Pipeline
9
- from sklearn.model_selection import train_test_split
10
  import io
11
- import warnings
12
- warnings.filterwarnings('ignore')
 
13
 
14
class StandalonePreprocessor:
    """Self-contained tabular preprocessing.

    Pipeline: classify columns by role, drop identifier/text/high-cardinality
    columns, expand date columns into numeric parts, impute + scale numerics,
    impute + one-hot encode categoricals, drop near-constant features, and
    split into train/test sets.
    """

    def __init__(self):
        self.pipeline = None        # fitted sklearn Pipeline (set by process())
        self.feature_names = []     # output feature names after transformation
        self.analysis = {}          # column-role analysis (set by process())

    def analyze_columns(self, df):
        """Classify each column of *df* into a role bucket.

        Returns a dict with keys 'identifiers', 'dates', 'text_features',
        'categorical_low_cardinality', 'categorical_high_cardinality' and
        'numeric', each mapping to a list of column names.
        """
        analysis = {
            'identifiers': [],
            'dates': [],
            'text_features': [],
            'categorical_low_cardinality': [],
            'categorical_high_cardinality': [],
            'numeric': []
        }

        for col in df.columns:
            col_lower = col.lower()

            # Identifier detection: id-like name AND mostly-unique values.
            is_identifier = (
                any(keyword in col_lower for keyword in ['id', 'index', 'uuid', 'key']) and
                (df[col].nunique() / len(df) > 0.8)
            )
            if is_identifier:
                analysis['identifiers'].append(col)
                continue

            # Date detection: object column whose first values parse as dates.
            if df[col].dtype == 'object':
                try:
                    pd.to_datetime(df[col].dropna().head(10))
                    analysis['dates'].append(col)
                    continue
                # Narrowed from a bare `except:` -- a bare except also catches
                # KeyboardInterrupt/SystemExit; any parse failure just means
                # "not a date column".
                except Exception:
                    pass

            # Free-text detection by column-name keyword.
            text_keywords = ['name', 'email', 'phone', 'website', 'address', 'description']
            if any(keyword in col_lower for keyword in text_keywords):
                analysis['text_features'].append(col)
                continue

            # Remaining columns: categorical (split by cardinality) vs numeric.
            if df[col].dtype == 'object':
                unique_ratio = df[col].nunique() / len(df)
                if unique_ratio > 0.5:
                    analysis['categorical_high_cardinality'].append(col)
                else:
                    analysis['categorical_low_cardinality'].append(col)
            else:
                analysis['numeric'].append(col)

        return analysis

    def extract_date_features(self, df, date_cols):
        """Replace each column in *date_cols* with year/month/day/weekday parts.

        Operates on (and returns) a copy; columns that fail to parse are
        silently left in place.
        """
        df_processed = df.copy()

        for col in date_cols:
            try:
                dates = pd.to_datetime(df_processed[col])
                df_processed[f'{col}_year'] = dates.dt.year
                df_processed[f'{col}_month'] = dates.dt.month
                df_processed[f'{col}_day'] = dates.dt.day
                df_processed[f'{col}_weekday'] = dates.dt.weekday
                df_processed.drop(col, axis=1, inplace=True)
            # Narrowed from a bare `except:`; a failed parse keeps the column.
            except Exception:
                pass

        return df_processed

    def process(self, df):
        """Run the full preprocessing pipeline on *df*.

        Fits ``self.pipeline`` and returns a new DataFrame of transformed
        features named via ``self.feature_names``.
        """
        # Step 1: Analyze columns
        self.analysis = self.analyze_columns(df)

        # Step 2: Remove unwanted columns
        columns_to_drop = (
            self.analysis['identifiers'] +
            self.analysis['text_features'] +
            self.analysis['categorical_high_cardinality']
        )
        df_clean = df.drop(columns=columns_to_drop, errors='ignore')

        # Step 3: Extract date features
        if self.analysis['dates']:
            df_clean = self.extract_date_features(df_clean, self.analysis['dates'])

        # Step 4: Create preprocessing pipeline
        numeric_features = df_clean.select_dtypes(include=[np.number]).columns.tolist()
        categorical_features = df_clean.select_dtypes(include=['object']).columns.tolist()

        preprocessor = ColumnTransformer(
            transformers=[
                ('numeric', Pipeline([
                    ('imputer', SimpleImputer(strategy='median')),
                    ('scaler', StandardScaler())
                ]), numeric_features),
                ('categorical', Pipeline([
                    ('imputer', SimpleImputer(strategy='most_frequent')),
                    # sparse_output=False requires scikit-learn >= 1.2
                    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
                ]), categorical_features)
            ]
        )

        # Step 5: Create full pipeline (drop near-constant features at the end)
        self.pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('feature_selector', VarianceThreshold(threshold=0.01))
        ])

        # Step 6: Fit and transform
        processed_data = self.pipeline.fit_transform(df_clean)

        # Step 7: Recover human-readable output feature names; fall back to
        # generic names if the encoder introspection fails.
        try:
            feature_names = []
            if numeric_features:
                feature_names.extend([f'numeric__{f}' for f in numeric_features])
            if categorical_features:
                encoder = self.pipeline.named_steps['preprocessor'].named_transformers_['categorical'].named_steps['encoder']
                cat_names = encoder.get_feature_names_out(categorical_features)
                feature_names.extend([f'categorical__{name}' for name in cat_names])

            # VarianceThreshold may have dropped trailing columns; truncate to
            # the actual output width. NOTE(review): this assumes only trailing
            # columns are dropped, which is not guaranteed -- names can
            # misalign with columns when an earlier feature is removed.
            self.feature_names = feature_names[:processed_data.shape[1]]
        except Exception:
            self.feature_names = [f'feature_{i}' for i in range(processed_data.shape[1])]

        # Step 8: Create processed DataFrame
        processed_df = pd.DataFrame(processed_data, columns=self.feature_names)

        return processed_df

    def split_data(self, df):
        """Split *df* into (train, test) with a fixed 80/20 seeded split."""
        train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
        return train_df, test_df

# Global preprocessor shared by all requests (stateful across calls).
preprocessor = StandalonePreprocessor()
157
-
158
def process_dataset(file, prompt):
    """Gradio callback: preprocess an uploaded CSV end to end.

    Returns a 6-tuple: (summary_markdown, processed_csv_bytes,
    train_csv_bytes, test_csv_bytes, preview_dataframe, status_text).
    """
    # Guard clause: nothing uploaded yet.
    if file is None:
        return "Please upload a dataset", None, None, None, None, ""

    try:
        # Read uploaded file into a DataFrame.
        raw_bytes = file.read()
        frame = pd.read_csv(io.BytesIO(raw_bytes))

        # Full preprocessing pass, then an 80/20 split of the result.
        cleaned = preprocessor.process(frame)
        train_part, test_part = preprocessor.split_data(cleaned)

        # Build the markdown summary from the recorded column analysis.
        roles = preprocessor.analysis

        summary = f"""
**βœ… Processing Complete!**

**πŸ“Š Dataset Information**
- Original Shape: {frame.shape}
- Processed Shape: {cleaned.shape}
- Training Set: {train_part.shape}
- Test Set: {test_part.shape}

**πŸ” Column Analysis**
- Identifiers Removed: {len(roles['identifiers'])} columns
- Text Features Removed: {len(roles['text_features'])} columns
- Date Columns Processed: {len(roles['dates'])} columns
- Low Cardinality Encoded: {len(roles['categorical_low_cardinality'])} columns
- High Cardinality Dropped: {len(roles['categorical_high_cardinality'])} columns
- Numeric Features: {len(roles['numeric'])} columns
"""

        # Serialize each split to CSV bytes for the download widgets.
        downloads = [
            part.to_csv(index=False).encode('utf-8')
            for part in (cleaned, train_part, test_part)
        ]

        return (summary, downloads[0], downloads[1], downloads[2],
                cleaned.head(10), "βœ… Processing completed successfully!")

    except Exception as e:
        return f"❌ Error: {str(e)}", None, None, None, None, f"❌ Processing failed: {str(e)}"
203
-
204
# Gradio UI: one function wired to a file + free-text prompt, producing a
# summary, three downloadable CSVs, a preview table, and a status line.
_input_widgets = [
    gr.File(label="Upload CSV Dataset", file_types=[".csv"]),
    gr.Textbox(
        label="Processing Instructions",
        value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
        lines=3,
    ),
]
_output_widgets = [
    gr.Markdown(label="Results Summary"),
    gr.File(label="Processed Dataset"),
    gr.File(label="Training Set"),
    gr.File(label="Test Set"),
    gr.Dataframe(label="Dataset Preview"),
    gr.Textbox(label="Status"),
]

iface = gr.Interface(
    fn=process_dataset,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title="πŸ€– PromptPrepML",
    description="AI-Powered Machine Learning Data Preprocessing Assistant",
    allow_flagging="never",
)

# Launch the app when run as a script.
if __name__ == "__main__":
    iface.launch()
 
1
+ import streamlit as st
2
+ import requests
3
  import pandas as pd
 
 
 
 
 
 
 
4
  import io
5
+ import os
6
+ from PIL import Image
7
+ import time
8
 
9
# Page-level configuration; Streamlit requires this before any other st.* call.
st.set_page_config(
    page_title="PromptPrepML - Auto ML Data Preprocessing",
    page_icon="πŸ€–",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Shared CSS for the custom header/step/alert boxes used throughout the app.
_CUSTOM_CSS = """
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f2937;
        text-align: center;
        margin-bottom: 2rem;
    }
    .step-header {
        font-size: 1.5rem;
        font-weight: 600;
        color: #374151;
        margin: 1rem 0;
    }
    .success-box {
        background-color: #f0fdf4;
        border: 1px solid #bbf7d0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
    .info-box {
        background-color: #eff6ff;
        border: 1px solid #bfdbfe;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
    .warning-box {
        background-color: #fffbeb;
        border: 1px solid #fed7aa;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Base URL of the FastAPI backend the UI talks to.
API_BASE = "http://localhost:8000"
59
+
60
def check_backend_health():
    """Return True iff the FastAPI backend answers GET /health with HTTP 200."""
    try:
        response = requests.get(f"{API_BASE}/health", timeout=5)
        return response.status_code == 200
    # Narrowed from a bare `except:` -- a bare except also swallows
    # KeyboardInterrupt/SystemExit; only a failed request means "down".
    except requests.RequestException:
        return False
67
+
68
def upload_dataset(uploaded_file):
    """POST the uploaded file to the backend's upload endpoint.

    Returns the parsed JSON response on HTTP 200; otherwise None.
    Shows a Streamlit error and returns None if the request itself fails.
    """
    try:
        payload = {'file': uploaded_file}
        resp = requests.post(f"{API_BASE}/api/upload-dataset", files=payload, timeout=30)
        return resp.json() if resp.status_code == 200 else None
    except Exception as exc:
        st.error(f"Upload error: {str(exc)}")
        return None
80
+
81
def process_pipeline(uploaded_file, prompt):
    """Send the dataset plus natural-language prompt to the backend pipeline.

    Returns the parsed JSON result on HTTP 200; on any failure, surfaces a
    Streamlit error message and returns None.
    """
    try:
        resp = requests.post(
            f"{API_BASE}/process-pipeline",
            files={'file': uploaded_file},
            data={'prompt': prompt},
            timeout=120,  # the full pipeline can take a while on large files
        )
        if resp.status_code != 200:
            st.error(f"Processing error: {resp.text}")
            return None
        return resp.json()
    except Exception as exc:
        st.error(f"Processing error: {str(exc)}")
        return None
95
+
96
def download_file(filename):
    """Return the backend download URL for *filename*.

    Fix: the deployed copy had the f-string placeholder redacted to the
    literal text "(unknown)", so every file produced the same broken URL;
    the {filename} interpolation is restored here.
    """
    return f"{API_BASE}/api/download/{filename}"
99
+
100
def main():
    """Render the multi-step Streamlit UI: upload -> prompt -> results.

    Navigation state lives in st.session_state['step']; each branch below
    renders one step and advances the state via st.rerun().
    """
    # Main header
    st.markdown('<h1 class="main-header">πŸ€– PromptPrepML</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)

    # Bail out early with setup instructions if the backend is unreachable.
    if not check_backend_health():
        st.error("❌ Backend is not running! Please start the backend first:")
        st.code("""
        cd promptprepml/backend
        venv\\Scripts\\activate
        python app/main.py
        """)
        return

    st.success("βœ… Backend is connected and ready!")

    # Sidebar for navigation
    st.sidebar.title("πŸ“‹ Processing Steps")

    # Initialize session state (survives Streamlit reruns).
    if 'step' not in st.session_state:
        st.session_state.step = 'upload'
    if 'dataset_info' not in st.session_state:
        st.session_state.dataset_info = None
    if 'processing_results' not in st.session_state:
        st.session_state.processing_results = None
    if 'uploaded_file' not in st.session_state:
        st.session_state.uploaded_file = None

    # Step indicators in the sidebar: done steps get a check, future a clock.
    steps = ['πŸ“€ Upload Dataset', 'πŸ’¬ Enter Prompt', '⚑ Processing', 'πŸ“Š Results']
    step_mapping = {
        'upload': 0,
        'prompt': 1,
        'processing': 2,
        'results': 3
    }
    current_step_idx = step_mapping.get(st.session_state.step, 0)

    for i, step in enumerate(steps):
        if i <= current_step_idx:
            st.sidebar.markdown(f"βœ… {step}")
        else:
            st.sidebar.markdown(f"⏳ {step}")

    # ---- Step 1: upload --------------------------------------------------
    if st.session_state.step == 'upload':
        st.markdown('<h2 class="step-header">πŸ“€ Upload Your Dataset</h2>', unsafe_allow_html=True)

        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type=['csv'],
            help="Upload your dataset in CSV format. Maximum file size: 200MB"
        )

        if uploaded_file is not None:
            # Display file info
            st.markdown('<div class="info-box">', unsafe_allow_html=True)
            st.write(f"**Filename:** {uploaded_file.name}")
            st.write(f"**Size:** {uploaded_file.size / 1024 / 1024:.2f} MB")
            st.markdown('</div>', unsafe_allow_html=True)

            # Preview data
            try:
                df = pd.read_csv(uploaded_file)
                st.write("**Data Preview:**")
                st.dataframe(df.head(), use_container_width=True)
                st.write(f"**Shape:** {df.shape[0]} rows Γ— {df.shape[1]} columns")

                if st.button("πŸš€ Upload Dataset", type="primary"):
                    with st.spinner("Uploading dataset..."):
                        # Reset file pointer: pd.read_csv above consumed it.
                        uploaded_file.seek(0)
                        result = upload_dataset(uploaded_file)

                        if result:
                            st.session_state.dataset_info = result
                            st.session_state.uploaded_file = uploaded_file  # keep for processing
                            st.session_state.step = 'prompt'
                            st.rerun()
                        else:
                            st.error("Upload failed. Please try again.")

            except Exception as e:
                st.error(f"Error reading CSV file: {str(e)}")

    # ---- Step 2: prompt --------------------------------------------------
    elif st.session_state.step == 'prompt':
        st.markdown('<h2 class="step-header">πŸ’¬ Describe Your Preprocessing Needs</h2>', unsafe_allow_html=True)

        # Summary metrics from the backend's upload response.
        if st.session_state.dataset_info:
            info = st.session_state.dataset_info['dataset_info']
            st.markdown('<div class="info-box">', unsafe_allow_html=True)
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Rows", info['shape'][0])
            with col2:
                st.metric("Columns", info['shape'][1])
            with col3:
                st.metric("Missing Values", sum(info['missing_values'].values()))
            with col4:
                st.metric("Duplicates", info['duplicates'])
            st.markdown('</div>', unsafe_allow_html=True)

        # Show file info
        if st.session_state.uploaded_file:
            st.info(f"πŸ“ File loaded: {st.session_state.uploaded_file.name} ({st.session_state.uploaded_file.size / 1024 / 1024:.2f} MB)")

        # Prompt input
        st.write("**Enter your preprocessing instructions in natural language:**")

        example_prompts = [
            "Prepare this dataset for fraud classification, handle missing values, encode categorical variables, remove outliers, and scale numeric features.",
            "Clean this dataset for customer churn prediction, fill missing values with median, one-hot encode categories, and apply standard scaling.",
            "Preprocess data for regression analysis, handle null values, remove duplicates, and normalize numerical features.",
            "Get this dataset ready for machine learning, handle missing data, encode categorical variables, and scale features.",
            "Analyze this customer dataset and prepare it for machine learning. Remove duplicate rows and unnecessary identifier columns. Handle missing values appropriately. Encode categorical variables such as country, city, and company. Extract useful features from the subscription date. Scale any numerical features if present. Remove low-variance features and prepare the dataset for clustering or classification."
        ]

        prompt = st.text_area(
            "Your prompt:",
            height=120,
            placeholder="e.g., Handle missing values, encode categorical variables, remove outliers, and scale numeric features",
            help="Describe how you want to preprocess your dataset in plain English"
        )

        with st.expander("πŸ’‘ Example Prompts"):
            for i, example in enumerate(example_prompts, 1):
                # NOTE(review): this local assignment does not survive the
                # rerun, so the text area is not actually pre-filled; kept
                # as-is to preserve deployed behavior -- confirm intent.
                if st.button(f"Use Example {i}", key=f"example_{i}"):
                    prompt = example
                    st.rerun()
                st.write(f"{i}. {example}")

        with st.expander("πŸ”§ Supported Operations"):
            st.write("""
            **Missing Values:**
            - Mean/median/mode imputation
            - Constant value filling
            - Row deletion

            **Categorical Encoding:**
            - One-hot encoding
            - Label encoding

            **Feature Scaling:**
            - Standard scaling (Z-score)
            - Min-max scaling
            - Robust scaling

            **Outlier Detection:**
            - Isolation Forest
            - IQR method
            - Z-score method

            **Feature Engineering:**
            - Variance threshold selection
            - Correlation filtering
            - Interaction features
            """)

        # Process button
        if prompt and st.button("πŸš€ Process Dataset", type="primary"):
            if st.session_state.uploaded_file:
                with st.spinner("Processing dataset... This may take a few minutes."):
                    # Reset file pointer before re-sending the stored file.
                    st.session_state.uploaded_file.seek(0)
                    result = process_pipeline(st.session_state.uploaded_file, prompt)

                    if result:
                        st.session_state.processing_results = result
                        st.session_state.step = 'results'
                        st.rerun()
            else:
                st.warning("No file found. Please upload your dataset again.")

    # ---- Step 3: results -------------------------------------------------
    elif st.session_state.step == 'results':
        st.markdown('<h2 class="step-header">πŸŽ‰ Processing Complete!</h2>', unsafe_allow_html=True)

        if st.session_state.processing_results:
            results = st.session_state.processing_results

            # Success message
            st.markdown('<div class="success-box">', unsafe_allow_html=True)
            st.success("βœ… Your dataset has been successfully preprocessed and is ready for machine learning!")
            st.markdown('</div>', unsafe_allow_html=True)

            # Dataset information
            st.write("### πŸ“Š Dataset Information")
            info = results['dataset_info']['basic_info']
            col1, col2, col3, col4 = st.columns(4)
            with col1:
                st.metric("Original Shape", f"{info['shape'][0]} Γ— {info['shape'][1]}")
            with col2:
                st.metric("Numeric Columns", len(info['numeric_columns']))
            with col3:
                st.metric("Categorical Columns", len(info['categorical_columns']))
            with col4:
                missing_total = sum(results['dataset_info']['missing_values']['counts'].values())
                st.metric("Missing Values", missing_total)

            # Applied preprocessing steps
            st.write("### πŸ”§ Applied Preprocessing Steps")
            for i, step in enumerate(results['preprocessing_steps'], 1):
                st.markdown(f"""
                <div style="padding: 1rem; margin: 0.5rem 0; background-color: #f8fafc; border-left: 4px solid #3b82f6; border-radius: 0.25rem;">
                    <strong>Step {i}:</strong> {step['description']}<br>
                    <small>Method: {step.get('method', 'N/A')}</small>
                </div>
                """, unsafe_allow_html=True)

            # Download files
            st.write("### πŸ“ Download Files")

            files_to_download = [
                ("processed_dataset.csv", "πŸ“Š Processed Dataset", "Fully preprocessed dataset ready for ML"),
                ("train.csv", "πŸš‚ Training Set", "80% of data for model training"),
                ("test.csv", "πŸ§ͺ Test Set", "20% of data for model testing"),
                ("pipeline.pkl", "βš™οΈ Pipeline", "Scikit-learn pipeline for reuse"),
                ("eda_report.html", "πŸ“ˆ EDA Report", "Exploratory Data Analysis report")
            ]

            col1, col2 = st.columns(2)
            for i, (filename, title, description) in enumerate(files_to_download):
                with col1 if i % 2 == 0 else col2:
                    # Fix: the download attribute and button label had been
                    # redacted to the literal "(unknown)"; restored {filename}.
                    st.markdown(f"""
                    <div style="padding: 1rem; margin: 0.5rem 0; border: 1px solid #e5e7eb; border-radius: 0.5rem;">
                        <h4>{title}</h4>
                        <p><small>{description}</small></p>
                        <a href="{download_file(filename)}" download="{filename}" style="text-decoration: none;">
                            <button style="background-color: #3b82f6; color: white; padding: 0.5rem 1rem; border: none; border-radius: 0.25rem; cursor: pointer;">
                                πŸ“₯ Download {filename}
                            </button>
                        </a>
                    </div>
                    """, unsafe_allow_html=True)

            # Quick actions
            st.write("### ⚑ Quick Actions")
            col1, col2, col3 = st.columns(3)

            with col1:
                if st.button("πŸ“ˆ View EDA Report", type="secondary"):
                    st.info(f"EDA Report will be available at: {download_file('eda_report.html')}")

            with col2:
                if st.button("βš™οΈ Download Pipeline", type="secondary"):
                    st.info(f"Pipeline file: {download_file('pipeline.pkl')}")

            with col3:
                if st.button("πŸ”„ Process Another Dataset", type="primary"):
                    # Fix: iterate a snapshot -- deleting keys while iterating
                    # the live view raises "dictionary changed size during
                    # iteration".
                    for key in list(st.session_state.keys()):
                        del st.session_state[key]
                    st.session_state.step = 'upload'
                    st.rerun()

        else:
            st.error("No processing results available. Please start over.")

    # Footer
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #6b7280; margin-top: 2rem;">
        <p><strong>PromptPrepML</strong> - Automated ML Data Preprocessing</p>
        <p><small>Convert natural language prompts into ML-ready datasets</small></p>
    </div>
    """, unsafe_allow_html=True)


if __name__ == "__main__":
    main()
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ