entropy25 committed on
Commit
aa64ef2
·
verified ·
1 Parent(s): 6a83d85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +177 -325
app.py CHANGED
@@ -24,7 +24,7 @@ def initialize_session_state():
24
  st.session_state[key] = value
25
 
26
  def display_header():
27
- """Display enhanced application header"""
28
  st.set_page_config(
29
  page_title="Data Analysis Platform",
30
  page_icon="📊",
@@ -49,20 +49,18 @@ def display_header():
49
  st.metric("📈 Progress", f"{stage_progress:.0f}%")
50
 
51
  def display_sidebar():
52
- """Enhanced sidebar with progress tracking and navigation"""
53
  st.sidebar.header("🗺️ Analysis Progress")
54
 
55
- # Progress bar
56
  progress_value = st.session_state.current_stage / 5
57
  st.sidebar.progress(progress_value)
58
 
59
- # Stage navigation with enhanced UI
60
  stages = [
61
- {"name": "Data Overview", "icon": "📊", "desc": "Basic statistics and quality"},
62
- {"name": "Exploration", "icon": "🔍", "desc": "Patterns and distributions"},
63
- {"name": "Quality Check", "icon": "🧹", "desc": "Cleaning and validation"},
64
- {"name": "Analysis", "icon": "🔬", "desc": "Advanced insights"},
65
- {"name": "Summary", "icon": "📈", "desc": "Results and export"}
66
  ]
67
 
68
  st.sidebar.markdown("### 📋 Analysis Stages")
@@ -70,7 +68,6 @@ def display_sidebar():
70
  for i, stage in enumerate(stages, 1):
71
  if i == st.session_state.current_stage:
72
  st.sidebar.markdown(f"🔄 **{i}. {stage['name']}**")
73
- st.sidebar.caption(f" {stage['desc']}")
74
  elif i < st.session_state.current_stage:
75
  st.sidebar.markdown(f"✅ {i}. {stage['name']}")
76
  else:
@@ -81,68 +78,36 @@ def display_sidebar():
81
  col1, col2 = st.sidebar.columns(2)
82
 
83
  with col1:
84
- if st.button("⬅️ Previous",
85
- disabled=st.session_state.current_stage <= 1,
86
- help="Go to previous analysis stage"):
87
  st.session_state.current_stage -= 1
88
  st.rerun()
89
 
90
  with col2:
91
- if st.button("➡️ Next",
92
- disabled=st.session_state.current_stage >= 5,
93
- help="Go to next analysis stage"):
94
  st.session_state.current_stage += 1
95
  st.rerun()
96
 
97
- # Quick stage jumper
98
- st.sidebar.markdown("### 🚀 Quick Jump")
99
- target_stage = st.sidebar.selectbox(
100
- "Jump to stage:",
101
- options=list(range(1, 6)),
102
- index=st.session_state.current_stage - 1,
103
- format_func=lambda x: f"{x}. {stages[x-1]['name']}"
104
- )
105
-
106
- if target_stage != st.session_state.current_stage:
107
- if st.sidebar.button("🎯 Jump to Stage"):
108
- st.session_state.current_stage = target_stage
109
- st.rerun()
110
-
111
- # Recent insights panel
112
- if st.session_state.workflow and st.session_state.workflow.insights:
113
- st.sidebar.markdown("### 💡 Latest Insights")
114
- recent_insights = st.session_state.workflow.insights[-3:]
115
-
116
- for insight in recent_insights:
117
- icon = {"success": "✅", "warning": "⚠️", "error": "❌"}.get(insight.get('type'), "ℹ️")
118
- with st.sidebar.expander(f"{icon} Stage {insight['stage']}", expanded=False):
119
- st.write(insight['insight'])
120
-
121
- # Help and settings
122
- st.sidebar.markdown("---")
123
- if st.sidebar.button("❓ Toggle Help", help="Show/hide help information"):
124
- st.session_state.show_help = not st.session_state.show_help
125
-
126
  # Error log
127
  if st.session_state.error_log:
128
  with st.sidebar.expander("⚠️ Error Log", expanded=False):
129
- for error in st.session_state.error_log[-5:]: # Show last 5 errors
130
  st.error(error)
131
 
132
  def display_ai_assistant():
133
- """Enhanced AI assistant panel"""
134
  st.subheader("🤖 AI Assistant")
135
 
136
  if st.session_state.ai_assistant is None:
137
  st.session_state.ai_assistant = AIAssistant()
138
 
139
- available_models = st.session_state.ai_assistant.get_available_models()
 
 
 
140
 
141
  if available_models:
142
- selected_model = st.selectbox("AI Model:", available_models,
143
- help="Choose your preferred AI model for analysis")
144
 
145
- # AI analysis button with loading state
146
  if st.button("🧠 Get AI Insights", type="primary"):
147
  if st.session_state.workflow and st.session_state.workflow.insights:
148
  with st.spinner("🔮 AI is analyzing your data..."):
@@ -156,84 +121,31 @@ def display_ai_assistant():
156
  if ai_analysis and "Error" not in ai_analysis:
157
  st.markdown("### 🎯 AI Analysis Results")
158
  st.markdown(ai_analysis)
159
-
160
- # Add AI insight to workflow
161
  st.session_state.workflow.add_insight("AI analysis completed",
162
  st.session_state.current_stage, "success")
163
  else:
164
- st.error(ai_analysis or "Failed to get AI analysis")
165
 
166
  except Exception as e:
167
  error_msg = f"AI analysis failed: {str(e)}"
168
  st.error(error_msg)
169
  st.session_state.error_log.append(error_msg)
170
- logger.error(error_msg)
171
  else:
172
- st.warning("⚠️ Complete some analysis stages first to get AI insights")
173
-
174
- # AI model status
175
- st.markdown("### 📊 AI Status")
176
- for model in available_models:
177
- st.success(f"✅ {model} Ready")
178
-
179
  else:
180
  st.warning("⚠️ No AI models available")
181
- with st.expander("🔧 Setup AI Models", expanded=False):
182
- st.markdown("""
183
- **To enable AI features, add API keys to your environment:**
184
-
185
- ```bash
186
- # For Google Gemini
187
- export GOOGLE_API_KEY="your_gemini_key"
188
-
189
- # For OpenAI GPT
190
- export OPENAI_API_KEY="your_openai_key"
191
- ```
192
-
193
- **Or create a `.env` file:**
194
- ```
195
- GOOGLE_API_KEY=your_gemini_key
196
- OPENAI_API_KEY=your_openai_key
197
- ```
198
- """)
199
-
200
- # Quick insights panel
201
- if st.session_state.workflow:
202
- st.markdown("### ⚡ Quick Stats")
203
-
204
- workflow = st.session_state.workflow
205
-
206
- # Data quality indicator
207
- missing_pct = (workflow.stats['missing_values'] / (len(workflow.df) * len(workflow.df.columns))) * 100
208
- duplicate_pct = (workflow.stats['duplicates'] / len(workflow.df)) * 100
209
-
210
- quality_score = 100 - (missing_pct * 2) - (duplicate_pct * 3)
211
- quality_score = max(0, quality_score)
212
-
213
- if quality_score >= 90:
214
- st.success(f"🌟 Excellent Quality ({quality_score:.0f}%)")
215
- elif quality_score >= 70:
216
- st.info(f"👍 Good Quality ({quality_score:.0f}%)")
217
- else:
218
- st.warning(f"⚠️ Needs Improvement ({quality_score:.0f}%)")
219
-
220
- # Stage completion indicators
221
- st.metric("Current Stage", f"{st.session_state.current_stage}/5")
222
- st.metric("Operations", len(workflow.cleaning_history))
223
 
224
  def handle_file_upload():
225
- """Enhanced file upload with validation and preview"""
226
  st.markdown("### 📁 Upload Your Dataset")
227
 
228
- # File upload with help
229
  uploaded_file = st.file_uploader(
230
  "Choose your data file",
231
  type=['csv', 'xlsx', 'xls'],
232
- help="Supported formats: CSV, Excel (.xlsx, .xls). Maximum recommended size: 200MB"
233
  )
234
 
235
  if uploaded_file is not None:
236
- # File information
237
  file_size = len(uploaded_file.getvalue()) / 1024**2
238
 
239
  col1, col2, col3 = st.columns(3)
@@ -245,37 +157,19 @@ def handle_file_upload():
245
  file_type = uploaded_file.name.split('.')[-1].upper()
246
  st.metric("📋 Format", file_type)
247
 
248
- # Load data with progress
249
- with st.spinner("🔄 Loading and validating your data..."):
250
  try:
251
  df = load_data(uploaded_file)
252
 
253
  if df is not None:
254
- # Validate data
255
  is_valid, validation_issues = validate_dataframe(df)
256
 
257
  if is_valid:
258
- st.success(f"✅ **Dataset loaded successfully!** Shape: {df.shape[0]:,} rows × {df.shape[1]:,} columns")
259
 
260
- # Quick preview
261
- with st.expander("👀 Quick Data Preview", expanded=False):
262
  st.dataframe(df.head(), use_container_width=True)
263
-
264
- # Basic info
265
- col1, col2 = st.columns(2)
266
- with col1:
267
- st.write("**Column Types:**")
268
- dtype_summary = df.dtypes.value_counts()
269
- for dtype, count in dtype_summary.items():
270
- st.write(f"• {dtype}: {count} columns")
271
-
272
- with col2:
273
- st.write("**Quick Stats:**")
274
- st.write(f"• Missing values: {df.isnull().sum().sum():,}")
275
- st.write(f"• Duplicate rows: {df.duplicated().sum():,}")
276
- st.write(f"• Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
277
 
278
- # Initialize workflow
279
  st.session_state.workflow = DataAnalysisWorkflow(df)
280
  st.session_state.current_stage = 1
281
  st.session_state.analysis_complete = False
@@ -286,127 +180,162 @@ def handle_file_upload():
286
  st.error("❌ **Data validation failed:**")
287
  for issue in validation_issues:
288
  st.write(f"• {issue}")
289
- st.session_state.error_log.extend(validation_issues)
290
  return False
291
  else:
292
- st.error("❌ Failed to load data. Please check file format and try again.")
293
  return False
294
 
295
  except Exception as e:
296
- error_msg = f"Error processing file: {str(e)}"
297
  st.error(f"❌ {error_msg}")
298
  st.session_state.error_log.append(error_msg)
299
- logger.error(error_msg)
300
  return False
301
 
302
  return False
303
 
304
- def display_help_section():
305
- """Display contextual help based on current stage"""
306
- if st.session_state.show_help:
307
- help_content = {
308
- 1: {
309
- "title": "📊 Data Overview Help",
310
- "content": """
311
- **What you'll see:**
312
- - Basic dataset statistics (rows, columns, memory usage)
313
- - Data quality score and grade
314
- - Column type classification and cardinality analysis
315
- - Missing values and duplicates detection
316
-
317
- **What to look for:**
318
- - Quality score below 80 indicates data issues
319
- - Constant columns that can be removed
320
- - High memory usage that can be optimized
321
- - Missing value patterns
322
- """
323
- },
324
- 2: {
325
- "title": "🔍 Exploration Help",
326
- "content": """
327
- **What you'll analyze:**
328
- - Distribution of numeric variables
329
- - Frequency of categorical variables
330
- - Relationships between variables
331
-
332
- **Key insights to find:**
333
- - Skewed distributions that need transformation
334
- - High cardinality categories
335
- - Strong correlations between variables
336
- - Imbalanced categorical data
337
- """
338
- },
339
- 3: {
340
- "title": "🧹 Data Cleaning Help",
341
- "content": """
342
- **Available operations:**
343
- - Missing value treatment (fill, drop, impute)
344
- - Duplicate row removal
345
- - Outlier detection and treatment
346
- - Data type corrections
347
-
348
- **Best practices:**
349
- - Preview operations before applying
350
- - Keep track of all changes made
351
- - Use domain knowledge for cleaning decisions
352
- - Test different approaches
353
- """
354
- },
355
- 4: {
356
- "title": "🔬 Advanced Analysis Help",
357
- "content": """
358
- **Advanced features:**
359
- - Statistical correlation testing
360
- - Group comparisons and ANOVA
361
- - Distribution analysis and normality testing
362
-
363
- **What to look for:**
364
- - Statistically significant relationships
365
- - Group differences in key metrics
366
- - Non-normal distributions
367
- - Interaction effects
368
- """
369
- },
370
- 5: {
371
- "title": "📈 Summary Help",
372
- "content": """
373
- **Final deliverables:**
374
- - Comprehensive analysis report
375
- - Cleaned dataset export
376
- - Reproducible Python code
377
- - Executive summary
378
-
379
- **Export options:**
380
- - Multiple report formats (Markdown, HTML, Text)
381
- - Various data formats (CSV, Excel, Parquet)
382
- - Ready-to-use Python scripts
383
- """
384
- }
385
- }
386
-
387
- current_help = help_content.get(st.session_state.current_stage, {})
388
- if current_help:
389
- st.info(f"**{current_help['title']}**\n{current_help['content']}")
390
-
391
- def execute_analysis_stage():
392
- """Execute the current analysis stage with error handling"""
393
  try:
394
  workflow = st.session_state.workflow
395
  stage = st.session_state.current_stage
396
 
397
  if stage == 1:
398
- workflow.stage_1_overview()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
  elif stage == 2:
400
- workflow.stage_2_exploration()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
401
  elif stage == 3:
402
- workflow.stage_3_cleaning()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  elif stage == 4:
404
- workflow.stage_4_analysis()
 
 
 
 
 
 
 
 
 
 
 
405
  elif stage == 5:
406
- workflow.stage_5_summary()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  if not st.session_state.analysis_complete:
408
  st.session_state.analysis_complete = True
409
- st.balloons() # Celebration for completion
 
 
 
 
 
 
 
 
 
410
 
411
  except Exception as e:
412
  error_msg = f"Error in stage {st.session_state.current_stage}: {str(e)}"
@@ -414,130 +343,53 @@ def execute_analysis_stage():
414
  st.session_state.error_log.append(error_msg)
415
  logger.error(error_msg)
416
 
417
- # Fallback UI
418
- st.warning("⚠️ There was an issue with this analysis stage. Please try refreshing or contact support.")
419
-
420
- def display_footer():
421
- """Display application footer with additional information"""
422
- st.markdown("---")
423
-
424
- col1, col2, col3 = st.columns(3)
425
-
426
- with col1:
427
- st.markdown("**📊 Platform Features:**")
428
- st.markdown("• 5-stage analysis workflow")
429
- st.markdown("• AI-powered insights")
430
- st.markdown("• Interactive visualizations")
431
- st.markdown("• Multiple export formats")
432
-
433
- with col2:
434
- st.markdown("**🔧 Supported Formats:**")
435
- st.markdown("• CSV files (any encoding)")
436
- st.markdown("• Excel files (.xlsx, .xls)")
437
- st.markdown("• Large datasets (up to 200MB)")
438
- st.markdown("• Mixed data types")
439
-
440
- with col3:
441
- st.markdown("**💡 Tips for Best Results:**")
442
- st.markdown("• Ensure clean column headers")
443
- st.markdown("• Include data dictionary if available")
444
- st.markdown("• Review quality score recommendations")
445
- st.markdown("• Use AI insights for deeper analysis")
446
 
447
  def main():
448
- """Enhanced main application with comprehensive error handling"""
449
  try:
450
- # Initialize application
451
  initialize_session_state()
452
  display_header()
453
 
454
- # Show help if enabled
455
- display_help_section()
456
-
457
- # File upload section
458
  data_loaded = handle_file_upload()
459
 
460
  if data_loaded and st.session_state.workflow is not None:
461
- # Create main layout
462
  main_col, ai_col = st.columns([3, 1])
463
 
464
  with main_col:
465
- # Execute current analysis stage
466
- execute_analysis_stage()
467
 
468
  with ai_col:
469
- # AI Assistant panel
470
  display_ai_assistant()
471
 
472
- # Display sidebar navigation
473
  display_sidebar()
474
 
475
- # Show completion message
476
  if st.session_state.analysis_complete:
477
- st.success("🎉 **Analysis Complete!** Your comprehensive data analysis is ready.")
478
 
479
  elif not data_loaded:
480
- # Landing page content
481
- st.markdown("### 🚀 Welcome to the Data Analysis Platform")
482
-
483
- col1, col2 = st.columns(2)
484
-
485
- with col1:
486
- st.markdown("""
487
- **🎯 What this platform does:**
488
- - **Automated Data Quality Assessment** - Get instant quality scores and recommendations
489
- - **Interactive Exploration** - Visualize distributions, correlations, and patterns
490
- - **Smart Data Cleaning** - Handle missing values, duplicates, and outliers
491
- - **AI-Powered Insights** - Get business recommendations from your data
492
- - **Professional Reports** - Export analysis in multiple formats
493
- """)
494
 
495
- with col2:
496
- st.markdown("""
497
- **📋 5-Stage Analysis Workflow:**
498
- 1. **📊 Data Overview** - Quality assessment and structure analysis
499
- 2. **🔍 Exploration** - Distribution and pattern discovery
500
- 3. **🧹 Quality Check** - Data cleaning and validation
501
- 4. **🔬 Analysis** - Advanced statistical analysis
502
- 5. **📈 Summary** - Results compilation and export
503
- """)
504
-
505
- # Sample data section
506
- st.markdown("### 📝 Supported Data Formats")
507
- format_info = pd.DataFrame({
508
- 'Format': ['CSV', 'Excel (.xlsx)', 'Excel (.xls)'],
509
- 'Max Size': ['200MB', '200MB', '100MB'],
510
- 'Encoding': ['Auto-detect', 'UTF-8', 'UTF-8'],
511
- 'Features': ['All features', 'All features', 'Basic features']
512
- })
513
- st.dataframe(format_info, use_container_width=True, hide_index=True)
514
-
515
- # Footer
516
- display_footer()
517
 
518
  except Exception as e:
519
- # Global error handler
520
- error_msg = f"Critical application error: {str(e)}"
521
  st.error(f"❌ {error_msg}")
522
  st.session_state.error_log.append(error_msg)
523
- logger.critical(error_msg)
524
 
525
- # Recovery options
526
- st.markdown("### 🔧 Recovery Options")
527
- col1, col2 = st.columns(2)
528
-
529
- with col1:
530
- if st.button("🔄 Restart Analysis"):
531
- # Clear session state
532
- for key in list(st.session_state.keys()):
533
- del st.session_state[key]
534
- st.rerun()
535
-
536
- with col2:
537
- if st.button("📋 View Error Log"):
538
- st.write("**Recent Errors:**")
539
- for error in st.session_state.error_log[-10:]:
540
- st.code(error)
541
 
542
  if __name__ == "__main__":
543
  main()
 
24
  st.session_state[key] = value
25
 
26
  def display_header():
27
+ """Display application header"""
28
  st.set_page_config(
29
  page_title="Data Analysis Platform",
30
  page_icon="📊",
 
49
  st.metric("📈 Progress", f"{stage_progress:.0f}%")
50
 
51
  def display_sidebar():
52
+ """Sidebar with progress tracking and navigation"""
53
  st.sidebar.header("🗺️ Analysis Progress")
54
 
 
55
  progress_value = st.session_state.current_stage / 5
56
  st.sidebar.progress(progress_value)
57
 
 
58
  stages = [
59
+ {"name": "Data Overview", "icon": "📊"},
60
+ {"name": "Exploration", "icon": "🔍"},
61
+ {"name": "Quality Check", "icon": "🧹"},
62
+ {"name": "Analysis", "icon": "🔬"},
63
+ {"name": "Summary", "icon": "📈"}
64
  ]
65
 
66
  st.sidebar.markdown("### 📋 Analysis Stages")
 
68
  for i, stage in enumerate(stages, 1):
69
  if i == st.session_state.current_stage:
70
  st.sidebar.markdown(f"🔄 **{i}. {stage['name']}**")
 
71
  elif i < st.session_state.current_stage:
72
  st.sidebar.markdown(f"✅ {i}. {stage['name']}")
73
  else:
 
78
  col1, col2 = st.sidebar.columns(2)
79
 
80
  with col1:
81
+ if st.button("⬅️ Previous", disabled=st.session_state.current_stage <= 1):
 
 
82
  st.session_state.current_stage -= 1
83
  st.rerun()
84
 
85
  with col2:
86
+ if st.button("➡️ Next", disabled=st.session_state.current_stage >= 5):
 
 
87
  st.session_state.current_stage += 1
88
  st.rerun()
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  # Error log
91
  if st.session_state.error_log:
92
  with st.sidebar.expander("⚠️ Error Log", expanded=False):
93
+ for error in st.session_state.error_log[-5:]:
94
  st.error(error)
95
 
96
  def display_ai_assistant():
97
+ """AI assistant panel"""
98
  st.subheader("🤖 AI Assistant")
99
 
100
  if st.session_state.ai_assistant is None:
101
  st.session_state.ai_assistant = AIAssistant()
102
 
103
+ try:
104
+ available_models = st.session_state.ai_assistant.get_available_models()
105
+ except:
106
+ available_models = []
107
 
108
  if available_models:
109
+ selected_model = st.selectbox("AI Model:", available_models)
 
110
 
 
111
  if st.button("🧠 Get AI Insights", type="primary"):
112
  if st.session_state.workflow and st.session_state.workflow.insights:
113
  with st.spinner("🔮 AI is analyzing your data..."):
 
121
  if ai_analysis and "Error" not in ai_analysis:
122
  st.markdown("### 🎯 AI Analysis Results")
123
  st.markdown(ai_analysis)
 
 
124
  st.session_state.workflow.add_insight("AI analysis completed",
125
  st.session_state.current_stage, "success")
126
  else:
127
+ st.error("Failed to get AI analysis")
128
 
129
  except Exception as e:
130
  error_msg = f"AI analysis failed: {str(e)}"
131
  st.error(error_msg)
132
  st.session_state.error_log.append(error_msg)
 
133
  else:
134
+ st.warning("⚠️ Complete some analysis stages first")
 
 
 
 
 
 
135
  else:
136
  st.warning("⚠️ No AI models available")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  def handle_file_upload():
139
+ """File upload with validation and preview"""
140
  st.markdown("### 📁 Upload Your Dataset")
141
 
 
142
  uploaded_file = st.file_uploader(
143
  "Choose your data file",
144
  type=['csv', 'xlsx', 'xls'],
145
+ help="Supported formats: CSV, Excel (.xlsx, .xls)"
146
  )
147
 
148
  if uploaded_file is not None:
 
149
  file_size = len(uploaded_file.getvalue()) / 1024**2
150
 
151
  col1, col2, col3 = st.columns(3)
 
157
  file_type = uploaded_file.name.split('.')[-1].upper()
158
  st.metric("📋 Format", file_type)
159
 
160
+ with st.spinner("🔄 Loading data..."):
 
161
  try:
162
  df = load_data(uploaded_file)
163
 
164
  if df is not None:
 
165
  is_valid, validation_issues = validate_dataframe(df)
166
 
167
  if is_valid:
168
+ st.success(f"✅ **Dataset loaded!** Shape: {df.shape[0]:,} rows × {df.shape[1]:,} columns")
169
 
170
+ with st.expander("👀 Data Preview", expanded=False):
 
171
  st.dataframe(df.head(), use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
 
173
  st.session_state.workflow = DataAnalysisWorkflow(df)
174
  st.session_state.current_stage = 1
175
  st.session_state.analysis_complete = False
 
180
  st.error("❌ **Data validation failed:**")
181
  for issue in validation_issues:
182
  st.write(f"• {issue}")
 
183
  return False
184
  else:
185
+ st.error("❌ Failed to load data")
186
  return False
187
 
188
  except Exception as e:
189
+ error_msg = f"Error loading file: {str(e)}"
190
  st.error(f"❌ {error_msg}")
191
  st.session_state.error_log.append(error_msg)
 
192
  return False
193
 
194
  return False
195
 
196
+ def safe_execute_stage():
197
+ """Execute current stage with comprehensive error handling"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  try:
199
  workflow = st.session_state.workflow
200
  stage = st.session_state.current_stage
201
 
202
  if stage == 1:
203
+ # Stage 1: Data Overview with safe execution
204
+ st.subheader("📊 Data Overview")
205
+
206
+ # Basic info
207
+ col1, col2, col3 = st.columns(3)
208
+ with col1:
209
+ st.metric("Rows", f"{len(workflow.df):,}")
210
+ with col2:
211
+ st.metric("Columns", f"{len(workflow.df.columns):,}")
212
+ with col3:
213
+ memory_mb = workflow.df.memory_usage(deep=True).sum() / 1024**2
214
+ st.metric("Memory", f"{memory_mb:.1f} MB")
215
+
216
+ # Data types
217
+ st.markdown("### Column Information")
218
+ dtype_counts = workflow.df.dtypes.value_counts()
219
+ for dtype, count in dtype_counts.items():
220
+ st.write(f"• **{dtype}**: {count} columns")
221
+
222
+ # Missing values
223
+ missing_total = workflow.df.isnull().sum().sum()
224
+ duplicates = workflow.df.duplicated().sum()
225
+
226
+ col1, col2 = st.columns(2)
227
+ with col1:
228
+ st.metric("Missing Values", f"{missing_total:,}")
229
+ with col2:
230
+ st.metric("Duplicates", f"{duplicates:,}")
231
+
232
+ # Quality score calculation (fixed)
233
+ total_cells = len(workflow.df) * len(workflow.df.columns)
234
+ missing_pct = (missing_total / total_cells) * 100 if total_cells > 0 else 0
235
+ duplicate_pct = (duplicates / len(workflow.df)) * 100 if len(workflow.df) > 0 else 0
236
+
237
+ quality_score = max(0, 100 - (missing_pct * 2) - (duplicate_pct * 3))
238
+
239
+ if quality_score >= 90:
240
+ st.success(f"🌟 Excellent Data Quality: {quality_score:.0f}%")
241
+ elif quality_score >= 70:
242
+ st.info(f"👍 Good Data Quality: {quality_score:.0f}%")
243
+ else:
244
+ st.warning(f"⚠️ Data Quality Needs Improvement: {quality_score:.0f}%")
245
+
246
+ # Add insight to workflow
247
+ workflow.add_insight(f"Data overview completed. Quality score: {quality_score:.0f}%",
248
+ stage, "success")
249
+
250
  elif stage == 2:
251
+ # Stage 2: Data Exploration
252
+ st.subheader("🔍 Data Exploration")
253
+
254
+ numeric_cols = workflow.df.select_dtypes(include=['number']).columns
255
+ categorical_cols = workflow.df.select_dtypes(include=['object', 'category']).columns
256
+
257
+ if len(numeric_cols) > 0:
258
+ st.markdown("### Numeric Columns")
259
+ st.dataframe(workflow.df[numeric_cols].describe(), use_container_width=True)
260
+
261
+ if len(categorical_cols) > 0:
262
+ st.markdown("### Categorical Columns")
263
+ for col in categorical_cols[:5]: # Show first 5 categorical columns
264
+ unique_count = workflow.df[col].nunique()
265
+ st.write(f"**{col}**: {unique_count} unique values")
266
+
267
+ workflow.add_insight("Data exploration completed", stage, "success")
268
+
269
  elif stage == 3:
270
+ # Stage 3: Data Cleaning
271
+ st.subheader("🧹 Data Quality Check")
272
+
273
+ # Missing values by column
274
+ missing_by_col = workflow.df.isnull().sum()
275
+ missing_cols = missing_by_col[missing_by_col > 0]
276
+
277
+ if len(missing_cols) > 0:
278
+ st.markdown("### Missing Values by Column")
279
+ for col, count in missing_cols.items():
280
+ pct = (count / len(workflow.df)) * 100
281
+ st.write(f"• **{col}**: {count} missing ({pct:.1f}%)")
282
+ else:
283
+ st.success("✅ No missing values found")
284
+
285
+ # Duplicates
286
+ duplicates = workflow.df.duplicated().sum()
287
+ if duplicates > 0:
288
+ st.warning(f"⚠️ Found {duplicates} duplicate rows")
289
+ else:
290
+ st.success("✅ No duplicate rows found")
291
+
292
+ workflow.add_insight("Quality check completed", stage, "success")
293
+
294
  elif stage == 4:
295
+ # Stage 4: Advanced Analysis
296
+ st.subheader("🔬 Advanced Analysis")
297
+
298
+ numeric_cols = workflow.df.select_dtypes(include=['number']).columns
299
+
300
+ if len(numeric_cols) >= 2:
301
+ st.markdown("### Correlation Matrix")
302
+ corr_matrix = workflow.df[numeric_cols].corr()
303
+ st.dataframe(corr_matrix, use_container_width=True)
304
+
305
+ workflow.add_insight("Advanced analysis completed", stage, "success")
306
+
307
  elif stage == 5:
308
+ # Stage 5: Summary
309
+ st.subheader("📈 Analysis Summary")
310
+
311
+ st.markdown("### Analysis Complete!")
312
+ st.write(f"• Dataset: {len(workflow.df):,} rows × {len(workflow.df.columns):,} columns")
313
+ st.write(f"• Insights generated: {len(workflow.insights)}")
314
+ st.write(f"• Analysis stages completed: {st.session_state.current_stage}")
315
+
316
+ # Export options
317
+ st.markdown("### Export Options")
318
+ if st.button("📊 Download CSV"):
319
+ csv = workflow.df.to_csv(index=False)
320
+ st.download_button(
321
+ label="Download CSV",
322
+ data=csv,
323
+ file_name="analyzed_data.csv",
324
+ mime="text/csv"
325
+ )
326
+
327
  if not st.session_state.analysis_complete:
328
  st.session_state.analysis_complete = True
329
+ st.balloons()
330
+
331
+ workflow.add_insight("Analysis summary completed", stage, "success")
332
+
333
+ # Initialize stats if not exists
334
+ if not hasattr(workflow, 'stats'):
335
+ workflow.stats = {
336
+ 'missing_values': workflow.df.isnull().sum().sum(),
337
+ 'duplicates': workflow.df.duplicated().sum()
338
+ }
339
 
340
  except Exception as e:
341
  error_msg = f"Error in stage {st.session_state.current_stage}: {str(e)}"
 
343
  st.session_state.error_log.append(error_msg)
344
  logger.error(error_msg)
345
 
346
+ # Show fallback content
347
+ st.warning("⚠️ Analysis stage encountered an issue. Please try refreshing.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
 
349
  def main():
350
+ """Main application"""
351
  try:
 
352
  initialize_session_state()
353
  display_header()
354
 
 
 
 
 
355
  data_loaded = handle_file_upload()
356
 
357
  if data_loaded and st.session_state.workflow is not None:
 
358
  main_col, ai_col = st.columns([3, 1])
359
 
360
  with main_col:
361
+ safe_execute_stage()
 
362
 
363
  with ai_col:
 
364
  display_ai_assistant()
365
 
 
366
  display_sidebar()
367
 
 
368
  if st.session_state.analysis_complete:
369
+ st.success("🎉 **Analysis Complete!**")
370
 
371
  elif not data_loaded:
372
+ st.markdown("### 🚀 Welcome to Data Analysis Platform")
373
+ st.markdown("""
374
+ **Features:**
375
+ - 5-stage analysis workflow
376
+ - AI-powered insights
377
+ - Data quality assessment
378
+ - Interactive visualizations
379
+ - Export capabilities
 
 
 
 
 
 
380
 
381
+ **Supported Formats:** CSV, Excel (.xlsx, .xls)
382
+ """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
  except Exception as e:
385
+ error_msg = f"Application error: {str(e)}"
 
386
  st.error(f"❌ {error_msg}")
387
  st.session_state.error_log.append(error_msg)
 
388
 
389
+ if st.button("🔄 Restart"):
390
+ for key in list(st.session_state.keys()):
391
+ del st.session_state[key]
392
+ st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
  if __name__ == "__main__":
395
  main()