entropy25 committed on
Commit
1348d26
·
verified ·
1 Parent(s): 86805f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +516 -104
app.py CHANGED
@@ -1,131 +1,543 @@
1
  import streamlit as st
2
  import pandas as pd
3
- from data_handler import load_data
 
4
  from analyzer import DataAnalysisWorkflow, AIAssistant
5
 
6
- def main():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  st.set_page_config(
8
  page_title="Data Analysis Platform",
9
  page_icon="📊",
10
- layout="wide"
 
11
  )
12
 
13
  st.title("📊 Data Analysis Platform")
14
- st.markdown("**Optimized workflow with caching and pagination**")
15
-
16
- # Initialize session state
17
- if 'current_stage' not in st.session_state:
18
- st.session_state.current_stage = 1
19
- if 'workflow' not in st.session_state:
20
- st.session_state.workflow = None
21
- if 'ai_assistant' not in st.session_state:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  st.session_state.ai_assistant = AIAssistant()
23
 
24
- # File upload
25
- uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx'])
26
 
27
- if uploaded_file is not None:
28
- try:
29
- # Load data
30
- df = load_data(uploaded_file)
31
- st.success(f"✅ Dataset loaded! Shape: {df.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # Initialize workflow
34
- if st.session_state.workflow is None:
35
- st.session_state.workflow = DataAnalysisWorkflow(df)
36
 
37
- # Progress sidebar
38
- st.sidebar.header("Progress")
39
- progress = st.sidebar.progress(st.session_state.current_stage / 5)
40
 
41
- stages = ["Data Overview", "Exploration", "Quality Check", "Analysis", "Summary"]
42
- for i, stage in enumerate(stages, 1):
43
- if i == st.session_state.current_stage:
44
- st.sidebar.write(f"🔄 **{i}. {stage}**")
45
- elif i < st.session_state.current_stage:
46
- st.sidebar.write(f"✅ {i}. {stage}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  else:
48
- st.sidebar.write(f" {i}. {stage}")
49
-
50
- # Navigation
51
- col1, col2 = st.sidebar.columns(2)
52
- with col1:
53
- if st.button("← Previous") and st.session_state.current_stage > 1:
54
- st.session_state.current_stage -= 1
55
- st.rerun()
56
- with col2:
57
- if st.button("Next →") and st.session_state.current_stage < 5:
58
- st.session_state.current_stage += 1
59
- st.rerun()
60
-
61
- # Recent insights
62
- st.sidebar.header("💡 Recent Insights")
63
- recent_insights = st.session_state.workflow.insights[-3:]
64
- for insight in recent_insights:
65
- st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}")
66
 
67
- # Main content with AI assistant
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  main_col, ai_col = st.columns([3, 1])
69
 
70
  with main_col:
71
- # Execute current stage
72
- if st.session_state.current_stage == 1:
73
- st.session_state.workflow.stage_1_overview()
74
- elif st.session_state.current_stage == 2:
75
- st.session_state.workflow.stage_2_exploration()
76
- elif st.session_state.current_stage == 3:
77
- st.session_state.workflow.stage_3_cleaning()
78
- elif st.session_state.current_stage == 4:
79
- st.session_state.workflow.stage_4_analysis()
80
- elif st.session_state.current_stage == 5:
81
- st.session_state.workflow.stage_5_summary()
82
 
83
  with ai_col:
84
- st.subheader("🤖 AI Assistant")
85
-
86
- # AI model selection
87
- available_models = st.session_state.ai_assistant.get_available_models()
88
-
89
- if available_models:
90
- selected_model = st.selectbox("AI Model:", available_models)
91
-
92
- if st.button("Get AI Insights"):
93
- if st.session_state.workflow.insights:
94
- with st.spinner("Analyzing with AI..."):
95
- ai_analysis = st.session_state.ai_assistant.analyze_insights(
96
- df, st.session_state.workflow.insights, selected_model
97
- )
98
- st.write("**AI Analysis:**")
99
- st.write(ai_analysis)
100
- else:
101
- st.warning("Complete some analysis stages first.")
102
- else:
103
- st.warning("No AI models available.")
104
- st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.")
105
-
106
- # Quick insights
107
- st.subheader("📊 Quick Stats")
108
- if st.session_state.workflow.insights:
109
- st.metric("Total Insights", len(st.session_state.workflow.insights))
110
- st.metric("Current Stage", f"{st.session_state.current_stage}/5")
111
-
112
- # Latest insight
113
- if st.session_state.workflow.insights:
114
- latest = st.session_state.workflow.insights[-1]
115
- st.info(f"**Latest:** {latest['insight']}")
116
-
117
- # Data quality indicator
118
- quality_score = 100
119
- if st.session_state.workflow.stats['missing_values'] > 0:
120
- quality_score -= 30
121
- if st.session_state.workflow.stats['duplicates'] > 0:
122
- quality_score -= 20
123
-
124
- st.metric("Data Quality", f"{quality_score}%")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
- except Exception as e:
127
- st.error(f"Error: {str(e)}")
128
- st.info("Please check your file format and try again.")
 
 
129
 
130
  if __name__ == "__main__":
131
  main()
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import logging
4
+ from data_handler import load_data, validate_dataframe
5
  from analyzer import DataAnalysisWorkflow, AIAssistant
6
 
7
+ # Configure logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
def initialize_session_state():
    """Seed ``st.session_state`` with the keys the app reads.

    Keys that are already present are left untouched, so values survive
    Streamlit reruns; only missing keys receive their initial value.
    """
    initial_values = dict(
        current_stage=1,
        workflow=None,
        ai_assistant=None,
        show_help=False,
        analysis_complete=False,
        error_log=[],
    )

    # Work out which keys are absent first, then fill just those in.
    missing_keys = [name for name in initial_values if name not in st.session_state]
    for name in missing_keys:
        st.session_state[name] = initial_values[name]
25
+
26
def display_header():
    """Configure the page, draw the title, and show headline metrics.

    The four metrics (rows, columns, insight count, stage progress) are only
    rendered once a workflow is stored in session state.
    """
    st.set_page_config(
        page_title="Data Analysis Platform",
        page_icon="📊",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    st.title("📊 Data Analysis Platform")
    st.markdown("**Professional data analysis workflow with AI assistance**")

    workflow = st.session_state.workflow
    if workflow is None:
        return

    metric_columns = st.columns(4)

    # Progress is the current stage expressed as a percentage of the 5 stages.
    progress_pct = (st.session_state.current_stage / 5) * 100
    header_metrics = (
        ("📁 Rows", f"{workflow.df.shape[0]:,}"),
        ("📋 Columns", f"{workflow.df.shape[1]:,}"),
        ("🔍 Insights", len(workflow.insights)),
        ("📈 Progress", f"{progress_pct:.0f}%"),
    )

    for column, (label, value) in zip(metric_columns, header_metrics):
        with column:
            st.metric(label, value)
50
+
51
def display_sidebar():
    """Render the sidebar.

    Sections, in order: progress bar, stage checklist, Previous/Next buttons,
    quick-jump selector, the three most recent insights, help toggle, and the
    last five entries of the error log.
    """
    sidebar = st.sidebar
    state = st.session_state

    sidebar.header("🗺️ Analysis Progress")
    sidebar.progress(state.current_stage / 5)

    # (name, icon, description) for each of the five stages, in order.
    stages = [
        ("Data Overview", "📊", "Basic statistics and quality"),
        ("Exploration", "🔍", "Patterns and distributions"),
        ("Quality Check", "🧹", "Cleaning and validation"),
        ("Analysis", "🔬", "Advanced insights"),
        ("Summary", "📈", "Results and export"),
    ]

    sidebar.markdown("### 📋 Analysis Stages")

    for number, (name, _icon, desc) in enumerate(stages, start=1):
        if number < state.current_stage:
            sidebar.markdown(f"✅ {number}. {name}")
        elif number == state.current_stage:
            sidebar.markdown(f"🔄 **{number}. {name}**")
            sidebar.caption(f" {desc}")
        else:
            sidebar.markdown(f"⏳ {number}. {name}")

    # Previous / Next navigation
    sidebar.markdown("### 🧭 Navigation")
    prev_col, next_col = sidebar.columns(2)

    with prev_col:
        go_back = st.button(
            "⬅️ Previous",
            disabled=state.current_stage <= 1,
            help="Go to previous analysis stage",
        )
        if go_back:
            state.current_stage -= 1
            st.rerun()

    with next_col:
        go_forward = st.button(
            "➡️ Next",
            disabled=state.current_stage >= 5,
            help="Go to next analysis stage",
        )
        if go_forward:
            state.current_stage += 1
            st.rerun()

    # Direct jump to any stage
    sidebar.markdown("### 🚀 Quick Jump")
    target_stage = sidebar.selectbox(
        "Jump to stage:",
        options=list(range(1, 6)),
        index=state.current_stage - 1,
        format_func=lambda number: f"{number}. {stages[number - 1][0]}",
    )

    # The jump button is only drawn when a different stage is selected.
    if target_stage != state.current_stage and sidebar.button("🎯 Jump to Stage"):
        state.current_stage = target_stage
        st.rerun()

    # Most recent insights, each in its own collapsed expander
    if state.workflow and state.workflow.insights:
        sidebar.markdown("### 💡 Latest Insights")
        type_icons = {"success": "✅", "warning": "⚠️", "error": "❌"}

        for insight in state.workflow.insights[-3:]:
            icon = type_icons.get(insight.get('type'), "ℹ️")
            with sidebar.expander(f"{icon} Stage {insight['stage']}", expanded=False):
                st.write(insight['insight'])

    # Help toggle
    sidebar.markdown("---")
    if sidebar.button("❓ Toggle Help", help="Show/hide help information"):
        state.show_help = not state.show_help

    # Error log (last 5 entries)
    if state.error_log:
        with sidebar.expander("⚠️ Error Log", expanded=False):
            for error in state.error_log[-5:]:
                st.error(error)
131
+
132
def display_ai_assistant():
    """Render the AI assistant panel and the quick data-quality stats.

    Creates the ``AIAssistant`` lazily in session state, lets the user pick
    one of its available models, and on request runs ``analyze_insights`` on
    the current workflow's dataframe and insights. When no model is available
    a setup hint is shown instead. If a workflow exists, a quality score
    derived from the share of missing cells and duplicate rows is displayed.
    """
    st.subheader("🤖 AI Assistant")

    if st.session_state.ai_assistant is None:
        st.session_state.ai_assistant = AIAssistant()

    available_models = st.session_state.ai_assistant.get_available_models()

    if available_models:
        selected_model = st.selectbox(
            "AI Model:",
            available_models,
            help="Choose your preferred AI model for analysis",
        )

        # AI analysis button with loading state
        if st.button("🧠 Get AI Insights", type="primary"):
            if st.session_state.workflow and st.session_state.workflow.insights:
                with st.spinner("🔮 AI is analyzing your data..."):
                    try:
                        ai_analysis = st.session_state.ai_assistant.analyze_insights(
                            st.session_state.workflow.df,
                            st.session_state.workflow.insights,
                            selected_model,
                        )

                        # NOTE(review): a result containing the substring "Error"
                        # is treated as a failure; presumably analyze_insights
                        # reports failures that way - confirm against analyzer.
                        if ai_analysis and "Error" not in ai_analysis:
                            st.markdown("### 🎯 AI Analysis Results")
                            st.markdown(ai_analysis)

                            # Record the completed AI run as a workflow insight
                            st.session_state.workflow.add_insight(
                                "AI analysis completed",
                                st.session_state.current_stage,
                                "success",
                            )
                        else:
                            st.error(ai_analysis or "Failed to get AI analysis")

                    except Exception as e:
                        error_msg = f"AI analysis failed: {str(e)}"
                        st.error(error_msg)
                        st.session_state.error_log.append(error_msg)
                        logger.error(error_msg)
            else:
                st.warning("⚠️ Complete some analysis stages first to get AI insights")

        # AI model status
        st.markdown("### 📊 AI Status")
        for model in available_models:
            st.success(f"✅ {model} Ready")

    else:
        st.warning("⚠️ No AI models available")
        with st.expander("🔧 Setup AI Models", expanded=False):
            st.markdown("""
            **To enable AI features, add API keys to your environment:**

            ```bash
            # For Google Gemini
            export GOOGLE_API_KEY="your_gemini_key"

            # For OpenAI GPT
            export OPENAI_API_KEY="your_openai_key"
            ```

            **Or create a `.env` file:**
            ```
            GOOGLE_API_KEY=your_gemini_key
            OPENAI_API_KEY=your_openai_key
            ```
            """)

    # Quick insights panel
    if st.session_state.workflow:
        st.markdown("### ⚡ Quick Stats")

        workflow = st.session_state.workflow

        # Data quality indicator. Guard both denominators: a dataframe with no
        # rows (or no columns) would otherwise raise ZeroDivisionError here.
        row_count = len(workflow.df)
        total_cells = row_count * len(workflow.df.columns)
        if total_cells:
            missing_pct = (workflow.stats['missing_values'] / total_cells) * 100
        else:
            missing_pct = 0.0
        if row_count:
            duplicate_pct = (workflow.stats['duplicates'] / row_count) * 100
        else:
            duplicate_pct = 0.0

        # Missing cells cost 2 points per percent, duplicates 3; floor at 0.
        quality_score = 100 - (missing_pct * 2) - (duplicate_pct * 3)
        quality_score = max(0, quality_score)

        if quality_score >= 90:
            st.success(f"🌟 Excellent Quality ({quality_score:.0f}%)")
        elif quality_score >= 70:
            st.info(f"👍 Good Quality ({quality_score:.0f}%)")
        else:
            st.warning(f"⚠️ Needs Improvement ({quality_score:.0f}%)")

        # Stage completion indicators
        st.metric("Current Stage", f"{st.session_state.current_stage}/5")
        st.metric("Operations", len(workflow.cleaning_history))
223
+
224
def _show_dataset_summary(df):
    """Render the success banner and the collapsible preview/stats for ``df``."""
    st.success(f"✅ **Dataset loaded successfully!** Shape: {df.shape[0]:,} rows × {df.shape[1]:,} columns")

    with st.expander("👀 Quick Data Preview", expanded=False):
        st.dataframe(df.head(), use_container_width=True)

        col1, col2 = st.columns(2)
        with col1:
            st.write("**Column Types:**")
            dtype_summary = df.dtypes.value_counts()
            for dtype, count in dtype_summary.items():
                st.write(f"• {dtype}: {count} columns")

        with col2:
            st.write("**Quick Stats:**")
            st.write(f"• Missing values: {df.isnull().sum().sum():,}")
            st.write(f"• Duplicate rows: {df.duplicated().sum():,}")
            st.write(f"• Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")


def handle_file_upload():
    """Show the uploader and make sure a workflow exists for the uploaded file.

    Streamlit re-executes the script on every widget interaction while the
    uploader keeps returning the same file. The workflow is therefore built
    only the first time a given upload is seen; later reruns reuse the stored
    workflow so the current stage, insights and cleaning history survive
    navigation. A different upload replaces the workflow and resets the stage
    to 1.

    Returns:
        True when a validated dataset is available in
        ``st.session_state.workflow`` for the uploaded file, False when no
        file is uploaded or loading/validation failed.
    """
    st.markdown("### 📁 Upload Your Dataset")

    uploaded_file = st.file_uploader(
        "Choose your data file",
        type=['csv', 'xlsx', 'xls'],
        help="Supported formats: CSV, Excel (.xlsx, .xls). Maximum recommended size: 200MB"
    )

    if uploaded_file is None:
        return False

    # File information
    size_bytes = len(uploaded_file.getvalue())
    file_size = size_bytes / 1024**2

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("📁 File Name", uploaded_file.name)
    with col2:
        st.metric("📊 File Size", f"{file_size:.1f} MB")
    with col3:
        file_type = uploaded_file.name.split('.')[-1].upper()
        st.metric("📋 Format", file_type)

    # Identify this upload. ``file_id`` is only present on newer Streamlit
    # versions, so fall back to name + size when it is missing.
    file_key = (uploaded_file.name, size_bytes, getattr(uploaded_file, "file_id", None))

    existing_workflow = st.session_state.get("workflow")
    if existing_workflow is not None and st.session_state.get("loaded_file_key") == file_key:
        # Same upload as on a previous run: keep the workflow and stage as is.
        _show_dataset_summary(existing_workflow.df)
        return True

    # New upload: load, validate, and build a fresh workflow.
    with st.spinner("🔄 Loading and validating your data..."):
        try:
            df = load_data(uploaded_file)

            if df is None:
                st.error(" Failed to load data. Please check file format and try again.")
                return False

            is_valid, validation_issues = validate_dataframe(df)

            if not is_valid:
                st.error("❌ **Data validation failed:**")
                for issue in validation_issues:
                    st.write(f"• {issue}")
                st.session_state.error_log.extend(validation_issues)
                return False

            _show_dataset_summary(df)

            # Initialize workflow for the new dataset
            st.session_state.workflow = DataAnalysisWorkflow(df)
            st.session_state.current_stage = 1
            st.session_state.analysis_complete = False
            st.session_state["loaded_file_key"] = file_key

            return True

        except Exception as e:
            error_msg = f"Error processing file: {str(e)}"
            st.error(f"❌ {error_msg}")
            st.session_state.error_log.append(error_msg)
            logger.error(error_msg)
            return False
303
+
304
def display_help_section():
    """Show help text for the current stage when help is toggled on.

    Does nothing unless ``st.session_state.show_help`` is truthy and the
    current stage has a help entry.
    """
    # Local import keeps this block self-contained (stdlib only).
    import textwrap

    if not st.session_state.show_help:
        return

    help_content = {
        1: {
            "title": "📊 Data Overview Help",
            "content": """
                **What you'll see:**
                - Basic dataset statistics (rows, columns, memory usage)
                - Data quality score and grade
                - Column type classification and cardinality analysis
                - Missing values and duplicates detection

                **What to look for:**
                - Quality score below 80 indicates data issues
                - Constant columns that can be removed
                - High memory usage that can be optimized
                - Missing value patterns
                """
        },
        2: {
            "title": "🔍 Exploration Help",
            "content": """
                **What you'll analyze:**
                - Distribution of numeric variables
                - Frequency of categorical variables
                - Relationships between variables

                **Key insights to find:**
                - Skewed distributions that need transformation
                - High cardinality categories
                - Strong correlations between variables
                - Imbalanced categorical data
                """
        },
        3: {
            "title": "🧹 Data Cleaning Help",
            "content": """
                **Available operations:**
                - Missing value treatment (fill, drop, impute)
                - Duplicate row removal
                - Outlier detection and treatment
                - Data type corrections

                **Best practices:**
                - Preview operations before applying
                - Keep track of all changes made
                - Use domain knowledge for cleaning decisions
                - Test different approaches
                """
        },
        4: {
            "title": "🔬 Advanced Analysis Help",
            "content": """
                **Advanced features:**
                - Statistical correlation testing
                - Group comparisons and ANOVA
                - Distribution analysis and normality testing

                **What to look for:**
                - Statistically significant relationships
                - Group differences in key metrics
                - Non-normal distributions
                - Interaction effects
                """
        },
        5: {
            "title": "📈 Summary Help",
            "content": """
                **Final deliverables:**
                - Comprehensive analysis report
                - Cleaned dataset export
                - Reproducible Python code
                - Executive summary

                **Export options:**
                - Multiple report formats (Markdown, HTML, Text)
                - Various data formats (CSV, Excel, Parquet)
                - Ready-to-use Python scripts
                """
        }
    }

    current_help = help_content.get(st.session_state.current_stage, {})
    if not current_help:
        return

    # The content is written indented to match the code. Strip that common
    # indentation before rendering: with the unindented title on the first
    # line, the combined string would otherwise keep the leading spaces and
    # Markdown would show the lists as a preformatted code block.
    body = textwrap.dedent(current_help['content']).strip()
    st.info(f"**{current_help['title']}**\n\n{body}")
390
+
391
def execute_analysis_stage():
    """Run the workflow method for the current stage.

    Any exception raised by the stage is shown to the user, appended to the
    session error log and logged; it is not re-raised. Reaching stage 5 for
    the first time marks the analysis complete and triggers balloons.
    """
    # Stage number -> name of the workflow method that renders it.
    stage_methods = {
        1: "stage_1_overview",
        2: "stage_2_exploration",
        3: "stage_3_cleaning",
        4: "stage_4_analysis",
        5: "stage_5_summary",
    }

    try:
        workflow = st.session_state.workflow
        stage = st.session_state.current_stage

        method_name = stage_methods.get(stage)
        if method_name is not None:
            getattr(workflow, method_name)()

        # First successful visit to the summary stage: celebrate once.
        if stage == 5 and not st.session_state.analysis_complete:
            st.session_state.analysis_complete = True
            st.balloons()

    except Exception as e:
        error_msg = f"Error in stage {st.session_state.current_stage}: {str(e)}"
        st.error(f"❌ {error_msg}")
        st.session_state.error_log.append(error_msg)
        logger.error(error_msg)

        # Fallback message so the main column is not left empty
        st.warning("⚠️ There was an issue with this analysis stage. Please try refreshing or contact support.")
419
+
420
def display_footer():
    """Draw the three-column footer: features, supported formats, and tips."""
    st.markdown("---")

    # One (heading, bullet lines) pair per footer column, left to right.
    footer_sections = (
        (
            "**📊 Platform Features:**",
            (
                "• 5-stage analysis workflow",
                "• AI-powered insights",
                "• Interactive visualizations",
                "• Multiple export formats",
            ),
        ),
        (
            "**🔧 Supported Formats:**",
            (
                "• CSV files (any encoding)",
                "• Excel files (.xlsx, .xls)",
                "• Large datasets (up to 200MB)",
                "• Mixed data types",
            ),
        ),
        (
            "**💡 Tips for Best Results:**",
            (
                "• Ensure clean column headers",
                "• Include data dictionary if available",
                "• Review quality score recommendations",
                "• Use AI insights for deeper analysis",
            ),
        ),
    )

    for column, (heading, bullets) in zip(st.columns(3), footer_sections):
        with column:
            st.markdown(heading)
            for bullet in bullets:
                st.markdown(bullet)
446
+
447
def _render_landing_page():
    """Show the welcome text and supported-format table used before any upload."""
    st.markdown("### 🚀 Welcome to the Data Analysis Platform")

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
        **🎯 What this platform does:**
        - **Automated Data Quality Assessment** - Get instant quality scores and recommendations
        - **Interactive Exploration** - Visualize distributions, correlations, and patterns
        - **Smart Data Cleaning** - Handle missing values, duplicates, and outliers
        - **AI-Powered Insights** - Get business recommendations from your data
        - **Professional Reports** - Export analysis in multiple formats
        """)

    with col2:
        st.markdown("""
        **📋 5-Stage Analysis Workflow:**
        1. **📊 Data Overview** - Quality assessment and structure analysis
        2. **🔍 Exploration** - Distribution and pattern discovery
        3. **🧹 Quality Check** - Data cleaning and validation
        4. **🔬 Analysis** - Advanced statistical analysis
        5. **📈 Summary** - Results compilation and export
        """)

    # Supported formats table
    st.markdown("### 📝 Supported Data Formats")
    format_info = pd.DataFrame({
        'Format': ['CSV', 'Excel (.xlsx)', 'Excel (.xls)'],
        'Max Size': ['200MB', '200MB', '100MB'],
        'Encoding': ['Auto-detect', 'UTF-8', 'UTF-8'],
        'Features': ['All features', 'All features', 'Basic features']
    })
    st.dataframe(format_info, use_container_width=True, hide_index=True)


def main():
    """Application entry point.

    Initialises session state, draws the header and help, handles the file
    upload, then either runs the current analysis stage alongside the AI
    panel and sidebar, or shows the landing page. Any unexpected exception is
    reported in the UI, logged with its traceback, and followed by recovery
    controls.
    """
    try:
        # Initialize application
        initialize_session_state()
        display_header()

        # Show help if enabled
        display_help_section()

        # File upload section
        data_loaded = handle_file_upload()

        if data_loaded and st.session_state.workflow is not None:
            main_col, ai_col = st.columns([3, 1])

            with main_col:
                execute_analysis_stage()

            with ai_col:
                display_ai_assistant()

            display_sidebar()

            if st.session_state.analysis_complete:
                st.success("🎉 **Analysis Complete!** Your comprehensive data analysis is ready.")

        elif not data_loaded:
            _render_landing_page()

        # Footer
        display_footer()

    except Exception as e:
        # Global error handler
        error_msg = f"Critical application error: {str(e)}"
        st.error(f"❌ {error_msg}")

        # The failure may have happened before session state was initialised;
        # make sure the log exists so this handler cannot raise a second error
        # that hides the original one.
        if "error_log" not in st.session_state:
            st.session_state["error_log"] = []
        st.session_state.error_log.append(error_msg)

        # exc_info=True records the traceback alongside the message.
        logger.critical(error_msg, exc_info=True)

        # Recovery options
        st.markdown("### 🔧 Recovery Options")
        col1, col2 = st.columns(2)

        with col1:
            if st.button("🔄 Restart Analysis"):
                # Drop every session key so the next run starts from scratch
                for key in list(st.session_state.keys()):
                    del st.session_state[key]
                st.rerun()

        with col2:
            if st.button("📋 View Error Log"):
                st.write("**Recent Errors:**")
                for error in st.session_state.error_log[-10:]:
                    st.code(error)
541
 
542
  if __name__ == "__main__":
543
  main()