Spaces:

entropy25
/

data-analysis-platform

Build error

App Files Files Community

entropy25 committed on Aug 9, 2025

Commit

aa64ef2

verified ·

1 Parent(s): 6a83d85

Update app.py

Browse files

Files changed (1) hide show

app.py +177 -325

app.py CHANGED Viewed

@@ -24,7 +24,7 @@ def initialize_session_state():
             st.session_state[key] = value
 def display_header():
-    """Display enhanced application header"""
     st.set_page_config(
         page_title="Data Analysis Platform",
         page_icon="📊",
@@ -49,20 +49,18 @@ def display_header():
             st.metric("📈 Progress", f"{stage_progress:.0f}%")
 def display_sidebar():
-    """Enhanced sidebar with progress tracking and navigation"""
     st.sidebar.header("🗺️ Analysis Progress")
-    # Progress bar
     progress_value = st.session_state.current_stage / 5
     st.sidebar.progress(progress_value)
-    # Stage navigation with enhanced UI
     stages = [
-        {"name": "Data Overview", "icon": "📊", "desc": "Basic statistics and quality"},
-        {"name": "Exploration", "icon": "🔍", "desc": "Patterns and distributions"},
-        {"name": "Quality Check", "icon": "🧹", "desc": "Cleaning and validation"},
-        {"name": "Analysis", "icon": "🔬", "desc": "Advanced insights"},
-        {"name": "Summary", "icon": "📈", "desc": "Results and export"}
     ]
     st.sidebar.markdown("### 📋 Analysis Stages")
@@ -70,7 +68,6 @@ def display_sidebar():
     for i, stage in enumerate(stages, 1):
         if i == st.session_state.current_stage:
             st.sidebar.markdown(f"🔄 **{i}. {stage['name']}**")
-            st.sidebar.caption(f"   {stage['desc']}")
         elif i < st.session_state.current_stage:
             st.sidebar.markdown(f"✅ {i}. {stage['name']}")
         else:
@@ -81,68 +78,36 @@ def display_sidebar():
     col1, col2 = st.sidebar.columns(2)
     with col1:
-        if st.button("⬅️ Previous",
-                    disabled=st.session_state.current_stage <= 1,
-                    help="Go to previous analysis stage"):
             st.session_state.current_stage -= 1
             st.rerun()
     with col2:
-        if st.button("➡️ Next",
-                    disabled=st.session_state.current_stage >= 5,
-                    help="Go to next analysis stage"):
             st.session_state.current_stage += 1
             st.rerun()
-    # Quick stage jumper
-    st.sidebar.markdown("### 🚀 Quick Jump")
-    target_stage = st.sidebar.selectbox(
-        "Jump to stage:",
-        options=list(range(1, 6)),
-        index=st.session_state.current_stage - 1,
-        format_func=lambda x: f"{x}. {stages[x-1]['name']}"
-    )
-    if target_stage != st.session_state.current_stage:
-        if st.sidebar.button("🎯 Jump to Stage"):
-            st.session_state.current_stage = target_stage
-            st.rerun()
-    # Recent insights panel
-    if st.session_state.workflow and st.session_state.workflow.insights:
-        st.sidebar.markdown("### 💡 Latest Insights")
-        recent_insights = st.session_state.workflow.insights[-3:]
-        for insight in recent_insights:
-            icon = {"success": "✅", "warning": "⚠️", "error": "❌"}.get(insight.get('type'), "ℹ️")
-            with st.sidebar.expander(f"{icon} Stage {insight['stage']}", expanded=False):
-                st.write(insight['insight'])
-    # Help and settings
-    st.sidebar.markdown("---")
-    if st.sidebar.button("❓ Toggle Help", help="Show/hide help information"):
-        st.session_state.show_help = not st.session_state.show_help
     # Error log
     if st.session_state.error_log:
         with st.sidebar.expander("⚠️ Error Log", expanded=False):
-            for error in st.session_state.error_log[-5:]:  # Show last 5 errors
                 st.error(error)
 def display_ai_assistant():
-    """Enhanced AI assistant panel"""
     st.subheader("🤖 AI Assistant")
     if st.session_state.ai_assistant is None:
         st.session_state.ai_assistant = AIAssistant()
-    available_models = st.session_state.ai_assistant.get_available_models()
     if available_models:
-        selected_model = st.selectbox("AI Model:", available_models,
-                                    help="Choose your preferred AI model for analysis")
-        # AI analysis button with loading state
         if st.button("🧠 Get AI Insights", type="primary"):
             if st.session_state.workflow and st.session_state.workflow.insights:
                 with st.spinner("🔮 AI is analyzing your data..."):
@@ -156,84 +121,31 @@ def display_ai_assistant():
                         if ai_analysis and "Error" not in ai_analysis:
                             st.markdown("### 🎯 AI Analysis Results")
                             st.markdown(ai_analysis)
-                            # Add AI insight to workflow
                             st.session_state.workflow.add_insight("AI analysis completed",
                                                                 st.session_state.current_stage, "success")
                         else:
-                            st.error(ai_analysis or "Failed to get AI analysis")
                     except Exception as e:
                         error_msg = f"AI analysis failed: {str(e)}"
                         st.error(error_msg)
                         st.session_state.error_log.append(error_msg)
-                        logger.error(error_msg)
             else:
-                st.warning("⚠️ Complete some analysis stages first to get AI insights")
-        # AI model status
-        st.markdown("### 📊 AI Status")
-        for model in available_models:
-            st.success(f"✅ {model} Ready")
     else:
         st.warning("⚠️ No AI models available")
-        with st.expander("🔧 Setup AI Models", expanded=False):
-            st.markdown("""
-            **To enable AI features, add API keys to your environment:**
-            ```bash
-            # For Google Gemini
-            export GOOGLE_API_KEY="your_gemini_key"
-            # For OpenAI GPT
-            export OPENAI_API_KEY="your_openai_key"
-            ```
-            **Or create a `.env` file:**
-            ```
-            GOOGLE_API_KEY=your_gemini_key
-            OPENAI_API_KEY=your_openai_key
-            ```
-            """)
-    # Quick insights panel
-    if st.session_state.workflow:
-        st.markdown("### ⚡ Quick Stats")
-        workflow = st.session_state.workflow
-        # Data quality indicator
-        missing_pct = (workflow.stats['missing_values'] / (len(workflow.df) * len(workflow.df.columns))) * 100
-        duplicate_pct = (workflow.stats['duplicates'] / len(workflow.df)) * 100
-        quality_score = 100 - (missing_pct * 2) - (duplicate_pct * 3)
-        quality_score = max(0, quality_score)
-        if quality_score >= 90:
-            st.success(f"🌟 Excellent Quality ({quality_score:.0f}%)")
-        elif quality_score >= 70:
-            st.info(f"👍 Good Quality ({quality_score:.0f}%)")
-        else:
-            st.warning(f"⚠️ Needs Improvement ({quality_score:.0f}%)")
-        # Stage completion indicators
-        st.metric("Current Stage", f"{st.session_state.current_stage}/5")
-        st.metric("Operations", len(workflow.cleaning_history))
 def handle_file_upload():
-    """Enhanced file upload with validation and preview"""
     st.markdown("### 📁 Upload Your Dataset")
-    # File upload with help
     uploaded_file = st.file_uploader(
         "Choose your data file",
         type=['csv', 'xlsx', 'xls'],
-        help="Supported formats: CSV, Excel (.xlsx, .xls). Maximum recommended size: 200MB"
     )
     if uploaded_file is not None:
-        # File information
         file_size = len(uploaded_file.getvalue()) / 1024**2
         col1, col2, col3 = st.columns(3)
@@ -245,37 +157,19 @@ def handle_file_upload():
             file_type = uploaded_file.name.split('.')[-1].upper()
             st.metric("📋 Format", file_type)
-        # Load data with progress
-        with st.spinner("🔄 Loading and validating your data..."):
             try:
                 df = load_data(uploaded_file)
                 if df is not None:
-                    # Validate data
                     is_valid, validation_issues = validate_dataframe(df)
                     if is_valid:
-                        st.success(f"✅ **Dataset loaded successfully!** Shape: {df.shape[0]:,} rows × {df.shape[1]:,} columns")
-                        # Quick preview
-                        with st.expander("👀 Quick Data Preview", expanded=False):
                             st.dataframe(df.head(), use_container_width=True)
-                            # Basic info
-                            col1, col2 = st.columns(2)
-                            with col1:
-                                st.write("**Column Types:**")
-                                dtype_summary = df.dtypes.value_counts()
-                                for dtype, count in dtype_summary.items():
-                                    st.write(f"• {dtype}: {count} columns")
-                            with col2:
-                                st.write("**Quick Stats:**")
-                                st.write(f"• Missing values: {df.isnull().sum().sum():,}")
-                                st.write(f"• Duplicate rows: {df.duplicated().sum():,}")
-                                st.write(f"• Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")
-                        # Initialize workflow
                         st.session_state.workflow = DataAnalysisWorkflow(df)
                         st.session_state.current_stage = 1
                         st.session_state.analysis_complete = False
@@ -286,127 +180,162 @@ def handle_file_upload():
                         st.error("❌ **Data validation failed:**")
                         for issue in validation_issues:
                             st.write(f"• {issue}")
-                        st.session_state.error_log.extend(validation_issues)
                         return False
                 else:
-                    st.error("❌ Failed to load data. Please check file format and try again.")
                     return False
             except Exception as e:
-                error_msg = f"Error processing file: {str(e)}"
                 st.error(f"❌ {error_msg}")
                 st.session_state.error_log.append(error_msg)
-                logger.error(error_msg)
                 return False
     return False
-def display_help_section():
-    """Display contextual help based on current stage"""
-    if st.session_state.show_help:
-        help_content = {
-            1: {
-                "title": "📊 Data Overview Help",
-                "content": """
-                **What you'll see:**
-                - Basic dataset statistics (rows, columns, memory usage)
-                - Data quality score and grade
-                - Column type classification and cardinality analysis
-                - Missing values and duplicates detection
-                **What to look for:**
-                - Quality score below 80 indicates data issues
-                - Constant columns that can be removed
-                - High memory usage that can be optimized
-                - Missing value patterns
-                """
-            },
-            2: {
-                "title": "🔍 Exploration Help",
-                "content": """
-                **What you'll analyze:**
-                - Distribution of numeric variables
-                - Frequency of categorical variables
-                - Relationships between variables
-                **Key insights to find:**
-                - Skewed distributions that need transformation
-                - High cardinality categories
-                - Strong correlations between variables
-                - Imbalanced categorical data
-                """
-            },
-            3: {
-                "title": "🧹 Data Cleaning Help",
-                "content": """
-                **Available operations:**
-                - Missing value treatment (fill, drop, impute)
-                - Duplicate row removal
-                - Outlier detection and treatment
-                - Data type corrections
-                **Best practices:**
-                - Preview operations before applying
-                - Keep track of all changes made
-                - Use domain knowledge for cleaning decisions
-                - Test different approaches
-                """
-            },
-            4: {
-                "title": "🔬 Advanced Analysis Help",
-                "content": """
-                **Advanced features:**
-                - Statistical correlation testing
-                - Group comparisons and ANOVA
-                - Distribution analysis and normality testing
-                **What to look for:**
-                - Statistically significant relationships
-                - Group differences in key metrics
-                - Non-normal distributions
-                - Interaction effects
-                """
-            },
-            5: {
-                "title": "📈 Summary Help",
-                "content": """
-                **Final deliverables:**
-                - Comprehensive analysis report
-                - Cleaned dataset export
-                - Reproducible Python code
-                - Executive summary
-                **Export options:**
-                - Multiple report formats (Markdown, HTML, Text)
-                - Various data formats (CSV, Excel, Parquet)
-                - Ready-to-use Python scripts
-                """
-            }
-        }
-        current_help = help_content.get(st.session_state.current_stage, {})
-        if current_help:
-            st.info(f"**{current_help['title']}**\n{current_help['content']}")
-def execute_analysis_stage():
-    """Execute the current analysis stage with error handling"""
     try:
         workflow = st.session_state.workflow
         stage = st.session_state.current_stage
         if stage == 1:
-            workflow.stage_1_overview()
         elif stage == 2:
-            workflow.stage_2_exploration()
         elif stage == 3:
-            workflow.stage_3_cleaning()
         elif stage == 4:
-            workflow.stage_4_analysis()
         elif stage == 5:
-            workflow.stage_5_summary()
             if not st.session_state.analysis_complete:
                 st.session_state.analysis_complete = True
-                st.balloons()  # Celebration for completion
     except Exception as e:
         error_msg = f"Error in stage {st.session_state.current_stage}: {str(e)}"
@@ -414,130 +343,53 @@ def execute_analysis_stage():
         st.session_state.error_log.append(error_msg)
         logger.error(error_msg)
-        # Fallback UI
-        st.warning("⚠️ There was an issue with this analysis stage. Please try refreshing or contact support.")
-def display_footer():
-    """Display application footer with additional information"""
-    st.markdown("---")
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        st.markdown("**📊 Platform Features:**")
-        st.markdown("• 5-stage analysis workflow")
-        st.markdown("• AI-powered insights")
-        st.markdown("• Interactive visualizations")
-        st.markdown("• Multiple export formats")
-    with col2:
-        st.markdown("**🔧 Supported Formats:**")
-        st.markdown("• CSV files (any encoding)")
-        st.markdown("• Excel files (.xlsx, .xls)")
-        st.markdown("• Large datasets (up to 200MB)")
-        st.markdown("• Mixed data types")
-    with col3:
-        st.markdown("**💡 Tips for Best Results:**")
-        st.markdown("• Ensure clean column headers")
-        st.markdown("• Include data dictionary if available")
-        st.markdown("• Review quality score recommendations")
-        st.markdown("• Use AI insights for deeper analysis")
 def main():
-    """Enhanced main application with comprehensive error handling"""
     try:
-        # Initialize application
         initialize_session_state()
         display_header()
-        # Show help if enabled
-        display_help_section()
-        # File upload section
         data_loaded = handle_file_upload()
         if data_loaded and st.session_state.workflow is not None:
-            # Create main layout
             main_col, ai_col = st.columns([3, 1])
             with main_col:
-                # Execute current analysis stage
-                execute_analysis_stage()
             with ai_col:
-                # AI Assistant panel
                 display_ai_assistant()
-            # Display sidebar navigation
             display_sidebar()
-            # Show completion message
             if st.session_state.analysis_complete:
-                st.success("🎉 **Analysis Complete!** Your comprehensive data analysis is ready.")
         elif not data_loaded:
-            # Landing page content
-            st.markdown("### 🚀 Welcome to the Data Analysis Platform")
-            col1, col2 = st.columns(2)
-            with col1:
-                st.markdown("""
-                **🎯 What this platform does:**
-                - **Automated Data Quality Assessment** - Get instant quality scores and recommendations
-                - **Interactive Exploration** - Visualize distributions, correlations, and patterns
-                - **Smart Data Cleaning** - Handle missing values, duplicates, and outliers
-                - **AI-Powered Insights** - Get business recommendations from your data
-                - **Professional Reports** - Export analysis in multiple formats
-                """)
-            with col2:
-                st.markdown("""
-                **📋 5-Stage Analysis Workflow:**
-                1. **📊 Data Overview** - Quality assessment and structure analysis
-                2. **🔍 Exploration** - Distribution and pattern discovery
-                3. **🧹 Quality Check** - Data cleaning and validation
-                4. **🔬 Analysis** - Advanced statistical analysis
-                5. **📈 Summary** - Results compilation and export
-                """)
-            # Sample data section
-            st.markdown("### 📝 Supported Data Formats")
-            format_info = pd.DataFrame({
-                'Format': ['CSV', 'Excel (.xlsx)', 'Excel (.xls)'],
-                'Max Size': ['200MB', '200MB', '100MB'],
-                'Encoding': ['Auto-detect', 'UTF-8', 'UTF-8'],
-                'Features': ['All features', 'All features', 'Basic features']
-            })
-            st.dataframe(format_info, use_container_width=True, hide_index=True)
-        # Footer
-        display_footer()
     except Exception as e:
-        # Global error handler
-        error_msg = f"Critical application error: {str(e)}"
         st.error(f"❌ {error_msg}")
         st.session_state.error_log.append(error_msg)
-        logger.critical(error_msg)
-        # Recovery options
-        st.markdown("### 🔧 Recovery Options")
-        col1, col2 = st.columns(2)
-        with col1:
-            if st.button("🔄 Restart Analysis"):
-                # Clear session state
-                for key in list(st.session_state.keys()):
-                    del st.session_state[key]
-                st.rerun()
-        with col2:
-            if st.button("📋 View Error Log"):
-                st.write("**Recent Errors:**")
-                for error in st.session_state.error_log[-10:]:
-                    st.code(error)
 if __name__ == "__main__":
     main()

             st.session_state[key] = value
 def display_header():
+    """Display application header"""
     st.set_page_config(
         page_title="Data Analysis Platform",
         page_icon="📊",
             st.metric("📈 Progress", f"{stage_progress:.0f}%")
 def display_sidebar():
+    """Sidebar with progress tracking and navigation"""
     st.sidebar.header("🗺️ Analysis Progress")
     progress_value = st.session_state.current_stage / 5
     st.sidebar.progress(progress_value)
     stages = [
+        {"name": "Data Overview", "icon": "📊"},
+        {"name": "Exploration", "icon": "🔍"},
+        {"name": "Quality Check", "icon": "🧹"},
+        {"name": "Analysis", "icon": "🔬"},
+        {"name": "Summary", "icon": "📈"}
     ]
     st.sidebar.markdown("### 📋 Analysis Stages")
     for i, stage in enumerate(stages, 1):
         if i == st.session_state.current_stage:
             st.sidebar.markdown(f"🔄 **{i}. {stage['name']}**")
         elif i < st.session_state.current_stage:
             st.sidebar.markdown(f"✅ {i}. {stage['name']}")
         else:
     col1, col2 = st.sidebar.columns(2)
     with col1:
+        if st.button("⬅️ Previous", disabled=st.session_state.current_stage <= 1):
             st.session_state.current_stage -= 1
             st.rerun()
     with col2:
+        if st.button("➡️ Next", disabled=st.session_state.current_stage >= 5):
             st.session_state.current_stage += 1
             st.rerun()
     # Error log
     if st.session_state.error_log:
         with st.sidebar.expander("⚠️ Error Log", expanded=False):
+            for error in st.session_state.error_log[-5:]:
                 st.error(error)
 def display_ai_assistant():
+    """AI assistant panel"""
     st.subheader("🤖 AI Assistant")
     if st.session_state.ai_assistant is None:
         st.session_state.ai_assistant = AIAssistant()
+    try:
+        available_models = st.session_state.ai_assistant.get_available_models()
+    except:
+        available_models = []
     if available_models:
+        selected_model = st.selectbox("AI Model:", available_models)
         if st.button("🧠 Get AI Insights", type="primary"):
             if st.session_state.workflow and st.session_state.workflow.insights:
                 with st.spinner("🔮 AI is analyzing your data..."):
                         if ai_analysis and "Error" not in ai_analysis:
                             st.markdown("### 🎯 AI Analysis Results")
                             st.markdown(ai_analysis)
                             st.session_state.workflow.add_insight("AI analysis completed",
                                                                 st.session_state.current_stage, "success")
                         else:
+                            st.error("Failed to get AI analysis")
                     except Exception as e:
                         error_msg = f"AI analysis failed: {str(e)}"
                         st.error(error_msg)
                         st.session_state.error_log.append(error_msg)
             else:
+                st.warning("⚠️ Complete some analysis stages first")
     else:
         st.warning("⚠️ No AI models available")
 def handle_file_upload():
+    """File upload with validation and preview"""
     st.markdown("### 📁 Upload Your Dataset")
     uploaded_file = st.file_uploader(
         "Choose your data file",
         type=['csv', 'xlsx', 'xls'],
+        help="Supported formats: CSV, Excel (.xlsx, .xls)"
     )
     if uploaded_file is not None:
         file_size = len(uploaded_file.getvalue()) / 1024**2
         col1, col2, col3 = st.columns(3)
             file_type = uploaded_file.name.split('.')[-1].upper()
             st.metric("📋 Format", file_type)
+        with st.spinner("🔄 Loading data..."):
             try:
                 df = load_data(uploaded_file)
                 if df is not None:
                     is_valid, validation_issues = validate_dataframe(df)
                     if is_valid:
+                        st.success(f"✅ **Dataset loaded!** Shape: {df.shape[0]:,} rows × {df.shape[1]:,} columns")
+                        with st.expander("👀 Data Preview", expanded=False):
                             st.dataframe(df.head(), use_container_width=True)
                         st.session_state.workflow = DataAnalysisWorkflow(df)
                         st.session_state.current_stage = 1
                         st.session_state.analysis_complete = False
                         st.error("❌ **Data validation failed:**")
                         for issue in validation_issues:
                             st.write(f"• {issue}")
                         return False
                 else:
+                    st.error("❌ Failed to load data")
                     return False
             except Exception as e:
+                error_msg = f"Error loading file: {str(e)}"
                 st.error(f"❌ {error_msg}")
                 st.session_state.error_log.append(error_msg)
                 return False
     return False
+def safe_execute_stage():
+    """Execute current stage with comprehensive error handling"""
     try:
         workflow = st.session_state.workflow
         stage = st.session_state.current_stage
         if stage == 1:
+            # Stage 1: Data Overview with safe execution
+            st.subheader("📊 Data Overview")
+            # Basic info
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.metric("Rows", f"{len(workflow.df):,}")
+            with col2:
+                st.metric("Columns", f"{len(workflow.df.columns):,}")
+            with col3:
+                memory_mb = workflow.df.memory_usage(deep=True).sum() / 1024**2
+                st.metric("Memory", f"{memory_mb:.1f} MB")
+            # Data types
+            st.markdown("### Column Information")
+            dtype_counts = workflow.df.dtypes.value_counts()
+            for dtype, count in dtype_counts.items():
+                st.write(f"• **{dtype}**: {count} columns")
+            # Missing values
+            missing_total = workflow.df.isnull().sum().sum()
+            duplicates = workflow.df.duplicated().sum()
+            col1, col2 = st.columns(2)
+            with col1:
+                st.metric("Missing Values", f"{missing_total:,}")
+            with col2:
+                st.metric("Duplicates", f"{duplicates:,}")
+            # Quality score calculation (fixed)
+            total_cells = len(workflow.df) * len(workflow.df.columns)
+            missing_pct = (missing_total / total_cells) * 100 if total_cells > 0 else 0
+            duplicate_pct = (duplicates / len(workflow.df)) * 100 if len(workflow.df) > 0 else 0
+            quality_score = max(0, 100 - (missing_pct * 2) - (duplicate_pct * 3))
+            if quality_score >= 90:
+                st.success(f"🌟 Excellent Data Quality: {quality_score:.0f}%")
+            elif quality_score >= 70:
+                st.info(f"👍 Good Data Quality: {quality_score:.0f}%")
+            else:
+                st.warning(f"⚠️ Data Quality Needs Improvement: {quality_score:.0f}%")
+            # Add insight to workflow
+            workflow.add_insight(f"Data overview completed. Quality score: {quality_score:.0f}%",
+                               stage, "success")
         elif stage == 2:
+            # Stage 2: Data Exploration
+            st.subheader("🔍 Data Exploration")
+            numeric_cols = workflow.df.select_dtypes(include=['number']).columns
+            categorical_cols = workflow.df.select_dtypes(include=['object', 'category']).columns
+            if len(numeric_cols) > 0:
+                st.markdown("### Numeric Columns")
+                st.dataframe(workflow.df[numeric_cols].describe(), use_container_width=True)
+            if len(categorical_cols) > 0:
+                st.markdown("### Categorical Columns")
+                for col in categorical_cols[:5]:  # Show first 5 categorical columns
+                    unique_count = workflow.df[col].nunique()
+                    st.write(f"**{col}**: {unique_count} unique values")
+            workflow.add_insight("Data exploration completed", stage, "success")
         elif stage == 3:
+            # Stage 3: Data Cleaning
+            st.subheader("🧹 Data Quality Check")
+            # Missing values by column
+            missing_by_col = workflow.df.isnull().sum()
+            missing_cols = missing_by_col[missing_by_col > 0]
+            if len(missing_cols) > 0:
+                st.markdown("### Missing Values by Column")
+                for col, count in missing_cols.items():
+                    pct = (count / len(workflow.df)) * 100
+                    st.write(f"• **{col}**: {count} missing ({pct:.1f}%)")
+            else:
+                st.success("✅ No missing values found")
+            # Duplicates
+            duplicates = workflow.df.duplicated().sum()
+            if duplicates > 0:
+                st.warning(f"⚠️ Found {duplicates} duplicate rows")
+            else:
+                st.success("✅ No duplicate rows found")
+            workflow.add_insight("Quality check completed", stage, "success")
         elif stage == 4:
+            # Stage 4: Advanced Analysis
+            st.subheader("🔬 Advanced Analysis")
+            numeric_cols = workflow.df.select_dtypes(include=['number']).columns
+            if len(numeric_cols) >= 2:
+                st.markdown("### Correlation Matrix")
+                corr_matrix = workflow.df[numeric_cols].corr()
+                st.dataframe(corr_matrix, use_container_width=True)
+            workflow.add_insight("Advanced analysis completed", stage, "success")
         elif stage == 5:
+            # Stage 5: Summary
+            st.subheader("📈 Analysis Summary")
+            st.markdown("### Analysis Complete!")
+            st.write(f"• Dataset: {len(workflow.df):,} rows × {len(workflow.df.columns):,} columns")
+            st.write(f"• Insights generated: {len(workflow.insights)}")
+            st.write(f"• Analysis stages completed: {st.session_state.current_stage}")
+            # Export options
+            st.markdown("### Export Options")
+            if st.button("📊 Download CSV"):
+                csv = workflow.df.to_csv(index=False)
+                st.download_button(
+                    label="Download CSV",
+                    data=csv,
+                    file_name="analyzed_data.csv",
+                    mime="text/csv"
+                )
             if not st.session_state.analysis_complete:
                 st.session_state.analysis_complete = True
+                st.balloons()
+            workflow.add_insight("Analysis summary completed", stage, "success")
+        # Initialize stats if not exists
+        if not hasattr(workflow, 'stats'):
+            workflow.stats = {
+                'missing_values': workflow.df.isnull().sum().sum(),
+                'duplicates': workflow.df.duplicated().sum()
+            }
     except Exception as e:
         error_msg = f"Error in stage {st.session_state.current_stage}: {str(e)}"
         st.session_state.error_log.append(error_msg)
         logger.error(error_msg)
+        # Show fallback content
+        st.warning("⚠️ Analysis stage encountered an issue. Please try refreshing.")
 def main():
+    """Main application"""
     try:
         initialize_session_state()
         display_header()
         data_loaded = handle_file_upload()
         if data_loaded and st.session_state.workflow is not None:
             main_col, ai_col = st.columns([3, 1])
             with main_col:
+                safe_execute_stage()
             with ai_col:
                 display_ai_assistant()
             display_sidebar()
             if st.session_state.analysis_complete:
+                st.success("🎉 **Analysis Complete!**")
         elif not data_loaded:
+            st.markdown("### 🚀 Welcome to Data Analysis Platform")
+            st.markdown("""
+            **Features:**
+            - 5-stage analysis workflow
+            - AI-powered insights
+            - Data quality assessment
+            - Interactive visualizations
+            - Export capabilities
+            **Supported Formats:** CSV, Excel (.xlsx, .xls)
+            """)
     except Exception as e:
+        error_msg = f"Application error: {str(e)}"
         st.error(f"❌ {error_msg}")
         st.session_state.error_log.append(error_msg)
+        if st.button("🔄 Restart"):
+            for key in list(st.session_state.keys()):
+                del st.session_state[key]
+            st.rerun()
 if __name__ == "__main__":
     main()