"""Streamlit entry point for the Data Analysis Platform.

Walks an uploaded CSV/XLSX dataset through a five-stage workflow
(overview, exploration, quality check, analysis, summary) and offers an
optional AI-assistant panel that synthesizes collected insights.
"""
import streamlit as st
import pandas as pd  # kept: part of the module's original import surface

from data_handler import load_data
from analyzer import DataAnalysisWorkflow, AIAssistant

# Ordered stage labels; stage numbers are 1-based (1..NUM_STAGES) and map
# onto DataAnalysisWorkflow.stage_<n>_* methods in _run_current_stage().
STAGE_NAMES = ["Data Overview", "Exploration", "Quality Check", "Analysis", "Summary"]
NUM_STAGES = len(STAGE_NAMES)


def _init_session_state() -> None:
    """Seed the session-state keys this app relies on (idempotent per rerun)."""
    st.session_state.setdefault('current_stage', 1)
    st.session_state.setdefault('workflow', None)
    if 'ai_assistant' not in st.session_state:
        # Built once so the assistant (and any model handles it holds)
        # survives Streamlit's script reruns.
        st.session_state.ai_assistant = AIAssistant()


def _ensure_workflow(df, source_name) -> None:
    """(Re)build the workflow, resetting progress when a new file arrives.

    Bug fix: the original only checked ``workflow is None``, so uploading a
    *different* file in the same session kept the stale workflow (and the
    stale stage position) from the previous dataset.
    """
    if (st.session_state.workflow is None
            or st.session_state.get('source_name') != source_name):
        st.session_state.workflow = DataAnalysisWorkflow(df)
        st.session_state.source_name = source_name
        st.session_state.current_stage = 1


def _render_sidebar(workflow) -> None:
    """Progress tracker, Previous/Next navigation, and recent insights."""
    st.sidebar.header("Progress")
    st.sidebar.progress(st.session_state.current_stage / NUM_STAGES)
    for i, stage in enumerate(STAGE_NAMES, 1):
        if i == st.session_state.current_stage:
            st.sidebar.write(f"🔄 **{i}. {stage}**")
        elif i < st.session_state.current_stage:
            st.sidebar.write(f"✅ {i}. {stage}")
        else:
            st.sidebar.write(f"⏳ {i}. {stage}")

    col1, col2 = st.sidebar.columns(2)
    with col1:
        # Button press alone isn't enough: also guard the stage bounds.
        if st.button("← Previous") and st.session_state.current_stage > 1:
            st.session_state.current_stage -= 1
            st.rerun()
    with col2:
        if st.button("Next →") and st.session_state.current_stage < NUM_STAGES:
            st.session_state.current_stage += 1
            st.rerun()

    st.sidebar.header("💡 Recent Insights")
    # Show only the three most recent insights to keep the sidebar short.
    for insight in workflow.insights[-3:]:
        st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}")


def _run_current_stage(workflow) -> None:
    """Dispatch to the workflow method for the active (1-based) stage."""
    stage_runners = [
        workflow.stage_1_overview,
        workflow.stage_2_exploration,
        workflow.stage_3_cleaning,
        workflow.stage_4_analysis,
        workflow.stage_5_summary,
    ]
    stage_runners[st.session_state.current_stage - 1]()


def _render_ai_panel(df, workflow) -> None:
    """AI model picker, on-demand insight analysis, and quick stats."""
    st.subheader("🤖 AI Assistant")
    available_models = st.session_state.ai_assistant.get_available_models()
    if available_models:
        selected_model = st.selectbox("AI Model:", available_models)
        if st.button("Get AI Insights"):
            if workflow.insights:
                with st.spinner("Analyzing with AI..."):
                    ai_analysis = st.session_state.ai_assistant.analyze_insights(
                        df, workflow.insights, selected_model
                    )
                    st.write("**AI Analysis:**")
                    st.write(ai_analysis)
            else:
                st.warning("Complete some analysis stages first.")
    else:
        st.warning("No AI models available.")
        st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.")

    st.subheader("📊 Quick Stats")
    if workflow.insights:
        st.metric("Total Insights", len(workflow.insights))
    st.metric("Current Stage", f"{st.session_state.current_stage}/{NUM_STAGES}")
    if workflow.insights:
        latest = workflow.insights[-1]
        st.info(f"**Latest:** {latest['insight']}")

    # Crude quality indicator: flat penalties for any missing values (-30)
    # and any duplicate rows (-20), regardless of how many.
    quality_score = 100
    if workflow.stats['missing_values'] > 0:
        quality_score -= 30
    if workflow.stats['duplicates'] > 0:
        quality_score -= 20
    st.metric("Data Quality", f"{quality_score}%")


def main() -> None:
    """Render the whole app for one Streamlit script run."""
    st.set_page_config(
        page_title="Data Analysis Platform",
        page_icon="📊",
        layout="wide",
    )
    st.title("📊 Data Analysis Platform")
    st.markdown("**Optimized workflow with caching and pagination**")

    _init_session_state()

    uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx'])
    if uploaded_file is None:
        # Nothing to do until the user supplies a dataset.
        return

    try:
        df = load_data(uploaded_file)
        st.success(f"✅ Dataset loaded! Shape: {df.shape}")
        _ensure_workflow(df, uploaded_file.name)
        workflow = st.session_state.workflow

        _render_sidebar(workflow)

        main_col, ai_col = st.columns([3, 1])
        with main_col:
            _run_current_stage(workflow)
        with ai_col:
            _render_ai_panel(df, workflow)
    except Exception as e:
        # Top-level UI boundary: surface load/analysis failures to the user
        # instead of crashing the app.
        st.error(f"Error: {str(e)}")
        st.info("Please check your file format and try again.")


if __name__ == "__main__":
    main()