File size: 5,725 Bytes
6337cd5
 
2fad68d
f71de9c
6337cd5
2fad68d
f71de9c
78b8458
 
2fad68d
f71de9c
6337cd5
78b8458
2fad68d
 
 
 
 
 
 
 
f71de9c
 
2fad68d
 
1348d26
 
2fad68d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9dd7d77
2fad68d
aa64ef2
2fad68d
 
aa64ef2
2fad68d
 
 
aa64ef2
2fad68d
 
 
aa64ef2
2fad68d
 
 
 
 
aa64ef2
2fad68d
78b8458
 
 
2fad68d
 
 
 
 
 
 
 
 
 
 
78b8458
 
2fad68d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1348d26
2fad68d
 
 
6337cd5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import streamlit as st
import pandas as pd
from data_handler import load_data
from analyzer import DataAnalysisWorkflow, AIAssistant

# Ordered stage names; index+1 is the stage number stored in session state.
_STAGES = ["Data Overview", "Exploration", "Quality Check", "Analysis", "Summary"]

def main():
    """Streamlit entry point for the five-stage data-analysis UI.

    Sets up page config and session state (current stage, analysis workflow,
    AI assistant), handles file upload, and renders the sidebar plus the
    main/AI panes. Takes no arguments and returns None.
    """
    st.set_page_config(
        page_title="Data Analysis Platform",
        page_icon="📊",
        layout="wide"
    )

    st.title("📊 Data Analysis Platform")
    st.markdown("**Optimized workflow with caching and pagination**")

    # Initialize session state on first run only.
    if 'current_stage' not in st.session_state:
        st.session_state.current_stage = 1
    if 'workflow' not in st.session_state:
        st.session_state.workflow = None
    if 'ai_assistant' not in st.session_state:
        st.session_state.ai_assistant = AIAssistant()

    uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx'])

    # Guard clause: nothing to render until a file is provided.
    if uploaded_file is None:
        return

    try:
        df = load_data(uploaded_file)
        st.success(f"✅ Dataset loaded! Shape: {df.shape}")

        # BUG FIX: previously the workflow was created once and kept forever,
        # so uploading a *different* file kept analyzing the stale DataFrame.
        # Rebuild the workflow (and restart at stage 1) whenever the uploaded
        # file identity (name, size) changes.
        file_id = (uploaded_file.name, uploaded_file.size)
        if (st.session_state.workflow is None
                or st.session_state.get('file_id') != file_id):
            st.session_state.file_id = file_id
            st.session_state.workflow = DataAnalysisWorkflow(df)
            st.session_state.current_stage = 1

        _render_sidebar()
        _render_main_panes(df)

    except Exception as e:
        # Broad catch is deliberate: surface any load/analysis failure in the
        # UI instead of crashing the Streamlit script run.
        st.error(f"Error: {str(e)}")
        st.info("Please check your file format and try again.")

def _render_sidebar():
    """Sidebar: progress bar, stage checklist, prev/next nav, recent insights."""
    st.sidebar.header("Progress")
    st.sidebar.progress(st.session_state.current_stage / len(_STAGES))

    for i, stage in enumerate(_STAGES, 1):
        if i == st.session_state.current_stage:
            st.sidebar.write(f"🔄 **{i}. {stage}**")
        elif i < st.session_state.current_stage:
            st.sidebar.write(f"✅ {i}. {stage}")
        else:
            st.sidebar.write(f"⏳ {i}. {stage}")

    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("← Previous") and st.session_state.current_stage > 1:
            st.session_state.current_stage -= 1
            st.rerun()
    with col2:
        if st.button("Next →") and st.session_state.current_stage < len(_STAGES):
            st.session_state.current_stage += 1
            st.rerun()

    st.sidebar.header("💡 Recent Insights")
    for insight in st.session_state.workflow.insights[-3:]:
        st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}")

def _render_main_panes(df):
    """Render the current stage in the wide column and the AI panel beside it."""
    main_col, ai_col = st.columns([3, 1])
    workflow = st.session_state.workflow

    with main_col:
        # Dispatch table instead of a five-way if/elif chain.
        stage_runners = {
            1: workflow.stage_1_overview,
            2: workflow.stage_2_exploration,
            3: workflow.stage_3_cleaning,
            4: workflow.stage_4_analysis,
            5: workflow.stage_5_summary,
        }
        runner = stage_runners.get(st.session_state.current_stage)
        if runner is not None:
            runner()

    with ai_col:
        _render_ai_panel(df, workflow)

def _render_ai_panel(df, workflow):
    """Right-hand pane: AI model selection/insights, quick stats, data quality."""
    st.subheader("🤖 AI Assistant")

    available_models = st.session_state.ai_assistant.get_available_models()
    if available_models:
        selected_model = st.selectbox("AI Model:", available_models)
        if st.button("Get AI Insights"):
            if workflow.insights:
                with st.spinner("Analyzing with AI..."):
                    ai_analysis = st.session_state.ai_assistant.analyze_insights(
                        df, workflow.insights, selected_model
                    )
                    st.write("**AI Analysis:**")
                    st.write(ai_analysis)
            else:
                st.warning("Complete some analysis stages first.")
    else:
        st.warning("No AI models available.")
        st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.")

    st.subheader("📊 Quick Stats")
    if workflow.insights:
        st.metric("Total Insights", len(workflow.insights))
        st.metric("Current Stage", f"{st.session_state.current_stage}/5")
        latest = workflow.insights[-1]
        st.info(f"**Latest:** {latest['insight']}")

    # Heuristic quality score: penalize missing values and duplicate rows.
    # .get(..., 0) avoids a KeyError if the analyzer omits a stats key.
    quality_score = 100
    if workflow.stats.get('missing_values', 0) > 0:
        quality_score -= 30
    if workflow.stats.get('duplicates', 0) > 0:
        quality_score -= 20

    st.metric("Data Quality", f"{quality_score}%")

# Script entry point: runs the app when executed directly
# (e.g. via `streamlit run <this_file>.py`).
if __name__ == "__main__":
    main()