# Source: app.py by entropy25 -- commit "Update app.py" (2fad68d, verified)
import streamlit as st
import pandas as pd
from data_handler import load_data
from analyzer import DataAnalysisWorkflow, AIAssistant
# Ordered stage labels; stage numbers stored in session state are 1-based.
_STAGES = ("Data Overview", "Exploration", "Quality Check", "Analysis", "Summary")

# Name of the workflow method that renders each stage.
_STAGE_METHODS = {
    1: "stage_1_overview",
    2: "stage_2_exploration",
    3: "stage_3_cleaning",
    4: "stage_4_analysis",
    5: "stage_5_summary",
}


def _render_sidebar(workflow):
    """Draw the progress indicator, stage navigation and recent insights."""
    total_stages = len(_STAGES)
    current = st.session_state.current_stage

    st.sidebar.header("Progress")
    st.sidebar.progress(current / total_stages)
    for number, label in enumerate(_STAGES, 1):
        if number == current:
            st.sidebar.write(f"🔄 **{number}. {label}**")
        elif number < current:
            st.sidebar.write(f"✅ {number}. {label}")
        else:
            st.sidebar.write(f"⏳ {number}. {label}")

    # Navigation: a click only has an effect while the move stays in range.
    previous_col, next_col = st.sidebar.columns(2)
    with previous_col:
        if st.button("← Previous") and current > 1:
            st.session_state.current_stage -= 1
            st.rerun()
    with next_col:
        if st.button("Next →") and current < total_stages:
            st.session_state.current_stage += 1
            st.rerun()

    st.sidebar.header("💡 Recent Insights")
    for insight in workflow.insights[-3:]:
        st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}")


def _render_current_stage(workflow):
    """Call the workflow method registered for the current stage, if any."""
    method_name = _STAGE_METHODS.get(st.session_state.current_stage)
    if method_name is not None:
        getattr(workflow, method_name)()


def _render_ai_panel(df, workflow):
    """Draw the AI assistant controls, quick stats and data-quality metric."""
    assistant = st.session_state.ai_assistant
    insights = workflow.insights

    st.subheader("🤖 AI Assistant")
    available_models = assistant.get_available_models()
    if available_models:
        selected_model = st.selectbox("AI Model:", available_models)
        if st.button("Get AI Insights"):
            if insights:
                with st.spinner("Analyzing with AI..."):
                    ai_analysis = assistant.analyze_insights(
                        df, insights, selected_model
                    )
                st.write("**AI Analysis:**")
                st.write(ai_analysis)
            else:
                st.warning("Complete some analysis stages first.")
    else:
        st.warning("No AI models available.")
        st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.")

    st.subheader("📊 Quick Stats")
    if insights:
        st.metric("Total Insights", len(insights))
        st.metric("Current Stage", f"{st.session_state.current_stage}/{len(_STAGES)}")
        st.info(f"**Latest:** {insights[-1]['insight']}")

    # Coarse score: start at 100 and apply a fixed penalty per problem class.
    quality_score = 100
    if workflow.stats['missing_values'] > 0:
        quality_score -= 30
    if workflow.stats['duplicates'] > 0:
        quality_score -= 20
    st.metric("Data Quality", f"{quality_score}%")


def main():
    """Render the Streamlit data-analysis page.

    Configures the page, initialises the ``current_stage``, ``workflow`` and
    ``ai_assistant`` session-state entries, and shows a CSV/XLSX uploader.
    Once a file is uploaded it is loaded with ``load_data`` and the sidebar,
    the current workflow stage and the AI assistant panel are rendered.

    A new ``DataAnalysisWorkflow`` is created (and the stage reset to 1)
    whenever the uploaded file's name or size differs from the one the
    current workflow was built from, so insights from a previous dataset are
    never shown against a new one.

    Any ``Exception`` raised while loading or rendering is reported through
    ``st.error`` instead of propagating.
    """
    st.set_page_config(
        page_title="Data Analysis Platform",
        page_icon="📊",
        layout="wide",
    )
    st.title("📊 Data Analysis Platform")
    st.markdown("**Optimized workflow with caching and pagination**")

    # Initialize session state
    if 'current_stage' not in st.session_state:
        st.session_state.current_stage = 1
    if 'workflow' not in st.session_state:
        st.session_state.workflow = None
    if 'ai_assistant' not in st.session_state:
        st.session_state.ai_assistant = AIAssistant()

    uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx'])
    if uploaded_file is None:
        return

    # Top-level UI boundary: surface any failure as an in-page error message.
    try:
        df = load_data(uploaded_file)
        st.success(f"✅ Dataset loaded! Shape: {df.shape}")

        # Rebuild the workflow when a different file replaces the previous one.
        file_key = (uploaded_file.name, uploaded_file.size)
        if (
            st.session_state.workflow is None
            or st.session_state.get("loaded_file_key") != file_key
        ):
            st.session_state.workflow = DataAnalysisWorkflow(df)
            st.session_state.loaded_file_key = file_key
            st.session_state.current_stage = 1
        workflow = st.session_state.workflow

        _render_sidebar(workflow)

        main_col, ai_col = st.columns([3, 1])
        with main_col:
            _render_current_stage(workflow)
        with ai_col:
            _render_ai_panel(df, workflow)
    except Exception as e:
        st.error(f"Error: {str(e)}")
        st.info("Please check your file format and try again.")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()