import streamlit as st
import pandas as pd
from data_handler import load_data
from analyzer import DataAnalysisWorkflow, AIAssistant
def main() -> None:
    """Render the Streamlit data-analysis app.

    Flow: page config -> session-state init -> file upload -> staged
    analysis workflow (5 stages) with a progress sidebar and an AI
    assistant side panel.
    """
    st.set_page_config(
        page_title="Data Analysis Platform",
        page_icon="📊",
        layout="wide",
    )
    st.title("📊 Data Analysis Platform")
    st.markdown("**Optimized workflow with caching and pagination**")

    # Initialize session state keys once per browser session.
    if 'current_stage' not in st.session_state:
        st.session_state.current_stage = 1
    if 'workflow' not in st.session_state:
        st.session_state.workflow = None
    if 'ai_assistant' not in st.session_state:
        st.session_state.ai_assistant = AIAssistant()

    uploaded_file = st.file_uploader("Upload Dataset", type=['csv', 'xlsx'])
    if uploaded_file is None:
        # Nothing to do until the user supplies a dataset.
        return

    try:
        df = load_data(uploaded_file)
        st.success(f"✅ Dataset loaded! Shape: {df.shape}")

        # Rebuild the workflow when a *different* file is uploaded.
        # Previously the only guard was `workflow is None`, so a second
        # upload silently reused the stale workflow from the first dataset.
        if (st.session_state.workflow is None
                or st.session_state.get('uploaded_name') != uploaded_file.name):
            st.session_state.workflow = DataAnalysisWorkflow(df)
            st.session_state.uploaded_name = uploaded_file.name
            st.session_state.current_stage = 1

        _render_sidebar()

        # Main content (left) plus AI assistant panel (right).
        main_col, ai_col = st.columns([3, 1])
        with main_col:
            _render_stage()
        with ai_col:
            _render_ai_panel(df)

    except Exception as e:
        # Top-level boundary: surface the error to the user instead of crashing.
        st.error(f"Error: {str(e)}")
        st.info("Please check your file format and try again.")


def _render_sidebar() -> None:
    """Draw the progress tracker, prev/next navigation, and recent insights."""
    st.sidebar.header("Progress")
    st.sidebar.progress(st.session_state.current_stage / 5)

    stages = ["Data Overview", "Exploration", "Quality Check", "Analysis", "Summary"]
    for i, stage in enumerate(stages, 1):
        if i == st.session_state.current_stage:
            st.sidebar.write(f"👉 **{i}. {stage}**")
        elif i < st.session_state.current_stage:
            st.sidebar.write(f"✅ {i}. {stage}")
        else:
            st.sidebar.write(f"⏳ {i}. {stage}")

    # Stage navigation; st.rerun() redraws immediately with the new stage.
    col1, col2 = st.sidebar.columns(2)
    with col1:
        if st.button("◀ Previous") and st.session_state.current_stage > 1:
            st.session_state.current_stage -= 1
            st.rerun()
    with col2:
        if st.button("Next ▶") and st.session_state.current_stage < 5:
            st.session_state.current_stage += 1
            st.rerun()

    # Show only the three most recent insights to keep the sidebar compact.
    st.sidebar.header("💡 Recent Insights")
    for insight in st.session_state.workflow.insights[-3:]:
        st.sidebar.info(f"**Stage {insight['stage']}:** {insight['insight']}")


def _render_stage() -> None:
    """Dispatch to the workflow method for the currently selected stage."""
    workflow = st.session_state.workflow
    stage_handlers = {
        1: workflow.stage_1_overview,
        2: workflow.stage_2_exploration,
        3: workflow.stage_3_cleaning,
        4: workflow.stage_4_analysis,
        5: workflow.stage_5_summary,
    }
    handler = stage_handlers.get(st.session_state.current_stage)
    if handler is not None:
        handler()


def _render_ai_panel(df) -> None:
    """Draw the AI assistant column: model picker, insight analysis, quick stats.

    Args:
        df: the loaded dataset (pandas DataFrame from load_data), passed
            through to the AI assistant for context.
    """
    st.subheader("🤖 AI Assistant")

    available_models = st.session_state.ai_assistant.get_available_models()
    if available_models:
        selected_model = st.selectbox("AI Model:", available_models)
        if st.button("Get AI Insights"):
            if st.session_state.workflow.insights:
                with st.spinner("Analyzing with AI..."):
                    ai_analysis = st.session_state.ai_assistant.analyze_insights(
                        df, st.session_state.workflow.insights, selected_model
                    )
                st.write("**AI Analysis:**")
                st.write(ai_analysis)
            else:
                st.warning("Complete some analysis stages first.")
    else:
        st.warning("No AI models available.")
        st.info("Set GOOGLE_API_KEY or OPENAI_API_KEY environment variables.")

    st.subheader("📈 Quick Stats")
    if st.session_state.workflow.insights:
        st.metric("Total Insights", len(st.session_state.workflow.insights))
    st.metric("Current Stage", f"{st.session_state.current_stage}/5")

    if st.session_state.workflow.insights:
        latest = st.session_state.workflow.insights[-1]
        st.info(f"**Latest:** {latest['insight']}")

    # Simple heuristic quality score: penalize missing values and duplicates.
    # Assumes workflow.stats exposes 'missing_values' and 'duplicates' counts
    # (set by DataAnalysisWorkflow — confirm in analyzer.py).
    quality_score = 100
    if st.session_state.workflow.stats['missing_values'] > 0:
        quality_score -= 30
    if st.session_state.workflow.stats['duplicates'] > 0:
        quality_score -= 20
    st.metric("Data Quality", f"{quality_score}%")
# Script entry point: launch the app only when run directly (not on import).
if __name__ == "__main__":
    main()