Jayandhan Soruban
New APIs added
8437d61
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import streamlit as st
import pandas as pd
import os
import time
import shutil
import tempfile
import base64
import traceback
from langgraph.graph import START, StateGraph, END
# --- Import Agent Logic ---
# Assumes these are synchronous functions returning a dictionary with 'success' and structured data
from Cleaner_Agent import DataAnalystAgent, AgentStateModel
from Report_agent import Report_agent
from Visualizer_agent import Visualizer_agent
# --- Matplotlib Backend Fix ---
import matplotlib
matplotlib.use('Agg')
# --- Streamlit Page Configuration ---
st.set_page_config(
page_title="AI Data Analyst",
page_icon="πŸ€–",
layout="wide",
initial_sidebar_state="expanded"
)
# --- Custom CSS for an Extremely Impressive and Cool UI ---
st.markdown("""
<style>
/* Main App Background */
body {
color: #E0E0E0; /* Light grey text */
background-color: #0F172A; /* Deep navy blue */
}
.main {
background-color: #0F172A;
}
/* Page Title & Headers */
h1, h2, h3 {
font-family: 'Roboto', sans-serif;
font-weight: bold;
text-align: center;
}
h1 {
color: #FFFFFF;
text-shadow: 2px 2px 8px rgba(0, 255, 255, 0.5);
}
h3 {
color: #A0AEC0; /* Lighter grey for subtitle */
}
/* Sidebar Styling */
.st-sidebar {
background-color: #1E293B; /* Slightly lighter navy */
border-right: 2px solid #334155;
}
.st-sidebar h2 {
color: #FFFFFF;
text-align: left;
}
/* Start Button & Interactive Elements */
.stButton>button {
color: #FFFFFF;
background-image: linear-gradient(45deg, #3B82F6 0%, #8B5CF6 100%);
border: none;
border-radius: 12px;
padding: 15px 30px;
font-size: 18px;
font-weight: bold;
transition: all 0.3s ease;
box-shadow: 0 4px 15px 0 rgba(59, 130, 246, 0.4);
}
.stButton>button:hover {
transform: translateY(-3px);
box-shadow: 0 8px 25px 0 rgba(139, 92, 246, 0.5);
}
/* Card Layout for Content */
.st-emotion-cache-r421ms { /* Streamlit's default container class */
background-color: #1E293B;
border: 2px solid transparent;
border-image: linear-gradient(45deg, #3B82F6, #8B5CF6) 1;
border-radius: 12px;
box-shadow: 0 4px 20px 0 rgba(0, 0, 0, 0.3);
padding: 25px;
transition: all 0.3s ease;
}
.st-emotion-cache-r421ms:hover {
transform: translateY(-5px);
box-shadow: 0 8px 30px 0 rgba(139, 92, 246, 0.4);
}
/* Custom Class for Empty State */
.empty-state {
text-align: center;
padding: 40px;
border: 2px dashed #334155;
border-radius: 12px;
}
.empty-state h2 {
color: #FFFFFF;
}
.empty-state p {
color: #A0AEC0;
font-size: 1.1rem;
}
/* Custom Class for Live Status Log */
.status-log {
background-color: #1E293B;
border-radius: 12px;
padding: 20px;
font-family: 'Courier New', Courier, monospace;
color: #E0E0E0;
}
</style>
""", unsafe_allow_html=True)
# --- SYNC HELPER FUNCTION ---
def run_report_and_viz_agents(df_path: str, output_dir: str):
"""
Runs the Report and Visualizer agents sequentially.
"""
report_result = Report_agent(df_path=df_path)
viz_result = Visualizer_agent(df_path=df_path, output_dir=output_dir)
return report_result, viz_result
# --- HELPER FUNCTIONS ---
def cleanup_session_files():
"""Deletes the temporary directory and clears associated session state keys."""
if 'temp_dir_path' in st.session_state and st.session_state.temp_dir_path:
temp_dir = st.session_state.temp_dir_path
if os.path.exists(temp_dir):
try:
shutil.rmtree(temp_dir)
except Exception as e:
print(f"Error removing temp directory {temp_dir}: {e}")
# Extended list of keys to clear for a full reset
keys_to_clear = [
'temp_dir_path', 'pipeline_run_complete',
'final_report_structured', 'final_visuals_structured'
]
for key in keys_to_clear:
st.session_state.pop(key, None)
@st.cache_data
def get_image_as_base64(path):
"""Reads an image file and returns its Base64 encoded string."""
with open(path, "rb") as f:
data = f.read()
return base64.b64encode(data).decode()
def display_empty_state():
"""Shows a visually appealing message when no file is uploaded."""
st.markdown(
"""
<div class="empty-state">
<h2>Welcome to the AI Data Analyst</h2>
<p>Upload your data and provide instructions in the sidebar to begin.</p>
<p>Let's turn your raw data into stunning insights! ✨</p>
</div>
""",
unsafe_allow_html=True
)
# --- MAIN APP ---
def main():
# --- HEADER ---
st.title("πŸ€– AI Data Analyst")
st.markdown("<h3>Derive actionable insights from raw data in minutes from a specialized team of AI agents</h3>", unsafe_allow_html=True)
st.write("")
# --- SIDEBAR ---
with st.sidebar:
st.header("βš™οΈ Pipeline Configuration")
uploaded_file = st.file_uploader("1. Upload Your Data File", type=["csv", "xlsx"])
instructions = st.text_area("2. Describe Your Analysis Goal", height=150, placeholder="e.g., 'Analyze monthly sales trends and identify top-performing products.'")
col1, col2 = st.columns(2)
start_button = col1.button("✨ Run Analysis", type="primary")
if col2.button("🧹 New Analysis"):
cleanup_session_files()
st.success("Session cleared.")
time.sleep(1)
st.rerun()
# --- MAIN CONTENT AREA ---
# Display empty state if no file is uploaded.
if not uploaded_file:
display_empty_state()
return
# Show data preview if a file is uploaded.
with st.expander("πŸ“Š **View Data Preview**", expanded=False):
try:
uploaded_file.seek(0)
df_preview = pd.read_csv(uploaded_file, nrows=100) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file, nrows=100)
st.dataframe(df_preview, use_container_width=True)
except Exception as e:
st.error(f"Could not read the file preview. Error: {e}")
# --- PIPELINE EXECUTION ---
if start_button:
if not instructions:
st.warning("Please describe your analysis goal before starting.")
return
# Clean up previous session and set up a new one
cleanup_session_files()
st.session_state.temp_dir_path = tempfile.mkdtemp().replace('\\', '/')
temp_file_path = os.path.join(st.session_state.temp_dir_path, uploaded_file.name).replace('\\', '/')
try:
with open(temp_file_path, "wb") as f:
f.write(uploaded_file.getbuffer())
# UI container for live logs
log_container = st.container()
with log_container:
st.subheader("πŸ€– Agent Status Log")
status_log = st.empty()
log_messages = ["[INITIALIZING] Pipeline started..."]
status_log.markdown(f"<div class='status-log'>{'<br>'.join(log_messages)}</div>", unsafe_allow_html=True)
# --- STAGE 1: DATA CLEANING ---
log_messages.append("πŸš€ **Stage 1/3:** Data Cleaning Agent activated...")
status_log.markdown(f"<div class='status-log'>{'<br>'.join(log_messages)}</div>", unsafe_allow_html=True)
with st.spinner("Agent is analyzing and cleaning the data..."):
cleaner_agent = DataAnalystAgent()
graph = StateGraph(AgentStateModel)
graph.add_node("supervisor", cleaner_agent.supervisor_node)
graph.add_node("PreprocessingPlanner_node", cleaner_agent.PreprocessingPlanner_node)
graph.add_node("Cleaner_node", cleaner_agent.Cleaner_node)
graph.add_edge(START, "supervisor")
cleaning_app = graph.compile()
initial_state = AgentStateModel(Instructions=instructions, Path=temp_file_path, messages=[], Analysis=[])
final_cleaning_state = cleaning_app.invoke(initial_state)
if final_cleaning_state.get('next') != END:
st.error("❗️ **Data Cleaning Failed.** Please check instructions or data.")
cleanup_session_files()
return
log_messages.append("βœ… **Stage 1/3:** Data Cleaning Complete!")
status_log.markdown(f"<div class='status-log'>{'<br>'.join(log_messages)}</div>", unsafe_allow_html=True)
st.balloons()
# --- STAGES 2 & 3: REPORTING & VISUALIZATION ---
log_messages.append("πŸš€ **Stages 2 & 3:** Reporting and Visualization agents activated...")
status_log.markdown(f"<div class='status-log'>{'<br>'.join(log_messages)}</div>", unsafe_allow_html=True)
with st.spinner("AI agents are generating the report and plots..."):
report_result, viz_result = run_report_and_viz_agents(
df_path=temp_file_path,
output_dir=st.session_state.temp_dir_path
)
# Process and store results in session state
if report_result and report_result.get("success"):
st.session_state.final_report_structured = report_result.get("parsed_report")
else:
st.error(f"Report generation failed: {report_result.get('error', 'Unknown error')}")
if viz_result and viz_result.get("success"):
st.session_state.final_visuals_structured = viz_result.get("parsed_visuals")
else:
st.error(f"Visualization generation failed: {viz_result.get('error', 'Unknown error')}")
# Final log update
if st.session_state.final_report_structured and st.session_state.final_visuals_structured:
log_messages.append("βœ… **Stages 2 & 3:** Report and Visualizations Complete!")
log_messages.append("πŸŽ‰ **Pipeline Complete!** Displaying results below.")
st.session_state.pipeline_run_complete = True
else:
log_messages.append("❗️ **PIPELINE FAILED:** One or more agents failed. Check error messages above.")
status_log.markdown(f"<div class='status-log'>{'<br>'.join(log_messages)}</div>", unsafe_allow_html=True)
except Exception as e:
st.error("An unexpected pipeline error occurred.")
st.code(traceback.format_exc())
cleanup_session_files()
return
# Rerun to display results from session state
st.rerun()
# --- DISPLAY RESULTS (persisted in session state) ---
if st.session_state.get("pipeline_run_complete"):
st.write("---")
st.header("✨ Analysis Results")
# Display the structured report
if st.session_state.get("final_report_structured"):
report_data = st.session_state.final_report_structured
with st.container(border=True):
st.subheader(report_data.get("subject", "Business Report"))
# Use columns for a better summary layout
col1, col2 = st.columns(2)
with col1:
st.info("Executive Summary")
st.markdown(report_data.get("executive_summary", "Not available."))
with col2:
st.info("πŸ’‘ Biggest Strategic Opportunity")
st.markdown(report_data.get("strategic_opportunity", "Not available."))
st.info("πŸ”‘ Key Insights & Patterns")
st.markdown(report_data.get("key_insights_and_patterns", "Not available."))
with st.expander("View Full Detailed Report"):
st.markdown("---")
st.subheader("Data Overview and Quality Review")
st.markdown(report_data.get("data_overview_and_quality_review", "Not available."))
st.markdown("---")
st.subheader("Descriptive and Diagnostic Analysis")
st.markdown(report_data.get("descriptive_and_diagnostic_analysis", "Not available."))
st.markdown("---")
st.subheader("Recommendations and Forecast")
st.markdown(report_data.get("recommendations_and_forecast", "Not available."))
# Display the visualizations
if st.session_state.get("final_visuals_structured"):
visuals_data = st.session_state.final_visuals_structured
st.write("")
with st.container(border=True):
st.subheader(visuals_data.get("report_title", "Generated Visualizations"))
visualizations = visuals_data.get("visualizations", [])
if not visualizations:
st.warning("The visualization agent did not return any visuals.")
else:
# Create a grid layout for visualizations
cols = st.columns(2)
col_idx = 0
for vis in visualizations:
with cols[col_idx % 2]:
try:
st.subheader(vis.get("title", "Untitled Chart"))
image_path = vis.get("file_path")
if image_path and os.path.exists(image_path):
st.image(image_path, use_column_width=True)
st.markdown(f"**Insight:** {vis.get('insight', 'No insight provided.')}")
st.caption(f"File: {os.path.basename(image_path)}")
st.write("---")
else:
st.warning(f"Chart image not found at path: {image_path}")
except Exception as e:
st.error(f"Could not display visual '{vis.get('title')}': {e}")
col_idx += 1
if __name__ == "__main__":
main()