import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import base64
from datetime import datetime
import json
import os
import sys
from pathlib import Path
import time
import re
# Make this script's directory importable so the agent module that lives next
# to it can be found.
_APP_DIR = Path(__file__).parent
sys.path.append(str(_APP_DIR))

try:
    from data_analysis_agent import DataAnalysisAgent, DataAnalysisConfig
except ImportError:
    # Nothing below can work without the agent module: explain and halt.
    st.error("❌ Please ensure data_analysis_agent.py is in the same directory")
    st.info("Download both files and place them in the same folder")
    st.stop()

# Page configuration: title, icon, layout and the entries of the app menu.
_MENU_ITEMS = {
    'Get Help': 'https://github.com/yourusername/ai-data-analysis-agent',
    'Report a bug': "https://github.com/yourusername/ai-data-analysis-agent/issues",
    'About': "# AI Data Analysis Agent\nPowered by Llama 3 & LangGraph",
}
st.set_page_config(
    page_title="AI Data Analysis Agent",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=_MENU_ITEMS,
)

# Custom CSS hook; the payload injected here is a single newline.
st.markdown("\n", unsafe_allow_html=True)
def initialize_session_state():
    """Seed ``st.session_state`` with a default for every key the app reads.

    Keys that are already present are left untouched, so values set during an
    earlier run of the script are preserved.
    """
    defaults = {
        'analysis_results': None,
        'dataset': None,
        'agent': None,
        'groq_api_key': "",
        'model_name': "llama3-70b-8192",
        'analysis_complete': False,
    }
    for key, default in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = default
def create_agent():
    """Build a ``DataAnalysisAgent`` from the configured key and model.

    The ``GROQ_API_KEY`` environment variable takes precedence over the key
    held in session state.

    Returns:
        The agent, or ``None`` when no key is available or construction
        raises (in which case the error is shown with ``st.error``).
    """
    try:
        # Environment first, session state as the fallback.
        api_key = os.environ.get('GROQ_API_KEY')
        if not api_key:
            api_key = st.session_state.get('groq_api_key', '')

        if api_key:
            return DataAnalysisAgent(
                groq_api_key=api_key,
                model_name=st.session_state.get('model_name', 'llama3-70b-8192'),
            )
        return None
    except Exception as exc:
        st.error(f"Failed to create agent: {str(exc)}")
        return None
def sidebar_config():
    """Render the sidebar and report whether a Groq API key is available.

    Draws, in order: the branding header, API-key setup, the model picker,
    the analysis settings, dataset statistics (only once a dataset is loaded)
    and a help expander. The selected model and settings are written to
    ``st.session_state``.

    Returns:
        bool: True when a key came from ``GROQ_API_KEY`` or the text box.
    """
    model_labels = {
        "llama3-70b-8192": "Llama 3 70B (Recommended)",
        "llama3-8b-8192": "Llama 3 8B (Faster)",
        "mixtral-8x7b-32768": "Mixtral 8x7B",
    }
    industries = ["General", "Retail", "Healthcare", "Finance", "Manufacturing", "Technology"]

    with st.sidebar:
        st.markdown("""
🤖
AI Agents on action
Powered by Llama 3
""", unsafe_allow_html=True)
        st.markdown("---")

        # A key from the environment wins; manual entry is only offered
        # when the variable is absent.
        key_from_env = os.environ.get('GROQ_API_KEY')
        if key_from_env:
            st.session_state.groq_api_key = key_from_env
            st.success("✅ API Key Configured")
            api_key_configured = True
        else:
            st.subheader("🔑 API Setup")
            st.info("💡 Set GROQ_API_KEY environment variable")
            typed_key = st.text_input(
                "Groq API Key",
                type="password",
                value=st.session_state.groq_api_key,
                help="Get your API key from console.groq.com",
            )
            api_key_configured = bool(typed_key)
            if api_key_configured:
                st.session_state.groq_api_key = typed_key
        st.markdown("---")

        # Model selection
        st.subheader("🧠 AI Model")
        st.session_state.model_name = st.selectbox(
            "Choose Model",
            options=list(model_labels),
            format_func=model_labels.get,
            index=0,
        )
        st.markdown("---")

        # Analysis options
        st.subheader("⚙️ Analysis Settings")
        st.session_state.industry_type = st.selectbox(
            "Industry Focus",
            industries,
            help="Customize insights for your industry",
        )
        st.session_state.enable_advanced = st.toggle(
            "Advanced Analysis",
            value=True,
            help="Include correlation analysis and advanced insights",
        )
        st.session_state.auto_insights = st.toggle(
            "Auto-Generate Insights",
            value=True,
            help="Automatically generate business insights",
        )
        st.markdown("---")

        # Quick stats for the loaded dataset, plus result counters once an
        # analysis has been stored.
        loaded = st.session_state.dataset
        if loaded is not None:
            st.subheader("📊 Dataset Info")
            left, right = st.columns(2)
            left.metric("Rows", f"{loaded.shape[0]:,}")
            left.metric("Columns", loaded.shape[1])
            right.metric("Missing", f"{loaded.isnull().sum().sum():,}")
            right.metric("Size", f"{loaded.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

            stored = st.session_state.analysis_results
            if stored:
                # Count only string entries with more than 10 characters of
                # non-whitespace-trimmed content.
                insight_total = sum(
                    1 for entry in stored.get('insights', [])
                    if isinstance(entry, str) and len(entry.strip()) > 10
                )
                recommendation_total = sum(
                    1 for entry in stored.get('recommendations', [])
                    if isinstance(entry, str) and len(entry.strip()) > 10
                )
                st.markdown("---")
                st.subheader("🧠 Analysis Results")
                left, right = st.columns(2)
                left.metric("💡 Insights", insight_total)
                right.metric("🎯 Recommendations", recommendation_total)
            st.markdown("---")

        # Help section
        with st.expander("💡 Quick Help"):
            st.markdown("""
**Supported Formats:**
- CSV files (.csv)
- Excel files (.xlsx, .xls)
- JSON files (.json)
**Best Practices:**
- Clean column names
- Handle missing values
- Include date columns
- Mix numeric & categorical data
**Need Help?**
- [Documentation](https://github.com/yourusername/ai-data-analysis-agent)
- [Examples](https://github.com/yourusername/ai-data-analysis-agent/examples)
""")

    return api_key_configured
def display_hero_section():
    """Render the page title followed by the two-line tagline."""
    # The title ends with a newline. It is written as an explicit escape
    # because a single-quoted literal cannot span source lines.
    st.markdown('AIDA-AI Data Analyzer\n', unsafe_allow_html=True)
    st.markdown("""
Transform your raw data into actionable business insights with the power of AI.
Upload, analyze, and discover patterns automatically using intelligent agents.
""", unsafe_allow_html=True)
def display_features():
    """Render the "what this agent can do" heading and three feature cards.

    The cards are laid out left to right in three equal columns.
    """
    cards = (
        """
🧠
Intelligent Analysis
Our AI automatically understands your data structure, identifies patterns,
and generates meaningful insights without any manual configuration.
""",
        """
📊
Smart Visualizations
Intelligently creates the most appropriate charts and graphs for your data, with interactive visualizations.
""",
        """
🎯
Actionable Recommendations
Get specific, measurable recommendations for improving your business
based on data-driven insights.
""",
    )

    st.markdown("### ✨ What This AI Agent Can Do")
    for column, card in zip(st.columns(3), cards):
        with column:
            st.markdown(card, unsafe_allow_html=True)
def _read_uploaded_file(uploaded_file):
    """Parse an uploaded CSV, Excel or JSON file into a DataFrame.

    The extension is matched case-insensitively so that e.g. ``DATA.CSV`` is
    accepted. Returns ``None`` when the extension is not supported.
    """
    file_name = uploaded_file.name.lower()
    if file_name.endswith('.csv'):
        return pd.read_csv(uploaded_file)
    if file_name.endswith(('.xlsx', '.xls')):
        return pd.read_excel(uploaded_file)
    if file_name.endswith('.json'):
        return pd.read_json(uploaded_file)
    return None


def upload_dataset():
    """Render the upload widget and load the chosen file into session state.

    On success the DataFrame is stored in ``st.session_state.dataset`` and the
    file name in ``st.session_state.uploaded_filename``; summary cards, a
    ten-row preview and a per-column table are shown.

    Returns:
        bool: True when a dataset was loaded during this run, False when no
        file is selected, the format is unsupported, or reading failed (the
        error is reported with ``st.error``).
    """
    st.markdown("### 📊 Upload Your Dataset")
    uploaded_file = st.file_uploader(
        # A non-empty label is kept for accessibility; it is hidden by
        # label_visibility="collapsed".
        "Upload dataset",
        type=['csv', 'xlsx', 'xls', 'json'],
        help="Drag and drop your file here or click to browse",
        label_visibility="collapsed"
    )

    if uploaded_file is None:
        # Show upload placeholder
        st.markdown("""
📁
Drop your dataset here
Supports CSV, Excel, and JSON files • Max 200MB
""", unsafe_allow_html=True)
        return False

    try:
        with st.spinner("🔍 Processing your dataset..."):
            time.sleep(1)  # Small delay for UX
            df = _read_uploaded_file(uploaded_file)
        if df is None:
            st.error("Unsupported file format")
            return False

        st.session_state.dataset = df
        st.session_state.uploaded_filename = uploaded_file.name
        st.success(f"✅ Successfully loaded **{uploaded_file.name}**")

        # Summary cards.
        # NOTE(review): the first two card templates contain no
        # placeholders, so nothing is displayed in them — confirm the
        # intended markup.
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.markdown("\n", unsafe_allow_html=True)
        with col2:
            st.markdown("\n", unsafe_allow_html=True)
        with col3:
            missing = df.isnull().sum().sum()
            st.markdown(f"""
{missing:,}
Missing Values
""", unsafe_allow_html=True)
        with col4:
            size_mb = df.memory_usage(deep=True).sum() / 1024**2
            st.markdown(f"""
{size_mb:.1f} MB
File Size
""", unsafe_allow_html=True)
        st.markdown("\n", unsafe_allow_html=True)

        # Data preview
        st.markdown("#### 📋 Data Preview")
        st.dataframe(
            df.head(10),
            use_container_width=True,
            height=300
        )

        # Per-column details; the sample value is the first row's entry, or
        # an empty string for an empty frame.
        with st.expander("📊 Detailed Column Information", expanded=False):
            col_info = pd.DataFrame({
                'Column': df.columns,
                'Type': df.dtypes.astype(str),
                'Non-Null': df.count(),
                'Null Count': df.isnull().sum(),
                'Unique Values': df.nunique(),
                'Sample Data': [str(df[col].iloc[0]) if len(df) > 0 else '' for col in df.columns]
            })
            st.dataframe(col_info, use_container_width=True)
        return True
    except Exception as e:
        # UI boundary: any parsing or rendering failure is shown to the user
        # instead of crashing the page.
        st.error(f"❌ Error reading file: {str(e)}")
        return False
def run_analysis():
    """Run the agent on the loaded dataset while showing staged progress.

    Preconditions (each reported to the user and causing an early return):
    a dataset in ``st.session_state.dataset``, an API key from
    ``GROQ_API_KEY`` or session state, and a successfully created agent.

    The dataset is written to a CSV inside a per-run temporary directory and
    that path is handed to ``agent.analyze_dataset``. On success the results
    are stored in session state, ``analysis_complete`` is set and the script
    is rerun. Failures are reported with ``st.error``.
    """
    # Local import: only this function needs it.
    import tempfile

    if st.session_state.dataset is None:
        st.warning("Please upload a dataset first.")
        return

    # Check for API key from environment or session state
    api_key = os.environ.get('GROQ_API_KEY') or st.session_state.get('groq_api_key')
    if not api_key:
        st.warning("Please set GROQ_API_KEY environment variable or enter it in the sidebar.")
        return

    with st.spinner("🤖 Initializing AI agent..."):
        agent = create_agent()
    if agent is None:
        st.error("Failed to initialize AI agent. Check your API key.")
        return
    st.session_state.agent = agent

    # Progress widgets
    progress_container = st.container()
    with progress_container:
        st.markdown("### 🚀 AI Analysis in Progress")
        progress_bar = st.progress(0)
        status_text = st.empty()

        steps = [
            ("🔍", "Analyzing dataset structure"),
            ("📊", "Examining columns and data quality"),
            ("🧠", "Generating AI insights"),
            ("📈", "Planning visualizations"),
            ("🎨", "Creating charts"),
            ("🎯", "Formulating recommendations")
        ]
        # One placeholder per step so each indicator can be redrawn later.
        # NOTE(review): the indicator template contains no placeholders, so
        # the icon/description pairs are not shown — confirm intended markup.
        step_indicators = []
        for column in st.columns(len(steps)):
            with column:
                placeholder = st.empty()
                placeholder.markdown("\n", unsafe_allow_html=True)
                step_indicators.append(placeholder)

    # Steps shown, one second apart, before the agent is invoked.
    staged_updates = [
        (0, "**🔍 AI agent analyzing dataset structure...**", 15),
        (1, "**📊 Analyzing columns and data quality...**", 30),
        (2, "**🧠 Generating insights with AI...**", 50),
        (3, "**📈 Planning optimal visualizations...**", 70),
    ]

    try:
        for step_index, message, percent in staged_updates:
            step_indicators[step_index].markdown("\n", unsafe_allow_html=True)
            status_text.markdown(message)
            progress_bar.progress(percent)
            time.sleep(1)

        step_indicators[4].markdown("\n", unsafe_allow_html=True)
        status_text.markdown("**🎨 Creating beautiful visualizations...**")
        progress_bar.progress(85)

        # A private temporary directory keeps concurrent sessions from
        # overwriting each other's file and guarantees removal on every
        # exit path, including exceptions.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file = os.path.join(temp_dir, "temp_dataset.csv")
            st.session_state.dataset.to_csv(temp_file, index=False)
            results = agent.analyze_dataset(temp_file)

        step_indicators[5].markdown("\n", unsafe_allow_html=True)
        status_text.markdown("**🎯 Formulating actionable recommendations...**")
        progress_bar.progress(100)

        if "error" in results:
            st.error(f"❌ Analysis failed: {results['error']}")
            return

        st.session_state.analysis_results = results
        st.session_state.analysis_complete = True

        status_text.markdown("**✅ Analysis completed successfully!**")
        st.balloons()
        time.sleep(1)

        # Clear progress and show results
        progress_container.empty()
        st.rerun()
    except Exception as e:
        # UI boundary: show the failure rather than crash the page.
        st.error(f"❌ Analysis failed: {str(e)}")
# Leading list numbering such as "1. ", "2) " or "10: ". The marker must be
# followed by whitespace or a non-digit character so that a decimal ("3.5%")
# or a time ("12:30") at the start of a sentence is not mistaken for
# numbering; at most two digits so a year ("2024: ...") is left alone.
_NUMBERED_PREFIX = re.compile(r'^\d{1,2}[.):](?:\s+|(?=[^\d\s]))')
# A bold label followed by a colon, e.g. "**Revenue**: ".
_BOLD_LABEL_PREFIX = re.compile(r'^\*\*.*?\*\*:\s*')
# Items must be longer than this many characters to count as meaningful.
_MIN_ITEM_LENGTH = 15
# Number of items always returned for non-empty input.
_ITEM_COUNT = 5

_FALLBACK_ITEMS = {
    "insight": [
        "Dataset contains valuable information that can drive business decisions and strategic planning initiatives",
        "Data quality assessment reveals opportunities for improvement in collection and validation processes",
        "Statistical patterns indicate significant relationships between key variables requiring further investigation",
        "Distribution analysis shows interesting trends that could inform operational and strategic decisions",
        "Business intelligence opportunities exist through advanced analytics and machine learning applications"
    ],
    "recommendation": [
        "Implement comprehensive data quality monitoring and validation procedures to ensure accuracy and completeness",
        "Develop automated reporting dashboards that provide real-time visibility into key business metrics and KPIs",
        "Establish regular data governance workflows and collection protocols to maintain consistent, high-quality data",
        "Consider implementing advanced analytics and machine learning models to uncover predictive insights and opportunities",
        "Create standardized documentation and metadata management practices to improve data discoverability and collaboration"
    ]
}


def _clean_list_item(text):
    """Strip surrounding whitespace, list numbering and a bold label prefix."""
    without_number = _NUMBERED_PREFIX.sub('', text.strip())
    return _BOLD_LABEL_PREFIX.sub('', without_number)


def _split_numbered_text(text):
    """Split one block of text into items at each numbered line.

    Lines that do not start a new item are appended to the current item, even
    when they begin with a digit. Text before the first numbered line is
    ignored.
    """
    found = []
    current = ""
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        marker = _NUMBERED_PREFIX.match(line)
        if marker:
            if current:
                found.append(current)
            current = line[marker.end():].strip()
        elif current:
            current += " " + line
    if current:
        found.append(current)
    return found


def parse_insights_and_recommendations(items, item_type="insight"):
    """Normalize raw insights or recommendations into exactly five strings.

    Args:
        items: A list (non-string entries are skipped) or a single string
            containing numbered lines. Any falsy value yields ``[]``.
        item_type: ``"insight"`` or ``"recommendation"``; selects the generic
            filler texts. Unknown values use the insight fillers.

    Returns:
        ``[]`` for falsy input; otherwise a list of exactly five strings.
        Parsed items longer than 15 characters come first (at most five);
        remaining slots are filled with the generic text for that position.
    """
    if not items:
        return []

    if isinstance(items, list):
        candidates = [_clean_list_item(entry) for entry in items if isinstance(entry, str)]
    elif isinstance(items, str):
        candidates = _split_numbered_text(items)
    else:
        candidates = []

    parsed_items = [text for text in candidates if len(text) > _MIN_ITEM_LENGTH]

    if len(parsed_items) < _ITEM_COUNT:
        fallbacks = _FALLBACK_ITEMS.get(item_type, _FALLBACK_ITEMS["insight"])
        # Slot k is filled with filler k, so fillers keep their position.
        parsed_items.extend(fallbacks[len(parsed_items):_ITEM_COUNT])

    return parsed_items[:_ITEM_COUNT]
def _render_overview_cards(info):
    """Render the five dataset-overview cards from the ``dataset_info`` dict."""
    shape = info.get('shape') or [0, 0]
    row_count = shape[0]
    col_count = shape[1] if len(shape) > 1 else 0
    # Share of non-null cells, floored at 0; the divisor is at least 1 so an
    # empty or missing shape cannot divide by zero.
    null_total = sum(info.get('null_counts', {}).values())
    quality = max(0, 100 - null_total / max(row_count * col_count, 1) * 100)

    metrics = [
        ("📊", "Total Rows", f"{row_count:,}", "#3b82f6"),
        ("📋", "Columns", str(col_count), "#8b5cf6"),
        ("🔢", "Numeric", str(len(info.get('numeric_columns', []))), "#06b6d4"),
        ("📝", "Categorical", str(len(info.get('categorical_columns', []))), "#10b981"),
        ("✨", "Quality Score", f"{quality:.0f}%", "#f59e0b")
    ]
    # NOTE(review): the card template contains no placeholders, so the
    # icon/label/value/colour tuples are not displayed — confirm the
    # intended markup.
    for column, _card in zip(st.columns(5), metrics):
        with column:
            st.markdown("\n", unsafe_allow_html=True)
    st.markdown("\n", unsafe_allow_html=True)


def _render_numbered_cards(items, heading):
    """Render one numbered card per item under the given card heading."""
    for number, text in enumerate(items, 1):
        st.markdown(f"""
{number}
{heading} {number}:
{text}
""", unsafe_allow_html=True)


def _render_distribution_tab(df, numeric_cols):
    """Render histogram, box and violin plots plus summary stats for a column."""
    st.markdown("#### 📊 Distribution Analysis")
    if not numeric_cols:
        st.info("📊 No numeric columns found for distribution analysis.")
        return

    selected_col = st.selectbox(
        "Select column to analyze",
        numeric_cols,
        key="dist_col"
    )
    st.markdown("\n", unsafe_allow_html=True)

    shared_layout = dict(
        height=380,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        title_font_size=14,
        margin=dict(t=40, b=40, l=40, r=40)
    )
    charts = [
        ("**Histogram**", px.histogram(
            df, x=selected_col, title="Histogram", nbins=30,
            color_discrete_sequence=['#3b82f6'])),
        ("**Box Plot**", px.box(
            df, y=selected_col, title="Box Plot",
            color_discrete_sequence=['#8b5cf6'])),
        ("**Violin Plot**", px.violin(
            df, y=selected_col, title="Violin Plot",
            color_discrete_sequence=['#06b6d4'])),
    ]
    for column, (heading, fig) in zip(st.columns(3), charts):
        with column:
            st.markdown(heading)
            fig.update_layout(**shared_layout)
            st.plotly_chart(fig, use_container_width=True)

    st.markdown("#### 📊 Statistical Summary")
    values = df[selected_col]
    stats = [
        ("Mean", f"{values.mean():.2f}", "#3b82f6"),
        ("Median", f"{values.median():.2f}", "#8b5cf6"),
        ("Std Dev", f"{values.std():.2f}", "#06b6d4"),
        ("Min", f"{values.min():.2f}", "#10b981"),
        ("Max", f"{values.max():.2f}", "#f59e0b")
    ]
    # NOTE(review): as with the overview cards, this template has no
    # placeholders, so the statistics are not displayed — confirm markup.
    for column, _card in zip(st.columns(5), stats):
        with column:
            st.markdown("\n", unsafe_allow_html=True)


def _render_correlation_tab(df, numeric_cols):
    """Render the correlation heatmap and the five strongest column pairs."""
    st.markdown("#### 🔗 Correlation Analysis")
    if len(numeric_cols) < 2:
        st.info("🔗 Need at least 2 numeric columns for correlation analysis.")
        return

    corr_matrix = df[numeric_cols].corr()
    fig = px.imshow(
        corr_matrix,
        text_auto=True,
        aspect="auto",
        title="Correlation Matrix",
        color_continuous_scale="RdBu_r",
        zmin=-1,
        zmax=1
    )
    fig.update_layout(
        height=500,
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)'
    )
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("#### 🔗 Strongest Correlations")
    names = corr_matrix.columns
    pairs = []
    # Upper triangle only: each unordered pair once, no self-correlation.
    for i in range(len(names)):
        for j in range(i + 1, len(names)):
            value = corr_matrix.iloc[i, j]
            if not pd.isna(value):
                pairs.append((names[i], names[j], value))
    pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)

    for first, second, value in pairs[:5]:
        magnitude = abs(value)
        strength = "Strong" if magnitude > 0.7 else "Moderate" if magnitude > 0.5 else "Weak"
        st.markdown(f"""
{first} ↔ {second}
Correlation: {value:.3f}
({strength} relationship)
""", unsafe_allow_html=True)


def _render_trends_tab(df, numeric_cols, cat_cols):
    """Render a time-series line chart, or a per-category bar chart as fallback."""
    st.markdown("#### 📈 Trends & Patterns")
    date_cols = df.select_dtypes(include=['datetime64']).columns.tolist()

    if date_cols and numeric_cols:
        col1, col2 = st.columns(2)
        with col1:
            date_col = st.selectbox("Date column", date_cols, key="trend_date")
        with col2:
            value_col = st.selectbox("Value column", numeric_cols, key="trend_value")
        fig = px.line(
            df.sort_values(date_col),
            x=date_col,
            y=value_col,
            title=f"{value_col} Over Time",
            color_discrete_sequence=['#3b82f6']
        )
        fig.update_layout(height=400)
        st.plotly_chart(fig, use_container_width=True)
    elif cat_cols and numeric_cols:
        st.markdown("#### 📊 Category-based Analysis")
        col1, col2, col3 = st.columns(3)
        with col1:
            cat_col = st.selectbox("Category", cat_cols, key="cat_trend")
        with col2:
            num_col = st.selectbox("Numeric value", numeric_cols, key="num_trend")
        with col3:
            agg_func = st.selectbox("Aggregation", ["mean", "sum", "count", "median"])

        if agg_func == "count":
            grouped = df.groupby(cat_col).size().reset_index(name='count')
            y_col = 'count'
        else:
            grouped = df.groupby(cat_col)[num_col].agg(agg_func).reset_index()
            y_col = num_col
        fig = px.bar(
            grouped,
            x=cat_col,
            y=y_col,
            title=f"{agg_func.title()} of {num_col if agg_func != 'count' else 'Count'} by {cat_col}",
            color_discrete_sequence=['#8b5cf6']
        )
        fig.update_layout(height=400)
        st.plotly_chart(fig, use_container_width=True)
    else:
        st.info("📈 Upload data with date columns or categorical data to see trends.")


def _render_custom_tab(df, numeric_cols, cat_cols):
    """Render the chart builder: selectors on the left, the chart on the right."""
    st.markdown("#### 🎯 Custom Analysis Builder")
    controls, canvas = st.columns([1, 2])
    with controls:
        viz_type = st.selectbox(
            "Chart Type",
            ["Scatter Plot", "Bar Chart", "Pie Chart", "Sunburst", "Treemap"]
        )

    if viz_type == "Scatter Plot" and len(numeric_cols) >= 2:
        with controls:
            x_col = st.selectbox("X-axis", numeric_cols, key="custom_x")
            y_col = st.selectbox("Y-axis", numeric_cols, key="custom_y")
            color_col = st.selectbox("Color by", ["None"] + list(df.columns), key="custom_color")
            size_col = st.selectbox("Size by", ["None"] + numeric_cols, key="custom_size")
        with canvas:
            try:
                fig = px.scatter(
                    df,
                    x=x_col,
                    y=y_col,
                    color=None if color_col == "None" else color_col,
                    size=None if size_col == "None" else size_col,
                    title=f"{y_col} vs {x_col}",
                    color_discrete_sequence=['#3b82f6'],
                    hover_data=df.columns[:5].tolist()
                )
                fig.update_layout(height=500)
                st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"Error creating visualization: {str(e)}")
    elif viz_type in ("Bar Chart", "Pie Chart") and cat_cols:
        with controls:
            cat_col = st.selectbox("Category", cat_cols, key="custom_cat")
            if numeric_cols:
                val_col = st.selectbox("Value (optional)", ["Count"] + numeric_cols, key="custom_val")
            else:
                val_col = "Count"
        with canvas:
            try:
                # Both branches keep the eight largest categories.
                if val_col == "Count":
                    series = df[cat_col].value_counts().head(8)
                    title = f"Distribution of {cat_col}"
                else:
                    series = df.groupby(cat_col)[val_col].sum().nlargest(8)
                    title = f"{val_col} by {cat_col}"

                if viz_type == "Pie Chart":
                    fig = px.pie(values=series.values, names=series.index, title=title)
                else:
                    fig = px.bar(
                        x=series.index,
                        y=series.values,
                        title=title,
                        labels={'x': cat_col, 'y': val_col},
                        color_discrete_sequence=['#8b5cf6']
                    )
                fig.update_layout(height=500)
                st.plotly_chart(fig, use_container_width=True)
            except Exception as e:
                st.error(f"Error creating visualization: {str(e)}")
    else:
        # Either the chart type has no renderer here (Sunburst, Treemap) or
        # the dataset lacks the columns it needs; say so instead of leaving
        # the panel blank.
        with canvas:
            st.info("ℹ️ This chart type is not available for the current dataset.")


def _render_downloads(results):
    """Render the three download cards (JSON results, CSV dataset, MD report)."""
    st.markdown("### 💾 Download Your Results")
    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    download_items = [
        ("📄", "Analysis Report (JSON)", "Download complete analysis", "json",
         "analysis_results", "application/json"),
        ("📊", "Enhanced Dataset (CSV)", "Download processed data", "csv",
         "enhanced_dataset", "text/csv"),
        ("📋", "Executive Summary (MD)", "Download business report", "md",
         "executive_summary", "text/markdown"),
    ]
    for column, (icon, title, desc, file_type, stem, mime) in zip(st.columns(3), download_items):
        with column:
            st.markdown(f"""
{icon}
{title}
{desc}
""", unsafe_allow_html=True)
            if file_type == "json":
                # default=str stringifies values json cannot encode itself.
                data = json.dumps(results, indent=2, default=str)
            elif file_type == "csv":
                data = st.session_state.dataset.to_csv(index=False)
            else:  # md
                data = generate_report(results)
            st.download_button(
                label=f"Download {file_type.upper()}",
                data=data,
                file_name=f"{stem}_{stamp}.{file_type}",
                mime=mime,
                use_container_width=True
            )
    st.markdown("\n", unsafe_allow_html=True)


def display_results():
    """Render the full results page for the stored analysis.

    Reads ``st.session_state.analysis_results`` (returns immediately when it
    is ``None``) and draws, in order: the header, dataset overview, key
    insights, the four exploration tabs (only when a dataset is loaded),
    recommendations and the download section.
    """
    results = st.session_state.analysis_results
    if results is None:
        return

    st.markdown("""
📊 Analysis Complete!
Here are your AI-generated insights and recommendations
""", unsafe_allow_html=True)

    st.markdown("### 📋 Dataset Overview")
    _render_overview_cards(results.get('dataset_info', {}))

    # Key insights
    st.markdown("### 💡 Key Insights")
    raw_insights = results.get('insights', [])
    if raw_insights:
        parsed_insights = parse_insights_and_recommendations(raw_insights, "insight")
        if parsed_insights:
            st.markdown(f"**{len(parsed_insights)} key insights discovered from your data:**")
            st.markdown("\n", unsafe_allow_html=True)
            _render_numbered_cards(parsed_insights, "💡 Key Insight")
        else:
            st.info("🔍 No insights could be extracted from the analysis.")
    else:
        st.info("🔍 No insights were generated.")

    # Interactive exploration
    st.markdown("### 📈 Interactive Data Exploration")
    df = st.session_state.dataset
    if df is not None:
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        cat_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
        tab1, tab2, tab3, tab4 = st.tabs([
            "📊 Distributions",
            "🔗 Correlations",
            "📈 Trends & Patterns",
            "🎯 Custom Analysis"
        ])
        with tab1:
            _render_distribution_tab(df, numeric_cols)
        with tab2:
            _render_correlation_tab(df, numeric_cols)
        with tab3:
            _render_trends_tab(df, numeric_cols, cat_cols)
        with tab4:
            _render_custom_tab(df, numeric_cols, cat_cols)

    # Recommendations
    st.markdown("### 🎯 AI-Generated Recommendations")
    raw_recommendations = results.get('recommendations', [])
    if raw_recommendations:
        parsed_recommendations = parse_insights_and_recommendations(raw_recommendations, "recommendation")
        if parsed_recommendations:
            st.markdown(f"**{len(parsed_recommendations)} actionable recommendations:**")
            st.markdown("\n", unsafe_allow_html=True)
            _render_numbered_cards(parsed_recommendations, "🎯 Recommendation")
        else:
            st.info("🎯 No recommendations could be extracted from the analysis.")
    else:
        st.info("🎯 No recommendations were generated.")

    _render_downloads(results)
def generate_report(results):
    """Build the markdown executive summary for an analysis result.

    Args:
        results: The analysis dict. Reads ``dataset_info`` (``shape``,
            ``columns``, ``null_counts``), ``insights``, ``recommendations``,
            ``analysis_timestamp``, ``visualizations`` and ``errors``; every
            key is optional.

    Returns:
        str: The report text. The dataset name comes from
        ``st.session_state.uploaded_filename`` (default ``'dataset'``) and the
        model line reflects ``st.session_state.model_name`` rather than a
        fixed model.
    """
    filename = getattr(st.session_state, 'uploaded_filename', 'dataset')
    model_name = st.session_state.get('model_name', 'llama3-70b-8192')

    info = results.get('dataset_info', {})
    shape = info.get('shape') or [0, 0]
    row_count = shape[0]
    col_count = shape[1] if len(shape) > 1 else 0
    # Share of non-null cells, floored at 0; the divisor is at least 1 so an
    # empty or missing shape cannot divide by zero.
    null_total = sum(info.get('null_counts', {}).values())
    quality = max(0, 100 - null_total / max(row_count * col_count, 1) * 100)

    sections = [f"""# 🤖 AI Data Analysis Executive Summary
**Dataset:** {filename}
**Generated:** {datetime.now().strftime('%B %d, %Y at %I:%M %p')}
**Powered by:** Llama 3 & LangGraph AI Agents
---
## 📊 Executive Overview
This report presents key findings from an AI-powered analysis of your dataset. Our advanced language models have identified patterns, trends, and opportunities that can drive business decisions.
### Dataset Metrics
- **Total Records:** {row_count:,}
- **Data Points:** {len(info.get('columns', []))}
- **Data Quality Score:** {quality:.0f}%
---
## 💡 Strategic Insights
Our AI analysis has uncovered the following key insights:
"""]

    parsed_insights = parse_insights_and_recommendations(results.get('insights', []), "insight")
    sections.extend(
        f"**{number}.** {insight}\n\n" for number, insight in enumerate(parsed_insights, 1)
    )

    sections.append("""---
## 🎯 Recommended Actions
Based on the data analysis, we recommend the following strategic actions:
""")

    parsed_recommendations = parse_insights_and_recommendations(
        results.get('recommendations', []), "recommendation"
    )
    sections.extend(
        f"**{number}.** {rec}\n\n" for number, rec in enumerate(parsed_recommendations, 1)
    )

    sections.append(f"""---
## 🔧 Technical Summary
- **Analysis Completed:** {results.get('analysis_timestamp', 'N/A')}
- **Visualizations Created:** {len(results.get('visualizations', []))}
- **Processing Errors:** {len(results.get('errors', []))}
- **AI Model Used:** {model_name}
---
## 📈 Next Steps
1. **Review Insights:** Analyze each insight for immediate actionable opportunities
2. **Implement Recommendations:** Prioritize recommendations based on business impact
3. **Monitor Progress:** Track key metrics identified in this analysis
4. **Iterate:** Regular re-analysis as new data becomes available
---
*This report was generated automatically by our AI Data Analysis Agent. For questions or support, please contact your data team.*
""")

    return "".join(sections)
def main():
    """Entry point: route between the results page and the upload flow.

    The sidebar is rendered first on every run so that it is also present on
    the results page, where it shows the dataset statistics and the
    insight/recommendation counters. When a completed analysis is stored,
    only the results and a "Start New Analysis" button are shown; otherwise
    the hero, feature cards and, depending on whether an API key is
    configured, either the setup guide or the upload/analyze flow.
    """
    initialize_session_state()

    # Rendered before the results early-return below; calling it afterwards
    # would leave the sidebar empty on the results page.
    api_configured = sidebar_config()

    if st.session_state.analysis_complete and st.session_state.analysis_results:
        display_results()

        st.markdown("---")
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            if st.button("🔄 Start New Analysis", use_container_width=True):
                # Reset session state
                st.session_state.analysis_results = None
                st.session_state.analysis_complete = False
                st.session_state.dataset = None
                st.rerun()
        return

    display_hero_section()
    display_features()

    if not api_configured:
        st.markdown("""
🔑
API Key Required
Please configure your Groq API key to unlock the power of AI analysis
""", unsafe_allow_html=True)
        with st.expander("🚀 Quick Setup Guide", expanded=True):
            st.markdown("""
### Option 1: Environment Variable (Recommended)
```bash
export GROQ_API_KEY="your_api_key_here"
streamlit run web_app.py
```
### Option 2: Manual Entry
1. Visit [Groq Console](https://console.groq.com/) 🔗
2. Create a free account and generate your API key
3. Enter the key in the sidebar ←
4. Upload your dataset and start analyzing!
### Supported File Formats
- **CSV files** (.csv) - Most common format
- **Excel files** (.xlsx, .xls) - Spreadsheet data
- **JSON files** (.json) - Structured data
### Tips for Best Results
- Ensure clean, well-structured data
- Include meaningful column names
- Mix of numeric and categorical columns works best
- Date/time columns enable trend analysis
""")
        return

    st.markdown("---")
    dataset_uploaded = upload_dataset()

    if dataset_uploaded:
        st.markdown("---")
        # Centre the analyze button in the wider middle column.
        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            if st.button(
                "🚀 Analyze My Data with AI",
                type="primary",
                use_container_width=True,
                help="Start the AI-powered analysis of your dataset"
            ):
                run_analysis()

    # Footer
    st.markdown("\n", unsafe_allow_html=True)


if __name__ == "__main__":
    main()