"""Streamlit front-end for the AI Data Analysis Agent.

Lets a user upload a CSV/Excel/JSON dataset, runs an LLM-powered analysis
(via ``data_analysis_agent.DataAnalysisAgent``), and renders insights,
recommendations, interactive Plotly visualizations, and downloadable reports.

NOTE(review): this file was recovered from a whitespace-mangled paste.  The
original custom-CSS block and the exact HTML markup of the styled cards were
stripped in transit; all program logic and user-visible text below match the
recovered source, but the HTML wrappers are minimal reconstructions and
should be re-styled against the original design.
"""

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import io
import base64
from datetime import datetime
import json
import os
import sys
from pathlib import Path
import time
import re

# Add the current directory to path to import our agent.
sys.path.append(str(Path(__file__).parent))

try:
    from data_analysis_agent import DataAnalysisAgent, DataAnalysisConfig
except ImportError:
    st.error("❌ Please ensure data_analysis_agent.py is in the same directory")
    st.info("Download both files and place them in the same folder")
    st.stop()

# Page configuration.
st.set_page_config(
    page_title="AI Data Analysis Agent",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={
        'Get Help': 'https://github.com/yourusername/ai-data-analysis-agent',
        'Report a bug': "https://github.com/yourusername/ai-data-analysis-agent/issues",
        'About': "# AI Data Analysis Agent\nPowered by Llama 3 & LangGraph"
    }
)

# Custom CSS for styling.
# NOTE(review): the original stylesheet was lost when the file was mangled;
# restore the project's CSS rules inside this block.
st.markdown("""
""", unsafe_allow_html=True)


def initialize_session_state():
    """Create every session-state key the app reads, with safe defaults."""
    defaults = {
        'analysis_results': None,
        'dataset': None,
        'agent': None,
        'groq_api_key': "",
        'model_name': "llama3-70b-8192",
        'analysis_complete': False,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def create_agent():
    """Create and configure the data analysis agent.

    Returns:
        DataAnalysisAgent | None: a configured agent, or ``None`` when no API
        key is available or construction raises.
    """
    try:
        # Environment variable takes precedence over the sidebar text input.
        groq_api_key = os.environ.get('GROQ_API_KEY') or st.session_state.get('groq_api_key', '')
        if not groq_api_key:
            return None
        return DataAnalysisAgent(
            groq_api_key=groq_api_key,
            model_name=st.session_state.get('model_name', 'llama3-70b-8192'),
        )
    except Exception as e:
        st.error(f"Failed to create agent: {str(e)}")
        return None


def _quality_score(info):
    """Return a 0-100 data-quality score from a ``dataset_info`` dict.

    Score = 100 minus the percentage of null cells; clamped at 0.  ``info``
    is expected to carry ``shape`` ([rows, cols]) and ``null_counts``
    ({column: null count}) keys — TODO confirm against DataAnalysisAgent.
    """
    shape = info.get('shape', [1, 1])
    total_cells = max(shape[0] * shape[1], 1)
    null_cells = sum(info.get('null_counts', {}).values())
    return max(0, 100 - (null_cells / total_cells * 100))


def sidebar_config():
    """Render the sidebar: API key, model, analysis settings, dataset stats.

    Returns:
        bool: True when a Groq API key is configured (env var or user input).
    """
    with st.sidebar:
        # Branding header (markup reconstructed — original CSS lost).
        st.markdown(
            """
            <div style="text-align: center;">
                <div style="font-size: 3rem;">🤖</div>
                <h2>AI Agents on action</h2>
                <p>Powered by Llama 3</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
        st.markdown("---")

        # API key: check the environment variable first.
        env_api_key = os.environ.get('GROQ_API_KEY')
        if env_api_key:
            st.success("✅ API Key Configured")
            st.session_state.groq_api_key = env_api_key
            api_key_configured = True
        else:
            st.subheader("🔑 API Setup")
            st.info("💡 Set GROQ_API_KEY environment variable")
            groq_api_key = st.text_input(
                "Groq API Key",
                type="password",
                value=st.session_state.groq_api_key,
                help="Get your API key from console.groq.com",
            )
            if groq_api_key:
                st.session_state.groq_api_key = groq_api_key
                api_key_configured = True
            else:
                api_key_configured = False

        st.markdown("---")

        # Model selection.
        st.subheader("🧠 AI Model")
        model_options = {
            "llama3-70b-8192": "Llama 3 70B (Recommended)",
            "llama3-8b-8192": "Llama 3 8B (Faster)",
            "mixtral-8x7b-32768": "Mixtral 8x7B",
        }
        selected_model = st.selectbox(
            "Choose Model",
            options=list(model_options.keys()),
            format_func=lambda x: model_options[x],
            index=0,
        )
        st.session_state.model_name = selected_model

        st.markdown("---")

        # Analysis options.
        st.subheader("⚙️ Analysis Settings")
        industry_type = st.selectbox(
            "Industry Focus",
            ["General", "Retail", "Healthcare", "Finance", "Manufacturing", "Technology"],
            help="Customize insights for your industry",
        )
        st.session_state.industry_type = industry_type

        enable_advanced = st.toggle(
            "Advanced Analysis",
            value=True,
            help="Include correlation analysis and advanced insights",
        )
        st.session_state.enable_advanced = enable_advanced

        auto_insights = st.toggle(
            "Auto-Generate Insights",
            value=True,
            help="Automatically generate business insights",
        )
        st.session_state.auto_insights = auto_insights

        st.markdown("---")

        # Quick stats for the loaded dataset.
        if st.session_state.dataset is not None:
            st.subheader("📊 Dataset Info")
            df = st.session_state.dataset
            col1, col2 = st.columns(2)
            with col1:
                st.metric("Rows", f"{df.shape[0]:,}")
                st.metric("Columns", df.shape[1])
            with col2:
                st.metric("Missing", f"{df.isnull().sum().sum():,}")
                st.metric("Size", f"{df.memory_usage(deep=True).sum() / 1024**2:.1f} MB")

            # Show insight/recommendation counts once analysis is complete
            # (parse_insights_and_recommendations pads to exactly 5 each, but
            # the sidebar counts only meaningful raw strings).
            if st.session_state.analysis_results:
                insights = st.session_state.analysis_results.get('insights', [])
                recommendations = st.session_state.analysis_results.get('recommendations', [])
                processed_insights_count = len(
                    [i for i in insights if isinstance(i, str) and len(i.strip()) > 10])
                processed_recommendations_count = len(
                    [r for r in recommendations if isinstance(r, str) and len(r.strip()) > 10])

                st.markdown("---")
                st.subheader("🧠 Analysis Results")
                col1, col2 = st.columns(2)
                with col1:
                    st.metric("💡 Insights", processed_insights_count)
                with col2:
                    st.metric("🎯 Recommendations", processed_recommendations_count)

        st.markdown("---")

        # Help section.
        with st.expander("💡 Quick Help"):
            st.markdown("""
**Supported Formats:**
- CSV files (.csv)
- Excel files (.xlsx, .xls)
- JSON files (.json)

**Best Practices:**
- Clean column names
- Handle missing values
- Include date columns
- Mix numeric & categorical data

**Need Help?**
- [Documentation](https://github.com/yourusername/ai-data-analysis-agent)
- [Examples](https://github.com/yourusername/ai-data-analysis-agent/examples)
""")

    return api_key_configured


def display_hero_section():
    """Display the hero/title section (markup reconstructed)."""
    st.markdown(
        '<h1 style="text-align: center;">AIDA-AI Data Analyzer</h1>',
        unsafe_allow_html=True,
    )
    st.markdown(
        """
        <p style="text-align: center;">
        Transform your raw data into actionable business insights with the power of AI.
        Upload, analyze, and discover patterns automatically using intelligent agents.
        </p>
        """,
        unsafe_allow_html=True,
    )


def display_features():
    """Display the three feature cards (markup reconstructed)."""
    st.markdown("### ✨ What This AI Agent Can Do")

    features = [
        ("🧠", "Intelligent Analysis",
         "Our AI automatically understands your data structure, identifies patterns, "
         "and generates meaningful insights without any manual configuration."),
        ("📊", "Smart Visualizations",
         "Intelligently creates the most appropriate charts and graphs for your data, "
         "with interactive visualizations."),
        ("🎯", "Actionable Recommendations",
         "Get specific, measurable recommendations for improving your business based "
         "on data-driven insights."),
    ]
    for col, (icon, title, desc) in zip(st.columns(3), features):
        with col:
            st.markdown(
                f"""
                <div style="text-align: center; padding: 1rem;">
                    <div style="font-size: 2.5rem;">{icon}</div>
                    <h4>{title}</h4>
                    <p>{desc}</p>
                </div>
                """,
                unsafe_allow_html=True,
            )


def upload_dataset():
    """Render the dataset upload section and load the chosen file.

    Stores the loaded DataFrame in ``st.session_state.dataset`` and the file
    name in ``st.session_state.uploaded_filename``.

    Returns:
        bool: True when a dataset was loaded successfully, else False.
    """
    st.markdown("### 📊 Upload Your Dataset")

    uploaded_file = st.file_uploader(
        "",
        type=['csv', 'xlsx', 'xls', 'json'],
        help="Drag and drop your file here or click to browse",
        label_visibility="collapsed",
    )

    if uploaded_file is None:
        # Upload placeholder (markup reconstructed).
        st.markdown(
            """
            <div style="text-align: center; padding: 2rem; border: 2px dashed #ccc;">
                <div style="font-size: 3rem;">📁</div>
                <h4>Drop your dataset here</h4>
                <p>Supports CSV, Excel, and JSON files • Max 200MB</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
        return False

    try:
        with st.spinner("🔍 Processing your dataset..."):
            time.sleep(1)  # Small delay for UX.

            # Dispatch on file extension.
            if uploaded_file.name.endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            elif uploaded_file.name.endswith(('.xlsx', '.xls')):
                df = pd.read_excel(uploaded_file)
            elif uploaded_file.name.endswith('.json'):
                df = pd.read_json(uploaded_file)
            else:
                st.error("Unsupported file format")
                return False

            st.session_state.dataset = df
            st.session_state.uploaded_filename = uploaded_file.name

        st.success(f"✅ Successfully loaded **{uploaded_file.name}**")

        # Headline metric cards (markup reconstructed).
        missing = df.isnull().sum().sum()
        size_mb = df.memory_usage(deep=True).sum() / 1024**2
        cards = [
            (f"{df.shape[0]:,}", "Rows"),
            (f"{df.shape[1]}", "Columns"),
            (f"{missing:,}", "Missing Values"),
            (f"{size_mb:.1f} MB", "File Size"),
        ]
        for col, (value, label) in zip(st.columns(4), cards):
            with col:
                st.markdown(
                    f"""
                    <div style="text-align: center;">
                        <h3>{value}</h3>
                        <p>{label}</p>
                    </div>
                    """,
                    unsafe_allow_html=True,
                )

        st.markdown("<br>", unsafe_allow_html=True)

        # Data preview.
        st.markdown("#### 📋 Data Preview")
        st.dataframe(df.head(10), use_container_width=True, height=300)

        # Per-column details.
        with st.expander("📊 Detailed Column Information", expanded=False):
            col_info = pd.DataFrame({
                'Column': df.columns,
                'Type': df.dtypes.astype(str),
                'Non-Null': df.count(),
                'Null Count': df.isnull().sum(),
                'Unique Values': df.nunique(),
                'Sample Data': [str(df[col].iloc[0]) if len(df) > 0 else ''
                                for col in df.columns],
            })
            st.dataframe(col_info, use_container_width=True)

        return True

    except Exception as e:
        st.error(f"❌ Error reading file: {str(e)}")
        return False


def _step_card(icon, desc):
    """Return reconstructed HTML for one progress-step indicator card."""
    return (
        f'<div style="text-align: center;">'
        f'<div style="font-size: 1.5rem;">{icon}</div>'
        f'<small>{desc}</small></div>'
    )


def run_analysis():
    """Run the AI analysis with progress indicators and store the results.

    Writes results to ``st.session_state.analysis_results`` and flips
    ``st.session_state.analysis_complete``; reruns the app on success.
    """
    if st.session_state.dataset is None:
        st.warning("Please upload a dataset first.")
        return

    # API key may come from the environment or the sidebar.
    api_key = os.environ.get('GROQ_API_KEY') or st.session_state.get('groq_api_key')
    if not api_key:
        st.warning("Please set GROQ_API_KEY environment variable or enter it in the sidebar.")
        return

    with st.spinner("🤖 Initializing AI agent..."):
        agent = create_agent()
        if agent is None:
            st.error("Failed to initialize AI agent. Check your API key.")
            return
        st.session_state.agent = agent

    # The agent consumes a file path, so persist the dataset temporarily.
    temp_file = "temp_dataset.csv"
    st.session_state.dataset.to_csv(temp_file, index=False)

    progress_container = st.container()
    with progress_container:
        st.markdown("### 🚀 AI Analysis in Progress")

        progress_bar = st.progress(0)
        status_text = st.empty()

        steps = [
            ("🔍", "Analyzing dataset structure"),
            ("📊", "Examining columns and data quality"),
            ("🧠", "Generating AI insights"),
            ("📈", "Planning visualizations"),
            ("🎨", "Creating charts"),
            ("🎯", "Formulating recommendations"),
        ]
        step_cols = st.columns(len(steps))
        step_indicators = []
        for i, (icon, desc) in enumerate(steps):
            with step_cols[i]:
                step_indicators.append(st.empty())
                step_indicators[i].markdown(_step_card(icon, desc), unsafe_allow_html=True)

        try:
            # Cosmetic step progression; the real work happens in
            # agent.analyze_dataset below.
            stages = [
                (0, "🔍", "Analyzing Structure", "**🔍 AI agent analyzing dataset structure...**", 15),
                (1, "📊", "Examining Data", "**📊 Analyzing columns and data quality...**", 30),
                (2, "🧠", "AI Thinking", "**🧠 Generating insights with AI...**", 50),
                (3, "📈", "Planning Charts", "**📈 Planning optimal visualizations...**", 70),
            ]
            for idx, icon, label, status, pct in stages:
                step_indicators[idx].markdown(_step_card(icon, label), unsafe_allow_html=True)
                status_text.markdown(status)
                progress_bar.progress(pct)
                time.sleep(1)

            step_indicators[4].markdown(_step_card("🎨", "Creating Charts"), unsafe_allow_html=True)
            status_text.markdown("**🎨 Creating beautiful visualizations...**")
            progress_bar.progress(85)

            # Run the actual analysis.
            results = agent.analyze_dataset(temp_file)

            step_indicators[5].markdown(
                _step_card("🎯", "Final Recommendations"), unsafe_allow_html=True)
            status_text.markdown("**🎯 Formulating actionable recommendations...**")
            progress_bar.progress(100)

            # Clean up temp file.
            if os.path.exists(temp_file):
                os.remove(temp_file)

            if "error" in results:
                st.error(f"❌ Analysis failed: {results['error']}")
                return

            st.session_state.analysis_results = results
            st.session_state.analysis_complete = True

            status_text.markdown("**✅ Analysis completed successfully!**")
            st.balloons()
            time.sleep(1)

            # Clear progress and show results.
            progress_container.empty()
            st.rerun()

        except Exception as e:
            st.error(f"❌ Analysis failed: {str(e)}")
            if os.path.exists(temp_file):
                os.remove(temp_file)


def parse_insights_and_recommendations(items, item_type="insight"):
    """Normalize raw insights/recommendations into exactly 5 clean strings.

    Accepts either a list of strings (numbering/bold prefixes stripped) or a
    single newline-separated numbered string (split on ``N. ``/``N) ``/``N: ``
    markers).  Short fragments are dropped; canned fallback text pads the
    result to exactly five items.

    Args:
        items: list[str] | str | None — raw model output.
        item_type: "insight" or "recommendation"; selects the fallback set.

    Returns:
        list[str]: exactly 5 items (empty list only when ``items`` is falsy).
    """
    if not items:
        return []

    parsed_items = []

    if isinstance(items, list):
        for item in items:
            if isinstance(item, str):
                # Strip any existing numbering or "**Title**:" prefix.
                clean_item = re.sub(r'^\d+\.\s*', '', item.strip())
                clean_item = re.sub(r'^\*\*.*?\*\*:\s*', '', clean_item)
                if len(clean_item) > 15:  # Only keep meaningful content.
                    parsed_items.append(clean_item)

    elif isinstance(items, str):
        # Split a single blob on numbered-line markers.
        lines = items.split('\n')
        current_item = ""
        for line in lines:
            line = line.strip()
            # A new item starts at "N. ", "N) " or "N: " (single digit only).
            if line and len(line) > 3 and line[0].isdigit() and line[1:3] in ['. ', ') ', ': ']:
                if current_item:
                    clean_item = current_item.strip()
                    if len(clean_item) > 15:
                        parsed_items.append(clean_item)
                current_item = line[2:].strip() if line[1] == '.' else line[3:].strip()
            elif current_item and line and not line[0].isdigit():
                # Continuation of the current item.
                current_item += " " + line
        # Don't forget the last item.
        if current_item:
            clean_item = current_item.strip()
            if len(clean_item) > 15:
                parsed_items.append(clean_item)

    # Pad to exactly 5 items with canned fallbacks.
    if len(parsed_items) < 5:
        fallback_items = {
            "insight": [
                "Dataset contains valuable information that can drive business decisions and strategic planning initiatives",
                "Data quality assessment reveals opportunities for improvement in collection and validation processes",
                "Statistical patterns indicate significant relationships between key variables requiring further investigation",
                "Distribution analysis shows interesting trends that could inform operational and strategic decisions",
                "Business intelligence opportunities exist through advanced analytics and machine learning applications",
            ],
            "recommendation": [
                "Implement comprehensive data quality monitoring and validation procedures to ensure accuracy and completeness",
                "Develop automated reporting dashboards that provide real-time visibility into key business metrics and KPIs",
                "Establish regular data governance workflows and collection protocols to maintain consistent, high-quality data",
                "Consider implementing advanced analytics and machine learning models to uncover predictive insights and opportunities",
                "Create standardized documentation and metadata management practices to improve data discoverability and collaboration",
            ],
        }
        fallbacks = fallback_items.get(item_type, fallback_items["insight"])
        while len(parsed_items) < 5:
            idx = len(parsed_items)
            if idx < len(fallbacks):
                parsed_items.append(fallbacks[idx])
            else:
                parsed_items.append(
                    f"Additional {item_type} opportunities exist for strategic business improvement and data optimization")

    return parsed_items[:5]  # Return exactly 5 items.


def display_results():
    """Render the full results view: overview, insights, charts, downloads."""
    results = st.session_state.analysis_results
    if results is None:
        return

    # Results header (markup reconstructed).
    st.markdown(
        """
        <div style="text-align: center;">
            <h2>📊 Analysis Complete!</h2>
            <p>Here are your AI-generated insights and recommendations</p>
        </div>
        """,
        unsafe_allow_html=True,
    )

    # --- Dataset overview cards ---
    st.markdown("### 📋 Dataset Overview")
    info = results.get('dataset_info', {})

    metrics = [
        ("📊", "Total Rows", f"{info.get('shape', [0])[0]:,}", "#3b82f6"),
        ("📋", "Columns", str(info.get('shape', [0, 0])[1]), "#8b5cf6"),
        ("🔢", "Numeric", str(len(info.get('numeric_columns', []))), "#06b6d4"),
        ("📝", "Categorical", str(len(info.get('categorical_columns', []))), "#10b981"),
        ("✨", "Quality Score", f"{_quality_score(info):.0f}%", "#f59e0b"),
    ]
    for col, (icon, label, value, color) in zip(st.columns(5), metrics):
        with col:
            st.markdown(
                f"""
                <div style="text-align: center; color: {color};">
                    <div>{icon}</div>
                    <h4>{value}</h4>
                    <small>{label}</small>
                </div>
                """,
                unsafe_allow_html=True,
            )

    st.markdown("<br>", unsafe_allow_html=True)

    # --- Key insights ---
    st.markdown("### 💡 Key Insights")
    raw_insights = results.get('insights', [])
    if raw_insights:
        parsed_insights = parse_insights_and_recommendations(raw_insights, "insight")
        if parsed_insights:
            st.markdown(f"**{len(parsed_insights)} key insights discovered from your data:**")
            st.markdown("<br>", unsafe_allow_html=True)
            for i, insight in enumerate(parsed_insights):
                st.markdown(
                    f"""
                    <div style="padding: 0.5rem 0;">
                        <b>{i+1}</b> 💡 <b>Key Insight {i+1}:</b> {insight}
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
        else:
            st.info("🔍 No insights could be extracted from the analysis.")
    else:
        st.info("🔍 No insights were generated.")

    # --- Interactive visualizations ---
    st.markdown("### 📈 Interactive Data Exploration")
    if st.session_state.dataset is not None:
        df = st.session_state.dataset

        tab1, tab2, tab3, tab4 = st.tabs([
            "📊 Distributions",
            "🔗 Correlations",
            "📈 Trends & Patterns",
            "🎯 Custom Analysis",
        ])

        with tab1:
            st.markdown("#### 📊 Distribution Analysis")
            numeric_cols = df.select_dtypes(include=['number']).columns.tolist()

            if len(numeric_cols) > 0:
                selected_col = st.selectbox(
                    "Select column to analyze", numeric_cols, key="dist_col")
                st.markdown("<br>", unsafe_allow_html=True)

                # Three complementary views of the same column, side by side.
                plot_specs = [
                    ("Histogram",
                     lambda: px.histogram(df, x=selected_col, title="Histogram",
                                          nbins=30, color_discrete_sequence=['#3b82f6'])),
                    ("Box Plot",
                     lambda: px.box(df, y=selected_col, title="Box Plot",
                                    color_discrete_sequence=['#8b5cf6'])),
                    ("Violin Plot",
                     lambda: px.violin(df, y=selected_col, title="Violin Plot",
                                       color_discrete_sequence=['#06b6d4'])),
                ]
                for col, (label, make_fig) in zip(st.columns(3), plot_specs):
                    with col:
                        st.markdown(f"**{label}**")
                        fig = make_fig()
                        fig.update_layout(
                            height=380,
                            plot_bgcolor='rgba(0,0,0,0)',
                            paper_bgcolor='rgba(0,0,0,0)',
                            title_font_size=14,
                            margin=dict(t=40, b=40, l=40, r=40),
                        )
                        st.plotly_chart(fig, use_container_width=True)

                # Statistics cards below the plots.
                st.markdown("#### 📊 Statistical Summary")
                stats = [
                    ("Mean", f"{df[selected_col].mean():.2f}", "#3b82f6"),
                    ("Median", f"{df[selected_col].median():.2f}", "#8b5cf6"),
                    ("Std Dev", f"{df[selected_col].std():.2f}", "#06b6d4"),
                    ("Min", f"{df[selected_col].min():.2f}", "#10b981"),
                    ("Max", f"{df[selected_col].max():.2f}", "#f59e0b"),
                ]
                for col, (label, value, color) in zip(st.columns(5), stats):
                    with col:
                        st.markdown(
                            f"""
                            <div style="text-align: center; color: {color};">
                                <h4>{value}</h4>
                                <small>{label}</small>
                            </div>
                            """,
                            unsafe_allow_html=True,
                        )
            else:
                st.info("📊 No numeric columns found for distribution analysis.")

        with tab2:
            st.markdown("#### 🔗 Correlation Analysis")
            if len(numeric_cols) > 1:
                corr_matrix = df[numeric_cols].corr()

                fig = px.imshow(
                    corr_matrix,
                    text_auto=True,
                    aspect="auto",
                    title="Correlation Matrix",
                    color_continuous_scale="RdBu_r",
                    zmin=-1,
                    zmax=1,
                )
                fig.update_layout(
                    height=500,
                    plot_bgcolor='rgba(0,0,0,0)',
                    paper_bgcolor='rgba(0,0,0,0)',
                )
                st.plotly_chart(fig, use_container_width=True)

                # Strongest pairwise correlations (upper triangle only).
                st.markdown("#### 🔗 Strongest Correlations")
                correlations = []
                for i in range(len(corr_matrix.columns)):
                    for j in range(i + 1, len(corr_matrix.columns)):
                        corr_val = corr_matrix.iloc[i, j]
                        if not pd.isna(corr_val):
                            correlations.append({
                                'Variable 1': corr_matrix.columns[i],
                                'Variable 2': corr_matrix.columns[j],
                                'Correlation': corr_val,
                                'Strength': abs(corr_val),
                            })

                if correlations:
                    corr_df = pd.DataFrame(correlations)
                    corr_df = corr_df.sort_values('Strength', ascending=False).head(10)

                    for _, row in corr_df.head(5).iterrows():
                        strength = ("Strong" if row['Strength'] > 0.7
                                    else "Moderate" if row['Strength'] > 0.5 else "Weak")
                        color = ("#ef4444" if row['Strength'] > 0.7
                                 else "#f59e0b" if row['Strength'] > 0.5 else "#10b981")
                        st.markdown(
                            f"""
                            <div style="border-left: 4px solid {color}; padding-left: 0.5rem;">
                                <b>{row['Variable 1']} ↔ {row['Variable 2']}</b><br>
                                Correlation: {row['Correlation']:.3f} ({strength} relationship)
                            </div>
                            """,
                            unsafe_allow_html=True,
                        )
            else:
                st.info("🔗 Need at least 2 numeric columns for correlation analysis.")

        with tab3:
            st.markdown("#### 📈 Trends & Patterns")
            date_cols = df.select_dtypes(include=['datetime64']).columns.tolist()
            cat_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()

            if len(date_cols) > 0 and len(numeric_cols) > 0:
                col1, col2 = st.columns(2)
                with col1:
                    date_col = st.selectbox("Date column", date_cols, key="trend_date")
                with col2:
                    value_col = st.selectbox("Value column", numeric_cols, key="trend_value")

                df_sorted = df.sort_values(date_col)
                fig = px.line(
                    df_sorted,
                    x=date_col,
                    y=value_col,
                    title=f"{value_col} Over Time",
                    color_discrete_sequence=['#3b82f6'],
                )
                fig.update_layout(height=400)
                st.plotly_chart(fig, use_container_width=True)

            elif cat_cols and numeric_cols:
                st.markdown("#### 📊 Category-based Analysis")
                col1, col2, col3 = st.columns(3)
                with col1:
                    cat_col = st.selectbox("Category", cat_cols, key="cat_trend")
                with col2:
                    num_col = st.selectbox("Numeric value", numeric_cols, key="num_trend")
                with col3:
                    agg_func = st.selectbox("Aggregation", ["mean", "sum", "count", "median"])

                if agg_func == "count":
                    grouped = df.groupby(cat_col).size().reset_index(name='count')
                    y_col = 'count'
                else:
                    grouped = df.groupby(cat_col)[num_col].agg(agg_func).reset_index()
                    y_col = num_col

                fig = px.bar(
                    grouped,
                    x=cat_col,
                    y=y_col,
                    title=f"{agg_func.title()} of {num_col if agg_func != 'count' else 'Count'} by {cat_col}",
                    color_discrete_sequence=['#8b5cf6'],
                )
                fig.update_layout(height=400)
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("📈 Upload data with date columns or categorical data to see trends.")

        with tab4:
            st.markdown("#### 🎯 Custom Analysis Builder")
            col1, col2 = st.columns([1, 2])

            with col1:
                viz_type = st.selectbox(
                    "Chart Type",
                    ["Scatter Plot", "Bar Chart", "Pie Chart", "Sunburst", "Treemap"],
                )
                if viz_type == "Scatter Plot" and len(numeric_cols) >= 2:
                    x_col = st.selectbox("X-axis", numeric_cols, key="custom_x")
                    y_col = st.selectbox("Y-axis", numeric_cols, key="custom_y")
                    color_col = st.selectbox("Color by", ["None"] + list(df.columns),
                                             key="custom_color")
                    size_col = st.selectbox("Size by", ["None"] + numeric_cols,
                                            key="custom_size")
                elif viz_type in ["Bar Chart", "Pie Chart"] and cat_cols:
                    cat_col = st.selectbox("Category", cat_cols, key="custom_cat")
                    if numeric_cols:
                        val_col = st.selectbox("Value (optional)", ["Count"] + numeric_cols,
                                               key="custom_val")
                    else:
                        val_col = "Count"

            with col2:
                # NOTE(review): only Scatter and Pie are rendered here; the
                # recovered source had no rendering branch for Bar Chart,
                # Sunburst or Treemap — confirm whether that is intentional.
                try:
                    if viz_type == "Scatter Plot" and len(numeric_cols) >= 2:
                        fig = px.scatter(
                            df,
                            x=x_col,
                            y=y_col,
                            color=None if color_col == "None" else color_col,
                            size=None if size_col == "None" else size_col,
                            title=f"{y_col} vs {x_col}",
                            color_discrete_sequence=['#3b82f6'],
                            hover_data=df.columns[:5].tolist(),
                        )
                        fig.update_layout(height=500)
                        st.plotly_chart(fig, use_container_width=True)

                    elif viz_type == "Pie Chart" and cat_cols:
                        if val_col == "Count":
                            value_counts = df[cat_col].value_counts().head(8)
                            fig = px.pie(
                                values=value_counts.values,
                                names=value_counts.index,
                                title=f"Distribution of {cat_col}",
                            )
                        else:
                            grouped = df.groupby(cat_col)[val_col].sum().head(8)
                            fig = px.pie(
                                values=grouped.values,
                                names=grouped.index,
                                title=f"{val_col} by {cat_col}",
                            )
                        fig.update_layout(height=500)
                        st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    st.error(f"Error creating visualization: {str(e)}")

    # --- Recommendations ---
    st.markdown("### 🎯 AI-Generated Recommendations")
    raw_recommendations = results.get('recommendations', [])
    if raw_recommendations:
        parsed_recommendations = parse_insights_and_recommendations(
            raw_recommendations, "recommendation")
        if parsed_recommendations:
            st.markdown(f"**{len(parsed_recommendations)} actionable recommendations:**")
            st.markdown("<br>", unsafe_allow_html=True)
            for i, rec in enumerate(parsed_recommendations):
                st.markdown(
                    f"""
                    <div style="padding: 0.5rem 0;">
                        <b>{i+1}</b> 🎯 <b>Recommendation {i+1}:</b> {rec}
                    </div>
                    """,
                    unsafe_allow_html=True,
                )
        else:
            st.info("🎯 No recommendations could be extracted from the analysis.")
    else:
        st.info("🎯 No recommendations were generated.")

    # --- Downloads ---
    st.markdown("### 💾 Download Your Results")

    download_items = [
        ("📄", "Analysis Report (JSON)", "Download complete analysis", "json"),
        ("📊", "Enhanced Dataset (CSV)", "Download processed data", "csv"),
        ("📋", "Executive Summary (MD)", "Download business report", "md"),
    ]
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    for col, (icon, title, desc, file_type) in zip(st.columns(3), download_items):
        with col:
            st.markdown(
                f"""
                <div style="text-align: center;">
                    <div style="font-size: 2rem;">{icon}</div>
                    <h5>{title}</h5>
                    <small>{desc}</small>
                </div>
                """,
                unsafe_allow_html=True,
            )

            if file_type == "json":
                data = json.dumps(results, indent=2, default=str)
                filename = f"analysis_results_{timestamp}.json"
                mime = "application/json"
            elif file_type == "csv":
                data = st.session_state.dataset.to_csv(index=False)
                filename = f"enhanced_dataset_{timestamp}.csv"
                mime = "text/csv"
            else:  # md
                data = generate_report(results)
                filename = f"executive_summary_{timestamp}.md"
                mime = "text/markdown"

            st.download_button(
                label=f"Download {file_type.upper()}",
                data=data,
                file_name=filename,
                mime=mime,
                use_container_width=True,
            )

    st.markdown("<br>", unsafe_allow_html=True)


def generate_report(results):
    """Generate a markdown executive-summary report from analysis results.

    Args:
        results: dict produced by ``DataAnalysisAgent.analyze_dataset``.

    Returns:
        str: a complete markdown document.
    """
    filename = getattr(st.session_state, 'uploaded_filename', 'dataset')
    info = results.get('dataset_info', {})

    # BUG FIX: the recovered source hardcoded "(unknown)" for the dataset
    # name even though `filename` was computed above; interpolate it instead.
    report = f"""# 🤖 AI Data Analysis Executive Summary

**Dataset:** {filename}
**Generated:** {datetime.now().strftime('%B %d, %Y at %I:%M %p')}
**Powered by:** Llama 3 & LangGraph AI Agents

---

## 📊 Executive Overview

This report presents key findings from an AI-powered analysis of your dataset. Our advanced language models have identified patterns, trends, and opportunities that can drive business decisions.

### Dataset Metrics
- **Total Records:** {info.get('shape', [0])[0]:,}
- **Data Points:** {len(info.get('columns', []))}
- **Data Quality Score:** {_quality_score(info):.0f}%

---

## 💡 Strategic Insights

Our AI analysis has uncovered the following key insights:

"""
    insights = results.get('insights', [])
    parsed_insights = parse_insights_and_recommendations(insights, "insight")
    for i, insight in enumerate(parsed_insights, 1):
        report += f"**{i}.** {insight}\n\n"

    report += """---

## 🎯 Recommended Actions

Based on the data analysis, we recommend the following strategic actions:

"""
    recommendations = results.get('recommendations', [])
    parsed_recommendations = parse_insights_and_recommendations(
        recommendations, "recommendation")
    for i, rec in enumerate(parsed_recommendations, 1):
        report += f"**{i}.** {rec}\n\n"

    report += f"""---

## 🔧 Technical Summary

- **Analysis Completed:** {results.get('analysis_timestamp', 'N/A')}
- **Visualizations Created:** {len(results.get('visualizations', []))}
- **Processing Errors:** {len(results.get('errors', []))}
- **AI Model Used:** Llama 3 (70B parameters)

---

## 📈 Next Steps

1. **Review Insights:** Analyze each insight for immediate actionable opportunities
2. **Implement Recommendations:** Prioritize recommendations based on business impact
3. **Monitor Progress:** Track key metrics identified in this analysis
4. **Iterate:** Regular re-analysis as new data becomes available

---

*This report was generated automatically by our AI Data Analysis Agent. For questions or support, please contact your data team.*
"""
    return report


def main():
    """Application entry point: routes between setup, upload, and results."""
    initialize_session_state()

    # If an analysis already ran, jump straight to the results view.
    if st.session_state.analysis_complete and st.session_state.analysis_results:
        display_results()

        st.markdown("---")
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            if st.button("🔄 Start New Analysis", use_container_width=True):
                # Reset session state for a fresh run.
                st.session_state.analysis_results = None
                st.session_state.analysis_complete = False
                st.session_state.dataset = None
                st.rerun()
        return

    display_hero_section()
    display_features()

    api_configured = sidebar_config()

    if not api_configured:
        # API-key-required banner (markup reconstructed).
        st.markdown(
            """
            <div style="text-align: center; padding: 1rem;">
                <div style="font-size: 2.5rem;">🔑</div>
                <h3>API Key Required</h3>
                <p>Please configure your Groq API key to unlock the power of AI analysis</p>
            </div>
            """,
            unsafe_allow_html=True,
        )

        with st.expander("🚀 Quick Setup Guide", expanded=True):
            st.markdown("""
### Option 1: Environment Variable (Recommended)
```bash
export GROQ_API_KEY="your_api_key_here"
streamlit run web_app.py
```

### Option 2: Manual Entry
1. Visit [Groq Console](https://console.groq.com/) 🔗
2. Create a free account and generate your API key
3. Enter the key in the sidebar ←
4. Upload your dataset and start analyzing!

### Supported File Formats
- **CSV files** (.csv) - Most common format
- **Excel files** (.xlsx, .xls) - Spreadsheet data
- **JSON files** (.json) - Structured data

### Tips for Best Results
- Ensure clean, well-structured data
- Include meaningful column names
- Mix of numeric and categorical columns works best
- Date/time columns enable trend analysis
""")
        return

    st.markdown("---")

    dataset_uploaded = upload_dataset()

    if dataset_uploaded:
        st.markdown("---")
        # Center the analyze button.
        col1, col2, col3 = st.columns([1, 2, 1])
        with col2:
            if st.button(
                "🚀 Analyze My Data with AI",
                type="primary",
                use_container_width=True,
                help="Start the AI-powered analysis of your dataset",
            ):
                run_analysis()

    # Footer.
    # NOTE(review): footer HTML was stripped from the recovered source;
    # restore the original markup here.
    st.markdown("""
""", unsafe_allow_html=True)


if __name__ == "__main__":
    main()