saptyfun committed on
Commit
1cc7ca7
Β·
verified Β·
1 Parent(s): ea8cbdd

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +689 -34
src/streamlit_app.py CHANGED
@@ -1,40 +1,695 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
4
- import streamlit as st
5
-
6
  """
7
- # Welcome to Streamlit!
8
-
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
-
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
  """
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
 
 
 
 
 
 
22
 
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
 
 
 
 
 
25
 
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
#!/usr/bin/env python3
"""
Multi-Agent System Dashboard - Hugging Face Spaces Demo
"""

# Standard library / third-party imports used across the dashboard.
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import sqlite3
from datetime import datetime, timedelta
import json
import numpy as np
from typing import Dict, List, Any, Optional
import os
from pathlib import Path

# Set page config first
# NOTE: Streamlit requires set_page_config() to be the first st.* call
# in the script; moving it below any other Streamlit call raises an error.
st.set_page_config(
    page_title="πŸ€– Multi-Agent System Dashboard",
    page_icon="πŸ€–",
    layout="wide",
    initial_sidebar_state="expanded"
)
26
 
27
class HuggingFaceDashboard:
    """Streamlit dashboard over a SQLite demo database of agent evaluations.

    On construction it guarantees a populated `evaluation_logs.db` exists,
    then `run()` renders filters and three analytics tabs.
    """

    def __init__(self):
        # SQLite file that backs every read in this demo.
        self.db_path = "evaluation_logs.db"
        # Seed (or re-seed) demo data so the dashboard is never empty.
        self.setup_demo_data()
32
def setup_demo_data(self):
    """Ensure the demo database exists and holds enough rows to be useful.

    A database with fewer than 50 evaluation rows (or one that cannot be
    read at all) is treated as stale/corrupt demo data and rebuilt.
    """
    if not os.path.exists(self.db_path):
        self.create_demo_database()
        return

    # Check if database has data
    try:
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT COUNT(*) FROM evaluation_logs")
            count = cursor.fetchone()[0]
        finally:
            # Always release the connection, even if the query raises
            # (the original leaked the handle on error).
            conn.close()

        # If database is empty or has very little data, recreate it
        if count < 50:
            os.remove(self.db_path)
            self.create_demo_database()
    except (sqlite3.Error, OSError):
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit): any DB or filesystem error
        # means the file is unusable -- drop it and start fresh.
        if os.path.exists(self.db_path):
            os.remove(self.db_path)
        self.create_demo_database()
54
+
55
def create_demo_database(self):
    """Create a demo database with sample data.

    Builds the two tables the dashboard reads (`evaluation_logs`,
    `workflow_traces`) and fills `evaluation_logs` via
    `insert_demo_data`.  `workflow_traces` is created but left empty.
    """
    conn = sqlite3.connect(self.db_path)
    cursor = conn.cursor()

    # Create evaluation_logs table
    # One row per judged agent response; score columns are on a 0-10 scale.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS evaluation_logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            agent_name TEXT NOT NULL,
            query TEXT NOT NULL,
            response TEXT,
            overall_score REAL,
            relevance_score REAL,
            accuracy_score REAL,
            completeness_score REAL,
            coherence_score REAL,
            guardrails_passed BOOLEAN,
            safety_score REAL,
            execution_time_ms REAL,
            error_occurred BOOLEAN DEFAULT FALSE,
            llm_provider TEXT,
            model_name TEXT,
            judge_reasoning TEXT,
            guardrails_failures TEXT DEFAULT '[]',
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
    ''')

    # Create workflow_traces table
    # Per-step execution trace of a session's workflow.
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS workflow_traces (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            session_id TEXT NOT NULL,
            step_name TEXT NOT NULL,
            agent_name TEXT,
            step_type TEXT,
            input_data TEXT,
            output_data TEXT,
            execution_time_ms REAL,
            error_occurred BOOLEAN DEFAULT FALSE,
            error_details TEXT,
            timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
        )
    ''')

    # Insert demo data (commits happen here, not in insert_demo_data).
    self.insert_demo_data(cursor)

    conn.commit()
    conn.close()
107
+
108
def insert_demo_data(self, cursor):
    """Populate `evaluation_logs` with 300 synthetic evaluation rows.

    Rows are spread across three agents with agent-specific score and
    latency profiles and skewed over a 30-day window so every chart has
    data.  Writes through *cursor*; the caller is responsible for commit.

    Fix vs. original: removed the dead `safety_pass_rate = 0.95` constant,
    which contradicted the actual 2% simulated failure rate below.
    """
    import random
    from datetime import datetime, timedelta

    agents = ["Diet Agent", "Support Agent", "Queries Agent"]

    # Comprehensive sample queries for each agent
    sample_queries = {
        "Diet Agent": [
            "What's a healthy meal plan for weight loss?",
            "Can you suggest low-carb breakfast options?",
            "What are the benefits of intermittent fasting?",
            "How much protein should I eat daily?",
            "What foods are good for heart health?",
            "Can you create a vegetarian meal plan?",
            "What snacks are good for diabetics?",
            "How to meal prep for the week?",
            "What are superfoods I should include?",
            "How to calculate my daily calorie needs?",
            "What's the Mediterranean diet about?",
            "Are supplements necessary for nutrition?",
            "How to eat healthy on a budget?",
            "What foods help with inflammation?",
            "Can you suggest post-workout meals?",
            "What's a balanced breakfast for energy?",
            "How to reduce sugar in my diet?",
            "What are healthy cooking methods?",
            "Can you help with portion control?",
            "What foods boost metabolism?"
        ],
        "Support Agent": [
            "I'm having trouble sleeping, can you help?",
            "How do I manage work stress?",
            "I feel overwhelmed with my tasks",
            "Can you help me organize my schedule?",
            "I'm having difficulty focusing",
            "How to improve my productivity?",
            "I need help with time management",
            "How to deal with anxiety?",
            "Can you suggest relaxation techniques?",
            "I'm feeling burned out at work",
            "How to maintain work-life balance?",
            "I need motivation to exercise",
            "How to build better habits?",
            "I'm struggling with procrastination",
            "Can you help me set goals?",
            "How to handle difficult conversations?",
            "I need help with decision making",
            "How to boost my confidence?",
            "Can you help me manage emotions?",
            "What are good stress relief activities?"
        ],
        "Queries Agent": [
            "What are the latest developments in AI?",
            "How does blockchain technology work?",
            "What is quantum computing?",
            "Explain machine learning algorithms",
            "What are the benefits of cloud computing?",
            "How does renewable energy work?",
            "What is the future of electric vehicles?",
            "Explain cryptocurrency and Bitcoin",
            "What is cybersecurity and why is it important?",
            "How do neural networks function?",
            "What are the applications of IoT?",
            "Explain data science and analytics",
            "What is edge computing?",
            "How does 5G technology work?",
            "What are the trends in biotechnology?",
            "How does virtual reality work?",
            "What is artificial general intelligence?",
            "Explain the metaverse concept",
            "What are smart contracts?",
            "How does automation impact jobs?"
        ]
    }

    # Generate comprehensive demo data
    total_evaluations = 300  # enough rows for meaningful charts

    for i in range(total_evaluations):
        agent = random.choice(agents)
        query = random.choice(sample_queries[agent])

        # 30% of queries get a phrasing variation for realism.
        if random.random() < 0.3:
            variations = [
                f"Can you please {query.lower()}",
                f"I need help with: {query.lower()}",
                f"Could you explain {query.lower()}",
                f"What's your advice on {query.lower()}"
            ]
            query = random.choice(variations)

        # Generate realistic scores with agent-specific tendencies.
        if agent == "Diet Agent":
            base_score = random.uniform(7.5, 9.2)   # Diet agent performs well
        elif agent == "Support Agent":
            base_score = random.uniform(7.8, 9.5)   # Support agent is consistent
        else:  # Queries Agent
            base_score = random.uniform(6.8, 8.8)   # More variable for complex queries

        # Skew timestamps so recent days are denser than older ones.
        if i < 50:        # Recent data (last 3 days)
            days_ago = random.randint(0, 2)
        elif i < 150:     # Medium recent (last 2 weeks)
            days_ago = random.randint(3, 14)
        else:             # Historical (last 30 days)
            days_ago = random.randint(15, 29)

        hours_ago = random.randint(0, 23)
        minutes_ago = random.randint(0, 59)
        timestamp = datetime.now() - timedelta(days=days_ago, hours=hours_ago, minutes=minutes_ago)

        # Generate a realistic response (templates interpolate the query).
        response_templates = {
            "Diet Agent": [
                f"Based on your query about {query[:30]}..., I recommend focusing on balanced nutrition with emphasis on whole foods, proper portion sizes, and regular meal timing.",
                f"For your question regarding {query[:30]}..., here's a comprehensive approach that considers your nutritional needs and health goals.",
                f"Addressing your concern about {query[:30]}..., let me provide evidence-based dietary guidance tailored to your situation."
            ],
            "Support Agent": [
                f"I understand you're dealing with {query[:30]}... This is a common challenge, and I'm here to help you work through it step by step.",
                f"Thank you for sharing your concern about {query[:30]}... Let's explore some practical strategies that can make a real difference.",
                f"Your question about {query[:30]}... resonates with many people. Here are some effective approaches you can try."
            ],
            "Queries Agent": [
                f"Great question about {query[:30]}... This is a complex topic that involves several key concepts and recent developments.",
                f"To answer your query about {query[:30]}..., let me break this down into the fundamental principles and current applications.",
                f"Your question regarding {query[:30]}... touches on important technological and societal implications. Here's a comprehensive overview."
            ]
        }

        response = random.choice(response_templates[agent])

        # Sub-scores correlate with base_score (small noise), clamped to [0, 10].
        relevance_score = max(0, min(10, base_score + random.uniform(-0.3, 0.3)))
        accuracy_score = max(0, min(10, base_score + random.uniform(-0.4, 0.2)))
        completeness_score = max(0, min(10, base_score + random.uniform(-0.5, 0.3)))
        coherence_score = max(0, min(10, base_score + random.uniform(-0.2, 0.4)))

        # Simulate guardrail outcomes: 2% of rows fail safety checks.
        if random.random() < 0.02:
            guardrails_passed = False
            safety_score = random.uniform(3.0, 6.0)
            guardrails_failures = '["content_safety", "inappropriate_advice"]'
        else:
            guardrails_passed = True
            safety_score = random.uniform(8.5, 10.0)
            guardrails_failures = "[]"

        # Realistic execution times (with some variation).
        if agent == "Diet Agent":
            execution_time = random.uniform(1500, 4000)   # Moderate complexity
        elif agent == "Support Agent":
            execution_time = random.uniform(2000, 5000)   # More thoughtful responses
        else:  # Queries Agent
            execution_time = random.uniform(2500, 6000)   # Complex information retrieval

        eval_data = (
            f"demo_session_{i // 4 + 1}",   # session_id (4 queries per session)
            agent,                          # agent_name
            query,                          # query
            response,                       # response
            base_score,                     # overall_score
            relevance_score,                # relevance_score
            accuracy_score,                 # accuracy_score
            completeness_score,             # completeness_score
            coherence_score,                # coherence_score
            guardrails_passed,              # guardrails_passed
            safety_score,                   # safety_score
            execution_time,                 # execution_time_ms
            False,                          # error_occurred
            "azure",                        # llm_provider
            "gpt-4o",                       # model_name
            f"Comprehensive evaluation for {agent}: The response demonstrates good understanding of the query with appropriate depth and accuracy. Score breakdown reflects the quality across multiple dimensions.",  # judge_reasoning
            guardrails_failures,            # guardrails_failures
            timestamp.isoformat()           # timestamp
        )

        cursor.execute('''
            INSERT INTO evaluation_logs (
                session_id, agent_name, query, response, overall_score,
                relevance_score, accuracy_score, completeness_score, coherence_score,
                guardrails_passed, safety_score, execution_time_ms, error_occurred,
                llm_provider, model_name, judge_reasoning, guardrails_failures, timestamp
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', eval_data)
297
+
298
def safe_column_access(self, df: pd.DataFrame, column: str, default_value=None):
    """Fetch *column* from *df*, falling back to a Series of *default_value*.

    Keeps dashboard metrics working when an expected column is absent
    from the loaded data instead of raising a KeyError.
    """
    try:
        if column not in df.columns:
            # Same length and index as df so downstream arithmetic aligns.
            return pd.Series([default_value] * len(df), index=df.index)
        return df[column]
    except Exception:
        # Defensive catch-all: df may not behave like a DataFrame at all.
        return pd.Series([default_value] * len(df) if len(df) > 0 else [])
307
+
308
def load_data(self, date_filter: tuple = None, agent_filter: List[str] = None, session_filter: str = None) -> Dict[str, pd.DataFrame]:
    """Load evaluation and trace rows from SQLite, applying optional filters.

    Args:
        date_filter: (start_date, end_date) pair of `datetime.date`-like
            objects; both days are inclusive.  Incomplete tuples (e.g. the
            1-tuple `st.date_input` yields mid-selection) are ignored.
        agent_filter: list of agent names to keep.
        session_filter: substring match against session_id.

    Returns:
        {'evaluations': DataFrame, 'traces': DataFrame}; both empty on error.
    """
    try:
        conn = sqlite3.connect(self.db_path)

        # Base queries
        eval_query = "SELECT * FROM evaluation_logs"
        trace_query = "SELECT * FROM workflow_traces"

        # Apply filters
        conditions = []
        params = []

        # Fix vs. original: BETWEEN with a bare end-date string excluded
        # every row on the final day (stored timestamps carry a time
        # component, so '2024-01-15T10:00' > '2024-01-15').  Use a
        # half-open range [start, end + 1 day) instead, and skip
        # incomplete date tuples rather than crashing on date_filter[1].
        if date_filter and len(date_filter) == 2:
            conditions.append("timestamp >= ? AND timestamp < ?")
            params.extend([
                date_filter[0].strftime('%Y-%m-%d'),
                (date_filter[1] + timedelta(days=1)).strftime('%Y-%m-%d'),
            ])

        if agent_filter:
            placeholders = ','.join(['?' for _ in agent_filter])
            conditions.append(f"agent_name IN ({placeholders})")
            params.extend(agent_filter)

        if session_filter:
            conditions.append("session_id LIKE ?")
            params.append(f"%{session_filter}%")

        if conditions:
            where_clause = " WHERE " + " AND ".join(conditions)
            eval_query += where_clause
            trace_query += where_clause

        # Load data
        evaluations = pd.read_sql_query(eval_query, conn, params=params)
        traces = pd.read_sql_query(trace_query, conn, params=params)

        conn.close()

        # Convert timestamp columns for .dt accessors downstream.
        if not evaluations.empty:
            evaluations['timestamp'] = pd.to_datetime(evaluations['timestamp'])
        if not traces.empty:
            traces['timestamp'] = pd.to_datetime(traces['timestamp'])

        return {
            'evaluations': evaluations,
            'traces': traces
        }

    except Exception as e:
        st.error(f"Error loading data: {str(e)}")
        return {'evaluations': pd.DataFrame(), 'traces': pd.DataFrame()}
358
+
359
def create_sidebar_filters(self, data: Dict[str, pd.DataFrame]) -> Dict[str, Any]:
    """Render sidebar filter widgets and return the selected values.

    Keys in the returned dict: 'date_range', 'agents', 'session',
    'score_range', 'safety_only'.  Date/agent widgets are only shown
    when evaluation data exists, so those keys may be absent.
    """
    st.sidebar.header("πŸ” Filters")

    filters = {}

    # Date range filter
    # NOTE(review): st.date_input can return a single date or 1-tuple
    # while the user is mid-selection -- callers must handle that.
    if not data['evaluations'].empty:
        min_date = data['evaluations']['timestamp'].min().date()
        max_date = data['evaluations']['timestamp'].max().date()

        filters['date_range'] = st.sidebar.date_input(
            "πŸ“… Date Range",
            value=(min_date, max_date),
            min_value=min_date,
            max_value=max_date
        )

    # Agent filter (defaults to all agents present in the data)
    if not data['evaluations'].empty:
        agents = data['evaluations']['agent_name'].unique().tolist()
        filters['agents'] = st.sidebar.multiselect(
            "πŸ€– Agents",
            options=agents,
            default=agents
        )

    # Session filter (substring match, applied as SQL LIKE downstream)
    filters['session'] = st.sidebar.text_input(
        "πŸ” Session ID (partial match)",
        placeholder="Enter session ID..."
    )

    # Score range filter (inclusive bounds on overall_score)
    filters['score_range'] = st.sidebar.slider(
        "πŸ“Š Score Range",
        min_value=0.0,
        max_value=10.0,
        value=(0.0, 10.0),
        step=0.1
    )

    # Safety filter
    filters['safety_only'] = st.sidebar.checkbox(
        "πŸ›‘οΈ Show only safe responses",
        value=False
    )

    return filters
408
+
409
def show_executive_summary(self, data: Dict[str, pd.DataFrame]):
    """Render the Executive Summary tab: five headline metrics plus a
    2x2 grid of daily-trend charts (score, latency, safety, distribution)."""
    st.header("πŸ“ˆ Executive Summary")

    if data['evaluations'].empty:
        st.warning("No evaluation data available")
        return

    df = data['evaluations']

    # Key metrics
    col1, col2, col3, col4, col5 = st.columns(5)

    with col1:
        total_evals = len(df)
        st.metric("Total Evaluations", f"{total_evals:,}")

    with col2:
        # safe_column_access tolerates a missing column (falls back to 0).
        avg_score = self.safe_column_access(df, 'overall_score', 0).mean()
        st.metric("Average Score", f"{avg_score:.2f}/10")

    with col3:
        # len(df) > 0 is guaranteed by the empty-check above.
        safety_rate = (self.safe_column_access(df, 'guardrails_passed', True).sum() / len(df)) * 100
        st.metric("Safety Pass Rate", f"{safety_rate:.1f}%")

    with col4:
        # Stored in milliseconds; displayed in seconds.
        avg_time = self.safe_column_access(df, 'execution_time_ms', 0).mean() / 1000
        st.metric("Avg Response Time", f"{avg_time:.2f}s")

    with col5:
        unique_sessions = df['session_id'].nunique()
        st.metric("Unique Sessions", f"{unique_sessions:,}")

    # Performance trends
    st.subheader("πŸ“Š Performance Trends")

    # Daily aggregates: mean score, mean latency, % of rows passing guardrails.
    # After reset_index the grouping key keeps the column name 'timestamp'
    # (now holding dates).
    df_daily = df.groupby(df['timestamp'].dt.date).agg({
        'overall_score': 'mean',
        'execution_time_ms': 'mean',
        'guardrails_passed': lambda x: (x.sum() / len(x)) * 100
    }).reset_index()

    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Daily Average Score', 'Daily Response Time', 'Daily Safety Rate', 'Score Distribution'),
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )

    # Score trend
    fig.add_trace(
        go.Scatter(x=df_daily['timestamp'], y=df_daily['overall_score'],
                   mode='lines+markers', name='Score', line=dict(color='#1f77b4')),
        row=1, col=1
    )

    # Response time trend (ms -> s)
    fig.add_trace(
        go.Scatter(x=df_daily['timestamp'], y=df_daily['execution_time_ms']/1000,
                   mode='lines+markers', name='Response Time', line=dict(color='#ff7f0e')),
        row=1, col=2
    )

    # Safety rate trend
    fig.add_trace(
        go.Scatter(x=df_daily['timestamp'], y=df_daily['guardrails_passed'],
                   mode='lines+markers', name='Safety Rate', line=dict(color='#2ca02c')),
        row=2, col=1
    )

    # Score distribution (histogram over all rows, not daily means)
    fig.add_trace(
        go.Histogram(x=self.safe_column_access(df, 'overall_score', 0),
                     nbinsx=20, name='Score Distribution', marker_color='#d62728'),
        row=2, col=2
    )

    fig.update_layout(height=600, showlegend=False, title_text="Performance Analytics")
    st.plotly_chart(fig, use_container_width=True)
489
+
490
def show_agent_performance(self, data: Dict[str, pd.DataFrame]):
    """Render the Agent Performance tab: score/latency bar charts per
    agent plus a radar chart over the four sub-score dimensions."""
    st.header("πŸ€– Agent Performance Analysis")

    if data['evaluations'].empty:
        st.warning("No evaluation data available")
        return

    df = data['evaluations']

    # Agent comparison
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("πŸ“Š Agent Score Comparison")
        agent_scores = df.groupby('agent_name').agg({
            'overall_score': ['mean', 'std', 'count'],
            'relevance_score': 'mean',
            'accuracy_score': 'mean',
            'completeness_score': 'mean',
            'coherence_score': 'mean'
        }).round(2)

        # Flatten the MultiIndex columns produced by the multi-agg
        # (e.g. ('overall_score', 'mean') -> 'overall_score_mean').
        agent_scores.columns = ['_'.join(col).strip() for col in agent_scores.columns]

        fig = px.bar(
            x=agent_scores.index,
            y=agent_scores['overall_score_mean'],
            error_y=agent_scores['overall_score_std'],
            title="Average Score by Agent",
            labels={'x': 'Agent', 'y': 'Average Score'}
        )
        fig.update_layout(showlegend=False)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        st.subheader("⚑ Response Time Analysis")
        agent_times = df.groupby('agent_name')['execution_time_ms'].agg(['mean', 'std']).reset_index()
        agent_times['mean'] = agent_times['mean'] / 1000  # Convert to seconds
        agent_times['std'] = agent_times['std'] / 1000

        fig = px.bar(
            agent_times,
            x='agent_name',
            y='mean',
            error_y='std',
            title="Average Response Time by Agent",
            labels={'agent_name': 'Agent', 'mean': 'Response Time (seconds)'}
        )
        st.plotly_chart(fig, use_container_width=True)

    # Detailed score breakdown
    st.subheader("🎯 Detailed Score Breakdown")

    # Only plot sub-score columns actually present in the data.
    score_columns = ['relevance_score', 'accuracy_score', 'completeness_score', 'coherence_score']
    available_scores = [col for col in score_columns if col in df.columns]

    if available_scores:
        agent_detailed = df.groupby('agent_name')[available_scores].mean().reset_index()

        fig = go.Figure()

        # One radar trace per agent over the available sub-scores.
        for agent in agent_detailed['agent_name'].unique():
            agent_data = agent_detailed[agent_detailed['agent_name'] == agent]
            fig.add_trace(go.Scatterpolar(
                r=[agent_data[col].iloc[0] for col in available_scores],
                theta=[col.replace('_score', '').title() for col in available_scores],
                fill='toself',
                name=agent
            ))

        fig.update_layout(
            polar=dict(
                radialaxis=dict(visible=True, range=[0, 10])
            ),
            showlegend=True,
            title="Agent Performance Radar Chart"
        )
        st.plotly_chart(fig, use_container_width=True)
570
+
571
def show_safety_analysis(self, data: Dict[str, pd.DataFrame]):
    """Render the Safety Analysis tab: aggregate guardrail metrics,
    per-agent pass rates, and the daily safety-rate trend."""
    st.header("πŸ›‘οΈ Safety & Guardrails Analysis")

    if data['evaluations'].empty:
        st.warning("No evaluation data available")
        return

    df = data['evaluations']

    # Safety metrics
    col1, col2, col3 = st.columns(3)

    with col1:
        total_checks = len(df)
        passed_checks = self.safe_column_access(df, 'guardrails_passed', True).sum()
        # Guarded division kept for safety, though df is non-empty here.
        safety_rate = (passed_checks / total_checks) * 100 if total_checks > 0 else 0

        st.metric("Overall Safety Rate", f"{safety_rate:.1f}%", f"{passed_checks}/{total_checks}")

    with col2:
        avg_safety_score = self.safe_column_access(df, 'safety_score', 10).mean()
        st.metric("Average Safety Score", f"{avg_safety_score:.2f}/10")

    with col3:
        failed_checks = total_checks - passed_checks
        st.metric("Failed Checks", f"{failed_checks:,}")

    # Safety by agent
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("πŸ€– Safety Rate by Agent")
        safety_by_agent = df.groupby('agent_name').agg({
            'guardrails_passed': lambda x: (x.sum() / len(x)) * 100
        }).reset_index()

        fig = px.bar(
            safety_by_agent,
            x='agent_name',
            y='guardrails_passed',
            title="Safety Pass Rate by Agent",
            labels={'agent_name': 'Agent', 'guardrails_passed': 'Safety Rate (%)'},
            color='guardrails_passed',
            color_continuous_scale='RdYlGn'
        )
        fig.update_layout(showlegend=False)
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        st.subheader("πŸ“… Safety Trends Over Time")
        # Daily pass-rate percentage.
        df_daily_safety = df.groupby(df['timestamp'].dt.date).agg({
            'guardrails_passed': lambda x: (x.sum() / len(x)) * 100
        }).reset_index()

        fig = px.line(
            df_daily_safety,
            x='timestamp',
            y='guardrails_passed',
            title="Daily Safety Rate Trend",
            labels={'timestamp': 'Date', 'guardrails_passed': 'Safety Rate (%)'}
        )
        # Reference line at the 95% safety target.
        fig.add_hline(y=95, line_dash="dash", line_color="red",
                      annotation_text="95% Target")
        st.plotly_chart(fig, use_container_width=True)
636
+
637
def run(self):
    """Render the full dashboard: title, sidebar filters, three tabs, footer.

    Loads unfiltered data first so the sidebar can derive its options,
    then reloads with the chosen filters applied.
    """
    st.title("πŸ€– Multi-Agent System Dashboard - Demo")
    st.markdown("---")

    # Demo info
    st.info("πŸŽ‰ **Welcome to the Multi-Agent System Dashboard Demo!** This showcases a comprehensive evaluation system with LLM judge scoring, safety guardrails, and advanced analytics for Diet, Support, and Queries agents.")

    # Load initial data (unfiltered) to populate the sidebar widgets.
    initial_data = self.load_data()

    # Create filters
    filters = self.create_sidebar_filters(initial_data)

    # Fix vs. original: st.date_input returns a single date or a 1-tuple
    # while the user is mid-selection; forwarding that to load_data
    # crashed on date_filter[1].  Only pass a complete (start, end) pair.
    date_range = filters.get('date_range')
    if not (isinstance(date_range, (tuple, list)) and len(date_range) == 2):
        date_range = None

    # Apply filters and reload data
    filtered_data = self.load_data(
        date_filter=date_range,
        agent_filter=filters.get('agents'),
        session_filter=filters.get('session')
    )

    # Apply the remaining filters in pandas (not expressible in the SQL above).
    if not filtered_data['evaluations'].empty:
        df = filtered_data['evaluations']

        # Score range filter (inclusive bounds)
        if 'score_range' in filters:
            score_min, score_max = filters['score_range']
            df = df[(df['overall_score'] >= score_min) & (df['overall_score'] <= score_max)]

        # Safety filter
        if filters.get('safety_only', False):
            df = df[df['guardrails_passed'] == True]

        filtered_data['evaluations'] = df

    # Create tabs
    tab1, tab2, tab3 = st.tabs([
        "πŸ“ˆ Executive Summary",
        "πŸ€– Agent Performance",
        "πŸ›‘οΈ Safety Analysis"
    ])

    with tab1:
        self.show_executive_summary(filtered_data)

    with tab2:
        self.show_agent_performance(filtered_data)

    with tab3:
        self.show_safety_analysis(filtered_data)

    # Footer
    st.markdown("---")
    st.markdown("πŸš€ **Multi-Agent System Dashboard** | Built with Streamlit & Plotly | Demo hosted on Hugging Face Spaces")
692
 
693
# Entry point: build the dashboard (which seeds demo data) and render it.
if __name__ == "__main__":
    dashboard = HuggingFaceDashboard()
    dashboard.run()