saptyfun committed on
Commit
2e8352c
·
verified ·
1 Parent(s): 1350f34

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +617 -0
streamlit_app.py ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simplified Hugging Face Spaces compatible Multi-Agent System Dashboard
4
+ """
5
+
6
+ import os
7
+ import sys
8
+ import tempfile
9
+ import sqlite3
10
+ from pathlib import Path
11
+ import warnings
12
+ from datetime import datetime, timedelta
13
+ import random
14
+
15
+ # Suppress warnings
16
+ warnings.filterwarnings('ignore')
17
+
18
+ # Set environment variables for Hugging Face Spaces
19
+ os.environ['STREAMLIT_SERVER_HEADLESS'] = 'true'
20
+ os.environ['STREAMLIT_SERVER_PORT'] = '7860'
21
+ os.environ['STREAMLIT_BROWSER_GATHER_USAGE_STATS'] = 'false'
22
+
23
+ # Import streamlit first and set page config
24
+ import streamlit as st
25
+
26
+ st.set_page_config(
27
+ page_title="πŸ€– Multi-Agent System Dashboard",
28
+ page_icon="πŸ€–",
29
+ layout="wide",
30
+ initial_sidebar_state="expanded"
31
+ )
32
+
33
+ # Import other required modules
34
+ import pandas as pd
35
+ import plotly.express as px
36
+ import plotly.graph_objects as go
37
+ import json
38
+ import numpy as np
39
+ from typing import Dict, List, Any
40
+
41
+ class SimpleDashboard:
42
+ def __init__(self):
43
+ # Use temp directory for database
44
+ temp_dir = tempfile.gettempdir()
45
+ self.db_path = os.path.join(temp_dir, "evaluation_logs.db")
46
+ try:
47
+ self.setup_demo_data()
48
+ except Exception as e:
49
+ st.error(f"Setup error: {str(e)}")
50
+ self.create_fallback_data()
51
+
52
+ def create_fallback_data(self):
53
+ """Create fallback data if database fails"""
54
+ st.warning("Using fallback demo data")
55
+
56
+ # Create sample data directly
57
+ agents = ["Diet Agent", "Support Agent", "Queries Agent"]
58
+ data = []
59
+
60
+ for i in range(50):
61
+ base_score = random.uniform(7.0, 9.5)
62
+ accuracy = random.uniform(7.0, 9.5)
63
+ data.append({
64
+ 'id': i,
65
+ 'session_id': f"session_{random.randint(1000, 9999)}",
66
+ 'agent_name': random.choice(agents),
67
+ 'query': f"Sample query {i}",
68
+ 'response': f"Sample response {i} with detailed information and comprehensive guidance...",
69
+ 'overall_score': base_score,
70
+ 'relevance_score': random.uniform(7.0, 9.5),
71
+ 'accuracy_score': accuracy,
72
+ 'completeness_score': random.uniform(7.0, 9.5),
73
+ 'coherence_score': random.uniform(7.0, 9.5),
74
+ 'hallucination_score': max(0, min(10, 10 - accuracy + random.uniform(-1.0, 1.0))),
75
+ 'guardrails_passed': True,
76
+ 'safety_score': random.uniform(8.0, 10.0),
77
+ 'execution_time_ms': random.uniform(500, 2000),
78
+ 'input_tokens': random.randint(20, 100),
79
+ 'output_tokens': random.randint(100, 500),
80
+ 'total_tokens': random.randint(120, 600),
81
+ 'cost_usd': random.uniform(0.001, 0.02),
82
+ 'llm_provider': random.choice(["azure", "openai", "anthropic"]),
83
+ 'model_name': 'gpt-4o',
84
+ 'timestamp': datetime.now() - timedelta(days=random.randint(0, 30))
85
+ })
86
+
87
+ self.fallback_df = pd.DataFrame(data)
88
+ self.use_fallback = True
89
+
90
+ def setup_demo_data(self):
91
+ """Setup demo database"""
92
+ self.use_fallback = False
93
+
94
+ if not os.path.exists(self.db_path):
95
+ self.create_demo_database()
96
+
97
+ def create_demo_database(self):
98
+ """Create demo database"""
99
+ conn = sqlite3.connect(self.db_path)
100
+ cursor = conn.cursor()
101
+
102
+ # Create table
103
+ cursor.execute('''
104
+ CREATE TABLE IF NOT EXISTS evaluation_logs (
105
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
106
+ session_id TEXT NOT NULL,
107
+ agent_name TEXT NOT NULL,
108
+ query TEXT NOT NULL,
109
+ response TEXT,
110
+ overall_score REAL,
111
+ relevance_score REAL,
112
+ accuracy_score REAL,
113
+ completeness_score REAL,
114
+ coherence_score REAL,
115
+ hallucination_score REAL,
116
+ guardrails_passed BOOLEAN,
117
+ safety_score REAL,
118
+ execution_time_ms REAL,
119
+ input_tokens INTEGER,
120
+ output_tokens INTEGER,
121
+ total_tokens INTEGER,
122
+ cost_usd REAL,
123
+ llm_provider TEXT,
124
+ model_name TEXT,
125
+ timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
126
+ )
127
+ ''')
128
+
129
+ # Insert demo data
130
+ agents = ["Diet Agent", "Support Agent", "Queries Agent"]
131
+
132
+ sample_queries = {
133
+ "Diet Agent": [
134
+ "What's a healthy meal plan for weight loss?",
135
+ "Can you suggest low-carb breakfast options?",
136
+ "What are the benefits of intermittent fasting?",
137
+ "How much protein should I eat daily?",
138
+ "What foods are good for heart health?"
139
+ ],
140
+ "Support Agent": [
141
+ "I'm having trouble sleeping, can you help?",
142
+ "How do I manage work stress?",
143
+ "I feel overwhelmed with my tasks",
144
+ "Can you help me organize my schedule?",
145
+ "How to improve my productivity?"
146
+ ],
147
+ "Queries Agent": [
148
+ "What are the latest developments in AI?",
149
+ "How does blockchain technology work?",
150
+ "What is quantum computing?",
151
+ "Explain machine learning algorithms",
152
+ "What are the benefits of cloud computing?"
153
+ ]
154
+ }
155
+
156
+ for i in range(100):
157
+ session_id = f"session_{random.randint(1000, 9999)}"
158
+ agent = random.choice(agents)
159
+ query = random.choice(sample_queries[agent])
160
+
161
+ # Generate comprehensive response
162
+ response_templates = {
163
+ "Diet Agent": [
164
+ "Thank you for your question about nutrition and dietary guidance. I'd be happy to help you develop a healthier relationship with food and create sustainable eating habits.",
165
+ "I understand you're looking for dietary advice, and I'm here to provide evidence-based nutritional guidance tailored to your specific needs and goals."
166
+ ],
167
+ "Support Agent": [
168
+ "I appreciate you reaching out for support. It takes courage to ask for help, and I'm here to provide you with practical strategies and emotional guidance.",
169
+ "Thank you for sharing your concerns with me. I understand this can be challenging, and I want to help you work through this step by step with compassion and understanding."
170
+ ],
171
+ "Queries Agent": [
172
+ "Excellent question! This is a fascinating topic that involves cutting-edge technology and has significant implications for our future. Let me provide you with a comprehensive overview.",
173
+ "Thank you for this thought-provoking question. This subject encompasses multiple disciplines and recent innovations. I'll break this down into key concepts and practical applications."
174
+ ]
175
+ }
176
+
177
+ base_response = random.choice(response_templates[agent])
178
+
179
+ # Add detailed information
180
+ if agent == "Diet Agent":
181
+ details = "**Key Nutritional Recommendations:**\n\n1. **Whole Foods Focus**: Prioritize unprocessed foods like fresh fruits, vegetables, whole grains, lean proteins, and healthy fats.\n\n2. **Portion Control**: Use the plate method - fill half your plate with non-starchy vegetables, one quarter with lean protein, and one quarter with complex carbohydrates.\n\n3. **Hydration**: Aim for 8-10 glasses of water daily to support metabolism and overall health."
182
+ elif agent == "Support Agent":
183
+ details = "**Comprehensive Support Strategy:**\n\n**Immediate Coping Techniques:**\n1. **Deep Breathing**: Practice the 4-7-8 technique - inhale for 4 counts, hold for 7, exhale for 8.\n\n2. **Grounding Exercises**: Use the 5-4-3-2-1 method - identify 5 things you can see, 4 you can touch, 3 you can hear, 2 you can smell, and 1 you can taste.\n\n**Long-term Strategies:**\n- Establish a consistent daily routine\n- Practice mindfulness meditation for 10-15 minutes daily"
184
+ else: # Queries Agent
185
+ details = "**Technical Deep Dive:**\n\n**Fundamental Concepts:**\nThis technology represents a convergence of multiple disciplines including computer science, mathematics, engineering, and domain-specific expertise.\n\n**Current Implementation:**\n1. **Healthcare**: AI-powered diagnostic tools and personalized treatment plans\n2. **Finance**: Algorithmic trading and fraud detection\n3. **Transportation**: Autonomous vehicles and traffic optimization"
186
+
187
+ response = f"{base_response}\n\n{details}"
188
+
189
+ # Generate realistic scores
190
+ base_score = random.uniform(7.0, 9.5)
191
+ relevance_score = max(0, min(10, base_score + random.uniform(-0.3, 0.3)))
192
+ accuracy_score = max(0, min(10, base_score + random.uniform(-0.4, 0.2)))
193
+ completeness_score = max(0, min(10, base_score + random.uniform(-0.5, 0.3)))
194
+ coherence_score = max(0, min(10, base_score + random.uniform(-0.2, 0.4)))
195
+ hallucination_score = max(0, min(10, 10 - accuracy_score + random.uniform(-1.0, 1.0)))
196
+
197
+ # Generate token consumption
198
+ response_length = len(response)
199
+ input_tokens = int(len(query.split()) * 1.3)
200
+ output_tokens = int(response_length / 4)
201
+ total_tokens = input_tokens + output_tokens
202
+
203
+ # Calculate cost
204
+ llm_provider = random.choice(["azure", "openai", "anthropic"])
205
+ cost_per_1k = {"azure": 0.03, "openai": 0.03, "anthropic": 0.025}
206
+ cost_usd = (total_tokens / 1000) * cost_per_1k[llm_provider]
207
+
208
+ timestamp = datetime.now() - timedelta(days=random.randint(0, 30))
209
+
210
+ cursor.execute('''
211
+ INSERT INTO evaluation_logs (
212
+ session_id, agent_name, query, response, overall_score,
213
+ relevance_score, accuracy_score, completeness_score, coherence_score,
214
+ hallucination_score, guardrails_passed, safety_score, execution_time_ms,
215
+ input_tokens, output_tokens, total_tokens, cost_usd, llm_provider, model_name, timestamp
216
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
217
+ ''', (
218
+ session_id, agent, query, response, base_score,
219
+ relevance_score, accuracy_score, completeness_score, coherence_score,
220
+ hallucination_score, random.choice([True, True, True, False]), # 75% pass rate
221
+ random.uniform(8.0, 10.0), random.uniform(500, 2000),
222
+ input_tokens, output_tokens, total_tokens, round(cost_usd, 4),
223
+ llm_provider, "gpt-4o", timestamp.isoformat()
224
+ ))
225
+
226
+ conn.commit()
227
+ conn.close()
228
+
229
+ def load_data(self):
230
+ """Load data"""
231
+ if self.use_fallback:
232
+ return self.fallback_df
233
+
234
+ try:
235
+ conn = sqlite3.connect(self.db_path)
236
+ df = pd.read_sql_query("SELECT * FROM evaluation_logs ORDER BY timestamp DESC", conn)
237
+ conn.close()
238
+
239
+ if not df.empty:
240
+ df['timestamp'] = pd.to_datetime(df['timestamp'])
241
+
242
+ return df
243
+ except Exception as e:
244
+ st.error(f"Data loading error: {str(e)}")
245
+ return pd.DataFrame()
246
+
247
+ def show_overview(self, df):
248
+ """Show overview tab"""
249
+ st.header("πŸ“ˆ Executive Summary")
250
+
251
+ if df.empty:
252
+ st.warning("No data available")
253
+ return
254
+
255
+ # Key metrics
256
+ col1, col2, col3, col4 = st.columns(4)
257
+
258
+ with col1:
259
+ st.metric("Total Evaluations", len(df))
260
+
261
+ with col2:
262
+ avg_score = df['overall_score'].mean()
263
+ st.metric("Average Score", f"{avg_score:.2f}/10")
264
+
265
+ with col3:
266
+ safety_rate = (df['guardrails_passed'].sum() / len(df)) * 100
267
+ st.metric("Safety Rate", f"{safety_rate:.1f}%")
268
+
269
+ with col4:
270
+ avg_time = df['execution_time_ms'].mean() / 1000
271
+ st.metric("Avg Response Time", f"{avg_time:.2f}s")
272
+
273
+ # Charts
274
+ col1, col2 = st.columns(2)
275
+
276
+ with col1:
277
+ st.subheader("πŸ“Š Performance by Agent")
278
+ agent_scores = df.groupby('agent_name')['overall_score'].mean().reset_index()
279
+ fig = px.bar(
280
+ agent_scores,
281
+ x='agent_name',
282
+ y='overall_score',
283
+ title="Average Score by Agent",
284
+ color='overall_score',
285
+ color_continuous_scale='viridis'
286
+ )
287
+ st.plotly_chart(fig, use_container_width=True)
288
+
289
+ with col2:
290
+ st.subheader("πŸ“ˆ Score Distribution")
291
+ fig = px.histogram(
292
+ df,
293
+ x='overall_score',
294
+ nbins=20,
295
+ title="Score Distribution",
296
+ color_discrete_sequence=['#1f77b4']
297
+ )
298
+ st.plotly_chart(fig, use_container_width=True)
299
+
300
+ def show_agent_performance(self, df):
301
+ """Show agent performance tab"""
302
+ st.header("πŸ€– Agent Performance Analysis")
303
+
304
+ if df.empty:
305
+ st.warning("No data available")
306
+ return
307
+
308
+ # Agent selector
309
+ agents = df['agent_name'].unique()
310
+ selected_agent = st.selectbox("Select Agent", ["All Agents"] + list(agents))
311
+
312
+ # Filter data
313
+ if selected_agent != "All Agents":
314
+ filtered_df = df[df['agent_name'] == selected_agent]
315
+ else:
316
+ filtered_df = df
317
+
318
+ # Performance metrics
319
+ col1, col2 = st.columns(2)
320
+
321
+ with col1:
322
+ st.subheader("🎯 Score Breakdown")
323
+ score_cols = ['relevance_score', 'accuracy_score', 'completeness_score', 'coherence_score']
324
+ available_scores = [col for col in score_cols if col in filtered_df.columns]
325
+
326
+ if available_scores:
327
+ avg_scores = filtered_df[available_scores].mean()
328
+ fig = px.bar(
329
+ x=avg_scores.index,
330
+ y=avg_scores.values,
331
+ title=f"Average Scores - {selected_agent}",
332
+ labels={'x': 'Metric', 'y': 'Score'}
333
+ )
334
+ st.plotly_chart(fig, use_container_width=True)
335
+
336
+ with col2:
337
+ st.subheader("⏱️ Response Time Analysis")
338
+ fig = px.box(
339
+ filtered_df,
340
+ x='agent_name',
341
+ y='execution_time_ms',
342
+ title="Response Time Distribution"
343
+ )
344
+ st.plotly_chart(fig, use_container_width=True)
345
+
346
+ # Recent evaluations table
347
+ st.subheader("πŸ“‹ Recent Evaluations")
348
+ display_cols = ['agent_name', 'query', 'overall_score', 'execution_time_ms', 'timestamp']
349
+ available_cols = [col for col in display_cols if col in filtered_df.columns]
350
+
351
+ if available_cols:
352
+ recent_data = filtered_df[available_cols].head(20)
353
+ st.dataframe(recent_data, use_container_width=True)
354
+
355
+ def show_response_analysis(self, df):
356
+ """Show response analysis tab"""
357
+ st.header("πŸ“ Response Analysis & Tracing")
358
+
359
+ if df.empty:
360
+ st.warning("No data available")
361
+ return
362
+
363
+ # Response metrics
364
+ col1, col2, col3 = st.columns(3)
365
+
366
+ with col1:
367
+ if 'response' in df.columns:
368
+ avg_length = df['response'].str.len().mean()
369
+ st.metric("Avg Response Length", f"{avg_length:.0f} chars")
370
+ else:
371
+ st.metric("Avg Response Length", "N/A")
372
+
373
+ with col2:
374
+ if 'response' in df.columns:
375
+ avg_words = df['response'].str.split().str.len().mean()
376
+ st.metric("Avg Word Count", f"{avg_words:.0f} words")
377
+ else:
378
+ st.metric("Avg Word Count", "N/A")
379
+
380
+ with col3:
381
+ response_rate = (df['response'].notna().sum() / len(df)) * 100
382
+ st.metric("Response Rate", f"{response_rate:.1f}%")
383
+
384
+ # Search functionality
385
+ st.subheader("πŸ” Search Responses")
386
+ search_term = st.text_input("Search in responses:", placeholder="Enter keywords...")
387
+
388
+ if search_term and 'response' in df.columns:
389
+ mask = df['response'].str.contains(search_term, case=False, na=False)
390
+ search_results = df[mask]
391
+ else:
392
+ search_results = df.head(10)
393
+
394
+ # Display results
395
+ if not search_results.empty:
396
+ st.write(f"**Showing {len(search_results)} results**")
397
+
398
+ for idx, row in search_results.iterrows():
399
+ with st.expander(f"πŸ€– {row['agent_name']} - Score: {row['overall_score']:.1f}"):
400
+ col1, col2 = st.columns([2, 1])
401
+
402
+ with col1:
403
+ st.write("**Query:**")
404
+ st.write(row['query'])
405
+
406
+ if 'response' in row and pd.notna(row['response']):
407
+ st.write("**Response:**")
408
+ st.write(row['response'])
409
+
410
+ with col2:
411
+ st.write("**Metrics:**")
412
+ st.write(f"Overall Score: {row['overall_score']:.1f}/10")
413
+ if 'execution_time_ms' in row:
414
+ st.write(f"Response Time: {row['execution_time_ms']:.0f}ms")
415
+ if 'timestamp' in row:
416
+ st.write(f"Timestamp: {row['timestamp']}")
417
+
418
+ def show_workflow_visualization(self, df):
419
+ """Show workflow visualization tab"""
420
+ st.header("πŸ”„ Workflow Visualization")
421
+
422
+ if df.empty:
423
+ st.warning("No data available for workflow visualization.")
424
+ return
425
+
426
+ # Session selection
427
+ sessions = df['session_id'].unique()
428
+ selected_session = st.selectbox("Select Session", sessions, key="workflow_session")
429
+
430
+ # Filter data for selected session
431
+ session_data = df[df['session_id'] == selected_session]
432
+
433
+ if session_data.empty:
434
+ st.warning("No data found for selected session.")
435
+ return
436
+
437
+ # Session metrics overview
438
+ st.subheader("πŸ“ˆ Session Metrics Overview")
439
+
440
+ col1, col2, col3, col4 = st.columns(4)
441
+
442
+ with col1:
443
+ avg_score = session_data['overall_score'].mean()
444
+ st.metric("Avg Overall Score", f"{avg_score:.2f}/10")
445
+
446
+ with col2:
447
+ avg_latency = session_data['execution_time_ms'].mean()
448
+ st.metric("Avg Response Time", f"{avg_latency:.0f}ms")
449
+
450
+ with col3:
451
+ if 'hallucination_score' in session_data.columns:
452
+ avg_hallucination = session_data['hallucination_score'].mean()
453
+ st.metric("Avg Hallucination", f"{avg_hallucination:.2f}/10")
454
+ else:
455
+ st.metric("Avg Hallucination", "N/A")
456
+
457
+ with col4:
458
+ if 'total_tokens' in session_data.columns:
459
+ total_tokens = session_data['total_tokens'].sum()
460
+ total_cost = session_data['cost_usd'].sum() if 'cost_usd' in session_data.columns else 0
461
+ st.metric("Total Cost", f"${total_cost:.4f}", f"{total_tokens:,} tokens")
462
+ else:
463
+ st.metric("Total Cost", "N/A")
464
+
465
+ # Workflow steps
466
+ st.subheader("πŸ” Workflow Steps")
467
+
468
+ for idx, (_, row) in enumerate(session_data.iterrows()):
469
+ with st.expander(f"Step {idx + 1}: {row['agent_name']} - Score: {row['overall_score']:.2f}/10"):
470
+
471
+ col1, col2 = st.columns([1, 1])
472
+
473
+ with col1:
474
+ st.markdown("**Query:**")
475
+ st.write(row['query'])
476
+
477
+ # Performance metrics chart
478
+ st.markdown("**Performance Metrics:**")
479
+ metrics_data = {
480
+ 'Overall': row['overall_score'],
481
+ 'Relevance': row.get('relevance_score', 0),
482
+ 'Accuracy': row.get('accuracy_score', 0),
483
+ 'Completeness': row.get('completeness_score', 0),
484
+ 'Coherence': row.get('coherence_score', 0)
485
+ }
486
+
487
+ if 'hallucination_score' in row:
488
+ metrics_data['Hallucination'] = row['hallucination_score']
489
+
490
+ fig = px.bar(
491
+ x=list(metrics_data.keys()),
492
+ y=list(metrics_data.values()),
493
+ title="Score Breakdown",
494
+ labels={'x': 'Metric', 'y': 'Score (0-10)'}
495
+ )
496
+ fig.update_layout(height=300, showlegend=False)
497
+ st.plotly_chart(fig, use_container_width=True)
498
+
499
+ with col2:
500
+ st.markdown("**Response:**")
501
+ if pd.notna(row['response']):
502
+ st.write(row['response'])
503
+ else:
504
+ st.write("No response available")
505
+
506
+ # Resource consumption
507
+ st.markdown("**Resource Consumption:**")
508
+
509
+ if 'input_tokens' in row and pd.notna(row['input_tokens']):
510
+ token_col1, token_col2 = st.columns(2)
511
+ with token_col1:
512
+ st.metric("Input Tokens", f"{int(row['input_tokens']):,}")
513
+ st.metric("Output Tokens", f"{int(row.get('output_tokens', 0)):,}")
514
+
515
+ with token_col2:
516
+ st.metric("Total Tokens", f"{int(row.get('total_tokens', 0)):,}")
517
+ st.metric("Cost", f"${row.get('cost_usd', 0):.4f}")
518
+
519
+ # Execution details
520
+ st.markdown("**Execution Details:**")
521
+ st.write(f"⏱️ **Execution Time:** {row['execution_time_ms']:.0f}ms")
522
+ if 'llm_provider' in row:
523
+ st.write(f"πŸ€– **LLM Provider:** {row['llm_provider']}")
524
+ if 'model_name' in row:
525
+ st.write(f"🧠 **Model:** {row['model_name']}")
526
+ st.write(f"πŸ›‘οΈ **Safety Passed:** {'βœ…' if row['guardrails_passed'] else '❌'}")
527
+
528
+ # Session summary
529
+ st.subheader("πŸ“‹ Session Summary")
530
+
531
+ summary_col1, summary_col2, summary_col3 = st.columns(3)
532
+
533
+ with summary_col1:
534
+ st.markdown("**Quality Metrics:**")
535
+ st.write(f"β€’ Average Overall Score: {session_data['overall_score'].mean():.2f}/10")
536
+ best_step = session_data.loc[session_data['overall_score'].idxmax()]
537
+ st.write(f"β€’ Best Performing Step: {best_step['agent_name']}")
538
+ st.write(f"β€’ Consistency (Std Dev): {session_data['overall_score'].std():.2f}")
539
+
540
+ with summary_col2:
541
+ st.markdown("**Performance Metrics:**")
542
+ st.write(f"β€’ Total Execution Time: {session_data['execution_time_ms'].sum():.0f}ms")
543
+ st.write(f"β€’ Average Response Time: {session_data['execution_time_ms'].mean():.0f}ms")
544
+ st.write(f"β€’ Fastest Step: {session_data['execution_time_ms'].min():.0f}ms")
545
+
546
+ with summary_col3:
547
+ st.markdown("**Resource Usage:**")
548
+ if 'total_tokens' in session_data.columns:
549
+ st.write(f"β€’ Total Tokens Used: {session_data['total_tokens'].sum():,}")
550
+ if 'cost_usd' in session_data.columns:
551
+ st.write(f"β€’ Total Cost: ${session_data['cost_usd'].sum():.4f}")
552
+ st.write(f"β€’ Avg Cost per Query: ${session_data['cost_usd'].mean():.4f}")
553
+ else:
554
+ st.write("β€’ Token data not available")
555
+
556
+ # Export functionality
557
+ st.subheader("πŸ“€ Export Workflow Data")
558
+
559
+ if st.button("Export Session Data to CSV", key="export_workflow"):
560
+ csv_data = session_data.to_csv(index=False)
561
+ st.download_button(
562
+ label="Download CSV",
563
+ data=csv_data,
564
+ file_name=f"workflow_session_{selected_session}.csv",
565
+ mime="text/csv"
566
+ )
567
+
568
+ def run(self):
569
+ """Run the dashboard"""
570
+ st.title("πŸ€– Multi-Agent System Dashboard")
571
+ st.markdown("---")
572
+
573
+ st.info("πŸŽ‰ **Welcome!** This dashboard showcases evaluation metrics for Diet, Support, and Queries agents.")
574
+
575
+ # Load data
576
+ df = self.load_data()
577
+
578
+ # Create tabs
579
+ tab1, tab2, tab3, tab4 = st.tabs([
580
+ "πŸ“ˆ Overview",
581
+ "πŸ€– Agent Performance",
582
+ "πŸ“ Response Analysis",
583
+ "πŸ”„ Workflow Visualization"
584
+ ])
585
+
586
+ with tab1:
587
+ self.show_overview(df)
588
+
589
+ with tab2:
590
+ self.show_agent_performance(df)
591
+
592
+ with tab3:
593
+ self.show_response_analysis(df)
594
+
595
+ with tab4:
596
+ self.show_workflow_visualization(df)
597
+
598
+ # Footer
599
+ st.markdown("---")
600
+ st.markdown("πŸš€ **Multi-Agent System Dashboard** | Built with Streamlit & Plotly")
601
+
602
+ # Run the dashboard
603
+ try:
604
+ dashboard = SimpleDashboard()
605
+ dashboard.run()
606
+ except Exception as e:
607
+ st.error(f"Application Error: {str(e)}")
608
+ st.info("Please refresh the page.")
609
+
610
+ with st.expander("Debug Information"):
611
+ st.code(f"""
612
+ Error: {str(e)}
613
+ Type: {type(e).__name__}
614
+ Python: {sys.version}
615
+ Working Dir: {os.getcwd()}
616
+ Temp Dir: {tempfile.gettempdir()}
617
+ """)