Spaces:

saptyfun
/

multiagent

Sleeping

App Files Files Community

saptyfun committed on Jun 17, 2025

Commit

d053b0b

verified ·

1 Parent(s): 2e8352c

Upload streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +226 -17

src/streamlit_app.py CHANGED Viewed

@@ -58,20 +58,29 @@ class SimpleDashboard:
         data = []
         for i in range(50):
             data.append({
                 'id': i,
                 'session_id': f"session_{random.randint(1000, 9999)}",
                 'agent_name': random.choice(agents),
                 'query': f"Sample query {i}",
-                'response': f"Sample response {i} with detailed information...",
-                'overall_score': random.uniform(7.0, 9.5),
                 'relevance_score': random.uniform(7.0, 9.5),
-                'accuracy_score': random.uniform(7.0, 9.5),
                 'completeness_score': random.uniform(7.0, 9.5),
                 'coherence_score': random.uniform(7.0, 9.5),
                 'guardrails_passed': True,
                 'safety_score': random.uniform(8.0, 10.0),
                 'execution_time_ms': random.uniform(500, 2000),
                 'timestamp': datetime.now() - timedelta(days=random.randint(0, 30))
             })
@@ -103,9 +112,16 @@ class SimpleDashboard:
             accuracy_score REAL,
             completeness_score REAL,
             coherence_score REAL,
             guardrails_passed BOOLEAN,
             safety_score REAL,
             execution_time_ms REAL,
             timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
         )
         ''')
@@ -142,11 +158,52 @@ class SimpleDashboard:
             agent = random.choice(agents)
             query = random.choice(sample_queries[agent])
-            # Generate detailed response
-            response = f"Based on your query about {query[:30]}..., here's a comprehensive response with detailed information and actionable recommendations."
             # Generate realistic scores
             base_score = random.uniform(7.0, 9.5)
             timestamp = datetime.now() - timedelta(days=random.randint(0, 30))
@@ -154,18 +211,16 @@ class SimpleDashboard:
             INSERT INTO evaluation_logs (
                 session_id, agent_name, query, response, overall_score,
                 relevance_score, accuracy_score, completeness_score, coherence_score,
-                guardrails_passed, safety_score, execution_time_ms, timestamp
-            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             ''', (
                 session_id, agent, query, response, base_score,
-                base_score + random.uniform(-0.3, 0.3),
-                base_score + random.uniform(-0.4, 0.2),
-                base_score + random.uniform(-0.5, 0.3),
-                base_score + random.uniform(-0.2, 0.4),
-                random.choice([True, True, True, False]),  # 75% pass rate
-                random.uniform(8.0, 10.0),
-                random.uniform(500, 2000),
-                timestamp.isoformat()
             ))
         conn.commit()
@@ -360,6 +415,156 @@ class SimpleDashboard:
                         if 'timestamp' in row:
                             st.write(f"Timestamp: {row['timestamp']}")
     def run(self):
         """Run the dashboard"""
         st.title("🤖 Multi-Agent System Dashboard")
@@ -371,10 +576,11 @@ class SimpleDashboard:
         df = self.load_data()
         # Create tabs
-        tab1, tab2, tab3 = st.tabs([
             "📈 Overview",
             "🤖 Agent Performance",
-            "📝 Response Analysis"
         ])
         with tab1:
@@ -386,6 +592,9 @@ class SimpleDashboard:
         with tab3:
             self.show_response_analysis(df)
         # Footer
         st.markdown("---")
         st.markdown("🚀 **Multi-Agent System Dashboard** | Built with Streamlit & Plotly")

         data = []
         for i in range(50):
+            base_score = random.uniform(7.0, 9.5)
+            accuracy = random.uniform(7.0, 9.5)
             data.append({
                 'id': i,
                 'session_id': f"session_{random.randint(1000, 9999)}",
                 'agent_name': random.choice(agents),
                 'query': f"Sample query {i}",
+                'response': f"Sample response {i} with detailed information and comprehensive guidance...",
+                'overall_score': base_score,
                 'relevance_score': random.uniform(7.0, 9.5),
+                'accuracy_score': accuracy,
                 'completeness_score': random.uniform(7.0, 9.5),
                 'coherence_score': random.uniform(7.0, 9.5),
+                'hallucination_score': max(0, min(10, 10 - accuracy + random.uniform(-1.0, 1.0))),
                 'guardrails_passed': True,
                 'safety_score': random.uniform(8.0, 10.0),
                 'execution_time_ms': random.uniform(500, 2000),
+                'input_tokens': random.randint(20, 100),
+                'output_tokens': random.randint(100, 500),
+                'total_tokens': random.randint(120, 600),
+                'cost_usd': random.uniform(0.001, 0.02),
+                'llm_provider': random.choice(["azure", "openai", "anthropic"]),
+                'model_name': 'gpt-4o',
                 'timestamp': datetime.now() - timedelta(days=random.randint(0, 30))
             })
             accuracy_score REAL,
             completeness_score REAL,
             coherence_score REAL,
+            hallucination_score REAL,
             guardrails_passed BOOLEAN,
             safety_score REAL,
             execution_time_ms REAL,
+            input_tokens INTEGER,
+            output_tokens INTEGER,
+            total_tokens INTEGER,
+            cost_usd REAL,
+            llm_provider TEXT,
+            model_name TEXT,
             timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
         )
         ''')
             agent = random.choice(agents)
             query = random.choice(sample_queries[agent])
+            # Generate comprehensive response
+            response_templates = {
+                "Diet Agent": [
+                    "Thank you for your question about nutrition and dietary guidance. I'd be happy to help you develop a healthier relationship with food and create sustainable eating habits.",
+                    "I understand you're looking for dietary advice, and I'm here to provide evidence-based nutritional guidance tailored to your specific needs and goals."
+                ],
+                "Support Agent": [
+                    "I appreciate you reaching out for support. It takes courage to ask for help, and I'm here to provide you with practical strategies and emotional guidance.",
+                    "Thank you for sharing your concerns with me. I understand this can be challenging, and I want to help you work through this step by step with compassion and understanding."
+                ],
+                "Queries Agent": [
+                    "Excellent question! This is a fascinating topic that involves cutting-edge technology and has significant implications for our future. Let me provide you with a comprehensive overview.",
+                    "Thank you for this thought-provoking question. This subject encompasses multiple disciplines and recent innovations. I'll break this down into key concepts and practical applications."
+                ]
+            }
+            base_response = random.choice(response_templates[agent])
+            # Add detailed information
+            if agent == "Diet Agent":
+                details = "**Key Nutritional Recommendations:**\n\n1. **Whole Foods Focus**: Prioritize unprocessed foods like fresh fruits, vegetables, whole grains, lean proteins, and healthy fats.\n\n2. **Portion Control**: Use the plate method - fill half your plate with non-starchy vegetables, one quarter with lean protein, and one quarter with complex carbohydrates.\n\n3. **Hydration**: Aim for 8-10 glasses of water daily to support metabolism and overall health."
+            elif agent == "Support Agent":
+                details = "**Comprehensive Support Strategy:**\n\n**Immediate Coping Techniques:**\n1. **Deep Breathing**: Practice the 4-7-8 technique - inhale for 4 counts, hold for 7, exhale for 8.\n\n2. **Grounding Exercises**: Use the 5-4-3-2-1 method - identify 5 things you can see, 4 you can touch, 3 you can hear, 2 you can smell, and 1 you can taste.\n\n**Long-term Strategies:**\n- Establish a consistent daily routine\n- Practice mindfulness meditation for 10-15 minutes daily"
+            else:  # Queries Agent
+                details = "**Technical Deep Dive:**\n\n**Fundamental Concepts:**\nThis technology represents a convergence of multiple disciplines including computer science, mathematics, engineering, and domain-specific expertise.\n\n**Current Implementation:**\n1. **Healthcare**: AI-powered diagnostic tools and personalized treatment plans\n2. **Finance**: Algorithmic trading and fraud detection\n3. **Transportation**: Autonomous vehicles and traffic optimization"
+            response = f"{base_response}\n\n{details}"
             # Generate realistic scores
             base_score = random.uniform(7.0, 9.5)
+            relevance_score = max(0, min(10, base_score + random.uniform(-0.3, 0.3)))
+            accuracy_score = max(0, min(10, base_score + random.uniform(-0.4, 0.2)))
+            completeness_score = max(0, min(10, base_score + random.uniform(-0.5, 0.3)))
+            coherence_score = max(0, min(10, base_score + random.uniform(-0.2, 0.4)))
+            hallucination_score = max(0, min(10, 10 - accuracy_score + random.uniform(-1.0, 1.0)))
+            # Generate token consumption
+            response_length = len(response)
+            input_tokens = int(len(query.split()) * 1.3)
+            output_tokens = int(response_length / 4)
+            total_tokens = input_tokens + output_tokens
+            # Calculate cost
+            llm_provider = random.choice(["azure", "openai", "anthropic"])
+            cost_per_1k = {"azure": 0.03, "openai": 0.03, "anthropic": 0.025}
+            cost_usd = (total_tokens / 1000) * cost_per_1k[llm_provider]
             timestamp = datetime.now() - timedelta(days=random.randint(0, 30))
             INSERT INTO evaluation_logs (
                 session_id, agent_name, query, response, overall_score,
                 relevance_score, accuracy_score, completeness_score, coherence_score,
+                hallucination_score, guardrails_passed, safety_score, execution_time_ms,
+                input_tokens, output_tokens, total_tokens, cost_usd, llm_provider, model_name, timestamp
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             ''', (
                 session_id, agent, query, response, base_score,
+                relevance_score, accuracy_score, completeness_score, coherence_score,
+                hallucination_score, random.choice([True, True, True, False]),  # 75% pass rate
+                random.uniform(8.0, 10.0), random.uniform(500, 2000),
+                input_tokens, output_tokens, total_tokens, round(cost_usd, 4),
+                llm_provider, "gpt-4o", timestamp.isoformat()
             ))
         conn.commit()
                         if 'timestamp' in row:
                             st.write(f"Timestamp: {row['timestamp']}")
+    def show_workflow_visualization(self, df):
+        """Render the "Workflow Visualization" tab for a single session.
+
+        Lets the user pick one session_id from df, then shows aggregate
+        metrics for that session, one expander per row ("step"), a session
+        summary, and a CSV export of the session's rows.
+
+        Args:
+            df: DataFrame of evaluation rows. The columns session_id,
+                agent_name, query, response, overall_score,
+                execution_time_ms and guardrails_passed are read
+                unconditionally. relevance_score, accuracy_score,
+                completeness_score, coherence_score, hallucination_score,
+                input_tokens, output_tokens, total_tokens, cost_usd,
+                llm_provider and model_name are treated as optional.
+
+        Returns:
+            None. All output is produced through Streamlit calls.
+        """
+        st.header("🔄 Workflow Visualization")
+        # Nothing to select from when there are no rows at all.
+        if df.empty:
+            st.warning("No data available for workflow visualization.")
+            return
+        # Session selection
+        sessions = df['session_id'].unique()
+        selected_session = st.selectbox("Select Session", sessions, key="workflow_session")
+        # Filter data for selected session
+        session_data = df[df['session_id'] == selected_session]
+        # Defensive guard: the selected value comes from df itself, so this
+        # is presumably only reachable for values that do not compare equal
+        # to themselves (e.g. a NaN session_id) - TODO confirm.
+        if session_data.empty:
+            st.warning("No data found for selected session.")
+            return
+        # Session metrics overview
+        st.subheader("📈 Session Metrics Overview")
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            avg_score = session_data['overall_score'].mean()
+            st.metric("Avg Overall Score", f"{avg_score:.2f}/10")
+        with col2:
+            avg_latency = session_data['execution_time_ms'].mean()
+            st.metric("Avg Response Time", f"{avg_latency:.0f}ms")
+        with col3:
+            # hallucination_score is optional; show "N/A" when the column is absent.
+            if 'hallucination_score' in session_data.columns:
+                avg_hallucination = session_data['hallucination_score'].mean()
+                st.metric("Avg Hallucination", f"{avg_hallucination:.2f}/10")
+            else:
+                st.metric("Avg Hallucination", "N/A")
+        with col4:
+            if 'total_tokens' in session_data.columns:
+                total_tokens = session_data['total_tokens'].sum()
+                # Cost falls back to 0 when tokens are present but cost_usd is not.
+                total_cost = session_data['cost_usd'].sum() if 'cost_usd' in session_data.columns else 0
+                # The third positional argument is st.metric's delta slot; it
+                # is used here to display the token count under the cost.
+                st.metric("Total Cost", f"${total_cost:.4f}", f"{total_tokens:,} tokens")
+            else:
+                st.metric("Total Cost", "N/A")
+        # Workflow steps
+        st.subheader("🔍 Workflow Steps")
+        # One expander per row; steps are numbered by position within
+        # session_data (idx), not by the DataFrame index label.
+        for idx, (_, row) in enumerate(session_data.iterrows()):
+            with st.expander(f"Step {idx + 1}: {row['agent_name']} - Score: {row['overall_score']:.2f}/10"):
+                col1, col2 = st.columns([1, 1])
+                with col1:
+                    st.markdown("**Query:**")
+                    st.write(row['query'])
+                    # Performance metrics chart
+                    st.markdown("**Performance Metrics:**")
+                    # Sub-scores missing from the row are charted as 0.
+                    metrics_data = {
+                        'Overall': row['overall_score'],
+                        'Relevance': row.get('relevance_score', 0),
+                        'Accuracy': row.get('accuracy_score', 0),
+                        'Completeness': row.get('completeness_score', 0),
+                        'Coherence': row.get('coherence_score', 0)
+                    }
+                    # row is a pandas Series here, so `in` tests its index
+                    # labels (the column names), not its values.
+                    if 'hallucination_score' in row:
+                        metrics_data['Hallucination'] = row['hallucination_score']
+                    fig = px.bar(
+                        x=list(metrics_data.keys()),
+                        y=list(metrics_data.values()),
+                        title="Score Breakdown",
+                        labels={'x': 'Metric', 'y': 'Score (0-10)'}
+                    )
+                    fig.update_layout(height=300, showlegend=False)
+                    st.plotly_chart(fig, use_container_width=True)
+                with col2:
+                    st.markdown("**Response:**")
+                    if pd.notna(row['response']):
+                        st.write(row['response'])
+                    else:
+                        st.write("No response available")
+                    # Resource consumption
+                    st.markdown("**Resource Consumption:**")
+                    # NOTE(review): only input_tokens is checked for NaN. If
+                    # output_tokens or total_tokens is present but NaN,
+                    # row.get() returns the NaN and int() raises ValueError.
+                    if 'input_tokens' in row and pd.notna(row['input_tokens']):
+                        token_col1, token_col2 = st.columns(2)
+                        with token_col1:
+                            st.metric("Input Tokens", f"{int(row['input_tokens']):,}")
+                            st.metric("Output Tokens", f"{int(row.get('output_tokens', 0)):,}")
+                        with token_col2:
+                            st.metric("Total Tokens", f"{int(row.get('total_tokens', 0)):,}")
+                            st.metric("Cost", f"${row.get('cost_usd', 0):.4f}")
+                    # Execution details
+                    st.markdown("**Execution Details:**")
+                    st.write(f"⏱️ **Execution Time:** {row['execution_time_ms']:.0f}ms")
+                    if 'llm_provider' in row:
+                        st.write(f"🤖 **LLM Provider:** {row['llm_provider']}")
+                    if 'model_name' in row:
+                        st.write(f"🧠 **Model:** {row['model_name']}")
+                    st.write(f"🛡️ **Safety Passed:** {'✅' if row['guardrails_passed'] else '❌'}")
+        # Session summary
+        st.subheader("📋 Session Summary")
+        summary_col1, summary_col2, summary_col3 = st.columns(3)
+        with summary_col1:
+            st.markdown("**Quality Metrics:**")
+            st.write(f"• Average Overall Score: {session_data['overall_score'].mean():.2f}/10")
+            # Row with the highest overall_score; only its agent_name is shown.
+            best_step = session_data.loc[session_data['overall_score'].idxmax()]
+            st.write(f"• Best Performing Step: {best_step['agent_name']}")
+            # NOTE(review): with pandas' default sample std, a session with a
+            # single row yields NaN, which renders as "nan" - confirm this is
+            # acceptable for one-step sessions.
+            st.write(f"• Consistency (Std Dev): {session_data['overall_score'].std():.2f}")
+        with summary_col2:
+            st.markdown("**Performance Metrics:**")
+            st.write(f"• Total Execution Time: {session_data['execution_time_ms'].sum():.0f}ms")
+            st.write(f"• Average Response Time: {session_data['execution_time_ms'].mean():.0f}ms")
+            st.write(f"• Fastest Step: {session_data['execution_time_ms'].min():.0f}ms")
+        with summary_col3:
+            st.markdown("**Resource Usage:**")
+            # Cost lines are only shown when token data exists as well.
+            if 'total_tokens' in session_data.columns:
+                st.write(f"• Total Tokens Used: {session_data['total_tokens'].sum():,}")
+                if 'cost_usd' in session_data.columns:
+                    st.write(f"• Total Cost: ${session_data['cost_usd'].sum():.4f}")
+                    st.write(f"• Avg Cost per Query: ${session_data['cost_usd'].mean():.4f}")
+            else:
+                st.write("• Token data not available")
+        # Export functionality
+        st.subheader("📤 Export Workflow Data")
+        # NOTE(review): the download button is nested under st.button, so it
+        # is rendered only on the script run in which the export button was
+        # clicked - presumably a deliberate two-click flow; verify.
+        if st.button("Export Session Data to CSV", key="export_workflow"):
+            csv_data = session_data.to_csv(index=False)
+            st.download_button(
+                label="Download CSV",
+                data=csv_data,
+                file_name=f"workflow_session_{selected_session}.csv",
+                mime="text/csv"
+            )
     def run(self):
         """Run the dashboard"""
         st.title("🤖 Multi-Agent System Dashboard")
         df = self.load_data()
         # Create tabs
+        tab1, tab2, tab3, tab4 = st.tabs([
             "📈 Overview",
             "🤖 Agent Performance",
+            "📝 Response Analysis",
+            "🔄 Workflow Visualization"
         ])
         with tab1:
         with tab3:
             self.show_response_analysis(df)
+        with tab4:
+            self.show_workflow_visualization(df)
         # Footer
         st.markdown("---")
         st.markdown("🚀 **Multi-Agent System Dashboard** | Built with Streamlit & Plotly")