"""Streamlit front-end for a multi-agent code assistant.

Pipeline per request: optional RAG context retrieval -> Coder agent ->
Reviewer agent -> (optional) Tester agent, with each run logged to a
monitoring system and summarized in the UI.
"""

import streamlit as st
import time
import json
from typing import Dict, Any

import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from datetime import datetime

# Import our agents
from agents.coder_agent import CoderAgent
from agents.reviewer_agent import ReviewerAgent
from agents.tester_agent import TesterAgent
from utils.rag_system import RAGSystem
from utils.monitoring import MonitoringSystem

# Set page config
st.set_page_config(
    page_title="Multi-Agent Code Assistant",
    page_icon="🤖",
    layout="wide",
)


# Initialize agents (with caching for performance)
@st.cache_resource
def init_agents() -> Dict[str, Any]:
    """Initialize all agents once per Streamlit process (cached by st.cache_resource)."""
    return {
        "coder": CoderAgent(),
        "reviewer": ReviewerAgent(),
        "tester": TesterAgent(),
        "rag": RAGSystem(),
        "monitor": MonitoringSystem(),
    }


# Initialize session state (survives Streamlit reruns within a session).
if 'conversation' not in st.session_state:
    st.session_state.conversation = []
if 'metrics' not in st.session_state:
    st.session_state.metrics = {
        "total_requests": 0,
        "successful_generations": 0,
        "average_test_score": 0,
        "average_review_score": 0,
    }

# Title and description
st.title("🤖 Multi-Agent Code Assistant")
st.markdown("""
This system uses multiple AI agents working together:
1. **Coder Agent**: Writes code based on your requirements
2. **Reviewer Agent**: Checks code quality and suggests improvements
3. **Tester Agent**: Creates and runs tests to verify functionality
4. **RAG System**: Provides relevant documentation context
""")

# Sidebar for configuration
with st.sidebar:
    st.header("⚙️ Configuration")

    # Model selection
    model_choice = st.selectbox(
        "Select Model",
        ["gpt-3.5-turbo", "gpt-4"],
        help="GPT-4 is more accurate but slower and more expensive",
    )

    # Temperature slider
    temperature = st.slider(
        "Creativity (Temperature)",
        min_value=0.0,
        max_value=1.0,
        value=0.7,
        help="Higher values make output more creative, lower values more deterministic",
    )

    # Advanced options
    with st.expander("Advanced Options"):
        use_rag = st.checkbox("Use RAG (Documentation Context)", value=True)
        auto_test = st.checkbox("Auto-run Tests", value=True)
        show_raw = st.checkbox("Show Raw Responses", value=False)

    st.divider()

    # System metrics
    st.header("📊 System Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Requests", st.session_state.metrics["total_requests"])
    with col2:
        # max(..., 1) guards against division by zero before the first request.
        st.metric(
            "Success Rate",
            f"{(st.session_state.metrics['successful_generations'] / max(st.session_state.metrics['total_requests'], 1)) * 100:.1f}%",
        )

    # Add custom documentation
    st.divider()
    st.header("📚 Add Documentation")
    custom_doc = st.text_area("Add custom documentation for RAG system:")
    if st.button("Add to Knowledge Base") and custom_doc:
        agents = init_agents()
        agents["rag"].add_document(custom_doc, "user")
        st.success("Documentation added successfully!")

# Main input area
st.subheader("💡 What code would you like to generate?")

# Example prompts
example_prompts = [
    "Write a function to reverse a string",
    "Create a function that calculates factorial",
    "Write a function to check if a number is prime",
    "Create a function that finds the Fibonacci number at position n",
    "Write a function to sort a list of integers",
]

# Create columns for example buttons, three to a row.
cols = st.columns(3)
for i, prompt in enumerate(example_prompts):
    with cols[i % 3]:
        if st.button(prompt, key=f"example_{i}"):
            # Pre-fill the text input below via its widget key; takes effect
            # on the rerun triggered by the button click.
            st.session_state.user_prompt = prompt

# Text input for custom prompt
user_prompt = st.text_input(
    "Or enter your own prompt:",
    key="user_prompt",
    placeholder="e.g., Write a Python function to validate email addresses",
)

# Initialize agents
agents = init_agents()

# Push the sidebar settings onto the (cached, shared) coder agent.
agents["coder"].model = model_choice
agents["coder"].temperature = temperature

if st.button("🚀 Generate Code", type="primary") and user_prompt:
    # Update metrics
    st.session_state.metrics["total_requests"] += 1

    # Create progress tracking
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Step 1: Get RAG context
    status_text.text("🔍 Retrieving relevant documentation...")
    context = agents["rag"].get_context(user_prompt) if use_rag else ""
    progress_bar.progress(20)

    # Step 2: Generate code
    status_text.text("💻 Generating code with Coder Agent...")
    code_result = agents["coder"].generate_code(user_prompt, context)
    progress_bar.progress(40)

    if code_result["status"] == "success":
        # Update success metrics
        st.session_state.metrics["successful_generations"] += 1

        # Display generated code
        st.subheader("✅ Generated Code")
        st.code(code_result["code"], language="python")

        # Show raw response if enabled
        if show_raw:
            with st.expander("View Raw Response"):
                st.text(code_result["raw_response"])

        # Step 3: Review code
        status_text.text("🔍 Reviewing code with Reviewer Agent...")
        review_result = agents["reviewer"].comprehensive_review(code_result["code"])
        progress_bar.progress(60)

        # Display review results
        st.subheader("📝 Code Review Results")

        # Create tabs for different review aspects
        review_tab1, review_tab2, review_tab3 = st.tabs([
            "Overall Score", "Static Analysis", "LLM Suggestions"
        ])

        with review_tab1:
            score = review_result["overall_score"]
            st.metric("Overall Quality Score", f"{score:.1f}/10.0")

            # Visual gauge: red below 3, yellow 3-7, green above 7.
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=score,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Code Quality"},
                gauge={
                    'axis': {'range': [0, 10]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 3], 'color': "red"},
                        {'range': [3, 7], 'color': "yellow"},
                        {'range': [7, 10], 'color': "green"},
                    ],
                    'threshold': {
                        'line': {'color': "black", 'width': 4},
                        'thickness': 0.75,
                        'value': score,
                    },
                },
            ))
            fig.update_layout(height=200)
            st.plotly_chart(fig, use_container_width=True)

        with review_tab2:
            static = review_result["static_analysis"]
            if static["status"] == "success":
                st.write(f"**Pylint Score:** {static.get('score', 0):.1f}/10.0")
                if static.get("issues"):
                    st.write("**Issues Found:**")
                    for issue in static["issues"][:5]:  # Show first 5 issues
                        st.write(f"- {issue.get('message', 'Unknown issue')}")
                else:
                    st.success("No issues found in static analysis!")
            else:
                st.error(f"Static analysis failed: {static.get('error')}")

        with review_tab3:
            llm = review_result["llm_review"]
            if llm["status"] == "success":
                st.write(llm["review"])
                if llm.get("suggestions"):
                    st.write("**Key Suggestions:**")
                    for suggestion in llm["suggestions"]:
                        st.write(f"• {suggestion}")
            else:
                st.error(f"LLM review failed: {llm.get('error')}")

        # Step 4: Test code if enabled
        if auto_test:
            status_text.text("🧪 Testing code with Tester Agent...")
            test_result = agents["tester"].test_code(code_result["code"], user_prompt)
            progress_bar.progress(80)

            # Display test results
            st.subheader("🧪 Test Results")

            # Test metrics
            if test_result["test_results"]["status"] == "success":
                metrics = test_result["test_results"]["metrics"]
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Total Tests", metrics["total_tests"])
                with col2:
                    st.metric("Passed Tests", metrics["passed_tests"])
                with col3:
                    st.metric("Pass Rate", f"{metrics['pass_rate']:.1f}%")

                # Test details
                with st.expander("View Test Details"):
                    for i, test_case in enumerate(test_result["test_cases"]):
                        st.write(f"**Test {i+1}:**")
                        st.write(f"- Input: `{test_case['input']}`")
                        st.write(f"- Expected: `{test_case['expected']}`")

                # Update running average of test pass rates.
                # NOTE(review): the denominator is the successful-generation
                # count, not the number of runs that were actually tested, so
                # the average skews when auto_test is toggled off — confirm
                # this is intended before changing it.
                current_avg = st.session_state.metrics["average_test_score"]
                total_req = max(1, st.session_state.metrics["successful_generations"])
                st.session_state.metrics["average_test_score"] = (
                    (current_avg * (total_req - 1) + metrics["pass_rate"]) / total_req
                )
            else:
                st.error(f"Testing failed: {test_result['test_results'].get('error')}")

        # Step 5: Log to monitoring
        status_text.text("📊 Logging results to monitoring system...")
        agents["monitor"].log_generation({
            "timestamp": datetime.now().isoformat(),
            "prompt": user_prompt,
            "code": code_result["code"],
            "model": model_choice,
            "temperature": temperature,
            "review_score": review_result["overall_score"],
            # test_result only exists when auto_test is True; the conditional
            # expression short-circuits before touching it otherwise.
            "test_score": test_result.get("test_results", {}).get("metrics", {}).get("pass_rate", 0) if auto_test else None,
            "tokens_used": code_result.get("tokens_used", 0),
        })

        # Update conversation history
        st.session_state.conversation.append({
            "role": "user",
            "content": user_prompt,
            "timestamp": datetime.now().strftime("%H:%M:%S"),
        })
        st.session_state.conversation.append({
            "role": "assistant",
            "content": f"Generated code with score {review_result['overall_score']:.1f}/10.0",
            "timestamp": datetime.now().strftime("%H:%M:%S"),
        })

        progress_bar.progress(100)
        status_text.text("✅ Process completed!")

        # Update review score running average (same incremental-mean formula
        # as the test-score average above).
        current_avg = st.session_state.metrics["average_review_score"]
        total_req = max(1, st.session_state.metrics["successful_generations"])
        st.session_state.metrics["average_review_score"] = (
            (current_avg * (total_req - 1) + review_result["overall_score"]) / total_req
        )
    else:
        st.error(f"Code generation failed: {code_result.get('error')}")
        progress_bar.progress(100)
        status_text.text("❌ Process failed!")

# Conversation history
if st.session_state.conversation:
    st.divider()
    st.subheader("📜 Conversation History")

    for message in reversed(st.session_state.conversation[-10:]):  # Show last 10 messages
        if message["role"] == "user":
            st.markdown(f"**👤 You** ({message['timestamp']}):")
            st.info(message["content"])
        else:
            st.markdown(f"**🤖 Assistant** ({message['timestamp']}):")
            st.success(message["content"])

# Monitoring dashboard (collapsible)
with st.expander("📈 View Monitoring Dashboard"):
    if st.button("Refresh Dashboard"):
        st.rerun()

    metrics_data = agents["monitor"].get_metrics()
    if metrics_data:
        # Convert to DataFrame for visualization
        df = pd.DataFrame(metrics_data)

        # Create metrics visualization
        col1, col2 = st.columns(2)
        with col1:
            if len(df) > 1:
                # Score trend (needs at least two points to draw a line).
                fig = px.line(
                    df,
                    x='timestamp',
                    y='review_score',
                    title='Code Quality Score Trend',
                    markers=True,
                )
                st.plotly_chart(fig, use_container_width=True)
        with col2:
            if 'tokens_used' in df.columns:
                # Token usage
                fig = px.bar(
                    df,
                    x='timestamp',
                    y='tokens_used',
                    title='Token Usage per Request',
                )
                st.plotly_chart(fig, use_container_width=True)

        # Export data
        st.download_button(
            label="📥 Export Metrics Data",
            data=df.to_csv(index=False),
            file_name=f"code_assistant_metrics_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv",
        )
    else:
        st.info("No metrics data available yet. Generate some code first!")

# Footer
st.divider()
st.markdown("""
---
### 🎯 **How This System Works:**
1. **Multi-Agent Architecture**: Each agent specializes in one task (coding, reviewing, testing)
2. **Agent Communication**: Agents pass structured data between each other
3. **RAG Integration**: Provides context from documentation to reduce hallucinations
4. **Evaluation Pipeline**: Continuous monitoring of code quality and test performance
5. **Incremental Improvement**: Start simple and add features gradually

**Built with**: OpenAI GPT, LangChain, ChromaDB, Streamlit
""")