File size: 14,027 Bytes
378f180
c54e455
 
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378f180
c54e455
378f180
c54e455
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378f180
 
c54e455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
import streamlit as st
import time
import json
from typing import Dict, Any
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from datetime import datetime

# Import our agents
from agents.coder_agent import CoderAgent
from agents.reviewer_agent import ReviewerAgent
from agents.tester_agent import TesterAgent
from utils.rag_system import RAGSystem
from utils.monitoring import MonitoringSystem

# Page-level Streamlit configuration (title, icon, wide layout).
_PAGE_CONFIG = {
    "page_title": "Multi-Agent Code Assistant",
    "page_icon": "πŸ€–",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)

# Initialize agents (with caching for performance)
@st.cache_resource
def init_agents():
    """Build and cache the agent/system registry.

    Decorated with ``st.cache_resource``, so the constructors run once per
    process and the same objects are reused across reruns and sessions.

    Returns:
        dict: singletons keyed by role — "coder", "reviewer", "tester",
        "rag", and "monitor".
    """
    registry = {}
    registry["coder"] = CoderAgent()
    registry["reviewer"] = ReviewerAgent()
    registry["tester"] = TesterAgent()
    registry["rag"] = RAGSystem()
    registry["monitor"] = MonitoringSystem()
    return registry

# Seed session state on the first run only; values already present
# survive Streamlit reruns untouched.
_SESSION_DEFAULTS = {
    "conversation": [],
    "metrics": {
        "total_requests": 0,
        "successful_generations": 0,
        "average_test_score": 0,
        "average_review_score": 0,
    },
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Page header: app title plus a short explanation of the agent pipeline
# (coder -> reviewer -> tester, with RAG-supplied documentation context).
st.title("πŸ€– Multi-Agent Code Assistant")
st.markdown("""
This system uses multiple AI agents working together:
1. **Coder Agent**: Writes code based on your requirements
2. **Reviewer Agent**: Checks code quality and suggests improvements  
3. **Tester Agent**: Creates and runs tests to verify functionality
4. **RAG System**: Provides relevant documentation context
""")

# Sidebar for configuration
with st.sidebar:
    st.header("βš™οΈ Configuration")
    
    # Model selection
    model_choice = st.selectbox(
        "Select Model",
        ["gpt-3.5-turbo", "gpt-4"],
        help="GPT-4 is more accurate but slower and more expensive"
    )
    
    # Temperature slider
    temperature = st.slider(
        "Creativity (Temperature)",
        min_value=0.0,
        max_value=1.0,
        value=0.7,
        help="Higher values make output more creative, lower values more deterministic"
    )
    
    # Advanced options
    with st.expander("Advanced Options"):
        use_rag = st.checkbox("Use RAG (Documentation Context)", value=True)
        auto_test = st.checkbox("Auto-run Tests", value=True)
        show_raw = st.checkbox("Show Raw Responses", value=False)
    
    st.divider()
    
    # System metrics
    st.header("πŸ“Š System Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Requests", st.session_state.metrics["total_requests"])
    with col2:
        st.metric("Success Rate", 
                 f"{(st.session_state.metrics['successful_generations'] / max(st.session_state.metrics['total_requests'], 1)) * 100:.1f}%")
    
    # Add custom documentation
    st.divider()
    st.header("πŸ“š Add Documentation")
    custom_doc = st.text_area("Add custom documentation for RAG system:")
    if st.button("Add to Knowledge Base") and custom_doc:
        agents = init_agents()
        agents["rag"].add_document(custom_doc, "user")
        st.success("Documentation added successfully!")

# --- Main prompt input ---
st.subheader("πŸ’‘ What code would you like to generate?")

# Canned requests the user can fire with one click instead of typing.
example_prompts = [
    "Write a function to reverse a string",
    "Create a function that calculates factorial",
    "Write a function to check if a number is prime",
    "Create a function that finds the Fibonacci number at position n",
    "Write a function to sort a list of integers"
]

# Lay the example buttons out on a 3-wide grid; clicking one pre-fills
# the text input below through its shared session-state key.
cols = st.columns(3)
for idx, example in enumerate(example_prompts):
    with cols[idx % 3]:
        if st.button(example, key=f"example_{idx}"):
            st.session_state.user_prompt = example

# Free-form prompt; keyed as "user_prompt" so the example buttons above
# can populate it.
user_prompt = st.text_input(
    "Or enter your own prompt:",
    key="user_prompt",
    placeholder="e.g., Write a Python function to validate email addresses"
)

# Fetch the cached agent registry for this run.
agents = init_agents()

# Apply the sidebar's model/temperature choices to the coder agent.
# NOTE(review): init_agents() is cached with st.cache_resource, so this
# mutates a singleton shared by every session and rerun — concurrent
# users could overwrite each other's settings. Confirm whether these
# should instead be passed per generate_code() call.
agents["coder"].model = model_choice
agents["coder"].temperature = temperature

# --- Generation pipeline ---
# Runs only when the user clicks the button AND a prompt is present.
# Order matters: RAG context -> coder -> reviewer -> (optional) tester
# -> monitoring log, with the progress bar advanced after each step.
if st.button("πŸš€ Generate Code", type="primary") and user_prompt:
    # Every click counts as a request, successful or not.
    st.session_state.metrics["total_requests"] += 1
    
    # Progress bar plus a mutable status line for step-by-step feedback.
    progress_bar = st.progress(0)
    status_text = st.empty()
    
    # Step 1: retrieve documentation context (empty string when RAG is off).
    status_text.text("πŸ” Retrieving relevant documentation...")
    context = agents["rag"].get_context(user_prompt) if use_rag else ""
    progress_bar.progress(20)
    
    # Step 2: generate code from the prompt and retrieved context.
    # NOTE(review): assumes the result dict carries "status", "code",
    # "raw_response", and optionally "error"/"tokens_used" — confirm
    # against CoderAgent.generate_code.
    status_text.text("πŸ’» Generating code with Coder Agent...")
    code_result = agents["coder"].generate_code(user_prompt, context)
    progress_bar.progress(40)
    
    if code_result["status"] == "success":
        # Count the success; also used as the running-average denominator below.
        st.session_state.metrics["successful_generations"] += 1
        
        # Show the generated code to the user.
        st.subheader("βœ… Generated Code")
        st.code(code_result["code"], language="python")
        
        # Raw LLM response, gated behind the sidebar toggle.
        if show_raw:
            with st.expander("View Raw Response"):
                st.text(code_result["raw_response"])
        
        # Step 3: review the generated code.
        status_text.text("πŸ” Reviewing code with Reviewer Agent...")
        review_result = agents["reviewer"].comprehensive_review(code_result["code"])
        progress_bar.progress(60)
        
        # Review results, one tab per aspect.
        st.subheader("πŸ“ Code Review Results")
        
        review_tab1, review_tab2, review_tab3 = st.tabs([
            "Overall Score", 
            "Static Analysis", 
            "LLM Suggestions"
        ])
        
        with review_tab1:
            score = review_result["overall_score"]
            st.metric("Overall Quality Score", f"{score:.1f}/10.0")
            
            # Gauge on a 0-10 axis: red <3, yellow 3-7, green 7-10,
            # with the threshold marker pinned at the actual score.
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=score,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Code Quality"},
                gauge={
                    'axis': {'range': [0, 10]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 3], 'color': "red"},
                        {'range': [3, 7], 'color': "yellow"},
                        {'range': [7, 10], 'color': "green"}
                    ],
                    'threshold': {
                        'line': {'color': "black", 'width': 4},
                        'thickness': 0.75,
                        'value': score
                    }
                }
            ))
            fig.update_layout(height=200)
            st.plotly_chart(fig, use_container_width=True)
        
        with review_tab2:
            static = review_result["static_analysis"]
            if static["status"] == "success":
                st.write(f"**Pylint Score:** {static.get('score', 0):.1f}/10.0")
                if static.get("issues"):
                    st.write("**Issues Found:**")
                    for issue in static["issues"][:5]:  # cap output at the first 5 issues
                        st.write(f"- {issue.get('message', 'Unknown issue')}")
                else:
                    st.success("No issues found in static analysis!")
            else:
                st.error(f"Static analysis failed: {static.get('error')}")
        
        with review_tab3:
            llm = review_result["llm_review"]
            if llm["status"] == "success":
                st.write(llm["review"])
                if llm.get("suggestions"):
                    st.write("**Key Suggestions:**")
                    for suggestion in llm["suggestions"]:
                        st.write(f"β€’ {suggestion}")
            else:
                st.error(f"LLM review failed: {llm.get('error')}")
        
        # Step 4: run generated tests, gated behind the sidebar toggle.
        if auto_test:
            status_text.text("πŸ§ͺ Testing code with Tester Agent...")
            test_result = agents["tester"].test_code(code_result["code"], user_prompt)
            progress_bar.progress(80)
            
            st.subheader("πŸ§ͺ Test Results")
            
            # NOTE(review): assumes test_result["test_results"] always exists
            # with a "status" key — a missing key here would raise KeyError;
            # confirm against TesterAgent.test_code.
            if test_result["test_results"]["status"] == "success":
                metrics = test_result["test_results"]["metrics"]
                
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Total Tests", metrics["total_tests"])
                with col2:
                    st.metric("Passed Tests", metrics["passed_tests"])
                with col3:
                    st.metric("Pass Rate", f"{metrics['pass_rate']:.1f}%")
                
                # Per-case inputs/expectations for inspection.
                with st.expander("View Test Details"):
                    for i, test_case in enumerate(test_result["test_cases"]):
                        st.write(f"**Test {i+1}:**")
                        st.write(f"- Input: `{test_case['input']}`")
                        st.write(f"- Expected: `{test_case['expected']}`")
                
                # Incremental running average of pass rates.
                # NOTE(review): the denominator counts ALL successful
                # generations, including earlier runs where auto_test was
                # off and no pass rate was folded in — the average skews
                # if the toggle changes between runs; confirm intent.
                current_avg = st.session_state.metrics["average_test_score"]
                total_req = max(1, st.session_state.metrics["successful_generations"])
                st.session_state.metrics["average_test_score"] = (
                    (current_avg * (total_req - 1) + metrics["pass_rate"]) / total_req
                )
            else:
                st.error(f"Testing failed: {test_result['test_results'].get('error')}")
        
        # Step 5: persist this run in the monitoring system.
        status_text.text("πŸ“Š Logging results to monitoring system...")
        agents["monitor"].log_generation({
            "timestamp": datetime.now().isoformat(),
            "prompt": user_prompt,
            "code": code_result["code"],
            "model": model_choice,
            "temperature": temperature,
            "review_score": review_result["overall_score"],
            # test_result only exists when auto_test ran above; the same
            # flag guards this lookup, so no NameError when tests are off.
            "test_score": test_result.get("test_results", {}).get("metrics", {}).get("pass_rate", 0) if auto_test else None,
            "tokens_used": code_result.get("tokens_used", 0)
        })
        
        # Append both sides of the exchange to the session transcript.
        st.session_state.conversation.append({
            "role": "user",
            "content": user_prompt,
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })
        st.session_state.conversation.append({
            "role": "assistant",
            "content": f"Generated code with score {review_result['overall_score']:.1f}/10.0",
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })
        
        progress_bar.progress(100)
        status_text.text("βœ… Process completed!")
        
        # Incremental running average of review scores (same scheme as
        # the test-score average above).
        current_avg = st.session_state.metrics["average_review_score"]
        total_req = max(1, st.session_state.metrics["successful_generations"])
        st.session_state.metrics["average_review_score"] = (
            (current_avg * (total_req - 1) + review_result["overall_score"]) / total_req
        )
        
    else:
        # Generation failed: surface the error and close out the progress UI.
        st.error(f"Code generation failed: {code_result.get('error')}")
        progress_bar.progress(100)
        status_text.text("❌ Process failed!")

# --- Conversation history ---
# Render the most recent exchanges, newest first, capped at ten entries.
if st.session_state.conversation:
    st.divider()
    st.subheader("πŸ“œ Conversation History")
    
    recent = st.session_state.conversation[-10:]
    for entry in recent[::-1]:
        stamp = entry["timestamp"]
        if entry["role"] == "user":
            st.markdown(f"**πŸ‘€ You** ({stamp}):")
            st.info(entry["content"])
        else:
            st.markdown(f"**πŸ€– Assistant** ({stamp}):")
            st.success(entry["content"])

# Monitoring dashboard, collapsed by default.
with st.expander("πŸ“ˆ View Monitoring Dashboard"):
    # Manual refresh: a full script rerun re-fetches the metrics below.
    if st.button("Refresh Dashboard"):
        st.rerun()
    
    metrics_data = agents["monitor"].get_metrics()
    
    if metrics_data:
        # NOTE(review): assumes get_metrics() returns a list of flat dicts
        # with at least 'timestamp' and 'review_score' keys — confirm
        # against MonitoringSystem.
        df = pd.DataFrame(metrics_data)
        
        # Two charts side by side: quality trend and token usage.
        col1, col2 = st.columns(2)
        
        with col1:
            # A trend line needs at least two data points.
            if len(df) > 1:
                fig = px.line(
                    df, 
                    x='timestamp', 
                    y='review_score',
                    title='Code Quality Score Trend',
                    markers=True
                )
                st.plotly_chart(fig, use_container_width=True)
        
        with col2:
            # Token usage is optional in the logged records.
            if 'tokens_used' in df.columns:
                fig = px.bar(
                    df,
                    x='timestamp',
                    y='tokens_used',
                    title='Token Usage per Request'
                )
                st.plotly_chart(fig, use_container_width=True)
        
        # CSV export of everything currently in the monitor.
        st.download_button(
            label="πŸ“₯ Export Metrics Data",
            data=df.to_csv(index=False),
            file_name=f"code_assistant_metrics_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )
    else:
        st.info("No metrics data available yet. Generate some code first!")

# Footer: static explainer rendered at the bottom of every run.
st.divider()
st.markdown("""
---
### 🎯 **How This System Works:**

1. **Multi-Agent Architecture**: Each agent specializes in one task (coding, reviewing, testing)
2. **Agent Communication**: Agents pass structured data between each other
3. **RAG Integration**: Provides context from documentation to reduce hallucinations
4. **Evaluation Pipeline**: Continuous monitoring of code quality and test performance
5. **Incremental Improvement**: Start simple and add features gradually

**Built with**: OpenAI GPT, LangChain, ChromaDB, Streamlit
""")