# app.py — Streamlit entry point for the Multi-Agent Code Assistant.
# (Uploaded via Hugging Face Spaces, commit c54e455, verified.)
import streamlit as st
import time
import json
from typing import Dict, Any
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from datetime import datetime
# Import our agents
from agents.coder_agent import CoderAgent
from agents.reviewer_agent import ReviewerAgent
from agents.tester_agent import TesterAgent
from utils.rag_system import RAGSystem
from utils.monitoring import MonitoringSystem
# Set page config
# Configure the Streamlit page; must run before any other UI call.
_PAGE_CONFIG = {
    "page_title": "Multi-Agent Code Assistant",
    "page_icon": "πŸ€–",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)
# Initialize agents (with caching for performance)
@st.cache_resource
def init_agents():
    """Build the agent registry exactly once per server process.

    st.cache_resource memoizes the returned dict, so every script rerun
    reuses the same agent instances instead of reconstructing them.
    """
    registry = {}
    registry["coder"] = CoderAgent()
    registry["reviewer"] = ReviewerAgent()
    registry["tester"] = TesterAgent()
    registry["rag"] = RAGSystem()
    registry["monitor"] = MonitoringSystem()
    return registry
# Seed session state with defaults on the first run of a session only;
# later reruns keep whatever values have accumulated.
_SESSION_DEFAULTS = {
    "conversation": [],
    "metrics": {
        "total_requests": 0,
        "successful_generations": 0,
        "average_test_score": 0,
        "average_review_score": 0,
    },
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
# Title and description
# Page header plus a short explanation of the agent pipeline.
st.title("πŸ€– Multi-Agent Code Assistant")
_INTRO = """
This system uses multiple AI agents working together:
1. **Coder Agent**: Writes code based on your requirements
2. **Reviewer Agent**: Checks code quality and suggests improvements
3. **Tester Agent**: Creates and runs tests to verify functionality
4. **RAG System**: Provides relevant documentation context
"""
st.markdown(_INTRO)
# ---- Sidebar: model configuration, live metrics, RAG doc upload ----
with st.sidebar:
    st.header("βš™οΈ Configuration")

    # Which chat model the coder agent should use.
    model_choice = st.selectbox(
        "Select Model",
        ["gpt-3.5-turbo", "gpt-4"],
        help="GPT-4 is more accurate but slower and more expensive",
    )

    # Sampling temperature, forwarded to the coder agent below.
    temperature = st.slider(
        "Creativity (Temperature)",
        min_value=0.0,
        max_value=1.0,
        value=0.7,
        help="Higher values make output more creative, lower values more deterministic",
    )

    # Optional pipeline toggles.
    with st.expander("Advanced Options"):
        use_rag = st.checkbox("Use RAG (Documentation Context)", value=True)
        auto_test = st.checkbox("Auto-run Tests", value=True)
        show_raw = st.checkbox("Show Raw Responses", value=False)

    st.divider()

    # Aggregate counters kept in session state by the main pipeline.
    st.header("πŸ“Š System Metrics")
    metric_left, metric_right = st.columns(2)
    with metric_left:
        st.metric("Total Requests", st.session_state.metrics["total_requests"])
    with metric_right:
        successes = st.session_state.metrics["successful_generations"]
        attempts = max(st.session_state.metrics["total_requests"], 1)  # avoid div-by-zero
        st.metric("Success Rate", f"{(successes / attempts) * 100:.1f}%")

    st.divider()

    # Let the user extend the RAG knowledge base at runtime.
    st.header("πŸ“š Add Documentation")
    custom_doc = st.text_area("Add custom documentation for RAG system:")
    if st.button("Add to Knowledge Base") and custom_doc:
        sidebar_agents = init_agents()  # cached, same registry as main flow
        sidebar_agents["rag"].add_document(custom_doc, "user")
        st.success("Documentation added successfully!")
# ---- Main prompt input ----
st.subheader("πŸ’‘ What code would you like to generate?")

# Canned prompts rendered as one-click buttons in a 3-column grid.
example_prompts = [
    "Write a function to reverse a string",
    "Create a function that calculates factorial",
    "Write a function to check if a number is prime",
    "Create a function that finds the Fibonacci number at position n",
    "Write a function to sort a list of integers",
]

cols = st.columns(3)
for idx, example in enumerate(example_prompts):
    with cols[idx % 3]:
        # Clicking pre-fills the text input below via its session-state key.
        if st.button(example, key=f"example_{idx}"):
            st.session_state.user_prompt = example

user_prompt = st.text_input(
    "Or enter your own prompt:",
    key="user_prompt",
    placeholder="e.g., Write a Python function to validate email addresses",
)
# Resolve the cached agent registry and push the sidebar settings
# onto the coder agent before any generation run.
agents = init_agents()
_coder = agents["coder"]
_coder.model = model_choice
_coder.temperature = temperature
# ---- Generation pipeline: RAG context β†’ code β†’ review β†’ tests β†’ logging ----
if st.button("πŸš€ Generate Code", type="primary") and user_prompt:
    # Count every attempt, successful or not.
    st.session_state.metrics["total_requests"] += 1

    # Progress UI shared by all pipeline stages.
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Step 1: optionally pull documentation context from the RAG system.
    status_text.text("πŸ” Retrieving relevant documentation...")
    context = agents["rag"].get_context(user_prompt) if use_rag else ""
    progress_bar.progress(20)

    # Step 2: ask the coder agent for an implementation.
    status_text.text("πŸ’» Generating code with Coder Agent...")
    code_result = agents["coder"].generate_code(user_prompt, context)
    progress_bar.progress(40)

    if code_result["status"] == "success":
        st.session_state.metrics["successful_generations"] += 1

        st.subheader("βœ… Generated Code")
        st.code(code_result["code"], language="python")

        # Raw LLM output is useful for debugging prompt issues.
        if show_raw:
            with st.expander("View Raw Response"):
                st.text(code_result["raw_response"])

        # Step 3: combined static + LLM review of the generated code.
        status_text.text("πŸ” Reviewing code with Reviewer Agent...")
        review_result = agents["reviewer"].comprehensive_review(code_result["code"])
        progress_bar.progress(60)

        st.subheader("πŸ“ Code Review Results")
        review_tab1, review_tab2, review_tab3 = st.tabs([
            "Overall Score",
            "Static Analysis",
            "LLM Suggestions"
        ])

        with review_tab1:
            # Combined quality score on a 0-10 scale, rendered as a gauge.
            score = review_result["overall_score"]
            st.metric("Overall Quality Score", f"{score:.1f}/10.0")
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=score,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Code Quality"},
                gauge={
                    'axis': {'range': [0, 10]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 3], 'color': "red"},
                        {'range': [3, 7], 'color': "yellow"},
                        {'range': [7, 10], 'color': "green"}
                    ],
                    'threshold': {
                        'line': {'color': "black", 'width': 4},
                        'thickness': 0.75,
                        'value': score
                    }
                }
            ))
            fig.update_layout(height=200)
            st.plotly_chart(fig, use_container_width=True)

        with review_tab2:
            # Results from the reviewer's static-analysis (pylint) pass.
            static = review_result["static_analysis"]
            if static["status"] == "success":
                st.write(f"**Pylint Score:** {static.get('score', 0):.1f}/10.0")
                if static.get("issues"):
                    st.write("**Issues Found:**")
                    for issue in static["issues"][:5]:  # Show first 5 issues
                        st.write(f"- {issue.get('message', 'Unknown issue')}")
                else:
                    st.success("No issues found in static analysis!")
            else:
                st.error(f"Static analysis failed: {static.get('error')}")

        with review_tab3:
            # Free-form review text plus extracted suggestions from the LLM.
            llm = review_result["llm_review"]
            if llm["status"] == "success":
                st.write(llm["review"])
                if llm.get("suggestions"):
                    st.write("**Key Suggestions:**")
                    for suggestion in llm["suggestions"]:
                        st.write(f"β€’ {suggestion}")
            else:
                st.error(f"LLM review failed: {llm.get('error')}")

        # Step 4: generate and run tests (skipped when auto_test is off).
        if auto_test:
            status_text.text("πŸ§ͺ Testing code with Tester Agent...")
            test_result = agents["tester"].test_code(code_result["code"], user_prompt)
            progress_bar.progress(80)

            st.subheader("πŸ§ͺ Test Results")
            if test_result["test_results"]["status"] == "success":
                metrics = test_result["test_results"]["metrics"]
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Total Tests", metrics["total_tests"])
                with col2:
                    st.metric("Passed Tests", metrics["passed_tests"])
                with col3:
                    st.metric("Pass Rate", f"{metrics['pass_rate']:.1f}%")

                with st.expander("View Test Details"):
                    for i, test_case in enumerate(test_result["test_cases"]):
                        st.write(f"**Test {i+1}:**")
                        st.write(f"- Input: `{test_case['input']}`")
                        st.write(f"- Expected: `{test_case['expected']}`")

                # Running average over successful generations only.
                current_avg = st.session_state.metrics["average_test_score"]
                total_req = max(1, st.session_state.metrics["successful_generations"])
                st.session_state.metrics["average_test_score"] = (
                    (current_avg * (total_req - 1) + metrics["pass_rate"]) / total_req
                )
            else:
                st.error(f"Testing failed: {test_result['test_results'].get('error')}")

        # Step 5: persist the run to the monitoring backend.
        # NOTE: the `test_score` conditional only evaluates `test_result`
        # when auto_test is True, so it is safe even though `test_result`
        # is undefined otherwise.
        status_text.text("πŸ“Š Logging results to monitoring system...")
        agents["monitor"].log_generation({
            "timestamp": datetime.now().isoformat(),
            "prompt": user_prompt,
            "code": code_result["code"],
            "model": model_choice,
            "temperature": temperature,
            "review_score": review_result["overall_score"],
            "test_score": test_result.get("test_results", {}).get("metrics", {}).get("pass_rate", 0) if auto_test else None,
            "tokens_used": code_result.get("tokens_used", 0)
        })

        # Append both sides of the exchange to the chat-style history.
        st.session_state.conversation.append({
            "role": "user",
            "content": user_prompt,
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })
        st.session_state.conversation.append({
            "role": "assistant",
            "content": f"Generated code with score {review_result['overall_score']:.1f}/10.0",
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })

        progress_bar.progress(100)
        status_text.text("βœ… Process completed!")

        # Running average of review scores, same scheme as test scores.
        current_avg = st.session_state.metrics["average_review_score"]
        total_req = max(1, st.session_state.metrics["successful_generations"])
        st.session_state.metrics["average_review_score"] = (
            (current_avg * (total_req - 1) + review_result["overall_score"]) / total_req
        )
    else:
        # Generation failed: surface the agent's error and finish the bar.
        st.error(f"Code generation failed: {code_result.get('error')}")
        progress_bar.progress(100)
        status_text.text("❌ Process failed!")
# ---- Chat-style history of recent prompts and responses ----
if st.session_state.conversation:
    st.divider()
    st.subheader("πŸ“œ Conversation History")
    recent_messages = st.session_state.conversation[-10:]  # last 10 only
    for message in reversed(recent_messages):
        stamp = message["timestamp"]
        if message["role"] == "user":
            st.markdown(f"**πŸ‘€ You** ({stamp}):")
            st.info(message["content"])
        else:
            st.markdown(f"**πŸ€– Assistant** ({stamp}):")
            st.success(message["content"])
# ---- Collapsible monitoring dashboard ----
with st.expander("πŸ“ˆ View Monitoring Dashboard"):
    if st.button("Refresh Dashboard"):
        st.rerun()

    metrics_data = agents["monitor"].get_metrics()
    if not metrics_data:
        st.info("No metrics data available yet. Generate some code first!")
    else:
        # A DataFrame makes both the plots and the CSV export trivial.
        metrics_df = pd.DataFrame(metrics_data)

        chart_left, chart_right = st.columns(2)
        with chart_left:
            # A trend line needs at least two data points.
            if len(metrics_df) > 1:
                trend_fig = px.line(
                    metrics_df,
                    x='timestamp',
                    y='review_score',
                    title='Code Quality Score Trend',
                    markers=True
                )
                st.plotly_chart(trend_fig, use_container_width=True)
        with chart_right:
            if 'tokens_used' in metrics_df.columns:
                usage_fig = px.bar(
                    metrics_df,
                    x='timestamp',
                    y='tokens_used',
                    title='Token Usage per Request'
                )
                st.plotly_chart(usage_fig, use_container_width=True)

        # Let the user download the raw metrics as CSV.
        st.download_button(
            label="πŸ“₯ Export Metrics Data",
            data=metrics_df.to_csv(index=False),
            file_name=f"code_assistant_metrics_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )
# Footer
# ---- Footer: architecture summary ----
st.divider()
_FOOTER = """
---
### 🎯 **How This System Works:**
1. **Multi-Agent Architecture**: Each agent specializes in one task (coding, reviewing, testing)
2. **Agent Communication**: Agents pass structured data between each other
3. **RAG Integration**: Provides context from documentation to reduce hallucinations
4. **Evaluation Pipeline**: Continuous monitoring of code quality and test performance
5. **Incremental Improvement**: Start simple and add features gradually
**Built with**: OpenAI GPT, LangChain, ChromaDB, Streamlit
"""
st.markdown(_FOOTER)