File size: 14,027 Bytes
378f180
c54e455
 
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378f180
c54e455
378f180
c54e455
 
 
 
 
 
378f180
c54e455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378f180
 
c54e455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
import streamlit as st
import time
import json
from typing import Dict, Any
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from datetime import datetime

# Import our agents
from agents.coder_agent import CoderAgent
from agents.reviewer_agent import ReviewerAgent
from agents.tester_agent import TesterAgent
from utils.rag_system import RAGSystem
from utils.monitoring import MonitoringSystem

# Page-level Streamlit configuration (title, icon, wide layout).
_PAGE_CONFIG = {
    "page_title": "Multi-Agent Code Assistant",
    "page_icon": "πŸ€–",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)

# Initialize agents (with caching for performance)
@st.cache_resource
def init_agents():
    """Build and cache the agent/system registry.

    Decorated with ``st.cache_resource``, so the constructors run once per
    process and the same objects are reused across reruns and sessions.

    Returns:
        dict: singletons keyed by role — "coder", "reviewer", "tester",
        "rag", and "monitor".
    """
    registry = {}
    registry["coder"] = CoderAgent()
    registry["reviewer"] = ReviewerAgent()
    registry["tester"] = TesterAgent()
    registry["rag"] = RAGSystem()
    registry["monitor"] = MonitoringSystem()
    return registry

# Seed session state on the first run only; values already present
# survive Streamlit reruns untouched.
_SESSION_DEFAULTS = {
    "conversation": [],
    "metrics": {
        "total_requests": 0,
        "successful_generations": 0,
        "average_test_score": 0,
        "average_review_score": 0,
    },
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Page header: app title plus a short explanation of the agent pipeline
# (coder -> reviewer -> tester, with RAG-supplied documentation context).
st.title("πŸ€– Multi-Agent Code Assistant")
st.markdown("""
This system uses multiple AI agents working together:
1. **Coder Agent**: Writes code based on your requirements
2. **Reviewer Agent**: Checks code quality and suggests improvements  
3. **Tester Agent**: Creates and runs tests to verify functionality
4. **RAG System**: Provides relevant documentation context
""")

# Sidebar for configuration
with st.sidebar:
    st.header("βš™οΈ Configuration")
    
    # Model selection
    model_choice = st.selectbox(
        "Select Model",
        ["gpt-3.5-turbo", "gpt-4"],
        help="GPT-4 is more accurate but slower and more expensive"
    )
    
    # Temperature slider
    temperature = st.slider(
        "Creativity (Temperature)",
        min_value=0.0,
        max_value=1.0,
        value=0.7,
        help="Higher values make output more creative, lower values more deterministic"
    )
    
    # Advanced options
    with st.expander("Advanced Options"):
        use_rag = st.checkbox("Use RAG (Documentation Context)", value=True)
        auto_test = st.checkbox("Auto-run Tests", value=True)
        show_raw = st.checkbox("Show Raw Responses", value=False)
    
    st.divider()
    
    # System metrics
    st.header("πŸ“Š System Metrics")
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Requests", st.session_state.metrics["total_requests"])
    with col2:
        st.metric("Success Rate", 
                 f"{(st.session_state.metrics['successful_generations'] / max(st.session_state.metrics['total_requests'], 1)) * 100:.1f}%")
    
    # Add custom documentation
    st.divider()
    st.header("πŸ“š Add Documentation")
    custom_doc = st.text_area("Add custom documentation for RAG system:")
    if st.button("Add to Knowledge Base") and custom_doc:
        agents = init_agents()
        agents["rag"].add_document(custom_doc, "user")
        st.success("Documentation added successfully!")

# --- Main prompt input ---
st.subheader("πŸ’‘ What code would you like to generate?")

# Canned requests the user can fire with one click instead of typing.
example_prompts = [
    "Write a function to reverse a string",
    "Create a function that calculates factorial",
    "Write a function to check if a number is prime",
    "Create a function that finds the Fibonacci number at position n",
    "Write a function to sort a list of integers"
]

# Lay the example buttons out on a 3-wide grid; clicking one pre-fills
# the text input below through its shared session-state key.
cols = st.columns(3)
for idx, example in enumerate(example_prompts):
    with cols[idx % 3]:
        if st.button(example, key=f"example_{idx}"):
            st.session_state.user_prompt = example

# Free-form prompt; keyed as "user_prompt" so the example buttons above
# can populate it.
user_prompt = st.text_input(
    "Or enter your own prompt:",
    key="user_prompt",
    placeholder="e.g., Write a Python function to validate email addresses"
)

# Fetch the cached agent registry for this run.
agents = init_agents()

# Apply the sidebar's model/temperature choices to the coder agent.
# NOTE(review): init_agents() is cached with st.cache_resource, so this
# mutates a singleton shared by every session and rerun — concurrent
# users could overwrite each other's settings. Confirm whether these
# should instead be passed per generate_code() call.
agents["coder"].model = model_choice
agents["coder"].temperature = temperature

# --- Generation pipeline ---
# Runs only when the user clicks the button AND a prompt is present.
# Order matters: RAG context -> coder -> reviewer -> (optional) tester
# -> monitoring log, with the progress bar advanced after each step.
if st.button("πŸš€ Generate Code", type="primary") and user_prompt:
    # Every click counts as a request, successful or not.
    st.session_state.metrics["total_requests"] += 1
    
    # Progress bar plus a mutable status line for step-by-step feedback.
    progress_bar = st.progress(0)
    status_text = st.empty()
    
    # Step 1: retrieve documentation context (empty string when RAG is off).
    status_text.text("πŸ” Retrieving relevant documentation...")
    context = agents["rag"].get_context(user_prompt) if use_rag else ""
    progress_bar.progress(20)
    
    # Step 2: generate code from the prompt and retrieved context.
    # NOTE(review): assumes the result dict carries "status", "code",
    # "raw_response", and optionally "error"/"tokens_used" — confirm
    # against CoderAgent.generate_code.
    status_text.text("πŸ’» Generating code with Coder Agent...")
    code_result = agents["coder"].generate_code(user_prompt, context)
    progress_bar.progress(40)
    
    if code_result["status"] == "success":
        # Count the success; also used as the running-average denominator below.
        st.session_state.metrics["successful_generations"] += 1
        
        # Show the generated code to the user.
        st.subheader("βœ… Generated Code")
        st.code(code_result["code"], language="python")
        
        # Raw LLM response, gated behind the sidebar toggle.
        if show_raw:
            with st.expander("View Raw Response"):
                st.text(code_result["raw_response"])
        
        # Step 3: review the generated code.
        status_text.text("πŸ” Reviewing code with Reviewer Agent...")
        review_result = agents["reviewer"].comprehensive_review(code_result["code"])
        progress_bar.progress(60)
        
        # Review results, one tab per aspect.
        st.subheader("πŸ“ Code Review Results")
        
        review_tab1, review_tab2, review_tab3 = st.tabs([
            "Overall Score", 
            "Static Analysis", 
            "LLM Suggestions"
        ])
        
        with review_tab1:
            score = review_result["overall_score"]
            st.metric("Overall Quality Score", f"{score:.1f}/10.0")
            
            # Gauge on a 0-10 axis: red <3, yellow 3-7, green 7-10,
            # with the threshold marker pinned at the actual score.
            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=score,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Code Quality"},
                gauge={
                    'axis': {'range': [0, 10]},
                    'bar': {'color': "darkblue"},
                    'steps': [
                        {'range': [0, 3], 'color': "red"},
                        {'range': [3, 7], 'color': "yellow"},
                        {'range': [7, 10], 'color': "green"}
                    ],
                    'threshold': {
                        'line': {'color': "black", 'width': 4},
                        'thickness': 0.75,
                        'value': score
                    }
                }
            ))
            fig.update_layout(height=200)
            st.plotly_chart(fig, use_container_width=True)
        
        with review_tab2:
            static = review_result["static_analysis"]
            if static["status"] == "success":
                st.write(f"**Pylint Score:** {static.get('score', 0):.1f}/10.0")
                if static.get("issues"):
                    st.write("**Issues Found:**")
                    for issue in static["issues"][:5]:  # cap output at the first 5 issues
                        st.write(f"- {issue.get('message', 'Unknown issue')}")
                else:
                    st.success("No issues found in static analysis!")
            else:
                st.error(f"Static analysis failed: {static.get('error')}")
        
        with review_tab3:
            llm = review_result["llm_review"]
            if llm["status"] == "success":
                st.write(llm["review"])
                if llm.get("suggestions"):
                    st.write("**Key Suggestions:**")
                    for suggestion in llm["suggestions"]:
                        st.write(f"β€’ {suggestion}")
            else:
                st.error(f"LLM review failed: {llm.get('error')}")
        
        # Step 4: run generated tests, gated behind the sidebar toggle.
        if auto_test:
            status_text.text("πŸ§ͺ Testing code with Tester Agent...")
            test_result = agents["tester"].test_code(code_result["code"], user_prompt)
            progress_bar.progress(80)
            
            st.subheader("πŸ§ͺ Test Results")
            
            # NOTE(review): assumes test_result["test_results"] always exists
            # with a "status" key — a missing key here would raise KeyError;
            # confirm against TesterAgent.test_code.
            if test_result["test_results"]["status"] == "success":
                metrics = test_result["test_results"]["metrics"]
                
                col1, col2, col3 = st.columns(3)
                with col1:
                    st.metric("Total Tests", metrics["total_tests"])
                with col2:
                    st.metric("Passed Tests", metrics["passed_tests"])
                with col3:
                    st.metric("Pass Rate", f"{metrics['pass_rate']:.1f}%")
                
                # Per-case inputs/expectations for inspection.
                with st.expander("View Test Details"):
                    for i, test_case in enumerate(test_result["test_cases"]):
                        st.write(f"**Test {i+1}:**")
                        st.write(f"- Input: `{test_case['input']}`")
                        st.write(f"- Expected: `{test_case['expected']}`")
                
                # Incremental running average of pass rates.
                # NOTE(review): the denominator counts ALL successful
                # generations, including earlier runs where auto_test was
                # off and no pass rate was folded in — the average skews
                # if the toggle changes between runs; confirm intent.
                current_avg = st.session_state.metrics["average_test_score"]
                total_req = max(1, st.session_state.metrics["successful_generations"])
                st.session_state.metrics["average_test_score"] = (
                    (current_avg * (total_req - 1) + metrics["pass_rate"]) / total_req
                )
            else:
                st.error(f"Testing failed: {test_result['test_results'].get('error')}")
        
        # Step 5: persist this run in the monitoring system.
        status_text.text("πŸ“Š Logging results to monitoring system...")
        agents["monitor"].log_generation({
            "timestamp": datetime.now().isoformat(),
            "prompt": user_prompt,
            "code": code_result["code"],
            "model": model_choice,
            "temperature": temperature,
            "review_score": review_result["overall_score"],
            # test_result only exists when auto_test ran above; the same
            # flag guards this lookup, so no NameError when tests are off.
            "test_score": test_result.get("test_results", {}).get("metrics", {}).get("pass_rate", 0) if auto_test else None,
            "tokens_used": code_result.get("tokens_used", 0)
        })
        
        # Append both sides of the exchange to the session transcript.
        st.session_state.conversation.append({
            "role": "user",
            "content": user_prompt,
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })
        st.session_state.conversation.append({
            "role": "assistant",
            "content": f"Generated code with score {review_result['overall_score']:.1f}/10.0",
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })
        
        progress_bar.progress(100)
        status_text.text("βœ… Process completed!")
        
        # Incremental running average of review scores (same scheme as
        # the test-score average above).
        current_avg = st.session_state.metrics["average_review_score"]
        total_req = max(1, st.session_state.metrics["successful_generations"])
        st.session_state.metrics["average_review_score"] = (
            (current_avg * (total_req - 1) + review_result["overall_score"]) / total_req
        )
        
    else:
        # Generation failed: surface the error and close out the progress UI.
        st.error(f"Code generation failed: {code_result.get('error')}")
        progress_bar.progress(100)
        status_text.text("❌ Process failed!")

# --- Conversation history ---
# Render the most recent exchanges, newest first, capped at ten entries.
if st.session_state.conversation:
    st.divider()
    st.subheader("πŸ“œ Conversation History")
    
    recent = st.session_state.conversation[-10:]
    for entry in recent[::-1]:
        stamp = entry["timestamp"]
        if entry["role"] == "user":
            st.markdown(f"**πŸ‘€ You** ({stamp}):")
            st.info(entry["content"])
        else:
            st.markdown(f"**πŸ€– Assistant** ({stamp}):")
            st.success(entry["content"])

# Monitoring dashboard, collapsed by default.
with st.expander("πŸ“ˆ View Monitoring Dashboard"):
    # Manual refresh: a full script rerun re-fetches the metrics below.
    if st.button("Refresh Dashboard"):
        st.rerun()
    
    metrics_data = agents["monitor"].get_metrics()
    
    if metrics_data:
        # NOTE(review): assumes get_metrics() returns a list of flat dicts
        # with at least 'timestamp' and 'review_score' keys — confirm
        # against MonitoringSystem.
        df = pd.DataFrame(metrics_data)
        
        # Two charts side by side: quality trend and token usage.
        col1, col2 = st.columns(2)
        
        with col1:
            # A trend line needs at least two data points.
            if len(df) > 1:
                fig = px.line(
                    df, 
                    x='timestamp', 
                    y='review_score',
                    title='Code Quality Score Trend',
                    markers=True
                )
                st.plotly_chart(fig, use_container_width=True)
        
        with col2:
            # Token usage is optional in the logged records.
            if 'tokens_used' in df.columns:
                fig = px.bar(
                    df,
                    x='timestamp',
                    y='tokens_used',
                    title='Token Usage per Request'
                )
                st.plotly_chart(fig, use_container_width=True)
        
        # CSV export of everything currently in the monitor.
        st.download_button(
            label="πŸ“₯ Export Metrics Data",
            data=df.to_csv(index=False),
            file_name=f"code_assistant_metrics_{datetime.now().strftime('%Y%m%d')}.csv",
            mime="text/csv"
        )
    else:
        st.info("No metrics data available yet. Generate some code first!")

# Footer: static explainer rendered at the bottom of every run.
st.divider()
st.markdown("""
---
### 🎯 **How This System Works:**

1. **Multi-Agent Architecture**: Each agent specializes in one task (coding, reviewing, testing)
2. **Agent Communication**: Agents pass structured data between each other
3. **RAG Integration**: Provides context from documentation to reduce hallucinations
4. **Evaluation Pipeline**: Continuous monitoring of code quality and test performance
5. **Incremental Improvement**: Start simple and add features gradually

**Built with**: OpenAI GPT, LangChain, ChromaDB, Streamlit
""")