import json
import os
import re
import subprocess
import tempfile
from typing import Dict, Any

import openai
from dotenv import load_dotenv

load_dotenv()


class ReviewerAgent:
    """
    Agent responsible for reviewing code for quality, style, and potential issues.
    Uses both static analysis (pylint) and LLM-based review.
    """

    def __init__(self):
        self.api_key = os.getenv("OPENAI_API_KEY")
        openai.api_key = self.api_key

    def static_analysis(self, code: str) -> Dict[str, Any]:
        """
        Perform static code analysis using pylint.

        Args:
            code: Python code to analyze

        Returns:
            Dictionary with pylint results
        """
        temp_file_path = None
        try:
            # Create a temporary file with the code
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:
                f.write(code)
                temp_file_path = f.name

            # Run pylint on the temporary file
            result = subprocess.run(
                ['pylint', temp_file_path, '--output-format=json'],
                capture_output=True,
                text=True
            )

            # pylint's exit code is a bitmask of the message categories it
            # emitted, so a non-zero return code usually just means issues
            # were found. Parse stdout as JSON whenever possible and only
            # treat the run as failed when there is no parseable output.
            try:
                issues = json.loads(result.stdout) if result.stdout.strip() else []
                return {
                    "status": "success",
                    "issues": issues,
                    "score": self._calculate_pylint_score(issues),
                    "summary": f"Found {len(issues)} issues" if issues else "No issues found"
                }
            except json.JSONDecodeError:
                return {
                    "status": "error",
                    "error": result.stderr,
                    "issues": []
                }
        except Exception as e:
            return {
                "status": "error",
                "error": str(e),
                "issues": []
            }
        finally:
            # Clean up the temporary file even if pylint or parsing failed
            if temp_file_path and os.path.exists(temp_file_path):
                os.unlink(temp_file_path)

    def _calculate_pylint_score(self, issues: list) -> float:
        """Calculate a normalized score from pylint issues."""
        if not issues:
            return 10.0

        # Count issues by type
        error_count = sum(1 for issue in issues if issue.get('type') == 'error')
        warning_count = sum(1 for issue in issues if issue.get('type') == 'warning')

        # Simple scoring: start from 10 and deduct points
        score = 10.0
        score -= error_count * 0.5
        score -= warning_count * 0.1
        return max(0.0, min(10.0, score))

    def llm_review(self, code: str) -> Dict[str, Any]:
        """
        Use LLM to review code for logical errors, improvements, and best practices.

        Args:
            code: Python code to review

        Returns:
            Dictionary with LLM review results
        """
        try:
            system_message = """You are an expert code reviewer. Analyze the code for:
1. Logical errors
2. Security issues
3. Performance improvements
4. Code style and best practices
5. Edge cases not handled

Provide specific, actionable feedback."""

            # Uses the legacy openai<1.0 ChatCompletion interface
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": f"Review this code:\n\n{code}"}
                ],
                temperature=0.3,
                max_tokens=300
            )

            review_text = response.choices[0].message.content

            # Extract bulleted suggestions from the review text
            suggestions = re.findall(r'[-•]\s*(.*?)(?=\n\n|\Z)', review_text, re.DOTALL)

            return {
                "status": "success",
                "review": review_text,
                "suggestions": suggestions,
                "tokens_used": response.usage.total_tokens
            }
        except Exception as e:
            return {
                "status": "error",
                "error": str(e),
                "review": ""
            }

    def comprehensive_review(self, code: str) -> Dict[str, Any]:
        """
        Combine static analysis and LLM review for comprehensive feedback.

        Args:
            code: Python code to review

        Returns:
            Complete review results
        """
        static_result = self.static_analysis(code)
        llm_result = self.llm_review(code)

        return {
            "static_analysis": static_result,
            "llm_review": llm_result,
            "overall_score": self._calculate_overall_score(static_result, llm_result)
        }

    def _calculate_overall_score(self, static: Dict, llm: Dict) -> float:
        """Calculate an overall code quality score."""
        if static.get("status") != "success":
            return 0.0

        static_score = static.get("score", 0.0)

        # LLM review doesn't give a numeric score, so we estimate based on suggestions
        llm_suggestions = len(llm.get("suggestions", []))
        llm_score = max(0, 10 - llm_suggestions * 0.5)

        # Weighted average: 70% static analysis, 30% LLM review
        return static_score * 0.7 + llm_score * 0.3
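

# A minimal usage sketch, assuming OPENAI_API_KEY is set in the environment
# (e.g. via a .env file loaded above) and pylint is installed on PATH.
# `sample_code` is a hypothetical snippet used purely for illustration;
# running this will make a real OpenAI API call.
if __name__ == "__main__":
    sample_code = """
def divide(a, b):
    return a / b
"""
    agent = ReviewerAgent()
    report = agent.comprehensive_review(sample_code)
    print("Overall score:", report["overall_score"])
    print("Static analysis:", report["static_analysis"].get("summary"))
    print("LLM review:", report["llm_review"].get("review", "")[:200])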