Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| AI Review Service | |
| This module provides functionality for AI-powered code review using Nebius Qwen2.5-72B-Instruct model. | |
| """ | |
| import os | |
| import logging | |
| import json | |
| import re | |
| import concurrent.futures | |
| from openai import OpenAI | |
| from dotenv import load_dotenv | |
# Module-level logger named after this module; used by AIReviewService below.
logger = logging.getLogger(__name__)
# Load environment variables at import time, before AIReviewService.__init__
# looks up NEBIUS_API_KEY via os.getenv.
load_dotenv()
class AIReviewService:
    """
    Service for AI-powered code review using Nebius Qwen2.5-72B-Instruct model.

    The service talks to the Nebius endpoint through the OpenAI client. When
    NEBIUS_API_KEY is missing or empty no client is created and every public
    method returns an error payload instead of raising.
    """

    # Shared request settings (previously duplicated in each API call site).
    MODEL_NAME = "Qwen/Qwen2.5-72B-Instruct"
    BASE_URL = "https://api.studio.nebius.com/v1/"
    MAX_TOKENS = 4000
    # Caps that keep a repository review from generating excessive API usage.
    MAX_FILES_PER_REVIEW = 20
    MAX_WORKERS = 5

    _UNAVAILABLE_MESSAGE = (
        'AI review service is not available. '
        'Please set NEBIUS_API_KEY in environment variables.'
    )

    # Markdown heading marker (two or more '#') at the start of a line.
    # Anchoring to the line start keeps an inline '##' from splitting a section.
    _HEADING_RE = re.compile(r'^[ \t]{0,3}#{2,}[ \t]*', re.MULTILINE)
    # "line 42" reference; the word boundary avoids matching e.g. "deadline 5".
    _LINE_NUMBER_RE = re.compile(r'\bline\s+(\d+)', re.IGNORECASE)

    _BASE_SYSTEM_PROMPT = (
        "You are an expert code reviewer with deep knowledge of software development best practices, design patterns, and security.\n"
        "Your task is to review code and provide constructive, actionable feedback.\n"
        "Be thorough but prioritize the most important issues.\n"
        "Format your response in markdown with clear sections.\n"
        "For each suggestion, include the line number, the issue, and a recommended solution.\n"
        "Focus on:\n"
        "- Code quality and readability\n"
        "- Potential bugs and edge cases\n"
        "- Security vulnerabilities\n"
        "- Performance optimizations\n"
        "- Adherence to best practices\n"
        "Your feedback should be specific, actionable, and educational. Explain why each suggestion matters.\n"
        "Do not hallucinate vulnerabilities. Base claims on code patterns.\n"
    )

    _JS_TS_GUIDANCE = "\nFor JavaScript/TypeScript code, focus on modern ES6+ practices, proper async handling, and potential type issues."

    # Language-specific guidance appended to the base system prompt.
    _LANGUAGE_GUIDANCE = {
        'Python': "\nFor Python code, pay special attention to PEP 8 compliance, proper exception handling, and Pythonic idioms.",
        'JavaScript': _JS_TS_GUIDANCE,
        'TypeScript': _JS_TS_GUIDANCE,
        'Java': "\nFor Java code, examine object-oriented design, proper exception handling, and resource management.",
        'Go': "\nFor Go code, check for idiomatic Go patterns, proper error handling, and concurrency issues.",
        'Rust': "\nFor Rust code, verify memory safety, proper use of ownership/borrowing, and idiomatic Rust patterns.",
    }

    _SUMMARY_SYSTEM_PROMPT = "You are an expert code reviewer providing a summary of a repository review. Be concise, insightful, and actionable in your feedback. Format your response in markdown with clear sections."

    _EXTENSION_TO_LANGUAGE = {
        '.py': 'Python',
        '.js': 'JavaScript',
        '.jsx': 'JavaScript',
        '.ts': 'TypeScript',
        '.tsx': 'TypeScript',
        '.java': 'Java',
        '.go': 'Go',
        '.rs': 'Rust',
        '.cpp': 'C++',
        '.cc': 'C++',
        '.c': 'C',
        '.h': 'C',
        '.hpp': 'C++',
        '.cs': 'C#',
        '.php': 'PHP',
        '.rb': 'Ruby',
    }

    def __init__(self):
        """
        Initialize the AIReviewService.

        Reads NEBIUS_API_KEY from the environment. ``self.client`` is always
        assigned: an OpenAI client when a key is present, otherwise None.
        """
        self.api_key = os.getenv('NEBIUS_API_KEY')
        self.client = None
        if not self.api_key:
            logger.warning("NEBIUS_API_KEY not found in environment variables. AI review will not be available.")
        else:
            self.client = OpenAI(
                base_url=self.BASE_URL,
                api_key=self.api_key,
            )
        logger.info("Initialized AIReviewService")

    def is_available(self):
        """
        Check if the AI review service is available.

        Returns:
            bool: True if a non-empty API key is set and a client exists,
            False otherwise.
        """
        # An empty-string key must count as "missing", matching __init__,
        # otherwise callers would go on to use a client that was never built.
        return bool(self.api_key) and self.client is not None

    @staticmethod
    def _error_result(message):
        """Build the per-file error payload shared by all failure paths."""
        return {
            'status': 'error',
            'error': message,
            'suggestions': [],
        }

    def _chat(self, system_prompt, user_prompt):
        """Send one system+user exchange to the model and return the reply text."""
        response = self.client.chat.completions.create(
            model=self.MODEL_NAME,
            max_tokens=self.MAX_TOKENS,
            temperature=0,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return response.choices[0].message.content

    def review_code(self, file_path, file_content, language, context=None):
        """
        Review code using Qwen.

        Args:
            file_path (str): The path to the file being reviewed.
            file_content (str): The content of the file being reviewed.
            language (str): The programming language of the file.
            context (dict, optional): Additional context for the review.

        Returns:
            dict: ``{'status': 'success', 'review_text', 'suggestions'}`` on
            success, or ``{'status': 'error', 'error', 'suggestions': []}``
            when the service is unavailable or the API call fails.
        """
        if not self.is_available():
            return self._error_result(self._UNAVAILABLE_MESSAGE)

        logger.info("Reviewing %s code in %s", language, file_path)
        prompt = self._prepare_prompt(file_path, file_content, language, context)

        try:
            # The reply content may be None; normalize so parsing cannot fail on it.
            review_text = self._chat(self._get_system_prompt(language), prompt) or ''
            suggestions = self._parse_review(review_text)
        except Exception as e:  # boundary: any API/parsing failure becomes an error payload
            logger.error("Error calling Qwen API: %s", e)
            return self._error_result(str(e))

        return {
            'status': 'success',
            'review_text': review_text,
            'suggestions': suggestions,
        }

    def _review_file(self, file_path, analysis_results=None):
        """Read one file from disk and review it; never raises."""
        try:
            # Determine the language first so unsupported files are not read at all.
            _, ext = os.path.splitext(file_path)
            language = self._get_language_from_extension(ext)
            if not language:
                return file_path, self._error_result(f'Unsupported language for file {file_path}')

            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                file_content = f.read()

            # Provide context from analysis results if available.
            context = None
            if analysis_results:
                context = self._extract_context_for_file(file_path, analysis_results)

            return file_path, self.review_code(file_path, file_content, language, context)
        except Exception as e:  # boundary: one bad file must not abort the whole run
            logger.error("Error reviewing file %s: %s", file_path, e)
            return file_path, self._error_result(str(e))

    def review_repository(self, repo_path, files, languages, analysis_results=None):
        """
        Review a repository using Qwen with parallel processing.

        At most ``MAX_FILES_PER_REVIEW`` files are reviewed, using up to
        ``MAX_WORKERS`` threads. An empty ``files`` list is allowed and yields
        an empty ``reviews`` mapping.

        Args:
            repo_path (str): The path to the repository.
            files (list): A list of files to review.
            languages (list): A list of programming languages in the repository.
            analysis_results (dict, optional): Results from other analysis tools.

        Returns:
            dict: ``{'status', 'reviews', 'summary'}`` where ``reviews`` maps
            each file path to its per-file review result.
        """
        if not self.is_available():
            return {
                'status': 'error',
                'error': self._UNAVAILABLE_MESSAGE,
                'reviews': {},
                'summary': '',
            }

        files = list(files or [])
        logger.info("Reviewing repository at %s with %d files", repo_path, len(files))

        # Limit the number of files to review to avoid excessive API usage.
        if len(files) > self.MAX_FILES_PER_REVIEW:
            logger.warning(
                "Too many files to review (%d). Limiting to %d files.",
                len(files), self.MAX_FILES_PER_REVIEW,
            )
            files = files[:self.MAX_FILES_PER_REVIEW]

        reviews = {}
        # ThreadPoolExecutor rejects max_workers=0, so skip the pool when
        # there is nothing to review.
        if files:
            worker_count = min(self.MAX_WORKERS, len(files))
            with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
                future_to_file = {
                    executor.submit(self._review_file, file_path, analysis_results): file_path
                    for file_path in files
                }
                # Process results as they complete.
                for future in concurrent.futures.as_completed(future_to_file):
                    file_path = future_to_file[future]
                    try:
                        path, result = future.result()
                    except Exception as e:
                        logger.error("Exception occurred during review of %s: %s", file_path, e)
                        reviews[file_path] = self._error_result(str(e))
                    else:
                        reviews[path] = result
                        logger.info("Completed review for %s", path)

        summary = self._generate_repository_summary(repo_path, reviews, languages, analysis_results)
        return {
            'status': 'success',
            'reviews': reviews,
            'summary': summary,
        }

    def _prepare_prompt(self, file_path, file_content, language, context=None):
        """
        Prepare a prompt for Qwen.

        Args:
            file_path (str): The path to the file being reviewed.
            file_content (str): The content of the file being reviewed.
            language (str): The programming language of the file.
            context (dict, optional): Additional context for the review; the
                'issues' and 'vulnerabilities' lists are included when non-empty.

        Returns:
            str: The prompt for Qwen.
        """
        parts = [
            f"Please review the following {language} code and provide constructive feedback:\n",
            f"File: {file_path}\n",
            f"```{language}\n",
            f"{file_content}\n",
            "```\n",
        ]

        issues = (context or {}).get('issues') or []
        vulnerabilities = (context or {}).get('vulnerabilities') or []
        # Only mention tool findings when there are some; empty headers add
        # noise to the prompt without giving the model any information.
        if issues or vulnerabilities:
            parts.append("Additional context:\n")
        if issues:
            parts.append("\nIssues detected by other tools:\n")
            parts.extend(
                f"- {issue.get('issue', 'Unknown issue')} at line {issue.get('line', 'unknown')}: {issue.get('description', '')}\n"
                for issue in issues
            )
        if vulnerabilities:
            parts.append("\nSecurity vulnerabilities detected:\n")
            parts.extend(
                f"- {vuln.get('issue', 'Unknown vulnerability')} at line {vuln.get('line', 'unknown')}: {vuln.get('description', '')}\n"
                for vuln in vulnerabilities
            )

        parts.extend([
            "\nPlease provide your review with the following sections:\n",
            "1. Code Quality: Assess the overall quality, readability, and maintainability.\n",
            "2. Potential Issues: Identify any bugs, edge cases, or potential problems.\n",
            "3. Security Concerns: Highlight any security vulnerabilities or risks.\n",
            "4. Performance Considerations: Note any performance bottlenecks or inefficiencies.\n",
            "5. Specific Suggestions: Provide concrete, actionable suggestions for improvement.\n",
        ])
        return "".join(parts)

    def _get_system_prompt(self, language):
        """
        Get the system prompt for Qwen based on the programming language.

        Args:
            language (str): The programming language.

        Returns:
            str: The base system prompt, followed by language-specific
            guidance when the language has an entry in ``_LANGUAGE_GUIDANCE``.
        """
        return self._BASE_SYSTEM_PROMPT + self._LANGUAGE_GUIDANCE.get(language, '')

    def _parse_review(self, review_text):
        """
        Parse the review text from Qwen to extract structured suggestions.

        The text is split into sections at markdown headings of level two or
        deeper. Within a section, a line that mentions "line <N>" or starts
        with a bullet ('- ' or '* ') begins a new suggestion; other non-blank
        lines are appended to the current suggestion's details.

        Args:
            review_text (str): The review text from Qwen (None or empty is allowed).

        Returns:
            list: Dicts with keys 'section', 'line', 'description', 'details'.
        """
        if not review_text:
            return []

        suggestions = []
        for section in self._HEADING_RE.split(review_text):
            if not section.strip():
                continue

            lines = section.strip().split('\n')
            section_title = lines[0].strip()
            current_suggestion = None

            for raw_line in lines[1:]:
                line = raw_line.strip()
                if not line:
                    continue

                line_number_match = self._LINE_NUMBER_RE.search(line)
                if line_number_match or line.startswith(('- ', '* ')):
                    # Flush the previous suggestion before starting a new one.
                    if current_suggestion:
                        suggestions.append(current_suggestion)
                    current_suggestion = {
                        'section': section_title,
                        'line': int(line_number_match.group(1)) if line_number_match else None,
                        'description': line,
                        'details': '',
                    }
                elif current_suggestion:
                    current_suggestion['details'] += line + '\n'

            # Add the last suggestion in the section.
            if current_suggestion:
                suggestions.append(current_suggestion)

        return suggestions

    def _get_language_from_extension(self, extension):
        """
        Get the programming language from a file extension.

        Args:
            extension (str): The file extension, including the leading dot;
                matching is case-insensitive.

        Returns:
            str: The programming language, or None if unknown.
        """
        return self._EXTENSION_TO_LANGUAGE.get(extension.lower())

    @staticmethod
    def _entries_for_file(results_by_language, key, file_path):
        """Collect entries under ``key`` across all languages whose 'file' equals ``file_path``."""
        matches = []
        for language_results in results_by_language.values():
            matches.extend(
                entry for entry in language_results.get(key, [])
                if entry.get('file', '') == file_path
            )
        return matches

    def _extract_context_for_file(self, file_path, analysis_results):
        """
        Extract relevant context for a file from analysis results.

        Args:
            file_path (str): The path to the file.
            analysis_results (dict): Results from other analysis tools. The
                keys read are 'code_analysis', 'security_scan' and
                'performance_analysis' (the latter via its 'language_results').

        Returns:
            dict: ``{'issues': [...], 'vulnerabilities': [...]}``; code-quality
            issues come first, followed by performance issues.
        """
        context = {
            'issues': [],
            'vulnerabilities': [],
        }

        if 'code_analysis' in analysis_results:
            context['issues'].extend(
                self._entries_for_file(analysis_results['code_analysis'], 'issues', file_path)
            )

        if 'security_scan' in analysis_results:
            context['vulnerabilities'].extend(
                self._entries_for_file(analysis_results['security_scan'], 'vulnerabilities', file_path)
            )

        if 'performance_analysis' in analysis_results:
            performance_by_language = analysis_results['performance_analysis'].get('language_results', {})
            context['issues'].extend(
                self._entries_for_file(performance_by_language, 'issues', file_path)
            )

        return context

    def _generate_repository_summary(self, repo_path, reviews, languages, analysis_results=None):
        """
        Generate a summary of the repository review.

        Args:
            repo_path (str): The path to the repository.
            reviews (dict): The review results for each file.
            languages (list): A list of programming languages in the repository.
            analysis_results (dict, optional): Results from other analysis tools.

        Returns:
            str: The model's markdown summary, or a message describing why no
            summary could be produced (service unavailable or API error).
        """
        if not self.is_available():
            return self._UNAVAILABLE_MESSAGE

        parts = [
            f"Please provide a summary of the code review for the repository at {repo_path}.\n",
            f"Languages used in the repository: {', '.join(str(lang) for lang in (languages or []))}\n",
            "\nFiles reviewed:\n",
        ]

        for file_path, review in reviews.items():
            if review.get('status') == 'success':
                suggestion_count = len(review.get('suggestions', []))
                parts.append(f"- {file_path}: {suggestion_count} suggestions\n")
            else:
                parts.append(f"- {file_path}: Error - {review.get('error', 'Unknown error')}\n")

        if analysis_results:
            parts.append("\nAnalysis results summary:\n")
            if 'code_analysis' in analysis_results:
                total_issues = sum(
                    result.get('issue_count', 0)
                    for result in analysis_results['code_analysis'].values()
                )
                parts.append(f"- Code quality issues: {total_issues}\n")
            if 'security_scan' in analysis_results:
                total_vulns = sum(
                    result.get('vulnerability_count', 0)
                    for result in analysis_results['security_scan'].values()
                )
                parts.append(f"- Security vulnerabilities: {total_vulns}\n")
            if 'performance_analysis' in analysis_results:
                total_perf_issues = sum(
                    result.get('issue_count', 0)
                    for result in analysis_results['performance_analysis'].get('language_results', {}).values()
                )
                parts.append(f"- Performance issues: {total_perf_issues}\n")

        parts.extend([
            "\nPlease provide a comprehensive summary of the code review, including:\n",
            "1. Overall code quality assessment\n",
            "2. Common patterns and issues found across the codebase\n",
            "3. Strengths of the codebase\n",
            "4. Areas for improvement\n",
            "5. Prioritized recommendations\n",
        ])
        prompt = "".join(parts)

        try:
            return self._chat(self._SUMMARY_SYSTEM_PROMPT, prompt)
        except Exception as e:  # boundary: a failed summary must not discard the per-file reviews
            logger.error("Error generating repository summary: %s", e)
            return f"Error generating repository summary: {e}"