""" Simple LLM service that works without API keys using basic text processing """ import re from typing import List, Dict class SimpleLLM: """A basic LLM that provides helpful responses about code repositories without requiring API keys""" def __init__(self): self.code_keywords = { 'python': ['def ', 'class ', 'import ', 'from ', '__init__', 'self.'], 'javascript': ['function', 'const ', 'let ', 'var ', 'async', 'await'], 'java': ['public class', 'private ', 'public ', 'static', 'void'], 'cpp': ['#include', 'int main', 'class ', 'namespace'], 'go': ['func ', 'package ', 'import', 'type'], 'rust': ['fn ', 'struct ', 'impl ', 'use ', 'mod '], } def analyze_code_context(self, context: str) -> Dict: """Analyze the code context to extract useful information""" lines = context.split('\n') # Detect programming languages languages = set() for lang, keywords in self.code_keywords.items(): for keyword in keywords: if keyword in context: languages.add(lang) # Extract file paths file_paths = [] for line in lines: if 'path' in line.lower() and ('/' in line or '\\' in line): file_paths.append(line.strip()) # Count functions and classes functions = len(re.findall(r'\bdef\s+\w+|function\s+\w+|func\s+\w+', context)) classes = len(re.findall(r'\bclass\s+\w+', context)) # Extract imports/dependencies imports = re.findall(r'import\s+[\w.]+|from\s+[\w.]+\s+import|#include\s*<[\w.]+>', context) return { 'languages': list(languages), 'file_paths': file_paths[:5], # Limit to 5 paths 'functions_count': functions, 'classes_count': classes, 'imports': imports[:10], # Limit to 10 imports 'total_lines': len(lines) } def generate_response(self, question: str, context: str) -> str: """Generate a helpful response based on the question and code context""" question_lower = question.lower() analysis = self.analyze_code_context(context) # Question type detection and response generation if any(word in question_lower for word in ['what', 'about', 'project', 'repository']): return self._describe_project(analysis, context) elif any(word in question_lower for word in ['structure', 'organized', 'architecture']): return self._describe_structure(analysis, context) elif any(word in question_lower for word in ['function', 'method', 'class']): return self._describe_functions_classes(analysis, context) elif any(word in question_lower for word in ['dependency', 'dependencies', 'import', 'library']): return self._describe_dependencies(analysis, context) elif any(word in question_lower for word in ['test', 'testing']): return self._describe_tests(analysis, context) elif any(word in question_lower for word in ['error', 'exception', 'handling']): return self._describe_error_handling(analysis, context) else: return self._general_response(analysis, context) def _describe_project(self, analysis: Dict, context: str) -> str: languages = ", ".join(analysis['languages']) if analysis['languages'] else "multiple languages" response = f"This project appears to be written in {languages}. " if analysis['classes_count'] > 0: response += f"It contains {analysis['classes_count']} classes " if analysis['functions_count'] > 0: response += f"and {analysis['functions_count']} functions. " if 'api' in context.lower() or 'endpoint' in context.lower(): response += "It appears to be an API or web service. " if 'test' in context.lower(): response += "The project includes test files. " return response.strip() def _describe_structure(self, analysis: Dict, context: str) -> str: response = "The code is organized with the following structure:\n\n" if analysis['file_paths']: response += "**Key files/directories:**\n" for path in analysis['file_paths']: response += f"- {path}\n" response += "\n" if analysis['languages']: response += f"**Languages used:** {', '.join(analysis['languages'])}\n\n" if analysis['classes_count'] > 0: response += f"**Classes found:** {analysis['classes_count']}\n" if analysis['functions_count'] > 0: response += f"**Functions found:** {analysis['functions_count']}\n" return response def _describe_functions_classes(self, analysis: Dict, context: str) -> str: response = "" if analysis['classes_count'] > 0: response += f"Found {analysis['classes_count']} classes in the codebase. " if analysis['functions_count'] > 0: response += f"Found {analysis['functions_count']} functions/methods. " # Extract some actual function/class names from context class_names = re.findall(r'class\s+(\w+)', context) function_names = re.findall(r'def\s+(\w+)|function\s+(\w+)', context) if class_names: response += f"\n\n**Some classes:** {', '.join(class_names[:5])}" if function_names: func_list = [name for group in function_names for name in group if name] response += f"\n\n**Some functions:** {', '.join(func_list[:5])}" return response if response else "No classes or functions clearly identified in the provided context." def _describe_dependencies(self, analysis: Dict, context: str) -> str: if analysis['imports']: response = "**Dependencies and imports found:**\n\n" for imp in analysis['imports']: response += f"- {imp}\n" return response else: return "No clear dependencies or imports identified in the provided context." def _describe_tests(self, analysis: Dict, context: str) -> str: if 'test' in context.lower(): return "Test files appear to be present in this codebase. Look for files with 'test' in their names or directories." else: return "No obvious test files identified in the provided context." def _describe_error_handling(self, analysis: Dict, context: str) -> str: error_patterns = ['try:', 'except:', 'catch', 'throw', 'error', 'exception'] found_patterns = [pattern for pattern in error_patterns if pattern in context.lower()] if found_patterns: return f"Error handling appears to be implemented using: {', '.join(found_patterns)}" else: return "No obvious error handling patterns identified in the provided context." def _general_response(self, analysis: Dict, context: str) -> str: response = "Based on the code context provided:\n\n" if analysis['languages']: response += f"- **Languages:** {', '.join(analysis['languages'])}\n" if analysis['total_lines'] > 0: response += f"- **Code size:** {analysis['total_lines']} lines analyzed\n" if analysis['functions_count'] > 0: response += f"- **Functions:** {analysis['functions_count']} found\n" if analysis['classes_count'] > 0: response += f"- **Classes:** {analysis['classes_count']} found\n" response += "\nFor more specific information, please ask about particular aspects of the code." return response class NoAPILLM: """Wrapper to use SimpleLLM with the same interface as other LLMs""" def __init__(self): self.simple_llm = SimpleLLM() def __call__(self, prompt: str) -> str: # Extract context and question from the prompt if "Context from repository:" in prompt and "Question:" in prompt: parts = prompt.split("Question:") if len(parts) == 2: context_part = parts[0].replace("Context from repository:", "").strip() question_part = parts[1].replace("Answer:", "").strip() return self.simple_llm.generate_response(question_part, context_part) # Fallback for other prompt formats return "I can help analyze code repositories. Please provide specific questions about the codebase structure, functions, dependencies, or other aspects of the code."