| | """
|
| | PYTHON CODE ANALYZER & IMPROVER
|
| | Analyzes Python code, learns patterns, generates better Python code
|
| | """
|
| |
|
| | import ast
|
| | import re
|
| | import logging
|
| | from typing import Dict, List, Optional, Tuple, Any
|
| | import json
|
| | import os
|
| |
|
| | logger = logging.getLogger(__name__)
|
| |
|
class PythonAnalyzer:
    """Analyze Python source, remember what was seen, and apply simple fixes.

    The analyzer combines AST inspection (structure, complexity, suggestions)
    with regular-expression checks (anti-patterns, function naming).  Each
    successful call to :meth:`analyze_python_code` is recorded in
    ``learned_patterns``; :meth:`save_learned_patterns` persists that mapping
    as JSON and :meth:`load_learned_patterns` restores it.
    """

    # Location of the persisted pattern store (relative to the working dir).
    PATTERN_DIR = 'noahski_data'
    PATTERN_FILE = 'noahski_data/python_patterns.json'

    # AST node groups shared by several methods.
    _FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)
    _BRANCH_NODES = (ast.If, ast.While, ast.For, ast.ExceptHandler)

    # ``def name(params):`` with no return annotation between ``)`` and ``:``.
    _UNANNOTATED_DEF = re.compile(r'def\s+(\w+)\s*\(([^)]*)\)\s*:')
    _BARE_EXCEPT = re.compile(r'\bexcept\s*:')

    def __init__(self):
        """Create an analyzer and load any previously saved patterns."""
        self.learned_patterns = {}
        self.code_quality_metrics = {}
        self.best_practices = self._initialize_best_practices()
        self.load_learned_patterns()

    def _initialize_best_practices(self) -> Dict:
        """Return the built-in best-practice tables.

        Returns:
            A dict with three keys: ``naming_conventions`` (regex per kind of
            identifier), ``anti_patterns`` (list of ``pattern``/``message``
            dicts searched in the raw source text) and ``style_checks``
            (human-readable reminders).
        """
        return {
            'naming_conventions': {
                'functions': r'^[a-z_][a-z0-9_]*$',
                'classes': r'^[A-Z][a-zA-Z0-9]*$',
                'constants': r'^[A-Z_][A-Z0-9_]*$',
                'variables': r'^[a-z_][a-z0-9_]*$',
            },
            # These are textual searches, so they tolerate flexible spacing
            # but can still match inside string literals or comments.
            'anti_patterns': [
                {'pattern': r'\bimport\s+\*', 'message': 'Avoid wildcard imports'},
                {'pattern': r'\bexcept\s*:', 'message': 'Avoid bare except clauses'},
                {'pattern': r'==\s*True\b', 'message': 'Use "if value:" instead of "== True"'},
                {'pattern': r'==\s*False\b', 'message': 'Use "if not value:" instead of "== False"'},
                {'pattern': r'==\s*None\b', 'message': 'Use "if value is None:" instead'},
            ],
            'style_checks': [
                'Use 4 spaces for indentation',
                'Use list comprehensions instead of loops',
                'Use context managers (with statements)',
                'Add docstrings to all functions',
                'Use type hints for function parameters',
            ],
        }

    def analyze_python_code(self, code: str) -> Dict:
        """Analyze ``code`` and record the result in ``learned_patterns``.

        Args:
            code: Python source text.

        Returns:
            A dict with the keys ``syntax_valid``, ``structure``,
            ``quality_score``, ``issues``, ``suggestions`` and ``complexity``.
            Errors are reported through ``issues`` rather than raised.
        """
        return self._analyze(code, learn=True)

    def _analyze(self, code: str, learn: bool) -> Dict:
        """Run the analysis pipeline, optionally recording the result."""
        analysis = {
            'syntax_valid': False,
            'structure': {},
            'quality_score': 0.0,
            'issues': [],
            'suggestions': [],
            'complexity': 0,
        }

        try:
            tree = ast.parse(code)
            analysis['syntax_valid'] = True
            analysis['structure'] = self._extract_structure(tree)
            analysis['issues'] = self._check_code_issues(code)
            analysis['suggestions'] = self._generate_suggestions(code, tree)
            analysis['complexity'] = self._calculate_complexity(tree)
            analysis['quality_score'] = self._calculate_quality_score(analysis)
            if learn:
                self._learn_from_code(code, analysis)
        except SyntaxError as e:
            analysis['syntax_valid'] = False
            analysis['issues'].append(f"Syntax Error: {str(e)}")
        except Exception as e:
            # Boundary handler: callers get the failure as an issue entry, and
            # the traceback is kept in the log instead of being lost.
            logger.exception("Unexpected error while analysing code")
            analysis['issues'].append(f"Analysis Error: {str(e)}")

        return analysis

    def _extract_structure(self, tree: ast.AST) -> Dict:
        """Summarise functions, classes, imports and module-level names.

        Args:
            tree: Parsed AST, normally the ``ast.Module`` from ``ast.parse``.

        Returns:
            A dict with the lists ``functions``, ``classes``, ``imports`` and
            ``global_vars``.  Functions and classes at every nesting level are
            included; ``global_vars`` only lists names assigned directly in
            the module body.
        """
        structure = {
            'functions': [],
            'classes': [],
            'imports': [],
            'global_vars': [],
        }

        for node in ast.walk(tree):
            if isinstance(node, self._FUNCTION_NODES):
                last_line = getattr(node, 'end_lineno', None)
                structure['functions'].append({
                    'name': node.name,
                    'args': len(node.args.args),
                    # Inclusive span, so a one-line function counts as 1;
                    # 0 means the end line is unknown.
                    'lines': last_line - node.lineno + 1 if last_line else 0,
                    'docstring': ast.get_docstring(node) is not None,
                })
            elif isinstance(node, ast.ClassDef):
                methods = [
                    member for member in node.body
                    if isinstance(member, self._FUNCTION_NODES)
                ]
                structure['classes'].append({
                    'name': node.name,
                    'methods': len(methods),
                    'has_init': any(m.name == '__init__' for m in methods),
                })
            elif isinstance(node, (ast.Import, ast.ImportFrom)):
                structure['imports'].append(self._extract_import(node))

        structure['global_vars'] = self._module_level_names(tree)
        return structure

    @staticmethod
    def _module_level_names(tree: ast.AST) -> List[str]:
        """Return names bound by assignments directly in the module body."""
        if not isinstance(tree, ast.Module):
            return []

        names = {}  # dict used as an insertion-ordered set
        for stmt in tree.body:
            if isinstance(stmt, ast.Assign):
                targets = stmt.targets
            elif isinstance(stmt, ast.AnnAssign):
                targets = [stmt.target]
            else:
                continue
            for target in targets:
                # Walking the target also covers tuple/list unpacking; the
                # Store check skips e.g. ``a`` in ``a[i] = 1``.
                for sub in ast.walk(target):
                    if isinstance(sub, ast.Name) and isinstance(sub.ctx, ast.Store):
                        names.setdefault(sub.id)
        return list(names)

    def _extract_import(self, node) -> str:
        """Render an import node as text.

        ``ast.Import`` yields the comma-separated module names only;
        ``ast.ImportFrom`` yields ``from <module> import <names>`` with the
        leading dots of relative imports preserved.  Any other node yields an
        empty string.
        """
        if isinstance(node, ast.Import):
            return ', '.join(alias.name for alias in node.names)
        if isinstance(node, ast.ImportFrom):
            module = '.' * (node.level or 0) + (node.module or '')
            names = ', '.join(alias.name for alias in node.names)
            return f"from {module} import {names}"
        return ""

    def _check_code_issues(self, code: str) -> List[str]:
        """Return issue messages for anti-patterns, naming and docstrings.

        Anti-patterns and function names are detected with regular
        expressions on the raw text, so text inside strings or comments can
        also match.  The docstring check uses the AST when the code parses.
        """
        issues = [
            entry['message']
            for entry in self.best_practices['anti_patterns']
            if re.search(entry['pattern'], code)
        ]

        naming_rule = re.compile(self.best_practices['naming_conventions']['functions'])
        for func_name in re.findall(r'def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(', code):
            if not naming_rule.match(func_name):
                issues.append(f"Function '{func_name}' doesn't follow naming convention")

        if self._has_undocumented_function(code):
            issues.append("Missing docstrings in functions")

        return issues

    def _has_undocumented_function(self, code: str) -> bool:
        """Tell whether any function in ``code`` lacks a docstring."""
        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError):
            # Unparseable text: fall back to a purely textual heuristic.
            return 'def ' in code and '"""' not in code and "'''" not in code
        return bool(self._functions_missing_docstrings(tree))

    def _functions_missing_docstrings(self, tree: ast.AST) -> list:
        """Return every (async) function node that has no docstring."""
        return [
            node for node in ast.walk(tree)
            if isinstance(node, self._FUNCTION_NODES)
            and ast.get_docstring(node) is None
        ]

    def _generate_suggestions(self, code: str, tree: ast.AST) -> List[str]:
        """Return de-duplicated improvement suggestions derived from the AST.

        Suggestions are grouped as: for-loops, bare ``except`` handlers, then
        functions without a return annotation.
        """
        loop_notes, handler_notes, hint_notes = [], [], []

        for node in ast.walk(tree):
            if isinstance(node, ast.For):
                loop_notes.append("Consider using list comprehension instead of for loop")
            elif isinstance(node, ast.ExceptHandler) and node.type is None:
                handler_notes.append("Specify exception type instead of bare except")
            elif isinstance(node, self._FUNCTION_NODES) and not node.returns:
                hint_notes.append(f"Add return type hint to function '{node.name}'")

        # dict.fromkeys keeps first-seen order while dropping repeats.
        return list(dict.fromkeys(loop_notes + handler_notes + hint_notes))

    def _calculate_complexity(self, tree: ast.AST) -> int:
        """Approximate cyclomatic complexity.

        Starts at 1 and adds 1 for every ``if``, ``while``, ``for`` and
        ``except`` handler found anywhere in ``tree``.
        """
        return 1 + sum(
            1 for node in ast.walk(tree) if isinstance(node, self._BRANCH_NODES)
        )

    def _calculate_quality_score(self, analysis: Dict) -> float:
        """Compute a quality score clamped to the range 0.0-100.0.

        The score starts at 100, loses 10 per issue and 2 per complexity
        point above 5, and gains up to 5 points each for the number of
        functions and classes.
        """
        score = 100.0
        score -= len(analysis['issues']) * 10

        excess = analysis['complexity'] - 5
        if excess > 0:
            score -= excess * 2

        structure = analysis['structure']
        score += min(5, len(structure.get('functions') or []))
        score += min(5, len(structure.get('classes') or []))

        # Float bounds keep the result a float even when it is clamped.
        return max(0.0, min(100.0, score))

    def _learn_from_code(self, code: str, analysis: Dict):
        """Store a summary of ``analysis`` in ``learned_patterns``.

        The key is derived from a SHA-256 digest of the source, because the
        built-in ``hash`` of a string is salted per process and would give a
        different key on every run.
        """
        import hashlib

        digest = hashlib.sha256(code.encode('utf-8', errors='replace')).hexdigest()
        key = f"pattern_{digest[:16]}"
        self.learned_patterns[key] = {
            'code_snippet': code[:500],
            'quality': analysis['quality_score'],
            'complexity': analysis['complexity'],
            'structure': analysis['structure'],
            'issues': analysis['issues'],
        }

    def improve_python_code(self, code: str) -> Dict:
        """Apply the automatic fixes to ``code`` and report the outcome.

        Args:
            code: Python source text.

        Returns:
            On a syntax error: ``{'success': False, 'error': ..., 'original':
            code}``.  Otherwise a dict with ``success``, ``original``,
            ``improved``, ``analysis`` (of the original code),
            ``improvements`` (labels of the steps that ran), and the quality
            scores ``quality_before`` / ``quality_after``.
        """
        analysis = self.analyze_python_code(code)

        if not analysis['syntax_valid']:
            return {
                'success': False,
                'error': 'Code has syntax errors',
                'original': code,
            }

        improved = code
        improvements = []

        if 'Avoid bare except clauses' in analysis['issues']:
            patched = self._BARE_EXCEPT.sub('except Exception:', improved)
            if patched != improved:
                improved = patched
                improvements.append('Fixed bare except clause')

        if 'Missing docstrings in functions' in analysis['issues']:
            documented = self._add_docstrings(improved)
            if documented != improved:
                improved = documented
                improvements.append('Added docstrings')

        if any('list comprehension' in s for s in analysis['suggestions']):
            improved = self._suggest_list_comprehensions(improved)
            improvements.append('Suggested list comprehensions')

        hinted = self._add_type_hints(improved)
        if hinted != improved:
            improved = hinted
            improvements.append('Added type hints')

        # Score the rewritten code without recording it as a learned pattern.
        after = self._analyze(improved, learn=False)

        return {
            'success': True,
            'original': code,
            'improved': improved,
            'analysis': analysis,
            'improvements': improvements,
            'quality_before': analysis['quality_score'],
            'quality_after': after['quality_score'],
        }

    def _add_docstrings(self, code: str) -> str:
        """Insert a placeholder docstring into every undocumented function.

        The docstring is placed on its own line before the function's first
        statement, using that statement's indentation, so methods and nested
        functions stay valid.  Functions whose body shares a line with the
        signature are left untouched.  If the code cannot be parsed, or the
        result would not parse, ``code`` is returned unchanged.
        """
        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError):
            return code

        lines = code.split('\n')
        insertions = []
        for func in self._functions_missing_docstrings(tree):
            first = func.body[0]
            # A decorated first statement starts at its first decorator.
            decorators = getattr(first, 'decorator_list', [])
            start = min([first.lineno] + [d.lineno for d in decorators])
            if start <= func.lineno or start > len(lines):
                continue
            line = lines[start - 1]
            indent = line[:len(line) - len(line.lstrip())]
            if len(indent) != first.col_offset:
                # Something other than whitespace precedes the statement.
                continue
            insertions.append((start - 1, self._render_docstring(func, indent)))

        if not insertions:
            return code

        # Insert bottom-up so earlier indices stay valid.
        for index, text in sorted(insertions, key=lambda item: item[0], reverse=True):
            lines.insert(index, text)
        improved = '\n'.join(lines)

        try:
            ast.parse(improved)
        except SyntaxError:
            return code
        return improved

    @staticmethod
    def _render_docstring(func, indent: str) -> str:
        """Build the placeholder docstring text for ``func`` at ``indent``."""
        spec = func.args
        params = [a.arg for a in list(getattr(spec, 'posonlyargs', [])) + list(spec.args)]
        if spec.vararg:
            params.append('*' + spec.vararg.arg)
        params.extend(a.arg for a in spec.kwonlyargs)
        if spec.kwarg:
            params.append('**' + spec.kwarg.arg)
        params = [p for p in params if p not in ('self', 'cls')]

        if not params:
            return f'{indent}"""{func.name}."""'

        parts = [f'{indent}"""{func.name}.', '', f'{indent}Args:']
        parts.extend(f'{indent}    {p}' for p in params)
        parts.append(f'{indent}"""')
        return '\n'.join(parts)

    def _add_type_hints(self, code: str) -> str:
        """Add ``-> Any`` to every ``def`` that has no return annotation.

        When at least one annotation is added, ``from typing import Any`` is
        inserted as well (unless already imported at module level) so the
        result does not fail with ``NameError``.  The match is textual, so a
        ``def`` inside a string literal is rewritten too.
        """
        hinted, replaced = self._UNANNOTATED_DEF.subn(r'def \1(\2) -> Any:', code)
        if not replaced:
            return code
        return self._ensure_any_import(hinted)

    @staticmethod
    def _ensure_any_import(code: str) -> str:
        """Insert ``from typing import Any`` if the module lacks it."""
        try:
            tree = ast.parse(code)
        except (SyntaxError, ValueError):
            return code

        for stmt in tree.body:
            if isinstance(stmt, ast.ImportFrom) and stmt.module == 'typing' and not stmt.level:
                for alias in stmt.names:
                    if alias.name == '*' or (alias.name == 'Any' and alias.asname is None):
                        return code

        # The import must follow the module docstring and any __future__
        # imports, otherwise it would break them.
        insert_at = 0
        body = tree.body
        position = 0
        if (body and isinstance(body[0], ast.Expr)
                and isinstance(body[0].value, ast.Constant)
                and isinstance(body[0].value.value, str)):
            insert_at = body[0].end_lineno
            position = 1
        while (position < len(body)
               and isinstance(body[position], ast.ImportFrom)
               and body[position].module == '__future__'):
            insert_at = body[position].end_lineno
            position += 1

        lines = code.split('\n')
        lines.insert(insert_at, 'from typing import Any')
        return '\n'.join(lines)

    def _suggest_list_comprehensions(self, code: str) -> str:
        """Placeholder: currently returns ``code`` unchanged."""
        return code

    def generate_python_from_requirement(self, requirement: str) -> Dict:
        """Pick a code template based on keywords in ``requirement``.

        Templates are chosen by substring match, checked in this order:
        class, loop, request, file operation; ``function`` is the default.
        The ``function`` and ``class`` templates are returned with their
        ``{name}``/``{args}``/``{description}`` placeholders unfilled.

        Args:
            requirement: Natural-language description; ``None`` is treated as
                an empty string.

        Returns:
            A dict with ``success``, ``code``, ``template_used``,
            ``requirement``, ``quality`` and ``needs_customization``.
        """
        templates = {
            'function': '''def {name}({args}):
    """{description}"""
    # TODO: Implement function
    pass''',

            'class': '''class {name}:
    """Class for {description}"""

    def __init__(self):
        """Initialize {name}"""
        pass

    def method(self):
        """Class method"""
        pass''',

            'loop': '''for item in items:
    # Process item
    print(item)''',

            'request': '''import requests

def fetch_data(url):
    """Fetch data from URL"""
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        print(f"Error: {e}")
        return None''',

            'file_operation': '''def read_file(filepath):
    """Read file safely"""
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            return f.read()
    except FileNotFoundError:
        print(f"File not found: {filepath}")
        return None''',
        }

        # Ordered: the first template with a matching keyword wins.
        keyword_table = (
            ('class', ('class',)),
            ('loop', ('loop', 'iterate')),
            ('request', ('request', 'fetch', 'api')),
            ('file_operation', ('file', 'read', 'write')),
        )

        requirement_lower = (requirement or '').lower()
        selected_template = next(
            (
                template for template, keywords in keyword_table
                if any(word in requirement_lower for word in keywords)
            ),
            'function',
        )

        return {
            'success': True,
            'code': templates[selected_template],
            'template_used': selected_template,
            'requirement': requirement,
            'quality': 'template',
            'needs_customization': True,
        }

    def load_learned_patterns(self):
        """Load ``learned_patterns`` from ``PATTERN_FILE`` if it exists.

        Read or decode failures, and payloads that are not a JSON object,
        are logged and leave ``learned_patterns`` unchanged.
        """
        if not os.path.exists(self.PATTERN_FILE):
            return

        try:
            with open(self.PATTERN_FILE, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            logger.error("Error loading patterns: %s", e)
            return

        if not isinstance(loaded, dict):
            logger.error(
                "Error loading patterns: %s",
                f"expected a JSON object, got {type(loaded).__name__}",
            )
            return

        self.learned_patterns = loaded
        logger.info("Loaded %d Python patterns", len(self.learned_patterns))

    def save_learned_patterns(self):
        """Write ``learned_patterns`` to ``PATTERN_FILE`` as indented JSON.

        Failures are logged, not raised.
        """
        try:
            # Serialise before opening the file so a serialisation error
            # cannot truncate a previously saved store.
            payload = json.dumps(self.learned_patterns, indent=2)
            os.makedirs(self.PATTERN_DIR, exist_ok=True)
            with open(self.PATTERN_FILE, 'w', encoding='utf-8') as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            logger.error("Error saving patterns: %s", e)
            return

        logger.info("Saved %d Python patterns", len(self.learned_patterns))
|
| |
|
| |
|
| |
|
# Process-wide analyzer instance; stays None until first requested.
_python_analyzer: Optional[PythonAnalyzer] = None


def get_python_analyzer() -> PythonAnalyzer:
    """Return the shared PythonAnalyzer, constructing it on first use."""
    global _python_analyzer
    analyzer = _python_analyzer
    if analyzer is None:
        # First call: build the instance and cache it at module level.
        analyzer = _python_analyzer = PythonAnalyzer()
    return analyzer
|
| |
|