"""Lightweight but more accurate code analyzer. Produces a small profile with stylistic hints and simple complexity metrics. """ import re import ast from typing import Dict, Any, List class ComplexityVisitor(ast.NodeVisitor): """Compute cyclomatic complexity-like metric per function.""" def __init__(self): self.current = 0 self.functions = [] # list of (name, complexity, length) self._start_line = None def visit_FunctionDef(self, node: ast.FunctionDef): self.current = 1 # baseline self._start_line = getattr(node, 'lineno', None) self.generic_visit(node) end_line = self._find_end_line(node) length = None if self._start_line and end_line: length = end_line - self._start_line + 1 self.functions.append((node.name, self.current, length)) def visit_If(self, node: ast.If): # each 'if' and 'elif' increases complexity self.current += 1 self.generic_visit(node) def visit_For(self, node: ast.For): self.current += 1 self.generic_visit(node) def visit_While(self, node: ast.While): self.current += 1 self.generic_visit(node) def visit_Try(self, node: ast.Try): # try/except blocks add branching self.current += max(0, len(node.handlers)) self.generic_visit(node) def _find_end_line(self, node: ast.AST): # heuristic: largest lineno in node body max_line = getattr(node, 'lineno', None) for n in ast.walk(node): if hasattr(n, 'lineno'): max_line = max(max_line, n.lineno) return max_line def detect_indentation(lines: List[str]) -> Dict[str, Any]: """Detect whether tabs or spaces and typical indent size.""" tabs = sum(1 for l in lines if l.startswith('\t')) space_indents = [] for l in lines: if l.startswith(' '): # count leading spaces count = len(l) - len(l.lstrip(' ')) if count > 0: space_indents.append(count) if tabs > sum(1 for l in lines if l.startswith(' ')): return {"type": "tabs", "size": None} if space_indents: # common indent size (mode) from collections import Counter cnt = Counter(space_indents) size = cnt.most_common(1)[0][0] return {"type": "spaces", "size": size} return {"type": "unknown", "size": None} def detect_naming(code: str) -> str: # look for snake_case, camelCase, or PascalCase identifiers if re.search(r"\b[a-z_]+_[a-z0-9_]+\b", code): return "snake_case" if re.search(r"\b[a-z]+[A-Z][a-zA-Z0-9]+\b", code): return "camelCase" if re.search(r"\b[A-Z][a-z]+[A-Za-z0-9]+\b", code): return "PascalCase" return "unknown" def analyze_code(code: str) -> Dict[str, Any]: """Return a dictionary summarizing style and simple complexity metrics.""" lines = code.splitlines() result: Dict[str, Any] = {} # indentation result['indentation'] = detect_indentation(lines) # naming result['naming'] = detect_naming(code) # comments comment_lines = [l for l in lines if l.strip().startswith('#')] result['comments'] = { 'count': len(comment_lines), 'density': len(comment_lines) / max(1, len(lines)), } # AST based metrics try: tree = ast.parse(code) funcs = [n for n in ast.walk(tree) if isinstance(n, ast.FunctionDef)] classes = [n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)] result['counts'] = {'functions': len(funcs), 'classes': len(classes), 'lines': len(lines)} visitor = ComplexityVisitor() visitor.visit(tree) # average function complexity if visitor.functions: avg_complexity = sum(c for _, c, _ in visitor.functions) / len(visitor.functions) avg_length = sum(l for _, _, l in visitor.functions if l) / len([1 for _, _, l in visitor.functions if l]) else: avg_complexity = 0.0 avg_length = 0.0 result['functions'] = [{'name': n, 'complexity': c, 'length': l} for n, c, l in visitor.functions] result['average_complexity'] = avg_complexity result['average_function_length'] = avg_length except Exception as e: result['counts'] = {'functions': 0, 'classes': 0, 'lines': len(lines)} result['functions'] = [] result['average_complexity'] = 0.0 result['average_function_length'] = 0.0 result['error'] = str(e) # suggestions (lightweight) suggestions = [] if result['comments']['density'] < 0.05: suggestions.append('Add short docstrings or comments for complex functions.') if result['average_complexity'] > 4: suggestions.append('Consider splitting large functions to reduce cyclomatic complexity.') if result['indentation']['type'] == 'unknown': suggestions.append('Indentation not detected consistently; ensure a consistent style (tabs or spaces).') result['suggestions'] = suggestions return result