Human-AII / ai /code_analyzer.py
swayamshetkar
Backend with Llama auto-download, Dockerfile, deployment setup
5cb6ded
"""Lightweight but more accurate code analyzer.
Produces a small profile with stylistic hints and simple complexity metrics.
"""
import re
import ast
from typing import Dict, Any, List
class ComplexityVisitor(ast.NodeVisitor):
    """Compute a cyclomatic-complexity-like metric per function.

    After ``visit(tree)`` the ``functions`` attribute holds one
    ``(name, complexity, length)`` tuple for every plain ``def`` in the tree.
    A nested function is recorded before the function that contains it, and
    its branches are not counted towards the enclosing function.

    Complexity starts at 1 and grows by one for each ``if``/``elif``, ``for``
    and ``while`` statement, and by one for each ``except`` handler.
    ``length`` is the number of source lines from the ``def`` line to the last
    line of the function, or ``None`` when line information is missing.
    """

    def __init__(self):
        # Branch counter of the function currently being visited. Branches
        # found outside any function also land here but are never reported.
        self.current = 0
        self.functions = []  # list of (name, complexity, length)
        self._start_line = None  # ``def`` line of the function being visited

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Measure one function and append its tuple to ``functions``."""
        # Remember the enclosing function's state: a nested ``def`` must not
        # reset the outer counter or overwrite the outer start line.
        outer_current, outer_start = self.current, self._start_line

        self.current = 1  # baseline
        self._start_line = getattr(node, 'lineno', None)
        self.generic_visit(node)

        end_line = self._find_end_line(node)
        length = None
        if self._start_line is not None and end_line is not None:
            length = end_line - self._start_line + 1
        self.functions.append((node.name, self.current, length))

        # Hand control back to the enclosing function (or module level).
        self.current, self._start_line = outer_current, outer_start

    def visit_If(self, node: ast.If):
        """Count an ``if``; an ``elif`` is a nested ``If`` and counts too."""
        self.current += 1
        self.generic_visit(node)

    def visit_For(self, node: ast.For):
        """Count a ``for`` loop as one branch."""
        self.current += 1
        self.generic_visit(node)

    def visit_While(self, node: ast.While):
        """Count a ``while`` loop as one branch."""
        self.current += 1
        self.generic_visit(node)

    def visit_Try(self, node: ast.Try):
        """Count one branch per ``except`` handler of a ``try`` statement."""
        self.current += len(node.handlers)
        self.generic_visit(node)

    def _find_end_line(self, node: ast.AST):
        """Return the last source line covered by ``node``, or ``None``.

        Uses the ``end_lineno`` recorded by the parser when it is available,
        so a trailing multi-line expression is included. Otherwise falls back
        to the largest line number found on any node in the subtree.
        """
        end_line = getattr(node, 'end_lineno', None)
        if end_line is not None:
            return end_line
        # Nodes without position info (e.g. ``Module``) are skipped; an empty
        # candidate list yields ``None`` instead of raising.
        candidates = [
            getattr(child, 'end_lineno', None) or child.lineno
            for child in ast.walk(node)
            if getattr(child, 'lineno', None) is not None
        ]
        return max(candidates, default=None)
def detect_indentation(lines: List[str]) -> Dict[str, Any]:
    """Detect whether tabs or spaces are used and the typical indent size.

    Args:
        lines: Source lines without their line terminators.

    Returns:
        A dict with two keys:
        ``type`` is ``"tabs"`` when more lines start with a tab than with a
        space, ``"spaces"`` when space-indented lines exist and tabs do not
        dominate, and ``"unknown"`` when no indented line was found.
        ``size`` is the width of one indentation step for ``"spaces"`` and
        ``None`` otherwise.
    """
    from collections import Counter

    tab_lines = 0
    space_lines = 0
    # Histogram of positive jumps in leading-space width between consecutive
    # lines. One indentation level is the most common jump; the absolute
    # width would instead report 8 for 4-space code nested two levels deep.
    steps = Counter()
    previous_width = 0
    for line in lines:
        if not line.strip():
            # Blank or whitespace-only lines say nothing about indent style.
            continue
        if line.startswith('\t'):
            tab_lines += 1
            continue
        width = len(line) - len(line.lstrip(' '))
        if width > 0:
            space_lines += 1
        if width > previous_width:
            steps[width - previous_width] += 1
        previous_width = width

    if tab_lines > space_lines:
        return {"type": "tabs", "size": None}
    if steps:
        return {"type": "spaces", "size": steps.most_common(1)[0][0]}
    return {"type": "unknown", "size": None}
def detect_naming(code: str) -> str:
    """Guess the identifier naming convention used in ``code``.

    The patterns are tried in a fixed order (snake_case, camelCase,
    PascalCase) and the label of the first one that matches anywhere in the
    text is returned; ``"unknown"`` is returned when none matches.
    """
    # (regex, label) pairs in priority order: the first hit wins.
    conventions = (
        (r"\b[a-z_]+_[a-z0-9_]+\b", "snake_case"),
        (r"\b[a-z]+[A-Z][a-zA-Z0-9]+\b", "camelCase"),
        (r"\b[A-Z][a-z]+[A-Za-z0-9]+\b", "PascalCase"),
    )
    for pattern, label in conventions:
        if re.search(pattern, code) is not None:
            return label
    return "unknown"
def analyze_code(code: str) -> Dict[str, Any]:
    """Build a small style and complexity profile for a piece of source code.

    The returned dict contains ``indentation``, ``naming``, ``comments``
    (count and density of ``#`` lines), ``counts`` (functions, classes,
    lines), ``functions`` (per-function name/complexity/length),
    ``average_complexity``, ``average_function_length`` and ``suggestions``.
    If the AST-based step raises, the AST metrics are zeroed and the message
    is stored under ``error``.
    """
    source_lines = code.splitlines()
    total_lines = len(source_lines)
    n_comments = sum(1 for text in source_lines if text.strip().startswith('#'))

    # Text-level observations that need no parsing.
    report: Dict[str, Any] = {
        'indentation': detect_indentation(source_lines),
        'naming': detect_naming(code),
        'comments': {
            'count': n_comments,
            'density': n_comments / max(1, total_lines),
        },
    }

    # AST-based metrics; best effort, any failure is reported instead of raised.
    try:
        tree = ast.parse(code)

        n_functions = 0
        n_classes = 0
        for node in ast.walk(tree):
            if isinstance(node, ast.FunctionDef):
                n_functions += 1
            elif isinstance(node, ast.ClassDef):
                n_classes += 1
        report['counts'] = {
            'functions': n_functions,
            'classes': n_classes,
            'lines': total_lines,
        }

        visitor = ComplexityVisitor()
        visitor.visit(tree)
        measured = visitor.functions

        if not measured:
            mean_complexity = 0.0
            mean_length = 0.0
        else:
            mean_complexity = sum(entry[1] for entry in measured) / len(measured)
            # Only functions with a known (truthy) length enter the average.
            known_lengths = [entry[2] for entry in measured if entry[2]]
            mean_length = sum(known_lengths) / len(known_lengths)

        report['functions'] = [
            {'name': name, 'complexity': complexity, 'length': length}
            for name, complexity, length in measured
        ]
        report['average_complexity'] = mean_complexity
        report['average_function_length'] = mean_length
    except Exception as exc:
        report['counts'] = {'functions': 0, 'classes': 0, 'lines': total_lines}
        report['functions'] = []
        report['average_complexity'] = 0.0
        report['average_function_length'] = 0.0
        report['error'] = str(exc)

    # Lightweight suggestions derived from the numbers above.
    hints = []
    if report['comments']['density'] < 0.05:
        hints.append('Add short docstrings or comments for complex functions.')
    if report['average_complexity'] > 4:
        hints.append('Consider splitting large functions to reduce cyclomatic complexity.')
    if report['indentation']['type'] == 'unknown':
        hints.append('Indentation not detected consistently; ensure a consistent style (tabs or spaces).')
    report['suggestions'] = hints

    return report