File size: 5,120 Bytes
5cb6ded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""Lightweight but more accurate code analyzer.

Produces a small profile with stylistic hints and simple complexity metrics.
"""

import re
import ast
from typing import Dict, Any, List


class ComplexityVisitor(ast.NodeVisitor):
    """Compute a cyclomatic-complexity-like metric per function.

    After ``visit(tree)``, ``functions`` holds one
    ``(name, complexity, length)`` tuple per ``def`` / ``async def`` found,
    in the order the definitions finish being visited (so a nested function
    appears before the function that encloses it).

    Complexity starts at 1 and grows by one for every ``if``/``elif``,
    ``for``, ``async for`` and ``while``, and by the number of ``except``
    handlers of every ``try``. Branches inside a nested function count
    towards the nested function only.
    """

    def __init__(self):
        self.current = 0  # running complexity of the function being visited
        self.functions = []  # list of (name, complexity, length)
        self._start_line = None  # first line of the function being visited

    def visit_FunctionDef(self, node: ast.FunctionDef):
        # Remember the enclosing function's counters: visiting a nested
        # definition must not clobber them.
        outer_current, outer_start = self.current, self._start_line

        self.current = 1  # baseline
        self._start_line = getattr(node, 'lineno', None)
        self.generic_visit(node)

        end_line = self._find_end_line(node)
        length = None
        if self._start_line and end_line:
            length = end_line - self._start_line + 1
        self.functions.append((node.name, self.current, length))

        # Resume counting for the enclosing scope where it left off.
        self.current, self._start_line = outer_current, outer_start

    # Coroutines are measured exactly like plain functions.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_If(self, node: ast.If):
        # each 'if' and 'elif' increases complexity ('elif' is a nested If)
        self.current += 1
        self.generic_visit(node)

    def visit_For(self, node: ast.For):
        self.current += 1
        self.generic_visit(node)

    # 'async for' is a loop like any other.
    visit_AsyncFor = visit_For

    def visit_While(self, node: ast.While):
        self.current += 1
        self.generic_visit(node)

    def visit_Try(self, node: ast.Try):
        # every except handler is an additional path through the block
        self.current += len(node.handlers)
        self.generic_visit(node)

    def _find_end_line(self, node: ast.AST):
        """Return the last source line covered by *node*, or None if unknown."""
        # Python 3.8+ records the exact end line, which also covers a
        # trailing statement that spans several lines.
        end_line = getattr(node, 'end_lineno', None)
        if end_line is not None:
            return end_line

        # Fallback heuristic: largest line number found anywhere in the node.
        max_line = None
        for child in ast.walk(node):
            line = getattr(child, 'lineno', None)
            if line is not None and (max_line is None or line > max_line):
                max_line = line
        return max_line


def detect_indentation(lines: List[str]) -> Dict[str, Any]:
    """Detect whether tabs or spaces are used and the typical indent size.

    Args:
        lines: Source lines without trailing newlines.

    Returns:
        ``{"type": "tabs", "size": None}`` when more lines start with a tab
        than with a space, ``{"type": "spaces", "size": n}`` when
        space-indented lines exist (``n`` is the most common indent *step*),
        otherwise ``{"type": "unknown", "size": None}``.
    """
    from collections import Counter

    tab_lines = 0
    space_lines = 0
    steps = Counter()  # how often each increase in indent width occurs
    previous_width = 0

    for line in lines:
        if not line.strip():
            # Blank or whitespace-only lines say nothing about the style.
            continue
        if line.startswith('\t'):
            tab_lines += 1
            previous_width = 0
        elif line.startswith(' '):
            space_lines += 1
            width = len(line) - len(line.lstrip(' '))
            # Record the step from the previous line rather than the raw
            # width, so deeply nested code does not inflate the result
            # (8 leading spaces at depth two is still a 4-space indent).
            if width > previous_width:
                steps[width - previous_width] += 1
            previous_width = width
        else:
            previous_width = 0

    if tab_lines > space_lines:
        return {"type": "tabs", "size": None}
    if steps:
        # common indent size (mode of the observed steps)
        return {"type": "spaces", "size": steps.most_common(1)[0][0]}
    return {"type": "unknown", "size": None}


def detect_naming(code: str) -> str:
    """Guess the identifier naming convention used in *code*.

    The styles are probed in a fixed priority order (snake_case, camelCase,
    PascalCase) and the first one with at least one regex match anywhere in
    the text wins. Returns "unknown" when none of them matches.
    """
    style_patterns = (
        ("snake_case", r"\b[a-z_]+_[a-z0-9_]+\b"),
        ("camelCase", r"\b[a-z]+[A-Z][a-zA-Z0-9]+\b"),
        ("PascalCase", r"\b[A-Z][a-z]+[A-Za-z0-9]+\b"),
    )
    for label, pattern in style_patterns:
        if re.search(pattern, code) is not None:
            return label
    return "unknown"


def analyze_code(code: str) -> Dict[str, Any]:
    """Return a dictionary summarizing style and simple complexity metrics.

    Args:
        code: Python source text to analyze.

    Returns:
        A dict with the keys ``indentation``, ``naming``, ``comments``,
        ``counts``, ``functions``, ``average_complexity``,
        ``average_function_length`` and ``suggestions``. When the source
        cannot be parsed, the AST-based entries are zeroed and an ``error``
        key carries the parser's message; the text-based entries are still
        filled in.
    """
    lines = code.splitlines()
    result: Dict[str, Any] = {}

    # indentation
    result['indentation'] = detect_indentation(lines)

    # naming
    result['naming'] = detect_naming(code)

    # comments
    comment_lines = [l for l in lines if l.strip().startswith('#')]
    result['comments'] = {
        'count': len(comment_lines),
        'density': len(comment_lines) / max(1, len(lines)),
    }

    # AST based metrics
    try:
        # Only parsing and traversal are guarded, so a failure here cannot be
        # confused with a mistake in the arithmetic below.
        tree = ast.parse(code)
        visitor = ComplexityVisitor()
        visitor.visit(tree)
    except (SyntaxError, ValueError, RecursionError, MemoryError) as e:
        # Invalid source, null bytes, or nesting too deep to process.
        result['counts'] = {'functions': 0, 'classes': 0, 'lines': len(lines)}
        result['functions'] = []
        result['average_complexity'] = 0.0
        result['average_function_length'] = 0.0
        result['error'] = str(e)
    else:
        # Count definitions in a single walk; coroutines are functions too.
        function_count = 0
        class_count = 0
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                function_count += 1
            elif isinstance(node, ast.ClassDef):
                class_count += 1
        result['counts'] = {
            'functions': function_count,
            'classes': class_count,
            'lines': len(lines),
        }

        measured = visitor.functions
        # Functions whose length could not be determined are left out of the
        # length average instead of causing a division by zero.
        known_lengths = [length for _, _, length in measured if length]
        if measured:
            avg_complexity = sum(c for _, c, _ in measured) / len(measured)
        else:
            avg_complexity = 0.0
        if known_lengths:
            avg_length = sum(known_lengths) / len(known_lengths)
        else:
            avg_length = 0.0

        result['functions'] = [
            {'name': n, 'complexity': c, 'length': l} for n, c, l in measured
        ]
        result['average_complexity'] = avg_complexity
        result['average_function_length'] = avg_length

    # suggestions (lightweight)
    suggestions = []
    if result['comments']['density'] < 0.05:
        suggestions.append('Add short docstrings or comments for complex functions.')
    if result['average_complexity'] > 4:
        suggestions.append('Consider splitting large functions to reduce cyclomatic complexity.')
    if result['indentation']['type'] == 'unknown':
        suggestions.append('Indentation not detected consistently; ensure a consistent style (tabs or spaces).')

    result['suggestions'] = suggestions
    return result