File size: 6,138 Bytes
673435a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
"""
VoiceForge Code Quality & Complexity Analyzer
----------------------------------------------
Analyzes the codebase for:
- File sizes and line counts (identifies heavy files)
- Long functions detection
- Missing docstrings on public functions
- Import dependency analysis

Listed as goals but not implemented in this script:
- Cyclomatic complexity (using radon)
- Maintainability index
"""

import os
import ast
import sys
from pathlib import Path
from collections import defaultdict

# Thresholds used by run_analysis() when deciding what to flag in the report.
# A file whose total line count exceeds this is listed under "HEAVY FILES".
MAX_FILE_LINES = 500
# A function spanning more source lines than this is listed under "LONG FUNCTIONS".
MAX_FUNCTION_LINES = 50
# NOTE(review): not referenced by any code visible in this file — presumably
# reserved for a radon-based complexity check; confirm before relying on it.
MAX_COMPLEXITY = 10  # McCabe Cyclomatic Complexity

def count_lines(file_path: Path) -> tuple[int, int]:
    """Count total lines and code lines in a text file.

    A "code line" is any line that is neither blank nor a ``#`` comment
    after stripping whitespace. Docstring lines are therefore counted
    as code, since they are not ``#`` comments.

    Args:
        file_path: Path of the file to read (decoded as UTF-8, undecodable
            bytes are ignored).

    Returns:
        ``(total_lines, code_lines)``. If the file cannot be opened or read,
        a warning is printed and the counts gathered so far (``(0, 0)`` for
        a file that could not be opened) are returned.
    """
    total = 0
    code = 0
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            for line in f:
                total += 1
                stripped = line.strip()
                if stripped and not stripped.startswith('#'):
                    code += 1
    except OSError as e:
        # Stay best-effort (one unreadable file must not abort the whole
        # analysis) but report it instead of silently showing zero lines.
        print(f"  ⚠️ Error reading {file_path}: {e}")
    return total, code

def analyze_functions(file_path: Path) -> list[dict]:
    """Collect metadata for every function defined in a Python file.

    The file is parsed with ``ast`` and every ``def`` / ``async def`` node
    (including methods and nested functions) is reported.

    Args:
        file_path: Path of the Python source file to parse.

    Returns:
        One dict per function with the keys ``name``, ``line`` (line of the
        ``def``), ``lines`` (span from the ``def`` line to the last body
        line), ``is_async`` and ``has_docstring``. If the file cannot be
        read or parsed, a warning is printed and whatever was collected so
        far (normally nothing) is returned.
    """
    found = []
    def_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
            tree = ast.parse(handle.read())

        for node in ast.walk(tree):
            if not isinstance(node, def_types):
                continue
            # Inclusive span: first line is the def itself, last is end_lineno.
            span = node.end_lineno - node.lineno + 1
            # A docstring is a string-constant expression as the first statement.
            documented = ast.get_docstring(node, clean=False) is not None
            found.append({
                'name': node.name,
                'line': node.lineno,
                'lines': span,
                'is_async': isinstance(node, ast.AsyncFunctionDef),
                'has_docstring': documented,
            })
    except SyntaxError as e:
        print(f"  ⚠️ Syntax Error in {file_path}: {e}")
    except Exception as e:
        print(f"  ⚠️ Error parsing {file_path}: {e}")
    return found

def analyze_imports(file_path: Path) -> list[str]:
    """Extract the module names imported by a Python file.

    ``import a.b`` yields ``"a.b"`` and ``from a.b import c`` yields
    ``"a.b"``. Relative imports are reported without their leading dots
    (``from .pkg import x`` yields ``"pkg"``). A bare relative import such
    as ``from . import helpers`` has no module name of its own, so the
    imported names (``"helpers"``) are reported instead of an empty string.

    Args:
        file_path: Path of the Python source file to parse.

    Returns:
        Imported module names in AST walk order; never contains empty
        strings. An unreadable or unparsable file yields whatever was
        collected before the failure (normally an empty list).
    """
    imports: list[str] = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            source = f.read()
        tree = ast.parse(source)

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    imports.append(node.module)
                else:
                    # `from . import x`: module is None, so fall back to the
                    # imported names; `*` carries no name worth counting.
                    imports.extend(
                        alias.name for alias in node.names if alias.name != '*'
                    )
    except Exception:
        # Deliberately best-effort and quiet: a file that cannot be read or
        # parsed simply contributes no imports to the dependency report.
        pass
    return imports

def _collect_metrics(root: Path, py_files: list[Path]) -> dict:
    """Gather line, function, docstring and import statistics for *py_files*."""
    metrics = {
        'file_count': len(py_files),
        'total_lines': 0,
        'total_code_lines': 0,
        'total_functions': 0,
        'heavy_files': [],         # (relative path, total lines)
        'long_functions': [],      # (relative path, function name, lines)
        'missing_docstrings': [],  # (relative path, function name)
        'dependency_counts': defaultdict(int),
    }

    for py_file in py_files:
        relative_path = py_file.relative_to(root)

        lines, code = count_lines(py_file)
        metrics['total_lines'] += lines
        metrics['total_code_lines'] += code
        if lines > MAX_FILE_LINES:
            metrics['heavy_files'].append((relative_path, lines))

        functions = analyze_functions(py_file)
        metrics['total_functions'] += len(functions)
        for func in functions:
            if func['lines'] > MAX_FUNCTION_LINES:
                metrics['long_functions'].append(
                    (relative_path, func['name'], func['lines'])
                )
            # Names starting with an underscore are treated as private and
            # are not required to carry a docstring.
            if not func['has_docstring'] and not func['name'].startswith('_'):
                metrics['missing_docstrings'].append((relative_path, func['name']))

        for imp in analyze_imports(py_file):
            # Count by top-level package; skip empty names so the report
            # never shows a nameless dependency.
            top_level = imp.split('.')[0]
            if top_level:
                metrics['dependency_counts'][top_level] += 1

    return metrics


def _print_report(metrics: dict) -> None:
    """Print the summary, heavy-file, long-function, dependency and docstring sections."""
    print("📊 SUMMARY")
    print("-" * 40)
    print(f"  Total Files:     {metrics['file_count']}")
    print(f"  Total Lines:     {metrics['total_lines']:,}")
    print(f"  Code Lines:      {metrics['total_code_lines']:,}")
    print(f"  Total Functions: {metrics['total_functions']}")

    print("\n⚠️ HEAVY FILES (>{} lines)".format(MAX_FILE_LINES))
    print("-" * 40)
    if metrics['heavy_files']:
        for path, lines in sorted(metrics['heavy_files'], key=lambda x: -x[1]):
            print(f"  ❌ {path}: {lines} lines")
    else:
        print("  ✅ No heavy files found!")

    print("\n⚠️ LONG FUNCTIONS (>{} lines)".format(MAX_FUNCTION_LINES))
    print("-" * 40)
    if metrics['long_functions']:
        longest = sorted(metrics['long_functions'], key=lambda x: -x[2])[:10]
        for path, name, lines in longest:
            print(f"  ❌ {path}:{name}() - {lines} lines")
    else:
        print("  ✅ No excessively long functions!")

    print("\n📦 TOP DEPENDENCIES")
    print("-" * 40)
    top_deps = sorted(metrics['dependency_counts'].items(), key=lambda x: -x[1])[:15]
    for dep, count in top_deps:
        print(f"  {dep}: {count} imports")

    print("\n📝 MISSING DOCSTRINGS (top 10)")
    print("-" * 40)
    if metrics['missing_docstrings']:
        for path, name in metrics['missing_docstrings'][:10]:
            print(f"  {path}:{name}()")
    else:
        print("  ✅ All public functions have docstrings!")


def run_analysis(root_dir: str = "app") -> int:
    """Run the full codebase analysis and print a report to stdout.

    Every ``*.py`` file below *root_dir* is analysed, except files located
    in a ``__pycache__`` directory. Files are processed in sorted path
    order so the report is the same from run to run.

    Args:
        root_dir: Directory to scan recursively.

    Returns:
        ``1`` if any heavy file or long function was found, otherwise ``0``
        (suitable for use as a process exit status).

    Raises:
        SystemExit: With status 1 if *root_dir* does not exist.
    """
    print("=" * 60)
    print("🔍 VoiceForge Code Quality Analyzer")
    print("=" * 60)

    root = Path(root_dir)
    if not root.exists():
        print(f"❌ Directory not found: {root_dir}")
        sys.exit(1)

    # Filter before counting so "Total Files" matches what is analysed, and
    # compare whole path components rather than a substring of the path.
    py_files = sorted(
        path for path in root.rglob("*.py") if '__pycache__' not in path.parts
    )
    print(f"\n📁 Analyzing {len(py_files)} Python files...\n")

    metrics = _collect_metrics(root, py_files)
    _print_report(metrics)

    print("\n" + "=" * 60)

    # Only heavy files and long functions affect the status code; missing
    # docstrings are informational.
    if metrics['heavy_files'] or metrics['long_functions']:
        print("⚠️ Code Quality: NEEDS ATTENTION")
        return 1
    print("✅ Code Quality: GOOD")
    return 0

if __name__ == "__main__":
    # Command-line entry point: analyse the directory given via --path
    # (default "app") and use run_analysis()'s result as the exit status.
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Analyze VoiceForge codebase")
    cli.add_argument("--path", default="app", help="Root directory to analyze")
    target_dir = cli.parse_args().path

    exit_status = run_analysis(target_dir)
    sys.exit(exit_status)