# voiceforge/backend/tests/quality/analyze_codebase.py
# (provenance: author lordofgaming, "Initial VoiceForge deployment (clean)", commit 673435a)
"""
VoiceForge Code Quality & Complexity Analyzer
----------------------------------------------
Analyzes the codebase for:
- File sizes and line counts (identifies heavy files)
- Cyclomatic complexity (using radon)
- Maintainability index
- Long functions detection
- Import dependency analysis
"""
import os
import ast
import sys
from pathlib import Path
from collections import defaultdict
# Thresholds used by run_analysis() to flag quality problems.
MAX_FILE_LINES = 500  # files longer than this (total lines) are reported as "heavy"
MAX_FUNCTION_LINES = 50  # functions longer than this are reported as "long"
MAX_COMPLEXITY = 10 # McCabe Cyclomatic Complexity — NOTE(review): not referenced anywhere in this file; complexity analysis appears unimplemented
def count_lines(file_path: Path) -> tuple[int, int]:
    """Return ``(total_lines, code_lines)`` for *file_path*.

    Code lines exclude blanks and full-line ``#`` comments. Files that
    cannot be read are treated as empty (best-effort analysis).
    """
    n_total = 0
    n_code = 0
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as fh:
            for raw in fh:
                n_total += 1
                text = raw.strip()
                if text and not text.startswith('#'):
                    n_code += 1
    except Exception:
        # Unreadable file: report zeros rather than aborting the scan.
        pass
    return n_total, n_code
def analyze_functions(file_path: Path) -> list[dict]:
    """Analyze functions in a Python file using AST.

    Returns one dict per (sync or async) function with keys:
    ``name``, ``line`` (starting line), ``lines`` (span in lines),
    ``is_async``, and ``has_docstring``. On a parse failure a warning
    is printed and an empty (or partial) list is returned.
    """
    functions: list[dict] = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            source = f.read()
        tree = ast.parse(source)
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # end_lineno is always populated on nodes from ast.parse (3.8+)
                func_lines = node.end_lineno - node.lineno + 1
                functions.append({
                    'name': node.name,
                    'line': node.lineno,
                    'lines': func_lines,
                    'is_async': isinstance(node, ast.AsyncFunctionDef),
                    # ast.get_docstring performs the same Expr/Constant-str
                    # check as the previous hand-rolled version.
                    'has_docstring': ast.get_docstring(node) is not None,
                })
    except SyntaxError as e:
        print(f" ⚠️ Syntax Error in {file_path}: {e}")
    except Exception as e:
        print(f" ⚠️ Error parsing {file_path}: {e}")
    return functions
def analyze_imports(file_path: Path) -> list[str]:
    """Collect module names imported by a Python file (best-effort).

    ``import a.b`` yields ``'a.b'``; ``from x import y`` yields ``'x'``
    (or ``''`` for a bare relative import). Unreadable or unparsable
    files yield an empty list.
    """
    found: list[str] = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as fh:
            tree = ast.parse(fh.read())
    except Exception:
        return found
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            found.append(node.module or '')
    return found
def _collect_stats(root: Path, files: list[Path]) -> dict:
    """Gather per-file metrics for *files* into a single dict (no printing)."""
    stats = {
        'heavy_files': [],          # (relative_path, total_lines) over MAX_FILE_LINES
        'long_functions': [],       # (relative_path, func_name, lines) over MAX_FUNCTION_LINES
        'missing_docstrings': [],   # (relative_path, func_name) — public funcs only
        'total_lines': 0,
        'total_code_lines': 0,
        'total_functions': 0,
        'dependency_counts': defaultdict(int),  # top-level package -> import count
    }
    for py_file in files:
        lines, code = count_lines(py_file)
        stats['total_lines'] += lines
        stats['total_code_lines'] += code
        relative_path = py_file.relative_to(root)
        # Flag heavy files
        if lines > MAX_FILE_LINES:
            stats['heavy_files'].append((relative_path, lines))
        # Analyze functions
        functions = analyze_functions(py_file)
        stats['total_functions'] += len(functions)
        for func in functions:
            if func['lines'] > MAX_FUNCTION_LINES:
                stats['long_functions'].append((relative_path, func['name'], func['lines']))
            # Leading-underscore names are treated as private and exempt.
            if not func['has_docstring'] and not func['name'].startswith('_'):
                stats['missing_docstrings'].append((relative_path, func['name']))
        # Track imports by top-level package name
        for imp in analyze_imports(py_file):
            stats['dependency_counts'][imp.split('.')[0]] += 1
    return stats

def _print_report(stats: dict, file_count: int) -> int:
    """Print the report sections; return 1 if quality gates failed, else 0."""
    print("📊 SUMMARY")
    print("-" * 40)
    print(f" Total Files: {file_count}")
    print(f" Total Lines: {stats['total_lines']:,}")
    print(f" Code Lines: {stats['total_code_lines']:,}")
    print(f" Total Functions: {stats['total_functions']}")
    print("\n⚠️ HEAVY FILES (>{} lines)".format(MAX_FILE_LINES))
    print("-" * 40)
    if stats['heavy_files']:
        for path, lines in sorted(stats['heavy_files'], key=lambda x: -x[1]):
            print(f" ❌ {path}: {lines} lines")
    else:
        print(" ✅ No heavy files found!")
    print("\n⚠️ LONG FUNCTIONS (>{} lines)".format(MAX_FUNCTION_LINES))
    print("-" * 40)
    if stats['long_functions']:
        # Worst offenders first, capped at 10
        for path, name, lines in sorted(stats['long_functions'], key=lambda x: -x[2])[:10]:
            print(f" ❌ {path}:{name}() - {lines} lines")
    else:
        print(" ✅ No excessively long functions!")
    print("\n📦 TOP DEPENDENCIES")
    print("-" * 40)
    for dep, count in sorted(stats['dependency_counts'].items(), key=lambda x: -x[1])[:15]:
        print(f" {dep}: {count} imports")
    print("\n📝 MISSING DOCSTRINGS (top 10)")
    print("-" * 40)
    for path, name in stats['missing_docstrings'][:10]:
        print(f" {path}:{name}()")
    print("\n" + "=" * 60)
    # Return status code (suitable for CI gating)
    if stats['heavy_files'] or stats['long_functions']:
        print("⚠️ Code Quality: NEEDS ATTENTION")
        return 1
    print("✅ Code Quality: GOOD")
    return 0

def run_analysis(root_dir: str = "app") -> int:
    """Run full codebase analysis and print a report.

    Returns 1 when heavy files or long functions were found, else 0.
    Exits the process with status 1 when *root_dir* does not exist.
    """
    print("=" * 60)
    print("🔍 VoiceForge Code Quality Analyzer")
    print("=" * 60)
    root = Path(root_dir)
    if not root.exists():
        print(f"❌ Directory not found: {root_dir}")
        sys.exit(1)
    # Filter __pycache__ up front so the reported file count matches the
    # set of files actually analyzed (previously cached files were skipped
    # inside the loop but still included in the "Total Files" figure).
    all_files = [p for p in root.rglob("*.py") if '__pycache__' not in str(p)]
    print(f"\n📁 Analyzing {len(all_files)} Python files...\n")
    stats = _collect_stats(root, all_files)
    return _print_report(stats, len(all_files))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Analyze VoiceForge codebase")
parser.add_argument("--path", default="app", help="Root directory to analyze")
args = parser.parse_args()
sys.exit(run_analysis(args.path))