Spaces:
Sleeping
Sleeping
File size: 6,138 Bytes
673435a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 |
"""
VoiceForge Code Quality & Complexity Analyzer
----------------------------------------------
Analyzes the codebase for:
- File sizes and line counts (identifies heavy files)
- Cyclomatic complexity (using radon)
- Maintainability index
- Long functions detection
- Import dependency analysis
"""
import os
import ast
import sys
from pathlib import Path
from collections import defaultdict
# Thresholds
MAX_FILE_LINES = 500
MAX_FUNCTION_LINES = 50
MAX_COMPLEXITY = 10 # McCabe Cyclomatic Complexity
def count_lines(file_path: Path) -> tuple[int, int]:
    """Count lines in *file_path*.

    Returns a ``(total, code)`` pair where ``code`` excludes blank lines
    and lines whose first non-whitespace character is ``#``.  Files that
    cannot be read yield ``(0, 0)``.
    """
    total_count, code_count = 0, 0
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
            for raw in handle:
                total_count += 1
                text = raw.strip()
                if text and not text.startswith('#'):
                    code_count += 1
    except Exception:
        # Best-effort: unreadable files just contribute nothing.
        pass
    return total_count, code_count
def analyze_functions(file_path: Path) -> list[dict]:
    """Walk the AST of a Python file and describe every function.

    Each result dict records the function ``name``, its starting ``line``,
    its ``lines`` count (inclusive), whether it ``is_async``, and whether it
    ``has_docstring``.  Parse failures print a warning and return whatever
    was collected so far.
    """
    found: list[dict] = []
    try:
        source = file_path.read_text(encoding='utf-8', errors='ignore')
        for node in ast.walk(ast.parse(source)):
            if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                continue
            found.append({
                'name': node.name,
                'line': node.lineno,
                'lines': node.end_lineno - node.lineno + 1,
                'is_async': isinstance(node, ast.AsyncFunctionDef),
                # ast.get_docstring applies the same first-statement
                # string-constant check the original spelled out by hand.
                'has_docstring': ast.get_docstring(node) is not None,
            })
    except SyntaxError as e:
        print(f" ⚠️ Syntax Error in {file_path}: {e}")
    except Exception as e:
        print(f" ⚠️ Error parsing {file_path}: {e}")
    return found
def analyze_imports(file_path: Path) -> list[str]:
    """Extract imported module names from a Python file.

    Returns the dotted module path for each ``import`` and
    ``from ... import`` statement.  Relative imports (``from . import x``,
    ``from .mod import y``) are intra-package references, not external
    dependencies, so they are skipped — previously ``from . import x``
    (where ``node.module`` is ``None``) was recorded as an empty string,
    which polluted the dependency counts with a bogus ``''`` entry.
    Unreadable or unparsable files yield an empty list.
    """
    imports: list[str] = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            tree = ast.parse(f.read())
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                imports.extend(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom):
                # level > 0 marks a relative (package-local) import;
                # node.module is None for bare ``from . import x``.
                if node.level == 0 and node.module:
                    imports.append(node.module)
    except Exception:
        # Best-effort: broken files simply contribute no imports.
        pass
    return imports
def run_analysis(root_dir: str = "app"):
    """Run the full codebase analysis and print a report.

    Scans every ``*.py`` file under *root_dir* (excluding ``__pycache__``),
    flags files over MAX_FILE_LINES and functions over MAX_FUNCTION_LINES,
    tallies top-level import dependencies, and lists public functions
    missing docstrings.

    Returns:
        1 if heavy files or long functions were found (needs attention),
        0 otherwise.

    Exits the process with status 1 when *root_dir* does not exist.
    """
    print("=" * 60)
    print("🔍 VoiceForge Code Quality Analyzer")
    print("=" * 60)
    root = Path(root_dir)
    if not root.exists():
        print(f"❌ Directory not found: {root_dir}")
        sys.exit(1)
    # Filter __pycache__ up front so the reported "Total Files" count
    # matches the set of files actually analyzed (previously the count
    # included bytecode-cache copies that the loop then skipped).
    all_files = [p for p in root.rglob("*.py") if '__pycache__' not in p.parts]
    print(f"\n📁 Analyzing {len(all_files)} Python files...\n")
    heavy_files = []
    long_functions = []
    missing_docstrings = []
    total_lines = 0
    total_code_lines = 0
    total_functions = 0
    dependency_counts = defaultdict(int)
    for py_file in all_files:
        lines, code = count_lines(py_file)
        total_lines += lines
        total_code_lines += code
        relative_path = py_file.relative_to(root)
        # Flag heavy files
        if lines > MAX_FILE_LINES:
            heavy_files.append((relative_path, lines))
        # Analyze functions
        functions = analyze_functions(py_file)
        total_functions += len(functions)
        for func in functions:
            if func['lines'] > MAX_FUNCTION_LINES:
                long_functions.append((relative_path, func['name'], func['lines']))
            # Leading underscore marks a private function; those are
            # exempt from the docstring requirement.
            if not func['has_docstring'] and not func['name'].startswith('_'):
                missing_docstrings.append((relative_path, func['name']))
        # Track imports by top-level package name
        for imp in analyze_imports(py_file):
            dependency_counts[imp.split('.')[0]] += 1
    # --- Report ---
    print("📊 SUMMARY")
    print("-" * 40)
    print(f" Total Files: {len(all_files)}")
    print(f" Total Lines: {total_lines:,}")
    print(f" Code Lines: {total_code_lines:,}")
    print(f" Total Functions: {total_functions}")
    print("\n⚠️ HEAVY FILES (>{} lines)".format(MAX_FILE_LINES))
    print("-" * 40)
    if heavy_files:
        for path, lines in sorted(heavy_files, key=lambda x: -x[1]):
            print(f" ❌ {path}: {lines} lines")
    else:
        print(" ✅ No heavy files found!")
    print("\n⚠️ LONG FUNCTIONS (>{} lines)".format(MAX_FUNCTION_LINES))
    print("-" * 40)
    if long_functions:
        # Worst offenders first, capped at 10 entries.
        for path, name, lines in sorted(long_functions, key=lambda x: -x[2])[:10]:
            print(f" ❌ {path}:{name}() - {lines} lines")
    else:
        print(" ✅ No excessively long functions!")
    print("\n📦 TOP DEPENDENCIES")
    print("-" * 40)
    for dep, count in sorted(dependency_counts.items(), key=lambda x: -x[1])[:15]:
        print(f" {dep}: {count} imports")
    print("\n📝 MISSING DOCSTRINGS (top 10)")
    print("-" * 40)
    for path, name in missing_docstrings[:10]:
        print(f" {path}:{name}()")
    print("\n" + "=" * 60)
    # Return status code for use as a process exit code.
    if heavy_files or long_functions:
        print("⚠️ Code Quality: NEEDS ATTENTION")
        return 1
    else:
        print("✅ Code Quality: GOOD")
        return 0
if __name__ == "__main__":
    # CLI entry point: the process exit status mirrors run_analysis' return.
    import argparse

    cli = argparse.ArgumentParser(description="Analyze VoiceForge codebase")
    cli.add_argument("--path", default="app", help="Root directory to analyze")
    opts = cli.parse_args()
    sys.exit(run_analysis(opts.path))
|