# voiceforge/backend/tests/quality/analyze_codebase.py
# (provenance: author lordofgaming, "Initial VoiceForge deployment (clean)", commit 673435a)
"""
VoiceForge Code Quality & Complexity Analyzer
----------------------------------------------
Analyzes the codebase for:
- File sizes and line counts (identifies heavy files)
- Cyclomatic complexity (using radon)
- Maintainability index
- Long functions detection
- Import dependency analysis
"""
import os
import ast
import sys
from pathlib import Path
from collections import defaultdict
# Thresholds used by run_analysis() to flag quality problems.
MAX_FILE_LINES = 500  # files longer than this (total lines) are reported as "heavy"
MAX_FUNCTION_LINES = 50  # functions longer than this are reported as "long"
MAX_COMPLEXITY = 10 # McCabe Cyclomatic Complexity — NOTE(review): not referenced anywhere in this file; complexity analysis appears unimplemented
def count_lines(file_path: Path) -> tuple[int, int]:
    """Return ``(total_lines, code_lines)`` for *file_path*.

    Code lines exclude blanks and full-line ``#`` comments. Files that
    cannot be read are treated as empty (best-effort analysis).
    """
    n_total = 0
    n_code = 0
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as fh:
            for raw in fh:
                n_total += 1
                text = raw.strip()
                if text and not text.startswith('#'):
                    n_code += 1
    except Exception:
        # Unreadable file: report zeros rather than aborting the scan.
        pass
    return n_total, n_code
def analyze_functions(file_path: Path) -> list[dict]:
    """Analyze functions in a Python file using AST.

    Returns one dict per (sync or async) function with keys:
    ``name``, ``line`` (starting line), ``lines`` (span in lines),
    ``is_async``, and ``has_docstring``. On a parse failure a warning
    is printed and an empty (or partial) list is returned.
    """
    functions: list[dict] = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            source = f.read()
        tree = ast.parse(source)
        for node in ast.walk(tree):
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
                # end_lineno is always populated on nodes from ast.parse (3.8+)
                func_lines = node.end_lineno - node.lineno + 1
                functions.append({
                    'name': node.name,
                    'line': node.lineno,
                    'lines': func_lines,
                    'is_async': isinstance(node, ast.AsyncFunctionDef),
                    # ast.get_docstring performs the same Expr/Constant-str
                    # check as the previous hand-rolled version.
                    'has_docstring': ast.get_docstring(node) is not None,
                })
    except SyntaxError as e:
        print(f" ⚠️ Syntax Error in {file_path}: {e}")
    except Exception as e:
        print(f" ⚠️ Error parsing {file_path}: {e}")
    return functions
def analyze_imports(file_path: Path) -> list[str]:
    """Collect module names imported by a Python file (best-effort).

    ``import a.b`` yields ``'a.b'``; ``from x import y`` yields ``'x'``
    (or ``''`` for a bare relative import). Unreadable or unparsable
    files yield an empty list.
    """
    found: list[str] = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as fh:
            tree = ast.parse(fh.read())
    except Exception:
        return found
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            found.append(node.module or '')
    return found
def _collect_stats(root: Path, files: list[Path]) -> dict:
    """Gather per-file metrics for *files* into a single dict (no printing)."""
    stats = {
        'heavy_files': [],          # (relative_path, total_lines) over MAX_FILE_LINES
        'long_functions': [],       # (relative_path, func_name, lines) over MAX_FUNCTION_LINES
        'missing_docstrings': [],   # (relative_path, func_name) — public funcs only
        'total_lines': 0,
        'total_code_lines': 0,
        'total_functions': 0,
        'dependency_counts': defaultdict(int),  # top-level package -> import count
    }
    for py_file in files:
        lines, code = count_lines(py_file)
        stats['total_lines'] += lines
        stats['total_code_lines'] += code
        relative_path = py_file.relative_to(root)
        # Flag heavy files
        if lines > MAX_FILE_LINES:
            stats['heavy_files'].append((relative_path, lines))
        # Analyze functions
        functions = analyze_functions(py_file)
        stats['total_functions'] += len(functions)
        for func in functions:
            if func['lines'] > MAX_FUNCTION_LINES:
                stats['long_functions'].append((relative_path, func['name'], func['lines']))
            # Leading-underscore names are treated as private and exempt.
            if not func['has_docstring'] and not func['name'].startswith('_'):
                stats['missing_docstrings'].append((relative_path, func['name']))
        # Track imports by top-level package name
        for imp in analyze_imports(py_file):
            stats['dependency_counts'][imp.split('.')[0]] += 1
    return stats

def _print_report(stats: dict, file_count: int) -> int:
    """Print the report sections; return 1 if quality gates failed, else 0."""
    print("📊 SUMMARY")
    print("-" * 40)
    print(f" Total Files: {file_count}")
    print(f" Total Lines: {stats['total_lines']:,}")
    print(f" Code Lines: {stats['total_code_lines']:,}")
    print(f" Total Functions: {stats['total_functions']}")
    print("\n⚠️ HEAVY FILES (>{} lines)".format(MAX_FILE_LINES))
    print("-" * 40)
    if stats['heavy_files']:
        for path, lines in sorted(stats['heavy_files'], key=lambda x: -x[1]):
            print(f" ❌ {path}: {lines} lines")
    else:
        print(" ✅ No heavy files found!")
    print("\n⚠️ LONG FUNCTIONS (>{} lines)".format(MAX_FUNCTION_LINES))
    print("-" * 40)
    if stats['long_functions']:
        # Worst offenders first, capped at 10
        for path, name, lines in sorted(stats['long_functions'], key=lambda x: -x[2])[:10]:
            print(f" ❌ {path}:{name}() - {lines} lines")
    else:
        print(" ✅ No excessively long functions!")
    print("\n📦 TOP DEPENDENCIES")
    print("-" * 40)
    for dep, count in sorted(stats['dependency_counts'].items(), key=lambda x: -x[1])[:15]:
        print(f" {dep}: {count} imports")
    print("\n📝 MISSING DOCSTRINGS (top 10)")
    print("-" * 40)
    for path, name in stats['missing_docstrings'][:10]:
        print(f" {path}:{name}()")
    print("\n" + "=" * 60)
    # Return status code (suitable for CI gating)
    if stats['heavy_files'] or stats['long_functions']:
        print("⚠️ Code Quality: NEEDS ATTENTION")
        return 1
    print("✅ Code Quality: GOOD")
    return 0

def run_analysis(root_dir: str = "app") -> int:
    """Run full codebase analysis and print a report.

    Returns 1 when heavy files or long functions were found, else 0.
    Exits the process with status 1 when *root_dir* does not exist.
    """
    print("=" * 60)
    print("🔍 VoiceForge Code Quality Analyzer")
    print("=" * 60)
    root = Path(root_dir)
    if not root.exists():
        print(f"❌ Directory not found: {root_dir}")
        sys.exit(1)
    # Filter __pycache__ up front so the reported file count matches the
    # set of files actually analyzed (previously cached files were skipped
    # inside the loop but still included in the "Total Files" figure).
    all_files = [p for p in root.rglob("*.py") if '__pycache__' not in str(p)]
    print(f"\n📁 Analyzing {len(all_files)} Python files...\n")
    stats = _collect_stats(root, all_files)
    return _print_report(stats, len(all_files))
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description="Analyze VoiceForge codebase")
parser.add_argument("--path", default="app", help="Root directory to analyze")
args = parser.parse_args()
sys.exit(run_analysis(args.path))