""" VoiceForge Code Quality & Complexity Analyzer ---------------------------------------------- Analyzes the codebase for: - File sizes and line counts (identifies heavy files) - Cyclomatic complexity (using radon) - Maintainability index - Long functions detection - Import dependency analysis """ import os import ast import sys from pathlib import Path from collections import defaultdict # Thresholds MAX_FILE_LINES = 500 MAX_FUNCTION_LINES = 50 MAX_COMPLEXITY = 10 # McCabe Cyclomatic Complexity def count_lines(file_path: Path) -> tuple[int, int]: """Count total lines and code lines (excluding blanks/comments)""" total = 0 code = 0 try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: for line in f: total += 1 stripped = line.strip() if stripped and not stripped.startswith('#'): code += 1 except Exception: pass return total, code def analyze_functions(file_path: Path) -> list[dict]: """Analyze functions in a Python file using AST""" functions = [] try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: source = f.read() tree = ast.parse(source) for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): func_lines = node.end_lineno - node.lineno + 1 functions.append({ 'name': node.name, 'line': node.lineno, 'lines': func_lines, 'is_async': isinstance(node, ast.AsyncFunctionDef), 'has_docstring': ( isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant) and isinstance(node.body[0].value.value, str) ) if node.body else False }) except SyntaxError as e: print(f" āš ļø Syntax Error in {file_path}: {e}") except Exception as e: print(f" āš ļø Error parsing {file_path}: {e}") return functions def analyze_imports(file_path: Path) -> list[str]: """Extract import statements""" imports = [] try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: source = f.read() tree = ast.parse(source) for node in ast.walk(tree): if isinstance(node, ast.Import): for alias in node.names: imports.append(alias.name) elif isinstance(node, ast.ImportFrom): module = node.module or '' imports.append(module) except Exception: pass return imports def run_analysis(root_dir: str = "app"): """Run full codebase analysis""" print("=" * 60) print("šŸ” VoiceForge Code Quality Analyzer") print("=" * 60) root = Path(root_dir) if not root.exists(): print(f"āŒ Directory not found: {root_dir}") sys.exit(1) all_files = list(root.rglob("*.py")) print(f"\nšŸ“ Analyzing {len(all_files)} Python files...\n") heavy_files = [] long_functions = [] missing_docstrings = [] total_lines = 0 total_code_lines = 0 total_functions = 0 dependency_counts = defaultdict(int) for py_file in all_files: if '__pycache__' in str(py_file): continue lines, code = count_lines(py_file) total_lines += lines total_code_lines += code relative_path = py_file.relative_to(root) # Flag heavy files if lines > MAX_FILE_LINES: heavy_files.append((relative_path, lines)) # Analyze functions functions = analyze_functions(py_file) total_functions += len(functions) for func in functions: if func['lines'] > MAX_FUNCTION_LINES: long_functions.append((relative_path, func['name'], func['lines'])) if not func['has_docstring'] and not func['name'].startswith('_'): missing_docstrings.append((relative_path, func['name'])) # Track imports for imp in analyze_imports(py_file): dependency_counts[imp.split('.')[0]] += 1 # --- Report --- print("šŸ“Š SUMMARY") print("-" * 40) print(f" Total Files: {len(all_files)}") print(f" Total Lines: {total_lines:,}") print(f" Code Lines: {total_code_lines:,}") print(f" Total Functions: {total_functions}") print("\nāš ļø HEAVY FILES (>{} lines)".format(MAX_FILE_LINES)) print("-" * 40) if heavy_files: for path, lines in sorted(heavy_files, key=lambda x: -x[1]): print(f" āŒ {path}: {lines} lines") else: print(" āœ… No heavy files found!") print("\nāš ļø LONG FUNCTIONS (>{} lines)".format(MAX_FUNCTION_LINES)) print("-" * 40) if long_functions: for path, name, lines in sorted(long_functions, key=lambda x: -x[2])[:10]: print(f" āŒ {path}:{name}() - {lines} lines") else: print(" āœ… No excessively long functions!") print("\nšŸ“¦ TOP DEPENDENCIES") print("-" * 40) for dep, count in sorted(dependency_counts.items(), key=lambda x: -x[1])[:15]: print(f" {dep}: {count} imports") print("\nšŸ“ MISSING DOCSTRINGS (top 10)") print("-" * 40) for path, name in missing_docstrings[:10]: print(f" {path}:{name}()") print("\n" + "=" * 60) # Return status code if heavy_files or long_functions: print("āš ļø Code Quality: NEEDS ATTENTION") return 1 else: print("āœ… Code Quality: GOOD") return 0 if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Analyze VoiceForge codebase") parser.add_argument("--path", default="app", help="Root directory to analyze") args = parser.parse_args() sys.exit(run_analysis(args.path))