Spaces:

lordofgaming
/

voiceforge

Sleeping

File size: 6,138 Bytes

673435a

"""
VoiceForge Code Quality & Complexity Analyzer
----------------------------------------------
Analyzes the codebase for:
- File sizes and line counts (identifies heavy files)
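- Cyclomatic complexity (using radon) -- planned; not computed by the code in this file yet
- Maintainability index -- planned; not computed by the code in this file yet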
- Long functions detection
- Import dependency analysis
"""

import os
import ast
import sys
from pathlib import Path
from collections import defaultdict

# Thresholds that run_analysis() compares against when flagging problem areas.
# A file or function is flagged only when it is strictly greater than its limit.
MAX_FILE_LINES = 500  # total lines (blank and comment lines included) per file
MAX_FUNCTION_LINES = 50  # lines spanned by a single def / async def
# McCabe Cyclomatic Complexity limit.
# NOTE(review): not referenced by any code visible in this file -- confirm whether
# a complexity check is still planned.
MAX_COMPLEXITY = 10  # McCabe Cyclomatic Complexity

def count_lines(file_path: Path) -> tuple[int, int]:
    """Return ``(total_lines, code_lines)`` for a text file.

    A line counts as code when it is non-blank and its first non-whitespace
    character is not ``#``. Undecodable bytes are ignored. Any failure while
    opening or reading is swallowed, and whatever was counted up to that
    point (``(0, 0)`` if nothing) is returned.
    """
    n_total = n_code = 0
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
            for n_total, raw in enumerate(handle, start=1):
                text = raw.strip()
                if not text or text.startswith('#'):
                    continue
                n_code += 1
    except Exception:
        # Best-effort: an unreadable file simply contributes what was counted.
        pass
    return n_total, n_code

def analyze_functions(file_path: Path) -> list[dict]:
    """Collect per-function metadata from a Python source file.

    The file is parsed with ``ast`` and every ``def`` / ``async def`` reached
    by ``ast.walk`` (methods and nested functions included) produces a dict
    with the keys ``name``, ``line``, ``lines``, ``is_async`` and
    ``has_docstring``.

    Parse or read failures are printed rather than raised; the entries
    gathered before the failure (normally none) are returned.
    """
    def_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    found = []
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as handle:
            text = handle.read()
        module = ast.parse(text)

        for item in ast.walk(module):
            if not isinstance(item, def_types):
                continue

            # A docstring is a bare string-constant expression in first position.
            documented = False
            if item.body:
                head = item.body[0]
                documented = (
                    isinstance(head, ast.Expr)
                    and isinstance(head.value, ast.Constant)
                    and isinstance(head.value.value, str)
                )

            found.append({
                'name': item.name,
                'line': item.lineno,
                # Inclusive span from the def line to the last line of the body.
                'lines': item.end_lineno - item.lineno + 1,
                'is_async': isinstance(item, ast.AsyncFunctionDef),
                'has_docstring': documented,
            })
    except SyntaxError as e:
        print(f"  ⚠️ Syntax Error in {file_path}: {e}")
    except Exception as e:
        print(f"  ⚠️ Error parsing {file_path}: {e}")
    return found

def analyze_imports(file_path: Path) -> list[str]:
    """Return the module names imported anywhere in a Python source file.

    ``import a.b`` contributes ``"a.b"`` (one entry per imported name) and
    ``from a.b import c`` contributes ``"a.b"``. A bare relative import such
    as ``from . import c`` names no module, so it is skipped instead of
    being recorded as an empty string.

    Args:
        file_path: Path of the source file to inspect.

    Returns:
        Module names in ``ast.walk`` order, duplicates included. An empty
        list is returned when the file cannot be read or parsed.
    """
    try:
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            tree = ast.parse(f.read())
    except Exception:
        # Deliberately best-effort: an unreadable or unparsable file simply
        # contributes no imports instead of aborting the whole analysis.
        return []

    imports = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imports.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module:
            # node.module is None for ``from . import x``; skip those.
            imports.append(node.module)
    return imports

def _print_section(title: str) -> None:
    """Print a report section heading followed by a 40-character rule."""
    print(title)
    print("-" * 40)


def _scan_files(root: Path, files: list) -> dict:
    """Gather line, function and import metrics for every path in *files*.

    Paths stored in the returned lists are relative to *root*.
    """
    metrics = {
        'heavy_files': [],          # (relative path, total lines)
        'long_functions': [],       # (relative path, function name, lines)
        'missing_docstrings': [],   # (relative path, function name)
        'total_lines': 0,
        'total_code_lines': 0,
        'total_functions': 0,
        'dependency_counts': defaultdict(int),  # top-level package -> imports
    }

    for py_file in files:
        relative_path = py_file.relative_to(root)

        lines, code = count_lines(py_file)
        metrics['total_lines'] += lines
        metrics['total_code_lines'] += code
        if lines > MAX_FILE_LINES:
            metrics['heavy_files'].append((relative_path, lines))

        functions = analyze_functions(py_file)
        metrics['total_functions'] += len(functions)
        for func in functions:
            if func['lines'] > MAX_FUNCTION_LINES:
                metrics['long_functions'].append(
                    (relative_path, func['name'], func['lines'])
                )
            # Names starting with an underscore are exempt from the
            # docstring requirement.
            if not func['has_docstring'] and not func['name'].startswith('_'):
                metrics['missing_docstrings'].append((relative_path, func['name']))

        # Dependencies are tallied by top-level package name.
        for imp in analyze_imports(py_file):
            metrics['dependency_counts'][imp.split('.')[0]] += 1

    return metrics


def _print_report(file_count: int, metrics: dict) -> None:
    """Print the summary, heavy-file, long-function, dependency and docstring sections."""
    _print_section("📊 SUMMARY")
    print(f"  Total Files:     {file_count}")
    print(f"  Total Lines:     {metrics['total_lines']:,}")
    print(f"  Code Lines:      {metrics['total_code_lines']:,}")
    print(f"  Total Functions: {metrics['total_functions']}")

    _print_section(f"\n⚠️ HEAVY FILES (>{MAX_FILE_LINES} lines)")
    heavy_files = metrics['heavy_files']
    if heavy_files:
        for path, lines in sorted(heavy_files, key=lambda x: -x[1]):
            print(f"  ❌ {path}: {lines} lines")
    else:
        print("  ✅ No heavy files found!")

    _print_section(f"\n⚠️ LONG FUNCTIONS (>{MAX_FUNCTION_LINES} lines)")
    long_functions = metrics['long_functions']
    if long_functions:
        # Only the ten longest functions are listed.
        for path, name, lines in sorted(long_functions, key=lambda x: -x[2])[:10]:
            print(f"  ❌ {path}:{name}() - {lines} lines")
    else:
        print("  ✅ No excessively long functions!")

    _print_section("\n📦 TOP DEPENDENCIES")
    ranked = sorted(metrics['dependency_counts'].items(), key=lambda x: -x[1])
    for dep, count in ranked[:15]:
        print(f"  {dep}: {count} imports")

    _print_section("\n📝 MISSING DOCSTRINGS (top 10)")
    for path, name in metrics['missing_docstrings'][:10]:
        print(f"  {path}:{name}()")


def run_analysis(root_dir: str = "app") -> int:
    """Run the full codebase analysis and print a report to stdout.

    Every ``*.py`` file under *root_dir* is scanned, except files located
    inside a ``__pycache__`` directory. Files are processed in sorted path
    order so the report is reproducible, and the file counts shown are the
    number of files actually analyzed.

    Args:
        root_dir: Directory to scan recursively.

    Returns:
        ``1`` if any file exceeds ``MAX_FILE_LINES`` or any function exceeds
        ``MAX_FUNCTION_LINES``, otherwise ``0``.

    Raises:
        SystemExit: With code 1 when *root_dir* does not exist.
    """
    print("=" * 60)
    print("🔍 VoiceForge Code Quality Analyzer")
    print("=" * 60)

    root = Path(root_dir)
    if not root.exists():
        print(f"❌ Directory not found: {root_dir}")
        sys.exit(1)

    # Filter before counting so the totals match what is analyzed, and match
    # on whole path components rather than on a substring of the path.
    all_files = sorted(
        p for p in root.rglob("*.py") if '__pycache__' not in p.parts
    )
    print(f"\n📁 Analyzing {len(all_files)} Python files...\n")

    metrics = _scan_files(root, all_files)
    _print_report(len(all_files), metrics)

    print("\n" + "=" * 60)

    # The return value doubles as the process exit status.
    if metrics['heavy_files'] or metrics['long_functions']:
        print("⚠️ Code Quality: NEEDS ATTENTION")
        return 1
    print("✅ Code Quality: GOOD")
    return 0

if __name__ == "__main__":
    # Script entry point: parse --path and exit with the analysis status code.
    import argparse

    cli = argparse.ArgumentParser(description="Analyze VoiceForge codebase")
    cli.add_argument("--path", default="app", help="Root directory to analyze")
    options = cli.parse_args()

    status = run_analysis(options.path)
    sys.exit(status)