"""Code complexity metrics for Python, C-like, and arbitrary source text."""

import ast
import math
import re

try:
    from radon.complexity import cc_visit
    from radon.metrics import h_visit
    from radon.raw import analyze
except ImportError as _radon_exc:
    # radon is only used by analyze_python_complexity. Keep the module
    # importable so the regex-based and line-based analyses still work, and
    # report the missing dependency when the Python path is actually used.
    cc_visit = h_visit = analyze = None
    _RADON_IMPORT_ERROR = _radon_exc
else:
    _RADON_IMPORT_ERROR = None


# AST statement types that open a new nesting level. Looked up by name so
# that node classes missing from the running interpreter (e.g. TryStar,
# Match on older versions) are skipped instead of raising AttributeError.
_NESTING_NODE_TYPES = tuple(
    getattr(ast, _name)
    for _name in (
        "If",
        "For",
        "AsyncFor",
        "While",
        "With",
        "AsyncWith",
        "Try",
        "TryStar",
        "Match",
        "FunctionDef",
        "AsyncFunctionDef",
    )
    if hasattr(ast, _name)
)

# Decision points counted by the regex-based analysis. Keywords are matched
# case-sensitively as whole words; "?" is matched bare because "\b" never
# matches between a space and "?", which made the usual "a ? b : c" invisible.
_CPP_DECISION_RE = re.compile(
    r"\b(?:if|else|while|for|switch|case|catch)\b|\?"
)

# Language names routed to the regex-based (brace language) analysis.
_C_LIKE_LANGUAGES = frozenset(
    {"cpp", "c", "cuda", "c++", "rust", "swift", "json", "json5"}
)


class _NestingVisitor(ast.NodeVisitor):
    """Track the deepest level of nested block statements in an AST."""

    def __init__(self):
        self.current_depth = 0
        self.max_depth = 0

    def generic_visit(self, node):
        if not isinstance(node, _NESTING_NODE_TYPES):
            super().generic_visit(node)
            return

        self.current_depth += 1
        self.max_depth = max(self.max_depth, self.current_depth)
        if isinstance(node, ast.If):
            self._visit_if_chain(node)
        else:
            super().generic_visit(node)
        self.current_depth -= 1

    def _visit_if_chain(self, node):
        """Visit an ``if``/``elif``/``else`` chain at a single depth.

        An ``elif`` is parsed as an ``If`` node that is the only statement in
        the parent's ``orelse``. Visiting it generically would add one level
        per ``elif`` although the branches are siblings in the source.
        """
        while True:
            self.visit(node.test)
            for stmt in node.body:
                self.visit(stmt)

            orelse = node.orelse
            is_elif = (
                len(orelse) == 1
                and isinstance(orelse[0], ast.If)
                # A real ``elif`` starts in the same column as its ``if``;
                # an ``if`` written inside an ``else:`` body is indented
                # further and still counts as nested.
                and getattr(orelse[0], "col_offset", None)
                == getattr(node, "col_offset", None)
            )
            if is_elif:
                node = orelse[0]
                continue

            for stmt in orelse:
                self.visit(stmt)
            return


def max_nesting_depth(code_string):
    """
    Calculate maximum nesting depth for Python code using AST.

    A level is opened by if/for/while/with/try statements, their async
    variants, match statements (where the interpreter supports them) and
    function definitions. Branches of an if/elif chain share one level.

    Args:
        code_string: Python source code to analyze

    Returns:
        The deepest nesting level found (0 when there are no such statements)

    Raises:
        SyntaxError: If the code cannot be parsed as valid Python
    """
    visitor = _NestingVisitor()
    visitor.visit(ast.parse(code_string))
    return visitor.max_depth


def analyze_python_complexity(code_string):
    """
    Comprehensive complexity analysis for Python code using radon library.

    Uses AST parsing and advanced metrics like Halstead complexity.

    Args:
        code_string: Python source code to analyze

    Returns:
        Dictionary of complexity metrics

    Raises:
        SyntaxError: If the code cannot be parsed as valid Python
        ImportError: If the radon package is not installed
    """
    if _RADON_IMPORT_ERROR is not None:
        raise ImportError(
            "radon is required for Python complexity analysis"
        ) from _RADON_IMPORT_ERROR

    # Cyclomatic complexity: total over all reported blocks, plus the mean.
    cc_results = cc_visit(code_string)
    total_cc = sum(block.complexity for block in cc_results)
    avg_cc = total_cc / len(cc_results) if cc_results else 0

    # Halstead metrics; fall back to neutral values when no total is reported.
    h_metrics = h_visit(code_string)
    halstead_total = h_metrics.total if h_metrics.total else None
    halstead_volume = halstead_total.volume if halstead_total else 1
    halstead_difficulty = halstead_total.difficulty if halstead_total else 0
    halstead_effort = halstead_total.effort if halstead_total else 0

    raw_metrics = analyze(code_string)
    loc = raw_metrics.loc
    lloc = raw_metrics.lloc
    comments = raw_metrics.comments

    # Maintainability index approximation; the log terms are skipped for
    # non-positive inputs so that empty code does not raise a math error.
    mi = (
        171
        - 5.2 * (math.log2(halstead_volume) if halstead_volume > 0 else 0)
        - 0.23 * total_cc
        - 16.2 * (math.log2(loc) if loc > 0 else 0)
    )

    nesting_depth = max_nesting_depth(code_string)

    # Normalized scores for aggregation
    norm_cc = total_cc / 10  # Assuming 10 is high complexity
    norm_halstead = math.log2(halstead_volume + 1) / 10
    norm_loc = math.log2(loc + 1) / 10
    norm_nesting = nesting_depth / 5  # Assuming depth 5 is quite nested

    # Complexity Score (weighted sum); only the final sum is capped at 1.0.
    complexity_score = (
        0.4 * norm_cc
        + 0.4 * norm_halstead
        + 0.1 * norm_loc
        + 0.1 * norm_nesting
    )

    return {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": avg_cc,
        "halstead_volume": halstead_volume,
        "halstead_difficulty": halstead_difficulty,
        "halstead_effort": halstead_effort,
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": nesting_depth,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }


def _count_cpp_lines(lines):
    """Return ``(logical_lines, comment_lines)`` for C-style source lines.

    Blank lines are ignored. A line counts as a comment when it starts with
    ``//`` or ``/*``, ends with ``*/``, or lies inside a block comment that
    was opened at the start of an earlier line. This is a line-level
    heuristic, not a tokenizer.
    """
    lloc = 0
    comments = 0
    in_block_comment = False

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue

        if in_block_comment:
            comments += 1
            in_block_comment = "*/" not in stripped
        elif stripped.startswith(("//", "/*")) or stripped.endswith("*/"):
            comments += 1
            # Skip the opening "/*" so that "/*/" is not read as closed.
            in_block_comment = (
                stripped.startswith("/*") and "*/" not in stripped[2:]
            )
        else:
            lloc += 1

    return lloc, comments


def analyze_cpp_complexity(code_string):
    """
    Simple complexity analysis for C/C++/CUDA code using regex patterns.

    Returns metrics similar to Python analysis but using basic text analysis.
    Keywords and braces are counted over the raw text, so occurrences inside
    comments or string literals are included.

    Args:
        code_string: C/C++/CUDA source code to analyze

    Returns:
        Dictionary of complexity metrics
    """
    lines = code_string.split("\n")

    # Total physical lines (blank and comment lines included); logical and
    # comment lines are counted separately below.
    loc = len(lines)
    lloc, comments = _count_cpp_lines(lines)

    # Simple cyclomatic complexity - base of 1 plus one per decision point
    total_cc = 1 + len(_CPP_DECISION_RE.findall(code_string))

    # Estimate nesting depth by counting braces; an unmatched "}" never
    # pushes the depth below zero.
    max_nesting = 0
    current_nesting = 0
    for char in code_string:
        if char == "{":
            current_nesting += 1
            max_nesting = max(max_nesting, current_nesting)
        elif char == "}":
            current_nesting = max(0, current_nesting - 1)

    # Simple maintainability index approximation
    volume = max(1, lloc * math.log2(max(1, total_cc)))
    mi = max(
        0,
        171
        - 5.2 * math.log2(max(1, volume))
        - 0.23 * total_cc
        - 16.2 * math.log2(max(1, loc)),
    )

    # Normalized scores, each capped at 1.0
    norm_cc = min(total_cc / 10, 1.0)
    norm_volume = min(math.log2(volume + 1) / 10, 1.0)
    norm_loc = min(math.log2(loc + 1) / 10, 1.0)
    norm_nesting = min(max_nesting / 5, 1.0)

    complexity_score = (
        0.4 * norm_cc
        + 0.4 * norm_volume
        + 0.1 * norm_loc
        + 0.1 * norm_nesting
    )

    return {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": total_cc,  # Same as total for simplicity
        "halstead_volume": volume,
        "halstead_difficulty": 1.0,  # Placeholder
        "halstead_effort": volume,  # Simplified
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": max_nesting,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }


def analyze_generic_complexity(code_string):
    """
    Simple line-based complexity analysis for unknown languages.

    Only the number of non-blank lines is measured; every other metric is a
    fixed placeholder.

    Args:
        code_string: Source code in any language

    Returns:
        Dictionary of basic complexity metrics
    """
    loc = sum(1 for line in code_string.split("\n") if line.strip())

    # Very simple complexity estimate based on code length
    complexity_score = min(math.log2(max(1, loc)) / 10, 1.0)

    return {
        "cyclomatic_complexity": 1,
        "average_cyclomatic_complexity": 1,
        "halstead_volume": max(1, loc),
        "halstead_difficulty": 1.0,
        "halstead_effort": max(1, loc),
        "lines_of_code": loc,
        "logical_lines_of_code": loc,
        "comments": 0,
        "maintainability_index": 100.0,  # Default good score
        "max_nesting_depth": 1,
        "complexity_score": round(complexity_score, 3),
    }


def analyze_code_metrics(code_string, language="python"):
    """
    Analyze code complexity metrics for different programming languages.

    This function routes to appropriate analysis methods based on the language:
    - Python: Full AST-based analysis with Halstead metrics
    - C/C++/CUDA/Rust/Swift/JSON: Regex-based pattern matching analysis
    - Other languages: Simple line-based complexity estimation

    Args:
        code_string: The source code to analyze
        language: Programming language ("python", "cpp", "c", "cuda", etc.).
            Matching ignores case and surrounding whitespace; None or an
            empty string is treated as an unknown language.

    Returns:
        Dictionary of complexity metrics including:
        - cyclomatic_complexity: Code complexity measure
        - halstead_volume: Code volume metric
        - lines_of_code: Total lines
        - maintainability_index: Code maintainability score
        - complexity_score: Normalized overall complexity (0-1)

    Raises:
        ImportError: If language is "python" and radon is not installed
    """
    # Normalize language name
    normalized = (language or "").strip().lower()

    # For Python, use the full radon-based analysis
    if normalized == "python":
        try:
            return analyze_python_complexity(code_string)
        except SyntaxError:
            # If Python parsing fails, fall back to C++ analysis
            return analyze_cpp_complexity(code_string)

    # For C/C++/CUDA/Rust/Swift/JSON, use regex-based analysis
    if normalized in _C_LIKE_LANGUAGES:
        return analyze_cpp_complexity(code_string)

    # For unknown languages, use simple line-based complexity
    return analyze_generic_complexity(code_string)