| import ast |
| from radon.complexity import cc_visit |
| from radon.metrics import h_visit |
| from radon.raw import analyze |
| import math |
| import re |
|
|
|
|
def max_nesting_depth(code_string):
    """Return the maximum nesting depth of Python source, measured on its AST.

    Each ``if``, ``for`` / ``async for``, ``while``, ``with`` / ``async with``,
    ``try`` (and ``try`` / ``except*`` where the running Python provides
    ``ast.TryStar``) and function definition adds one level while its
    children are visited. An ``elif`` branch is counted at the same level as
    the ``if`` it belongs to rather than one level deeper.

    Args:
        code_string: Python source code to analyze.

    Returns:
        The deepest level reached, or 0 when none of the constructs above
        occur in the code.

    Raises:
        SyntaxError: If ``ast.parse`` cannot parse ``code_string``.
    """
    # ``ast.If`` is deliberately absent: it is handled by ``visit_If`` so that
    # ``elif`` chains can be flattened.
    nesting_types = (
        ast.For,
        ast.AsyncFor,
        ast.While,
        ast.With,
        ast.AsyncWith,
        ast.Try,
        ast.FunctionDef,
        ast.AsyncFunctionDef,
    )
    # ast.TryStar (try/except*) only exists on Python 3.11+.
    try_star = getattr(ast, "TryStar", None)
    if try_star is not None:
        nesting_types += (try_star,)

    class NestingVisitor(ast.NodeVisitor):
        """Track the current and the deepest nesting level during a walk."""

        def __init__(self):
            self.current_depth = 0
            self.max_depth = 0

        def _enter(self):
            # Go one level deeper and remember the deepest level seen.
            self.current_depth += 1
            self.max_depth = max(self.max_depth, self.current_depth)

        def visit_If(self, node):
            self._enter()
            self.visit(node.test)
            for stmt in node.body:
                self.visit(stmt)

            orelse = node.orelse
            # The AST represents ``elif`` as a lone ``If`` inside ``orelse``.
            # It is told apart from ``else:`` + nested ``if`` by its column:
            # an ``elif`` node is expected to report the same column as its
            # ``if``. If the interpreter reports a different column, the
            # branch is simply counted as nested (the previous behavior).
            is_elif = (
                len(orelse) == 1
                and isinstance(orelse[0], ast.If)
                and orelse[0].col_offset == node.col_offset
            )
            if is_elif:
                # Leave this level first so the elif sits beside the if.
                self.current_depth -= 1
                self.visit(orelse[0])
            else:
                for stmt in orelse:
                    self.visit(stmt)
                self.current_depth -= 1

        def generic_visit(self, node):
            if isinstance(node, nesting_types):
                self._enter()
                super().generic_visit(node)
                self.current_depth -= 1
            else:
                super().generic_visit(node)

    tree = ast.parse(code_string)
    visitor = NestingVisitor()
    visitor.visit(tree)
    return visitor.max_depth
|
|
|
|
def analyze_python_complexity(code_string):
    """
    Comprehensive complexity analysis for Python code using the radon library.

    Combines radon's cyclomatic complexity (``cc_visit``), Halstead metrics
    (``h_visit``) and raw line counts (``analyze``) with the AST nesting depth
    from ``max_nesting_depth`` into one metrics dictionary.

    Args:
        code_string: Python source code to analyze

    Returns:
        Dictionary of complexity metrics with the keys
        ``cyclomatic_complexity``, ``average_cyclomatic_complexity``,
        ``halstead_volume``, ``halstead_difficulty``, ``halstead_effort``,
        ``lines_of_code``, ``logical_lines_of_code``, ``comments``,
        ``maintainability_index``, ``max_nesting_depth`` and
        ``complexity_score`` (rounded to 3 decimals, capped at 1.0).

    Raises:
        SyntaxError: If the code cannot be parsed as valid Python
    """
    # Cyclomatic complexity: summed over every block radon reports.
    cc_results = cc_visit(code_string)
    total_cc = sum(block.complexity for block in cc_results)
    avg_cc = total_cc / len(cc_results) if cc_results else 0

    # Halstead metrics; fall back to neutral values when no total is reported.
    h_metrics = h_visit(code_string)
    halstead_total = h_metrics.total if h_metrics.total else None
    halstead_volume = halstead_total.volume if halstead_total else 1
    halstead_difficulty = halstead_total.difficulty if halstead_total else 0
    halstead_effort = halstead_total.effort if halstead_total else 0

    # Raw line counts.
    raw_metrics = analyze(code_string)
    loc = raw_metrics.loc
    lloc = raw_metrics.lloc
    comments = raw_metrics.comments

    # Maintainability index (this file's variant uses base-2 logarithms).
    # The log terms are skipped for non-positive inputs, and the result is
    # floored at 0 so that large inputs cannot yield a negative index; this
    # matches the clamp applied in analyze_cpp_complexity.
    mi = max(
        0.0,
        171
        - 5.2 * (math.log2(halstead_volume) if halstead_volume > 0 else 0)
        - 0.23 * total_cc
        - 16.2 * (math.log2(loc) if loc > 0 else 0),
    )

    nesting_depth = max_nesting_depth(code_string)

    # Scale each component to roughly 0-1. The components are not capped
    # individually here; only the weighted sum is capped at 1.0 below.
    norm_cc = total_cc / 10
    norm_halstead = math.log2(halstead_volume + 1) / 10
    norm_loc = math.log2(loc + 1) / 10
    norm_nesting = nesting_depth / 5

    # Weighted blend: 40% cyclomatic, 40% Halstead volume, 10% size,
    # 10% nesting.
    complexity_score = (
        0.4 * norm_cc + 0.4 * norm_halstead + 0.1 * norm_loc + 0.1 * norm_nesting
    )

    return {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": avg_cc,
        "halstead_volume": halstead_volume,
        "halstead_difficulty": halstead_difficulty,
        "halstead_effort": halstead_effort,
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": nesting_depth,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }
|
|
|
|
def analyze_cpp_complexity(code_string):
    """
    Simple complexity analysis for C/C++/CUDA code using regex patterns.
    Returns metrics similar to the Python analysis but based on plain text
    heuristics: no real parsing is done, so keywords, ``?`` and braces inside
    string literals or comments are counted as well.

    Args:
        code_string: C/C++/CUDA source code to analyze

    Returns:
        Dictionary of complexity metrics with the same keys as the Python
        analysis. ``halstead_volume`` is an estimate derived from the logical
        line count and the branch count, ``halstead_difficulty`` is fixed at
        1.0 and ``halstead_effort`` equals the estimated volume.
    """
    lines = code_string.split("\n")

    # Line classification. ``loc`` counts every line, including blank ones.
    loc = len(lines)
    lloc = 0
    comments = 0
    in_block_comment = False

    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if in_block_comment:
            # Interior and closing lines of a multi-line /* ... */ comment.
            comments += 1
            if "*/" in stripped:
                in_block_comment = False
        elif stripped.startswith("//"):
            comments += 1
        elif stripped.startswith("/*"):
            comments += 1
            # Skip the opener itself so that "/*/" is not read as closed.
            in_block_comment = "*/" not in stripped[2:]
        elif stripped.endswith("*/"):
            comments += 1
        else:
            lloc += 1

    # Branch count: one per decision keyword or ternary "?", plus one for the
    # base path. "?" is not a word character, so it must not be wrapped in
    # \b (that form only matches between two word characters, e.g. "a?b").
    decision_points = re.compile(
        r"\b(?:if|else|while|for|switch|case|catch)\b|\?",
        re.IGNORECASE,
    )
    total_cc = 1 + len(decision_points.findall(code_string))

    # Nesting depth from brace balance; unmatched "}" never goes below zero.
    max_nesting = 0
    current_nesting = 0
    for char in code_string:
        if char == "{":
            current_nesting += 1
            max_nesting = max(max_nesting, current_nesting)
        elif char == "}":
            current_nesting = max(0, current_nesting - 1)

    # Volume estimate and maintainability index (base-2 logs, floored at 0).
    volume = max(1, lloc * math.log2(max(1, total_cc)))
    mi = max(
        0,
        171
        - 5.2 * math.log2(max(1, volume))
        - 0.23 * total_cc
        - 16.2 * math.log2(max(1, loc)),
    )

    # Each component is capped at 1.0 before weighting.
    norm_cc = min(total_cc / 10, 1.0)
    norm_volume = min(math.log2(volume + 1) / 10, 1.0)
    norm_loc = min(math.log2(loc + 1) / 10, 1.0)
    norm_nesting = min(max_nesting / 5, 1.0)

    complexity_score = (
        0.4 * norm_cc + 0.4 * norm_volume + 0.1 * norm_loc + 0.1 * norm_nesting
    )

    return {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": total_cc,
        "halstead_volume": volume,
        "halstead_difficulty": 1.0,
        "halstead_effort": volume,
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": max_nesting,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }
|
|
|
|
def analyze_generic_complexity(code_string):
    """
    Line-count based complexity estimate for languages without a dedicated
    analyzer.

    Only non-blank lines are counted; every other metric is a fixed
    placeholder or derived directly from that count.

    Args:
        code_string: Source code in any language

    Returns:
        Dictionary of basic complexity metrics
    """
    # Count the lines that contain something other than whitespace.
    nonblank = sum(1 for text in code_string.split("\n") if text.strip())

    # Use at least 1 so the logarithm below is defined for empty input.
    size = max(1, nonblank)

    # Log-scaled size, capped at 1.0.
    score = math.log2(size) / 10
    if score > 1.0:
        score = 1.0

    metrics = {
        "cyclomatic_complexity": 1,
        "average_cyclomatic_complexity": 1,
        "halstead_volume": size,
        "halstead_difficulty": 1.0,
        "halstead_effort": size,
        "lines_of_code": nonblank,
        "logical_lines_of_code": nonblank,
        "comments": 0,
        "maintainability_index": 100.0,
        "max_nesting_depth": 1,
        "complexity_score": round(score, 3),
    }
    return metrics
|
|
|
|
def analyze_code_metrics(code_string, language="python"):
    """
    Compute complexity metrics for source code, picking the analyzer by
    language.

    Routing (the language name is compared case-insensitively):
    - "python": radon/AST-based analysis; if the source raises SyntaxError,
      the regex-based analyzer is used instead
    - "cpp", "c", "cuda", "c++", "rust", "swift", "json", "json5":
      regex-based pattern matching analysis
    - anything else: simple line-based estimation

    Args:
        code_string: The source code to analyze
        language: Programming language ("python", "cpp", "c", "cuda", etc.)

    Returns:
        Dictionary of complexity metrics including:
        - cyclomatic_complexity: Code complexity measure
        - halstead_volume: Code volume metric
        - lines_of_code: Total lines
        - maintainability_index: Code maintainability score
        - complexity_score: Normalized overall complexity (0-1)
    """
    lang = language.lower()

    if lang == "python":
        try:
            return analyze_python_complexity(code_string)
        except SyntaxError:
            # Source that does not parse as Python still gets a score from
            # the regex-based analyzer.
            return analyze_cpp_complexity(code_string)

    # Languages handled by the regex-based analyzer.
    regex_analyzed = ("cpp", "c", "cuda", "c++", "rust", "swift", "json", "json5")
    if lang in regex_analyzed:
        analyzer = analyze_cpp_complexity
    else:
        analyzer = analyze_generic_complexity
    return analyzer(code_string)
|
|