# File size: 8,424 Bytes
# 2facf1f
import ast
from radon.complexity import cc_visit
from radon.metrics import h_visit
from radon.raw import analyze
import math
import re
def max_nesting_depth(code_string):
    """Return the deepest level of nested block statements in Python source.

    The source is parsed with ``ast``. Each ``if``, ``for``, ``while``,
    ``with``, ``try`` and function definition adds one level while its
    subtree is walked. The ``async`` forms of ``for``/``with``/``def`` are
    counted as well, and so are ``match`` and ``try``/``except*`` on
    interpreters whose ``ast`` module provides those node types.

    Note that an ``elif`` is stored in the AST as an ``If`` inside the parent
    ``If``'s ``orelse``, so every ``elif`` in a chain adds a level.

    Args:
        code_string: Python source code to analyze.

    Returns:
        The maximum depth reached, or 0 when none of the counted constructs
        appear.

    Raises:
        SyntaxError: If ``code_string`` cannot be parsed as Python.
    """
    nesting_types = [
        ast.If,
        ast.For,
        ast.AsyncFor,
        ast.While,
        ast.With,
        ast.AsyncWith,
        ast.Try,
        ast.FunctionDef,
        ast.AsyncFunctionDef,
    ]
    # ``Match`` and ``TryStar`` only exist on newer interpreters; look them up
    # dynamically so the function keeps working on older ones.
    for optional_name in ("Match", "TryStar"):
        optional_type = getattr(ast, optional_name, None)
        if optional_type is not None:
            nesting_types.append(optional_type)
    nesting_types = tuple(nesting_types)

    class NestingVisitor(ast.NodeVisitor):
        """Track the current and maximum depth while walking the tree."""

        def __init__(self):
            self.current_depth = 0
            self.max_depth = 0

        def generic_visit(self, node):
            # No visit_<Node> methods are defined, so every node lands here.
            if not isinstance(node, nesting_types):
                super().generic_visit(node)
                return
            self.current_depth += 1
            self.max_depth = max(self.max_depth, self.current_depth)
            super().generic_visit(node)
            self.current_depth -= 1

    visitor = NestingVisitor()
    visitor.visit(ast.parse(code_string))
    return visitor.max_depth
def analyze_python_complexity(code_string):
    """
    Compute complexity metrics for Python source with the radon library.

    Combines radon's cyclomatic complexity (``cc_visit``), Halstead metrics
    (``h_visit``) and raw line counts (``analyze``) with the AST-based
    ``max_nesting_depth`` into one dictionary.

    Args:
        code_string: Python source code to analyze

    Returns:
        Dictionary with these keys:
            - cyclomatic_complexity: sum of ``complexity`` over all blocks
              returned by ``cc_visit``
            - average_cyclomatic_complexity: that sum divided by the number
              of blocks (0 when there are none)
            - halstead_volume / halstead_difficulty / halstead_effort: taken
              from the Halstead total; 1 / 0 / 0 when the total is falsy
            - lines_of_code, logical_lines_of_code, comments: radon raw
              metrics ``loc``, ``lloc`` and ``comments``
            - maintainability_index:
              171 - 5.2*log2(volume) - 0.23*cc - 16.2*log2(loc), not clamped
            - max_nesting_depth: result of ``max_nesting_depth``
            - complexity_score: weighted blend of the normalized metrics,
              capped at 1.0 and rounded to three decimals

    Raises:
        SyntaxError: If the code cannot be parsed as valid Python
    """
    # Cyclomatic complexity: total and per-block average.
    blocks = cc_visit(code_string)
    total_cc = sum(entry.complexity for entry in blocks)
    if blocks:
        avg_cc = total_cc / len(blocks)
    else:
        avg_cc = 0

    # Halstead metrics, with neutral defaults when no total is reported.
    halstead_report = h_visit(code_string).total
    if halstead_report:
        halstead_volume = halstead_report.volume
        halstead_difficulty = halstead_report.difficulty
        halstead_effort = halstead_report.effort
    else:
        halstead_volume = 1
        halstead_difficulty = 0
        halstead_effort = 0

    # Raw line counts.
    raw = analyze(code_string)
    loc = raw.loc
    lloc = raw.lloc
    comments = raw.comments

    # Maintainability index; log2 is only taken of positive values.
    log_volume = math.log2(halstead_volume) if halstead_volume > 0 else 0
    log_loc = math.log2(loc) if loc > 0 else 0
    mi = 171 - 5.2 * log_volume - 0.23 * total_cc - 16.2 * log_loc

    nesting_depth = max_nesting_depth(code_string)

    # Normalized components: 10 is treated as high cyclomatic complexity and
    # depth 5 as quite nested; volume and size are log-scaled.
    norm_cc = total_cc / 10
    norm_halstead = math.log2(halstead_volume + 1) / 10
    norm_loc = math.log2(loc + 1) / 10
    norm_nesting = nesting_depth / 5

    # Weighted sum of the normalized components.
    complexity_score = (
        0.4 * norm_cc + 0.4 * norm_halstead + 0.1 * norm_loc + 0.1 * norm_nesting
    )

    metrics = {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": avg_cc,
        "halstead_volume": halstead_volume,
        "halstead_difficulty": halstead_difficulty,
        "halstead_effort": halstead_effort,
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": nesting_depth,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }
    return metrics
def analyze_cpp_complexity(code_string):
    """
    Heuristic complexity analysis for C/C++/CUDA-style code using text patterns.

    Returns the same keys as the Python analysis, but every value is an
    approximation derived from line scanning, keyword regexes and brace
    counting. Nothing is parsed, so keywords, ``?`` and braces that appear
    inside comments or string literals are counted too.

    Args:
        code_string: C/C++/CUDA source code to analyze

    Returns:
        Dictionary of complexity metrics:
            - cyclomatic_complexity / average_cyclomatic_complexity:
              1 + number of decision keywords and ``?`` operators
            - halstead_volume / halstead_effort: lloc * log2(cc), at least 1
            - halstead_difficulty: placeholder 1.0
            - lines_of_code: number of ``"\\n"``-separated lines, including
              blank and comment lines
            - logical_lines_of_code: non-blank lines not classified as comments
            - comments: lines classified as comments
            - maintainability_index: clamped to be non-negative
            - max_nesting_depth: deepest ``{`` nesting seen
            - complexity_score: weighted blend of the normalized metrics,
              in [0, 1], rounded to three decimals
    """
    lines = code_string.split("\n")
    loc = len(lines)
    lloc = 0
    comments = 0

    # Classify each non-blank line as comment or code. A block comment opened
    # at the start of a line stays open until a line containing "*/", so the
    # interior lines of a multi-line /* ... */ comment are not counted as code.
    in_block_comment = False
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if in_block_comment:
            comments += 1
            if "*/" in stripped:
                in_block_comment = False
        elif stripped.startswith("//"):
            comments += 1
        elif stripped.startswith("/*"):
            comments += 1
            # Look for the terminator after the opener so "/*/" is not
            # mistaken for a closed comment.
            in_block_comment = "*/" not in stripped[2:]
        elif stripped.endswith("*/"):
            # Kept from the original heuristic: a line that merely ends with
            # "*/" is counted as a comment line.
            comments += 1
        else:
            lloc += 1

    # Simple cyclomatic complexity - count decision points.
    complexity_patterns = [
        r"\bif\b",
        r"\belse\b",
        r"\bwhile\b",
        r"\bfor\b",
        r"\bswitch\b",
        r"\bcase\b",
        r"\bcatch\b",
        # "?" is not a word character, so wrapping it in \b would only match
        # "a?b" and miss the usual "a ? b : c" spelling.
        r"\?",
    ]
    total_cc = 1  # Base complexity
    for pattern in complexity_patterns:
        total_cc += len(re.findall(pattern, code_string, re.IGNORECASE))

    # Estimate nesting depth by counting braces; unmatched "}" never pushes
    # the running depth below zero.
    max_nesting = 0
    current_nesting = 0
    for char in code_string:
        if char == "{":
            current_nesting += 1
            max_nesting = max(max_nesting, current_nesting)
        elif char == "}":
            current_nesting = max(0, current_nesting - 1)

    # Simple maintainability index approximation
    volume = max(1, lloc * math.log2(max(1, total_cc)))
    mi = max(
        0,
        171
        - 5.2 * math.log2(max(1, volume))
        - 0.23 * total_cc
        - 16.2 * math.log2(max(1, loc)),
    )

    # Normalized scores, each capped at 1.0
    norm_cc = min(total_cc / 10, 1.0)
    norm_volume = min(math.log2(volume + 1) / 10, 1.0)
    norm_loc = min(math.log2(loc + 1) / 10, 1.0)
    norm_nesting = min(max_nesting / 5, 1.0)
    complexity_score = (
        0.4 * norm_cc + 0.4 * norm_volume + 0.1 * norm_loc + 0.1 * norm_nesting
    )

    return {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": total_cc,  # Same as total for simplicity
        "halstead_volume": volume,
        "halstead_difficulty": 1.0,  # Placeholder
        "halstead_effort": volume,  # Simplified
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": max_nesting,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }
def analyze_generic_complexity(code_string):
    """
    Line-count-based complexity estimate for languages without a dedicated analyzer.

    Only the number of non-blank lines is measured; every other metric is a
    fixed placeholder so the result has the same keys as the other analyzers.

    Args:
        code_string: Source code in any language

    Returns:
        Dictionary of basic complexity metrics. ``lines_of_code`` and
        ``logical_lines_of_code`` are the non-blank line count, the Halstead
        volume/effort are that count with a floor of 1, and
        ``complexity_score`` is log2 of that floored count divided by 10,
        capped at 1.0 and rounded to three decimals.
    """
    non_blank = sum(1 for row in code_string.split("\n") if row.strip())
    # Floor at 1 so log2 is defined for empty input.
    size = max(1, non_blank)
    score = min(math.log2(size) / 10, 1.0)

    metrics = {
        "cyclomatic_complexity": 1,
        "average_cyclomatic_complexity": 1,
        "halstead_volume": size,
        "halstead_difficulty": 1.0,
        "halstead_effort": size,
        "lines_of_code": non_blank,
        "logical_lines_of_code": non_blank,
        "comments": 0,
        "maintainability_index": 100.0,  # Default good score
        "max_nesting_depth": 1,
        "complexity_score": round(score, 3),
    }
    return metrics
def analyze_code_metrics(code_string, language="python"):
    """
    Analyze code complexity metrics, choosing the analyzer by language.

    The language name is lower-cased and routed as follows:
    - "python": radon/AST-based analysis; if that raises SyntaxError the
      regex-based analysis is used instead
    - "cpp", "c", "cuda", "c++", "rust", "swift", "json", "json5":
      regex-based pattern matching analysis
    - anything else: simple line-based complexity estimation

    Args:
        code_string: The source code to analyze
        language: Programming language name, compared case-insensitively

    Returns:
        Dictionary of complexity metrics including:
            - cyclomatic_complexity: Code complexity measure
            - halstead_volume: Code volume metric
            - lines_of_code: Line count as defined by the chosen analyzer
            - maintainability_index: Code maintainability score
            - complexity_score: Normalized overall complexity (0-1)
    """
    lang_key = language.lower()

    # Languages handled by the regex/brace-based analyzer.
    regex_languages = ("cpp", "c", "cuda", "c++", "rust", "swift", "json", "json5")
    if lang_key in regex_languages:
        return analyze_cpp_complexity(code_string)

    # Anything that is neither Python nor regex-analyzed gets the line-based estimate.
    if lang_key != "python":
        return analyze_generic_complexity(code_string)

    try:
        return analyze_python_complexity(code_string)
    except SyntaxError:
        # Source did not parse as Python; fall back to the regex-based analysis.
        return analyze_cpp_complexity(code_string)
|