shinka-backup / shinka /database /complexity.py
JustinTX's picture
Add files using upload-large-folder tool
2facf1f verified
import ast
from radon.complexity import cc_visit
from radon.metrics import h_visit
from radon.raw import analyze
import math
import re
def max_nesting_depth(code_string):
    """Calculate the maximum nesting depth of Python code using its AST.

    The depth grows by one for every enclosing block construct: ``if``,
    ``for``, ``while``, ``with``, ``try`` and function definitions, including
    the ``async`` variants of ``for``/``with``/``def``. ``match`` and
    ``try``/``except*`` statements are counted as well when the running
    interpreter exposes their node types. Class definitions are not counted.

    Note that the AST stores an ``elif`` as an ``if`` inside the parent's
    else-branch, so every ``elif`` in a chain adds one level.

    Args:
        code_string: Python source code to analyze.

    Returns:
        The deepest nesting level found, or 0 when the code contains none of
        the counted constructs.

    Raises:
        SyntaxError: If the code cannot be parsed as valid Python.
    """
    nesting_types = (
        ast.If,
        ast.For,
        ast.AsyncFor,
        ast.While,
        ast.With,
        ast.AsyncWith,
        ast.Try,
        ast.FunctionDef,
        ast.AsyncFunctionDef,
    )
    # These node classes only exist on newer interpreters, so look them up
    # defensively instead of referencing them directly.
    nesting_types += tuple(
        getattr(ast, name) for name in ("Match", "TryStar") if hasattr(ast, name)
    )

    def deepest_below(node, depth):
        """Return the deepest level reached in the subtree rooted at node."""
        if isinstance(node, nesting_types):
            depth += 1
        deepest = depth
        for child in ast.iter_child_nodes(node):
            deepest = max(deepest, deepest_below(child, depth))
        return deepest

    return deepest_below(ast.parse(code_string), 0)
def analyze_python_complexity(code_string):
    """
    Compute complexity metrics for Python source with the radon library.

    Combines radon's cyclomatic-complexity, Halstead and raw line metrics
    with the AST-based nesting depth, then derives a maintainability index
    and a single weighted complexity score capped at 1.0.

    Args:
        code_string: Python source code to analyze

    Returns:
        Dictionary of complexity metrics

    Raises:
        SyntaxError: If the code cannot be parsed as valid Python
    """
    # Cyclomatic complexity: summed over every block radon reports.
    blocks = cc_visit(code_string)
    total_cc = 0
    for entry in blocks:
        total_cc += entry.complexity
    if blocks:
        avg_cc = total_cc / len(blocks)
    else:
        avg_cc = 0

    # Halstead metrics: fall back to neutral defaults when radon reports
    # no totals.
    halstead = h_visit(code_string).total
    if halstead:
        volume = halstead.volume
        difficulty = halstead.difficulty
        effort = halstead.effort
    else:
        volume, difficulty, effort = 1, 0, 0

    # Raw line counts.
    raw = analyze(code_string)
    loc, lloc, comments = raw.loc, raw.lloc, raw.comments

    # Maintainability index; the logarithms are skipped for non-positive
    # inputs so that empty code does not raise.
    log_volume = math.log2(volume) if volume > 0 else 0
    log_loc = math.log2(loc) if loc > 0 else 0
    mi = 171 - 5.2 * log_volume - 0.23 * total_cc - 16.2 * log_loc

    depth = max_nesting_depth(code_string)

    # Bring each component onto a roughly comparable scale before weighting
    # (a total complexity of 10 and a nesting depth of 5 are treated as high).
    cc_part = total_cc / 10
    halstead_part = math.log2(volume + 1) / 10
    loc_part = math.log2(loc + 1) / 10
    nesting_part = depth / 5
    weighted = (
        0.4 * cc_part + 0.4 * halstead_part + 0.1 * loc_part + 0.1 * nesting_part
    )

    metrics = {}
    metrics["cyclomatic_complexity"] = total_cc
    metrics["average_cyclomatic_complexity"] = avg_cc
    metrics["halstead_volume"] = volume
    metrics["halstead_difficulty"] = difficulty
    metrics["halstead_effort"] = effort
    metrics["lines_of_code"] = loc
    metrics["logical_lines_of_code"] = lloc
    metrics["comments"] = comments
    metrics["maintainability_index"] = mi
    metrics["max_nesting_depth"] = depth
    metrics["complexity_score"] = round(min(weighted, 1.0), 3)
    return metrics
def analyze_cpp_complexity(code_string):
    """
    Simple complexity analysis for C/C++/CUDA code using regex patterns.

    Returns metrics similar to the Python analysis but using basic text
    analysis only: keywords and braces inside string literals or comments
    are counted like any other occurrence.

    Args:
        code_string: C/C++/CUDA source code to analyze

    Returns:
        Dictionary of complexity metrics
    """
    lines = code_string.split("\n")
    # Physical line count, blank lines included.
    loc = len(lines)
    lloc = 0
    comments = 0
    # True while inside a /* ... */ comment that was opened at the start of
    # an earlier line and has not been closed yet.
    in_block_comment = False
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        if in_block_comment:
            # Interior and closing lines of a multi-line comment.
            comments += 1
            if "*/" in stripped:
                in_block_comment = False
        elif stripped.startswith("//"):
            comments += 1
        elif stripped.startswith("/*"):
            comments += 1
            # Stay in comment mode unless the comment closes on this line.
            in_block_comment = "*/" not in stripped[2:]
        elif stripped.endswith("*/"):
            comments += 1
        else:
            lloc += 1
    # Simple cyclomatic complexity - count decision points
    complexity_patterns = [
        r"\bif\b",
        r"\belse\b",
        r"\bwhile\b",
        r"\bfor\b",
        r"\bswitch\b",
        r"\bcase\b",
        r"\bcatch\b",
        # No \b anchors here: "?" is not a word character, so anchoring it
        # would miss the usual spaced form "a ? b : c".
        r"\?",
    ]
    total_cc = 1  # Base complexity
    for pattern in complexity_patterns:
        total_cc += len(re.findall(pattern, code_string, re.IGNORECASE))
    # Estimate nesting depth by counting braces
    max_nesting = 0
    current_nesting = 0
    for char in code_string:
        if char == "{":
            current_nesting += 1
            max_nesting = max(max_nesting, current_nesting)
        elif char == "}":
            # Clamp at zero so unbalanced closing braces cannot go negative.
            current_nesting = max(0, current_nesting - 1)
    # Simple maintainability index approximation
    volume = max(1, lloc * math.log2(max(1, total_cc)))
    mi = max(
        0,
        171
        - 5.2 * math.log2(max(1, volume))
        - 0.23 * total_cc
        - 16.2 * math.log2(max(1, loc)),
    )
    # Normalized scores
    norm_cc = min(total_cc / 10, 1.0)
    norm_volume = min(math.log2(volume + 1) / 10, 1.0)
    norm_loc = min(math.log2(loc + 1) / 10, 1.0)
    norm_nesting = min(max_nesting / 5, 1.0)
    complexity_score = (
        0.4 * norm_cc + 0.4 * norm_volume + 0.1 * norm_loc + 0.1 * norm_nesting
    )
    return {
        "cyclomatic_complexity": total_cc,
        "average_cyclomatic_complexity": total_cc,  # Same as total for simplicity
        "halstead_volume": volume,
        "halstead_difficulty": 1.0,  # Placeholder
        "halstead_effort": volume,  # Simplified
        "lines_of_code": loc,
        "logical_lines_of_code": lloc,
        "comments": comments,
        "maintainability_index": mi,
        "max_nesting_depth": max_nesting,
        "complexity_score": round(min(complexity_score, 1.0), 3),
    }
def analyze_generic_complexity(code_string):
    """
    Line-count-based complexity estimate for languages without a dedicated
    analyzer.

    Only non-blank lines are counted; every other metric is a fixed
    placeholder or derived from that count.

    Args:
        code_string: Source code in any language

    Returns:
        Dictionary of basic complexity metrics
    """
    non_blank = 0
    for row in code_string.split("\n"):
        if row.strip():
            non_blank += 1

    # Floor at 1 so the logarithm is defined for empty input.
    size = max(1, non_blank)
    score = min(math.log2(size) / 10, 1.0)

    metrics = {}
    metrics["cyclomatic_complexity"] = 1
    metrics["average_cyclomatic_complexity"] = 1
    metrics["halstead_volume"] = size
    metrics["halstead_difficulty"] = 1.0
    metrics["halstead_effort"] = size
    metrics["lines_of_code"] = non_blank
    metrics["logical_lines_of_code"] = non_blank
    metrics["comments"] = 0
    metrics["maintainability_index"] = 100.0  # Default good score
    metrics["max_nesting_depth"] = 1
    metrics["complexity_score"] = round(score, 3)
    return metrics
def analyze_code_metrics(code_string, language="python"):
    """
    Analyze code complexity metrics for different programming languages.

    This function routes to appropriate analysis methods based on the language:
    - Python: Full AST-based analysis with Halstead metrics
    - C/C++/CUDA/Rust/Swift/JSON: Regex-based pattern matching analysis
    - Other languages: Simple line-based complexity estimation

    Python source that cannot be parsed falls back to the regex-based
    analysis instead of raising.

    Args:
        code_string: The source code to analyze
        language: Programming language ("python", "cpp", "c", "cuda", etc.);
            matched case-insensitively

    Returns:
        Dictionary of complexity metrics including:
        - cyclomatic_complexity: Code complexity measure
        - halstead_volume: Code volume metric
        - lines_of_code: Total lines
        - maintainability_index: Code maintainability score
        - complexity_score: Normalized overall complexity (0-1)
    """
    # Normalize language name
    language = language.lower()

    # For Python, use the full radon-based analysis
    if language == "python":
        try:
            return analyze_python_complexity(code_string)
        except (SyntaxError, ValueError):
            # Unparseable source: SyntaxError for invalid code, ValueError
            # for source containing null bytes on interpreters that report
            # it that way. Fall back to the regex-based analysis.
            return analyze_cpp_complexity(code_string)

    # For C/C++/CUDA/Rust/Swift/JSON, use regex-based analysis
    if language in ("cpp", "c", "cuda", "c++", "rust", "swift", "json", "json5"):
        return analyze_cpp_complexity(code_string)

    # For unknown languages, use simple line-based complexity
    return analyze_generic_complexity(code_string)