9.11 kB

	# Copyright (c) Microsoft Corporation. All rights reserved.
	# Licensed under the MIT License.

	"""Static code complexity analysis using radon for Python and regex patterns for TypeScript."""

	import pathlib
	import re
	from dataclasses import dataclass
	from typing import Dict, List, Optional

	from .file_discovery import get_tracked_source_files

	# Radon is optional - graceful fallback if not available
	try:
	    # Both names are needed by the Python analyzer; if either import fails
	    # the module still loads and Python analysis is simply skipped.
	    from radon.complexity import cc_visit
	    from radon.metrics import mi_visit
	except ImportError:
	    RADON_AVAILABLE = False
	else:
	    RADON_AVAILABLE = True


	@dataclass
	class FunctionComplexity:
	    """Complexity measurements recorded for one analyzed function or method."""

	    name: str  # identifier of the analyzed block
	    line: int  # line on which the block starts
	    complexity: int  # complexity score assigned to the block
	    length: int  # lines of code

	    def to_dict(self) -> dict:
	        """Return the four fields as a plain dictionary, in declaration order."""
	        field_names = ("name", "line", "complexity", "length")
	        return {field: getattr(self, field) for field in field_names}


	@dataclass
	class FileComplexity:
	    """Aggregated complexity measurements for a single source file."""

	    path: str
	    total_lines: int
	    code_lines: int
	    functions: List[FunctionComplexity]
	    max_complexity: int
	    avg_complexity: float
	    maintainability_index: Optional[float] = None

	    def to_dict(self) -> dict:
	        """Return a plain-dictionary view of the file metrics.

	        Floating-point values are rounded to two decimals, the number of
	        functions is exposed as ``function_count``, and each function record
	        is converted through its own ``to_dict``.
	        """
	        rounded_mi = self.maintainability_index
	        if rounded_mi is not None:
	            rounded_mi = round(rounded_mi, 2)

	        return {
	            "path": self.path,
	            "total_lines": self.total_lines,
	            "code_lines": self.code_lines,
	            "function_count": len(self.functions),
	            "max_complexity": self.max_complexity,
	            "avg_complexity": round(self.avg_complexity, 2),
	            "maintainability_index": rounded_mi,
	            "functions": [entry.to_dict() for entry in self.functions],
	        }


	def analyze_python_file(
	    filepath: pathlib.Path, repo_root: pathlib.Path
	) -> Optional[FileComplexity]:
	    """Analyze a Python file for complexity metrics using radon.

	    Args:
	        filepath: Path of the Python source file to analyze.
	        repo_root: Directory the reported path is made relative to.

	    Returns:
	        A FileComplexity for the file, or None when radon is not installed,
	        the file cannot be read or decoded, or its source cannot be parsed.

	    Raises:
	        ValueError: If ``filepath`` is not located under ``repo_root``
	            (raised by ``Path.relative_to``).
	    """
	    if not RADON_AVAILABLE:
	        return None

	    try:
	        # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order
	        # mark; left in place, the BOM reaches the parser as U+FEFF and the
	        # whole file would be skipped as a syntax error.
	        content = filepath.read_text(encoding="utf-8-sig")
	    except (UnicodeDecodeError, OSError):
	        return None

	    lines = content.splitlines()
	    total_lines = len(lines)
	    # Non-blank lines that are not pure "#" comments.
	    stripped_lines = (line.strip() for line in lines)
	    code_lines = sum(
	        1 for text in stripped_lines if text and not text.startswith("#")
	    )

	    try:
	        cc_results = cc_visit(content)
	        mi_score = mi_visit(content, multi=False)
	    except (SyntaxError, ValueError):
	        # Unparseable source is skipped rather than aborting the whole run.
	        # ValueError covers Python versions before 3.12, which reject source
	        # containing NUL bytes with ValueError instead of SyntaxError.
	        return None

	    # radon returns different block types (Function, Class, etc.); every
	    # returned block becomes one record. Blocks without an ``endline``
	    # attribute get a length of 0.
	    functions = [
	        FunctionComplexity(
	            name=block.name,
	            line=block.lineno,
	            complexity=block.complexity,
	            length=(
	                block.endline - block.lineno + 1
	                if hasattr(block, "endline")
	                else 0
	            ),
	        )
	        for block in cc_results
	    ]

	    max_cc = max((f.complexity for f in functions), default=0)
	    avg_cc = sum(f.complexity for f in functions) / len(functions) if functions else 0

	    rel_path = filepath.relative_to(repo_root).as_posix()
	    return FileComplexity(
	        path=rel_path,
	        total_lines=total_lines,
	        code_lines=code_lines,
	        functions=functions,
	        max_complexity=max_cc,
	        avg_complexity=avg_cc,
	        maintainability_index=mi_score,
	    )


	def analyze_typescript_file(
	    filepath: pathlib.Path, repo_root: pathlib.Path
	) -> Optional[FileComplexity]:
	    """Estimate complexity metrics for a TypeScript/JavaScript file with regexes.

	    This is a simplified, text-based analysis - for accurate TypeScript
	    complexity, consider using ts-morph or the typescript compiler API.

	    Each detected function is taken to span from its header to the header of
	    the next detected function (or the end of the file). Its complexity is
	    1 plus the number of branch-pattern matches inside that span.

	    Args:
	        filepath: Path of the source file to analyze.
	        repo_root: Directory the reported path is made relative to.

	    Returns:
	        A FileComplexity for the file (``maintainability_index`` is always
	        None), or None when the file cannot be read or decoded as UTF-8.

	    Raises:
	        ValueError: If ``filepath`` is not located under ``repo_root``
	            (raised by ``Path.relative_to``).
	    """
	    try:
	        content = filepath.read_text(encoding="utf-8")
	    except (UnicodeDecodeError, OSError):
	        return None

	    lines = content.splitlines()
	    total_lines = len(lines)
	    # Non-blank lines that are not pure "//" comments.
	    stripped_lines = (line.strip() for line in lines)
	    code_lines = sum(
	        1 for text in stripped_lines if text and not text.startswith("//")
	    )

	    # Function/method headers at the start of a line:
	    #   [export] [async] function name
	    #   [export] [async] name[<T>](args)[: ReturnType] {
	    # Leading indentation is matched with [ \t]* rather than \s* so a match
	    # cannot begin on preceding blank lines, which would shift the reported
	    # start line upwards.
	    function_pattern = re.compile(
	        r"^[ \t]*(?:export\s+)?(?:async\s+)?"
	        r"(?:function\s+(\w+)"
	        r"|(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*(?::\s*[^{]+)?\s*\{)",
	        re.MULTILINE,
	    )
	    # Statements such as `if (x) {` have the same textual shape as a method
	    # header; they must not be reported as functions.
	    control_keywords = {"if", "for", "while", "switch", "catch", "with"}

	    # Complexity indicators (simplified cyclomatic complexity estimation).
	    # Compiled once here instead of once per function.
	    branch_patterns = [
	        re.compile(pattern)
	        for pattern in (
	            r"\bif\s*\(",
	            r"\belse\s+if\s*\(",
	            r"\belse\s*\{",
	            r"\bfor\s*\(",
	            r"\bwhile\s*\(",
	            r"\bswitch\s*\(",
	            r"\bcase\s+",
	            r"\bcatch\s*\(",
	            # ternary: a lone "?" (not "??", "?." or "?:") followed by ":"
	            r"(?<!\?)\?(?![?.:])\s*[^:]+\s*:",
	            r"\?\?",  # nullish coalescing
	            r"\|\|",  # logical or
	            r"&&",  # logical and
	        )
	    ]

	    func_matches = [
	        match
	        for match in function_pattern.finditer(content)
	        if match.group(2) not in control_keywords
	    ]

	    functions = []
	    for i, match in enumerate(func_matches):
	        func_name = match.group(1) or match.group(2) or "anonymous"
	        start_line = content.count("\n", 0, match.start()) + 1

	        # Rough estimate of the function's extent: everything up to the next
	        # detected function header, or the end of the file for the last one.
	        if i + 1 < len(func_matches):
	            func_end = func_matches[i + 1].start()
	        else:
	            func_end = len(content)
	        func_content = content[match.start() : func_end]

	        # Base complexity of 1 plus one per branch indicator found.
	        complexity = 1 + sum(
	            len(pattern.findall(func_content)) for pattern in branch_patterns
	        )

	        functions.append(
	            FunctionComplexity(
	                name=func_name,
	                line=start_line,
	                complexity=complexity,
	                length=func_content.count("\n") + 1,
	            )
	        )

	    max_cc = max((f.complexity for f in functions), default=0)
	    avg_cc = sum(f.complexity for f in functions) / len(functions) if functions else 0

	    rel_path = filepath.relative_to(repo_root).as_posix()
	    return FileComplexity(
	        path=rel_path,
	        total_lines=total_lines,
	        code_lines=code_lines,
	        functions=functions,
	        max_complexity=max_cc,
	        avg_complexity=avg_cc,
	        maintainability_index=None,  # Not computed for TypeScript
	    )


	def find_source_files(
	    repo_root: pathlib.Path, extensions: List[str]
	) -> List[pathlib.Path]:
	    """Return the source files under ``repo_root`` that have one of ``extensions``.

	    Thin wrapper that delegates the lookup to ``get_tracked_source_files``.
	    """
	    tracked_files = get_tracked_source_files(repo_root, extensions)
	    return tracked_files


	def analyze_complexity(repo_root: pathlib.Path) -> dict:
	    """Run complexity analysis over the Python and TypeScript/JavaScript sources.

	    Args:
	        repo_root: Root directory of the repository to analyze.

	    Returns:
	        Dictionary with three keys: ``by_language`` (per-file metrics for
	        "python" and "typescript", most complex file first), ``summary``
	        (totals and averages over all analyzed files) and
	        ``high_complexity_functions`` (functions whose complexity exceeds 10).
	    """
	    # (result key, file extensions, analyzer), processed in this order.
	    language_table = (
	        ("python", [".py"], analyze_python_file),
	        ("typescript", [".ts", ".tsx", ".js", ".jsx"], analyze_typescript_file),
	    )

	    results: Dict[str, List[dict]] = {}
	    for language, extensions, analyze in language_table:
	        entries: List[dict] = []
	        for source_path in find_source_files(repo_root, extensions):
	            file_complexity = analyze(source_path, repo_root)
	            # Analyzers return None for files they could not process.
	            if file_complexity:
	                entries.append(file_complexity.to_dict())
	        results[language] = entries

	    # Summary statistics are computed over both languages together.
	    all_files = results["python"] + results["typescript"]
	    file_count = len(all_files)

	    if all_files:
	        avg_file_complexity = round(
	            sum(entry["avg_complexity"] for entry in all_files) / file_count, 2
	        )
	    else:
	        avg_file_complexity = 0

	    summary = {
	        "total_files": file_count,
	        "total_functions": sum(entry["function_count"] for entry in all_files),
	        "total_lines": sum(entry["total_lines"] for entry in all_files),
	        "total_code_lines": sum(entry["code_lines"] for entry in all_files),
	        "files_with_high_complexity": [
	            entry["path"] for entry in all_files if entry["max_complexity"] > 10
	        ],
	        "avg_file_complexity": avg_file_complexity,
	    }

	    # Order each language's files from most to least complex.
	    for language in ("python", "typescript"):
	        results[language].sort(
	            key=lambda entry: entry["max_complexity"], reverse=True
	        )

	    high_complexity = _get_high_complexity_functions(all_files, threshold=10)
	    return {
	        "by_language": results,
	        "summary": summary,
	        "high_complexity_functions": high_complexity,
	    }


	def _get_high_complexity_functions(
	files: List[dict], threshold: int = 10
	) -> List[dict]:
	"""Extract functions with complexity above threshold."""
	high_cc = []
	for file_data in files:
	for func in file_data.get("functions", []):
	if func["complexity"] > threshold:
	high_cc.append(
	{
	"file": file_data["path"],
	"function": func["name"],
	"line": func["line"],
	"complexity": func["complexity"],
	}
	)

	high_cc.sort(key=lambda f: f["complexity"], reverse=True)
	return high_cc[:30] # Top 30


	if __name__ == "__main__":
	    # Script entry point: analyze the directory two levels above this file
	    # and print the report as indented JSON.
	    import json

	    repo_root = pathlib.Path(__file__).parent.parent
	    report = analyze_complexity(repo_root)
	    print(json.dumps(report, indent=2))

Xet Storage Details

Size:: 9.11 kB
Xet hash:: 690c2ceefb073227dc10f22ebda0fa63fb67669561ff2a788f139d17330fdeb7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.