Buckets:
ktongue/docker_container / .vscode-server /extensions /ms-python.vscode-python-envs-1.22.0 /analysis /complexity_analysis.py
| # Copyright (c) Microsoft Corporation. All rights reserved. | |
| # Licensed under the MIT License. | |
| """Static code complexity analysis using radon for Python and regex patterns for TypeScript.""" | |
| import pathlib | |
| import re | |
| from dataclasses import dataclass | |
| from typing import Dict, List, Optional | |
| from .file_discovery import get_tracked_source_files | |
| # Radon is optional - graceful fallback if not available | |
| try: | |
| from radon.complexity import cc_visit | |
| from radon.metrics import mi_visit | |
| RADON_AVAILABLE = True | |
| except ImportError: | |
| RADON_AVAILABLE = False | |
@dataclass
class FunctionComplexity:
    """Complexity metrics for a single function/method.

    Declared as a dataclass so instances can be created with keyword
    arguments (``name=..., line=..., complexity=..., length=...``), which is
    how the analyzers in this module construct them.
    """

    name: str  # identifier of the analyzed block
    line: int  # line on which the block starts
    complexity: int  # complexity score of the block
    length: int  # lines of code

    def to_dict(self) -> dict:
        """Return the metrics as a plain dictionary keyed by field name."""
        return {
            "name": self.name,
            "line": self.line,
            "complexity": self.complexity,
            "length": self.length,
        }
@dataclass
class FileComplexity:
    """Complexity metrics for a file.

    Declared as a dataclass so instances can be created with keyword
    arguments and ``maintainability_index`` can be omitted (defaults to
    ``None``).
    """

    path: str  # file path as reported in the results
    total_lines: int  # number of lines in the file
    code_lines: int  # lines that are neither blank nor pure comment lines
    functions: List["FunctionComplexity"]  # per-function metrics
    max_complexity: int  # highest complexity among ``functions``
    avg_complexity: float  # mean complexity over ``functions``
    maintainability_index: Optional[float] = None  # None when not computed

    def to_dict(self) -> dict:
        """Return the metrics as a plain dictionary.

        ``avg_complexity`` and ``maintainability_index`` are rounded to two
        decimals; ``function_count`` is derived from ``functions`` and each
        function is serialized through its own ``to_dict``.
        """
        mi = self.maintainability_index
        return {
            "path": self.path,
            "total_lines": self.total_lines,
            "code_lines": self.code_lines,
            "function_count": len(self.functions),
            "max_complexity": self.max_complexity,
            "avg_complexity": round(self.avg_complexity, 2),
            "maintainability_index": round(mi, 2) if mi is not None else None,
            "functions": [f.to_dict() for f in self.functions],
        }
def analyze_python_file(
    filepath: pathlib.Path, repo_root: pathlib.Path
) -> Optional[FileComplexity]:
    """Compute radon-based complexity metrics for one Python source file.

    Returns ``None`` when radon is unavailable, when the file cannot be read
    as UTF-8, or when radon raises ``SyntaxError`` for its content. The
    reported path is ``filepath`` relative to ``repo_root`` in POSIX form.
    """
    if not RADON_AVAILABLE:
        return None

    try:
        source = filepath.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return None

    source_lines = source.splitlines()

    # Count lines that are neither blank nor pure "#" comment lines.
    non_comment_count = 0
    for raw_line in source_lines:
        stripped = raw_line.strip()
        if stripped and not stripped.startswith("#"):
            non_comment_count += 1

    try:
        blocks = cc_visit(source)
        maintainability = mi_visit(source, multi=False)
    except SyntaxError:
        return None

    # radon returns different block types (Function, Class, etc.); a block
    # without an "endline" attribute is recorded with length 0.
    functions = [
        FunctionComplexity(
            name=block.name,
            line=block.lineno,
            complexity=block.complexity,
            length=(block.endline - block.lineno + 1)
            if hasattr(block, "endline")
            else 0,
        )
        for block in blocks
    ]

    scores = [fn.complexity for fn in functions]
    if scores:
        peak = max(scores)
        mean = sum(scores) / len(scores)
    else:
        peak = 0
        mean = 0

    return FileComplexity(
        path=filepath.relative_to(repo_root).as_posix(),
        total_lines=len(source_lines),
        code_lines=non_comment_count,
        functions=functions,
        max_complexity=peak,
        avg_complexity=mean,
        maintainability_index=maintainability,
    )
def analyze_typescript_file(
    filepath: pathlib.Path, repo_root: pathlib.Path
) -> Optional[FileComplexity]:
    """Analyze a TypeScript file for complexity metrics using regex patterns.

    This is a simplified, heuristic analysis - for accurate TypeScript
    complexity, consider using ts-morph or the typescript compiler API.
    A function is assumed to extend until the next detected definition (or
    the end of the file); braces are not balanced.

    Args:
        filepath: File to analyze.
        repo_root: Directory the reported path is made relative to.

    Returns:
        The metrics for the file, or ``None`` if it cannot be read as UTF-8.
        ``maintainability_index`` is always ``None`` for these files.

    Raises:
        ValueError: If ``filepath`` is not located under ``repo_root``.
    """
    try:
        content = filepath.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return None

    lines = content.splitlines()
    total_lines = len(lines)
    code_lines = sum(
        1 for line in lines if line.strip() and not line.strip().startswith("//")
    )

    # Find function/method definitions.
    # Matches: function name, async function name, methodName(, async methodName(
    # - Leading indentation is limited to spaces/tabs so a match can never
    #   start on a blank line above the definition (that would shift the
    #   reported start line and inflate the length).
    # - Statements such as "if (x) {" have the same shape as a method
    #   definition, so control-flow keywords are rejected as method names.
    function_pattern = re.compile(
        r"^[ \t]*(?:export\s+)?(?:async\s+)?"
        r"(?:function\s+(\w+)"
        r"|(?!(?:if|for|while|switch|catch|with|function)\b)(\w+)"
        r"\s*(?:<[^>]*>)?\s*\([^)]*\)\s*(?::\s*[^{]+)?\s*\{)",
        re.MULTILINE,
    )

    # Complexity indicators (simplified cyclomatic complexity estimation).
    branch_patterns = [
        re.compile(pattern)
        for pattern in (
            # Also matches the "if (" of "else if (", so "else if" is not
            # listed separately (it would be counted twice).
            r"\bif\s*\(",
            r"\belse\s*\{",
            r"\bfor\s*\(",
            r"\bwhile\s*\(",
            r"\bswitch\s*\(",
            r"\bcase\s+",
            r"\bcatch\s*\(",
            # Ternary: a lone "?" (not part of "??", "?." or "?:") followed
            # later by ":". No word boundary is required before "?", so the
            # usual "cond ? a : b" spacing is recognized.
            r"(?<!\?)\?(?![?.:])[^:]+:",
            r"\?\?",  # nullish coalescing
            r"\|\|",  # logical or
            r"&&",  # logical and
        )
    ]

    functions = []
    func_matches = list(function_pattern.finditer(content))
    for i, match in enumerate(func_matches):
        func_name = match.group(1) or match.group(2) or "anonymous"
        start_line = content.count("\n", 0, match.start()) + 1

        # Rough extent: everything up to the next detected definition.
        if i + 1 < len(func_matches):
            func_end = func_matches[i + 1].start()
        else:
            func_end = len(content)
        func_content = content[match.start() : func_end]

        # Base complexity of 1 plus one per branch indicator found.
        complexity = 1 + sum(
            len(pattern.findall(func_content)) for pattern in branch_patterns
        )

        # splitlines() does not count a phantom extra line when the slice
        # ends with a newline (true for every function but possibly the last).
        length = len(func_content.splitlines())

        functions.append(
            FunctionComplexity(
                name=func_name,
                line=start_line,
                complexity=complexity,
                length=length,
            )
        )

    max_cc = max((f.complexity for f in functions), default=0)
    avg_cc = sum(f.complexity for f in functions) / len(functions) if functions else 0
    rel_path = filepath.relative_to(repo_root).as_posix()

    return FileComplexity(
        path=rel_path,
        total_lines=total_lines,
        code_lines=code_lines,
        functions=functions,
        max_complexity=max_cc,
        avg_complexity=avg_cc,
        maintainability_index=None,  # Not computed for TypeScript
    )
def find_source_files(
    repo_root: pathlib.Path, extensions: List[str]
) -> List[pathlib.Path]:
    """Return the source files in ``repo_root`` that have one of ``extensions``.

    Thin wrapper that delegates to ``get_tracked_source_files``.
    NOTE(review): the original docstring states discovery uses
    ``git ls-files``; that cannot be confirmed from this module alone.
    """
    tracked_files = get_tracked_source_files(repo_root, extensions)
    return tracked_files
def analyze_complexity(
    repo_root: pathlib.Path, complexity_threshold: int = 10
) -> dict:
    """Run complexity analysis on the repository.

    Args:
        repo_root: Root directory whose source files are analyzed.
        complexity_threshold: A file or function counts as "high complexity"
            when its complexity is strictly greater than this value.
            Defaults to 10.

    Returns:
        Dictionary with complexity metrics for all analyzed files:
        ``by_language`` (per-file metrics under ``"python"`` and
        ``"typescript"``, most complex file first), ``summary`` (aggregate
        counts and averages) and ``high_complexity_functions``.
    """
    results: Dict[str, List[dict]] = {
        "python": [],
        "typescript": [],
    }

    # Analyze Python files; files the analyzer rejects (None) are skipped.
    python_files = find_source_files(repo_root, [".py"])
    for filepath in python_files:
        file_complexity = analyze_python_file(filepath, repo_root)
        if file_complexity is not None:
            results["python"].append(file_complexity.to_dict())

    # Analyze TypeScript/JavaScript files (both land in the "typescript" bucket).
    ts_files = find_source_files(repo_root, [".ts", ".tsx", ".js", ".jsx"])
    for filepath in ts_files:
        file_complexity = analyze_typescript_file(filepath, repo_root)
        if file_complexity is not None:
            results["typescript"].append(file_complexity.to_dict())

    # Compute summary statistics over every analyzed file.
    all_files = results["python"] + results["typescript"]
    if all_files:
        avg_file_complexity = round(
            sum(f["avg_complexity"] for f in all_files) / len(all_files), 2
        )
    else:
        avg_file_complexity = 0

    summary = {
        "total_files": len(all_files),
        "total_functions": sum(f["function_count"] for f in all_files),
        "total_lines": sum(f["total_lines"] for f in all_files),
        "total_code_lines": sum(f["code_lines"] for f in all_files),
        "files_with_high_complexity": [
            f["path"] for f in all_files if f["max_complexity"] > complexity_threshold
        ],
        "avg_file_complexity": avg_file_complexity,
    }

    # Sort files by max complexity (most complex first).
    results["python"].sort(key=lambda f: f["max_complexity"], reverse=True)
    results["typescript"].sort(key=lambda f: f["max_complexity"], reverse=True)

    return {
        "by_language": results,
        "summary": summary,
        "high_complexity_functions": _get_high_complexity_functions(
            all_files, threshold=complexity_threshold
        ),
    }
def _get_high_complexity_functions(
    files: List[dict], threshold: int = 10
) -> List[dict]:
    """Collect the most complex functions across the analyzed files.

    Only functions whose complexity is strictly greater than ``threshold``
    are kept. The result is ordered from most to least complex and capped
    at 30 entries; files without a ``"functions"`` key contribute nothing.
    """
    offenders = [
        {
            "file": entry["path"],
            "function": fn["name"],
            "line": fn["line"],
            "complexity": fn["complexity"],
        }
        for entry in files
        for fn in entry.get("functions", [])
        if fn["complexity"] > threshold
    ]
    ranked = sorted(offenders, key=lambda item: item["complexity"], reverse=True)
    return ranked[:30]  # Top 30
if __name__ == "__main__":
    # Script entry point: analyze the parent of this file's directory and
    # print the resulting report as indented JSON.
    import json

    script_dir = pathlib.Path(__file__).parent
    repo = script_dir.parent
    result = analyze_complexity(repo)
    report = json.dumps(result, indent=2)
    print(report)
Xet Storage Details
- Size:
- 9.11 kB
- Xet hash:
- 690c2ceefb073227dc10f22ebda0fa63fb67669561ff2a788f139d17330fdeb7
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.