Buckets:
ktongue/docker_container / .vscode-server /extensions /ms-python.vscode-python-envs-1.20.1-linux-arm64 /analysis /debt_indicators.py
| # Copyright (c) Microsoft Corporation. All rights reserved. | |
| # Licensed under the MIT License. | |
| """Technical debt indicator detection. | |
| Scans code for common debt markers like TODO comments, large files, | |
| long functions, and code smells. | |
| """ | |
| import pathlib | |
| import re | |
| from dataclasses import dataclass | |
| from typing import Dict, List, Optional | |
| from .file_discovery import get_tracked_source_files | |
@dataclass
class DebtMarker:
    """A technical debt marker found in code.

    Declared as a dataclass so instances can be built with keyword
    arguments (``DebtMarker(file=..., line=..., marker_type=..., text=...)``).
    """

    file: str  # path of the file the marker was found in
    line: int  # line number of the marker within that file
    marker_type: str  # TODO, FIXME, HACK, XXX, etc.
    text: str  # comment text that follows the marker keyword

    def to_dict(self) -> dict:
        """Return a plain-dict form of the marker.

        The ``marker_type`` attribute is exposed under the key ``"type"``
        and ``text`` is cut to at most 200 characters.
        """
        return {
            "file": self.file,
            "line": self.line,
            "type": self.marker_type,
            "text": self.text[:200],  # Truncate long comments
        }
@dataclass
class LargeFile:
    """A file that exceeds size thresholds.

    Declared as a dataclass so instances can be built with keyword
    arguments (``LargeFile(path=..., total_lines=..., code_lines=...)``).
    """

    path: str  # path of the oversized file
    total_lines: int  # every line in the file
    code_lines: int  # lines counted as code (the figure compared to the threshold)

    def to_dict(self) -> dict:
        """Return the three fields as a plain dict keyed by field name."""
        return {
            "path": self.path,
            "total_lines": self.total_lines,
            "code_lines": self.code_lines,
        }
@dataclass
class LongFunction:
    """A function that exceeds length thresholds.

    Declared as a dataclass so instances can be built with keyword
    arguments (``LongFunction(file=..., function_name=..., line=..., length=...)``).
    """

    file: str  # path of the file containing the function
    function_name: str  # name of the function
    line: int  # line on which the function starts
    length: int  # number of lines attributed to the function

    def to_dict(self) -> dict:
        """Return a plain-dict form of the record.

        The ``function_name`` attribute is exposed under the key
        ``"function"``; the other keys match the attribute names.
        """
        return {
            "file": self.file,
            "function": self.function_name,
            "line": self.line,
            "length": self.length,
        }
# Debt marker patterns
#
# Each entry is ``(regex, file_type)``.  Group 1 captures the marker keyword and
# group 2 the text that follows it.  Group 2 is optional so that a bare marker
# at the end of a comment (``# TODO``) is still detected; in that case it is
# ``None``.  An owner tag directly after the keyword (``TODO(alice): ...``) is
# accepted and skipped.  A keyword followed by any other character
# (``# TODOS``) is deliberately not a match.
_DEBT_KEYWORDS = "TODO|FIXME|HACK|XXX|BUG|REFACTOR|OPTIMIZE|REVIEW"
_DEBT_OWNER = r"(?:\([^)]*\))?"

DEBT_PATTERNS = [
    # Python line comments: "# TODO: ..."
    (r"#\s*(" + _DEBT_KEYWORDS + r")" + _DEBT_OWNER + r"(?:[:\s](.*))?$", "python"),
    # JS/TS line comments: "// TODO: ..."
    (
        r"//\s*(" + _DEBT_KEYWORDS + r")" + _DEBT_OWNER + r"(?:[:\s](.*))?$",
        "typescript",
    ),
    # JS/TS single-line block comments: "/* TODO: ... */"
    (
        r"/\*\s*(" + _DEBT_KEYWORDS + r")" + _DEBT_OWNER + r"(?:[:\s](.*?))?\*/",
        "typescript",
    ),
]

# Thresholds (configurable)
LARGE_FILE_THRESHOLD = 500  # lines of code
LONG_FUNCTION_THRESHOLD = 50  # lines
def find_debt_markers(
    filepath: pathlib.Path, repo_root: pathlib.Path
) -> List[DebtMarker]:
    """Find TODO/FIXME/HACK markers in a file.

    Args:
        filepath: File to scan. Only ``.py`` and ``.ts``/``.js``/``.tsx``/``.jsx``
            files are scanned; any other suffix yields an empty list without
            the file being read.
        repo_root: Directory that reported paths are made relative to.

    Returns:
        One ``DebtMarker`` per matching line and pattern. Results are grouped
        by pattern (in ``DEBT_PATTERNS`` order) and, within a pattern, ordered
        by line number. An unreadable or non-UTF-8 file yields an empty list.
    """
    # Determine file type first so unsupported files are never opened.
    suffix = filepath.suffix.lower()
    if suffix == ".py":
        file_type = "python"
    elif suffix in {".ts", ".js", ".tsx", ".jsx"}:
        file_type = "typescript"
    else:
        return []

    try:
        content = filepath.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return []

    rel_path = filepath.relative_to(repo_root).as_posix()
    lines = content.splitlines()  # split once, reused for every pattern
    markers = []

    for pattern, pattern_type in DEBT_PATTERNS:
        if pattern_type != file_type:
            continue
        regex = re.compile(pattern, re.IGNORECASE)
        for line_num, line in enumerate(lines, start=1):
            match = regex.search(line)
            if match is None:
                continue
            # Group 2 (the trailing text) may be missing or unmatched.
            has_text = match.lastindex is not None and match.lastindex >= 2
            text = (match.group(2) or "").strip() if has_text else ""
            markers.append(
                DebtMarker(
                    file=rel_path,
                    line=line_num,
                    marker_type=match.group(1).upper(),
                    text=text,
                )
            )

    return markers
def analyze_file_size(
    filepath: pathlib.Path, repo_root: pathlib.Path
) -> Optional[LargeFile]:
    """Report a file whose code-line count exceeds ``LARGE_FILE_THRESHOLD``.

    A "code line" is a non-blank line that does not start (after stripping)
    with the line-comment prefix for the file's language: ``#`` for ``.py``
    and ``//`` for ``.ts``/``.js``/``.tsx``/``.jsx``. For any other suffix
    every line counts as code.

    Returns:
        A ``LargeFile`` with the path relative to ``repo_root`` when the
        threshold is exceeded; ``None`` otherwise, or when the file cannot
        be read as UTF-8.
    """
    try:
        text = filepath.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return None

    all_lines = text.splitlines()
    line_total = len(all_lines)

    # Pick the line-comment prefix for this language, if we know one.
    ext = filepath.suffix.lower()
    if ext == ".py":
        comment_prefix = "#"
    elif ext in {".ts", ".js", ".tsx", ".jsx"}:
        comment_prefix = "//"
    else:
        comment_prefix = None

    if comment_prefix is None:
        code_total = line_total
    else:
        code_total = 0
        for raw in all_lines:
            stripped = raw.strip()
            if stripped and not stripped.startswith(comment_prefix):
                code_total += 1

    if not code_total > LARGE_FILE_THRESHOLD:
        return None

    relative = filepath.relative_to(repo_root).as_posix()
    return LargeFile(path=relative, total_lines=line_total, code_lines=code_total)
def find_long_functions_python(
    filepath: pathlib.Path, repo_root: pathlib.Path
) -> List[LongFunction]:
    """Find Python functions that exceed the length threshold.

    The source is parsed with ``ast`` so each function's extent is taken from
    the syntax tree. An indentation heuristic would end a function at the
    closing ``) -> T:`` line of a multi-line signature and would cut an outer
    function short at a nested ``def``.

    Args:
        filepath: Python file to inspect.
        repo_root: Directory that reported paths are made relative to.

    Returns:
        One ``LongFunction`` per ``def``/``async def`` (including methods and
        nested functions) spanning more than ``LONG_FUNCTION_THRESHOLD`` lines,
        ordered by starting line. ``length`` counts from the ``def`` line to
        the last line of the body, inclusive. A file that cannot be read as
        UTF-8 or cannot be parsed yields an empty list.
    """
    import ast  # local import: the module's import block does not provide it

    try:
        content = filepath.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return []

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        # Best effort, like the read above: unparseable source is skipped.
        return []

    rel_path = filepath.relative_to(repo_root).as_posix()
    long_funcs = []

    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        end_line = getattr(node, "end_lineno", None)
        if end_line is None:
            # Interpreter does not record end positions; extent unknown.
            continue
        length = end_line - node.lineno + 1
        if length > LONG_FUNCTION_THRESHOLD:
            long_funcs.append(
                LongFunction(
                    file=rel_path,
                    function_name=node.name,
                    line=node.lineno,
                    length=length,
                )
            )

    # ast.walk is breadth-first; report in source order instead.
    long_funcs.sort(key=lambda f: f.line)
    return long_funcs
def find_long_functions_typescript(
    filepath: pathlib.Path, repo_root: pathlib.Path
) -> List[LongFunction]:
    """Find TypeScript/JavaScript functions that exceed the length threshold.

    Function starts are recognised by a simplified regex and the end of each
    function is found by counting ``{`` and ``}`` characters line by line.
    Braces inside strings or comments are counted too, so results are
    approximate.

    Args:
        filepath: Source file to inspect.
        repo_root: Directory that reported paths are made relative to.

    Returns:
        One ``LongFunction`` per detected function spanning more than
        ``LONG_FUNCTION_THRESHOLD`` lines, in source order. A file that cannot
        be read as UTF-8 yields an empty list.
    """
    try:
        content = filepath.read_text(encoding="utf-8")
    except (UnicodeDecodeError, OSError):
        return []

    rel_path = filepath.relative_to(repo_root).as_posix()
    lines = content.splitlines()
    long_funcs = []

    # Simplified pattern for function definitions
    func_pattern = re.compile(
        r"^\s*(?:export\s+)?(?:async\s+)?(?:function\s+(\w+)|(\w+)\s*(?:<[^>]*>)?\s*\([^)]*\)\s*(?::\s*[^{]+)?\s*\{)"
    )
    # The "name(...) {" alternative also matches control-flow statements such
    # as "if (x) {"; those are not functions and must not be reported.
    control_keywords = {"if", "for", "while", "switch", "catch", "with"}

    i = 0
    while i < len(lines):
        match = func_pattern.match(lines[i])
        if match and match.group(2) not in control_keywords:
            func_name = match.group(1) or match.group(2) or "anonymous"
            func_start = i

            # Count braces to find function end
            brace_count = 0
            found_open = False
            for j in range(i, len(lines)):
                opened = lines[j].count("{")
                brace_count += opened - lines[j].count("}")
                if opened:
                    found_open = True

                # Balance is checked once per line, after all its braces.
                if found_open and brace_count == 0:
                    length = j - func_start + 1
                    if length > LONG_FUNCTION_THRESHOLD:
                        long_funcs.append(
                            LongFunction(
                                file=rel_path,
                                function_name=func_name,
                                line=func_start + 1,
                                length=length,
                            )
                        )
                    # Resume after the function; nested functions are skipped.
                    i = j
                    break
            else:
                # Reached end without closing brace
                break

        i += 1

    return long_funcs
def analyze_debt(repo_root: pathlib.Path) -> dict:
    """Run every debt-indicator check over the repository's source files.

    Files are obtained from ``get_tracked_source_files`` for the ``.py``,
    ``.ts``, ``.js``, ``.tsx`` and ``.jsx`` extensions. Each file is scanned
    for debt markers, checked against the large-file threshold, and searched
    for long functions with the analyzer matching its suffix.

    Returns:
        Dictionary with the keys ``debt_markers`` (markers grouped by type,
        total count and per-type summary), ``large_files`` (sorted by code
        lines, descending), ``long_functions`` (sorted by length, descending;
        only the first 50 are listed but ``count`` covers all) and
        ``files_analyzed``.
    """
    script_suffixes = {".ts", ".js", ".tsx", ".jsx"}

    # Find all source files using git ls-files (respects .gitignore)
    source_files = get_tracked_source_files(
        repo_root, [".py", ".ts", ".js", ".tsx", ".jsx"]
    )

    found_markers: List[DebtMarker] = []
    oversized: List[LargeFile] = []
    lengthy: List[LongFunction] = []

    for path in source_files:
        found_markers.extend(find_debt_markers(path, repo_root))

        size_report = analyze_file_size(path, repo_root)
        if size_report:
            oversized.append(size_report)

        ext = path.suffix.lower()
        if ext == ".py":
            lengthy.extend(find_long_functions_python(path, repo_root))
        elif ext in script_suffixes:
            lengthy.extend(find_long_functions_typescript(path, repo_root))

    # Bucket the serialized markers under their marker type.
    grouped: Dict[str, List[dict]] = {}
    for found in found_markers:
        grouped.setdefault(found.marker_type, []).append(found.to_dict())

    # Largest first for both rankings.
    oversized = sorted(oversized, key=lambda item: item.code_lines, reverse=True)
    lengthy = sorted(lengthy, key=lambda item: item.length, reverse=True)

    marker_section = {
        "by_type": grouped,
        "total_count": len(found_markers),
        "summary": {kind: len(entries) for kind, entries in grouped.items()},
    }
    large_file_section = {
        "files": [item.to_dict() for item in oversized],
        "count": len(oversized),
        "threshold": LARGE_FILE_THRESHOLD,
    }
    long_function_section = {
        "functions": [item.to_dict() for item in lengthy[:50]],  # Top 50
        "count": len(lengthy),
        "threshold": LONG_FUNCTION_THRESHOLD,
    }

    return {
        "debt_markers": marker_section,
        "large_files": large_file_section,
        "long_functions": long_function_section,
        "files_analyzed": len(source_files),
    }
if __name__ == "__main__":
    import json

    # Analyze the directory two levels above this file and dump the report
    # to stdout as indented JSON.
    target_root = pathlib.Path(__file__).parent.parent
    print(json.dumps(analyze_debt(target_root), indent=2))
Xet Storage Details
- Size: 11.1 kB
- Xet hash: 4d232cb4d5d68ae871e9ace48327f878aa8efa35c5bcf8e6c3217cce48c38488
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.