"""
Repository Structure Scanner
============================

Scans the entire file structure of a repository and outputs a tree-like
Markdown representation. Handles massive (1GB+) repositories efficiently.

Usage:
    python scan_structure.py [path] [--output FILE] [--ignore PATTERN ...]
                             [--no-default-ignore]

Output:
    A Markdown file containing the full directory tree.
"""
import argparse
import fnmatch
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
# Entries skipped by default while scanning.  Each entry is either an exact
# file/directory name or a pattern with a leading "*" that matches by suffix.
DEFAULT_IGNORE = {
    # Version control metadata
    ".git",
    # Python virtual environments
    ".venv",
    "venv",
    "env",
    # Python caches and build/test artefacts
    "__pycache__",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    ".eggs",
    "*.egg-info",
    ".ipynb_checkpoints",
    # JavaScript dependencies
    "node_modules",
    # Operating-system generated files
    ".DS_Store",
    "Thumbs.db",
    "desktop.ini",
}
def should_ignore(name: str, ignore_set: set) -> bool:
    """Return True if *name* matches any entry in the ignore set.

    An entry matches when it is

    * exactly equal to *name*,
    * a ``"*suffix"`` pattern and *name* ends with ``suffix``, or
    * a shell-style glob (``*``, ``?``, ``[seq]``) that matches *name*
      case-sensitively, e.g. ``"build*"`` or ``"test_?.py"``.

    Args:
        name: A single path component (file or directory name), not a path.
        ignore_set: Exact names and/or patterns to test against.

    Returns:
        True if the entry should be skipped, False otherwise.
    """
    # Fast path: exact names are the common case and cost one hash lookup.
    if name in ignore_set:
        return True

    for pattern in ignore_set:
        # Original suffix rule, kept as-is so every pattern that matched
        # before this function understood globs still matches.
        if pattern.startswith("*") and name.endswith(pattern[1:]):
            return True
        # Only entries containing glob metacharacters can match beyond exact
        # equality, so plain names skip the pattern machinery entirely.
        if ("*" in pattern or "?" in pattern or "[" in pattern) and (
            fnmatch.fnmatchcase(name, pattern)
        ):
            return True
    return False
def build_tree(root_path: str, ignore_set: set) -> list[str]:
    """Walk *root_path* depth-first and return tree-formatted lines.

    Each directory is read with ``os.scandir``.  Entries matching
    ``should_ignore`` are dropped; the rest are sorted with directories
    first, then case-insensitively by name.  Directory names get a trailing
    ``/``.  Symbolic links are never followed, so a link to a directory is
    listed like a file and not descended into.

    Args:
        root_path: Directory whose contents are listed.  The root itself is
            not part of the returned lines.
        ignore_set: Names/patterns passed to ``should_ignore`` for every entry.

    Returns:
        One string per listed entry, in display order, using box-drawing
        connectors (``├── ``, ``└── ``) and ``│   `` continuation bars.

    Raises:
        OSError: Any error from ``os.scandir`` other than ``PermissionError``
            (for example ``FileNotFoundError`` when *root_path* is missing).
    """
    lines: list[str] = []

    def _walk(current: str, prefix: str) -> None:
        try:
            # The context manager closes the directory handle before we
            # recurse, so open handles do not pile up with tree depth.
            with os.scandir(current) as scanner:
                # Filter first so ignored entries are never sorted, and
                # resolve is_dir once per entry instead of three times.
                children = [
                    (entry.is_dir(follow_symlinks=False), entry)
                    for entry in scanner
                    if not should_ignore(entry.name, ignore_set)
                ]
        except PermissionError:
            # Unreadable directory: it stays listed by its parent but is
            # left unexpanded.
            return

        # Directories first (False sorts before True), then by lowercase name.
        children.sort(key=lambda child: (not child[0], child[1].name.lower()))

        last_index = len(children) - 1
        for index, (is_directory, entry) in enumerate(children):
            is_last = index == last_index
            connector = "└── " if is_last else "├── "
            suffix = "/" if is_directory else ""
            lines.append(f"{prefix}{connector}{entry.name}{suffix}")

            if is_directory:
                # Below the last sibling the column is blank; otherwise a
                # vertical bar links to the siblings that follow.
                extension = "    " if is_last else "│   "
                _walk(entry.path, prefix + extension)

    _walk(root_path, "")
    return lines
def main() -> None:
    """Parse the command line, scan the directory and write the Markdown tree.

    Command-line arguments:
        path: Root directory to scan (default: current directory).
        --output / -o: Output file (default: ``STRUCTURE.md`` in the root).
        --ignore: Extra names/patterns added to the ignore set.
        --no-default-ignore: Start from an empty ignore set instead of
            ``DEFAULT_IGNORE``.

    Exits with a usage error (status 2) if the root path is not a directory.
    Prints the scanned root, the effective ignore list and a summary line.
    """
    parser = argparse.ArgumentParser(
        description="Scan repository file structure and output a Markdown tree."
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="Root directory to scan (default: current directory).",
    )
    parser.add_argument(
        "--output",
        "-o",
        default=None,
        help="Output Markdown file path (default: STRUCTURE.md in scanned dir).",
    )
    parser.add_argument(
        "--ignore",
        nargs="*",
        default=None,
        help="Extra patterns to ignore (added to built-in defaults).",
    )
    parser.add_argument(
        "--no-default-ignore",
        action="store_true",
        help="Disable the built-in ignore list (scan everything).",
    )
    args = parser.parse_args()

    root = os.path.abspath(args.path)
    # Fail with a usage message rather than a traceback from os.scandir.
    if not os.path.isdir(root):
        parser.error(f"not a directory: {root}")
    root_name = os.path.basename(root)

    ignore_set: set = set() if args.no_default_ignore else set(DEFAULT_IGNORE)
    if args.ignore:
        ignore_set.update(args.ignore)

    out_path = args.output or os.path.join(root, "STRUCTURE.md")

    # Hide a previously generated output file from the listing, but only when
    # it lives inside the scanned tree.  The ignore set works on bare names,
    # so adding it unconditionally would hide every same-named file in the
    # tree even when the output is written somewhere else.
    out_dir = os.path.dirname(os.path.abspath(out_path))
    try:
        common = os.path.commonpath([root, out_dir])
    except ValueError:
        # Raised when the paths cannot share a prefix (e.g. different drives).
        output_in_tree = False
    else:
        output_in_tree = os.path.normcase(common) == os.path.normcase(root)
    if output_in_tree:
        ignore_set.add(os.path.basename(out_path))

    print(f"Scanning: {root}")
    print(f"Ignoring: {', '.join(sorted(ignore_set))}")

    tree_lines = build_tree(root, ignore_set)

    # The heading keeps its trailing newline so that joining with "\n"
    # leaves a blank line before the fenced block.
    md_lines = [
        "## Project Structure\n",
        "```text",
        f"{root_name}/",
    ]
    md_lines.extend(tree_lines)
    md_lines.append("```\n")

    content = "\n".join(md_lines)

    with open(out_path, "w", encoding="utf-8") as fh:
        fh.write(content)

    print(f"Done — {len(tree_lines)} entries written to {out_path}")


if __name__ == "__main__":
    main()
|