from pathlib import Path TEXT_SUFFIXES = {".py", ".md", ".txt", ".json", ".toml", ".yaml", ".yml"} SKIP_DIRS = {".cache", ".git", ".venv", "__pycache__", "runtime", "finetuning", "hf_upload"} def collect_project_context(root: str | Path, max_chars: int) -> str: base = Path(root).resolve() if not base.exists(): raise FileNotFoundError(f"Context root does not exist: {base}") chunks: list[str] = [] used = 0 for path in sorted(base.rglob("*")): if path.is_dir() or path.suffix.lower() not in TEXT_SUFFIXES: continue if any(part in SKIP_DIRS for part in path.parts): continue rel = path.relative_to(base) try: text = path.read_text(encoding="utf-8", errors="ignore") except OSError: continue block = f"\n### {rel}\n{text[:4000]}\n" if used + len(block) > max_chars: remaining = max_chars - used if remaining > 200: chunks.append(block[:remaining]) break chunks.append(block) used += len(block) return "".join(chunks)