from __future__ import annotations

from app.config import Settings
from app.errors import InvalidPath
from app.naming import (
    AGENT_ID_RE,
    SLUG_RE,
    SourceURI,
    agent_id_from_bucket,
    parse_source_uri,
)

# Exact target paths (after stripping leading slashes) that are always rejected.
BLOCKED_TARGETS = {
    "README.md",
    "LEADERBOARD.md",
    "shared_resources/README.md",
}

# Any target path that starts with one of these prefixes is rejected.
BLOCKED_PREFIXES = ("audit/",)


def validate_agent_id(agent_id: str) -> None:
    """Raise InvalidPath unless ``AGENT_ID_RE.match`` accepts *agent_id*."""
    if not AGENT_ID_RE.match(agent_id):
        raise InvalidPath(f"invalid agent_id: {agent_id!r}")


def validate_slug(slug: str) -> None:
    """Raise InvalidPath unless ``SLUG_RE.match`` accepts *slug*."""
    if not SLUG_RE.match(slug):
        raise InvalidPath(f"invalid slug: {slug!r}")


def validate_path_components(path: str) -> None:
    """Check a relative, '/'-separated path component by component.

    Trailing slashes are ignored. The path is rejected when it is empty,
    starts with '/', or has a component that is empty, is '.' or '..',
    starts with '.', or contains a character with code point below 32.

    Raises:
        InvalidPath: if any of the rules above is violated.
    """
    if not path:
        raise InvalidPath("empty path")
    if path.startswith("/"):
        raise InvalidPath("path must not be absolute")
    for part in path.rstrip("/").split("/"):
        # An empty component comes from a doubled slash ("a//b").
        if part in ("", ".", ".."):
            raise InvalidPath(f"invalid path component: {part!r}")
        if part.startswith("."):
            raise InvalidPath(f"path component must not start with '.': {part!r}")
        if any(ord(c) < 32 for c in part):
            raise InvalidPath("path contains control characters")


def check_dest_not_blocked(target: str) -> None:
    """Reject a target path that is reserved.

    Leading slashes are stripped before comparing, so '/README.md' and
    'README.md' are treated alike.

    Raises:
        InvalidPath: if the normalized target is in BLOCKED_TARGETS or starts
            with one of BLOCKED_PREFIXES.
    """
    norm = target.lstrip("/")
    if norm in BLOCKED_TARGETS:
        raise InvalidPath(f"target path blocked: {norm}", hint="this path is reserved")
    for prefix in BLOCKED_PREFIXES:
        if norm.startswith(prefix):
            raise InvalidPath(
                f"target path blocked: {norm}",
                hint=f"prefix '{prefix}' is reserved",
            )


def resolve_source(settings: Settings, source: str) -> tuple[SourceURI, str]:
    """Parse a source URI and confirm it points inside a valid agent bucket.

    The URI must parse, its org must equal ``settings.org``, and its bucket
    must map to an agent id for ``settings.collab_slug``. A non-empty path
    inside the bucket is additionally run through validate_path_components.

    Returns:
        (parsed_uri, agent_id)

    Raises:
        InvalidPath: if any of the checks above fails.
    """
    parsed = parse_source_uri(source)
    if parsed is None:
        raise InvalidPath(f"source must be an hf://buckets/... URI, got: {source!r}")
    if parsed.org != settings.org:
        raise InvalidPath(
            f"source must be under org '{settings.org}', got '{parsed.org}'",
            hint="agents post from buckets in this org only",
        )
    agent_id = agent_id_from_bucket(parsed.bucket, settings.collab_slug)
    if agent_id is None:
        raise InvalidPath(
            f"source bucket '{parsed.bucket}' does not match '{settings.collab_slug}-'",
            hint="source must be under your own scratch bucket",
        )
    # An empty path (the bucket root) skips component validation.
    if parsed.path:
        validate_path_components(parsed.path)
    return parsed, agent_id


def validate_shared_dest_path(dest_path: str, agent_id: str) -> None:
    """Validate a destination path under ``shared_resources/``.

    The path must pass validate_path_components, must contain the marker
    ``_<agent_id>``, and ``shared_resources/<dest_path>`` must not be a
    blocked target.

    Raises:
        InvalidPath: if any of the checks above fails.
    """
    validate_path_components(dest_path)
    marker = f"_{agent_id}"
    # NOTE(review): the marker is accepted anywhere in dest_path, not only in
    # the leaf, although the message below mentions the leaf. This matches the
    # previous condition, which also tested the whole path, so the set of
    # accepted paths is unchanged. Confirm whether leaf-only was intended.
    if marker not in dest_path:
        raise InvalidPath(
            f"shared_resources dest path must include '_{agent_id}' in the leaf component",
            # Both examples contain the marker, so following the hint passes.
            hint=f"e.g. 'tokenizers/bpe_{agent_id}.json' or 'plots/curve_{agent_id}.png'",
        )
    full_target = f"shared_resources/{dest_path}"
    check_dest_not_blocked(full_target)