| from __future__ import annotations |
|
|
| from app.config import Settings |
| from app.errors import InvalidPath |
| from app.naming import ( |
| AGENT_ID_RE, |
| SLUG_RE, |
| SourceURI, |
| agent_id_from_bucket, |
| parse_source_uri, |
| ) |
|
|
|
|
# Exact destination paths (compared after leading slashes are stripped) that
# check_dest_not_blocked refuses.
BLOCKED_TARGETS = {"README.md", "LEADERBOARD.md", "shared_resources/README.md"}

# Any destination starting with one of these prefixes is refused as well.
BLOCKED_PREFIXES = ("audit/",)
|
|
|
|
def validate_agent_id(agent_id: str) -> None:
    """Ensure ``agent_id`` is matched by ``AGENT_ID_RE``.

    Raises:
        InvalidPath: if ``AGENT_ID_RE.match`` finds no match for ``agent_id``.
    """
    matched = AGENT_ID_RE.match(agent_id)
    if matched:
        return
    raise InvalidPath(f"invalid agent_id: {agent_id!r}")
|
|
|
|
def validate_slug(slug: str) -> None:
    """Ensure ``slug`` is matched by ``SLUG_RE``.

    Raises:
        InvalidPath: if ``SLUG_RE.match`` finds no match for ``slug``.
    """
    matched = SLUG_RE.match(slug)
    if matched:
        return
    raise InvalidPath(f"invalid slug: {slug!r}")
|
|
|
|
def validate_path_components(path: str) -> None:
    """Check that ``path`` is a relative path built from plain components.

    A single trailing ``/`` is tolerated. Every ``/``-separated component
    must be non-empty, must not be ``.`` or ``..``, must not start with a
    dot, and must not contain ASCII control characters.

    Args:
        path: The slash-separated path to validate.

    Raises:
        InvalidPath: if ``path`` is empty, absolute, or has a component that
            breaks one of the rules above.
    """
    if not path:
        raise InvalidPath("empty path")
    if path.startswith("/"):
        raise InvalidPath("path must not be absolute")

    # Drop at most ONE trailing slash. Stripping all of them would let a path
    # such as "a//" through even though it contains an empty component, which
    # is rejected everywhere else in the path (e.g. "a//b").
    trimmed = path[:-1] if path.endswith("/") else path

    for part in trimmed.split("/"):
        if part in ("", ".", ".."):
            raise InvalidPath(f"invalid path component: {part!r}")
        if part.startswith("."):
            raise InvalidPath(f"path component must not start with '.': {part!r}")
        # C0 controls (< 0x20) plus DEL (0x7f), which is also an ASCII control.
        if any(ord(c) < 32 or ord(c) == 127 for c in part):
            raise InvalidPath("path contains control characters")
|
|
|
|
def check_dest_not_blocked(target: str) -> None:
    """Refuse a destination path that is reserved.

    Leading slashes are stripped from ``target`` first. The result is then
    compared against ``BLOCKED_TARGETS`` (exact match) and against each entry
    of ``BLOCKED_PREFIXES`` (prefix match, in order).

    Raises:
        InvalidPath: if the stripped target is a blocked path or starts with
            a blocked prefix.
    """
    candidate = target.lstrip("/")
    if candidate in BLOCKED_TARGETS:
        raise InvalidPath(f"target path blocked: {candidate}", hint="this path is reserved")

    # First reserved prefix that the candidate starts with, if any.
    reserved = next((p for p in BLOCKED_PREFIXES if candidate.startswith(p)), None)
    if reserved is not None:
        raise InvalidPath(f"target path blocked: {candidate}", hint=f"prefix '{reserved}' is reserved")
|
|
|
|
def resolve_source(settings: Settings, source: str) -> tuple[SourceURI, str]:
    """Resolve ``source`` to a parsed URI plus the agent id of its bucket.

    The URI must parse, its org must equal ``settings.org``, and its bucket
    must yield an agent id via ``agent_id_from_bucket`` for
    ``settings.collab_slug``. A non-empty path part is additionally run
    through ``validate_path_components``.

    Returns:
        ``(parsed_uri, agent_id)``.

    Raises:
        InvalidPath: if any of the checks above fails.
    """
    uri = parse_source_uri(source)
    if uri is None:
        raise InvalidPath(f"source must be an hf://buckets/... URI, got: {source!r}")

    if uri.org != settings.org:
        org_message = f"source must be under org '{settings.org}', got '{uri.org}'"
        raise InvalidPath(org_message, hint="agents post from buckets in this org only")

    owner = agent_id_from_bucket(uri.bucket, settings.collab_slug)
    if owner is None:
        bucket_message = (
            f"source bucket '{uri.bucket}' does not match '{settings.collab_slug}-<agent_id>'"
        )
        raise InvalidPath(bucket_message, hint="source must be under your own scratch bucket")

    # An empty path is allowed; only a non-empty one is validated.
    if uri.path:
        validate_path_components(uri.path)
    return uri, owner
|
|
|
|
def validate_shared_dest_path(dest_path: str, agent_id: str) -> None:
    """Validate a destination path under ``shared_resources/`` for an agent.

    The path must pass ``validate_path_components``, must contain the marker
    ``_<agent_id>``, and the resulting ``shared_resources/<dest_path>`` target
    must not be rejected by ``check_dest_not_blocked``.

    Args:
        dest_path: Destination path relative to ``shared_resources/``.
        agent_id: Id of the agent the path must be tagged with.

    Raises:
        InvalidPath: if the path is malformed, lacks the ``_<agent_id>``
            marker, or resolves to a blocked target.
    """
    validate_path_components(dest_path)

    marker = f"_{agent_id}"
    # Testing the whole path subsumes a separate test of the extension-less
    # leaf: the leaf is a substring of dest_path, so it can never contain the
    # marker when dest_path does not.
    # NOTE(review): the message below says "leaf component", but the marker is
    # accepted anywhere in the path — confirm which of the two is intended.
    if marker not in dest_path:
        raise InvalidPath(
            f"shared_resources dest path must include '_{agent_id}' in the leaf component",
            # Both examples must themselves contain '_<agent_id>', otherwise
            # the hint would suggest a path that this check rejects.
            hint=f"e.g. 'tokenizers/bpe_{agent_id}.json' or 'plots/curve_{agent_id}.png'",
        )

    check_dest_not_blocked(f"shared_resources/{dest_path}")
|
|