import json
import os
from datetime import datetime, timezone
from typing import Optional
from huggingface_hub import CommitOperationAdd
from src.envs import API, QUEUE_REPO, PROJ_DIR
def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    """Validate that *path* points at a non-empty local file.

    Args:
        path: Local file system path to check; may be ``None`` or empty.
        name: Human-readable file label used in error messages.
        required: When ``True``, a missing/empty *path* is an error; when
            ``False``, a missing/empty *path* yields ``None``.

    Returns:
        The unchanged *path* when it is a non-empty regular file, or ``None``
        when no path was given and the file is optional.

    Raises:
        FileNotFoundError: If a required path is missing, or the given path
            is not a file on the local file system.
        ValueError: If the file exists but has a size of zero bytes.
    """
    if not path:
        # No path supplied: only an error when the caller demands the file.
        if not required:
            return None
        raise FileNotFoundError(f"Missing required file: {name}")

    is_regular_file = os.path.isfile(path)
    if not is_regular_file:
        raise FileNotFoundError(
            f"Provided path for {name} is not a file on the local file system: {path}"
        )

    size_in_bytes = os.path.getsize(path)
    if size_in_bytes == 0:
        raise ValueError(f"{name} is empty: {path}")

    return path
def _sanitize_alias(alias: Optional[str]) -> str:
    """Return a folder-safe version of *alias*, or ``"NA"`` when none is given.

    Non-string values and blank/whitespace-only strings map to ``"NA"``.
    Otherwise surrounding whitespace is stripped and every ``/`` is replaced
    with ``_``.
    """
    trimmed = alias.strip() if isinstance(alias, str) else ""
    if not trimmed:
        return "NA"

    # Keep this consistent with frontend polling:
    #   frontend uses folder_name = f"{group_id_norm} {alias_clean}"
    # Interior spaces are therefore left untouched; only the path-breaking
    # separator is rewritten.
    return trimmed.replace("/", "_")
def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """
    Upload submitted files to the private queue dataset for offline evaluation.

    Layout in repo:
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.py
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/preprocess.py
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.pt   (optional)
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/request.json

    All files are added in a single commit to ``QUEUE_REPO`` (repo type
    ``dataset``).

    Args:
        group_id: Identifier of the submitting group. Surrounding whitespace
            is stripped. Must be non-blank and must not contain ``/``.
        alias: Optional display alias. The folder name uses the sanitized
            alias (``"NA"`` when missing); ``request.json`` stores the
            stripped but otherwise unsanitized value.
        state_dict_file: Optional local path to the model weights.
        model_py_file: Local path to the model definition (required).
        preproc_py_file: Local path to the preprocessing code (required).

    Returns:
        A ``(message, timestamp)`` tuple: a user-facing confirmation string
        and the UTC timestamp (``YYYYmmddTHHMMSSZ``) used as the submission
        folder name.

    Raises:
        ValueError: If ``group_id`` is missing/blank, is not a string, or
            contains ``/``; or if a provided file is empty.
        FileNotFoundError: If a required file is missing or a provided path
            is not a local file.
    """
    if not isinstance(group_id, str) or not group_id.strip():
        raise ValueError("Group ID is required.")

    group_id = group_id.strip()
    # group_id is interpolated into the repo path. A "/" would add path
    # segments, breaking the documented layout and letting a submission land
    # outside its own "{group_id} {alias}" folder. Reject instead of rewriting
    # so the folder name never silently diverges from what the frontend polls.
    if "/" in group_id:
        raise ValueError("Group ID must not contain '/'.")

    alias = alias.strip() if isinstance(alias, str) else alias

    # Second-resolution UTC timestamp; doubles as the submission folder name.
    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    alias_clean = _sanitize_alias(alias)

    # IMPORTANT:
    # The frontend polling code looks for:
    #   folder_name = f"{group_id_norm} {alias_clean}"
    # so this must be "{group_id} {alias}", not "{group_id} + {alias}".
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    # Validate every local file before building any commit operation.
    model_py_file = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_py_file = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    state_dict_file = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["img_val"],
        "has_weights": bool(state_dict_file),
    }

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/model.py",
            path_or_fileobj=model_py_file,
        ),
        CommitOperationAdd(
            path_in_repo=f"{base_path}/preprocess.py",
            path_or_fileobj=preproc_py_file,
        ),
    ]

    if state_dict_file:
        operations.append(
            CommitOperationAdd(
                path_in_repo=f"{base_path}/model.pt",
                path_or_fileobj=state_dict_file,
            )
        )

    # Add request.json last. This is the worker's "ready" signal.
    # The payload is passed as in-memory bytes rather than a file on disk.
    operations.append(
        CommitOperationAdd(
            path_in_repo=f"{base_path}/request.json",
            path_or_fileobj=json.dumps(request_json).encode("utf-8"),
        )
    )

    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue IMG2GPS submission {group_id}/{ts}",
    )

    return (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly.",
        ts,
    )