IMG2GPS / src /submission /student_queue.py
realdanzo's picture
update
a99e0a8
import json
import os
from datetime import datetime, timezone
from typing import Optional
from huggingface_hub import CommitOperationAdd
from src.envs import API, QUEUE_REPO, PROJ_DIR
def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    """Validate a local file path and hand it back unchanged.

    Args:
        path: Local filesystem path to check; may be ``None`` or empty.
        name: Label for the file, used only in error messages.
        required: Whether a missing ``path`` should be treated as an error.

    Returns:
        ``path`` when it points at a non-empty regular file, or ``None`` when
        no path was given and the file is optional.

    Raises:
        FileNotFoundError: If a required path is missing, or the given path
            is not an existing regular file.
        ValueError: If the file exists but contains zero bytes.
    """
    if path:
        if not os.path.isfile(path):
            raise FileNotFoundError(
                f"Provided path for {name} is not a file on the local file system: {path}"
            )
        size_in_bytes = os.path.getsize(path)
        if size_in_bytes == 0:
            raise ValueError(f"{name} is empty: {path}")
        return path

    # Nothing was supplied: that is only an error for mandatory files.
    if required:
        raise FileNotFoundError(f"Missing required file: {name}")
    return None
def _sanitize_alias(alias: Optional[str]) -> str:
    """Return an alias that is safe to embed in a repo folder name.

    Non-string or blank input yields the placeholder ``"NA"``. Otherwise the
    alias is trimmed of surrounding whitespace and every ``/`` is replaced
    with ``_``.
    """
    trimmed = alias.strip() if isinstance(alias, str) else ""
    if not trimmed:
        return "NA"

    # Must stay in sync with the frontend polling code, which builds
    #   folder_name = f"{group_id_norm} {alias_clean}"
    # Interior spaces are therefore left alone; only the path separator
    # is replaced.
    return trimmed.replace("/", "_")
def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """
    Upload submitted files to the private queue dataset for offline evaluation.

    Layout in repo:
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.py
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/preprocess.py
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.pt      (optional)
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/request.json

    Args:
        group_id: Submitting group's identifier. Surrounding whitespace is
            stripped. It must be non-blank and must not contain ``/``.
        alias: Optional display alias. Blank or non-string values become
            ``"NA"`` in the folder name; the stripped value is stored as-is
            in ``request.json``.
        state_dict_file: Optional local path to the model weights.
        model_py_file: Local path to the model definition (required).
        preproc_py_file: Local path to the preprocessing script (required).

    Returns:
        A ``(message, timestamp)`` tuple: a confirmation message and the UTC
        timestamp string (``%Y%m%dT%H%M%SZ``) that names the submission folder.

    Raises:
        ValueError: If ``group_id`` is blank or contains ``/``, or if a
            provided file is empty.
        FileNotFoundError: If a required file is missing or a provided path
            is not a file.
    """
    if not group_id or not group_id.strip():
        raise ValueError("Group ID is required.")
    group_id = group_id.strip()

    # The group ID is embedded verbatim in the repo path below. A "/" would
    # add extra path segments (e.g. "a/b" or "../x") and put the files
    # outside the documented "{PROJ_DIR}/{group_id} {alias}/{timestamp}/"
    # layout. Reject it instead of rewriting it, so the folder name for every
    # accepted ID stays exactly what the frontend polling code expects.
    if "/" in group_id:
        raise ValueError("Group ID must not contain '/'.")

    # Stored as-is in request.json; the folder name uses the sanitized form.
    alias = alias.strip() if isinstance(alias, str) else alias
    alias_clean = _sanitize_alias(alias)

    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

    # IMPORTANT:
    # The frontend polling code looks for:
    #   folder_name = f"{group_id_norm} {alias_clean}"
    # so this must be "{group_id} {alias}", not "{group_id} + {alias}".
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    # Validate all local files before building any commit operation.
    model_py_file = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_py_file = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    state_dict_file = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["img_val"],
        "has_weights": bool(state_dict_file),
    }

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/model.py",
            path_or_fileobj=model_py_file,
        ),
        CommitOperationAdd(
            path_in_repo=f"{base_path}/preprocess.py",
            path_or_fileobj=preproc_py_file,
        ),
    ]

    if state_dict_file:
        operations.append(
            CommitOperationAdd(
                path_in_repo=f"{base_path}/model.pt",
                path_or_fileobj=state_dict_file,
            )
        )

    # Add request.json last. This is the worker's "ready" signal.
    operations.append(
        CommitOperationAdd(
            path_in_repo=f"{base_path}/request.json",
            path_or_fileobj=json.dumps(request_json).encode("utf-8"),
        )
    )

    # All files are submitted in a single create_commit call.
    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue IMG2GPS submission {group_id}/{ts}",
    )

    return (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly.",
        ts,
    )