import json
import os
from datetime import datetime, timezone
from typing import Optional

from huggingface_hub import CommitOperationAdd

from src.envs import API, QUEUE_REPO, PROJ_DIR


def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    """Validate a local upload path and hand it back unchanged.

    Args:
        path: Local file system path, or a falsy value when nothing was given.
        name: Human-readable file label used in error messages.
        required: Whether a missing ``path`` is an error.

    Returns:
        ``path`` when it points at a non-empty regular file, or ``None`` when
        no path was given and the file is optional.

    Raises:
        FileNotFoundError: If a required path is missing, or the given path is
            not a regular file.
        ValueError: If the file exists but has a size of zero bytes.
    """
    if path:
        if not os.path.isfile(path):
            raise FileNotFoundError(
                f"Provided path for {name} is not a file on the local file system: {path}"
            )
        if os.path.getsize(path) == 0:
            raise ValueError(f"{name} is empty: {path}")
        return path

    # Nothing was supplied: only an error when the caller insists on the file.
    if required:
        raise FileNotFoundError(f"Missing required file: {name}")
    return None


def _sanitize_alias(alias: Optional[str]) -> str:
    """Return a folder-safe alias, or ``"NA"`` when no usable alias was given.

    Non-string and blank aliases collapse to ``"NA"``. Otherwise surrounding
    whitespace is trimmed and every ``/`` becomes ``_``.
    """
    trimmed = alias.strip() if isinstance(alias, str) else ""
    if not trimmed:
        return "NA"
    # The frontend polling code derives the same folder name as
    # f"{group_id_norm} {alias_clean}", so inner spaces must survive here;
    # only the path separator is rewritten.
    return trimmed.replace("/", "_")


def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """Queue a submission by committing its files to the queue dataset repo.

    Files are placed under a per-submission folder in ``QUEUE_REPO``:

        {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.py
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/preprocess.py
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.pt      (optional)
        {PROJ_DIR}/{group_id} {alias}/{timestamp}/request.json

    Args:
        group_id: Group identifier; surrounding whitespace is stripped.
        alias: Optional display alias; blank or non-string values are stored
            under the folder alias ``"NA"``.
        state_dict_file: Optional local path to the weights file.
        model_py_file: Local path to the model definition file.
        preproc_py_file: Local path to the preprocessing file.

    Returns:
        A ``(message, timestamp)`` pair: a confirmation message and the UTC
        timestamp string that names the submission folder.

    Raises:
        ValueError: If ``group_id`` is empty or blank, or a provided file is
            empty.
        FileNotFoundError: If a required file is missing or a given path is
            not a regular file.

    NOTE(review): unlike the alias, ``group_id`` is not stripped of ``/``, so
    such a value would add extra path levels. Confirm against the frontend's
    ``group_id_norm`` before changing this.
    """
    group_id = group_id.strip() if group_id else ""
    if not group_id:
        raise ValueError("Group ID is required.")

    if isinstance(alias, str):
        alias = alias.strip()

    ts = datetime.now(tz=timezone.utc).strftime("%Y%m%dT%H%M%SZ")

    # IMPORTANT: the frontend polling code looks for
    #     folder_name = f"{group_id_norm} {alias_clean}"
    # so the separator must stay a single space, i.e. "{group_id} {alias}"
    # and not "{group_id} + {alias}".
    alias_clean = _sanitize_alias(alias)
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    # Validate in a fixed order: model.py, preprocess.py, then optional weights.
    model_py_file = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_py_file = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    state_dict_file = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["img_val"],
        "has_weights": bool(state_dict_file),
    }

    # (name in repo, local path) pairs for the file-backed uploads.
    uploads = [
        ("model.py", model_py_file),
        ("preprocess.py", preproc_py_file),
    ]
    if state_dict_file:
        uploads.append(("model.pt", state_dict_file))

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/{repo_name}",
            path_or_fileobj=local_path,
        )
        for repo_name, local_path in uploads
    ]

    # request.json goes last: it is the worker's "ready" signal.
    manifest_op = CommitOperationAdd(
        path_in_repo=f"{base_path}/request.json",
        path_or_fileobj=json.dumps(request_json).encode("utf-8"),
    )
    operations.append(manifest_op)

    # All operations are sent in a single create_commit call.
    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue IMG2GPS submission {group_id}/{ts}",
    )

    message = (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly."
    )
    return message, ts