Spaces:
Running
Running
File size: 3,258 Bytes
cadcc2a 5561a21 cadcc2a 53c87fc 5561a21 cadcc2a 5561a21 bbbd395 5561a21 70d372c 5561a21 cadcc2a 5561a21 cadcc2a 5561a21 cadcc2a 5561a21 bbbd395 cadcc2a 5561a21 56ce6be 53c87fc 5561a21 cadcc2a 5561a21 53c87fc 5561a21 bbbd395 5561a21 8630736 5561a21 cadcc2a 5561a21 cadcc2a 5561a21 cadcc2a 5561a21 cadcc2a 5561a21 cadcc2a 5561a21 cadcc2a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | import json
import os
from datetime import datetime, timezone
from typing import Optional
from huggingface_hub import CommitOperationAdd
from src.envs import API, QUEUE_REPO, PROJ_DIR, PROJ_ID
def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    """Validate that *path* points at a non-empty local file.

    Args:
        path: Local filesystem path to check; may be ``None`` or empty.
        name: Human-readable file label used in error messages.
        required: Whether a missing (falsy) *path* is an error.

    Returns:
        *path* unchanged when it refers to a non-empty regular file, or
        ``None`` when *path* is falsy and the file is not required.

    Raises:
        FileNotFoundError: If *path* is falsy while *required* is true, or
            if *path* is not an existing regular file.
        ValueError: If the file exists but has a size of zero bytes.
    """
    was_provided = bool(path)

    # Nothing was supplied: an error only for mandatory files.
    if not was_provided and required:
        raise FileNotFoundError(f"Missing required file: {name}")
    if not was_provided:
        return None

    is_regular_file = os.path.isfile(path)
    if not is_regular_file:
        raise FileNotFoundError(
            f"Provided path for {name} is not a file on the local file system: {path}"
        )

    size_in_bytes = os.path.getsize(path)
    if size_in_bytes == 0:
        raise ValueError(f"{name} is empty: {path}")

    return path
def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """
    Upload submitted files to the private queue dataset for offline evaluation.

    Layout in repo:
        {PROJ_DIR}/{group_id alias}/{timestamp}/model.py
        {PROJ_DIR}/{group_id alias}/{timestamp}/preprocess.py
        {PROJ_DIR}/{group_id alias}/{timestamp}/model.pt (optional)
        {PROJ_DIR}/{group_id alias}/{timestamp}/request.json

    ``{timestamp}`` is the current UTC time formatted as ``%Y%m%dT%H%M%SZ``.
    When no alias is given (or it is blank), ``NA`` is used in the folder name.

    Args:
        group_id: Identifier of the submitting group; surrounding whitespace
            is stripped. Must be non-blank.
        alias: Optional display alias; surrounding whitespace is stripped.
        state_dict_file: Optional local path to the weights file.
        model_py_file: Local path to the model definition file (required).
        preproc_py_file: Local path to the preprocessing file (required).

    Returns:
        A ``(message, timestamp)`` tuple: a confirmation message and the
        timestamp string used in the repo path.

    Raises:
        ValueError: If *group_id* is blank, if *group_id* or *alias* contains
            a path separator, or if a provided file is empty.
        FileNotFoundError: If a required file is missing or a provided path
            is not a local file.
    """
    if not group_id or not group_id.strip():
        raise ValueError("Group ID is required.")
    group_id = group_id.strip()
    alias = alias.strip() if isinstance(alias, str) else alias
    # alias is already stripped above, so a plain truthiness check suffices.
    alias_clean = alias if isinstance(alias, str) and alias else "NA"

    # Both values are interpolated into the repo path. A path separator would
    # add extra path levels and let a submission land outside its own
    # "{group_id alias}" folder, so reject it up front.
    for label, value in (("Group ID", group_id), ("Alias", alias_clean)):
        if "/" in value or "\\" in value:
            raise ValueError(f"{label} must not contain '/' or '\\': {value!r}")

    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    # Validate every local file before anything is uploaded.
    model_py_file = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_py_file = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    state_dict_file = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "proj": PROJ_ID,
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["url_val"],
        "has_weights": bool(state_dict_file),
    }

    # (file name in repo, local path or raw bytes), in upload order.
    uploads = [
        ("model.py", model_py_file),
        ("preprocess.py", preproc_py_file),
    ]
    if state_dict_file:
        uploads.append(("model.pt", state_dict_file))
    # Add request.json last. This is the worker's "ready" signal.
    uploads.append(("request.json", json.dumps(request_json).encode("utf-8")))

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/{filename}",
            path_or_fileobj=source,
        )
        for filename, source in uploads
    ]

    # All files are added through a single create_commit call.
    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue submission {group_id}/{ts}",
    )

    return (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly.",
        ts,
    )