Spaces:
Running
Running
| import json | |
| import os | |
| from datetime import datetime, timezone | |
| from typing import Optional | |
| from huggingface_hub import CommitOperationAdd | |
| from src.envs import API, QUEUE_REPO, PROJ_DIR, PROJ_ID | |
def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    """Validate that *path* refers to a non-empty regular file.

    Args:
        path: Local file system path to check; may be ``None`` or empty.
        name: Label for the file, used only in error messages.
        required: When true, a missing (falsy) *path* is an error.

    Returns:
        *path* unchanged when it names a non-empty file, or ``None`` when
        *path* is falsy and the file is not required.

    Raises:
        FileNotFoundError: If *path* is falsy while ``required`` is true, or
            if *path* is not an existing regular file.
        ValueError: If the file exists but has a size of zero bytes.
    """
    if not path:
        if required:
            raise FileNotFoundError(f"Missing required file: {name}")
        return None
    if not os.path.isfile(path):
        raise FileNotFoundError(
            f"Provided path for {name} is not a file on the local file system: {path}"
        )
    if os.path.getsize(path) == 0:
        raise ValueError(f"{name} is empty: {path}")
    return path
def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """Upload submitted files to the private queue dataset for offline evaluation.

    Layout in repo:
        {PROJ_DIR}/{group_id alias}/{timestamp}/model.py
        {PROJ_DIR}/{group_id alias}/{timestamp}/preprocess.py
        {PROJ_DIR}/{group_id alias}/{timestamp}/model.pt (optional)
        {PROJ_DIR}/{group_id alias}/{timestamp}/request.json

    All files are added through a single ``API.create_commit`` call.

    Args:
        group_id: Identifier of the submitting group; surrounding whitespace
            is stripped and the result must be non-empty.
        alias: Optional display alias; when missing or blank the folder name
            uses ``"NA"`` in its place.
        state_dict_file: Optional local path to the weights file (``model.pt``).
        model_py_file: Local path to the model definition (``model.py``).
        preproc_py_file: Local path to the preprocessing code (``preprocess.py``).

    Returns:
        A ``(message, timestamp)`` tuple: a user-facing confirmation message
        and the UTC timestamp string used in the repo path.

    Raises:
        ValueError: If ``group_id`` is empty/blank, if ``group_id`` or
            ``alias`` contains a path separator, or if a provided file is empty.
        FileNotFoundError: If a required file is missing or a provided path
            is not a regular file.
    """
    if not group_id or not group_id.strip():
        raise ValueError("Group ID is required.")
    group_id = group_id.strip()
    alias = alias.strip() if isinstance(alias, str) else alias
    # Folder names always carry an alias component; fall back to "NA".
    alias_clean = alias if isinstance(alias, str) and alias else "NA"

    # Both values are user-supplied and become one path component in the
    # repo; a separator would let a submission land outside its own folder.
    for label, value in (("Group ID", group_id), ("Alias", alias_clean)):
        if "/" in value or "\\" in value:
            raise ValueError(f"{label} must not contain path separators: {value!r}")

    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    model_py_file = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_py_file = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    state_dict_file = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "proj": PROJ_ID,
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["url_val"],
        "has_weights": bool(state_dict_file),
    }

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/model.py",
            path_or_fileobj=model_py_file,
        ),
        CommitOperationAdd(
            path_in_repo=f"{base_path}/preprocess.py",
            path_or_fileobj=preproc_py_file,
        ),
    ]
    if state_dict_file:
        operations.append(
            CommitOperationAdd(
                path_in_repo=f"{base_path}/model.pt",
                path_or_fileobj=state_dict_file,
            )
        )
    # Add request.json last. This is the worker's "ready" signal.
    operations.append(
        CommitOperationAdd(
            path_in_repo=f"{base_path}/request.json",
            path_or_fileobj=json.dumps(request_json).encode("utf-8"),
        )
    )

    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue submission {group_id}/{ts}",
    )
    return (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly.",
        ts,
    )