File size: 3,258 Bytes
cadcc2a
5561a21
 
 
 
cadcc2a
 
53c87fc
5561a21
 
cadcc2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5561a21
 
 
 
bbbd395
5561a21
 
 
70d372c
5561a21
cadcc2a
 
5561a21
cadcc2a
 
 
 
5561a21
 
 
cadcc2a
5561a21
bbbd395
cadcc2a
5561a21
56ce6be
 
53c87fc
5561a21
cadcc2a
 
 
 
5561a21
53c87fc
5561a21
bbbd395
5561a21
 
8630736
5561a21
 
 
cadcc2a
 
 
 
 
 
 
 
 
 
5561a21
cadcc2a
 
 
 
 
 
 
5561a21
cadcc2a
 
 
 
 
 
5561a21
 
cadcc2a
5561a21
 
cadcc2a
 
5561a21
 
cadcc2a
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import json
import os
from datetime import datetime, timezone
from typing import Optional

from huggingface_hub import CommitOperationAdd

from src.envs import API, QUEUE_REPO, PROJ_DIR, PROJ_ID


def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    """Validate an uploaded file path and hand it back unchanged.

    Args:
        path: Local file system path to check; may be ``None`` or empty when
            nothing was provided.
        name: Human-readable file label used in the error messages.
        required: Whether a missing ``path`` is an error.

    Returns:
        ``path`` itself when it points at a non-empty regular file, or
        ``None`` when no path was given and the file is optional.

    Raises:
        FileNotFoundError: No path was given for a required file, or the path
            is not an existing regular file.
        ValueError: The file exists but has a size of zero bytes.
    """
    if path:
        # A path was supplied: it must be a real, non-empty file.
        if not os.path.isfile(path):
            raise FileNotFoundError(
                f"Provided path for {name} is not a file on the local file system: {path}"
            )

        size_in_bytes = os.path.getsize(path)
        if size_in_bytes == 0:
            raise ValueError(f"{name} is empty: {path}")

        return path

    # Nothing supplied: only acceptable for optional files.
    if not required:
        return None
    raise FileNotFoundError(f"Missing required file: {name}")


def _reject_unsafe_path_component(value: str, label: str) -> None:
    """Raise ValueError if *value* cannot be embedded in a single repo path segment."""
    for ch in value:
        # Separators would add extra path segments; ASCII control characters
        # (newline, tab, DEL, ...) would corrupt the path and commit message.
        if ch in "/\\" or ch < " " or ch == "\x7f":
            raise ValueError(
                f"{label} must not contain path separators or control characters."
            )


def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """
    Upload submitted files to the private queue dataset for offline evaluation.

    Layout in repo:
      {PROJ_DIR}/{group_id alias}/{timestamp}/model.py
      {PROJ_DIR}/{group_id alias}/{timestamp}/preprocess.py
      {PROJ_DIR}/{group_id alias}/{timestamp}/model.pt  (optional)
      {PROJ_DIR}/{group_id alias}/{timestamp}/request.json

    All files are added through one ``API.create_commit`` call against
    ``QUEUE_REPO`` (``repo_type="dataset"``).

    Args:
        group_id: Group identifier; surrounding whitespace is stripped. It
            becomes part of the folder name, so it must not contain ``/``,
            ``\\`` or control characters.
        alias: Optional display alias; surrounding whitespace is stripped.
            A missing or blank alias is written as ``NA`` in the folder name.
            The same character restrictions as for ``group_id`` apply.
        state_dict_file: Optional local path to the weights file, uploaded as
            ``model.pt`` when provided.
        model_py_file: Local path to the model source, uploaded as ``model.py``.
        preproc_py_file: Local path to the preprocessing source, uploaded as
            ``preprocess.py``.

    Returns:
        A ``(message, timestamp)`` tuple: a user-facing confirmation string and
        the UTC timestamp (``YYYYmmddTHHMMSSZ``) that names the submission
        folder.

    Raises:
        ValueError: ``group_id`` is missing/blank, ``group_id`` or ``alias``
            contains a path separator or control character, or a provided
            file is empty.
        FileNotFoundError: A required file is missing, or a provided path is
            not a file.
    """
    if not group_id or not group_id.strip():
        raise ValueError("Group ID is required.")

    group_id = group_id.strip()
    alias = alias.strip() if isinstance(alias, str) else alias

    # Both values are interpolated into the repo path below. Without this
    # check a "/" in either one would escape the per-group folder and break
    # the layout documented above.
    _reject_unsafe_path_component(group_id, "Group ID")
    if isinstance(alias, str):
        _reject_unsafe_path_component(alias, "Alias")

    # NOTE(review): the timestamp has one-second resolution, so two
    # submissions from the same group/alias within the same second map to the
    # same base_path — confirm whether that needs handling.
    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    # alias is already stripped here; a blank or non-string alias becomes "NA".
    alias_clean = alias if isinstance(alias, str) and alias else "NA"
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    model_py_file = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_py_file = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    state_dict_file = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "proj": PROJ_ID,
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["url_val"],
        "has_weights": bool(state_dict_file),
    }

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/model.py",
            path_or_fileobj=model_py_file,
        ),
        CommitOperationAdd(
            path_in_repo=f"{base_path}/preprocess.py",
            path_or_fileobj=preproc_py_file,
        ),
    ]

    if state_dict_file:
        operations.append(
            CommitOperationAdd(
                path_in_repo=f"{base_path}/model.pt",
                path_or_fileobj=state_dict_file,
            )
        )

    # Add request.json last. This is the worker's "ready" signal.
    operations.append(
        CommitOperationAdd(
            path_in_repo=f"{base_path}/request.json",
            path_or_fileobj=json.dumps(request_json).encode("utf-8"),
        )
    )

    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue submission {group_id}/{ts}",
    )

    return (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly.",
        ts,
    )