File size: 3,742 Bytes
a99e0a8
ca67170
 
 
 
a99e0a8
 
ad067af
ca67170
 
a99e0a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca67170
 
 
 
276a433
ca67170
 
 
ef2699c
ca67170
a99e0a8
 
ca67170
a99e0a8
 
 
 
ca67170
 
 
a99e0a8
ca67170
276a433
a99e0a8
ca67170
a99e0a8
 
 
 
 
 
 
 
 
 
 
 
ca67170
 
 
276a433
ca67170
 
538c63e
ca67170
 
 
a99e0a8
 
 
 
 
 
 
 
 
 
ca67170
a99e0a8
 
 
 
 
 
 
ca67170
a99e0a8
 
 
 
 
 
ca67170
 
a99e0a8
ca67170
 
a99e0a8
 
ca67170
 
a99e0a8
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import json
import os
from datetime import datetime, timezone
from typing import Optional

from huggingface_hub import CommitOperationAdd

from src.envs import API, QUEUE_REPO, PROJ_DIR


def _assert_file_exists(path: Optional[str], name: str, required: bool = True) -> Optional[str]:
    if not path:
        if required:
            raise FileNotFoundError(f"Missing required file: {name}")
        return None

    if not os.path.isfile(path):
        raise FileNotFoundError(
            f"Provided path for {name} is not a file on the local file system: {path}"
        )

    if os.path.getsize(path) == 0:
        raise ValueError(f"{name} is empty: {path}")

    return path


def _sanitize_alias(alias: Optional[str]) -> str:
    if not isinstance(alias, str) or not alias.strip():
        return "NA"

    # Keep this consistent with frontend polling:
    # frontend uses folder_name = f"{group_id_norm} {alias_clean}"
    # so avoid changing spaces, only remove path-breaking characters.
    return alias.strip().replace("/", "_")


def queue_student_submission(
    group_id: str,
    alias: Optional[str],
    state_dict_file: Optional[str],
    model_py_file: str,
    preproc_py_file: str,
) -> tuple[str, str]:
    """
    Commit a group's submission files to the queue dataset repo in one commit.

    Files are placed under a per-submission folder:
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.py
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/preprocess.py
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/model.pt  (only if weights given)
      {PROJ_DIR}/{group_id} {alias}/{timestamp}/request.json

    Args:
        group_id: Group identifier; surrounding whitespace is stripped.
        alias: Optional display alias. The folder name uses the sanitized
            alias, while request.json stores the stripped (unsanitized) value.
        state_dict_file: Optional local path to the weights file.
        model_py_file: Local path to the model definition (required).
        preproc_py_file: Local path to the preprocessing script (required).

    Returns:
        A ``(message, timestamp)`` pair: a confirmation message and the UTC
        timestamp string (``%Y%m%dT%H%M%SZ``) used in the upload path.

    Raises:
        ValueError: ``group_id`` is missing or blank, or a provided file is
            empty (raised by ``_assert_file_exists``).
        FileNotFoundError: A required file is missing or a given path is not
            a file (raised by ``_assert_file_exists``).
    """
    if not group_id or not group_id.strip():
        raise ValueError("Group ID is required.")
    group_id = group_id.strip()

    if isinstance(alias, str):
        alias = alias.strip()

    ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")

    # IMPORTANT: the frontend polling code looks for
    #   folder_name = f"{group_id_norm} {alias_clean}"
    # so the separator must be a single space ("{group_id} {alias}"),
    # not "{group_id} + {alias}".
    alias_clean = _sanitize_alias(alias)
    folder_name = f"{group_id} {alias_clean}"
    base_path = f"{PROJ_DIR}/{folder_name}/{ts}"

    # Validate every local file before any commit operation is built.
    model_source = _assert_file_exists(model_py_file, "model.py", required=True)
    preproc_source = _assert_file_exists(preproc_py_file, "preprocess.py", required=True)
    weights_source = _assert_file_exists(state_dict_file, "model.pt", required=False)

    request_json = {
        "group_id": group_id,
        "alias": alias,
        "timestamp": ts,
        "status": "PENDING",
        "datasets": ["img_val"],
        "has_weights": bool(weights_source),
    }

    # (file name in repo, local path or raw bytes) in upload order.
    uploads = [
        ("model.py", model_source),
        ("preprocess.py", preproc_source),
    ]
    if weights_source:
        uploads.append(("model.pt", weights_source))
    # request.json goes last: it is the worker's "ready" signal.
    uploads.append(("request.json", json.dumps(request_json).encode("utf-8")))

    operations = [
        CommitOperationAdd(
            path_in_repo=f"{base_path}/{filename}",
            path_or_fileobj=payload,
        )
        for filename, payload in uploads
    ]

    API.create_commit(
        repo_id=QUEUE_REPO,
        repo_type="dataset",
        operations=operations,
        commit_message=f"Queue IMG2GPS submission {group_id}/{ts}",
    )

    confirmation = (
        f"Submission queued for Group '{group_id}' at {ts}. "
        "Your model will be evaluated shortly."
    )
    return confirmation, ts