File size: 2,555 Bytes
71be8a5
 
e28e6e6
 
 
 
71be8a5
 
 
 
 
 
 
 
 
e28e6e6
71be8a5
e28e6e6
71be8a5
 
 
e28e6e6
86fd0d5
71be8a5
 
3bf2770
03b0173
3bf2770
 
 
71be8a5
54708e8
71be8a5
3bf2770
71be8a5
 
3bf2770
71be8a5
 
 
 
 
3bf2770
71be8a5
e28e6e6
 
71be8a5
 
 
54708e8
71be8a5
54708e8
 
 
86fd0d5
71be8a5
 
 
e28e6e6
71be8a5
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""Frontend Space configuration (v0.6 — shared `/data` POSIX queue).

All *runtime queue state* lives on a Hugging Face Spaces persistent storage
volume mounted at ``/data``, shared between the Frontend Space and the Worker
Space. Task *scope* (the list of scene_ids that make up a task) lives only in
the Worker repo at ``task_manifest/<task>.json``; the Frontend doesn't read it.

Layout::

    /data/
      queue/
        pending/<sid>.json       (Frontend writes; Worker claims)
        processing/<sid>.json    (Worker only)
        complete/<sid>.json      (Worker only; terminal state)
        progress/<sid>.json      (Worker only; scene-level updates)
      Test/<benchmark>/<scene_id>_trajectory/...   (ground truth, uploaded by ops)
      results/<task>/<team>_best.json
      results/<task>/_history.YYYY-MM.jsonl
      logs/exec-YYYY-MM[.partNN].jsonl
      archive/complete-YYYY-MM/
      tmp/
      tmp_inference_output/<uid>/<benchmark>/<scene_id>_trajectory/
"""
from __future__ import annotations

import os
import re

from huggingface_hub import HfApi

# ---------- HF auth (read-only; only used to probe user_dataset existence) --
# TOKEN is None when the HF_TOKEN environment variable is unset; it is handed
# to HfApi unchanged either way.
# NOTE(review): "read-only" is a property of the token, not enforced here —
# confirm the Space secret is a read-scoped token.
TOKEN = os.environ.get("HF_TOKEN")
# Client is constructed once, at import time of this module.
API = HfApi(token=TOKEN)

# ---------- /data layout ----------------------------------------------------
# Root of the shared volume. PHYS_DATA_ROOT overrides it; "/data" is used when
# the variable is unset.
DATA_ROOT = os.environ.get("PHYS_DATA_ROOT", "/data")

# Queue directories: one sub-directory of <DATA_ROOT>/queue per stage listed in
# the module docstring's layout.
QUEUE_ROOT = os.path.join(DATA_ROOT, "queue")
PENDING_DIR, PROCESSING_DIR, COMPLETE_DIR, PROGRESS_DIR = (
    os.path.join(QUEUE_ROOT, stage)
    for stage in ("pending", "processing", "complete", "progress")
)

# Remaining top-level directories directly under DATA_ROOT.
RESULTS_DIR, TEST_DIR, LOGS_DIR, ARCHIVE_DIR, TMP_DIR = (
    os.path.join(DATA_ROOT, leaf)
    for leaf in ("results", "Test", "logs", "archive", "tmp")
)

# Benchmark names; presumably the <benchmark> level under Test/ in the layout
# described in the module docstring — verify against the consumers.
KNOWN_BENCHMARKS = ("SinglePhysics", "DoublePhysics", "TriplePhysics")

# ---------- input validation -----------------------------------------------
# Both patterns are anchored with ``\Z`` rather than ``$``: in Python ``$`` also
# matches just before a trailing newline, so "name\n" would otherwise pass
# validation when checked with ``.match()`` / ``.search()``.
#
# Display name: 2-40 characters drawn from word characters (Unicode-aware),
# hyphen, dot and space.
DISPLAY_NAME_REGEX = re.compile(r"^[\w\-. ]{2,40}\Z", re.UNICODE)
# Repo id: "<namespace>/<name>", each part 1-96 characters of ASCII letters,
# digits, "_", "." or "-", with exactly one "/" separator.
HF_REPO_ID_REGEX = re.compile(r"^[A-Za-z0-9_.\-]{1,96}/[A-Za-z0-9_.\-]{1,96}\Z")


def ensure_data_layout() -> None:
    """Create the /data subtree if missing. Safe to call on every Frontend boot.

    Creates the queue (pending / processing / complete / progress), results,
    logs, archive and tmp directories. ``TEST_DIR`` is not created here.

    Each directory is attempted independently. An ``OSError`` is never raised
    to the caller; it is logged as a warning so that a missing or unwritable
    data root is visible in the logs instead of failing silently.
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    for d in (PENDING_DIR, PROCESSING_DIR, COMPLETE_DIR, PROGRESS_DIR,
              RESULTS_DIR, LOGS_DIR, ARCHIVE_DIR, TMP_DIR):
        try:
            os.makedirs(d, exist_ok=True)
        except OSError as exc:
            # /data may not exist in local dev; we don't want to crash the
            # import. Log it, though: if the directory cannot be created,
            # later writes to it are likely to fail too.
            log.warning("could not create data directory %s: %s", d, exc)