"""MoveTSA dataset builder — Hugging Face Space (gated via HF login + allowlist).

Generates a **parametrised** windows parquet (HRV / BSI / RSP / simulator
aggregates / DATEX / subjective labels) from the PRIVATE raw recordings,
entirely server-side. Authorised users only ever download the generated
parquet — they never get access to the raw ECG/SIMU files.

Backing repos (private, read with the ``HF_TOKEN`` Space secret):
  - ``MoveTSA/movetsa-raw`` : raw recordings (stay inside the Space)
  - ``MoveTSA/MoveTSA``     : pipeline code (downloaded at startup, importable
                              as the ``MoveTSA`` package)

Access control: the Space is public, but generation requires signing in with
Hugging Face and being on the allowlist (the ``ALLOWLIST`` Space variable, a
comma-separated list of usernames). Access is requested from ``@thbndi``.

Programmatic access (gradio_client / the MovetsaDataset loading script):
  - named API endpoint ``/generate`` with 6 inputs, in this order:
        window_size, overlap, normalize, include_baselines,
        include_familiarization, hf_token
  - identity: in the browser via ``gr.OAuthProfile`` (the LoginButton); over
    the API via the explicit ``hf_token`` argument, resolved with ``whoami``.
    ``gr.OAuthToken`` is NOT populated over the gradio_client API — it only
    works through the browser OAuth login — so headless callers must pass their
    token as the 6th argument.
"""
import os
import sys
import tempfile
import gradio as gr
from huggingface_hub import snapshot_download, whoami
# --------------------------------------------------------------- configuration
# Everything below is read once, at import time, from the Space environment.
TOKEN = os.getenv("HF_TOKEN")
CODE_REPO = os.getenv("CODE_REPO", "MoveTSA/MoveTSA")
RAW_REPO = os.getenv("RAW_REPO", "MoveTSA/movetsa-raw")
OWNER = os.getenv("OWNER_HANDLE", "thbndi")

# HF usernames allowed to generate, taken from the comma-separated ALLOWLIST
# Space *variable* (Settings -> Variables and secrets), so access can be
# changed without touching the code. Entries are trimmed and lower-cased,
# blank entries are dropped, and the owner alone is the default.
ALLOWLIST = set(
    filter(
        None,
        (entry.strip().lower()
         for entry in os.getenv("ALLOWLIST", OWNER).split(",")),
    )
)

# Directory holding previously generated parquet files so that a repeated
# request can be answered without re-running the (heavy) pipeline. Defaults
# to a "movetsa_out" folder under the system temp dir; created if missing.
CACHE_DIR = os.getenv(
    "MOVETSA_CACHE",
    os.path.join(tempfile.gettempdir(), "movetsa_out"),
)
os.makedirs(CACHE_DIR, exist_ok=True)
# ----------------------------------------------------------------- startup
# Everything in this section runs at import time and its order matters: the
# code snapshot must exist and the working directory must be on sys.path
# before the `MoveTSA` import at the bottom can succeed.
#
# 1) Pipeline code -> importable as the `MoveTSA` package (cwd on sys.path).
#    Only *.py / *.yaml files are fetched, into ./MoveTSA (relative to the
#    current working directory), which is then put first on sys.path.
#    NOTE(review): the code repo is fetched with repo_type="dataset" —
#    confirm CODE_REPO is really hosted as a dataset repo.
snapshot_download(CODE_REPO, repo_type="dataset", token=TOKEN,
local_dir="MoveTSA", allow_patterns=["*.py", "*.yaml"])
sys.path.insert(0, os.getcwd())
# 2) Raw recordings — private, kept inside the Space, never served to users.
#    Restricted to the per-subject folders ("S*/**") and the subjective
#    scores CSV. RAW_DIR is the value returned by snapshot_download and is
#    later handed to build_windows as the recordings root.
RAW_DIR = snapshot_download(RAW_REPO, repo_type="dataset", token=TOKEN,
allow_patterns=["S*/**", "subjective_scores.csv"])
# Late import on purpose: the package only exists once step 1 has run.
# NOTE(review): presumably ./MoveTSA imports as a regular or namespace
# package from cwd — confirm whether the repo ships an __init__.py.
from MoveTSA.export_hf_dataset import build_windows # noqa: E402 (needs sys.path)
def _resolve_username(profile, oauth_token, hf_token):
    """Return the caller's Hugging Face username, or ``None`` if unknown.

    In the browser, Gradio injects ``profile`` after the OAuth login and its
    ``username`` is returned directly. Headless callers (gradio_client / the
    MovetsaDataset loading script) cannot go through the OAuth flow, so they
    pass their token explicitly and the username is resolved with ``whoami``.
    ``oauth_token`` is kept as a fallback but is ``None`` over the API.

    Args:
        profile: object with a ``username`` attribute, or ``None``.
        oauth_token: object with a ``token`` attribute, or ``None``.
        hf_token: explicit HF access token string (may be empty or ``None``).

    Returns:
        The username string, or ``None`` when no usable identity was supplied
        or the token lookup failed. Never raises for a bad token.
    """
    if profile is not None:
        return profile.username

    # Tokens read from files or environment variables often carry a trailing
    # newline or spaces; strip them so an otherwise valid token is accepted.
    # Anything that is not a string is treated as "no token".
    explicit = hf_token.strip() if isinstance(hf_token, str) else ""
    injected = getattr(oauth_token, "token", None)
    injected = injected.strip() if isinstance(injected, str) else ""

    # The explicit token wins; the injected OAuth token is only a fallback
    # (and is also used when the explicit one is blank).
    token = explicit or injected
    if not token:
        return None

    try:
        return whoami(token=token).get("name")
    except Exception as exc:  # noqa: BLE001
        # Best effort by design: any failure means "not identified". Leave a
        # trace for the operator, but log only the exception type so the
        # token can never end up in the logs.
        import logging

        logging.getLogger(__name__).warning(
            "Hugging Face token lookup failed (%s)", type(exc).__name__)
        return None
def generate(window_size, overlap, normalize, include_baselines,
             include_familiarization, hf_token="",
             profile: gr.OAuthProfile | None = None,
             oauth_token: gr.OAuthToken | None = None,
             progress=gr.Progress(track_tqdm=True)):
    """Run the pipeline with the chosen parameters and return a parquet file.

    ``hf_token`` is the 6th API input: headless callers pass their HF token so
    the Space can identify them (the browser leaves it empty and authenticates
    via the LoginButton). ``profile``/``oauth_token`` are injected by Gradio
    from their type annotations and are NOT part of the API signature.
    ``progress`` is not used in the body; it is declared so Gradio tracks
    tqdm progress during the call.

    Args:
        window_size: window length in seconds; coerced with ``int`` and
            required to be within 15..180.
        overlap: window overlap fraction within 0..0.9; snapped to a
            resolution of 0.01 so that the value run through the pipeline and
            the value encoded in the cache file name are always the same.
        normalize: ``"zscore"``, ``"center"`` or ``"none"``.
        include_baselines: truthy to include baselines.
        include_familiarization: truthy to include familiarization.
        hf_token: explicit HF token for headless callers.

    Returns:
        ``(path, message)``: the path of the generated (or cached) parquet and
        a Markdown status line, or ``(None, message)`` when the caller is not
        authorised or a parameter is invalid.
    """
    username = _resolve_username(profile, oauth_token, hf_token)
    if username is None:
        return None, "Sign in with your Hugging Face account to generate."
    if username.lower() not in ALLOWLIST:
        return None, (
            f"Access not granted for **@{username}**.\n\n"
            f"Request access from **@{OWNER}** (to be added to the allowlist)."
        )

    # --- validate parameters (API callers are not bound by the sliders) ---
    try:
        ws = int(window_size)
        ov = float(overlap)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — reachable from API callers.
        return None, "Invalid parameters."
    if not (15 <= ws <= 180):
        return None, "window_size must be between 15 and 180 s."
    if not (0.0 <= ov <= 0.9):  # also rejects NaN
        return None, "overlap must be between 0 and 0.9."
    if normalize not in ("zscore", "center", "none"):
        return None, "normalize must be 'zscore', 'center' or 'none'."
    include_baselines = bool(include_baselines)
    include_familiarization = bool(include_familiarization)

    # Snap the overlap to 0.01 and derive the file-name tag by ROUNDING.
    # Truncating (int(ov * 100)) maps distinct overlaps onto the same cache
    # key — e.g. 0.29 * 100 == 28.999999999999996 -> 28, colliding with 0.28 —
    # so a parquet built with another overlap would be served from the cache.
    ov = round(ov, 2)
    ov_pct = round(ov * 100)

    # Stable filename = also the cache key (includes the B/F flags).
    fname = (f"movetsa_w{ws}_ov{ov_pct}_{normalize}"
             f"_b{int(include_baselines)}_f{int(include_familiarization)}.parquet")
    cached = os.path.join(CACHE_DIR, fname)
    if os.path.exists(cached):
        return cached, f"Served from cache — `{fname}`"

    df = build_windows(
        RAW_DIR,
        window_size=ws,
        window_overlap=ov,
        normalize=(None if normalize == "none" else normalize),
        include_baselines=include_baselines,
        include_familiarization=include_familiarization,
        verbose=False,
    )

    # Write to a temporary file in the cache directory and move it into place
    # only once complete, so an interrupted or failed write can never leave a
    # truncated parquet under the cache key.
    fd, tmp_path = tempfile.mkstemp(dir=CACHE_DIR, prefix=fname + ".",
                                    suffix=".tmp")
    os.close(fd)
    try:
        df.to_parquet(tmp_path, index=False)
        os.replace(tmp_path, cached)
    finally:
        # No-op after a successful replace; removes the leftover on failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    msg = (f"**{len(df)} windows × {df.shape[1]} columns** "
           f"({df['subject'].nunique()} subjects) — `{fname}`")
    return cached, msg
# ---------------------------------------------------------------------- UI
# Two-column layout: parameters and the trigger button on the left, status
# message and the downloadable parquet on the right.
with gr.Blocks(title="MoveTSA dataset builder") as demo:
    gr.Markdown(
        "# MoveTSA dataset builder\n"
        "Generate a **parametrised HRV / BSI / RSP / simulator** parquet from the "
        "(private) raw recordings. You download **only** the generated parquet — "
        "never the raw data.\n\n"
        "1. Sign in with Hugging Face. 2. Set the parameters. 3. Generate."
    )
    gr.LoginButton()
    with gr.Row():
        with gr.Column():
            # Slider/dropdown bounds mirror the server-side validation done in
            # `generate`, which re-checks them because API callers bypass the
            # widgets.
            window_size = gr.Slider(15, 180, value=60, step=5,
                                    label="window_size (s)")
            overlap = gr.Slider(0.0, 0.9, value=0.5, step=0.05, label="overlap")
            normalize = gr.Dropdown(["zscore", "center", "none"],
                                    value="zscore", label="normalize")
            include_baselines = gr.Checkbox(
                value=True, label="include baselines (B1–B4)")
            include_familiarization = gr.Checkbox(
                value=True, label="include familiarization (F)")
            # 6th API input: headless callers pass their HF token here; the
            # browser leaves it empty and authenticates via the LoginButton.
            hf_token = gr.Textbox(value="", visible=False, label="hf_token")
            btn = gr.Button("Generate parquet", variant="primary")
        with gr.Column():
            status = gr.Markdown()
            out_file = gr.File(label="generated parquet")
    # The input order below is the public API contract of `/generate`.
    btn.click(
        generate,
        inputs=[window_size, overlap, normalize, include_baselines,
                include_familiarization, hf_token],
        outputs=[out_file, status],
        api_name="generate",  # => client.predict(..., api_name="/generate")
    )

if __name__ == "__main__":
    demo.launch()