Spaces:
Running on Zero
Running on Zero
| """ | |
| Best-effort archiving of each run to a PRIVATE Hugging Face dataset, plus helpers | |
| for the per-user account page. Uploads run in a background thread so they never | |
| delay the user's download, and failures are swallowed — storage never breaks the app. | |
Layout
------
Anonymous runs (auto-deleted after 14 days by the cleanup workflow):
    runs/YYYY-MM-DD/<id>/               input.<ext>  <stem>.wav ...  meta.json

Logged-in users (kept forever — cleanup only touches runs/):
    users/<user_id>/songs/<song_id>/    song_id = sha1(audio), so re-uploads of the same audio group together
        input.<ext>                     the original upload (stored once per song)
        song.json                       title, original filename, uploaded_at
        runs/<run_id>/                  one per separation of this song
            <stem>.wav ...
            meta.json                   engine, mode, shifts, overlap, timestamp
Env: HF_TOKEN or HUGGINGFACEHUB_API_TOKEN (WRITE access to the dataset), STORAGE_DATASET (dataset repo id), STORE_RUNS=0 to disable.
| """ | |
| from __future__ import annotations | |
| import os | |
| import io | |
| import re | |
| import json | |
| import shutil | |
| import uuid | |
| import hashlib | |
| import datetime | |
| import tempfile | |
| import threading | |
# Repo id of the dataset that receives the archives; override with STORAGE_DATASET.
DATASET = os.getenv("STORAGE_DATASET", "vincewin/stem-worker-data")
| def _token(): | |
| return os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN") | |
def enabled() -> bool:
    """Tell whether runs should be archived.

    Archiving is off when ``STORE_RUNS`` is exactly ``"0"`` and otherwise
    requires a token to be configured.
    """
    if os.environ.get("STORE_RUNS", "1") == "0":
        # Explicit opt-out: do not even look for a token.
        return False
    return bool(_token())
| def _safe_id(s) -> str: | |
| return re.sub(r"[^A-Za-z0-9._-]", "_", str(s))[:64] or "user" | |
| def _hash_file(path, n=12) -> str: | |
| h = hashlib.sha1() | |
| with open(path, "rb") as f: | |
| for chunk in iter(lambda: f.read(1 << 20), b""): | |
| h.update(chunk) | |
| return h.hexdigest()[:n] | |
def save_run_async(input_path, stem_paths, meta=None, user_id=None):
    """Archive one run in the background without waiting for it.

    Does nothing when archiving is disabled. Otherwise a daemon thread is
    started on ``_save_run`` with private copies of ``stem_paths`` and
    ``meta``. Passing ``user_id`` selects the permanent per-user storage.
    """
    if enabled():
        stems = list(stem_paths or [])
        details = dict(meta or {})
        worker = threading.Thread(
            target=_save_run,
            args=(input_path, stems, details, user_id),
            daemon=True,
        )
        worker.start()
| def _save_run(input_path, stem_paths, meta, user_id=None): | |
| stage = None | |
| try: | |
| from huggingface_hub import HfApi | |
| api = HfApi(token=_token()) | |
| now = datetime.datetime.now() | |
| run_id = now.strftime("%Y%m%d-%H%M%S-") + uuid.uuid4().hex[:6] | |
| has_input = bool(input_path and os.path.exists(input_path)) | |
| in_ext = (os.path.splitext(input_path)[1] if has_input else "") or ".bin" | |
| if user_id: | |
| uid = _safe_id(user_id) | |
| song_id = _hash_file(input_path) if has_input else uuid.uuid4().hex[:12] | |
| song_dir = f"users/{uid}/songs/{song_id}" | |
| # upload the original + song.json once per song | |
| if has_input and not api.file_exists(repo_id=DATASET, repo_type="dataset", | |
| filename=f"{song_dir}/input{in_ext}"): | |
| api.upload_file(path_or_fileobj=input_path, repo_id=DATASET, repo_type="dataset", | |
| path_in_repo=f"{song_dir}/input{in_ext}", | |
| commit_message=f"song {song_id} input") | |
| info = {"title": meta.get("title") or os.path.splitext(os.path.basename(input_path))[0], | |
| "original_filename": os.path.basename(input_path), | |
| "uploaded_at": now.isoformat(), "song_id": song_id} | |
| api.upload_file(path_or_fileobj=io.BytesIO(json.dumps(info, indent=2).encode()), | |
| repo_id=DATASET, repo_type="dataset", | |
| path_in_repo=f"{song_dir}/song.json", | |
| commit_message=f"song {song_id} meta") | |
| run_prefix = f"{song_dir}/runs/{run_id}" | |
| else: | |
| run_prefix = f"runs/{now.strftime('%Y-%m-%d')}/{run_id}" | |
| # stage the run folder (stems + meta, plus input for anonymous) and upload once | |
| stage = tempfile.mkdtemp(prefix="runsave_") | |
| for p in stem_paths: | |
| if p and os.path.exists(p): | |
| shutil.copy2(p, os.path.join(stage, os.path.basename(p))) | |
| if not user_id and has_input: | |
| shutil.copy2(input_path, os.path.join(stage, "input" + in_ext)) | |
| meta = {**meta, "run_id": run_id, "timestamp": now.isoformat(), | |
| "n_stems": len([p for p in stem_paths if p and os.path.exists(p)])} | |
| with open(os.path.join(stage, "meta.json"), "w", encoding="utf-8") as fh: | |
| json.dump(meta, fh, indent=2) | |
| api.upload_folder(folder_path=stage, repo_id=DATASET, repo_type="dataset", | |
| path_in_repo=run_prefix, commit_message=f"run {run_id}") | |
| except Exception: | |
| pass | |
| finally: | |
| if stage and os.path.isdir(stage): | |
| shutil.rmtree(stage, ignore_errors=True) | |
| # ---------------- account-page helpers ---------------- | |
def list_user_songs(user_id) -> list[dict]:
    """Return the archived songs of one user, for the account page.

    Each entry looks like
    ``{song_id, title, uploaded_at, runs: [{run_id, stems: [...]}]}``.
    Runs are sorted by run id, descending (run ids start with a timestamp,
    so that is newest first); songs are sorted by ``uploaded_at``, descending.

    Returns an empty list when ``user_id`` is falsy, when no token is
    configured, or when listing the repository fails.
    """
    if not user_id:
        return []
    token = _token()
    if not token:
        return []
    try:
        from huggingface_hub import HfApi, hf_hub_download

        api = HfApi(token=token)
        prefix = f"users/{_safe_id(user_id)}/songs/"

        songs: dict[str, dict] = {}
        for repo_file in api.list_repo_files(repo_id=DATASET, repo_type="dataset"):
            if not repo_file.startswith(prefix):
                continue
            # Below the prefix: <song_id>/runs/<run_id>/<stem>.wav
            parts = repo_file[len(prefix):].split("/")
            sid = parts[0]
            song = songs.setdefault(
                sid, {"song_id": sid, "title": sid, "uploaded_at": "", "runs": {}}
            )
            if len(parts) >= 4 and parts[1] == "runs" and parts[3].endswith(".wav"):
                run = song["runs"].setdefault(parts[2], {"run_id": parts[2], "stems": []})
                run["stems"].append(parts[3])

        # Best-effort: read each song.json for a friendly title (capped to keep it snappy).
        for sid, song in list(songs.items())[:60]:
            try:
                # Pass the token explicitly: the dataset is private and the
                # token may come from a variable the download call is not given.
                local = hf_hub_download(
                    repo_id=DATASET,
                    repo_type="dataset",
                    filename=f"{prefix}{sid}/song.json",
                    token=token,
                )
                with open(local, encoding="utf-8") as fh:
                    info = json.load(fh)
                song["title"] = info.get("title") or sid
                # Keep this a string: a null here would make the sort below raise.
                song["uploaded_at"] = str(info.get("uploaded_at") or "")
            except Exception:
                pass  # keep the song-id fallback title

        out = []
        for song in songs.values():
            song["runs"] = sorted(
                song["runs"].values(), key=lambda r: r["run_id"], reverse=True
            )
            out.append(song)
        out.sort(key=lambda s: s.get("uploaded_at", ""), reverse=True)
        return out
    except Exception:
        return []
def fetch_run_stems(user_id, song_id, run_id) -> list[str]:
    """Download one run's stem files locally and return their paths (for the UI).

    Only ``.wav`` files under ``users/<uid>/songs/<song_id>/runs/<run_id>/``
    are fetched. Returns an empty list when any argument is falsy, when no
    token is configured, or when listing or downloading fails.
    """
    if not (user_id and song_id and run_id):
        return []
    token = _token()
    if not token:
        return []
    try:
        from huggingface_hub import HfApi, hf_hub_download

        api = HfApi(token=token)
        run_prefix = f"users/{_safe_id(user_id)}/songs/{song_id}/runs/{run_id}/"
        # Pass the token explicitly: the dataset is private and the token may
        # come from a variable the download call is not given.
        return [
            hf_hub_download(
                repo_id=DATASET, repo_type="dataset", filename=repo_file, token=token
            )
            for repo_file in api.list_repo_files(repo_id=DATASET, repo_type="dataset")
            if repo_file.startswith(run_prefix) and repo_file.endswith(".wav")
        ]
    except Exception:
        return []