"""Persist agent traces to a Hugging Face dataset repo. Uses `huggingface_hub.CommitScheduler`: the `traces/` folder is committed to a dataset repo in the background every minute (only new/changed files are uploaded). This is the recommended pattern for persisting data from a Space — ZeroGPU Spaces have ephemeral disks, so without this every trace is lost on restart. Configuration (env vars): HF_TOKEN write token; on Spaces add it as a secret. Required. TRACES_DATASET_REPO dataset repo id, e.g. "you/hugging-wizards-traces". Defaults to "-traces" when running on a Space. TRACES_DATASET_PRIVATE "0" to create the dataset public (default private). If the token or repo id is missing, syncing is disabled and the game runs exactly as before (traces still land on local disk). """ from __future__ import annotations import contextlib import os _scheduler = None _disabled = False def _repo_id() -> str | None: repo = os.environ.get("TRACES_DATASET_REPO") if repo: return repo space = os.environ.get("SPACE_ID") # "owner/space-name" on HF Spaces return f"{space}-traces" if space else None def start(trace_dir: str): """Begin background sync of `trace_dir` to the dataset repo (idempotent).""" global _scheduler, _disabled if _scheduler is not None or _disabled: return _scheduler token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN") repo = _repo_id() if not token or not repo: _disabled = True print("[trace_store] HF sync disabled " "(set HF_TOKEN and TRACES_DATASET_REPO to enable)") return None try: from huggingface_hub import CommitScheduler _scheduler = CommitScheduler( repo_id=repo, repo_type="dataset", folder_path=trace_dir, path_in_repo="traces", every=1, # minutes private=os.environ.get("TRACES_DATASET_PRIVATE", "1") != "0", allow_patterns=["*.json"], token=token, ) print(f"[trace_store] syncing traces to dataset {repo} every minute") except Exception as e: # never let trace upload break the game _disabled = True print(f"[trace_store] HF sync disabled: {e}") return _scheduler def lock(): """Lock to hold while writing trace files, so a half-written file is never committed. A no-op context manager when syncing is disabled.""" if _scheduler is not None: return _scheduler.lock return contextlib.nullcontext()