"""Persist agent traces to a Hugging Face dataset repo. On Spaces the container filesystem is ephemeral: traces written to ./traces/ survive only until the Space restarts, and never show up in the repo Files tab. This module starts a `CommitScheduler` that periodically commits new/changed files from ./traces/ to a dataset repo, so every agent run stays auditable. Requires an HF_TOKEN (write) — set it as a Space secret. Without a token this is a silent no-op so local runs keep working unchanged. Override the target repo with TRACES_REPO (default: build-small-hackathon/flight-transit-agent-traces). """ from __future__ import annotations import os from agent import TRACES_DIR # Keep a module-level reference so the scheduler thread isn't garbage-collected. _SCHEDULER = None def start() -> str: """Start the background sync. Returns a one-line status for logging.""" global _SCHEDULER if _SCHEDULER is not None: return "trace sync already running" token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") if not token: return "trace sync off — set HF_TOKEN (write) to push traces to a dataset repo" try: from huggingface_hub import CommitScheduler # Default to the hackathon org dataset; this is independent of the token # owner, so transferring the Space to the org keeps traces in the org. repo_id = (os.environ.get("TRACES_REPO") or "build-small-hackathon/flight-transit-agent-traces") _SCHEDULER = CommitScheduler( repo_id=repo_id, repo_type="dataset", folder_path=TRACES_DIR, path_in_repo="traces", every=2, # minutes token=token, ) return f"trace sync on → https://huggingface.co/datasets/{repo_id} (every 2 min)" except Exception as e: # noqa: BLE001 — tracing must never take the app down return f"trace sync failed ({type(e).__name__}: {e}) — app continues without it"