File size: 2,067 Bytes
ebaf5c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
"""Persist agent traces to a Hugging Face dataset repo.



On Spaces the container filesystem is ephemeral: traces written to ./traces/

survive only until the Space restarts, and never show up in the repo Files tab.

This module starts a `CommitScheduler` that periodically commits new/changed

files from ./traces/ to a dataset repo, so every agent run stays auditable.



Requires an HF_TOKEN (write) — set it as a Space secret. Without a token this

is a silent no-op so local runs keep working unchanged. Override the target

repo with TRACES_REPO (default: build-small-hackathon/flight-transit-agent-traces).

"""
from __future__ import annotations

import os

from agent import TRACES_DIR

# Keep a module-level reference so the scheduler thread isn't garbage-collected.
_SCHEDULER = None

# Minutes between background commits. Single source for both the scheduler
# argument and the status line, so the two cannot drift apart.
_SYNC_EVERY_MINUTES = 2


def _env(name: str) -> str:
    """Return environment variable *name* stripped of whitespace, or "" if unset."""
    return (os.environ.get(name) or "").strip()


def start() -> str:
    """Start the background trace sync and return a one-line status for logging.

    The write token is read from HF_TOKEN, falling back to HUGGINGFACE_TOKEN;
    the target dataset repo is read from TRACES_REPO, falling back to the
    hackathon org dataset. Surrounding whitespace in these values is ignored,
    so a secret pasted with a trailing newline still works and a blank value
    counts as unset.

    Returns:
        One of four human-readable statuses: already running, off (no token),
        on (with the dataset URL and interval), or failed (with the exception
        type and message).

    Exceptions raised while importing huggingface_hub or creating the
    scheduler are caught and reported in the returned string, not propagated.
    """
    global _SCHEDULER
    if _SCHEDULER is not None:
        return "trace sync already running"
    token = _env("HF_TOKEN") or _env("HUGGINGFACE_TOKEN")
    if not token:
        return "trace sync off — set HF_TOKEN (write) to push traces to a dataset repo"
    try:
        # Imported lazily so the module still loads when huggingface_hub is
        # absent; the ImportError is then reported through the status line.
        from huggingface_hub import CommitScheduler

        # Default to the hackathon org dataset; this is independent of the token
        # owner, so transferring the Space to the org keeps traces in the org.
        repo_id = (_env("TRACES_REPO")
                   or "build-small-hackathon/flight-transit-agent-traces")
        _SCHEDULER = CommitScheduler(
            repo_id=repo_id,
            repo_type="dataset",
            folder_path=TRACES_DIR,
            path_in_repo="traces",
            every=_SYNC_EVERY_MINUTES,  # minutes
            token=token,
        )
        return (f"trace sync on → https://huggingface.co/datasets/{repo_id} "
                f"(every {_SYNC_EVERY_MINUTES} min)")
    except Exception as e:  # noqa: BLE001 — tracing must never take the app down
        return f"trace sync failed ({type(e).__name__}: {e}) — app continues without it"