""" PlotWeaver Voice Agent — turn-level logging =========================================== The Spaces filesystem is ephemeral (wiped on restart / rebuild / sleep), so a plain ``open("log.jsonl","a")`` loses everything. This module writes each turn to a local JSONL file and, when configured, syncs it to a *private* HF Dataset via ``CommitScheduler`` — giving both durable logs and a growing, weakly-labeled Hausa corpus (utterance + ASR text + chosen intent + source) for later threshold tuning and INTENT_EXAMPLES expansion. Configuration (Space → Settings → Variables and secrets): HF_TOKEN secret, write-scoped token PW_LOG_DATASET e.g. "plotweaver/voice-agent-logs" (enables sync) PW_LOG_DIR optional; defaults to /data if persistent storage is attached, else ./logs If PW_LOG_DATASET / HF_TOKEN are absent, logging degrades to local-file-only (handy for dev) and never raises into the request path. """ from __future__ import annotations import json import os import threading import uuid from datetime import datetime, timezone from pathlib import Path _HF_TOKEN = os.environ.get("HF_TOKEN") _LOG_DATASET = os.environ.get("PW_LOG_DATASET") _default_dir = "/data/logs" if Path("/data").exists() else "logs" _LOG_DIR = Path(os.environ.get("PW_LOG_DIR", _default_dir)) _LOG_DIR.mkdir(parents=True, exist_ok=True) _LOG_FILE = _LOG_DIR / f"turns_{uuid.uuid4().hex[:8]}.jsonl" _scheduler = None _lock = threading.Lock() if _LOG_DATASET and _HF_TOKEN: try: from huggingface_hub import CommitScheduler _scheduler = CommitScheduler( repo_id=_LOG_DATASET, repo_type="dataset", folder_path=_LOG_DIR, path_in_repo="data", every=5, # minutes between commits token=_HF_TOKEN, private=True, squash_history=True, ) print(f"[logging] syncing turns to dataset {_LOG_DATASET}") except Exception as e: # never block startup over logging print(f"[logging] dataset sync disabled ({e}); local file only") else: print(f"[logging] local file only at {_LOG_FILE}") def log_turn(record: dict) -> None: """Append one turn record. Safe to call from concurrent Gradio handlers.""" record = {"ts": datetime.now(timezone.utc).isoformat(), **record} line = json.dumps(record, ensure_ascii=False) + "\n" try: # CommitScheduler exposes .lock to avoid committing mid-write guard = _scheduler.lock if _scheduler is not None else _lock with guard: with _LOG_FILE.open("a", encoding="utf-8") as f: f.write(line) except Exception as e: print(f"[logging] write failed: {e}")