Spaces:
Sleeping
Sleeping
| """ | |
| PlotWeaver Voice Agent — turn-level logging | |
| =========================================== | |
| The Spaces filesystem is ephemeral (wiped on restart / rebuild / sleep), so a | |
| plain ``open("log.jsonl","a")`` loses everything. This module writes each turn | |
| to a local JSONL file and, when configured, syncs it to a *private* HF Dataset | |
| via ``CommitScheduler`` — giving both durable logs and a growing, weakly-labeled | |
| Hausa corpus (utterance + ASR text + chosen intent + source) for later threshold | |
| tuning and INTENT_EXAMPLES expansion. | |
| Configuration (Space → Settings → Variables and secrets): | |
| HF_TOKEN secret, write-scoped token | |
| PW_LOG_DATASET e.g. "plotweaver/voice-agent-logs" (enables sync) | |
| PW_LOG_DIR optional; defaults to /data/logs if persistent storage is | |
| attached, else ./logs | |
| If PW_LOG_DATASET / HF_TOKEN are absent, logging degrades to local-file-only | |
| (handy for dev) and never raises into the request path. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| import threading | |
| import uuid | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
# --- Configuration, read once at import time ---------------------------------
# Token and target dataset for the optional sync; sync is only attempted when
# both are set.
_HF_TOKEN = os.environ.get("HF_TOKEN")
_LOG_DATASET = os.environ.get("PW_LOG_DATASET")

# Use /data/logs when a /data directory exists (persistent storage attached),
# otherwise a relative ./logs directory. PW_LOG_DIR overrides either choice.
_default_dir = "/data/logs" if Path("/data").exists() else "logs"
_LOG_DIR = Path(os.environ.get("PW_LOG_DIR", _default_dir))
try:
    _LOG_DIR.mkdir(parents=True, exist_ok=True)
except OSError as e:
    # Logging must never block startup: an unwritable or invalid log directory
    # is reported here and then surfaces again as per-write failures in
    # log_turn(), instead of crashing the import of this module.
    print(f"[logging] cannot create log dir {_LOG_DIR} ({e}); turns may not be saved")

# A short random suffix gives every import of this module its own file.
_LOG_FILE = _LOG_DIR / f"turns_{uuid.uuid4().hex[:8]}.jsonl"

# Stays None unless the dataset sync is set up successfully further down.
_scheduler = None
# Guards local appends when no scheduler (and therefore no scheduler lock) exists.
_lock = threading.Lock()
# Optional durable sync. It is attempted only when both the target dataset and
# a token are configured; otherwise, or on any setup failure, turns are kept in
# the local file only.
if not _LOG_DATASET or not _HF_TOKEN:
    print(f"[logging] local file only at {_LOG_FILE}")
else:
    try:
        # Imported lazily so the dependency is only touched when sync is
        # actually requested.
        from huggingface_hub import CommitScheduler

        _scheduler = CommitScheduler(
            repo_id=_LOG_DATASET,
            repo_type="dataset",
            token=_HF_TOKEN,
            private=True,
            folder_path=_LOG_DIR,
            path_in_repo="data",
            every=5,  # minutes between commits
            squash_history=True,
        )
        print(f"[logging] syncing turns to dataset {_LOG_DATASET}")
    except Exception as sync_error:  # never block startup over logging
        print(f"[logging] dataset sync disabled ({sync_error}); local file only")
def log_turn(record: dict) -> None:
    """Append one turn record as a JSON line; never raises.

    A UTC ISO-8601 ``ts`` field is added ahead of the caller's fields; a ``ts``
    key already present in *record* takes precedence. Safe to call from
    concurrent Gradio handlers.

    Args:
        record: Mapping of turn fields. Values the ``json`` module cannot
            encode natively are stored as ``str(value)`` so the rest of the
            turn is still logged.

    Returns:
        None. A record that cannot be serialized, or a failed write, is
        reported on stdout and swallowed so logging cannot break the request
        path.
    """
    try:
        stamped = {"ts": datetime.now(timezone.utc).isoformat(), **record}
        # default=str is deliberate: for a best-effort log, a stringified
        # value is better than losing the whole turn.
        line = json.dumps(stamped, ensure_ascii=False, default=str) + "\n"
    except Exception as e:  # non-mapping record, unsupported key, circular ref
        print(f"[logging] record dropped (not serializable): {e}")
        return

    try:
        # CommitScheduler exposes .lock to avoid committing mid-write
        guard = _scheduler.lock if _scheduler is not None else _lock
        with guard, _LOG_FILE.open("a", encoding="utf-8") as f:
            f.write(line)
    except Exception as e:
        print(f"[logging] write failed: {e}")