Voice-AI-Agent-Clean / logging_util.py
Toadoum's picture
Upload 4 files
3c8a5a9 verified
Raw
History Blame Contribute Delete
2.75 kB
"""
PlotWeaver Voice Agent — turn-level logging
===========================================
The Spaces filesystem is ephemeral (wiped on restart / rebuild / sleep), so a
plain ``open("log.jsonl","a")`` loses everything. This module writes each turn
to a local JSONL file and, when configured, syncs it to a *private* HF Dataset
via ``CommitScheduler`` — giving both durable logs and a growing, weakly-labeled
Hausa corpus (utterance + ASR text + chosen intent + source) for later threshold
tuning and INTENT_EXAMPLES expansion.
Configuration (Space → Settings → Variables and secrets):

    HF_TOKEN        secret, write-scoped token
    PW_LOG_DATASET  e.g. "plotweaver/voice-agent-logs" (enables sync)
    PW_LOG_DIR      optional; defaults to /data/logs if persistent storage
                    is attached, else ./logs
If PW_LOG_DATASET / HF_TOKEN are absent, logging degrades to local-file-only
(handy for dev) and never raises into the request path.
"""
from __future__ import annotations
import json
import os
import threading
import uuid
from datetime import datetime, timezone
from pathlib import Path
# --- Configuration, read once at import time --------------------------------
_HF_TOKEN = os.environ.get("HF_TOKEN")
_LOG_DATASET = os.environ.get("PW_LOG_DATASET")

# Use /data/logs when a /data directory exists, otherwise a relative ./logs.
# PW_LOG_DIR, when set, overrides either default.
_default_dir = "/data/logs" if Path("/data").exists() else "logs"
_LOG_DIR = Path(os.environ.get("PW_LOG_DIR", _default_dir))
try:
    _LOG_DIR.mkdir(parents=True, exist_ok=True)
except OSError as e:  # never block startup over logging
    # An unwritable or invalid log directory must not take the app down at
    # import time; log_turn() reports (and swallows) the later write errors.
    print(f"[logging] could not create log dir {_LOG_DIR} ({e})")

# A random 8-hex-digit suffix is generated on every import, so each process
# start writes to its own file name inside _LOG_DIR.
_LOG_FILE = _LOG_DIR / f"turns_{uuid.uuid4().hex[:8]}.jsonl"

# _scheduler stays None unless dataset sync is configured and starts cleanly;
# _lock serialises writers in the local-file-only case.
_scheduler = None
_lock = threading.Lock()

if _LOG_DATASET and _HF_TOKEN:
    try:
        # Imported lazily so huggingface_hub is only needed when sync is enabled.
        from huggingface_hub import CommitScheduler

        _scheduler = CommitScheduler(
            repo_id=_LOG_DATASET,
            repo_type="dataset",
            folder_path=_LOG_DIR,
            path_in_repo="data",
            every=5,  # minutes between commits
            token=_HF_TOKEN,
            private=True,
            squash_history=True,
        )
        print(f"[logging] syncing turns to dataset {_LOG_DATASET}")
    except Exception as e:  # never block startup over logging
        print(f"[logging] dataset sync disabled ({e}); local file only")
else:
    print(f"[logging] local file only at {_LOG_FILE}")
def log_turn(record: dict) -> None:
    """Append one turn record to the JSONL log file; never raises.

    The written row starts with a ``ts`` field holding the current UTC time in
    ISO-8601 form, followed by the keys of *record*. A ``ts`` key supplied in
    *record* takes precedence over the generated timestamp.

    Writes are serialised with a lock, so this is safe to call from concurrent
    Gradio handlers.

    Args:
        record: Mapping describing one turn. Values that ``json`` cannot encode
            natively are written as their ``str()`` form.

    Returns:
        None. Any failure (malformed record, serialisation error, I/O error)
        is reported on stdout and swallowed so that logging can never break
        the request path.
    """
    try:
        # Build and serialise inside the guarded region: a bad record must be
        # reported like any other logging failure, not raised to the caller.
        row = {"ts": datetime.now(timezone.utc).isoformat(), **record}
        # default=str is a deliberate best-effort choice for a log: keep the
        # turn with a stringified value rather than drop the whole row.
        line = json.dumps(row, ensure_ascii=False, default=str) + "\n"
        # CommitScheduler exposes .lock to avoid committing mid-write
        guard = _scheduler.lock if _scheduler is not None else _lock
        with guard, _LOG_FILE.open("a", encoding="utf-8") as f:
            f.write(line)
    except Exception as e:
        print(f"[logging] write failed: {e}")