Spaces:
Running
on
Zero
Running
on
Zero
Switch usage logger to HF Dataset repo via CommitScheduler
Browse files
Replace /data/ CSV logging with CommitScheduler that writes JSONL
locally and auto-pushes to hetchyy/recitation-logs dataset every
5 minutes. Falls back to local-only logging without HF token.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- .gitignore +2 -0
- utils/usage_logger.py +67 -49
.gitignore
CHANGED
|
@@ -12,6 +12,8 @@ CLAUDE.md
|
|
| 12 |
|
| 13 |
data/phonemizer_mappings.json
|
| 14 |
|
|
|
|
|
|
|
| 15 |
# Ignore all README files except the top-level/main README.*
|
| 16 |
# This ignores any README, README.md, etc. in subfolders but not the one at project root
|
| 17 |
!README*
|
|
|
|
| 12 |
|
| 13 |
data/phonemizer_mappings.json
|
| 14 |
|
| 15 |
+
usage_logs/
|
| 16 |
+
|
| 17 |
# Ignore all README files except the top-level/main README.*
|
| 18 |
# This ignores any README, README.md, etc. in subfolders but not the one at project root
|
| 19 |
!README*
|
utils/usage_logger.py
CHANGED
|
@@ -1,50 +1,56 @@
|
|
| 1 |
"""
|
| 2 |
-
Usage logger
|
| 3 |
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
"""
|
| 8 |
|
| 9 |
-
import csv
|
| 10 |
import hashlib
|
| 11 |
-
import
|
| 12 |
import threading
|
|
|
|
| 13 |
from datetime import datetime
|
|
|
|
| 14 |
from typing import Optional, Tuple
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
|
| 20 |
-
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
"segment_ref",
|
| 28 |
-
"canonical_text",
|
| 29 |
-
"canonical_phonemes",
|
| 30 |
-
"detected_phonemes",
|
| 31 |
-
]
|
| 32 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
except Exception:
|
| 47 |
-
pass
|
| 48 |
|
| 49 |
|
| 50 |
def get_user_id(request) -> str:
|
|
@@ -75,12 +81,18 @@ def get_user_id(request) -> str:
|
|
| 75 |
|
| 76 |
def log_error(user_id: str, verse_ref: str, error_message: str) -> None:
|
| 77 |
"""Log a technical error that occurred during analysis."""
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
|
| 86 |
def log_recitation(
|
|
@@ -92,12 +104,18 @@ def log_recitation(
|
|
| 92 |
detected_phonemes: Tuple[str, ...],
|
| 93 |
) -> None:
|
| 94 |
"""Log a single segment's recitation data after analysis."""
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Usage logger that pushes to a HF Dataset repo.
|
| 3 |
|
| 4 |
+
Writes JSONL files locally and uses CommitScheduler to auto-push
|
| 5 |
+
to hetchyy/recitation-logs every 5 minutes in a background thread.
|
| 6 |
+
Falls back to local-only logging if CommitScheduler can't initialize.
|
| 7 |
"""
|
| 8 |
|
|
|
|
| 9 |
import hashlib
|
| 10 |
+
import json
|
| 11 |
import threading
|
| 12 |
+
from contextlib import contextmanager
|
| 13 |
from datetime import datetime
|
| 14 |
+
from pathlib import Path
|
| 15 |
from typing import Optional, Tuple
|
| 16 |
+
from uuid import uuid4
|
| 17 |
|
| 18 |
+
# Local folder for log files
|
| 19 |
+
LOG_DIR = Path("usage_logs")
|
| 20 |
+
LOG_DIR.mkdir(parents=True, exist_ok=True)
|
| 21 |
|
| 22 |
+
# UUID-suffixed files to avoid collision across Space restarts
|
| 23 |
+
ERROR_LOG_PATH = LOG_DIR / f"error_log-{uuid4()}.jsonl"
|
| 24 |
+
RECITATION_LOG_PATH = LOG_DIR / f"recitation_log-{uuid4()}.jsonl"
|
| 25 |
|
| 26 |
+
# CommitScheduler pushes LOG_DIR → data/ in the dataset repo
|
| 27 |
+
# Wrapped in try/except so local dev without HF token still works
|
| 28 |
+
_scheduler = None
|
| 29 |
+
try:
|
| 30 |
+
from huggingface_hub import CommitScheduler
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
+
_scheduler = CommitScheduler(
|
| 33 |
+
repo_id="hetchyy/recitation-logs",
|
| 34 |
+
repo_type="dataset",
|
| 35 |
+
folder_path=LOG_DIR,
|
| 36 |
+
path_in_repo="data",
|
| 37 |
+
private=True,
|
| 38 |
+
)
|
| 39 |
+
except Exception:
|
| 40 |
+
pass
|
| 41 |
|
| 42 |
+
_fallback_lock = threading.Lock()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@contextmanager
|
| 46 |
+
def _get_lock():
|
| 47 |
+
"""Use scheduler lock if available, otherwise fallback."""
|
| 48 |
+
if _scheduler is not None:
|
| 49 |
+
with _scheduler.lock:
|
| 50 |
+
yield
|
| 51 |
+
else:
|
| 52 |
+
with _fallback_lock:
|
| 53 |
+
yield
|
|
|
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
def get_user_id(request) -> str:
|
|
|
|
| 81 |
|
| 82 |
def log_error(user_id: str, verse_ref: str, error_message: str) -> None:
|
| 83 |
"""Log a technical error that occurred during analysis."""
|
| 84 |
+
try:
|
| 85 |
+
with _get_lock():
|
| 86 |
+
with ERROR_LOG_PATH.open("a") as f:
|
| 87 |
+
json.dump({
|
| 88 |
+
"timestamp": datetime.now().isoformat(),
|
| 89 |
+
"user_id": user_id,
|
| 90 |
+
"verse_ref": verse_ref or "",
|
| 91 |
+
"error_message": error_message or "",
|
| 92 |
+
}, f)
|
| 93 |
+
f.write("\n")
|
| 94 |
+
except Exception:
|
| 95 |
+
pass
|
| 96 |
|
| 97 |
|
| 98 |
def log_recitation(
|
|
|
|
| 104 |
detected_phonemes: Tuple[str, ...],
|
| 105 |
) -> None:
|
| 106 |
"""Log a single segment's recitation data after analysis."""
|
| 107 |
+
try:
|
| 108 |
+
with _get_lock():
|
| 109 |
+
with RECITATION_LOG_PATH.open("a") as f:
|
| 110 |
+
json.dump({
|
| 111 |
+
"timestamp": datetime.now().isoformat(),
|
| 112 |
+
"user_id": user_id,
|
| 113 |
+
"verse_ref": verse_ref or "",
|
| 114 |
+
"segment_ref": segment_ref or "",
|
| 115 |
+
"canonical_text": canonical_text or "",
|
| 116 |
+
"canonical_phonemes": " ".join(canonical_phonemes) if canonical_phonemes else "",
|
| 117 |
+
"detected_phonemes": " ".join(detected_phonemes) if detected_phonemes else "",
|
| 118 |
+
}, f)
|
| 119 |
+
f.write("\n")
|
| 120 |
+
except Exception:
|
| 121 |
+
pass
|