hetchyy Claude Opus 4.5 committed on
Commit
4c7bfce
·
1 Parent(s): c398505

Switch usage logger to HF Dataset repo via CommitScheduler

Browse files

Replace /data/ CSV logging with CommitScheduler that writes JSONL
locally and auto-pushes to hetchyy/recitation-logs dataset every
5 minutes. Falls back to local-only logging without HF token.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. .gitignore +2 -0
  2. utils/usage_logger.py +67 -49
.gitignore CHANGED
@@ -12,6 +12,8 @@ CLAUDE.md
12
 
13
  data/phonemizer_mappings.json
14
 
 
 
15
  # Ignore all README files except the top-level/main README.*
16
  # This ignores any README, README.md, etc. in subfolders but not the one at project root
17
  !README*
 
12
 
13
  data/phonemizer_mappings.json
14
 
15
+ usage_logs/
16
+
17
  # Ignore all README files except the top-level/main README.*
18
  # This ignores any README, README.md, etc. in subfolders but not the one at project root
19
  !README*
utils/usage_logger.py CHANGED
@@ -1,50 +1,56 @@
1
  """
2
- Usage logger for HF Spaces persistent storage.
3
 
4
- Logs technical errors and per-segment recitation data to CSV files
5
- in /data/ (HF Spaces persistent directory). Silently no-ops if
6
- /data/ doesn't exist (local dev).
7
  """
8
 
9
- import csv
10
  import hashlib
11
- import os
12
  import threading
 
13
  from datetime import datetime
 
14
  from typing import Optional, Tuple
 
15
 
16
- DATA_DIR = "/data"
17
- ERROR_LOG = os.path.join(DATA_DIR, "error_log.csv")
18
- RECITATION_LOG = os.path.join(DATA_DIR, "recitation_log.csv")
19
 
20
- _lock = threading.Lock()
 
 
21
 
22
- ERROR_FIELDS = ["timestamp", "user_id", "verse_ref", "error_message"]
23
- RECITATION_FIELDS = [
24
- "timestamp",
25
- "user_id",
26
- "verse_ref",
27
- "segment_ref",
28
- "canonical_text",
29
- "canonical_phonemes",
30
- "detected_phonemes",
31
- ]
32
 
 
 
 
 
 
 
 
 
 
33
 
34
- def _append_row(filepath: str, fieldnames: list, row: dict) -> None:
35
- """Append a row to a CSV file, creating it with headers if needed."""
36
- if not os.path.isdir(DATA_DIR):
37
- return
38
- try:
39
- with _lock:
40
- file_exists = os.path.exists(filepath)
41
- with open(filepath, "a", newline="", encoding="utf-8") as f:
42
- writer = csv.DictWriter(f, fieldnames=fieldnames)
43
- if not file_exists:
44
- writer.writeheader()
45
- writer.writerow(row)
46
- except Exception:
47
- pass
48
 
49
 
50
  def get_user_id(request) -> str:
@@ -75,12 +81,18 @@ def get_user_id(request) -> str:
75
 
76
  def log_error(user_id: str, verse_ref: str, error_message: str) -> None:
77
  """Log a technical error that occurred during analysis."""
78
- _append_row(ERROR_LOG, ERROR_FIELDS, {
79
- "timestamp": datetime.now().isoformat(),
80
- "user_id": user_id,
81
- "verse_ref": verse_ref or "",
82
- "error_message": error_message or "",
83
- })
 
 
 
 
 
 
84
 
85
 
86
  def log_recitation(
@@ -92,12 +104,18 @@ def log_recitation(
92
  detected_phonemes: Tuple[str, ...],
93
  ) -> None:
94
  """Log a single segment's recitation data after analysis."""
95
- _append_row(RECITATION_LOG, RECITATION_FIELDS, {
96
- "timestamp": datetime.now().isoformat(),
97
- "user_id": user_id,
98
- "verse_ref": verse_ref or "",
99
- "segment_ref": segment_ref or "",
100
- "canonical_text": canonical_text or "",
101
- "canonical_phonemes": " ".join(canonical_phonemes) if canonical_phonemes else "",
102
- "detected_phonemes": " ".join(detected_phonemes) if detected_phonemes else "",
103
- })
 
 
 
 
 
 
 
1
  """
2
+ Usage logger that pushes to a HF Dataset repo.
3
 
4
+ Writes JSONL files locally and uses CommitScheduler to auto-push
5
+ to hetchyy/recitation-logs every 5 minutes in a background thread.
6
+ Falls back to local-only logging if CommitScheduler can't initialize.
7
  """
8
 
 
9
  import hashlib
10
+ import json
11
  import threading
12
+ from contextlib import contextmanager
13
  from datetime import datetime
14
+ from pathlib import Path
15
  from typing import Optional, Tuple
16
+ from uuid import uuid4
17
 
18
+ # Local folder for log files
19
+ LOG_DIR = Path("usage_logs")
20
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
21
 
22
+ # UUID-suffixed files to avoid collision across Space restarts
23
+ ERROR_LOG_PATH = LOG_DIR / f"error_log-{uuid4()}.jsonl"
24
+ RECITATION_LOG_PATH = LOG_DIR / f"recitation_log-{uuid4()}.jsonl"
25
 
26
+ # CommitScheduler pushes LOG_DIR to data/ in the dataset repo
27
+ # Wrapped in try/except so local dev without HF token still works
28
+ _scheduler = None
29
+ try:
30
+ from huggingface_hub import CommitScheduler
 
 
 
 
 
31
 
32
+ _scheduler = CommitScheduler(
33
+ repo_id="hetchyy/recitation-logs",
34
+ repo_type="dataset",
35
+ folder_path=LOG_DIR,
36
+ path_in_repo="data",
37
+ private=True,
38
+ )
39
+ except Exception:
40
+ pass
41
 
42
+ _fallback_lock = threading.Lock()
43
+
44
+
45
+ @contextmanager
46
+ def _get_lock():
47
+ """Use scheduler lock if available, otherwise fallback."""
48
+ if _scheduler is not None:
49
+ with _scheduler.lock:
50
+ yield
51
+ else:
52
+ with _fallback_lock:
53
+ yield
 
 
54
 
55
 
56
  def get_user_id(request) -> str:
 
81
 
82
  def log_error(user_id: str, verse_ref: str, error_message: str) -> None:
83
  """Log a technical error that occurred during analysis."""
84
+ try:
85
+ with _get_lock():
86
+ with ERROR_LOG_PATH.open("a") as f:
87
+ json.dump({
88
+ "timestamp": datetime.now().isoformat(),
89
+ "user_id": user_id,
90
+ "verse_ref": verse_ref or "",
91
+ "error_message": error_message or "",
92
+ }, f)
93
+ f.write("\n")
94
+ except Exception:
95
+ pass
96
 
97
 
98
  def log_recitation(
 
104
  detected_phonemes: Tuple[str, ...],
105
  ) -> None:
106
  """Log a single segment's recitation data after analysis."""
107
+ try:
108
+ with _get_lock():
109
+ with RECITATION_LOG_PATH.open("a") as f:
110
+ json.dump({
111
+ "timestamp": datetime.now().isoformat(),
112
+ "user_id": user_id,
113
+ "verse_ref": verse_ref or "",
114
+ "segment_ref": segment_ref or "",
115
+ "canonical_text": canonical_text or "",
116
+ "canonical_phonemes": " ".join(canonical_phonemes) if canonical_phonemes else "",
117
+ "detected_phonemes": " ".join(detected_phonemes) if detected_phonemes else "",
118
+ }, f)
119
+ f.write("\n")
120
+ except Exception:
121
+ pass