Monish BV committed on
Commit
0aec951
·
1 Parent(s): 83d677f

added hf dataset to collect metrics

Browse files
Files changed (4) hide show
  1. .env.example +16 -0
  2. README.md +3 -1
  3. backend/main.py +98 -2
  4. requirements.txt +3 -0
.env.example CHANGED
@@ -35,3 +35,19 @@ KIOSK_LLM_SYSTEM_PROMPT="You are a conversational receptionist for the Northwest
35
 
36
  # Style guidelines (required - ensures speech-friendly responses)
37
  KIOSK_LLM_STYLE="Warm, welcoming tone for a spoken receptionist. Prefer short, plain sentences suitable for text-to-speech and avoid stage directions or annotations like *in a warm voice*."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  # Style guidelines (required - ensures speech-friendly responses)
37
  KIOSK_LLM_STYLE="Warm, welcoming tone for a spoken receptionist. Prefer short, plain sentences suitable for text-to-speech and avoid stage directions or annotations like *in a warm voice*."
38
+
39
+ # =============================================================================
40
+ # Hugging Face Dataset Persistence (optional - for metrics/history across restarts)
41
+ # =============================================================================
42
+
43
+ # HF dataset repo for chat history (e.g. monish563/kiosk-api-metrics)
44
+ # KIOSK_HF_DATASET_REPO=monish563/kiosk-api-metrics
45
+
46
+ # HF token with write access to the dataset (use HF Secrets on Spaces)
47
+ # KIOSK_HF_TOKEN=your-hf-token
48
+
49
+ # Path within the dataset repo (default: chat_history)
50
+ # KIOSK_HF_DATASET_PATH=chat_history
51
+
52
+ # Sync interval in minutes (default: 10)
53
+ # KIOSK_HF_SYNC_INTERVAL_MINUTES=10
README.md CHANGED
@@ -61,8 +61,10 @@ Deploy this API as a public endpoint so your manager (or STT/TTS systems) can se
61
  | `KIOSK_LLM_STYLE` | No | Style guidelines for TTS-friendly responses |
62
  | `OPENAI_API_KEY` | No | If using `provider: "openai"` |
63
  | `GEMINI_API_KEY` | No | If using `provider: "gemini"` |
 
 
64
 
65
- *At least one LLM API key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or `GEMINI_API_KEY`) is required.
66
 
67
  ### 3. Endpoint URL for your manager
68
 
 
61
  | `KIOSK_LLM_STYLE` | No | Style guidelines for TTS-friendly responses |
62
  | `OPENAI_API_KEY` | No | If using `provider: "openai"` |
63
  | `GEMINI_API_KEY` | No | If using `provider: "gemini"` |
64
+ | `KIOSK_HF_DATASET_REPO` | No | HF dataset for persistence (e.g. `monish563/kiosk-api-metrics`) |
65
+ | `KIOSK_HF_TOKEN` | No* | HF token with write access (required if dataset repo is set) |
66
 
67
+ *At least one LLM API key (`ANTHROPIC_API_KEY`, `OPENAI_API_KEY`, or `GEMINI_API_KEY`) is required. `KIOSK_HF_TOKEN` is required if `KIOSK_HF_DATASET_REPO` is set.
68
 
69
  ### 3. Endpoint URL for your manager
70
 
backend/main.py CHANGED
@@ -7,6 +7,12 @@ import os
7
  import threading
8
  import time
9
  import warnings
 
 
 
 
 
 
10
  from functools import lru_cache
11
  from pathlib import Path
12
  from typing import Any, Dict, List, Optional, Tuple
@@ -71,6 +77,7 @@ app.add_middleware(
71
 
72
  _orchestrator_lock = threading.Lock()
73
  logger = logging.getLogger(__name__)
 
74
 
75
 
76
  class QueryPayload(BaseModel):
@@ -129,10 +136,99 @@ def _load_env_once() -> None:
129
 
130
 
131
  def _get_env_value(name: Optional[str]) -> str:
132
- """Read environment variable."""
 
 
 
133
  if not name:
134
  return ""
135
- return os.getenv(name, "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
 
138
  def _is_placeholder(value: Optional[str]) -> bool:
 
7
  import threading
8
  import time
9
  import warnings
10
+
11
+ try:
12
+ from huggingface_hub import CommitScheduler, hf_hub_download
13
+ except ImportError:
14
+ CommitScheduler = None # type: ignore
15
+ hf_hub_download = None # type: ignore
16
  from functools import lru_cache
17
  from pathlib import Path
18
  from typing import Any, Dict, List, Optional, Tuple
 
77
 
78
  _orchestrator_lock = threading.Lock()
79
  logger = logging.getLogger(__name__)
80
+ _hf_scheduler = None
81
 
82
 
83
  class QueryPayload(BaseModel):
 
136
 
137
 
def _get_env_value(name: Optional[str]) -> str:
    """
    Look up an environment variable, falling back to its ``HF_``-prefixed twin.

    The plain ``name`` is consulted first. When it is unset or blank after
    stripping, ``HF_<name>`` is tried instead, because HF Secrets expose
    values as HF_<NAME>.

    Returns the stripped value, or ``""`` when ``name`` is falsy or neither
    variable holds a non-blank value.
    """
    if not name:
        return ""
    # Candidates are tried in priority order; the first non-blank one wins.
    for key in (name, f"HF_{name}"):
        value = os.getenv(key, "").strip()
        if value:
            return value
    return ""
149
+
150
+
def _maybe_download_existing_history() -> None:
    """Restore the chat-history file from the HF dataset repo, if configured.

    Does nothing when ``huggingface_hub`` could not be imported or
    ``KIOSK_HF_DATASET_REPO`` is unset. Otherwise downloads
    ``<KIOSK_HF_DATASET_PATH>/<HISTORY_FILE.name>`` from the dataset repo and
    copies it over the local ``HISTORY_FILE``.

    Best-effort: any failure (missing file, auth, network, ...) is logged and
    swallowed so the caller can continue without remote history.
    """
    import shutil

    if hf_hub_download is None:
        return

    # Load env configuration BEFORE reading any KIOSK_HF_* setting. The repo
    # id used to be read first, so the early return could fire before the
    # env loader had run.
    # NOTE(review): assumes _load_env_once() populates os.environ (e.g. from
    # a .env file) and is cheap to call repeatedly -- confirm.
    _load_env_once()
    repo_id = os.getenv("KIOSK_HF_DATASET_REPO", "").strip()
    if not repo_id:
        return

    token = _get_env_value("KIOSK_HF_TOKEN") or os.getenv("HF_TOKEN", "").strip()
    # Tolerate values such as "chat_history/" or "/chat_history": a stray
    # slash would otherwise yield a repo filename that can never match.
    path_in_repo = os.getenv("KIOSK_HF_DATASET_PATH", "chat_history").strip().strip("/")
    filename = f"{path_in_repo}/{HISTORY_FILE.name}" if path_in_repo else HISTORY_FILE.name

    try:
        downloaded = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=filename,
            token=token or None,
        )
        HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): this overwrites any local history file, including one
        # written since startup if this runs on a background thread.
        shutil.copy(downloaded, HISTORY_FILE)
        logger.info(
            "Downloaded chat history from HF dataset: repo=%s file=%s",
            repo_id,
            filename,
        )
    except Exception as exc:
        # Deliberately broad: a missing remote file is the normal first-run
        # case, and no download problem should block startup.
        logger.info("No existing chat history to download (starting fresh): %s", exc)
180
+
181
+
def _maybe_start_hf_sync() -> None:
    """Start the optional periodic upload of chat history to an HF dataset.

    Does nothing when a scheduler is already running, when
    ``huggingface_hub`` could not be imported, or when
    ``KIOSK_HF_DATASET_REPO`` is unset. Otherwise creates a
    ``CommitScheduler`` that pushes ``HISTORY_FILE.name`` from ``DATA_DIR``
    to ``KIOSK_HF_DATASET_PATH`` in the dataset repo every
    ``KIOSK_HF_SYNC_INTERVAL_MINUTES`` minutes (default 10).

    Never raises for configuration problems: a blank interval falls back to
    the default, an invalid one falls back with a warning, and a scheduler
    start-up failure is reported via ``warnings.warn``.
    """
    global _hf_scheduler

    if _hf_scheduler is not None or CommitScheduler is None:
        return

    # Load env configuration BEFORE reading any KIOSK_HF_* setting. The repo
    # id used to be read first, so the early return could fire before the
    # env loader had run.
    # NOTE(review): assumes _load_env_once() populates os.environ (e.g. from
    # a .env file) and is cheap to call repeatedly -- confirm.
    _load_env_once()
    repo_id = os.getenv("KIOSK_HF_DATASET_REPO", "").strip()
    if not repo_id:
        return

    token = _get_env_value("KIOSK_HF_TOKEN") or os.getenv("HF_TOKEN", "").strip()
    # Strip stray slashes so "chat_history/" and "chat_history" are the same.
    path_in_repo = os.getenv("KIOSK_HF_DATASET_PATH", "chat_history").strip().strip("/")

    # Parse the interval defensively. A bare float() on a blank or malformed
    # value used to raise ValueError outside the try block below and abort
    # sync setup entirely.
    default_minutes = 10.0
    interval_minutes = default_minutes
    raw_interval = os.getenv("KIOSK_HF_SYNC_INTERVAL_MINUTES", "").strip()
    if raw_interval:
        try:
            parsed = float(raw_interval)
        except ValueError:
            parsed = None
        # The chained comparison also rejects NaN, zero, negatives and inf.
        if parsed is not None and 0 < parsed < float("inf"):
            interval_minutes = parsed
        else:
            warnings.warn(
                f"Invalid KIOSK_HF_SYNC_INTERVAL_MINUTES={raw_interval!r}; "
                f"using default of {default_minutes:g} minutes"
            )

    try:
        _hf_scheduler = CommitScheduler(
            repo_id=repo_id,
            repo_type="dataset",
            folder_path=str(DATA_DIR),
            path_in_repo=path_in_repo,
            token=token or None,
            allow_patterns=[HISTORY_FILE.name],
            every=interval_minutes,
        )
        logger.info(
            "Started HF CommitScheduler for chat history: repo=%s path=%s interval=%sm",
            repo_id,
            path_in_repo or ".",
            interval_minutes,
        )
    except Exception as exc:
        # Sync is optional, so report the failure and carry on without it.
        warnings.warn(f"Unable to start HF sync: {exc}")
216
+
217
+
def _run_startup_tasks_in_background() -> None:
    """Kick off the HF history download and sync setup on a daemon thread.

    The work happens off the calling thread so the server can start
    immediately. Any exception raised by either step is logged as a warning
    instead of propagating.
    """

    def _startup_worker() -> None:
        # Download first, then start the sync scheduler.
        try:
            _maybe_download_existing_history()
            _maybe_start_hf_sync()
        except Exception as exc:
            logger.warning("Background startup tasks failed: %s", exc)

    # daemon=True: the thread never keeps the process alive on shutdown.
    threading.Thread(target=_startup_worker, daemon=True).start()


# Runs at import time so the HF tasks begin as soon as the module is loaded.
_run_startup_tasks_in_background()
232
 
233
 
234
  def _is_placeholder(value: Optional[str]) -> bool:
requirements.txt CHANGED
@@ -11,3 +11,6 @@ requests>=2.31.0
11
 
12
  # Utilities
13
  python-dotenv>=1.0.0
 
 
 
 
11
 
12
  # Utilities
13
  python-dotenv>=1.0.0
14
+
15
+ # HF dataset persistence (optional)
16
+ huggingface_hub>=0.23.0