Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

JacobLinCool committed on Jun 8

Commit

b7d5967

verified ·

1 Parent(s): 4791c0a

fix: harden dashboard refresh embedding

Browse files

Sync GitHub commit a6a05cd with bucket-backed HF cache, embedding refresh timeout, and streamed progress logs.

Files changed (4) hide show

README.md +4 -1
app.py +80 -4
scripts/build_project_index.py +11 -1
tests/test_app.py +23 -11

README.md CHANGED Viewed

@@ -220,6 +220,7 @@ ADVISOR_ADAPTER_REVISION=25de69bcde397e1bcdd852923b56a42f10222650
 ADVISOR_QUEST_ANALYZER_BACKEND=minicpm-transformers
 ADVISOR_QUEST_ADAPTER_ID=artifacts/quest-lora
 ADVISOR_CACHE_DIR=/data/advisor-cache
 ADVISOR_EMBEDDING_MODEL_REPO=ggml-org/embeddinggemma-300m-qat-q8_0-GGUF
 ADVISOR_EMBEDDING_MODEL_FILE=embeddinggemma-300m-qat-Q8_0.gguf
 ADVISOR_ASR_MODEL_ID=nvidia/nemotron-speech-streaming-en-0.6b
@@ -238,7 +239,9 @@ The retrieval query embedder downloads the GGUF model through `huggingface_hub`
 `ADVISOR_EMBEDDING_MODEL_PATH` points to a local file. `/api/transcribe` uses the same ZeroGPU wrapper for Nemotron ASR.
 On macOS local runs, the app automatically runs llama.cpp query embedding in a worker process so the MiniCPM PyTorch
 runtime and llama.cpp do not load conflicting OpenMP runtimes in the same Python process. Dashboard refresh also builds
-the GGUF embedding index in a subprocess before returning to the app process for MiniCPM quest analysis.
 ## Test

 ADVISOR_QUEST_ANALYZER_BACKEND=minicpm-transformers
 ADVISOR_QUEST_ADAPTER_ID=artifacts/quest-lora
 ADVISOR_CACHE_DIR=/data/advisor-cache
+ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS=1800
 ADVISOR_EMBEDDING_MODEL_REPO=ggml-org/embeddinggemma-300m-qat-q8_0-GGUF
 ADVISOR_EMBEDDING_MODEL_FILE=embeddinggemma-300m-qat-Q8_0.gguf
 ADVISOR_ASR_MODEL_ID=nvidia/nemotron-speech-streaming-en-0.6b
 `ADVISOR_EMBEDDING_MODEL_PATH` points to a local file. `/api/transcribe` uses the same ZeroGPU wrapper for Nemotron ASR.
 On macOS local runs, the app automatically runs llama.cpp query embedding in a worker process so the MiniCPM PyTorch
 runtime and llama.cpp do not load conflicting OpenMP runtimes in the same Python process. Dashboard refresh also builds
+the GGUF embedding index in a subprocess before returning to the app process for MiniCPM quest analysis. When
+`ADVISOR_CACHE_DIR` is set and `HF_HOME` is not, the refresh subprocess stores Hugging Face downloads under
+`$ADVISOR_CACHE_DIR/huggingface` so the mounted bucket keeps the embedding model cache across refreshes and restarts.
 ## Test

app.py CHANGED Viewed

@@ -4,10 +4,12 @@ from datetime import datetime, timezone
 import json
 import os
 from pathlib import Path
 import subprocess
 import sys
 import tempfile
 from threading import Lock, Thread
 from typing import Any, Iterator
 from uuid import uuid4
@@ -60,6 +62,8 @@ PROFILE_FIELDS = ["skills", "time", "preferences", "constraints"]
 MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
 AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
 DEFAULT_HF_ORG = "build-small-hackathon"
 REFRESH_STAGE_LABELS = {
     "crawling": "Fetching public Spaces",
     "embedding": "Rebuilding the embedding index",
@@ -283,10 +287,7 @@ def _build_refresh_index_payload(project_path: Path, index_path: Path) -> dict[s
     if n_threads:
         command.extend(["--n-threads", n_threads])
-    completed = subprocess.run(command, cwd=ROOT, capture_output=True, text=True, check=False)
-    if completed.returncode != 0:
-        detail = "\n".join(part for part in (completed.stdout.strip(), completed.stderr.strip()) if part)
-        raise RuntimeError(f"refresh embedding index build failed with exit code {completed.returncode}: {detail}")
     try:
         payload = json.loads(index_path.read_text(encoding="utf-8"))
     except (OSError, json.JSONDecodeError) as error:
@@ -296,6 +297,81 @@ def _build_refresh_index_payload(project_path: Path, index_path: Path) -> dict[s
     return payload
 def _replace_runtime_from_files(projects_path: Path, index_path: Path, refreshed_dashboard: dict[str, Any]) -> None:
     global index, engine, _cpu_engine, dashboard_payload
     new_index = ProjectIndex.from_files(projects_path, index_path)

 import json
 import os
 from pathlib import Path
+import selectors
 import subprocess
 import sys
 import tempfile
 from threading import Lock, Thread
+import time
 from typing import Any, Iterator
 from uuid import uuid4
 MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
 AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
 DEFAULT_HF_ORG = "build-small-hackathon"
+DEFAULT_REFRESH_EMBEDDING_TIMEOUT_SECONDS = 1800
+REFRESH_SUBPROCESS_LOG_TAIL_LINES = 80
 REFRESH_STAGE_LABELS = {
     "crawling": "Fetching public Spaces",
     "embedding": "Rebuilding the embedding index",
     if n_threads:
         command.extend(["--n-threads", n_threads])
+    _run_refresh_index_command(command)
     try:
         payload = json.loads(index_path.read_text(encoding="utf-8"))
     except (OSError, json.JSONDecodeError) as error:
     return payload
def _run_refresh_index_command(command: list[str]) -> None:
    """Run the embedding index build subprocess while streaming its output.

    The child is started in ``ROOT`` with the environment returned by
    ``_refresh_subprocess_env()``. Its stderr is merged into stdout, and each
    line is passed to ``_record_refresh_subprocess_line`` so it is echoed
    immediately and kept in a bounded tail for error reporting.

    Args:
        command: Argument vector of the index build subprocess.

    Raises:
        RuntimeError: If the subprocess runs longer than the configured
            timeout, or if it exits with a non-zero return code. In both
            cases the message includes the most recent output lines.
    """
    timeout_seconds = _refresh_embedding_timeout_seconds()
    output_tail: list[str] = []
    process = subprocess.Popen(
        command,
        cwd=ROOT,
        env=_refresh_subprocess_env(),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
    )
    assert process.stdout is not None
    selector = selectors.DefaultSelector()
    started = time.monotonic()
    try:
        # Registered inside the ``try`` so a failure here still reaches the
        # cleanup below instead of leaking the child process.
        selector.register(process.stdout, selectors.EVENT_READ)
        while process.poll() is None:
            # Wake up at least once per second so the timeout is checked even
            # when the child is silent.
            for key, _event in selector.select(timeout=1):
                line = key.fileobj.readline()
                if line:
                    _record_refresh_subprocess_line(output_tail, line)
            if time.monotonic() - started > timeout_seconds:
                raise RuntimeError(
                    "refresh embedding index build timed out "
                    f"after {timeout_seconds} seconds. Last output:\n{_format_output_tail(output_tail)}"
                )
        # The child has exited; drain whatever is still buffered in the pipe.
        for line in process.stdout:
            _record_refresh_subprocess_line(output_tail, line)
    finally:
        selector.close()
        if process.poll() is None:
            # Reached on timeout or on any unexpected error (including
            # KeyboardInterrupt): never leave the child running behind us.
            process.kill()
            try:
                process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                # Best-effort reap only; do not mask the exception that is
                # already propagating out of the ``try`` block.
                pass
        process.stdout.close()
    if process.returncode != 0:
        raise RuntimeError(
            "refresh embedding index build failed "
            f"with exit code {process.returncode}. Last output:\n{_format_output_tail(output_tail)}"
        )
def _refresh_subprocess_env() -> dict[str, str]:
    """Build the environment mapping for the refresh embedding subprocess.

    Starts from a copy of the current process environment. When ``HF_HOME``
    is unset or empty and ``cache_dir_from_env()`` returns a directory,
    ``HF_HOME`` is set to a ``huggingface`` subdirectory of it, which is
    created if it does not exist yet.

    Returns:
        The environment mapping to hand to the subprocess.
    """
    child_env = os.environ.copy()
    if child_env.get("HF_HOME"):
        # An explicit HF_HOME always wins.
        return child_env
    base_dir = cache_dir_from_env()
    if base_dir is None:
        return child_env
    hf_cache = base_dir / "huggingface"
    hf_cache.mkdir(parents=True, exist_ok=True)
    child_env["HF_HOME"] = str(hf_cache)
    return child_env
def _refresh_embedding_timeout_seconds() -> int:
    """Return the refresh embedding subprocess timeout in seconds.

    Reads ``ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS`` from the environment.
    An unset or blank value falls back to
    ``DEFAULT_REFRESH_EMBEDDING_TIMEOUT_SECONDS``.

    Raises:
        RuntimeError: If the variable is set but is not a positive integer
            (non-numeric, fractional, zero, or negative).
    """
    raw = os.environ.get("ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS", "").strip()
    if not raw:
        return DEFAULT_REFRESH_EMBEDDING_TIMEOUT_SECONDS
    try:
        timeout = int(raw)
    except ValueError as error:
        # Report malformed values the same way as non-positive ones instead
        # of leaking a bare ValueError to the caller.
        raise RuntimeError(
            "ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS must be a positive integer."
        ) from error
    if timeout <= 0:
        raise RuntimeError("ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS must be a positive integer.")
    return timeout
def _record_refresh_subprocess_line(output_tail: list[str], raw_line: str) -> None:
    """Echo one subprocess output line and remember it in the bounded tail.

    Trailing whitespace is stripped from ``raw_line``; a line that is empty
    afterwards is ignored. Otherwise the line is printed with a
    ``[dashboard-refresh embedding]`` prefix (flushed immediately) and
    appended to ``output_tail``, which is trimmed in place to its last
    ``REFRESH_SUBPROCESS_LOG_TAIL_LINES`` entries.

    Args:
        output_tail: Mutable list of the most recent output lines.
        raw_line: Line as read from the subprocess pipe.
    """
    text = raw_line.rstrip()
    if text:
        print(f"[dashboard-refresh embedding] {text}", flush=True)
        output_tail.append(text)
        # Slice-assign so the caller's list object is trimmed in place.
        output_tail[:] = output_tail[-REFRESH_SUBPROCESS_LOG_TAIL_LINES:]
def _format_output_tail(output_tail: list[str]) -> str:
    """Render the captured output lines for inclusion in an error message.

    Returns the lines joined by newlines, or the placeholder
    ``"(no output)"`` when nothing was captured.
    """
    if not output_tail:
        return "(no output)"
    return "\n".join(output_tail)
 def _replace_runtime_from_files(projects_path: Path, index_path: Path, refreshed_dashboard: dict[str, Any]) -> None:
     global index, engine, _cpu_engine, dashboard_payload
     new_index = ProjectIndex.from_files(projects_path, index_path)

scripts/build_project_index.py CHANGED Viewed

@@ -68,6 +68,7 @@ def build_payload(
 ) -> dict:
     data = json.loads(project_path.read_text(encoding="utf-8"))
     projects = [Project.from_dict(item) for item in data["projects"]]
     embedder = LlamaCppEmbedder(
         model_repo=model_repo,
         model_file=model_file,
@@ -76,7 +77,16 @@ def build_payload(
         n_threads=n_threads,
         verbose=False,
     )
-    embeddings = [embedder.embed(project.searchable_text) for project in projects]
     metadata = {
         "model_repo": model_repo,
         "model_file": model_file,

 ) -> dict:
     data = json.loads(project_path.read_text(encoding="utf-8"))
     projects = [Project.from_dict(item) for item in data["projects"]]
+    print(f"loaded {len(projects)} projects from {project_path}", flush=True)
     embedder = LlamaCppEmbedder(
         model_repo=model_repo,
         model_file=model_file,
         n_threads=n_threads,
         verbose=False,
     )
+    print(
+        "embedding projects with "
+        f"{model_repo}/{model_file}; first vector may download and load the GGUF model",
+        flush=True,
+    )
+    embeddings = []
+    for index, project in enumerate(projects, start=1):
+        embeddings.append(embedder.embed(project.searchable_text))
+        if index == 1 or index % 10 == 0 or index == len(projects):
+            print(f"embedded {index}/{len(projects)} projects", flush=True)
     metadata = {
         "model_repo": model_repo,
         "model_file": model_file,

tests/test_app.py CHANGED Viewed

@@ -173,25 +173,16 @@ def test_dashboard_refresh_embedding_build_runs_in_subprocess(monkeypatch, tmp_p
     monkeypatch.setenv("ADVISOR_EMBEDDING_MODEL_PATH", "/tmp/model.gguf")
     captured = {}
-    def fake_run(command, *, cwd, capture_output, text, check):
         captured["command"] = command
-        captured["cwd"] = cwd
-        captured["capture_output"] = capture_output
-        captured["text"] = text
-        captured["check"] = check
         index_path.write_text(json.dumps({"schema": "ok"}), encoding="utf-8")
-        return app_module.subprocess.CompletedProcess(command, 0, "wrote index", "")
-    monkeypatch.setattr(app_module.subprocess, "run", fake_run)
     payload = app_module._build_refresh_index_payload(project_path, index_path)
     command = captured["command"]
     assert payload == {"schema": "ok"}
-    assert captured["cwd"] == app_module.ROOT
-    assert captured["capture_output"] is True
-    assert captured["text"] is True
-    assert captured["check"] is False
     assert command[1].endswith("scripts/build_project_index.py")
     assert command[command.index("--model-repo") + 1] == "test/repo"
     assert command[command.index("--model-file") + 1] == "model.gguf"
@@ -200,6 +191,27 @@ def test_dashboard_refresh_embedding_build_runs_in_subprocess(monkeypatch, tmp_p
     assert command[command.index("--builder") + 1] == "app.py:/api/dashboard/refresh"
 def test_dashboard_refresh_persists_and_swaps_latest(monkeypatch, tmp_path) -> None:
     monkeypatch.setenv("ADVISOR_CACHE_DIR", str(tmp_path))
     _reset_refresh_state()

     monkeypatch.setenv("ADVISOR_EMBEDDING_MODEL_PATH", "/tmp/model.gguf")
     captured = {}
+    def fake_run_refresh_index_command(command):
         captured["command"] = command
         index_path.write_text(json.dumps({"schema": "ok"}), encoding="utf-8")
+    monkeypatch.setattr(app_module, "_run_refresh_index_command", fake_run_refresh_index_command)
     payload = app_module._build_refresh_index_payload(project_path, index_path)
     command = captured["command"]
     assert payload == {"schema": "ok"}
     assert command[1].endswith("scripts/build_project_index.py")
     assert command[command.index("--model-repo") + 1] == "test/repo"
     assert command[command.index("--model-file") + 1] == "model.gguf"
     assert command[command.index("--builder") + 1] == "app.py:/api/dashboard/refresh"
def test_refresh_subprocess_env_uses_cache_dir_for_hf_home(monkeypatch, tmp_path) -> None:
    """HF_HOME defaults to ``<ADVISOR_CACHE_DIR>/huggingface`` when it is unset."""
    expected_hf_home = tmp_path / "huggingface"
    monkeypatch.setenv("ADVISOR_CACHE_DIR", str(tmp_path))
    monkeypatch.delenv("HF_HOME", raising=False)

    subprocess_env = app_module._refresh_subprocess_env()

    assert subprocess_env["HF_HOME"] == str(expected_hf_home)
    assert expected_hf_home.is_dir()
def test_refresh_embedding_timeout_rejects_non_positive_env(monkeypatch) -> None:
    """A timeout of zero must be rejected with a descriptive RuntimeError."""
    monkeypatch.setenv("ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS", "0")
    caught = None
    try:
        app_module._refresh_embedding_timeout_seconds()
    except RuntimeError as error:
        caught = error
    if caught is None:
        raise AssertionError("non-positive refresh embedding timeout should fail")
    assert "must be a positive integer" in str(caught)
 def test_dashboard_refresh_persists_and_swaps_latest(monkeypatch, tmp_path) -> None:
     monkeypatch.setenv("ADVISOR_CACHE_DIR", str(tmp_path))
     _reset_refresh_state()