Spaces:
Running on Zero
Running on Zero
feat: cache dashboard quest analysis
Browse filesCo-authored-by: Codex <noreply@openai.com>
- README.md +19 -3
- app.py +440 -19
- hackathon_advisor/dashboard_storage.py +16 -5
- hackathon_advisor/quest_cache.py +284 -0
- static/app.js +14 -1
- tests/test_app.py +117 -8
- tests/test_quest_cache.py +96 -0
README.md
CHANGED
|
@@ -86,9 +86,20 @@ then swaps the live app to the new dashboard. `GET /api/dashboard/refresh` polls
|
|
| 86 |
|
| 87 |
Live refresh requires a writable dashboard cache directory at `ADVISOR_CACHE_DIR`. On Hugging Face Spaces this should be
|
| 88 |
a mounted Storage Bucket; locally it can be a normal directory such as `.cache/advisor-dashboard`. The job writes
|
| 89 |
-
`runs/{run_id}/projects.json`, `project_index.json`, `dashboard.json`, and `manifest.json`, then
|
| 90 |
-
`latest.json`.
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
Set `ADVISOR_QUEST_ANALYZER_BACKEND=minicpm-transformers` for both local and deployed refresh runs. The local dashboard
|
| 94 |
uses the same MiniCPM analyzer as the deployed Space; test doubles are only used inside pytest.
|
|
@@ -221,6 +232,11 @@ ADVISOR_QUEST_ANALYZER_BACKEND=minicpm-transformers
|
|
| 221 |
ADVISOR_QUEST_ADAPTER_ID=artifacts/quest-lora
|
| 222 |
ADVISOR_QUEST_ANALYSIS_BATCH_SIZE=8
|
| 223 |
ADVISOR_CACHE_DIR=/data/advisor-cache
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS=1800
|
| 225 |
ADVISOR_EMBEDDING_MODEL_REPO=ggml-org/embeddinggemma-300m-qat-q8_0-GGUF
|
| 226 |
ADVISOR_EMBEDDING_MODEL_FILE=embeddinggemma-300m-qat-Q8_0.gguf
|
|
|
|
| 86 |
|
| 87 |
Live refresh requires a writable dashboard cache directory at `ADVISOR_CACHE_DIR`. On Hugging Face Spaces this should be
|
| 88 |
a mounted Storage Bucket; locally it can be a normal directory such as `.cache/advisor-dashboard`. The job writes
|
| 89 |
+
`runs/{run_id}/projects.json`, `project_index.json`, `dashboard.json`, `quest_analysis.json`, and `manifest.json`, then
|
| 90 |
+
atomically updates `latest.json`. Quest analysis also keeps validated per-project records under
|
| 91 |
+
`quest-cache/v1/{prefix}/{cache_key}.json`, keyed by the rendered README+app-file prompt hash, taxonomy hash, MiniCPM
|
| 92 |
+
model id, adapter id/revision, local adapter digest, and generation config. Refresh logs every cache hit, miss, and newly
|
| 93 |
+
analyzed project. If the cache directory is missing, not writable, or quest analysis fails validation, refresh fails and
|
| 94 |
+
the current validated dashboard stays active.
|
| 95 |
+
|
| 96 |
+
When `ADVISOR_CACHE_DIR` is set, the app starts a scheduler thread that checks once per hour and starts a normal
|
| 97 |
+
dashboard refresh if no refresh is already running. `ADVISOR_SCHEDULED_REFRESH=0` or
|
| 98 |
+
`ADVISOR_DISABLE_SCHEDULED_REFRESH=1` disables it; `ADVISOR_REFRESH_INTERVAL_SECONDS`,
|
| 99 |
+
`ADVISOR_REFRESH_INITIAL_DELAY_SECONDS`, and `ADVISOR_SCHEDULED_REFRESH_COMPUTE` tune the cadence and compute mode.
|
| 100 |
+
Manual and scheduled refreshes both acquire `$ADVISOR_CACHE_DIR/refresh.lock` atomically before work starts, so multiple
|
| 101 |
+
app processes do not analyze the same snapshot concurrently. Stale locks expire after `ADVISOR_REFRESH_LOCK_TTL_SECONDS`
|
| 102 |
+
(default two hours).
|
| 103 |
|
| 104 |
Set `ADVISOR_QUEST_ANALYZER_BACKEND=minicpm-transformers` for both local and deployed refresh runs. The local dashboard
|
| 105 |
uses the same MiniCPM analyzer as the deployed Space; test doubles are only used inside pytest.
|
|
|
|
| 232 |
ADVISOR_QUEST_ADAPTER_ID=artifacts/quest-lora
|
| 233 |
ADVISOR_QUEST_ANALYSIS_BATCH_SIZE=8
|
| 234 |
ADVISOR_CACHE_DIR=/data/advisor-cache
|
| 235 |
+
ADVISOR_REFRESH_COMPUTE=cpu
|
| 236 |
+
ADVISOR_SCHEDULED_REFRESH_COMPUTE=cpu
|
| 237 |
+
ADVISOR_REFRESH_INTERVAL_SECONDS=3600
|
| 238 |
+
ADVISOR_REFRESH_INITIAL_DELAY_SECONDS=300
|
| 239 |
+
ADVISOR_REFRESH_LOCK_TTL_SECONDS=7200
|
| 240 |
ADVISOR_REFRESH_EMBEDDING_TIMEOUT_SECONDS=1800
|
| 241 |
ADVISOR_EMBEDDING_MODEL_REPO=ggml-org/embeddinggemma-300m-qat-q8_0-GGUF
|
| 242 |
ADVISOR_EMBEDDING_MODEL_FILE=embeddinggemma-300m-qat-Q8_0.gguf
|
app.py
CHANGED
|
@@ -49,6 +49,13 @@ from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
|
|
| 49 |
from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
|
| 50 |
from hackathon_advisor.png_export import artifact_png_filename, render_artifact_png
|
| 51 |
from hackathon_advisor.prize_ledger import prize_ledger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
from hackathon_advisor.quest_analysis import create_quest_analyzer, validate_matches_by_project
|
| 53 |
from hackathon_advisor.runtime_hooks import install_asyncio_cleanup_hook
|
| 54 |
from hackathon_advisor.submission_packet import build_submission_packet_markdown
|
|
@@ -71,6 +78,11 @@ AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga
|
|
| 71 |
DEFAULT_HF_ORG = "build-small-hackathon"
|
| 72 |
DEFAULT_REFRESH_EMBEDDING_TIMEOUT_SECONDS = 1800
|
| 73 |
DEFAULT_QUEST_ANALYSIS_BATCH_SIZE = 8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
REFRESH_SUBPROCESS_LOG_TAIL_LINES = 80
|
| 75 |
REFRESH_STAGE_LABELS = {
|
| 76 |
"crawling": "Fetching public Spaces",
|
|
@@ -83,6 +95,19 @@ REFRESH_STAGE_LABELS = {
|
|
| 83 |
|
| 84 |
_runtime_lock = Lock()
|
| 85 |
_refresh_lock = Lock()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def _load_initial_runtime() -> tuple[ProjectIndex, dict[str, Any]]:
|
|
@@ -107,12 +132,15 @@ _cpu_engine: AdvisorEngine | None = None
|
|
| 107 |
_refresh_state: dict[str, Any] = {
|
| 108 |
"status": "idle",
|
| 109 |
"run_id": "",
|
|
|
|
|
|
|
| 110 |
"stage": "",
|
| 111 |
"stage_label": "",
|
| 112 |
"started_at": "",
|
| 113 |
"finished_at": "",
|
| 114 |
"error": "",
|
| 115 |
"result": None,
|
|
|
|
| 116 |
}
|
| 117 |
|
| 118 |
|
|
@@ -140,7 +168,13 @@ def _transcribe_voice(audio_path: str) -> dict[str, Any]:
|
|
| 140 |
return voice_transcriber.transcribe(Path(audio_path)).to_dict()
|
| 141 |
|
| 142 |
|
| 143 |
-
def _analyze_dashboard_quests(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
missing_evidence_keys = [
|
| 145 |
str(item.get("id") or index)
|
| 146 |
for index, item in enumerate(project_rows)
|
|
@@ -152,25 +186,144 @@ def _analyze_dashboard_quests(project_rows: list[dict[str, Any]]) -> dict[str, A
|
|
| 152 |
f"missing evidence keys for {len(missing_evidence_keys)} projects"
|
| 153 |
)
|
| 154 |
projects = [Project.from_dict(item) for item in project_rows]
|
|
|
|
| 155 |
matches_by_project: dict[str, list[dict[str, Any]]] = {}
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
batch_size = _quest_analysis_batch_size()
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
source = str(result["source"])
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
validated = validate_matches_by_project(matches_by_project, projects, source=source)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
return {
|
| 165 |
"source": validated.source,
|
| 166 |
"matches_by_project": validated.matches_by_project,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
}
|
| 168 |
|
| 169 |
|
| 170 |
@gpu_task
|
| 171 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
projects = [Project.from_dict(item) for item in project_rows]
|
| 173 |
-
analyzer = create_quest_analyzer(device=
|
| 174 |
matches = analyzer.analyze(projects)
|
| 175 |
source = getattr(analyzer, "source", "quest-analyzer")
|
| 176 |
validated = validate_matches_by_project(matches, projects, source=source)
|
|
@@ -192,41 +345,191 @@ def _quest_analysis_batch_size() -> int:
|
|
| 192 |
|
| 193 |
def _refresh_public_state() -> dict[str, Any]:
|
| 194 |
with _refresh_lock:
|
| 195 |
-
|
|
|
|
|
|
|
| 196 |
|
| 197 |
|
| 198 |
def _set_refresh_state(**updates: Any) -> None:
|
| 199 |
with _refresh_lock:
|
|
|
|
|
|
|
| 200 |
_refresh_state.update(updates)
|
| 201 |
stage = str(_refresh_state.get("stage") or "")
|
| 202 |
_refresh_state["stage_label"] = REFRESH_STAGE_LABELS.get(stage, "")
|
| 203 |
|
| 204 |
|
| 205 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
with _refresh_lock:
|
| 207 |
if _refresh_state.get("status") == "running":
|
| 208 |
raise HTTPException(status_code=409, detail="Dashboard refresh is already running.")
|
| 209 |
run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + "-" + uuid4().hex[:8]
|
|
|
|
| 210 |
_refresh_state.update(
|
| 211 |
{
|
| 212 |
"status": "running",
|
| 213 |
"run_id": run_id,
|
|
|
|
|
|
|
| 214 |
"stage": "crawling",
|
| 215 |
"stage_label": REFRESH_STAGE_LABELS["crawling"],
|
| 216 |
"started_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 217 |
"finished_at": "",
|
| 218 |
"error": "",
|
| 219 |
"result": None,
|
|
|
|
| 220 |
}
|
| 221 |
)
|
| 222 |
-
thread = Thread(target=_run_refresh_job, args=(run_id, cache_dir), daemon=True)
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
return _refresh_public_state()
|
| 225 |
|
| 226 |
|
| 227 |
-
def _run_refresh_job(run_id: str, cache_dir: Path) -> None:
|
| 228 |
try:
|
| 229 |
-
projects_payload, index_payload, refreshed_dashboard = _build_refresh_payloads(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
_set_refresh_state(stage="persisting")
|
| 231 |
artifacts = persist_refresh_artifacts(
|
| 232 |
cache_dir,
|
|
@@ -234,9 +537,11 @@ def _run_refresh_job(run_id: str, cache_dir: Path) -> None:
|
|
| 234 |
projects_payload=projects_payload,
|
| 235 |
index_payload=index_payload,
|
| 236 |
dashboard_payload=refreshed_dashboard,
|
|
|
|
| 237 |
)
|
| 238 |
_set_refresh_state(stage="swapping")
|
| 239 |
_replace_runtime_from_files(artifacts.projects_path, artifacts.index_path, artifacts.dashboard)
|
|
|
|
| 240 |
_set_refresh_state(
|
| 241 |
status="succeeded",
|
| 242 |
stage="",
|
|
@@ -246,11 +551,13 @@ def _run_refresh_job(run_id: str, cache_dir: Path) -> None:
|
|
| 246 |
"project_count": refreshed_dashboard["project_count"],
|
| 247 |
"snapshot_digest": refreshed_dashboard["provenance"]["snapshot_digest"],
|
| 248 |
"dashboard_generated_at": refreshed_dashboard["generated_at"],
|
|
|
|
| 249 |
},
|
| 250 |
)
|
| 251 |
except Exception as error: # noqa: BLE001 - background job must report every failure as state
|
| 252 |
print("[dashboard-refresh] failed", flush=True)
|
| 253 |
traceback.print_exception(type(error), error, error.__traceback__)
|
|
|
|
| 254 |
_set_refresh_state(
|
| 255 |
status="failed",
|
| 256 |
stage="",
|
|
@@ -258,9 +565,16 @@ def _run_refresh_job(run_id: str, cache_dir: Path) -> None:
|
|
| 258 |
error=_format_refresh_error(error),
|
| 259 |
result=None,
|
| 260 |
)
|
|
|
|
|
|
|
| 261 |
|
| 262 |
|
| 263 |
-
def _build_refresh_payloads(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
from scripts.crawl_hf_spaces import API, crawl_projects
|
| 265 |
|
| 266 |
org = os.environ.get("ADVISOR_HF_ORG", DEFAULT_HF_ORG).strip() or DEFAULT_HF_ORG
|
|
@@ -294,14 +608,19 @@ def _build_refresh_payloads(run_id: str) -> tuple[dict[str, Any], dict[str, Any]
|
|
| 294 |
)
|
| 295 |
|
| 296 |
_set_refresh_state(stage="quest_analysis")
|
| 297 |
-
quest_analysis = _analyze_dashboard_quests(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
_set_refresh_state(stage="atlas")
|
| 299 |
refreshed_dashboard = build_dashboard_payload(
|
| 300 |
refreshed_index,
|
| 301 |
quest_matches=quest_analysis["matches_by_project"],
|
| 302 |
quest_source=str(quest_analysis["source"]),
|
| 303 |
)
|
| 304 |
-
return projects_payload, index_payload, refreshed_dashboard
|
| 305 |
|
| 306 |
|
| 307 |
def _build_refresh_index_payload(
|
|
@@ -561,12 +880,13 @@ def dashboard() -> dict:
|
|
| 561 |
|
| 562 |
|
| 563 |
@app.post("/api/dashboard/refresh")
|
| 564 |
-
def dashboard_refresh_start() -> JSONResponse:
|
| 565 |
try:
|
| 566 |
cache_dir = require_writable_cache_dir()
|
| 567 |
except DashboardStorageError as error:
|
| 568 |
raise HTTPException(status_code=400, detail=str(error)) from error
|
| 569 |
-
|
|
|
|
| 570 |
|
| 571 |
|
| 572 |
@app.get("/api/dashboard/refresh")
|
|
@@ -574,6 +894,104 @@ def dashboard_refresh_status() -> dict:
|
|
| 574 |
return _refresh_public_state()
|
| 575 |
|
| 576 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 577 |
@app.get("/health")
|
| 578 |
def health() -> dict:
|
| 579 |
return {
|
|
@@ -817,6 +1235,9 @@ def agent_turn(message: str, session_json: str = "{}", compute: str = "gpu") ->
|
|
| 817 |
yield from _agent_turn_events(message, session_json, _normalize_compute(compute))
|
| 818 |
|
| 819 |
|
|
|
|
|
|
|
|
|
|
| 820 |
if __name__ == "__main__":
|
| 821 |
app.launch(
|
| 822 |
server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
|
|
|
|
| 49 |
from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
|
| 50 |
from hackathon_advisor.png_export import artifact_png_filename, render_artifact_png
|
| 51 |
from hackathon_advisor.prize_ledger import prize_ledger
|
| 52 |
+
from hackathon_advisor.quest_cache import (
|
| 53 |
+
build_quest_analysis_run_payload,
|
| 54 |
+
quest_analyzer_fingerprint_from_env,
|
| 55 |
+
quest_cache_run_record,
|
| 56 |
+
read_quest_cache_entry,
|
| 57 |
+
write_quest_cache_entry,
|
| 58 |
+
)
|
| 59 |
from hackathon_advisor.quest_analysis import create_quest_analyzer, validate_matches_by_project
|
| 60 |
from hackathon_advisor.runtime_hooks import install_asyncio_cleanup_hook
|
| 61 |
from hackathon_advisor.submission_packet import build_submission_packet_markdown
|
|
|
|
| 78 |
DEFAULT_HF_ORG = "build-small-hackathon"
|
| 79 |
DEFAULT_REFRESH_EMBEDDING_TIMEOUT_SECONDS = 1800
|
| 80 |
DEFAULT_QUEST_ANALYSIS_BATCH_SIZE = 8
|
| 81 |
+
DEFAULT_REFRESH_COMPUTE = "cpu"
|
| 82 |
+
DEFAULT_SCHEDULED_REFRESH_INTERVAL_SECONDS = 3600
|
| 83 |
+
DEFAULT_SCHEDULED_REFRESH_INITIAL_DELAY_SECONDS = 300
|
| 84 |
+
DEFAULT_REFRESH_LOCK_TTL_SECONDS = 7200
|
| 85 |
+
REFRESH_LOCK_FILENAME = "refresh.lock"
|
| 86 |
REFRESH_SUBPROCESS_LOG_TAIL_LINES = 80
|
| 87 |
REFRESH_STAGE_LABELS = {
|
| 88 |
"crawling": "Fetching public Spaces",
|
|
|
|
| 95 |
|
| 96 |
_runtime_lock = Lock()
|
| 97 |
_refresh_lock = Lock()
|
| 98 |
+
_scheduler_lock = Lock()
|
| 99 |
+
_scheduler_started = False
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _empty_quest_cache_progress() -> dict[str, Any]:
|
| 103 |
+
return {
|
| 104 |
+
"project_count": 0,
|
| 105 |
+
"hit_count": 0,
|
| 106 |
+
"miss_count": 0,
|
| 107 |
+
"analyzed_count": 0,
|
| 108 |
+
"remaining_count": 0,
|
| 109 |
+
"last_project_id": "",
|
| 110 |
+
}
|
| 111 |
|
| 112 |
|
| 113 |
def _load_initial_runtime() -> tuple[ProjectIndex, dict[str, Any]]:
|
|
|
|
| 132 |
_refresh_state: dict[str, Any] = {
|
| 133 |
"status": "idle",
|
| 134 |
"run_id": "",
|
| 135 |
+
"compute": "",
|
| 136 |
+
"reason": "",
|
| 137 |
"stage": "",
|
| 138 |
"stage_label": "",
|
| 139 |
"started_at": "",
|
| 140 |
"finished_at": "",
|
| 141 |
"error": "",
|
| 142 |
"result": None,
|
| 143 |
+
"quest_cache": _empty_quest_cache_progress(),
|
| 144 |
}
|
| 145 |
|
| 146 |
|
|
|
|
| 168 |
return voice_transcriber.transcribe(Path(audio_path)).to_dict()
|
| 169 |
|
| 170 |
|
| 171 |
+
def _analyze_dashboard_quests(
|
| 172 |
+
project_rows: list[dict[str, Any]],
|
| 173 |
+
*,
|
| 174 |
+
cache_dir: Path,
|
| 175 |
+
compute: str,
|
| 176 |
+
run_id: str,
|
| 177 |
+
) -> dict[str, Any]:
|
| 178 |
missing_evidence_keys = [
|
| 179 |
str(item.get("id") or index)
|
| 180 |
for index, item in enumerate(project_rows)
|
|
|
|
| 186 |
f"missing evidence keys for {len(missing_evidence_keys)} projects"
|
| 187 |
)
|
| 188 |
projects = [Project.from_dict(item) for item in project_rows]
|
| 189 |
+
analyzer_fingerprint = quest_analyzer_fingerprint_from_env()
|
| 190 |
matches_by_project: dict[str, list[dict[str, Any]]] = {}
|
| 191 |
+
record_by_project: dict[str, dict[str, Any]] = {}
|
| 192 |
+
misses: list[tuple[Project, dict[str, Any]]] = []
|
| 193 |
+
hit_count = 0
|
| 194 |
+
miss_count = 0
|
| 195 |
+
analyzed_count = 0
|
| 196 |
+
source = str(analyzer_fingerprint["source"])
|
| 197 |
batch_size = _quest_analysis_batch_size()
|
| 198 |
+
_set_quest_cache_progress(
|
| 199 |
+
project_count=len(projects),
|
| 200 |
+
hit_count=0,
|
| 201 |
+
miss_count=0,
|
| 202 |
+
analyzed_count=0,
|
| 203 |
+
remaining_count=len(projects),
|
| 204 |
+
last_project_id="",
|
| 205 |
+
)
|
| 206 |
+
|
| 207 |
+
for project in projects:
|
| 208 |
+
lookup = read_quest_cache_entry(cache_dir, project, analyzer_fingerprint)
|
| 209 |
+
if lookup.entry is not None:
|
| 210 |
+
hit_count += 1
|
| 211 |
+
matches_by_project[project.id] = lookup.entry.matches
|
| 212 |
+
record_by_project[project.id] = quest_cache_run_record(
|
| 213 |
+
project=project,
|
| 214 |
+
identity=lookup.identity,
|
| 215 |
+
matches=lookup.entry.matches,
|
| 216 |
+
status="cached",
|
| 217 |
+
source=lookup.entry.source,
|
| 218 |
+
path=lookup.entry.path,
|
| 219 |
+
)
|
| 220 |
+
print(
|
| 221 |
+
f"[quest-cache] hit {project.id} key={lookup.identity.cache_key[:12]} "
|
| 222 |
+
f"matches={len(lookup.entry.matches)}",
|
| 223 |
+
flush=True,
|
| 224 |
+
)
|
| 225 |
+
else:
|
| 226 |
+
miss_count += 1
|
| 227 |
+
misses.append((project, lookup.identity.to_dict()))
|
| 228 |
+
print(
|
| 229 |
+
f"[quest-cache] miss {project.id} key={lookup.identity.cache_key[:12]} "
|
| 230 |
+
f"reason={lookup.reason}",
|
| 231 |
+
flush=True,
|
| 232 |
+
)
|
| 233 |
+
_set_quest_cache_progress(
|
| 234 |
+
project_count=len(projects),
|
| 235 |
+
hit_count=hit_count,
|
| 236 |
+
miss_count=miss_count,
|
| 237 |
+
analyzed_count=analyzed_count,
|
| 238 |
+
remaining_count=len(projects) - hit_count - analyzed_count,
|
| 239 |
+
last_project_id=project.id,
|
| 240 |
+
)
|
| 241 |
+
|
| 242 |
+
for start in range(0, len(misses), batch_size):
|
| 243 |
+
batch = misses[start : start + batch_size]
|
| 244 |
+
batch_projects = [item[0] for item in batch]
|
| 245 |
+
batch_rows = [project.to_refresh_snapshot_dict() for project in batch_projects]
|
| 246 |
+
result = _analyze_dashboard_quest_batch(batch_rows, compute=compute)
|
| 247 |
source = str(result["source"])
|
| 248 |
+
validated_batch = validate_matches_by_project(
|
| 249 |
+
result["matches_by_project"],
|
| 250 |
+
batch_projects,
|
| 251 |
+
source=source,
|
| 252 |
+
)
|
| 253 |
+
for project, _identity_row in batch:
|
| 254 |
+
entry = write_quest_cache_entry(
|
| 255 |
+
cache_dir,
|
| 256 |
+
project,
|
| 257 |
+
analyzer_fingerprint,
|
| 258 |
+
validated_batch.matches_by_project[project.id],
|
| 259 |
+
source=source,
|
| 260 |
+
)
|
| 261 |
+
analyzed_count += 1
|
| 262 |
+
matches_by_project[project.id] = entry.matches
|
| 263 |
+
record_by_project[project.id] = quest_cache_run_record(
|
| 264 |
+
project=project,
|
| 265 |
+
identity=entry.identity,
|
| 266 |
+
matches=entry.matches,
|
| 267 |
+
status="analyzed",
|
| 268 |
+
source=entry.source,
|
| 269 |
+
path=entry.path,
|
| 270 |
+
)
|
| 271 |
+
print(
|
| 272 |
+
f"[quest-cache] analyzed {project.id} key={entry.identity.cache_key[:12]} "
|
| 273 |
+
f"matches={len(entry.matches)}",
|
| 274 |
+
flush=True,
|
| 275 |
+
)
|
| 276 |
+
_set_quest_cache_progress(
|
| 277 |
+
project_count=len(projects),
|
| 278 |
+
hit_count=hit_count,
|
| 279 |
+
miss_count=miss_count,
|
| 280 |
+
analyzed_count=analyzed_count,
|
| 281 |
+
remaining_count=len(projects) - hit_count - analyzed_count,
|
| 282 |
+
last_project_id=project.id,
|
| 283 |
+
)
|
| 284 |
validated = validate_matches_by_project(matches_by_project, projects, source=source)
|
| 285 |
+
summary = {
|
| 286 |
+
"project_count": len(projects),
|
| 287 |
+
"hit_count": hit_count,
|
| 288 |
+
"miss_count": miss_count,
|
| 289 |
+
"analyzed_count": analyzed_count,
|
| 290 |
+
"remaining_count": 0,
|
| 291 |
+
"compute": compute,
|
| 292 |
+
}
|
| 293 |
+
project_records = [record_by_project[project.id] for project in projects]
|
| 294 |
return {
|
| 295 |
"source": validated.source,
|
| 296 |
"matches_by_project": validated.matches_by_project,
|
| 297 |
+
"quest_analysis_payload": build_quest_analysis_run_payload(
|
| 298 |
+
run_id=run_id,
|
| 299 |
+
analyzer_fingerprint=analyzer_fingerprint,
|
| 300 |
+
summary=summary,
|
| 301 |
+
project_records=project_records,
|
| 302 |
+
),
|
| 303 |
}
|
| 304 |
|
| 305 |
|
| 306 |
@gpu_task
|
| 307 |
+
def _analyze_dashboard_quest_batch_gpu(project_rows: list[dict[str, Any]]) -> dict[str, Any]:
|
| 308 |
+
return _analyze_dashboard_quest_batch_with_device(
|
| 309 |
+
project_rows,
|
| 310 |
+
device="cuda" if zero_gpu_enabled() else "local",
|
| 311 |
+
)
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def _analyze_dashboard_quest_batch_cpu(project_rows: list[dict[str, Any]]) -> dict[str, Any]:
|
| 315 |
+
return _analyze_dashboard_quest_batch_with_device(project_rows, device="cpu")
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
def _analyze_dashboard_quest_batch(project_rows: list[dict[str, Any]], *, compute: str) -> dict[str, Any]:
|
| 319 |
+
if compute == "gpu":
|
| 320 |
+
return _analyze_dashboard_quest_batch_gpu(project_rows)
|
| 321 |
+
return _analyze_dashboard_quest_batch_cpu(project_rows)
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
def _analyze_dashboard_quest_batch_with_device(project_rows: list[dict[str, Any]], *, device: str) -> dict[str, Any]:
|
| 325 |
projects = [Project.from_dict(item) for item in project_rows]
|
| 326 |
+
analyzer = create_quest_analyzer(device=device)
|
| 327 |
matches = analyzer.analyze(projects)
|
| 328 |
source = getattr(analyzer, "source", "quest-analyzer")
|
| 329 |
validated = validate_matches_by_project(matches, projects, source=source)
|
|
|
|
| 345 |
|
| 346 |
def _refresh_public_state() -> dict[str, Any]:
|
| 347 |
with _refresh_lock:
|
| 348 |
+
state = dict(_refresh_state)
|
| 349 |
+
state["quest_cache"] = dict(_refresh_state.get("quest_cache") or _empty_quest_cache_progress())
|
| 350 |
+
return state
|
| 351 |
|
| 352 |
|
| 353 |
def _set_refresh_state(**updates: Any) -> None:
|
| 354 |
with _refresh_lock:
|
| 355 |
+
if "quest_cache" in updates:
|
| 356 |
+
updates["quest_cache"] = dict(updates["quest_cache"])
|
| 357 |
_refresh_state.update(updates)
|
| 358 |
stage = str(_refresh_state.get("stage") or "")
|
| 359 |
_refresh_state["stage_label"] = REFRESH_STAGE_LABELS.get(stage, "")
|
| 360 |
|
| 361 |
|
| 362 |
+
def _set_quest_cache_progress(**updates: Any) -> None:
|
| 363 |
+
with _refresh_lock:
|
| 364 |
+
progress = dict(_refresh_state.get("quest_cache") or _empty_quest_cache_progress())
|
| 365 |
+
progress.update(updates)
|
| 366 |
+
_refresh_state["quest_cache"] = progress
|
| 367 |
+
|
| 368 |
+
|
| 369 |
+
def _normalize_refresh_compute(value: Any) -> str:
|
| 370 |
+
compute = str(value or "").strip().lower() or DEFAULT_REFRESH_COMPUTE
|
| 371 |
+
if compute not in {"cpu", "gpu"}:
|
| 372 |
+
raise HTTPException(status_code=400, detail="Dashboard refresh compute must be 'cpu' or 'gpu'.")
|
| 373 |
+
return compute
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def _default_refresh_compute() -> str:
|
| 377 |
+
return _normalize_refresh_compute(os.environ.get("ADVISOR_REFRESH_COMPUTE", DEFAULT_REFRESH_COMPUTE))
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def _refresh_lock_ttl_seconds() -> int:
|
| 381 |
+
raw = os.environ.get("ADVISOR_REFRESH_LOCK_TTL_SECONDS", "").strip()
|
| 382 |
+
if not raw:
|
| 383 |
+
return DEFAULT_REFRESH_LOCK_TTL_SECONDS
|
| 384 |
+
ttl = int(raw)
|
| 385 |
+
if ttl <= 0:
|
| 386 |
+
raise RuntimeError("ADVISOR_REFRESH_LOCK_TTL_SECONDS must be a positive integer.")
|
| 387 |
+
return ttl
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def _refresh_lock_path(cache_dir: Path) -> Path:
|
| 391 |
+
return cache_dir / REFRESH_LOCK_FILENAME
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def _acquire_refresh_lease(cache_dir: Path, *, run_id: str, compute: str, reason: str) -> None:
|
| 395 |
+
lock_path = _refresh_lock_path(cache_dir)
|
| 396 |
+
now = time.time()
|
| 397 |
+
lease = {
|
| 398 |
+
"schema_version": 1,
|
| 399 |
+
"run_id": run_id,
|
| 400 |
+
"compute": compute,
|
| 401 |
+
"reason": reason,
|
| 402 |
+
"owner": _refresh_owner(),
|
| 403 |
+
"started_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 404 |
+
"expires_at_epoch": now + _refresh_lock_ttl_seconds(),
|
| 405 |
+
}
|
| 406 |
+
while True:
|
| 407 |
+
try:
|
| 408 |
+
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
|
| 409 |
+
except FileExistsError as error:
|
| 410 |
+
existing = _read_refresh_lease(lock_path)
|
| 411 |
+
if existing is None or _refresh_lease_expired(existing):
|
| 412 |
+
run_label = str((existing or {}).get("run_id") or "unknown")
|
| 413 |
+
print(f"[dashboard-refresh] removing stale refresh lock run={run_label}", flush=True)
|
| 414 |
+
try:
|
| 415 |
+
lock_path.unlink()
|
| 416 |
+
except FileNotFoundError:
|
| 417 |
+
pass
|
| 418 |
+
except OSError as unlink_error:
|
| 419 |
+
raise HTTPException(
|
| 420 |
+
status_code=409,
|
| 421 |
+
detail=f"Dashboard refresh lock exists and could not be removed: {unlink_error}",
|
| 422 |
+
) from unlink_error
|
| 423 |
+
continue
|
| 424 |
+
raise HTTPException(
|
| 425 |
+
status_code=409,
|
| 426 |
+
detail=(
|
| 427 |
+
"Dashboard refresh is already running "
|
| 428 |
+
f"(run {existing.get('run_id', 'unknown')}, owner {existing.get('owner', 'unknown')})."
|
| 429 |
+
),
|
| 430 |
+
) from error
|
| 431 |
+
with os.fdopen(fd, "w", encoding="utf-8") as handle:
|
| 432 |
+
handle.write(json.dumps(lease, ensure_ascii=False) + "\n")
|
| 433 |
+
print(
|
| 434 |
+
f"[dashboard-refresh] acquired refresh lock run={run_id} compute={compute} reason={reason}",
|
| 435 |
+
flush=True,
|
| 436 |
+
)
|
| 437 |
+
return
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
def _release_refresh_lease(cache_dir: Path, run_id: str) -> None:
|
| 441 |
+
lock_path = _refresh_lock_path(cache_dir)
|
| 442 |
+
existing = _read_refresh_lease(lock_path)
|
| 443 |
+
if existing is None:
|
| 444 |
+
return
|
| 445 |
+
if str(existing.get("run_id") or "") != run_id:
|
| 446 |
+
print(
|
| 447 |
+
f"[dashboard-refresh] refresh lock belongs to {existing.get('run_id', 'unknown')}; "
|
| 448 |
+
f"not releasing run={run_id}",
|
| 449 |
+
flush=True,
|
| 450 |
+
)
|
| 451 |
+
return
|
| 452 |
+
try:
|
| 453 |
+
lock_path.unlink()
|
| 454 |
+
except FileNotFoundError:
|
| 455 |
+
return
|
| 456 |
+
print(f"[dashboard-refresh] released refresh lock run={run_id}", flush=True)
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
def _read_refresh_lease(lock_path: Path) -> dict[str, Any] | None:
|
| 460 |
+
try:
|
| 461 |
+
payload = json.loads(lock_path.read_text(encoding="utf-8"))
|
| 462 |
+
except FileNotFoundError:
|
| 463 |
+
return None
|
| 464 |
+
except (OSError, json.JSONDecodeError):
|
| 465 |
+
return None
|
| 466 |
+
return payload if isinstance(payload, dict) else None
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
def _refresh_lease_expired(lease: dict[str, Any]) -> bool:
|
| 470 |
+
try:
|
| 471 |
+
expires_at = float(lease.get("expires_at_epoch"))
|
| 472 |
+
except (TypeError, ValueError):
|
| 473 |
+
return True
|
| 474 |
+
return expires_at <= time.time()
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def _refresh_owner() -> str:
|
| 478 |
+
node = getattr(os, "uname", lambda: None)()
|
| 479 |
+
host = getattr(node, "nodename", "") if node is not None else ""
|
| 480 |
+
return f"{host or 'process'}:{os.getpid()}"
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
def _start_refresh_thread(cache_dir: Path, *, compute: str, reason: str) -> dict[str, Any]:
|
| 484 |
+
compute = _normalize_refresh_compute(compute)
|
| 485 |
with _refresh_lock:
|
| 486 |
if _refresh_state.get("status") == "running":
|
| 487 |
raise HTTPException(status_code=409, detail="Dashboard refresh is already running.")
|
| 488 |
run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + "-" + uuid4().hex[:8]
|
| 489 |
+
_acquire_refresh_lease(cache_dir, run_id=run_id, compute=compute, reason=reason)
|
| 490 |
_refresh_state.update(
|
| 491 |
{
|
| 492 |
"status": "running",
|
| 493 |
"run_id": run_id,
|
| 494 |
+
"compute": compute,
|
| 495 |
+
"reason": reason,
|
| 496 |
"stage": "crawling",
|
| 497 |
"stage_label": REFRESH_STAGE_LABELS["crawling"],
|
| 498 |
"started_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 499 |
"finished_at": "",
|
| 500 |
"error": "",
|
| 501 |
"result": None,
|
| 502 |
+
"quest_cache": _empty_quest_cache_progress(),
|
| 503 |
}
|
| 504 |
)
|
| 505 |
+
thread = Thread(target=_run_refresh_job, args=(run_id, cache_dir, compute), daemon=True)
|
| 506 |
+
try:
|
| 507 |
+
thread.start()
|
| 508 |
+
except Exception:
|
| 509 |
+
_release_refresh_lease(cache_dir, run_id)
|
| 510 |
+
_set_refresh_state(
|
| 511 |
+
status="idle",
|
| 512 |
+
run_id="",
|
| 513 |
+
compute="",
|
| 514 |
+
reason="",
|
| 515 |
+
stage="",
|
| 516 |
+
started_at="",
|
| 517 |
+
finished_at="",
|
| 518 |
+
error="",
|
| 519 |
+
result=None,
|
| 520 |
+
quest_cache=_empty_quest_cache_progress(),
|
| 521 |
+
)
|
| 522 |
+
raise
|
| 523 |
return _refresh_public_state()
|
| 524 |
|
| 525 |
|
| 526 |
+
def _run_refresh_job(run_id: str, cache_dir: Path, compute: str) -> None:
|
| 527 |
try:
|
| 528 |
+
projects_payload, index_payload, refreshed_dashboard, quest_analysis_payload = _build_refresh_payloads(
|
| 529 |
+
run_id,
|
| 530 |
+
cache_dir=cache_dir,
|
| 531 |
+
compute=compute,
|
| 532 |
+
)
|
| 533 |
_set_refresh_state(stage="persisting")
|
| 534 |
artifacts = persist_refresh_artifacts(
|
| 535 |
cache_dir,
|
|
|
|
| 537 |
projects_payload=projects_payload,
|
| 538 |
index_payload=index_payload,
|
| 539 |
dashboard_payload=refreshed_dashboard,
|
| 540 |
+
quest_analysis_payload=quest_analysis_payload,
|
| 541 |
)
|
| 542 |
_set_refresh_state(stage="swapping")
|
| 543 |
_replace_runtime_from_files(artifacts.projects_path, artifacts.index_path, artifacts.dashboard)
|
| 544 |
+
_release_refresh_lease(cache_dir, run_id)
|
| 545 |
_set_refresh_state(
|
| 546 |
status="succeeded",
|
| 547 |
stage="",
|
|
|
|
| 551 |
"project_count": refreshed_dashboard["project_count"],
|
| 552 |
"snapshot_digest": refreshed_dashboard["provenance"]["snapshot_digest"],
|
| 553 |
"dashboard_generated_at": refreshed_dashboard["generated_at"],
|
| 554 |
+
"quest_cache": dict(quest_analysis_payload.get("summary") or {}),
|
| 555 |
},
|
| 556 |
)
|
| 557 |
except Exception as error: # noqa: BLE001 - background job must report every failure as state
|
| 558 |
print("[dashboard-refresh] failed", flush=True)
|
| 559 |
traceback.print_exception(type(error), error, error.__traceback__)
|
| 560 |
+
_release_refresh_lease(cache_dir, run_id)
|
| 561 |
_set_refresh_state(
|
| 562 |
status="failed",
|
| 563 |
stage="",
|
|
|
|
| 565 |
error=_format_refresh_error(error),
|
| 566 |
result=None,
|
| 567 |
)
|
| 568 |
+
finally:
|
| 569 |
+
_release_refresh_lease(cache_dir, run_id)
|
| 570 |
|
| 571 |
|
| 572 |
+
def _build_refresh_payloads(
|
| 573 |
+
run_id: str,
|
| 574 |
+
*,
|
| 575 |
+
cache_dir: Path,
|
| 576 |
+
compute: str,
|
| 577 |
+
) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any]]:
|
| 578 |
from scripts.crawl_hf_spaces import API, crawl_projects
|
| 579 |
|
| 580 |
org = os.environ.get("ADVISOR_HF_ORG", DEFAULT_HF_ORG).strip() or DEFAULT_HF_ORG
|
|
|
|
| 608 |
)
|
| 609 |
|
| 610 |
_set_refresh_state(stage="quest_analysis")
|
| 611 |
+
quest_analysis = _analyze_dashboard_quests(
|
| 612 |
+
[project.to_refresh_snapshot_dict() for project in projects],
|
| 613 |
+
cache_dir=cache_dir,
|
| 614 |
+
compute=compute,
|
| 615 |
+
run_id=run_id,
|
| 616 |
+
)
|
| 617 |
_set_refresh_state(stage="atlas")
|
| 618 |
refreshed_dashboard = build_dashboard_payload(
|
| 619 |
refreshed_index,
|
| 620 |
quest_matches=quest_analysis["matches_by_project"],
|
| 621 |
quest_source=str(quest_analysis["source"]),
|
| 622 |
)
|
| 623 |
+
return projects_payload, index_payload, refreshed_dashboard, quest_analysis["quest_analysis_payload"]
|
| 624 |
|
| 625 |
|
| 626 |
def _build_refresh_index_payload(
|
|
|
|
| 880 |
|
| 881 |
|
| 882 |
@app.post("/api/dashboard/refresh")
|
| 883 |
+
def dashboard_refresh_start(payload: dict[str, Any] | None = None) -> JSONResponse:
|
| 884 |
try:
|
| 885 |
cache_dir = require_writable_cache_dir()
|
| 886 |
except DashboardStorageError as error:
|
| 887 |
raise HTTPException(status_code=400, detail=str(error)) from error
|
| 888 |
+
compute = _refresh_compute_from_payload(payload)
|
| 889 |
+
return JSONResponse(_start_refresh_thread(cache_dir, compute=compute, reason="manual"), status_code=202)
|
| 890 |
|
| 891 |
|
| 892 |
@app.get("/api/dashboard/refresh")
|
|
|
|
| 894 |
return _refresh_public_state()
|
| 895 |
|
| 896 |
|
| 897 |
+
def _refresh_compute_from_payload(payload: dict[str, Any] | None) -> str:
|
| 898 |
+
payload = payload or {}
|
| 899 |
+
return _normalize_refresh_compute(payload.get("compute") or _default_refresh_compute())
|
| 900 |
+
|
| 901 |
+
|
| 902 |
+
def _start_scheduled_refresh_loop() -> None:
|
| 903 |
+
global _scheduler_started
|
| 904 |
+
if not _scheduled_refresh_enabled():
|
| 905 |
+
return
|
| 906 |
+
with _scheduler_lock:
|
| 907 |
+
if _scheduler_started:
|
| 908 |
+
return
|
| 909 |
+
_scheduler_started = True
|
| 910 |
+
interval = _scheduled_refresh_interval_seconds()
|
| 911 |
+
initial_delay = _scheduled_refresh_initial_delay_seconds()
|
| 912 |
+
compute = _scheduled_refresh_compute()
|
| 913 |
+
print(
|
| 914 |
+
"[dashboard-refresh scheduler] enabled "
|
| 915 |
+
f"interval={interval}s initial_delay={initial_delay}s compute={compute}",
|
| 916 |
+
flush=True,
|
| 917 |
+
)
|
| 918 |
+
Thread(
|
| 919 |
+
target=_scheduled_refresh_loop,
|
| 920 |
+
args=(interval, initial_delay),
|
| 921 |
+
daemon=True,
|
| 922 |
+
name="dashboard-refresh-scheduler",
|
| 923 |
+
).start()
|
| 924 |
+
|
| 925 |
+
|
| 926 |
+
def _scheduled_refresh_enabled() -> bool:
|
| 927 |
+
disabled = os.environ.get("ADVISOR_DISABLE_SCHEDULED_REFRESH", "").strip().lower()
|
| 928 |
+
if disabled in {"1", "true", "yes", "on"}:
|
| 929 |
+
return False
|
| 930 |
+
raw = os.environ.get("ADVISOR_SCHEDULED_REFRESH", "").strip().lower()
|
| 931 |
+
if raw:
|
| 932 |
+
return raw in {"1", "true", "yes", "on"}
|
| 933 |
+
return cache_dir_from_env() is not None
|
| 934 |
+
|
| 935 |
+
|
| 936 |
+
def _scheduled_refresh_interval_seconds() -> int:
|
| 937 |
+
raw = (
|
| 938 |
+
os.environ.get("ADVISOR_REFRESH_INTERVAL_SECONDS", "").strip()
|
| 939 |
+
or os.environ.get("ADVISOR_SCHEDULED_REFRESH_INTERVAL_SECONDS", "").strip()
|
| 940 |
+
)
|
| 941 |
+
if not raw:
|
| 942 |
+
return DEFAULT_SCHEDULED_REFRESH_INTERVAL_SECONDS
|
| 943 |
+
interval = int(raw)
|
| 944 |
+
if interval <= 0:
|
| 945 |
+
raise RuntimeError("ADVISOR_REFRESH_INTERVAL_SECONDS must be a positive integer.")
|
| 946 |
+
return interval
|
| 947 |
+
|
| 948 |
+
|
| 949 |
+
def _scheduled_refresh_initial_delay_seconds() -> int:
|
| 950 |
+
raw = os.environ.get("ADVISOR_REFRESH_INITIAL_DELAY_SECONDS", "").strip()
|
| 951 |
+
if not raw:
|
| 952 |
+
return DEFAULT_SCHEDULED_REFRESH_INITIAL_DELAY_SECONDS
|
| 953 |
+
delay = int(raw)
|
| 954 |
+
if delay < 0:
|
| 955 |
+
raise RuntimeError("ADVISOR_REFRESH_INITIAL_DELAY_SECONDS must not be negative.")
|
| 956 |
+
return delay
|
| 957 |
+
|
| 958 |
+
|
| 959 |
+
def _scheduled_refresh_compute() -> str:
|
| 960 |
+
return _normalize_refresh_compute(
|
| 961 |
+
os.environ.get("ADVISOR_SCHEDULED_REFRESH_COMPUTE", "").strip() or _default_refresh_compute()
|
| 962 |
+
)
|
| 963 |
+
|
| 964 |
+
|
| 965 |
+
def _scheduled_refresh_loop(interval_seconds: int, initial_delay_seconds: int) -> None:
|
| 966 |
+
if initial_delay_seconds:
|
| 967 |
+
time.sleep(initial_delay_seconds)
|
| 968 |
+
while True:
|
| 969 |
+
_run_scheduled_refresh_once()
|
| 970 |
+
time.sleep(interval_seconds)
|
| 971 |
+
|
| 972 |
+
|
| 973 |
+
def _run_scheduled_refresh_once() -> None:
|
| 974 |
+
try:
|
| 975 |
+
cache_dir = require_writable_cache_dir()
|
| 976 |
+
state = _start_refresh_thread(
|
| 977 |
+
cache_dir,
|
| 978 |
+
compute=_scheduled_refresh_compute(),
|
| 979 |
+
reason="scheduled",
|
| 980 |
+
)
|
| 981 |
+
print(
|
| 982 |
+
f"[dashboard-refresh scheduler] started run={state.get('run_id', '')} "
|
| 983 |
+
f"compute={state.get('compute', '')}",
|
| 984 |
+
flush=True,
|
| 985 |
+
)
|
| 986 |
+
except HTTPException as error:
|
| 987 |
+
if error.status_code == 409:
|
| 988 |
+
print(f"[dashboard-refresh scheduler] skipped: {error.detail}", flush=True)
|
| 989 |
+
return
|
| 990 |
+
print(f"[dashboard-refresh scheduler] failed to start: {error.detail}", flush=True)
|
| 991 |
+
except Exception as error: # noqa: BLE001 - scheduler must keep running after transient failures
|
| 992 |
+
print(f"[dashboard-refresh scheduler] failed to start: {_format_refresh_error(error)}", flush=True)
|
| 993 |
+
|
| 994 |
+
|
| 995 |
@app.get("/health")
|
| 996 |
def health() -> dict:
|
| 997 |
return {
|
|
|
|
| 1235 |
yield from _agent_turn_events(message, session_json, _normalize_compute(compute))
|
| 1236 |
|
| 1237 |
|
| 1238 |
+
_start_scheduled_refresh_loop()
|
| 1239 |
+
|
| 1240 |
+
|
| 1241 |
if __name__ == "__main__":
|
| 1242 |
app.launch(
|
| 1243 |
server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
|
hackathon_advisor/dashboard_storage.py
CHANGED
|
@@ -27,6 +27,7 @@ class DashboardArtifacts:
|
|
| 27 |
manifest_path: Path
|
| 28 |
dashboard: dict[str, Any]
|
| 29 |
manifest: dict[str, Any]
|
|
|
|
| 30 |
|
| 31 |
|
| 32 |
def cache_dir_from_env(env: dict[str, str] | None = None) -> Path | None:
|
|
@@ -88,6 +89,7 @@ def persist_refresh_artifacts(
|
|
| 88 |
projects_payload: dict[str, Any],
|
| 89 |
index_payload: dict[str, Any],
|
| 90 |
dashboard_payload: dict[str, Any],
|
|
|
|
| 91 |
) -> DashboardArtifacts:
|
| 92 |
validate_dashboard_payload(dashboard_payload)
|
| 93 |
relative_run_dir = Path("runs") / run_id
|
|
@@ -97,21 +99,27 @@ def persist_refresh_artifacts(
|
|
| 97 |
projects_path = run_dir / "projects.json"
|
| 98 |
index_path = run_dir / "project_index.json"
|
| 99 |
dashboard_path = run_dir / "dashboard.json"
|
|
|
|
| 100 |
manifest_path = run_dir / "manifest.json"
|
| 101 |
_write_json(projects_path, projects_payload)
|
| 102 |
_write_json(index_path, index_payload)
|
| 103 |
_write_json(dashboard_path, dashboard_payload)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
manifest = {
|
| 105 |
"schema_version": STORAGE_SCHEMA_VERSION,
|
| 106 |
"run_id": run_id,
|
| 107 |
"generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 108 |
"project_count": dashboard_payload["project_count"],
|
| 109 |
"snapshot_digest": dashboard_payload["provenance"]["snapshot_digest"],
|
| 110 |
-
"artifacts":
|
| 111 |
-
"projects": _relative(cache_dir, projects_path),
|
| 112 |
-
"index": _relative(cache_dir, index_path),
|
| 113 |
-
"dashboard": _relative(cache_dir, dashboard_path),
|
| 114 |
-
},
|
| 115 |
}
|
| 116 |
_write_json(manifest_path, manifest)
|
| 117 |
|
|
@@ -124,6 +132,8 @@ def persist_refresh_artifacts(
|
|
| 124 |
"dashboard": _relative(cache_dir, dashboard_path),
|
| 125 |
"manifest": _relative(cache_dir, manifest_path),
|
| 126 |
}
|
|
|
|
|
|
|
| 127 |
latest_path = cache_dir / LATEST_FILENAME
|
| 128 |
tmp_path = cache_dir / f".{LATEST_FILENAME}.{run_id}.tmp"
|
| 129 |
_write_json(tmp_path, latest)
|
|
@@ -135,6 +145,7 @@ def persist_refresh_artifacts(
|
|
| 135 |
manifest_path=manifest_path,
|
| 136 |
dashboard=dashboard_payload,
|
| 137 |
manifest=manifest,
|
|
|
|
| 138 |
)
|
| 139 |
|
| 140 |
|
|
|
|
| 27 |
manifest_path: Path
|
| 28 |
dashboard: dict[str, Any]
|
| 29 |
manifest: dict[str, Any]
|
| 30 |
+
quest_analysis_path: Path | None = None
|
| 31 |
|
| 32 |
|
| 33 |
def cache_dir_from_env(env: dict[str, str] | None = None) -> Path | None:
|
|
|
|
| 89 |
projects_payload: dict[str, Any],
|
| 90 |
index_payload: dict[str, Any],
|
| 91 |
dashboard_payload: dict[str, Any],
|
| 92 |
+
quest_analysis_payload: dict[str, Any] | None = None,
|
| 93 |
) -> DashboardArtifacts:
|
| 94 |
validate_dashboard_payload(dashboard_payload)
|
| 95 |
relative_run_dir = Path("runs") / run_id
|
|
|
|
| 99 |
projects_path = run_dir / "projects.json"
|
| 100 |
index_path = run_dir / "project_index.json"
|
| 101 |
dashboard_path = run_dir / "dashboard.json"
|
| 102 |
+
quest_analysis_path = run_dir / "quest_analysis.json" if quest_analysis_payload is not None else None
|
| 103 |
manifest_path = run_dir / "manifest.json"
|
| 104 |
_write_json(projects_path, projects_payload)
|
| 105 |
_write_json(index_path, index_payload)
|
| 106 |
_write_json(dashboard_path, dashboard_payload)
|
| 107 |
+
if quest_analysis_path is not None:
|
| 108 |
+
_write_json(quest_analysis_path, quest_analysis_payload)
|
| 109 |
+
artifact_paths = {
|
| 110 |
+
"projects": _relative(cache_dir, projects_path),
|
| 111 |
+
"index": _relative(cache_dir, index_path),
|
| 112 |
+
"dashboard": _relative(cache_dir, dashboard_path),
|
| 113 |
+
}
|
| 114 |
+
if quest_analysis_path is not None:
|
| 115 |
+
artifact_paths["quest_analysis"] = _relative(cache_dir, quest_analysis_path)
|
| 116 |
manifest = {
|
| 117 |
"schema_version": STORAGE_SCHEMA_VERSION,
|
| 118 |
"run_id": run_id,
|
| 119 |
"generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 120 |
"project_count": dashboard_payload["project_count"],
|
| 121 |
"snapshot_digest": dashboard_payload["provenance"]["snapshot_digest"],
|
| 122 |
+
"artifacts": artifact_paths,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
}
|
| 124 |
_write_json(manifest_path, manifest)
|
| 125 |
|
|
|
|
| 132 |
"dashboard": _relative(cache_dir, dashboard_path),
|
| 133 |
"manifest": _relative(cache_dir, manifest_path),
|
| 134 |
}
|
| 135 |
+
if quest_analysis_path is not None:
|
| 136 |
+
latest["quest_analysis"] = _relative(cache_dir, quest_analysis_path)
|
| 137 |
latest_path = cache_dir / LATEST_FILENAME
|
| 138 |
tmp_path = cache_dir / f".{LATEST_FILENAME}.{run_id}.tmp"
|
| 139 |
_write_json(tmp_path, latest)
|
|
|
|
| 145 |
manifest_path=manifest_path,
|
| 146 |
dashboard=dashboard_payload,
|
| 147 |
manifest=manifest,
|
| 148 |
+
quest_analysis_path=quest_analysis_path,
|
| 149 |
)
|
| 150 |
|
| 151 |
|
hackathon_advisor/quest_cache.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from collections.abc import Mapping, Sequence
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from datetime import datetime, timezone
|
| 6 |
+
from hashlib import sha256
|
| 7 |
+
import json
|
| 8 |
+
import os
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any
|
| 11 |
+
from uuid import uuid4
|
| 12 |
+
|
| 13 |
+
from hackathon_advisor.data import Project
|
| 14 |
+
from hackathon_advisor.model_runtime import DEFAULT_MODEL_ID
|
| 15 |
+
from hackathon_advisor.quest_analysis import (
|
| 16 |
+
DEFAULT_QUEST_ADAPTER_ID,
|
| 17 |
+
DEFAULT_QUEST_ADAPTER_REVISION,
|
| 18 |
+
MAX_QUEST_TOKENS,
|
| 19 |
+
QuestAnalysisError,
|
| 20 |
+
render_project_quest_prompt,
|
| 21 |
+
validate_matches_by_project,
|
| 22 |
+
)
|
| 23 |
+
from hackathon_advisor.quest_taxonomy import (
|
| 24 |
+
APP_PROMPT_CHAR_LIMIT,
|
| 25 |
+
QUEST_PROFILES,
|
| 26 |
+
QUEST_SYSTEM_PROMPT,
|
| 27 |
+
README_PROMPT_CHAR_LIMIT,
|
| 28 |
+
)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
QUEST_CACHE_SCHEMA_VERSION = 1
|
| 32 |
+
QUEST_CACHE_ROOT = Path("quest-cache") / "v1"
|
| 33 |
+
QUEST_PROMPT_VERSION = "quest-prompt-v1"
|
| 34 |
+
QUEST_ANALYZER_SOURCE = "minicpm-json-quest-analyzer"
|
| 35 |
+
QUEST_GENERATION_CONFIG = {
|
| 36 |
+
"enable_thinking": False,
|
| 37 |
+
"temperature": 0.0,
|
| 38 |
+
"do_sample": False,
|
| 39 |
+
"max_new_tokens": MAX_QUEST_TOKENS,
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
@dataclass(frozen=True)
|
| 44 |
+
class QuestCacheIdentity:
|
| 45 |
+
project_id: str
|
| 46 |
+
prompt_hash: str
|
| 47 |
+
taxonomy_hash: str
|
| 48 |
+
analyzer_fingerprint: dict[str, Any]
|
| 49 |
+
cache_key: str
|
| 50 |
+
|
| 51 |
+
def to_dict(self) -> dict[str, Any]:
|
| 52 |
+
return {
|
| 53 |
+
"project_id": self.project_id,
|
| 54 |
+
"prompt_hash": self.prompt_hash,
|
| 55 |
+
"taxonomy_hash": self.taxonomy_hash,
|
| 56 |
+
"analyzer_fingerprint": self.analyzer_fingerprint,
|
| 57 |
+
"cache_key": self.cache_key,
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@dataclass(frozen=True)
|
| 62 |
+
class QuestCacheEntry:
|
| 63 |
+
identity: QuestCacheIdentity
|
| 64 |
+
matches: list[dict[str, Any]]
|
| 65 |
+
source: str
|
| 66 |
+
path: Path
|
| 67 |
+
generated_at: str
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@dataclass(frozen=True)
|
| 71 |
+
class QuestCacheLookup:
|
| 72 |
+
identity: QuestCacheIdentity
|
| 73 |
+
entry: QuestCacheEntry | None
|
| 74 |
+
reason: str
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def quest_analyzer_fingerprint_from_env(env: Mapping[str, str] | None = None) -> dict[str, Any]:
|
| 78 |
+
values = env or os.environ
|
| 79 |
+
model_id = _first_env(values, "ADVISOR_QUEST_MODEL_ID", "ADVISOR_MODEL_ID") or DEFAULT_MODEL_ID
|
| 80 |
+
adapter_id = values.get("ADVISOR_QUEST_ADAPTER_ID", DEFAULT_QUEST_ADAPTER_ID).strip()
|
| 81 |
+
adapter_revision = values.get("ADVISOR_QUEST_ADAPTER_REVISION", DEFAULT_QUEST_ADAPTER_REVISION).strip()
|
| 82 |
+
return {
|
| 83 |
+
"source": QUEST_ANALYZER_SOURCE,
|
| 84 |
+
"model_id": model_id,
|
| 85 |
+
"adapter_id": adapter_id,
|
| 86 |
+
"adapter_revision": adapter_revision,
|
| 87 |
+
"adapter_digest": _local_artifact_digest(adapter_id),
|
| 88 |
+
"prompt_version": QUEST_PROMPT_VERSION,
|
| 89 |
+
"generation": dict(QUEST_GENERATION_CONFIG),
|
| 90 |
+
}
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def quest_taxonomy_hash() -> str:
|
| 94 |
+
payload = {
|
| 95 |
+
"system_prompt": QUEST_SYSTEM_PROMPT,
|
| 96 |
+
"quest_profiles": list(QUEST_PROFILES),
|
| 97 |
+
"readme_prompt_char_limit": README_PROMPT_CHAR_LIMIT,
|
| 98 |
+
"app_prompt_char_limit": APP_PROMPT_CHAR_LIMIT,
|
| 99 |
+
"prompt_version": QUEST_PROMPT_VERSION,
|
| 100 |
+
}
|
| 101 |
+
return sha256(_canonical_json(payload).encode("utf-8")).hexdigest()
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def build_quest_cache_identity(
|
| 105 |
+
project: Project,
|
| 106 |
+
analyzer_fingerprint: Mapping[str, Any],
|
| 107 |
+
) -> QuestCacheIdentity:
|
| 108 |
+
prompt_hash = sha256(render_project_quest_prompt(project).encode("utf-8")).hexdigest()
|
| 109 |
+
taxonomy_hash = quest_taxonomy_hash()
|
| 110 |
+
canonical_fingerprint = json.loads(_canonical_json(analyzer_fingerprint))
|
| 111 |
+
key_payload = {
|
| 112 |
+
"schema_version": QUEST_CACHE_SCHEMA_VERSION,
|
| 113 |
+
"project_id": project.id,
|
| 114 |
+
"prompt_hash": prompt_hash,
|
| 115 |
+
"taxonomy_hash": taxonomy_hash,
|
| 116 |
+
"analyzer_fingerprint": canonical_fingerprint,
|
| 117 |
+
}
|
| 118 |
+
cache_key = sha256(_canonical_json(key_payload).encode("utf-8")).hexdigest()
|
| 119 |
+
return QuestCacheIdentity(
|
| 120 |
+
project_id=project.id,
|
| 121 |
+
prompt_hash=prompt_hash,
|
| 122 |
+
taxonomy_hash=taxonomy_hash,
|
| 123 |
+
analyzer_fingerprint=canonical_fingerprint,
|
| 124 |
+
cache_key=cache_key,
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def quest_cache_path(cache_dir: Path, cache_key: str) -> Path:
|
| 129 |
+
return cache_dir / QUEST_CACHE_ROOT / cache_key[:2] / f"{cache_key}.json"
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def read_quest_cache_entry(
|
| 133 |
+
cache_dir: Path,
|
| 134 |
+
project: Project,
|
| 135 |
+
analyzer_fingerprint: Mapping[str, Any],
|
| 136 |
+
) -> QuestCacheLookup:
|
| 137 |
+
identity = build_quest_cache_identity(project, analyzer_fingerprint)
|
| 138 |
+
path = quest_cache_path(cache_dir, identity.cache_key)
|
| 139 |
+
if not path.is_file():
|
| 140 |
+
return QuestCacheLookup(identity=identity, entry=None, reason="absent")
|
| 141 |
+
try:
|
| 142 |
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
| 143 |
+
except (OSError, json.JSONDecodeError) as error:
|
| 144 |
+
return QuestCacheLookup(identity=identity, entry=None, reason=f"invalid_json:{error}")
|
| 145 |
+
if not isinstance(payload, dict):
|
| 146 |
+
return QuestCacheLookup(identity=identity, entry=None, reason="invalid_payload")
|
| 147 |
+
try:
|
| 148 |
+
entry = _validate_cache_payload(payload, project, identity, path)
|
| 149 |
+
except QuestAnalysisError as error:
|
| 150 |
+
return QuestCacheLookup(identity=identity, entry=None, reason=f"invalid_schema:{error}")
|
| 151 |
+
return QuestCacheLookup(identity=identity, entry=entry, reason="hit")
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def write_quest_cache_entry(
|
| 155 |
+
cache_dir: Path,
|
| 156 |
+
project: Project,
|
| 157 |
+
analyzer_fingerprint: Mapping[str, Any],
|
| 158 |
+
matches: Sequence[Mapping[str, Any]],
|
| 159 |
+
*,
|
| 160 |
+
source: str,
|
| 161 |
+
) -> QuestCacheEntry:
|
| 162 |
+
identity = build_quest_cache_identity(project, analyzer_fingerprint)
|
| 163 |
+
validated = validate_matches_by_project({project.id: list(matches)}, [project], source=source)
|
| 164 |
+
generated_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
|
| 165 |
+
payload = {
|
| 166 |
+
"schema_version": QUEST_CACHE_SCHEMA_VERSION,
|
| 167 |
+
"generated_at": generated_at,
|
| 168 |
+
"source": validated.source,
|
| 169 |
+
**identity.to_dict(),
|
| 170 |
+
"matches": validated.matches_by_project[project.id],
|
| 171 |
+
}
|
| 172 |
+
path = quest_cache_path(cache_dir, identity.cache_key)
|
| 173 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 174 |
+
tmp_path = path.parent / f".{path.name}.{uuid4().hex}.tmp"
|
| 175 |
+
tmp_path.write_text(json.dumps(payload, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
|
| 176 |
+
os.replace(tmp_path, path)
|
| 177 |
+
return QuestCacheEntry(
|
| 178 |
+
identity=identity,
|
| 179 |
+
matches=validated.matches_by_project[project.id],
|
| 180 |
+
source=validated.source,
|
| 181 |
+
path=path,
|
| 182 |
+
generated_at=generated_at,
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def quest_cache_run_record(
|
| 187 |
+
*,
|
| 188 |
+
project: Project,
|
| 189 |
+
identity: QuestCacheIdentity,
|
| 190 |
+
matches: Sequence[Mapping[str, Any]],
|
| 191 |
+
status: str,
|
| 192 |
+
source: str,
|
| 193 |
+
path: Path | None = None,
|
| 194 |
+
) -> dict[str, Any]:
|
| 195 |
+
return {
|
| 196 |
+
"project_id": project.id,
|
| 197 |
+
"cache_key": identity.cache_key,
|
| 198 |
+
"prompt_hash": identity.prompt_hash,
|
| 199 |
+
"taxonomy_hash": identity.taxonomy_hash,
|
| 200 |
+
"status": status,
|
| 201 |
+
"source": source,
|
| 202 |
+
"cache_path": path.as_posix() if path is not None else "",
|
| 203 |
+
"matches": [dict(match) for match in matches],
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def build_quest_analysis_run_payload(
|
| 208 |
+
*,
|
| 209 |
+
run_id: str,
|
| 210 |
+
analyzer_fingerprint: Mapping[str, Any],
|
| 211 |
+
summary: Mapping[str, Any],
|
| 212 |
+
project_records: Sequence[Mapping[str, Any]],
|
| 213 |
+
) -> dict[str, Any]:
|
| 214 |
+
return {
|
| 215 |
+
"schema_version": QUEST_CACHE_SCHEMA_VERSION,
|
| 216 |
+
"run_id": run_id,
|
| 217 |
+
"generated_at": datetime.now(timezone.utc).isoformat(timespec="seconds"),
|
| 218 |
+
"source": QUEST_ANALYZER_SOURCE,
|
| 219 |
+
"analyzer_fingerprint": json.loads(_canonical_json(analyzer_fingerprint)),
|
| 220 |
+
"taxonomy_hash": quest_taxonomy_hash(),
|
| 221 |
+
"summary": dict(summary),
|
| 222 |
+
"projects": [dict(record) for record in project_records],
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def _validate_cache_payload(
|
| 227 |
+
payload: Mapping[str, Any],
|
| 228 |
+
project: Project,
|
| 229 |
+
identity: QuestCacheIdentity,
|
| 230 |
+
path: Path,
|
| 231 |
+
) -> QuestCacheEntry:
|
| 232 |
+
if payload.get("schema_version") != QUEST_CACHE_SCHEMA_VERSION:
|
| 233 |
+
raise QuestAnalysisError("unsupported quest cache schema")
|
| 234 |
+
for field, expected in identity.to_dict().items():
|
| 235 |
+
if payload.get(field) != expected:
|
| 236 |
+
raise QuestAnalysisError(f"cache {field} mismatch")
|
| 237 |
+
source = str(payload.get("source") or QUEST_ANALYZER_SOURCE)
|
| 238 |
+
validated = validate_matches_by_project({project.id: payload.get("matches") or []}, [project], source=source)
|
| 239 |
+
generated_at = str(payload.get("generated_at") or "")
|
| 240 |
+
return QuestCacheEntry(
|
| 241 |
+
identity=identity,
|
| 242 |
+
matches=validated.matches_by_project[project.id],
|
| 243 |
+
source=validated.source,
|
| 244 |
+
path=path,
|
| 245 |
+
generated_at=generated_at,
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def _first_env(env: Mapping[str, str], *names: str) -> str:
|
| 250 |
+
for name in names:
|
| 251 |
+
value = env.get(name, "").strip()
|
| 252 |
+
if value:
|
| 253 |
+
return value
|
| 254 |
+
return ""
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def _local_artifact_digest(raw_path: str) -> str:
|
| 258 |
+
if not raw_path:
|
| 259 |
+
return ""
|
| 260 |
+
path = Path(raw_path).expanduser()
|
| 261 |
+
if not path.is_absolute():
|
| 262 |
+
path = (Path.cwd() / path).resolve()
|
| 263 |
+
if not path.exists():
|
| 264 |
+
return ""
|
| 265 |
+
digest = sha256()
|
| 266 |
+
if path.is_file():
|
| 267 |
+
_hash_file_into(digest, path, path.name)
|
| 268 |
+
return digest.hexdigest()
|
| 269 |
+
for file_path in sorted(item for item in path.rglob("*") if item.is_file()):
|
| 270 |
+
_hash_file_into(digest, file_path, file_path.relative_to(path).as_posix())
|
| 271 |
+
return digest.hexdigest()
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def _hash_file_into(digest: Any, file_path: Path, relative_name: str) -> None:
|
| 275 |
+
digest.update(relative_name.encode("utf-8"))
|
| 276 |
+
digest.update(b"\0")
|
| 277 |
+
with file_path.open("rb") as handle:
|
| 278 |
+
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
| 279 |
+
digest.update(chunk)
|
| 280 |
+
digest.update(b"\0")
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def _canonical_json(payload: Any) -> str:
|
| 284 |
+
return json.dumps(payload, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
|
static/app.js
CHANGED
|
@@ -300,15 +300,28 @@ function renderRefreshState(state) {
|
|
| 300 |
}
|
| 301 |
if (atlasRefreshProgressEl) {
|
| 302 |
const show = status === "running" || status === "failed";
|
|
|
|
| 303 |
atlasRefreshProgressEl.hidden = !show;
|
| 304 |
atlasRefreshProgressEl.textContent =
|
| 305 |
status === "running"
|
| 306 |
-
? `${stage || "Working"} · run ${state.run_id || ""}`
|
| 307 |
: state.error || "";
|
| 308 |
}
|
| 309 |
if (refreshDashboardButton) refreshDashboardButton.disabled = status === "running";
|
| 310 |
}
|
| 311 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 312 |
function renderDashboard(data) {
|
| 313 |
if (!data?.points?.length) {
|
| 314 |
handleDashboardError(new Error("empty dashboard payload"));
|
|
|
|
| 300 |
}
|
| 301 |
if (atlasRefreshProgressEl) {
|
| 302 |
const show = status === "running" || status === "failed";
|
| 303 |
+
const cacheCopy = refreshQuestCacheCopy(state?.quest_cache || {});
|
| 304 |
atlasRefreshProgressEl.hidden = !show;
|
| 305 |
atlasRefreshProgressEl.textContent =
|
| 306 |
status === "running"
|
| 307 |
+
? `${stage || "Working"}${cacheCopy ? ` · ${cacheCopy}` : ""} · run ${state.run_id || ""}`
|
| 308 |
: state.error || "";
|
| 309 |
}
|
| 310 |
if (refreshDashboardButton) refreshDashboardButton.disabled = status === "running";
|
| 311 |
}
|
| 312 |
|
| 313 |
+
function refreshQuestCacheCopy(cache) {
|
| 314 |
+
const total = Number(cache.project_count || 0);
|
| 315 |
+
if (!total) return "";
|
| 316 |
+
const hits = Number(cache.hit_count || 0);
|
| 317 |
+
const misses = Number(cache.miss_count || 0);
|
| 318 |
+
const analyzed = Number(cache.analyzed_count || 0);
|
| 319 |
+
const remaining = Number(cache.remaining_count || 0);
|
| 320 |
+
if (!hits && !misses && !analyzed) return "";
|
| 321 |
+
if (remaining > 0) return `${hits} cached, ${analyzed}/${misses} analyzed`;
|
| 322 |
+
return `${hits} cached, ${analyzed} analyzed`;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
function renderDashboard(data) {
|
| 326 |
if (!data?.points?.length) {
|
| 327 |
handleDashboardError(new Error("empty dashboard payload"));
|
tests/test_app.py
CHANGED
|
@@ -70,12 +70,15 @@ def _reset_refresh_state(status: str = "idle") -> None:
|
|
| 70 |
{
|
| 71 |
"status": status,
|
| 72 |
"run_id": "test-run" if status == "running" else "",
|
|
|
|
|
|
|
| 73 |
"stage": "crawling" if status == "running" else "",
|
| 74 |
"stage_label": "Fetching public Spaces" if status == "running" else "",
|
| 75 |
"started_at": "",
|
| 76 |
"finished_at": "",
|
| 77 |
"error": "",
|
| 78 |
"result": None,
|
|
|
|
| 79 |
}
|
| 80 |
)
|
| 81 |
|
|
@@ -179,6 +182,29 @@ def test_dashboard_refresh_rejects_concurrent_run(monkeypatch, tmp_path) -> None
|
|
| 179 |
_reset_refresh_state()
|
| 180 |
|
| 181 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
def test_dashboard_refresh_embedding_build_runs_in_subprocess(monkeypatch, tmp_path) -> None:
|
| 183 |
project_path = tmp_path / "projects.json"
|
| 184 |
index_path = tmp_path / "project_index.json"
|
|
@@ -237,7 +263,7 @@ def test_dashboard_refresh_persists_and_swaps_latest(monkeypatch, tmp_path) -> N
|
|
| 237 |
monkeypatch.setenv("ADVISOR_CACHE_DIR", str(tmp_path))
|
| 238 |
_reset_refresh_state()
|
| 239 |
|
| 240 |
-
def fake_refresh_payloads(run_id: str) -> tuple[dict, dict, dict]:
|
| 241 |
projects_payload = json.loads(app_module.DATA_PATH.read_text(encoding="utf-8"))
|
| 242 |
index_payload = json.loads(app_module.INDEX_PATH.read_text(encoding="utf-8"))
|
| 243 |
refreshed_index = ProjectIndex.from_files(app_module.DATA_PATH, app_module.INDEX_PATH)
|
|
@@ -245,7 +271,13 @@ def test_dashboard_refresh_persists_and_swaps_latest(monkeypatch, tmp_path) -> N
|
|
| 245 |
refreshed_index,
|
| 246 |
generated_at="2026-06-08T00:00:00+00:00",
|
| 247 |
)
|
| 248 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
monkeypatch.setattr(app_module, "_build_refresh_payloads", fake_refresh_payloads)
|
| 251 |
response = dashboard_refresh_start()
|
|
@@ -254,11 +286,14 @@ def test_dashboard_refresh_persists_and_swaps_latest(monkeypatch, tmp_path) -> N
|
|
| 254 |
state = _wait_for_refresh()
|
| 255 |
assert state["status"] == "succeeded"
|
| 256 |
assert (tmp_path / "latest.json").is_file()
|
|
|
|
|
|
|
|
|
|
| 257 |
assert state["result"]["project_count"] == len(app_module.index.projects)
|
| 258 |
assert dashboard()["provenance"]["snapshot_digest"] == state["result"]["snapshot_digest"]
|
| 259 |
|
| 260 |
|
| 261 |
-
def test_dashboard_refresh_quest_analysis_uses_minicpm_analyzer(monkeypatch) -> None:
|
| 262 |
project = Project(
|
| 263 |
id="build-small-hackathon/minicpm-refresh-smoke",
|
| 264 |
title="MiniCPM Refresh Smoke",
|
|
@@ -301,14 +336,21 @@ def test_dashboard_refresh_quest_analysis_uses_minicpm_analyzer(monkeypatch) ->
|
|
| 301 |
|
| 302 |
monkeypatch.setattr(app_module, "create_quest_analyzer", lambda device: FakeMiniCPMAnalyzer())
|
| 303 |
|
| 304 |
-
result = app_module._analyze_dashboard_quests(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
quests = {match["quest"] for match in result["matches_by_project"][project.id]}
|
| 307 |
assert result["source"] == "minicpm-json-quest-analyzer"
|
| 308 |
assert quests == {"Off the Grid", "Field Notes"}
|
|
|
|
|
|
|
| 309 |
|
| 310 |
|
| 311 |
-
def test_dashboard_refresh_quest_analysis_batches_minicpm(monkeypatch) -> None:
|
| 312 |
projects = [
|
| 313 |
Project(
|
| 314 |
id=f"build-small-hackathon/batched-{index}",
|
|
@@ -341,7 +383,12 @@ def test_dashboard_refresh_quest_analysis_batches_minicpm(monkeypatch) -> None:
|
|
| 341 |
monkeypatch.setenv("ADVISOR_QUEST_ANALYSIS_BATCH_SIZE", "2")
|
| 342 |
monkeypatch.setattr(app_module, "create_quest_analyzer", lambda device: FakeMiniCPMAnalyzer())
|
| 343 |
|
| 344 |
-
result = app_module._analyze_dashboard_quests(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
assert calls == [
|
| 347 |
["build-small-hackathon/batched-0", "build-small-hackathon/batched-1"],
|
|
@@ -350,7 +397,69 @@ def test_dashboard_refresh_quest_analysis_batches_minicpm(monkeypatch) -> None:
|
|
| 350 |
assert set(result["matches_by_project"]) == {project.id for project in projects}
|
| 351 |
|
| 352 |
|
| 353 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
project = Project(
|
| 355 |
id="build-small-hackathon/missing-evidence",
|
| 356 |
title="Missing Evidence",
|
|
@@ -372,7 +481,7 @@ def test_dashboard_refresh_quest_analysis_requires_two_segment_snapshot() -> Non
|
|
| 372 |
del row["readme_body"]
|
| 373 |
|
| 374 |
try:
|
| 375 |
-
app_module._analyze_dashboard_quests([row])
|
| 376 |
except RuntimeError as error:
|
| 377 |
assert "readme_body and app_file_source" in str(error)
|
| 378 |
else:
|
|
|
|
| 70 |
{
|
| 71 |
"status": status,
|
| 72 |
"run_id": "test-run" if status == "running" else "",
|
| 73 |
+
"compute": "cpu" if status == "running" else "",
|
| 74 |
+
"reason": "test" if status == "running" else "",
|
| 75 |
"stage": "crawling" if status == "running" else "",
|
| 76 |
"stage_label": "Fetching public Spaces" if status == "running" else "",
|
| 77 |
"started_at": "",
|
| 78 |
"finished_at": "",
|
| 79 |
"error": "",
|
| 80 |
"result": None,
|
| 81 |
+
"quest_cache": app_module._empty_quest_cache_progress(),
|
| 82 |
}
|
| 83 |
)
|
| 84 |
|
|
|
|
| 182 |
_reset_refresh_state()
|
| 183 |
|
| 184 |
|
| 185 |
+
def test_dashboard_refresh_rejects_existing_bucket_lock(monkeypatch, tmp_path) -> None:
|
| 186 |
+
monkeypatch.setenv("ADVISOR_CACHE_DIR", str(tmp_path))
|
| 187 |
+
_reset_refresh_state()
|
| 188 |
+
(tmp_path / "refresh.lock").write_text(
|
| 189 |
+
json.dumps(
|
| 190 |
+
{
|
| 191 |
+
"run_id": "other-run",
|
| 192 |
+
"owner": "other-process",
|
| 193 |
+
"expires_at_epoch": time.time() + 3600,
|
| 194 |
+
}
|
| 195 |
+
),
|
| 196 |
+
encoding="utf-8",
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
try:
|
| 200 |
+
dashboard_refresh_start()
|
| 201 |
+
except Exception as error:
|
| 202 |
+
assert getattr(error, "status_code", None) == 409
|
| 203 |
+
assert "other-run" in str(getattr(error, "detail", ""))
|
| 204 |
+
else:
|
| 205 |
+
raise AssertionError("dashboard refresh should honor an existing bucket lock")
|
| 206 |
+
|
| 207 |
+
|
| 208 |
def test_dashboard_refresh_embedding_build_runs_in_subprocess(monkeypatch, tmp_path) -> None:
|
| 209 |
project_path = tmp_path / "projects.json"
|
| 210 |
index_path = tmp_path / "project_index.json"
|
|
|
|
| 263 |
monkeypatch.setenv("ADVISOR_CACHE_DIR", str(tmp_path))
|
| 264 |
_reset_refresh_state()
|
| 265 |
|
| 266 |
+
def fake_refresh_payloads(run_id: str, *, cache_dir, compute) -> tuple[dict, dict, dict, dict]:
|
| 267 |
projects_payload = json.loads(app_module.DATA_PATH.read_text(encoding="utf-8"))
|
| 268 |
index_payload = json.loads(app_module.INDEX_PATH.read_text(encoding="utf-8"))
|
| 269 |
refreshed_index = ProjectIndex.from_files(app_module.DATA_PATH, app_module.INDEX_PATH)
|
|
|
|
| 271 |
refreshed_index,
|
| 272 |
generated_at="2026-06-08T00:00:00+00:00",
|
| 273 |
)
|
| 274 |
+
quest_analysis_payload = {
|
| 275 |
+
"schema_version": 1,
|
| 276 |
+
"run_id": run_id,
|
| 277 |
+
"summary": {"project_count": refreshed_dashboard["project_count"], "compute": compute},
|
| 278 |
+
"projects": [],
|
| 279 |
+
}
|
| 280 |
+
return projects_payload, index_payload, refreshed_dashboard, quest_analysis_payload
|
| 281 |
|
| 282 |
monkeypatch.setattr(app_module, "_build_refresh_payloads", fake_refresh_payloads)
|
| 283 |
response = dashboard_refresh_start()
|
|
|
|
| 286 |
state = _wait_for_refresh()
|
| 287 |
assert state["status"] == "succeeded"
|
| 288 |
assert (tmp_path / "latest.json").is_file()
|
| 289 |
+
assert (tmp_path / "refresh.lock").exists() is False
|
| 290 |
+
latest = json.loads((tmp_path / "latest.json").read_text(encoding="utf-8"))
|
| 291 |
+
assert (tmp_path / latest["quest_analysis"]).is_file()
|
| 292 |
assert state["result"]["project_count"] == len(app_module.index.projects)
|
| 293 |
assert dashboard()["provenance"]["snapshot_digest"] == state["result"]["snapshot_digest"]
|
| 294 |
|
| 295 |
|
| 296 |
+
def test_dashboard_refresh_quest_analysis_uses_minicpm_analyzer(monkeypatch, tmp_path) -> None:
|
| 297 |
project = Project(
|
| 298 |
id="build-small-hackathon/minicpm-refresh-smoke",
|
| 299 |
title="MiniCPM Refresh Smoke",
|
|
|
|
| 336 |
|
| 337 |
monkeypatch.setattr(app_module, "create_quest_analyzer", lambda device: FakeMiniCPMAnalyzer())
|
| 338 |
|
| 339 |
+
result = app_module._analyze_dashboard_quests(
|
| 340 |
+
[project.to_refresh_snapshot_dict()],
|
| 341 |
+
cache_dir=tmp_path,
|
| 342 |
+
compute="cpu",
|
| 343 |
+
run_id="test-run",
|
| 344 |
+
)
|
| 345 |
|
| 346 |
quests = {match["quest"] for match in result["matches_by_project"][project.id]}
|
| 347 |
assert result["source"] == "minicpm-json-quest-analyzer"
|
| 348 |
assert quests == {"Off the Grid", "Field Notes"}
|
| 349 |
+
assert result["quest_analysis_payload"]["summary"]["miss_count"] == 1
|
| 350 |
+
assert result["quest_analysis_payload"]["summary"]["analyzed_count"] == 1
|
| 351 |
|
| 352 |
|
| 353 |
+
def test_dashboard_refresh_quest_analysis_batches_minicpm(monkeypatch, tmp_path) -> None:
|
| 354 |
projects = [
|
| 355 |
Project(
|
| 356 |
id=f"build-small-hackathon/batched-{index}",
|
|
|
|
| 383 |
monkeypatch.setenv("ADVISOR_QUEST_ANALYSIS_BATCH_SIZE", "2")
|
| 384 |
monkeypatch.setattr(app_module, "create_quest_analyzer", lambda device: FakeMiniCPMAnalyzer())
|
| 385 |
|
| 386 |
+
result = app_module._analyze_dashboard_quests(
|
| 387 |
+
[project.to_refresh_snapshot_dict() for project in projects],
|
| 388 |
+
cache_dir=tmp_path,
|
| 389 |
+
compute="cpu",
|
| 390 |
+
run_id="test-run",
|
| 391 |
+
)
|
| 392 |
|
| 393 |
assert calls == [
|
| 394 |
["build-small-hackathon/batched-0", "build-small-hackathon/batched-1"],
|
|
|
|
| 397 |
assert set(result["matches_by_project"]) == {project.id for project in projects}
|
| 398 |
|
| 399 |
|
| 400 |
+
def test_dashboard_refresh_quest_analysis_caches_minicpm_results(monkeypatch, tmp_path) -> None:
|
| 401 |
+
project = Project(
|
| 402 |
+
id="build-small-hackathon/cached-quest",
|
| 403 |
+
title="Cached Quest",
|
| 404 |
+
summary="A small local project.",
|
| 405 |
+
tags=("gradio",),
|
| 406 |
+
models=("openbmb/MiniCPM5-1B",),
|
| 407 |
+
datasets=(),
|
| 408 |
+
likes=0,
|
| 409 |
+
sdk="gradio",
|
| 410 |
+
license="mit",
|
| 411 |
+
created_at="2026-06-01T00:00:00+00:00",
|
| 412 |
+
last_modified="2026-06-08T00:00:00+00:00",
|
| 413 |
+
host="https://cached-quest.hf.space",
|
| 414 |
+
url="https://huggingface.co/spaces/build-small-hackathon/cached-quest",
|
| 415 |
+
readme_body="Runs MiniCPM5-1B locally.",
|
| 416 |
+
app_file_source="from transformers import AutoModelForCausalLM",
|
| 417 |
+
)
|
| 418 |
+
calls = []
|
| 419 |
+
|
| 420 |
+
class FakeMiniCPMAnalyzer:
|
| 421 |
+
source = "minicpm-json-quest-analyzer"
|
| 422 |
+
|
| 423 |
+
def analyze(self, projects):
|
| 424 |
+
calls.append([item.id for item in projects])
|
| 425 |
+
return {
|
| 426 |
+
project.id: [
|
| 427 |
+
{
|
| 428 |
+
"quest": "OpenBMB",
|
| 429 |
+
"confidence": 0.91,
|
| 430 |
+
"evidence": "Runs MiniCPM5-1B locally",
|
| 431 |
+
"source": "readme",
|
| 432 |
+
}
|
| 433 |
+
]
|
| 434 |
+
}
|
| 435 |
+
|
| 436 |
+
monkeypatch.setattr(app_module, "create_quest_analyzer", lambda device: FakeMiniCPMAnalyzer())
|
| 437 |
+
|
| 438 |
+
first = app_module._analyze_dashboard_quests(
|
| 439 |
+
[project.to_refresh_snapshot_dict()],
|
| 440 |
+
cache_dir=tmp_path,
|
| 441 |
+
compute="cpu",
|
| 442 |
+
run_id="first-run",
|
| 443 |
+
)
|
| 444 |
+
|
| 445 |
+
def fail_analyzer(device):
|
| 446 |
+
raise AssertionError("cached quest analysis should not load MiniCPM")
|
| 447 |
+
|
| 448 |
+
monkeypatch.setattr(app_module, "create_quest_analyzer", fail_analyzer)
|
| 449 |
+
second = app_module._analyze_dashboard_quests(
|
| 450 |
+
[project.to_refresh_snapshot_dict()],
|
| 451 |
+
cache_dir=tmp_path,
|
| 452 |
+
compute="cpu",
|
| 453 |
+
run_id="second-run",
|
| 454 |
+
)
|
| 455 |
+
|
| 456 |
+
assert calls == [[project.id]]
|
| 457 |
+
assert first["matches_by_project"] == second["matches_by_project"]
|
| 458 |
+
assert second["quest_analysis_payload"]["summary"]["hit_count"] == 1
|
| 459 |
+
assert second["quest_analysis_payload"]["projects"][0]["status"] == "cached"
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def test_dashboard_refresh_quest_analysis_requires_two_segment_snapshot(tmp_path) -> None:
|
| 463 |
project = Project(
|
| 464 |
id="build-small-hackathon/missing-evidence",
|
| 465 |
title="Missing Evidence",
|
|
|
|
| 481 |
del row["readme_body"]
|
| 482 |
|
| 483 |
try:
|
| 484 |
+
app_module._analyze_dashboard_quests([row], cache_dir=tmp_path, compute="cpu", run_id="test-run")
|
| 485 |
except RuntimeError as error:
|
| 486 |
assert "readme_body and app_file_source" in str(error)
|
| 487 |
else:
|
tests/test_quest_cache.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from hackathon_advisor.data import Project
|
| 6 |
+
from hackathon_advisor.quest_cache import (
|
| 7 |
+
build_quest_cache_identity,
|
| 8 |
+
quest_analyzer_fingerprint_from_env,
|
| 9 |
+
read_quest_cache_entry,
|
| 10 |
+
write_quest_cache_entry,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _project(readme_body: str = "Uses MiniCPM locally.") -> Project:
|
| 15 |
+
return Project(
|
| 16 |
+
id="build-small-hackathon/cache-unit",
|
| 17 |
+
title="Cache Unit",
|
| 18 |
+
summary="A small MiniCPM app.",
|
| 19 |
+
tags=("gradio",),
|
| 20 |
+
models=("openbmb/MiniCPM5-1B",),
|
| 21 |
+
datasets=(),
|
| 22 |
+
likes=0,
|
| 23 |
+
sdk="gradio",
|
| 24 |
+
license="mit",
|
| 25 |
+
created_at="2026-06-01T00:00:00+00:00",
|
| 26 |
+
last_modified="2026-06-08T00:00:00+00:00",
|
| 27 |
+
host="https://cache-unit.hf.space",
|
| 28 |
+
url="https://huggingface.co/spaces/build-small-hackathon/cache-unit",
|
| 29 |
+
app_file="app.py",
|
| 30 |
+
app_file_source="from transformers import AutoModelForCausalLM",
|
| 31 |
+
readme_body=readme_body,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_quest_cache_key_changes_when_prompt_changes() -> None:
|
| 36 |
+
fingerprint = quest_analyzer_fingerprint_from_env({"ADVISOR_QUEST_ADAPTER_ID": ""})
|
| 37 |
+
|
| 38 |
+
first = build_quest_cache_identity(_project("Uses MiniCPM locally."), fingerprint)
|
| 39 |
+
second = build_quest_cache_identity(_project("Exports a PDF report."), fingerprint)
|
| 40 |
+
|
| 41 |
+
assert first.prompt_hash != second.prompt_hash
|
| 42 |
+
assert first.cache_key != second.cache_key
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def test_quest_cache_round_trip_validates_cached_matches(tmp_path) -> None:
|
| 46 |
+
project = _project()
|
| 47 |
+
fingerprint = quest_analyzer_fingerprint_from_env({"ADVISOR_QUEST_ADAPTER_ID": ""})
|
| 48 |
+
matches = [
|
| 49 |
+
{
|
| 50 |
+
"quest": "OpenBMB",
|
| 51 |
+
"confidence": 0.91,
|
| 52 |
+
"evidence": "Uses MiniCPM locally",
|
| 53 |
+
"source": "readme",
|
| 54 |
+
}
|
| 55 |
+
]
|
| 56 |
+
|
| 57 |
+
stored = write_quest_cache_entry(
|
| 58 |
+
tmp_path,
|
| 59 |
+
project,
|
| 60 |
+
fingerprint,
|
| 61 |
+
matches,
|
| 62 |
+
source="minicpm-json-quest-analyzer",
|
| 63 |
+
)
|
| 64 |
+
lookup = read_quest_cache_entry(tmp_path, project, fingerprint)
|
| 65 |
+
|
| 66 |
+
assert lookup.reason == "hit"
|
| 67 |
+
assert lookup.entry is not None
|
| 68 |
+
assert lookup.entry.path == stored.path
|
| 69 |
+
assert lookup.entry.matches == stored.matches
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def test_quest_cache_rejects_corrupt_record(tmp_path) -> None:
|
| 73 |
+
project = _project()
|
| 74 |
+
fingerprint = quest_analyzer_fingerprint_from_env({"ADVISOR_QUEST_ADAPTER_ID": ""})
|
| 75 |
+
stored = write_quest_cache_entry(
|
| 76 |
+
tmp_path,
|
| 77 |
+
project,
|
| 78 |
+
fingerprint,
|
| 79 |
+
[
|
| 80 |
+
{
|
| 81 |
+
"quest": "OpenBMB",
|
| 82 |
+
"confidence": 0.91,
|
| 83 |
+
"evidence": "Uses MiniCPM locally",
|
| 84 |
+
"source": "readme",
|
| 85 |
+
}
|
| 86 |
+
],
|
| 87 |
+
source="minicpm-json-quest-analyzer",
|
| 88 |
+
)
|
| 89 |
+
payload = json.loads(stored.path.read_text(encoding="utf-8"))
|
| 90 |
+
payload["matches"][0]["quest"] = "Unknown Quest"
|
| 91 |
+
stored.path.write_text(json.dumps(payload), encoding="utf-8")
|
| 92 |
+
|
| 93 |
+
lookup = read_quest_cache_entry(tmp_path, project, fingerprint)
|
| 94 |
+
|
| 95 |
+
assert lookup.entry is None
|
| 96 |
+
assert lookup.reason.startswith("invalid_schema:")
|