Spaces:

build-small-hackathon
/

hackathon-advisor

Running on Zero

App Files Files Community

JacobLinCool Codex committed on Jun 7

Commit

6d9770a

verified ·

1 Parent(s): d0718ca

feat: stream advisor progress

Browse files

Co-authored-by: Codex <noreply@openai.com>

Files changed (17) hide show

app.py +97 -31
hackathon_advisor/agent.py +57 -3
hackathon_advisor/asr_runtime.py +113 -14
hackathon_advisor/data.py +9 -0
hackathon_advisor/model_runtime.py +168 -23
hackathon_advisor/profiling.py +165 -0
hackathon_advisor/wood_map.py +74 -18
hackathon_advisor/zerogpu.py +14 -0
scripts/__init__.py +2 -0
static/app.js +131 -2
static/index.html +13 -0
static/styles.css +116 -12
tests/test_agent.py +46 -0
tests/test_app.py +32 -0
tests/test_model_runtime.py +61 -0
tests/test_profiling.py +84 -0
uv.lock +0 -0

app.py CHANGED Viewed

@@ -16,6 +16,12 @@ from hackathon_advisor.asr_runtime import create_asr_transcriber
 from hackathon_advisor.chapter import build_chapter_markdown
 from hackathon_advisor.data import ProjectIndex
 from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
 from hackathon_advisor.field_notes import build_field_notes_markdown
 from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
 from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
@@ -26,9 +32,10 @@ from hackathon_advisor.submission_packet import build_submission_packet_markdown
 from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
 from hackathon_advisor.tools import GOALS, goal_profiles
 from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
-from hackathon_advisor.zerogpu import gpu_task
 install_asyncio_cleanup_hook()
 ROOT = Path(__file__).parent
@@ -40,18 +47,33 @@ MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
 AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
 index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
-engine = AdvisorEngine(index)
 voice_transcriber = create_asr_transcriber()
 app = Server()
 def _json_event(payload: dict) -> str:
     return json.dumps(payload, ensure_ascii=False)
 @gpu_task
-def _engine_turn(message: str, session: dict[str, Any]):
-    return engine.turn(message, session)
 @gpu_task
@@ -72,33 +94,71 @@ def _session_from_payload(payload: dict[str, Any] | None) -> dict[str, Any]:
     return _session_from_json(str(payload.get("session_json") or "{}"))
-def _agent_turn_events(message: str, session_json: str = "{}") -> Iterator[str]:
-    session = _session_from_json(session_json)
-    result = _engine_turn(message, session)
-    yield _json_event(
-        {
-            "type": "start",
-            "corrections": [correction.to_dict() for correction in result.corrections],
-            "normalized_text": result.normalized_text,
-            "tool_events": [event.to_dict() for event in result.tool_events],
-        }
-    )
-    for chunk in result.stream_chunks():
-        yield _json_event({"type": "token", "text": chunk})
-    yield _json_event(
-        {
-            "type": "done",
-            "state": result.state,
-            "response": result.response,
-            "projects": [project.to_public_dict() for project in result.projects],
-            "whitespace": [item.to_dict() for item in result.whitespace],
-            "score": result.score.to_dict() if result.score else None,
-            "plan": result.plan,
-            "artifact": result.artifact,
-        }
     )
 @app.get("/", response_class=HTMLResponse)
@@ -197,14 +257,20 @@ def agent_turn_stream(payload: dict[str, Any] | None = Body(default=None)) -> St
     payload = payload or {}
     message = str(payload.get("message") or "")
     session_json = str(payload.get("session_json") or "{}")
     def stream() -> Iterator[str]:
-        for event in _agent_turn_events(message, session_json):
             yield f"{event}\n"
     return StreamingResponse(stream(), media_type="application/x-ndjson")
 @app.post("/api/transcribe")
 async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
     content_type = str(audio.content_type or "")
@@ -347,8 +413,8 @@ def submission_packet_artifact(session_json: str = "{}") -> str:
 @app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
-def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
-    yield from _agent_turn_events(message, session_json)
 if __name__ == "__main__":

 from hackathon_advisor.chapter import build_chapter_markdown
 from hackathon_advisor.data import ProjectIndex
 from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
+from hackathon_advisor.model_runtime import create_tool_planner
+from hackathon_advisor.profiling import (
+    TurnProfiler,
+    configure_logging,
+    next_message_index,
+)
 from hackathon_advisor.field_notes import build_field_notes_markdown
 from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
 from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
 from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
 from hackathon_advisor.tools import GOALS, goal_profiles
 from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
+from hackathon_advisor.zerogpu import gpu_task, is_gpu_quota_error, zero_gpu_enabled
+configure_logging()
 install_asyncio_cleanup_hook()
 ROOT = Path(__file__).parent
 AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
 index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
+# Acceleration is automatic: on a ZeroGPU Space the GPU path uses accelerate device_map inside
+# the @spaces.GPU fork; locally the device resolves CUDA -> Apple MPS -> CPU. The separate
+# CPU-pinned engine is only used as an explicit override or a quota fallback.
+engine = AdvisorEngine(index, create_tool_planner(device="auto" if zero_gpu_enabled() else "local"))
 voice_transcriber = create_asr_transcriber()
 app = Server()
+_cpu_engine: AdvisorEngine | None = None
 def _json_event(payload: dict) -> str:
     return json.dumps(payload, ensure_ascii=False)
+def _cpu_engine_instance() -> AdvisorEngine:
+    """A CPU-pinned advisor engine used for the explicit CPU override and for the automatic
+    fallback when a ZeroGPU allocation is denied. Loaded lazily so the CPU model only enters
+    memory when CPU is actually used."""
+    global _cpu_engine
+    if _cpu_engine is None:
+        _cpu_engine = AdvisorEngine(index, create_tool_planner(device="cpu"))
+    return _cpu_engine
 @gpu_task
+def _engine_turn_stream_gpu(message: str, session: dict[str, Any]) -> Iterator[dict[str, Any]]:
+    yield from engine.turn_stream(message, session)
 @gpu_task
     return _session_from_json(str(payload.get("session_json") or "{}"))
+def _primary_turn_stream(message: str, session: dict[str, Any]) -> Iterator[dict[str, Any]]:
+    if zero_gpu_enabled():
+        yield from _engine_turn_stream_gpu(message, session)
+    else:
+        yield from engine.turn_stream(message, session)
+def _agent_turn_events(
+    message: str,
+    session_json: str = "{}",
+    compute: str = "gpu",
+) -> Iterator[str]:
+    profiler = TurnProfiler(
+        message_index=next_message_index(),
+        compute=compute,
+        backend=str(engine.runtime_status().get("backend", "")),
+        message_chars=len(message),
     )
+    profiler.log_start()
+    try:
+        for event in _profiled_turn_events(message, session_json, compute):
+            profiler.observe(event)
+            yield _json_event(event)
+        profiler.device = _active_device(compute)
+        profiler.log_summary()
+    except Exception as error:  # noqa: BLE001 - log timing/resources even when a turn fails
+        profiler.device = _active_device(compute)
+        profiler.log_summary(error)
+        raise
+def _active_device(compute: str) -> str:
+    """The torch device the turn actually resolved to (e.g. mps/cuda/cpu), read after the run
+    so the lazy model has reported its resolved device."""
+    active = _cpu_engine if compute == "cpu" else engine
+    try:
+        return str(active.runtime_status().get("device", "")) if active is not None else ""
+    except Exception:  # noqa: BLE001 - profiling must never break a turn
+        return ""
+def _profiled_turn_events(
+    message: str,
+    session_json: str,
+    compute: str,
+) -> Iterator[dict[str, Any]]:
+    session = _session_from_json(session_json)
+    if compute != "cpu":
+        produced = False
+        try:
+            for event in _primary_turn_stream(message, session):
+                produced = True
+                yield event
+            return
+        except Exception as error:  # noqa: BLE001 - fall back to local on a clean quota failure
+            if produced or not is_gpu_quota_error(error):
+                raise
+            yield {
+                "type": "fallback",
+                "to": "cpu",
+                "reason": "ZeroGPU quota reached — running this turn locally (slower).",
+            }
+    for event in _cpu_engine_instance().turn_stream(message, session):
+        yield event
 @app.get("/", response_class=HTMLResponse)
     payload = payload or {}
     message = str(payload.get("message") or "")
     session_json = str(payload.get("session_json") or "{}")
+    compute = _normalize_compute(payload.get("compute"))
     def stream() -> Iterator[str]:
+        for event in _agent_turn_events(message, session_json, compute):
             yield f"{event}\n"
     return StreamingResponse(stream(), media_type="application/x-ndjson")
+def _normalize_compute(value: Any) -> str:
+    # Acceleration is automatic; "cpu" is the only manual override (not surfaced in the UI).
+    return "cpu" if str(value or "").strip().lower() == "cpu" else "gpu"
 @app.post("/api/transcribe")
 async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
     content_type = str(audio.content_type or "")
 @app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
+def agent_turn(message: str, session_json: str = "{}", compute: str = "gpu") -> Iterator[str]:
+    yield from _agent_turn_events(message, session_json, _normalize_compute(compute))
 if __name__ == "__main__":

hackathon_advisor/agent.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
 from dataclasses import dataclass
 from dataclasses import replace
 from typing import Any
@@ -8,7 +9,7 @@ from hackathon_advisor.aliases import Correction, normalize_text
 from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
 from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
 from hackathon_advisor.scoring import ScoreCard
-from hackathon_advisor.tool_contracts import ToolCall
 from hackathon_advisor.tools import (
     GOALS,
     AdvisorTools,
@@ -58,13 +59,20 @@ class AdvisorEngine:
     def runtime_status(self) -> dict[str, Any]:
         return runtime_status(self.planner).to_dict()
-    def turn(self, message: str, state: dict[str, Any] | None = None) -> TurnResult:
         state = dict(state or {})
         state.setdefault("ideas", [])
         state.setdefault("profile", {})
         state.setdefault("goals", GOALS[:3])
         normalized, corrections = normalize_text(message)
-        resolution = self.planner.plan(normalized, state)
         state["last_tool_resolution"] = resolution.to_dict()
         tool_events: list[ToolEvent] = []
         projects: list[Project] = []
@@ -134,6 +142,52 @@ class AdvisorEngine:
         return self._idea_research_turn(call, normalized, corrections, state, tool_events)
     def _result(
         self,
         normalized_text: str,

 from __future__ import annotations
+from collections.abc import Iterator
 from dataclasses import dataclass
 from dataclasses import replace
 from typing import Any
 from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
 from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
 from hackathon_advisor.scoring import ScoreCard
+from hackathon_advisor.tool_contracts import ToolCall, ToolResolution
 from hackathon_advisor.tools import (
     GOALS,
     AdvisorTools,
     def runtime_status(self) -> dict[str, Any]:
         return runtime_status(self.planner).to_dict()
+    def turn(
+        self,
+        message: str,
+        state: dict[str, Any] | None = None,
+        *,
+        resolution: ToolResolution | None = None,
+    ) -> TurnResult:
         state = dict(state or {})
         state.setdefault("ideas", [])
         state.setdefault("profile", {})
         state.setdefault("goals", GOALS[:3])
         normalized, corrections = normalize_text(message)
+        if resolution is None:
+            resolution = self.planner.plan(normalized, state)
         state["last_tool_resolution"] = resolution.to_dict()
         tool_events: list[ToolEvent] = []
         projects: list[Project] = []
         return self._idea_research_turn(call, normalized, corrections, state, tool_events)
+    def turn_stream(self, message: str, state: dict[str, Any] | None = None) -> Iterator[dict[str, Any]]:
+        """Run a turn while yielding plain-dict progress events, so a caller can stream the
+        real work (tool-call decoding, tool execution, response) instead of replaying a
+        finished string. Every yielded value is JSON-serializable so it can cross a ZeroGPU
+        process boundary."""
+        state = dict(state or {})
+        normalized, corrections = normalize_text(message)
+        yield {
+            "type": "start",
+            "corrections": [correction.to_dict() for correction in corrections],
+            "normalized_text": normalized,
+        }
+        yield {"type": "stage", "stage": "planning", "label": "Choosing the next move"}
+        resolution: ToolResolution | None = None
+        for event in self.planner.plan_iter(normalized, state):
+            if event.get("type") == "resolved":
+                resolution = event["resolution"]
+            else:
+                yield event
+        tool_name = resolution.call.name if resolution is not None else ""
+        yield {
+            "type": "stage",
+            "stage": "running_tool",
+            "tool": tool_name,
+            "label": f"Calling {tool_name}" if tool_name else "Running tools",
+        }
+        result = self.turn(normalized, state, resolution=resolution)
+        for event in result.tool_events:
+            yield {"type": "tool_event", **event.to_dict()}
+        yield {"type": "stage", "stage": "writing", "label": "Writing the page"}
+        for chunk in result.stream_chunks():
+            yield {"type": "token", "text": chunk}
+        yield {
+            "type": "done",
+            "state": result.state,
+            "response": result.response,
+            "projects": [project.to_public_dict() for project in result.projects],
+            "whitespace": [item.to_dict() for item in result.whitespace],
+            "score": result.score.to_dict() if result.score else None,
+            "plan": result.plan,
+            "artifact": result.artifact,
+        }
     def _result(
         self,
         normalized_text: str,

hackathon_advisor/asr_runtime.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 from dataclasses import dataclass
 import os
 from pathlib import Path
 import shutil
@@ -12,6 +13,10 @@ from typing import Any
 DEFAULT_ASR_MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
 DEFAULT_ASR_BACKEND = "nemo-asr"
 DEFAULT_ASR_SAMPLE_RATE = 16_000
 @dataclass(frozen=True)
@@ -47,22 +52,32 @@ class AsrStatus:
 class NemotronAsrTranscriber:
     backend = DEFAULT_ASR_BACKEND
     def __init__(
         self,
         model_id: str = DEFAULT_ASR_MODEL_ID,
         sample_rate: int = DEFAULT_ASR_SAMPLE_RATE,
     ) -> None:
         self.model_id = model_id.strip() or DEFAULT_ASR_MODEL_ID
         self.sample_rate = sample_rate
-        self._model = None
     def status(self) -> AsrStatus:
         return AsrStatus(
-            backend=self.backend,
-            model_id=self.model_id,
-            loaded=self._model is not None,
             sample_rate=self.sample_rate,
         )
@@ -71,23 +86,41 @@ class NemotronAsrTranscriber:
         if not source.is_file():
             raise RuntimeError("Voice note was not saved before transcription.")
         self._ensure_loaded()
         with tempfile.TemporaryDirectory(prefix="advisor-asr-") as directory:
             wav_path = Path(directory) / "voice.wav"
             normalize_audio_for_asr(source, wav_path, self.sample_rate)
-            outputs = self._model.transcribe([str(wav_path)], batch_size=1)
-        transcript = extract_transcript(outputs).strip()
         if not transcript:
-            raise RuntimeError("Nemotron ASR returned an empty transcript.")
         return AsrTranscript(
             transcript=transcript,
-            model_id=self.model_id,
-            backend=self.backend,
             sample_rate=self.sample_rate,
         )
     def _ensure_loaded(self) -> None:
-        if self._model is not None:
             return
         try:
             import torch
             import nemo.collections.asr as nemo_asr
@@ -97,12 +130,33 @@ class NemotronAsrTranscriber:
                 "before enabling voice transcription."
             ) from error
         model = nemo_asr.models.ASRModel.from_pretrained(model_name=self.model_id)
-        device = os.environ.get("ADVISOR_ASR_DEVICE", "").strip()
-        if not device:
-            device = "cuda" if torch.cuda.is_available() else "cpu"
         model.to(device)
         model.eval()
-        self._model = model
 def create_asr_transcriber() -> NemotronAsrTranscriber:
@@ -112,9 +166,54 @@ def create_asr_transcriber() -> NemotronAsrTranscriber:
     return NemotronAsrTranscriber(
         model_id=os.environ.get("ADVISOR_ASR_MODEL_ID", DEFAULT_ASR_MODEL_ID),
         sample_rate=sample_rate,
     )
 def normalize_audio_for_asr(source: Path, target: Path, sample_rate: int = DEFAULT_ASR_SAMPLE_RATE) -> None:
     ffmpeg = shutil.which("ffmpeg")
     if not ffmpeg:

 from __future__ import annotations
 from dataclasses import dataclass
+import logging
 import os
 from pathlib import Path
 import shutil
 DEFAULT_ASR_MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
 DEFAULT_ASR_BACKEND = "nemo-asr"
 DEFAULT_ASR_SAMPLE_RATE = 16_000
+DEFAULT_WHISPER_MODEL_ID = "openai/whisper-small.en"
+WHISPER_BACKEND = "whisper-transformers"
+_logger = logging.getLogger("hackathon_advisor")
 @dataclass(frozen=True)
 class NemotronAsrTranscriber:
+    """Nemotron voice input. Its declared identity (status, model id) is the deployed Space
+    backend — NVIDIA NeMo ASR. When NeMo is not installed (e.g. local development on a Mac,
+    where NeMo does not install cleanly), transcription transparently falls back to a local
+    Whisper model through transformers so voice still works; the returned transcript reports
+    whichever engine actually ran."""
     backend = DEFAULT_ASR_BACKEND
     def __init__(
         self,
         model_id: str = DEFAULT_ASR_MODEL_ID,
         sample_rate: int = DEFAULT_ASR_SAMPLE_RATE,
+        whisper_model_id: str = DEFAULT_WHISPER_MODEL_ID,
     ) -> None:
         self.model_id = model_id.strip() or DEFAULT_ASR_MODEL_ID
         self.sample_rate = sample_rate
+        self.whisper_model_id = whisper_model_id.strip() or DEFAULT_WHISPER_MODEL_ID
+        self._engine: tuple[str, Any] | None = None
+        self._active_backend = ""
+        self._active_model_id = ""
     def status(self) -> AsrStatus:
         return AsrStatus(
+            backend=self._active_backend or self.backend,
+            model_id=self._active_model_id or self.model_id,
+            loaded=self._engine is not None,
             sample_rate=self.sample_rate,
         )
         if not source.is_file():
             raise RuntimeError("Voice note was not saved before transcription.")
         self._ensure_loaded()
+        kind, engine = self._engine  # type: ignore[misc]
         with tempfile.TemporaryDirectory(prefix="advisor-asr-") as directory:
             wav_path = Path(directory) / "voice.wav"
             normalize_audio_for_asr(source, wav_path, self.sample_rate)
+            if kind == "nemo":
+                outputs = engine.transcribe([str(wav_path)], batch_size=1)
+                transcript = extract_transcript(outputs).strip()
+            else:
+                transcript = _whisper_transcribe(engine, wav_path, self.sample_rate).strip()
         if not transcript:
+            raise RuntimeError(f"{self._active_backend or self.backend} returned an empty transcript.")
         return AsrTranscript(
             transcript=transcript,
+            model_id=self._active_model_id or self.model_id,
+            backend=self._active_backend or self.backend,
             sample_rate=self.sample_rate,
         )
     def _ensure_loaded(self) -> None:
+        if self._engine is not None:
+            return
+        preference = os.environ.get("ADVISOR_ASR_BACKEND", "auto").strip().lower()
+        if preference in ("whisper", WHISPER_BACKEND):
+            self._load_whisper()
             return
+        try:
+            self._load_nemo()
+            return
+        except RuntimeError:
+            if preference in ("nemo", "nemo-asr", "nemotron"):
+                raise  # explicit Nemotron request: do not silently fall back
+            _logger.warning("NeMo ASR unavailable; falling back to local Whisper (%s).", self.whisper_model_id)
+            self._load_whisper()
+    def _load_nemo(self) -> None:
         try:
             import torch
             import nemo.collections.asr as nemo_asr
                 "before enabling voice transcription."
             ) from error
         model = nemo_asr.models.ASRModel.from_pretrained(model_name=self.model_id)
+        device = os.environ.get("ADVISOR_ASR_DEVICE", "").strip() or ("cuda" if torch.cuda.is_available() else "cpu")
         model.to(device)
         model.eval()
+        self._engine = ("nemo", model)
+        self._active_backend = self.backend
+        self._active_model_id = self.model_id
+    def _load_whisper(self) -> None:
+        try:
+            import torch
+            from transformers import WhisperForConditionalGeneration, WhisperProcessor
+        except ImportError as error:
+            raise RuntimeError(
+                "Local voice fallback requires transformers and torch. Install runtime "
+                "requirements before enabling voice transcription."
+            ) from error
+        device = _resolve_asr_device(torch)
+        if device == "mps":
+            os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
+        processor = WhisperProcessor.from_pretrained(self.whisper_model_id)
+        model = WhisperForConditionalGeneration.from_pretrained(self.whisper_model_id)
+        model.to(device)
+        model.eval()
+        self._engine = ("whisper", (processor, model))
+        self._active_backend = WHISPER_BACKEND
+        self._active_model_id = self.whisper_model_id
+        _logger.info("Whisper ASR loaded | model=%s device=%s", self.whisper_model_id, device)
 def create_asr_transcriber() -> NemotronAsrTranscriber:
     return NemotronAsrTranscriber(
         model_id=os.environ.get("ADVISOR_ASR_MODEL_ID", DEFAULT_ASR_MODEL_ID),
         sample_rate=sample_rate,
+        whisper_model_id=os.environ.get("ADVISOR_ASR_WHISPER_MODEL", DEFAULT_WHISPER_MODEL_ID),
     )
+def _resolve_asr_device(torch: Any) -> str:
+    forced = os.environ.get("ADVISOR_ASR_DEVICE", "").strip().lower()
+    if forced:
+        return forced
+    try:
+        if torch.cuda.is_available():
+            return "cuda"
+    except Exception:  # pragma: no cover - device dependent
+        pass
+    try:
+        if torch.backends.mps.is_available():
+            return "mps"
+    except Exception:  # pragma: no cover - device dependent
+        pass
+    return "cpu"
+def _whisper_transcribe(engine: tuple[Any, Any], wav_path: Path, sample_rate: int) -> str:
+    import torch
+    processor, model = engine
+    audio = _read_wav_mono_float32(wav_path)
+    inputs = processor(audio, sampling_rate=sample_rate, return_tensors="pt")
+    features = inputs.input_features.to(model.device)
+    with torch.inference_mode():
+        generated = model.generate(features, max_new_tokens=128)
+    decoded = processor.batch_decode(generated, skip_special_tokens=True)
+    return decoded[0] if decoded else ""
+def _read_wav_mono_float32(wav_path: Path) -> Any:
+    import wave
+    import numpy as np
+    with wave.open(str(wav_path), "rb") as wav:
+        channels = wav.getnchannels()
+        frames = wav.readframes(wav.getnframes())
+    audio = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0
+    if channels > 1:
+        audio = audio.reshape(-1, channels).mean(axis=1)
+    return audio
 def normalize_audio_for_asr(source: Path, target: Path, sample_rate: int = DEFAULT_ASR_SAMPLE_RATE) -> None:
     ffmpeg = shutil.which("ffmpeg")
     if not ffmpeg:

hackathon_advisor/data.py CHANGED Viewed

@@ -344,6 +344,15 @@ class ProjectIndex:
             tuple(float(value) for value in document["vector"])
             for document in index_payload["documents"]
         ]
     @classmethod
     def from_file(cls, path: Path, query_embedder: EmbeddingFunction | None = None) -> "ProjectIndex":

             tuple(float(value) for value in document["vector"])
             for document in index_payload["documents"]
         ]
+        self._vector_by_id = {
+            project.id: vector for project, vector in zip(self.projects, self._vectors)
+        }
+    def vector_for(self, project_id: str) -> tuple[float, ...] | None:
+        return self._vector_by_id.get(project_id)
+    def embed_query(self, text: str) -> tuple[float, ...]:
+        return tuple(normalize_vector(self._embed_query(text)))
     @classmethod
     def from_file(cls, path: Path, query_embedder: EmbeddingFunction | None = None) -> "ProjectIndex":

hackathon_advisor/model_runtime.py CHANGED Viewed

@@ -1,18 +1,25 @@
 from __future__ import annotations
 from contextlib import nullcontext
 from dataclasses import dataclass
 import os
 import re
 from typing import Any, Protocol
 from hackathon_advisor.tools import idea_from_text
 from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
 DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
 DEFAULT_ADAPTER_ID = "build-small-hackathon/hackathon-advisor-minicpm5-lora"
 DEFAULT_BACKEND = "rules"
 class ToolPlanner(Protocol):
@@ -24,6 +31,11 @@ class ToolPlanner(Protocol):
     def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
         ...
 @dataclass(frozen=True)
 class RuntimeStatus:
@@ -33,6 +45,7 @@ class RuntimeStatus:
     adapter_revision: str
     loaded: bool
     tool_count: int
     def to_dict(self) -> dict[str, Any]:
         return {
@@ -42,6 +55,7 @@ class RuntimeStatus:
             "adapter_revision": self.adapter_revision,
             "loaded": self.loaded,
             "tool_count": self.tool_count,
         }
@@ -61,13 +75,13 @@ class RuleBasedPlanner:
             output = '<function name="list_projects">{"sort":"likes"}</function>'
         elif project_id:
             output = f'<function name="get_project">{{"id":{_json_string(project_id)}}}</function>'
-        elif any(term in lower for term in ("compare", "choose", "rank")):
             output = '<function name="compare_ideas">{}</function>'
-        elif any(term in lower for term in ("plan", "roadmap", "next step", "milestone")):
             output = '<function name="make_plan">{}</function>'
-        elif any(term in lower for term in ("whitespace", "original", "new", "bolder", "unwritten", "gap")):
             output = '<function name="find_whitespace">{}</function>'
-        elif any(term in lower for term in ("search", "similar", "already", "existing", "overlap", "echo")):
             output = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
         else:
             title, pitch = idea_from_text(text)
@@ -78,6 +92,9 @@ class RuleBasedPlanner:
             )
         return resolve_tool_call(output, fallback_query=text)
 class MiniCPMTransformersPlanner:
     backend = "minicpm-transformers"
@@ -87,19 +104,34 @@ class MiniCPMTransformersPlanner:
         model_id: str = DEFAULT_MODEL_ID,
         adapter_id: str = "",
         adapter_revision: str = "",
     ) -> None:
         self.model_id = model_id.strip() or DEFAULT_MODEL_ID
         self.adapter_id = adapter_id.strip()
         self.adapter_revision = adapter_revision.strip()
         self._tokenizer = None
         self._model = None
         self._inference_mode = None
     def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
         self._ensure_loaded()
         prompt = render_context(message, state)
-        output = self._generate_tool_call(prompt)
-        return resolve_tool_call(output, fallback_query=message)
     def _ensure_loaded(self) -> None:
         if self._model is not None and self._tokenizer is not None:
@@ -121,26 +153,60 @@ class MiniCPMTransformersPlanner:
             adapter_config = PeftConfig.from_pretrained(self.adapter_id, **adapter_kwargs)
             base_model_id = str(adapter_config.base_model_name_or_path or base_model_id)
         self._tokenizer = AutoTokenizer.from_pretrained(
             tokenizer_id,
             trust_remote_code=True,
             **(adapter_kwargs if self.adapter_id else {}),
         )
-        model = AutoModelForCausalLM.from_pretrained(
-            base_model_id,
-            dtype="auto",
-            device_map="auto",
-            trust_remote_code=True,
         )
         if self.adapter_id:
             model = PeftModel.from_pretrained(model, self.adapter_id, **adapter_kwargs)
         model.eval()
         _disable_sampling_generation_defaults(model)
         self._model = model
         if hasattr(torch, "inference_mode"):
             self._inference_mode = torch.inference_mode
-    def _generate_tool_call(self, prompt: str) -> str:
         assert self._tokenizer is not None
         assert self._model is not None
         messages = [
@@ -156,19 +222,88 @@ class MiniCPMTransformersPlanner:
             return_tensors="pt",
         ).to(next(self._model.parameters()).device)
         _strip_unused_generation_inputs(inputs)
-        context = self._inference_mode() if self._inference_mode is not None else nullcontext()
-        with context:
-            generated = self._model.generate(
-                **inputs,
-                max_new_tokens=180,
-                do_sample=False,
-            )
-        new_tokens = generated[:, inputs["input_ids"].shape[-1] :]
-        decoded = self._tokenizer.decode(new_tokens[0], skip_special_tokens=True).strip()
-        return _normalize_xml_tool_output(decoded)
-def create_tool_planner() -> ToolPlanner:
     backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
     if backend in ("", "rules"):
         return RuleBasedPlanner()
@@ -177,11 +312,13 @@ def create_tool_planner() -> ToolPlanner:
             os.environ.get("ADVISOR_MODEL_ID", DEFAULT_MODEL_ID),
             os.environ.get("ADVISOR_ADAPTER_ID", ""),
             os.environ.get("ADVISOR_ADAPTER_REVISION", ""),
         )
     raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
 def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
     return RuntimeStatus(
         backend=planner.backend,
         model_id=planner.model_id,
@@ -189,6 +326,7 @@ def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
         adapter_revision=planner.adapter_revision,
         loaded=not isinstance(planner, MiniCPMTransformersPlanner) or planner._model is not None,
         tool_count=len(tool_schemas()),
     )
@@ -274,6 +412,13 @@ def _wants_project_list(lower_text: str) -> bool:
     return lower_text in exact_phrases or any(lower_text.startswith(prefix) for prefix in command_prefixes)
 def _project_reference_id(text: str) -> str:
     prefixes = (
         "read project ",

 from __future__ import annotations
+from collections.abc import Iterator
 from contextlib import nullcontext
 from dataclasses import dataclass
+import logging
 import os
 import re
+import threading
 from typing import Any, Protocol
 from hackathon_advisor.tools import idea_from_text
 from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
+from hackathon_advisor.zerogpu import zero_gpu_enabled
+_logger = logging.getLogger("hackathon_advisor")
 DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
 DEFAULT_ADAPTER_ID = "build-small-hackathon/hackathon-advisor-minicpm5-lora"
 DEFAULT_BACKEND = "rules"
+MAX_TOOL_CALL_TOKENS = 180
 class ToolPlanner(Protocol):
     def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
         ...
+    def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
+        """Yield {"type": "model_progress", "tokens": int} events while planning, then a
+        final {"type": "resolved", "resolution": ToolResolution} event."""
+        ...
 @dataclass(frozen=True)
 class RuntimeStatus:
     adapter_revision: str
     loaded: bool
     tool_count: int
+    device: str = ""
     def to_dict(self) -> dict[str, Any]:
         return {
             "adapter_revision": self.adapter_revision,
             "loaded": self.loaded,
             "tool_count": self.tool_count,
+            "device": self.device,
         }
             output = '<function name="list_projects">{"sort":"likes"}</function>'
         elif project_id:
             output = f'<function name="get_project">{{"id":{_json_string(project_id)}}}</function>'
+        elif _has_command_term(lower, ("compare", "choose", "rank")):
             output = '<function name="compare_ideas">{}</function>'
+        elif _has_command_term(lower, ("plan", "roadmap", "next step", "milestone")):
             output = '<function name="make_plan">{}</function>'
+        elif _has_command_term(lower, ("whitespace", "original", "new", "bolder", "unwritten", "gap")):
             output = '<function name="find_whitespace">{}</function>'
+        elif _has_command_term(lower, ("search", "similar", "already", "existing", "overlap", "echo")):
             output = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
         else:
             title, pitch = idea_from_text(text)
             )
         return resolve_tool_call(output, fallback_query=text)
+    def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
+        yield {"type": "resolved", "resolution": self.plan(message, state)}
 class MiniCPMTransformersPlanner:
     backend = "minicpm-transformers"
         model_id: str = DEFAULT_MODEL_ID,
         adapter_id: str = "",
         adapter_revision: str = "",
+        device: str = "auto",
     ) -> None:
         self.model_id = model_id.strip() or DEFAULT_MODEL_ID
         self.adapter_id = adapter_id.strip()
         self.adapter_revision = adapter_revision.strip()
+        self.device = (device or "auto").strip().lower() or "auto"
+        self.resolved_device = ""
         self._tokenizer = None
         self._model = None
         self._inference_mode = None
     def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
+        resolution: ToolResolution | None = None
+        for event in self.plan_iter(message, state):
+            if event.get("type") == "resolved":
+                resolution = event["resolution"]
+        assert resolution is not None
+        return resolution
+    def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
         self._ensure_loaded()
         prompt = render_context(message, state)
+        pieces: list[str] = []
+        for tokens, piece in self._stream_tool_call(prompt):
+            pieces.append(piece)
+            yield {"type": "model_progress", "tokens": tokens, "max_tokens": MAX_TOOL_CALL_TOKENS}
+        output = _normalize_xml_tool_output("".join(pieces).strip())
+        yield {"type": "resolved", "resolution": resolve_tool_call(output, fallback_query=message)}
     def _ensure_loaded(self) -> None:
         if self._model is not None and self._tokenizer is not None:
             adapter_config = PeftConfig.from_pretrained(self.adapter_id, **adapter_kwargs)
             base_model_id = str(adapter_config.base_model_name_or_path or base_model_id)
+        target = _resolve_torch_device(self.device, torch)
+        self.resolved_device = target
         self._tokenizer = AutoTokenizer.from_pretrained(
             tokenizer_id,
             trust_remote_code=True,
             **(adapter_kwargs if self.adapter_id else {}),
         )
+        model = self._load_model_on_device(
+            AutoModelForCausalLM, base_model_id, target, torch
         )
         if self.adapter_id:
             model = PeftModel.from_pretrained(model, self.adapter_id, **adapter_kwargs)
+            if target not in ("auto", "cpu"):
+                model = model.to(target)
         model.eval()
         _disable_sampling_generation_defaults(model)
         self._model = model
         if hasattr(torch, "inference_mode"):
             self._inference_mode = torch.inference_mode
+        _logger.info(
+            "MiniCPM loaded | requested_device=%s resolved_device=%s adapter=%s",
+            self.device,
+            self.resolved_device,
+            self.adapter_id or "(none)",
+        )
+    def _load_model_on_device(self, model_cls: Any, base_model_id: str, target: str, torch: Any) -> Any:
+        if target == "auto":
+            return model_cls.from_pretrained(
+                base_model_id, dtype="auto", device_map="auto", trust_remote_code=True
+            )
+        if target == "cpu":
+            return model_cls.from_pretrained(
+                base_model_id, dtype=torch.float32, device_map={"": "cpu"}, trust_remote_code=True
+            )
+        # mps / cuda: load on CPU first (no accelerate dispatch), then move to the device.
+        if target == "mps":
+            os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
+        try:
+            model = model_cls.from_pretrained(
+                base_model_id, dtype=torch.float16, trust_remote_code=True
+            )
+            return model.to(target)
+        except Exception as error:  # noqa: BLE001 - keep the turn runnable on CPU
+            if target == "mps":
+                _logger.warning("MPS load failed (%r); falling back to CPU float32.", error)
+                self.resolved_device = "cpu"
+                return model_cls.from_pretrained(
+                    base_model_id, dtype=torch.float32, device_map={"": "cpu"}, trust_remote_code=True
+                )
+            raise
+    def _prepare_inputs(self, prompt: str) -> Any:
         assert self._tokenizer is not None
         assert self._model is not None
         messages = [
             return_tensors="pt",
         ).to(next(self._model.parameters()).device)
         _strip_unused_generation_inputs(inputs)
+        return inputs
+    def _stream_tool_call(self, prompt: str) -> Iterator[tuple[int, str]]:
+        from transformers import TextIteratorStreamer
+        assert self._tokenizer is not None
+        assert self._model is not None
+        inputs = self._prepare_inputs(prompt)
+        streamer = TextIteratorStreamer(
+            self._tokenizer, skip_prompt=True, skip_special_tokens=True
+        )
+        generation_kwargs = {
+            **inputs,
+            "max_new_tokens": MAX_TOOL_CALL_TOKENS,
+            "do_sample": False,
+            "streamer": streamer,
+        }
+        errors: list[BaseException] = []
+        def _run() -> None:
+            context = self._inference_mode() if self._inference_mode is not None else nullcontext()
+            try:
+                with context:
+                    self._model.generate(**generation_kwargs)
+            except BaseException as error:  # surfaced after the streamer drains
+                errors.append(error)
+                # generate() never reached its end sentinel, so wake the consumer instead of
+                # letting it block forever, then re-raise from the main thread below.
+                streamer.end()
+        worker = threading.Thread(target=_run, daemon=True)
+        worker.start()
+        tokens = 0
+        for piece in streamer:
+            if not piece:
+                continue
+            tokens += 1
+            yield tokens, piece
+        worker.join()
+        if errors:
+            raise errors[0]
+def _device_available(device: str, torch: Any) -> bool:
+    try:
+        if device == "cuda":
+            return bool(torch.cuda.is_available())
+        if device == "mps":
+            backend = getattr(torch.backends, "mps", None)
+            return bool(backend is not None and backend.is_available())
+    except Exception:  # pragma: no cover - device dependent
+        return False
+    return False
def _best_local_device(torch: Any) -> str:
    """Pick the preferred on-machine device: "cuda", then "mps", then "cpu".

    CUDA is only probed when ZeroGPU is not enabled, so the CUDA check is never
    made from a ZeroGPU main process.
    """
    cuda_allowed = not zero_gpu_enabled()
    if cuda_allowed and _device_available("cuda", torch):
        return "cuda"
    return "mps" if _device_available("mps", torch) else "cpu"
+def _resolve_torch_device(preference: str, torch: Any) -> str:
+    """Map a configured device preference to a concrete torch device.
+    "auto" stays "auto" (accelerate device_map handles ZeroGPU/CUDA/CPU placement). "local"
+    picks the best on-machine accelerator: CUDA -> MPS (Apple Silicon) -> CPU. An explicit
+    cuda/mps that is unavailable degrades to the best available local device."""
+    pref = (preference or "auto").strip().lower()
+    if pref == "auto":
+        return "auto"
+    if pref == "cpu":
+        return "cpu"
+    if pref in ("cuda", "mps"):
+        return pref if _device_available(pref, torch) else _best_local_device(torch)
+    return _best_local_device(torch)
+def create_tool_planner(device: str = "auto") -> ToolPlanner:
     backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
     if backend in ("", "rules"):
         return RuleBasedPlanner()
             os.environ.get("ADVISOR_MODEL_ID", DEFAULT_MODEL_ID),
             os.environ.get("ADVISOR_ADAPTER_ID", ""),
             os.environ.get("ADVISOR_ADAPTER_REVISION", ""),
+            device=device,
         )
     raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
 def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
+    device = getattr(planner, "resolved_device", "") or getattr(planner, "device", "")
     return RuntimeStatus(
         backend=planner.backend,
         model_id=planner.model_id,
         adapter_revision=planner.adapter_revision,
         loaded=not isinstance(planner, MiniCPMTransformersPlanner) or planner._model is not None,
         tool_count=len(tool_schemas()),
+        device=str(device),
     )
     return lower_text in exact_phrases or any(lower_text.startswith(prefix) for prefix in command_prefixes)
+def _has_command_term(lower_text: str, terms: tuple[str, ...]) -> bool:
+    return any(
+        re.search(rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])", lower_text)
+        for term in terms
+    )
 def _project_reference_id(text: str) -> str:
     prefixes = (
         "read project ",

hackathon_advisor/profiling.py ADDED Viewed

	@@ -0,0 +1,165 @@

+"""Lightweight logging and per-turn profiling for the advisor runtime.
+The numbers here are debug/operations signal only — they are written to logs, never to the
+UI. Stage timings are measured by *observing the turn event stream from the main process*, so
+they stay correct even when the model itself runs inside a ZeroGPU fork (where a module-global
+counter would reset on every call).
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+import logging
+import os
+import platform
+import sys
+import threading
+import time
+from typing import Any
+logger = logging.getLogger("hackathon_advisor")
+_counter_lock = threading.Lock()
+_messages_processed = 0
def configure_logging() -> None:
    """Attach a stream handler once, honoring ADVISOR_LOG_LEVEL (default INFO).

    The level name is trimmed and upper-cased before lookup. Anything that does
    not resolve to an integer level on the ``logging`` module falls back to
    INFO. Propagation is disabled on the advisor logger.
    """
    level_name = os.environ.get("ADVISOR_LOG_LEVEL", "INFO").strip().upper()
    level = getattr(logging, level_name, None)
    if not isinstance(level, int) or isinstance(level, bool):
        # ``logging`` also exposes upper-case attributes that are not levels
        # (e.g. BASIC_FORMAT, a str); handing one to setLevel raises ValueError.
        level = logging.INFO
    logger.setLevel(level)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s"))
        logger.addHandler(handler)
    logger.propagate = False
def next_message_index() -> int:
    """Bump the module-level message counter under its lock and return the new value."""
    global _messages_processed
    with _counter_lock:
        current = _messages_processed + 1
        _messages_processed = current
    return current
def messages_processed() -> int:
    """Return the current value of the module-level message counter (read without the lock)."""
    processed_so_far = _messages_processed
    return processed_so_far
+def _ms(seconds: float) -> float:
+    return round(seconds * 1000.0, 1)
def resource_snapshot() -> dict[str, Any]:
    """Sample process resource usage on a best-effort basis.

    Reads ``getrusage(RUSAGE_SELF)`` for ``rss_mb`` (derived from ``ru_maxrss``),
    ``cpu_user_s`` and ``cpu_sys_s``, then merges in whatever
    ``_torch_memory_snapshot`` reports. Failures while sampling rusage are
    swallowed, so the result may contain only some of these keys.
    """
    sample: dict[str, Any] = {}
    try:
        import resource

        usage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is bytes on macOS, kilobytes on Linux.
        on_macos = platform.system() == "Darwin"
        units_per_mb = 1024 * 1024 if on_macos else 1024
        sample["rss_mb"] = round(usage.ru_maxrss / units_per_mb, 1)
        sample["cpu_user_s"] = round(usage.ru_utime, 3)
        sample["cpu_sys_s"] = round(usage.ru_stime, 3)
    except Exception:  # pragma: no cover - platform dependent
        pass
    sample.update(_torch_memory_snapshot())
    return sample
+def _torch_memory_snapshot() -> dict[str, Any]:
+    out: dict[str, Any] = {}
+    torch = sys.modules.get("torch")  # do not import torch just to profile
+    if torch is None:
+        return out
+    try:
+        if torch.cuda.is_available():
+            out["cuda_alloc_mb"] = round(torch.cuda.memory_allocated() / 1e6, 1)
+            out["cuda_peak_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1)
+    except Exception:  # pragma: no cover - device dependent
+        pass
+    try:
+        mps = getattr(torch, "mps", None)
+        current = getattr(mps, "current_allocated_memory", None)
+        if current is not None:
+            out["mps_alloc_mb"] = round(current() / 1e6, 1)
+    except Exception:  # pragma: no cover - device dependent
+        pass
+    return out
@dataclass
class TurnProfiler:
    """Collect timings and counters for one advisor turn from its event stream.

    Feed every emitted event dict to ``observe``; call ``log_summary`` once the
    turn is over (it is safe to call more than once — only the first call logs).
    """

    message_index: int
    compute: str
    backend: str
    device: str = ""
    message_chars: int = 0
    started: float = field(default_factory=time.perf_counter)
    stage_at: dict[str, float] = field(default_factory=dict)
    ended: float | None = None
    tokens: int = 0
    tool_count: int = 0
    fell_back: bool = False
    logged: bool = False

    def log_start(self) -> None:
        """Log the turn header (index, compute, backend, message length) at INFO."""
        details = (self.message_index, self.compute, self.backend, self.message_chars)
        logger.info("turn #%d start | compute=%s backend=%s message_chars=%d", *details)

    def observe(self, event: dict[str, Any]) -> None:
        """Update timings/counters from one event; unknown event types are ignored."""
        now = time.perf_counter()
        kind = event.get("type")
        if kind == "stage":
            # Only the first sighting of each stage is timestamped.
            stage = str(event.get("stage"))
            if stage not in self.stage_at:
                self.stage_at[stage] = now
        elif kind == "model_progress":
            # Keep the highest token count seen so far.
            reported = int(event.get("tokens") or 0)
            if reported > self.tokens:
                self.tokens = reported
        elif kind == "tool_event":
            self.tool_count += 1
        elif kind == "fallback":
            self.fell_back = True
        elif kind == "done":
            self.ended = now

    def durations(self) -> dict[str, float]:
        """Return millisecond durations derived from the recorded stage timestamps.

        ``total_ms`` is always present (measured to "now" if no ``done`` event
        was seen). ``decode_ms``, ``tools_ms`` and ``write_ms`` appear only when
        the stage timestamps they depend on were recorded.
        """
        finish = time.perf_counter() if self.ended is None else self.ended
        timings: dict[str, float] = {"total_ms": _ms(finish - self.started)}
        planning_at = self.stage_at.get("planning")
        running_at = self.stage_at.get("running_tool")
        writing_at = self.stage_at.get("writing")
        if planning_at is not None and running_at is not None:
            timings["decode_ms"] = _ms(running_at - planning_at)
        if running_at is not None and writing_at is not None:
            timings["tools_ms"] = _ms(writing_at - running_at)
        if writing_at is not None:
            timings["write_ms"] = _ms(finish - writing_at)
        return timings

    def log_summary(self, error: BaseException | None = None) -> None:
        """Log the one-line turn summary once: WARNING with the exception, else INFO."""
        if self.logged:
            return
        self.logged = True

        def render(pairs: dict[str, Any]) -> str:
            return " ".join(f"{key}={value}" for key, value in pairs.items())

        timing = render(self.durations())
        resources = render(resource_snapshot())
        status = "done" if error is None else "error"
        message = (
            f"turn #{self.message_index} {status} | {timing} | "
            f"tokens={self.tokens} tools={self.tool_count} compute={self.compute} "
            f"device={self.device or '?'} backend={self.backend} fallback={self.fell_back} | {resources}"
        )
        if error is None:
            logger.info(message)
        else:
            logger.warning("%s | exception=%r", message, error)

hackathon_advisor/wood_map.py CHANGED Viewed

@@ -11,9 +11,13 @@ from hackathon_advisor.tools import Idea
 def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
     echoes = list(score.echoes)
     background = _background_projects(index, echoes)
-    dots = [_project_dot(project, "inked") for project in background]
-    dots.extend(_echo_dot(hit) for hit in echoes[:5])
-    dots.append(_idea_dot(idea, score, echoes))
     return {
         "caption": _caption(score, echoes),
         "dots": _dedupe_dots(dots),
@@ -26,8 +30,8 @@ def _background_projects(index: ProjectIndex, echoes: list[SearchHit]) -> list[P
     return projects[:16]
-def _project_dot(project: Project, kind: str) -> dict[str, Any]:
-    x, y = _point(project.id)
     return {
         "id": project.id,
         "kind": kind,
@@ -39,8 +43,8 @@ def _project_dot(project: Project, kind: str) -> dict[str, Any]:
     }
-def _echo_dot(hit: SearchHit) -> dict[str, Any]:
-    dot = _project_dot(hit.project, "echo")
     dot["score"] = round(hit.score, 3)
     dot["matched_terms"] = list(hit.matched_terms)
     dot["page_number"] = hit.page_number
@@ -48,13 +52,8 @@ def _echo_dot(hit: SearchHit) -> dict[str, Any]:
     return dot
-def _idea_dot(idea: Idea, score: ScoreCard, echoes: list[SearchHit]) -> dict[str, Any]:
-    if echoes and not score.verdict.startswith("UNWRITTEN"):
-        lead_x, lead_y = _point(echoes[0].project.id)
-        x = _clamp(lead_x + 7, 8, 92)
-        y = _clamp(lead_y - 5, 8, 92)
-    else:
-        x, y = _point(f"idea:{idea.id}:{idea.title}")
     return {
         "id": idea.id,
         "kind": "idea",
@@ -67,6 +66,67 @@ def _idea_dot(idea: Idea, score: ScoreCard, echoes: list[SearchHit]) -> dict[str
     }
 def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
     if score.verdict.startswith("UNWRITTEN"):
         return "Your page sits in a pale margin beyond the nearest inked clusters."
@@ -81,10 +141,6 @@ def _point(key: str) -> tuple[int, int]:
     return x, y
-def _clamp(value: int, low: int, high: int) -> int:
-    return max(low, min(high, value))
 def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
     seen: set[tuple[str, str]] = set()
     deduped: list[dict[str, Any]] = []

 def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
     echoes = list(score.echoes)
     background = _background_projects(index, echoes)
+    echo_projects = [hit.project for hit in echoes[:5]]
+    layout, idea_xy = _layout(index, idea, background + echo_projects)
+    dots = [_project_dot(project, "inked", layout) for project in background]
+    dots.extend(_echo_dot(hit, layout) for hit in echoes[:5])
+    dots.append(_idea_dot(idea, score, idea_xy))
     return {
         "caption": _caption(score, echoes),
         "dots": _dedupe_dots(dots),
     return projects[:16]
+def _project_dot(project: Project, kind: str, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
+    x, y = layout.get(project.id) or _point(project.id)
     return {
         "id": project.id,
         "kind": kind,
     }
+def _echo_dot(hit: SearchHit, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
+    dot = _project_dot(hit.project, "echo", layout)
     dot["score"] = round(hit.score, 3)
     dot["matched_terms"] = list(hit.matched_terms)
     dot["page_number"] = hit.page_number
     return dot
+def _idea_dot(idea: Idea, score: ScoreCard, idea_xy: tuple[int, int]) -> dict[str, Any]:
+    x, y = idea_xy
     return {
         "id": idea.id,
         "kind": "idea",
     }
def _layout(
    index: ProjectIndex,
    idea: Idea,
    projects: list[Project],
) -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
    """Compute 2D positions for ``projects`` and for the idea.

    The vectors returned by ``index.vector_for`` plus an embedding of the idea's
    pitch (or title when the pitch is empty) are handed to ``_pca_project``. The
    hash-based ``_point`` scatter is returned instead when there are fewer than
    three projects, when any project has no vector, or when embedding or
    projection raises.

    Returns:
        ``(positions keyed by project id, idea position)``.
    """
    project_ids = [project.id for project in projects]
    project_vectors = [index.vector_for(project.id) for project in projects]
    hashed_positions = {project_id: _point(project_id) for project_id in project_ids}
    fallback = (hashed_positions, _point(f"idea:{idea.id}:{idea.title}"))

    too_few = len(project_vectors) < 3
    if too_few or any(vector is None for vector in project_vectors):
        return fallback
    try:
        idea_vector = index.embed_query(idea.pitch or idea.title)
        coords, idea_xy = _pca_project(project_vectors, idea_vector)
    except Exception:  # noqa: BLE001 - any projection failure degrades to the hash scatter
        return fallback
    else:
        return dict(zip(project_ids, coords)), idea_xy
def _pca_project(
    vectors: list[tuple[float, ...]],
    idea_vector: tuple[float, ...],
) -> tuple[list[tuple[int, int]], tuple[int, int]]:
    """Project the project vectors and the idea vector onto a shared 2D canvas.

    The project vectors are mean-centered and decomposed with SVD; the first two
    right-singular vectors form the basis. The idea vector is centered with the
    same mean and projected onto that basis, then all points are rescaled
    together by ``_scale_to_canvas`` and rounded to integers.

    Returns:
        ``(one (x, y) per input vector, the idea's (x, y))``.
    """
    import numpy as np

    cloud = np.asarray(vectors, dtype=np.float64)
    idea_point = np.asarray(idea_vector, dtype=np.float64)
    centroid = cloud.mean(axis=0)
    offsets = cloud - centroid
    right_vectors = np.linalg.svd(offsets, full_matrices=False)[2]
    axes = right_vectors[:2]
    all_points = np.vstack([offsets @ axes.T, (idea_point - centroid) @ axes.T])
    canvas = _scale_to_canvas(all_points)
    rounded = [(int(round(x)), int(round(y))) for x, y in canvas]
    # The idea was stacked last, so it is the final row.
    return rounded[:-1], rounded[-1]
+def _scale_to_canvas(points: Any, low: float = 10.0, high: float = 90.0) -> Any:
+    import numpy as np
+    scaled = np.empty_like(points)
+    for axis in range(points.shape[1]):
+        column = points[:, axis]
+        lo = float(column.min())
+        hi = float(column.max())
+        span = hi - lo
+        if span < 1e-9:
+            scaled[:, axis] = (low + high) / 2.0
+        else:
+            scaled[:, axis] = low + (column - lo) / span * (high - low)
+    return scaled
 def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
     if score.verdict.startswith("UNWRITTEN"):
         return "Your page sits in a pale margin beyond the nearest inked clusters."
     return x, y
 def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
     seen: set[tuple[str, str]] = set()
     deduped: list[dict[str, Any]] = []

hackathon_advisor/zerogpu.py CHANGED Viewed

@@ -41,3 +41,17 @@ def gpu_task(function: Callable[P, R]) -> Callable[P, R]:
             "Install runtime requirements before enabling ZeroGPU."
         ) from error
     return spaces.GPU(duration=zero_gpu_duration_seconds())(function)

             "Install runtime requirements before enabling ZeroGPU."
         ) from error
     return spaces.GPU(duration=zero_gpu_duration_seconds())(function)
QUOTA_ERROR_HINTS = ("quota", "gpu task aborted", "no gpu", "exceeded", "gpu is not available")


def is_gpu_quota_error(error: BaseException) -> bool:
    """Heuristically decide whether ``error`` looks like a ZeroGPU allocation/quota failure.

    The check is purely textual and case-insensitive: it passes when the
    exception class name contains "quota" or "gpu", or when the message contains
    any entry of ``QUOTA_ERROR_HINTS``. Callers use it to decide whether to fall
    back to a CPU run; per the original note, ZeroGPU raises before the wrapped
    function body executes, so this is applied to the exception surfacing from
    the first pull of the GPU generator.
    """
    class_name = type(error).__name__.lower()
    if any(marker in class_name for marker in ("quota", "gpu")):
        return True
    text = str(error).lower()
    for hint in QUOTA_ERROR_HINTS:
        if hint in text:
            return True
    return False

scripts/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Marks the repository's scripts/ as a regular package so it resolves ahead of any top-level
2	+ # "scripts" package that a dependency (e.g. nemo-toolkit) installs into site-packages.

static/app.js CHANGED Viewed

@@ -29,8 +29,16 @@ const resetButton = document.querySelector("#reset-session");
 const recordVoiceButton = document.querySelector("#record-voice");
 const uploadVoiceButton = document.querySelector("#upload-voice");
 const voiceFileInput = document.querySelector("#voice-file");
 const SESSION_STORAGE_KEY = "hackathon-advisor-session-v2";
 const FIELD_NOTES_FILENAME = "hackathon-advisor-field-notes.md";
 const CHAPTER_FILENAME = "hackathon-advisor-chapter.md";
 const PNG_EXPORT_LABEL = "PNG";
@@ -51,6 +59,8 @@ let voiceRecorder = null;
 let voiceStream = null;
 let voiceChunks = [];
 let voiceRecordingState = "idle";
 setVoiceRecordingState("idle");
 bootstrap().catch(handleBootstrapError);
@@ -168,6 +178,7 @@ async function runTurn(message) {
   corrections.textContent = "";
   planEl.innerHTML = "";
   delete session.ui_status;
   startTurnWatchdog();
   let completed = false;
@@ -194,6 +205,7 @@ async function runTurn(message) {
     ink.classList.add("bleed");
   } finally {
     clearTurnWatchdog();
     submit.disabled = false;
     setSessionControlsDisabled(false);
     setCommandDisabled(false);
@@ -810,6 +822,26 @@ function handleEvent(event) {
     return;
   }
   if (event.type === "token") {
     markFirstTokenSeen();
     ink.textContent += event.text;
@@ -817,6 +849,9 @@ function handleEvent(event) {
   }
   if (event.type === "done") {
     if (!sawTurnToken) {
       clearTurnWatchdog();
       ink.textContent = event.response || ink.textContent;
@@ -1023,8 +1058,9 @@ function renderWoodMap(map) {
   field.className = "wood";
   for (const dot of map.dots) {
     const marker = document.createElement(dot.url ? "a" : "span");
-    const verdictClass = dot.kind === "idea" && String(dot.verdict || "").startsWith("ECHO") ? "bleed" : "";
-    marker.className = `wood-dot ${dot.kind || "inked"} ${verdictClass}`.trim();
     marker.style.left = `${boundedPercent(dot.x)}%`;
     marker.style.top = `${boundedPercent(dot.y)}%`;
     const radius = Math.max(3, Math.min(10, Number(dot.radius || 4)));
@@ -1168,6 +1204,99 @@ function clearTurnWatchdog() {
   }
 }
 function syncCurrentIdeaGoals() {
   const currentId = session.current_idea_id;
   if (!currentId || !Array.isArray(session.ideas)) return;

 const recordVoiceButton = document.querySelector("#record-voice");
 const uploadVoiceButton = document.querySelector("#upload-voice");
 const voiceFileInput = document.querySelector("#voice-file");
+const turnProgressEl = document.querySelector("#turn-progress");
+const turnStageIconEl = document.querySelector("#turn-stage-icon");
+const turnStageTextEl = document.querySelector("#turn-stage-text");
+const turnTokensEl = document.querySelector("#turn-tokens");
+const turnEtaEl = document.querySelector("#turn-eta");
+const turnBarFillEl = document.querySelector("#turn-bar-fill");
+const toolChipsEl = document.querySelector("#tool-chips");
 const SESSION_STORAGE_KEY = "hackathon-advisor-session-v2";
+const STAGE_ICONS = { planning: "🪶", running_tool: "🔧", writing: "✍️" };
 const FIELD_NOTES_FILENAME = "hackathon-advisor-field-notes.md";
 const CHAPTER_FILENAME = "hackathon-advisor-chapter.md";
 const PNG_EXPORT_LABEL = "PNG";
 let voiceStream = null;
 let voiceChunks = [];
 let voiceRecordingState = "idle";
+let decodeStartedAt = 0;
+let turnProgressTimer = null;
 setVoiceRecordingState("idle");
 bootstrap().catch(handleBootstrapError);
   corrections.textContent = "";
   planEl.innerHTML = "";
   delete session.ui_status;
+  resetTurnProgress();
   startTurnWatchdog();
   let completed = false;
     ink.classList.add("bleed");
   } finally {
     clearTurnWatchdog();
+    hideTurnProgress();
     submit.disabled = false;
     setSessionControlsDisabled(false);
     setCommandDisabled(false);
     return;
   }
+  if (event.type === "stage") {
+    setTurnStage(event.stage, event.label);
+    return;
+  }
+  if (event.type === "model_progress") {
+    renderModelProgress(event.tokens, event.max_tokens);
+    return;
+  }
+  if (event.type === "tool_event") {
+    addToolChip(event);
+    return;
+  }
+  if (event.type === "fallback") {
+    renderComputeFallback(event);
+    return;
+  }
   if (event.type === "token") {
     markFirstTokenSeen();
     ink.textContent += event.text;
   }
   if (event.type === "done") {
+    setTurnBar(100);
+    if (turnEtaEl) turnEtaEl.textContent = "";
+    hideTurnProgress();
     if (!sawTurnToken) {
       clearTurnWatchdog();
       ink.textContent = event.response || ink.textContent;
   field.className = "wood";
   for (const dot of map.dots) {
     const marker = document.createElement(dot.url ? "a" : "span");
+    // Namespace the kind class (wood-idea/wood-echo/wood-inked) so it never collides with the
+    // global .idea/.echo card styles. The "you" dot stays green regardless of verdict.
+    marker.className = `wood-dot wood-${dot.kind || "inked"}`;
     marker.style.left = `${boundedPercent(dot.x)}%`;
     marker.style.top = `${boundedPercent(dot.y)}%`;
     const radius = Math.max(3, Math.min(10, Number(dot.radius || 4)));
   }
 }
+// Coarse overall completion per stage, so the bar always advances even when token-level
+// progress is unknown (e.g. the rules backend, or the fast tool/writing stages).
+const STAGE_PROGRESS = { planning: 8, running_tool: 85, writing: 95 };
// Put the progress panel back into its initial, hidden state at the start of a turn and
// arm a 450 ms timer that reveals it. A turn that finishes before the timer fires never
// shows the panel; hideTurnProgress() cancels the pending reveal.
function resetTurnProgress() {
  if (!turnProgressEl) {
    return;
  }
  turnProgressEl.hidden = true;
  decodeStartedAt = 0;
  if (toolChipsEl) toolChipsEl.innerHTML = "";
  for (const label of [turnTokensEl, turnEtaEl]) {
    if (label) label.textContent = "";
  }
  setTurnBar(4);
  setTurnStageContent("planning", "Thinking");
  clearTurnProgressTimer();
  turnProgressTimer = window.setTimeout(revealTurnProgress, 450);
}
// Make the progress panel visible (no-op when the element is missing).
function revealTurnProgress() {
  if (!turnProgressEl) return;
  turnProgressEl.hidden = false;
}
// Cancel the pending delayed reveal, if one is armed.
function clearTurnProgressTimer() {
  if (!turnProgressTimer) return;
  window.clearTimeout(turnProgressTimer);
  turnProgressTimer = null;
}
// Hide the progress panel and cancel any reveal that is still pending.
function hideTurnProgress() {
  clearTurnProgressTimer();
  if (!turnProgressEl) return;
  turnProgressEl.hidden = true;
}
// Set the progress bar fill to `percent`, clamped to 0–100.
// Non-finite input is ignored so the width is never set to an invalid "NaN%".
// The clamped value is also written to aria-valuenow on the enclosing
// role="progressbar" element, if there is one, so assistive technology sees it.
function setTurnBar(percent) {
  if (!turnBarFillEl) return;
  const value = Number(percent);
  if (!Number.isFinite(value)) return;
  const clamped = Math.max(0, Math.min(100, value));
  turnBarFillEl.style.width = `${clamped}%`;
  const bar = turnBarFillEl.closest('[role="progressbar"]');
  if (bar) bar.setAttribute("aria-valuenow", String(Math.round(clamped)));
}
// Render the stage icon and label, and move the bar to the stage's coarse percentage.
// Lookups use own-property checks: a plain `in` / index lookup would also resolve
// inherited keys such as "constructor" or "toString" and render a function as text
// or pass it on as a bar width.
function setTurnStageContent(stage, label) {
  const owns = (table) => Object.prototype.hasOwnProperty.call(table, stage);
  if (turnStageIconEl) {
    turnStageIconEl.textContent = owns(STAGE_ICONS) ? STAGE_ICONS[stage] : "🪶";
  }
  if (turnStageTextEl) turnStageTextEl.textContent = label || "Thinking";
  if (owns(STAGE_PROGRESS)) setTurnBar(STAGE_PROGRESS[stage]);
  // The ETA is only shown during planning; clear it for any other stage.
  if (stage && stage !== "planning" && turnEtaEl) turnEtaEl.textContent = "";
}
// Handle a "stage" event: clear the turn watchdog, then render the new stage.
function setTurnStage(stage, label) {
  clearTurnWatchdog();
  setTurnStageContent(stage, label);
}
// Handle a "model_progress" event: reveal the panel immediately, show the decoded-token
// count, map decode progress into the 8%–80% band of the bar, and estimate the time left
// from the live decode rate toward the token cap (an upper bound).
function renderModelProgress(tokens, maxTokens) {
  clearTurnWatchdog();
  revealTurnProgress();
  const decoded = Number(tokens) || 0;
  if (turnTokensEl) {
    turnTokensEl.textContent = decoded ? `· decoded ${decoded} tokens` : "";
  }
  if (!decoded) return;
  if (!decodeStartedAt) decodeStartedAt = performance.now();
  const limit = Number(maxTokens) || 0;
  if (limit > 0) setTurnBar(8 + Math.min(1, decoded / limit) * 72);
  const elapsedSeconds = (performance.now() - decodeStartedAt) / 1000;
  // No estimate without a cap, or before 0.3 s of decoding has elapsed.
  if (!turnEtaEl || limit <= 0 || elapsedSeconds <= 0.3) return;
  const tokensPerSecond = Math.max(decoded / elapsedSeconds, 0.1);
  const secondsLeft = Math.max(0, limit - decoded) / tokensPerSecond;
  turnEtaEl.textContent = secondsLeft >= 1 ? `~${Math.ceil(secondsLeft)}s left` : "almost done";
}
// Append a chip for a "tool_event": the tool name followed by a check mark, with the
// event summary (when present) as the tooltip. The name is set via textContent.
function addToolChip(event) {
  if (!toolChipsEl) return;
  const chip = document.createElement("span");
  chip.className = "tool-chip";
  if (event.summary) chip.title = event.summary;

  const nameEl = document.createElement("span");
  nameEl.className = "tc-name";
  nameEl.textContent = event.name || event.tool || "tool";

  const checkEl = document.createElement("span");
  checkEl.className = "tc-check";
  checkEl.textContent = "✓";

  chip.append(nameEl, checkEl);
  toolChipsEl.append(chip);
}
// Handle a "fallback" event. It is informational only: the reason (or a default CPU
// notice) is shown both in the stage label and in the corrections margin.
function renderComputeFallback(event) {
  const reason = event.reason || "Running on CPU (slower).";
  for (const target of [turnStageTextEl, corrections]) {
    if (target) target.textContent = reason;
  }
}
 function syncCurrentIdeaGoals() {
   const currentId = session.current_idea_id;
   if (!currentId || !Array.isArray(session.ideas)) return;

static/index.html CHANGED Viewed

@@ -222,6 +222,19 @@
             <div id="corrections" class="marginalia" aria-live="polite"></div>
             <article class="fate">
               <span id="verdict-stamp" class="verdict-stamp verdict-ready">
                 <span class="seal-dot"></span>

             <div id="corrections" class="marginalia" aria-live="polite"></div>
+            <!-- Live turn progress panel: starts hidden; holds the stage row
+                 (icon, stage text, token count, ETA), the progress bar and
+                 the tool chips container. -->
+            <div id="turn-progress" class="turn-progress" hidden aria-live="polite">
+              <div class="turn-stage">
+                <span id="turn-stage-icon" class="ts-icon">🪶</span>
+                <span id="turn-stage-text" class="ts-text">Thinking</span>
+                <span id="turn-tokens" class="ts-tokens"></span>
+                <span id="turn-eta" class="ts-eta"></span>
+              </div>
+              <div class="turn-bar" role="progressbar" aria-label="Turn progress">
+                <div id="turn-bar-fill" class="turn-bar-fill"></div>
+              </div>
+              <!-- Empty until tool chips are appended. -->
+              <div id="tool-chips" class="tool-chips"></div>
+            </div>
             <article class="fate">
               <span id="verdict-stamp" class="verdict-stamp verdict-ready">
                 <span class="seal-dot"></span>

static/styles.css CHANGED Viewed

@@ -738,30 +738,23 @@ textarea:disabled {
   transition: opacity 0.5s;
 }
-.wood-dot.inked {
   background: rgba(73, 49, 22, 0.34);
 }
-.wood-dot.echo {
   background: var(--oxblood);
   box-shadow: 0 0 0 2px rgba(255, 240, 181, 0.5);
   animation: echo-pulse 2.4s ease-in-out infinite;
 }
-.wood-dot.idea {
   z-index: 2;
   background: var(--leaf);
   box-shadow:
     0 0 0 3px #fff0b5,
-    0 0 20px rgba(47, 107, 65, 0.5);
-}
-.wood-dot.idea.bleed,
-.wood-dot.idea.echo-idea {
-  background: var(--oxblood);
-  box-shadow:
-    0 0 0 3px #fff0b5,
-    0 0 20px rgba(154, 43, 34, 0.5);
 }
 @keyframes echo-pulse {
@@ -1298,3 +1291,114 @@ textarea:disabled {
     transition-duration: 0.001ms !important;
   }
 }

   transition: opacity 0.5s;
 }
+.wood-dot.wood-inked {
   background: rgba(73, 49, 22, 0.34);
 }
+.wood-dot.wood-echo {
   background: var(--oxblood);
   box-shadow: 0 0 0 2px rgba(255, 240, 181, 0.5);
   animation: echo-pulse 2.4s ease-in-out infinite;
 }
+.wood-dot.wood-idea {
   z-index: 2;
+  border-radius: 50%;
   background: var(--leaf);
   box-shadow:
     0 0 0 3px #fff0b5,
+    0 0 20px rgba(47, 107, 65, 0.6);
 }
 @keyframes echo-pulse {
     transition-duration: 0.001ms !important;
   }
 }
+/* Live turn progress (stage + token count + tool chips) */
+/* Panel container: stacks its children vertically; the thicker gold left
+   border marks it out from the surrounding column. */
+.turn-progress {
+  display: flex;
+  flex-direction: column;
+  gap: 0.4rem;
+  margin: 0.2rem 0 0.4rem;
+  padding: 0.5rem 0.7rem;
+  border: 1px solid var(--rule-soft);
+  border-left: 3px solid var(--gold);
+  border-radius: 8px;
+  background: rgba(216, 162, 38, 0.06);
+}
+/* Stage row: icon, stage text, token count and ETA laid out on one line. */
+.turn-stage {
+  display: flex;
+  align-items: center;
+  gap: 0.45rem;
+  font-family: var(--label);
+  font-size: 0.78rem;
+  color: var(--ink-soft);
+}
+/* The icon pulses continuously via the ts-pulse keyframes. */
+.turn-stage .ts-icon {
+  font-size: 0.95rem;
+  line-height: 1;
+  animation: ts-pulse 1.6s ease-in-out infinite;
+}
+.turn-stage .ts-text {
+  font-weight: 600;
+  color: var(--ink);
+}
+/* Tabular numerals keep the changing token count from shifting width. */
+.turn-stage .ts-tokens {
+  color: var(--ink-faint);
+  font-variant-numeric: tabular-nums;
+}
+/* margin-left: auto pushes the ETA to the far end of the flex row. */
+.turn-stage .ts-eta {
+  margin-left: auto;
+  color: var(--ink-faint);
+  font-variant-numeric: tabular-nums;
+}
+/* Progress bar track; overflow is hidden so the fill is clipped to the
+   rounded ends. */
+.turn-bar {
+  height: 5px;
+  border-radius: 999px;
+  background: var(--rule-soft);
+  overflow: hidden;
+}
+/* Progress bar fill: starts at 0% width and animates width changes. */
+.turn-bar-fill {
+  height: 100%;
+  width: 0%;
+  border-radius: 999px;
+  background: linear-gradient(90deg, var(--gold), var(--gold-2));
+  transition: width 0.3s ease;
+}
+/* Stage icon pulse: fades between 0.55 and full opacity with a 1px lift at
+   the midpoint. */
+@keyframes ts-pulse {
+  0%,
+  100% {
+    opacity: 0.55;
+    transform: translateY(0);
+  }
+  50% {
+    opacity: 1;
+    transform: translateY(-1px);
+  }
+}
+/* Tool chip row: wraps onto multiple lines as chips accumulate. */
+.tool-chips {
+  display: flex;
+  flex-wrap: wrap;
+  gap: 0.35rem;
+}
+/* Collapse the row entirely while it has no chips, so it adds no gap. */
+.tool-chips:empty {
+  display: none;
+}
+/* A single pill-shaped chip; plays the chip-in animation once when added. */
+.tool-chip {
+  display: inline-flex;
+  align-items: center;
+  gap: 0.3rem;
+  font-family: var(--label);
+  font-size: 0.68rem;
+  font-weight: 600;
+  color: var(--leaf);
+  background: rgba(47, 107, 65, 0.1);
+  border: 1px solid rgba(47, 107, 65, 0.28);
+  border-radius: 999px;
+  padding: 0.12rem 0.55rem;
+  animation: chip-in 0.22s ease;
+}
+.tool-chip .tc-check {
+  font-size: 0.66rem;
+}
+/* Chip entrance: fade in while sliding up 2px. */
+@keyframes chip-in {
+  from {
+    opacity: 0;
+    transform: translateY(2px);
+  }
+  to {
+    opacity: 1;
+    transform: translateY(0);
+  }
+}

tests/test_agent.py CHANGED Viewed

@@ -291,3 +291,49 @@ def test_planner_score_idea_scores_current_idea() -> None:
     assert scored.score is not None
     assert scored.artifact["title"] == first.artifact["title"]

     assert scored.score is not None
     assert scored.artifact["title"] == first.artifact["title"]
+# Checks the event sequence of AdvisorEngine.turn_stream for a plain idea
+# message: "start" first, "done" last, at least one "token", and the expected
+# tool events in order.
+def test_turn_stream_emits_ordered_progress_events() -> None:
+    index = load_test_index()
+    engine = AdvisorEngine(index)
+    events = list(engine.turn_stream("A local-first archive cartographer for family photos", {}))
+    types = [event["type"] for event in events]
+    assert types[0] == "start"
+    assert types[-1] == "done"
+    assert "token" in types
+    # the planning stage is announced before any tool runs, and tools stream as they execute
+    assert types.index("stage") < types.index("tool_event")
+    tool_events = [event for event in events if event["type"] == "tool_event"]
+    assert [event["name"] for event in tool_events] == ["save_idea", "search_projects", "score_idea"]
+    # the final "done" event carries session state with at least one idea
+    assert events[-1]["state"]["ideas"]
+# Checks that the "done" event from turn_stream agrees with the result of the
+# blocking turn() for the same message, using a fresh engine for each call.
+def test_turn_stream_done_matches_blocking_turn() -> None:
+    # idea ids are randomly generated, so compare the deterministic surface of the turn.
+    index = load_test_index()
+    streamed = list(AdvisorEngine(index).turn_stream("write bolder and find whitespace", {}))
+    done = next(event for event in streamed if event["type"] == "done")
+    blocking = AdvisorEngine(index).turn("write bolder and find whitespace", {})
+    assert done["response"] == blocking.response
+    assert done["score"] == (blocking.score.to_dict() if blocking.score else None)
+    assert done["plan"] == blocking.plan
+    # whitespace items and ideas are compared by label / title only
+    assert [item["label"] for item in done["whitespace"]] == [
+        item.label for item in blocking.whitespace
+    ]
+    assert [idea["title"] for idea in done["state"]["ideas"]] == [
+        idea["title"] for idea in blocking.state["ideas"]
+    ]
+# Checks that a ToolResolution passed via turn(resolution=...) is the one
+# recorded in state, even though the engine's planner would return score_idea.
+def test_turn_accepts_injected_resolution() -> None:
+    index = load_test_index()
+    engine = AdvisorEngine(index, planner=StaticPlanner(ToolCall("score_idea", {})))
+    injected = ToolResolution(status="valid", call=ToolCall("list_projects", {"sort": "likes"}), errors=())
+    result = engine.turn("score it", {}, resolution=injected)
+    # the injected list_projects call wins over the planner's score_idea call
+    assert result.state["last_tool_resolution"]["call"]["name"] == "list_projects"

tests/test_app.py CHANGED Viewed

@@ -109,6 +109,38 @@ def test_agent_turn_stream_endpoint_exports_ndjson_events() -> None:
     assert lines[-1]["state"]["ideas"]
 def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
     captured = {}

     assert lines[-1]["state"]["ideas"]
+# Checks that the streaming endpoint's NDJSON output contains a "stage" line,
+# at least one named "tool_event" line, and that the first "stage" precedes
+# the first "token".
+def test_agent_turn_stream_streams_stage_and_tool_events() -> None:
+    response = agent_turn_stream(
+        {
+            "message": "A local-first archive cartographer for family photos",
+            "session_json": "{}",
+        }
+    )
+    payload = asyncio.run(_read_streaming_response(response))
+    # one JSON object per line
+    lines = [json.loads(line) for line in payload.splitlines()]
+    types = [line["type"] for line in lines]
+    assert "stage" in types
+    assert any(line["type"] == "tool_event" and line.get("name") for line in lines)
+    assert types.index("stage") < types.index("token")
+# Checks that a request with "compute": "cpu" still streams a complete turn:
+# "start" first, "done" last, and a final state containing ideas.
+def test_agent_turn_stream_runs_on_cpu_compute() -> None:
+    response = agent_turn_stream(
+        {
+            "message": "A local-first archive cartographer for family photos",
+            "session_json": "{}",
+            "compute": "cpu",
+        }
+    )
+    payload = asyncio.run(_read_streaming_response(response))
+    lines = [json.loads(line) for line in payload.splitlines()]
+    assert lines[0]["type"] == "start"
+    assert lines[-1]["type"] == "done"
+    assert lines[-1]["state"]["ideas"]
 def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
     captured = {}

tests/test_model_runtime.py CHANGED Viewed

@@ -8,13 +8,26 @@ from hackathon_advisor.model_runtime import (
     render_context,
     runtime_status,
     system_prompt,
     _disable_sampling_generation_defaults,
     _normalize_xml_tool_output,
     _strip_unused_generation_inputs,
 )
 from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled
 def test_rule_planner_emits_valid_search_call() -> None:
     planner = RuleBasedPlanner()
@@ -81,6 +94,18 @@ def test_rule_planner_keeps_project_words_inside_ideas() -> None:
     assert resolution.call.name == "save_idea"
 def test_rule_planner_splits_explicit_idea_pitch() -> None:
     planner = RuleBasedPlanner()
@@ -215,3 +240,39 @@ def test_model_xml_fragment_is_normalized() -> None:
     output = 'name="save_idea">{"title":"A","pitch":"B"}'
     assert _normalize_xml_tool_output(output) == '<function name="save_idea">{"title":"A","pitch":"B"}</function>'

     render_context,
     runtime_status,
     system_prompt,
+    _best_local_device,
     _disable_sampling_generation_defaults,
     _normalize_xml_tool_output,
+    _resolve_torch_device,
     _strip_unused_generation_inputs,
 )
 from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled
+# Test double exposing `mps.is_available()`, which returns the `mps` flag given
+# to the constructor. Presumably stands in for `torch.backends` - confirm
+# against the code under test.
+class FakeBackends:
+    def __init__(self, mps: bool) -> None:
+        # Build a throwaway class on the fly and instantiate it as the `mps` attribute.
+        self.mps = type("MPS", (), {"is_available": staticmethod(lambda: mps)})()
+# Test double exposing `cuda.is_available()` and `backends.mps.is_available()`,
+# each returning the corresponding constructor flag (both default to False).
+class FakeTorch:
+    def __init__(self, cuda: bool = False, mps: bool = False) -> None:
+        self.cuda = type("CUDA", (), {"is_available": staticmethod(lambda: cuda)})()
+        self.backends = FakeBackends(mps)
 def test_rule_planner_emits_valid_search_call() -> None:
     planner = RuleBasedPlanner()
     assert resolution.call.name == "save_idea"
+# Checks that a descriptive idea sentence is planned as a valid save_idea call.
+# NOTE(review): per the test name, the sentence presumably contains words that
+# embed planner command keywords - confirm which ones against RuleBasedPlanner.
+def test_rule_planner_does_not_match_commands_inside_idea_words() -> None:
+    planner = RuleBasedPlanner()
+    resolution = planner.plan(
+        "A neighborhood seed swap archive that reminds gardeners when to plant shared seeds",
+        {},
+    )
+    assert resolution.status == "valid"
+    assert resolution.call.name == "save_idea"
 def test_rule_planner_splits_explicit_idea_pitch() -> None:
     planner = RuleBasedPlanner()
     output = 'name="save_idea">{"title":"A","pitch":"B"}'
     assert _normalize_xml_tool_output(output) == '<function name="save_idea">{"title":"A","pitch":"B"}</function>'
+# Checks that "auto" is returned unchanged, and that an explicit "cpu" stays
+# "cpu" even when the fake torch reports both CUDA and MPS as available.
+def test_resolve_device_keeps_auto_and_explicit_cpu() -> None:
+    assert _resolve_torch_device("auto", FakeTorch()) == "auto"
+    assert _resolve_torch_device("cpu", FakeTorch(cuda=True, mps=True)) == "cpu"
+# Checks the device preference order of _best_local_device: cuda, then mps,
+# then cpu. ADVISOR_ZERO_GPU is unset first so it cannot affect the outcome.
+def test_resolve_device_prefers_cuda_then_mps_then_cpu(monkeypatch) -> None:
+    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)
+    assert _best_local_device(FakeTorch(cuda=True, mps=True)) == "cuda"
+    assert _best_local_device(FakeTorch(cuda=False, mps=True)) == "mps"
+    assert _best_local_device(FakeTorch(cuda=False, mps=False)) == "cpu"
+    # "local" resolves through the same ladder
+    assert _resolve_torch_device("local", FakeTorch(cuda=False, mps=True)) == "mps"
+# Checks that requesting a device the fake torch reports as unavailable falls
+# back to an available one instead of raising.
+def test_resolve_device_unavailable_request_degrades_gracefully(monkeypatch) -> None:
+    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)
+    # asking for cuda on an MPS-only box lands on mps, not a crash
+    assert _resolve_torch_device("cuda", FakeTorch(cuda=False, mps=True)) == "mps"
+# Checks that with ADVISOR_ZERO_GPU=1, _best_local_device returns "cpu" even
+# though the fake torch reports CUDA as available.
+def test_resolve_device_skips_cuda_under_zero_gpu(monkeypatch) -> None:
+    # In a ZeroGPU main process there is no local CUDA, and probing it is avoided.
+    monkeypatch.setenv("ADVISOR_ZERO_GPU", "1")
+    assert _best_local_device(FakeTorch(cuda=True, mps=False)) == "cpu"
+# Checks that runtime_status reports the device string the planner was
+# configured with, and an empty string for the rule-based planner.
+def test_runtime_status_reports_configured_device() -> None:
+    planner = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", device="local")
+    assert runtime_status(planner).to_dict()["device"] == "local"
+    assert runtime_status(RuleBasedPlanner()).to_dict()["device"] == ""

tests/test_profiling.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import logging
+from hackathon_advisor.profiling import (
+    TurnProfiler,
+    configure_logging,
+    messages_processed,
+    next_message_index,
+    resource_snapshot,
+)
+# Fixture: a fixed event sequence for one turn - start, planning stage, two
+# model_progress updates (5 then 12 tokens), running_tool stage, two tool
+# events, writing stage, one token, done.
+def _turn_events() -> list[dict]:
+    return [
+        {"type": "start"},
+        {"type": "stage", "stage": "planning"},
+        {"type": "model_progress", "tokens": 5, "max_tokens": 180},
+        {"type": "model_progress", "tokens": 12, "max_tokens": 180},
+        {"type": "stage", "stage": "running_tool"},
+        {"type": "tool_event", "name": "save_idea"},
+        {"type": "tool_event", "name": "score_idea"},
+        {"type": "stage", "stage": "writing"},
+        {"type": "token", "text": "hello "},
+        {"type": "done"},
+    ]
+# Feeds the fixture events to a TurnProfiler and checks its counters and the
+# keys / non-negativity of the reported durations.
+def test_profiler_observes_tokens_tools_and_stage_durations() -> None:
+    profiler = TurnProfiler(message_index=1, compute="cpu", backend="minicpm-transformers")
+    for event in _turn_events():
+        profiler.observe(event)
+    durations = profiler.durations()
+    # 12 matches the last model_progress "tokens" value in the fixture
+    assert profiler.tokens == 12
+    # the fixture contains two tool_event entries
+    assert profiler.tool_count == 2
+    assert profiler.fell_back is False
+    assert set(durations) >= {"total_ms", "decode_ms", "tools_ms", "write_ms"}
+    assert all(value >= 0 for value in durations.values())
+# Captures messages from the "hackathon_advisor" logger and checks that the
+# profiler logs a start line and exactly one summary line for turn #7.
+def test_profiler_logs_start_and_summary() -> None:
+    configure_logging()  # the advisor logger does not propagate, so capture it directly
+    logger = logging.getLogger("hackathon_advisor")
+    messages: list[str] = []
+    # Minimal capturing handler: emit is replaced to record formatted messages.
+    handler = logging.Handler()
+    handler.emit = lambda record: messages.append(record.getMessage())
+    logger.addHandler(handler)
+    try:
+        profiler = TurnProfiler(message_index=7, compute="gpu", backend="rules", message_chars=42)
+        profiler.log_start()
+        for event in _turn_events():
+            profiler.observe(event)
+        profiler.log_summary()
+        profiler.log_summary()  # idempotent: a second call must not log again
+    finally:
+        # Always detach the handler so other tests are unaffected.
+        logger.removeHandler(handler)
+    summaries = [message for message in messages if "turn #7" in message]
+    assert any("start" in message for message in summaries)
+    assert sum("done" in message for message in summaries) == 1  # log_summary is idempotent
+# Checks that observing a "fallback" event sets the profiler's fell_back flag.
+def test_profiler_marks_fallback() -> None:
+    profiler = TurnProfiler(message_index=2, compute="gpu", backend="minicpm-transformers")
+    profiler.observe({"type": "fallback", "to": "cpu"})
+    assert profiler.fell_back is True
+# Checks that resource_snapshot returns a dict that includes an "rss_mb" key.
+def test_resource_snapshot_is_best_effort_dict() -> None:
+    snapshot = resource_snapshot()
+    assert isinstance(snapshot, dict)
+    # rss is available on the platforms we run on; never raises regardless.
+    assert "rss_mb" in snapshot
+# Checks that consecutive next_message_index() calls return consecutive
+# values and that messages_processed() grows by at least two.
+def test_message_counter_increments() -> None:
+    start = messages_processed()
+    first = next_message_index()
+    second = next_message_index()
+    assert second == first + 1
+    # ">=" rather than "==": presumably tolerates other increments of the
+    # counter happening in between - confirm whether it is process-wide.
+    assert messages_processed() >= start + 2

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff