JacobLinCool Codex committed on
Commit
6d9770a
·
verified ·
1 Parent(s): d0718ca

feat: stream advisor progress

Browse files

Co-authored-by: Codex <noreply@openai.com>

app.py CHANGED
@@ -16,6 +16,12 @@ from hackathon_advisor.asr_runtime import create_asr_transcriber
16
  from hackathon_advisor.chapter import build_chapter_markdown
17
  from hackathon_advisor.data import ProjectIndex
18
  from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
 
 
 
 
 
 
19
  from hackathon_advisor.field_notes import build_field_notes_markdown
20
  from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
21
  from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
@@ -26,9 +32,10 @@ from hackathon_advisor.submission_packet import build_submission_packet_markdown
26
  from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
27
  from hackathon_advisor.tools import GOALS, goal_profiles
28
  from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
29
- from hackathon_advisor.zerogpu import gpu_task
30
 
31
 
 
32
  install_asyncio_cleanup_hook()
33
 
34
  ROOT = Path(__file__).parent
@@ -40,18 +47,33 @@ MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
40
  AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
41
 
42
  index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
43
- engine = AdvisorEngine(index)
 
 
 
44
  voice_transcriber = create_asr_transcriber()
45
  app = Server()
46
 
 
 
47
 
48
  def _json_event(payload: dict) -> str:
49
  return json.dumps(payload, ensure_ascii=False)
50
 
51
 
 
 
 
 
 
 
 
 
 
 
52
  @gpu_task
53
- def _engine_turn(message: str, session: dict[str, Any]):
54
- return engine.turn(message, session)
55
 
56
 
57
  @gpu_task
@@ -72,33 +94,71 @@ def _session_from_payload(payload: dict[str, Any] | None) -> dict[str, Any]:
72
  return _session_from_json(str(payload.get("session_json") or "{}"))
73
 
74
 
75
- def _agent_turn_events(message: str, session_json: str = "{}") -> Iterator[str]:
76
- session = _session_from_json(session_json)
77
- result = _engine_turn(message, session)
78
- yield _json_event(
79
- {
80
- "type": "start",
81
- "corrections": [correction.to_dict() for correction in result.corrections],
82
- "normalized_text": result.normalized_text,
83
- "tool_events": [event.to_dict() for event in result.tool_events],
84
- }
85
- )
86
 
87
- for chunk in result.stream_chunks():
88
- yield _json_event({"type": "token", "text": chunk})
89
 
90
- yield _json_event(
91
- {
92
- "type": "done",
93
- "state": result.state,
94
- "response": result.response,
95
- "projects": [project.to_public_dict() for project in result.projects],
96
- "whitespace": [item.to_dict() for item in result.whitespace],
97
- "score": result.score.to_dict() if result.score else None,
98
- "plan": result.plan,
99
- "artifact": result.artifact,
100
- }
101
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
 
104
  @app.get("/", response_class=HTMLResponse)
@@ -197,14 +257,20 @@ def agent_turn_stream(payload: dict[str, Any] | None = Body(default=None)) -> St
197
  payload = payload or {}
198
  message = str(payload.get("message") or "")
199
  session_json = str(payload.get("session_json") or "{}")
 
200
 
201
  def stream() -> Iterator[str]:
202
- for event in _agent_turn_events(message, session_json):
203
  yield f"{event}\n"
204
 
205
  return StreamingResponse(stream(), media_type="application/x-ndjson")
206
 
207
 
 
 
 
 
 
208
  @app.post("/api/transcribe")
209
  async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
210
  content_type = str(audio.content_type or "")
@@ -347,8 +413,8 @@ def submission_packet_artifact(session_json: str = "{}") -> str:
347
 
348
 
349
  @app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
350
- def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
351
- yield from _agent_turn_events(message, session_json)
352
 
353
 
354
  if __name__ == "__main__":
 
16
  from hackathon_advisor.chapter import build_chapter_markdown
17
  from hackathon_advisor.data import ProjectIndex
18
  from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
19
+ from hackathon_advisor.model_runtime import create_tool_planner
20
+ from hackathon_advisor.profiling import (
21
+ TurnProfiler,
22
+ configure_logging,
23
+ next_message_index,
24
+ )
25
  from hackathon_advisor.field_notes import build_field_notes_markdown
26
  from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
27
  from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
 
32
  from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
33
  from hackathon_advisor.tools import GOALS, goal_profiles
34
  from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
35
+ from hackathon_advisor.zerogpu import gpu_task, is_gpu_quota_error, zero_gpu_enabled
36
 
37
 
38
+ configure_logging()
39
  install_asyncio_cleanup_hook()
40
 
41
  ROOT = Path(__file__).parent
 
47
  AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
48
 
49
  index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
50
+ # Acceleration is automatic: on a ZeroGPU Space the GPU path uses accelerate device_map inside
51
+ # the @spaces.GPU fork; locally the device resolves CUDA -> Apple MPS -> CPU. CPU is only used
52
+ # as an explicit override or a quota fallback.
53
+ engine = AdvisorEngine(index, create_tool_planner(device="auto" if zero_gpu_enabled() else "local"))
54
  voice_transcriber = create_asr_transcriber()
55
  app = Server()
56
 
57
+ _cpu_engine: AdvisorEngine | None = None
58
+
59
 
60
def _json_event(payload: dict) -> str:
    """Serialize one event payload to a JSON string, keeping non-ASCII characters as-is."""
    encoded = json.dumps(payload, ensure_ascii=False)
    return encoded
62
 
63
 
64
def _cpu_engine_instance() -> AdvisorEngine:
    """Return the shared CPU-pinned advisor engine, building it on first use.

    The engine backs both the explicit CPU override and the automatic fallback taken when a
    ZeroGPU allocation is denied. It is created lazily so the CPU planner is only constructed
    once CPU is actually needed.
    """
    global _cpu_engine
    if _cpu_engine is not None:
        return _cpu_engine
    cpu_planner = create_tool_planner(device="cpu")
    _cpu_engine = AdvisorEngine(index, cpu_planner)
    return _cpu_engine
72
+
73
+
74
@gpu_task
def _engine_turn_stream_gpu(message: str, session: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Stream the primary engine's turn events from inside the ``gpu_task`` wrapper."""
    turn_events = engine.turn_stream(message, session)
    yield from turn_events
77
 
78
 
79
  @gpu_task
 
94
  return _session_from_json(str(payload.get("session_json") or "{}"))
95
 
96
 
97
def _primary_turn_stream(message: str, session: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Stream a turn from the primary engine.

    When ZeroGPU is enabled the turn goes through the ``gpu_task``-wrapped entry point;
    otherwise the engine is called directly.
    """
    run_turn = _engine_turn_stream_gpu if zero_gpu_enabled() else engine.turn_stream
    yield from run_turn(message, session)
 
 
 
 
 
 
102
 
 
 
103
 
104
def _agent_turn_events(
    message: str,
    session_json: str = "{}",
    compute: str = "gpu",
) -> Iterator[str]:
    """Run one advisor turn and yield each event as a JSON string, profiling the turn.

    Args:
        message: The user's message for this turn.
        session_json: JSON-encoded session state; defaults to an empty object.
        compute: Requested compute mode; ``"cpu"`` forces the CPU engine, anything else
            uses the primary engine (with an automatic CPU fallback on a quota error).

    Yields:
        One JSON document per turn event, in the order the events are produced.

    Raises:
        Exception: Any error raised while producing events is re-raised after the
            profiler has logged its summary for the failed turn.
    """
    profiler = TurnProfiler(
        message_index=next_message_index(),
        compute=compute,
        backend=str(engine.runtime_status().get("backend", "")),
        message_chars=len(message),
    )
    profiler.log_start()
    # Track which engine actually served the turn: a "fallback" event means a turn that was
    # requested on the primary engine was rerouted to the CPU engine, so the reported device
    # must be read from that engine rather than from the primary one.
    effective_compute = compute
    try:
        for event in _profiled_turn_events(message, session_json, compute):
            if event.get("type") == "fallback" and event.get("to") == "cpu":
                effective_compute = "cpu"
            profiler.observe(event)
            yield _json_event(event)
        profiler.device = _active_device(effective_compute)
        profiler.log_summary()
    except Exception as error:  # noqa: BLE001 - log timing/resources even when a turn fails
        profiler.device = _active_device(effective_compute)
        profiler.log_summary(error)
        raise
126
+
127
+
128
def _active_device(compute: str) -> str:
    """Report the device string of the engine selected by ``compute``.

    ``"cpu"`` selects the lazily created CPU engine, anything else the primary engine. The
    value is meant to be read after the run so a lazily loaded model has had the chance to
    report its resolved device. Returns ``""`` when the CPU engine was never created or when
    reading the status fails.
    """
    selected = _cpu_engine if compute == "cpu" else engine
    if selected is None:
        return ""
    try:
        status = selected.runtime_status()
        return str(status.get("device", ""))
    except Exception:  # noqa: BLE001 - profiling must never break a turn
        return ""
136
+
137
+
138
def _profiled_turn_events(
    message: str,
    session_json: str,
    compute: str,
) -> Iterator[dict[str, Any]]:
    """Yield the event dicts for one turn, falling back to the CPU engine on a quota error.

    Unless ``compute`` is ``"cpu"``, the primary stream is tried first. If it fails with a GPU
    quota error before emitting anything, a ``fallback`` event is yielded and the turn is
    re-run on the CPU engine. Any other failure, or a failure after events were already
    emitted, is re-raised unchanged.
    """
    session = _session_from_json(session_json)
    if compute != "cpu":
        emitted_any = False
        try:
            for primary_event in _primary_turn_stream(message, session):
                emitted_any = True
                yield primary_event
        except Exception as error:  # noqa: BLE001 - fall back to local on a clean quota failure
            clean_quota_failure = not emitted_any and is_gpu_quota_error(error)
            if not clean_quota_failure:
                raise
        else:
            # The primary stream finished normally; nothing left to do.
            return
        yield {
            "type": "fallback",
            "to": "cpu",
            "reason": "ZeroGPU quota reached — running this turn locally (slower).",
        }

    for cpu_event in _cpu_engine_instance().turn_stream(message, session):
        yield cpu_event
162
 
163
 
164
  @app.get("/", response_class=HTMLResponse)
 
257
  payload = payload or {}
258
  message = str(payload.get("message") or "")
259
  session_json = str(payload.get("session_json") or "{}")
260
+ compute = _normalize_compute(payload.get("compute"))
261
 
262
  def stream() -> Iterator[str]:
263
+ for event in _agent_turn_events(message, session_json, compute):
264
  yield f"{event}\n"
265
 
266
  return StreamingResponse(stream(), media_type="application/x-ndjson")
267
 
268
 
269
def _normalize_compute(value: Any) -> str:
    """Map a client-supplied compute hint to ``"cpu"`` or ``"gpu"``.

    Only a value that reads ``cpu`` (ignoring case and surrounding whitespace) selects the
    CPU override; everything else, including missing values, means ``"gpu"``.
    """
    # Acceleration is automatic; "cpu" is the only manual override (not surfaced in the UI).
    requested = str(value or "").strip().lower()
    if requested == "cpu":
        return "cpu"
    return "gpu"
272
+
273
+
274
  @app.post("/api/transcribe")
275
  async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
276
  content_type = str(audio.content_type or "")
 
413
 
414
 
415
@app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
def agent_turn(message: str, session_json: str = "{}", compute: str = "gpu") -> Iterator[str]:
    """API entry point: stream the JSON events of one advisor turn.

    ``compute`` is normalized first, so only an explicit ``cpu`` request selects the CPU path.
    """
    normalized_compute = _normalize_compute(compute)
    yield from _agent_turn_events(message, session_json, normalized_compute)
418
 
419
 
420
  if __name__ == "__main__":
hackathon_advisor/agent.py CHANGED
@@ -1,5 +1,6 @@
1
  from __future__ import annotations
2
 
 
3
  from dataclasses import dataclass
4
  from dataclasses import replace
5
  from typing import Any
@@ -8,7 +9,7 @@ from hackathon_advisor.aliases import Correction, normalize_text
8
  from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
9
  from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
10
  from hackathon_advisor.scoring import ScoreCard
11
- from hackathon_advisor.tool_contracts import ToolCall
12
  from hackathon_advisor.tools import (
13
  GOALS,
14
  AdvisorTools,
@@ -58,13 +59,20 @@ class AdvisorEngine:
58
  def runtime_status(self) -> dict[str, Any]:
59
  return runtime_status(self.planner).to_dict()
60
 
61
- def turn(self, message: str, state: dict[str, Any] | None = None) -> TurnResult:
 
 
 
 
 
 
62
  state = dict(state or {})
63
  state.setdefault("ideas", [])
64
  state.setdefault("profile", {})
65
  state.setdefault("goals", GOALS[:3])
66
  normalized, corrections = normalize_text(message)
67
- resolution = self.planner.plan(normalized, state)
 
68
  state["last_tool_resolution"] = resolution.to_dict()
69
  tool_events: list[ToolEvent] = []
70
  projects: list[Project] = []
@@ -134,6 +142,52 @@ class AdvisorEngine:
134
 
135
  return self._idea_research_turn(call, normalized, corrections, state, tool_events)
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  def _result(
138
  self,
139
  normalized_text: str,
 
1
  from __future__ import annotations
2
 
3
+ from collections.abc import Iterator
4
  from dataclasses import dataclass
5
  from dataclasses import replace
6
  from typing import Any
 
9
  from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
10
  from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
11
  from hackathon_advisor.scoring import ScoreCard
12
+ from hackathon_advisor.tool_contracts import ToolCall, ToolResolution
13
  from hackathon_advisor.tools import (
14
  GOALS,
15
  AdvisorTools,
 
59
  def runtime_status(self) -> dict[str, Any]:
60
  return runtime_status(self.planner).to_dict()
61
 
62
+ def turn(
63
+ self,
64
+ message: str,
65
+ state: dict[str, Any] | None = None,
66
+ *,
67
+ resolution: ToolResolution | None = None,
68
+ ) -> TurnResult:
69
  state = dict(state or {})
70
  state.setdefault("ideas", [])
71
  state.setdefault("profile", {})
72
  state.setdefault("goals", GOALS[:3])
73
  normalized, corrections = normalize_text(message)
74
+ if resolution is None:
75
+ resolution = self.planner.plan(normalized, state)
76
  state["last_tool_resolution"] = resolution.to_dict()
77
  tool_events: list[ToolEvent] = []
78
  projects: list[Project] = []
 
142
 
143
  return self._idea_research_turn(call, normalized, corrections, state, tool_events)
144
 
145
+ def turn_stream(self, message: str, state: dict[str, Any] | None = None) -> Iterator[dict[str, Any]]:
146
+ """Run a turn while yielding plain-dict progress events, so a caller can stream the
147
+ real work (tool-call decoding, tool execution, response) instead of replaying a
148
+ finished string. Every yielded value is JSON-serializable so it can cross a ZeroGPU
149
+ process boundary."""
150
+ state = dict(state or {})
151
+ normalized, corrections = normalize_text(message)
152
+ yield {
153
+ "type": "start",
154
+ "corrections": [correction.to_dict() for correction in corrections],
155
+ "normalized_text": normalized,
156
+ }
157
+ yield {"type": "stage", "stage": "planning", "label": "Choosing the next move"}
158
+
159
+ resolution: ToolResolution | None = None
160
+ for event in self.planner.plan_iter(normalized, state):
161
+ if event.get("type") == "resolved":
162
+ resolution = event["resolution"]
163
+ else:
164
+ yield event
165
+ tool_name = resolution.call.name if resolution is not None else ""
166
+ yield {
167
+ "type": "stage",
168
+ "stage": "running_tool",
169
+ "tool": tool_name,
170
+ "label": f"Calling {tool_name}" if tool_name else "Running tools",
171
+ }
172
+
173
+ result = self.turn(normalized, state, resolution=resolution)
174
+ for event in result.tool_events:
175
+ yield {"type": "tool_event", **event.to_dict()}
176
+
177
+ yield {"type": "stage", "stage": "writing", "label": "Writing the page"}
178
+ for chunk in result.stream_chunks():
179
+ yield {"type": "token", "text": chunk}
180
+ yield {
181
+ "type": "done",
182
+ "state": result.state,
183
+ "response": result.response,
184
+ "projects": [project.to_public_dict() for project in result.projects],
185
+ "whitespace": [item.to_dict() for item in result.whitespace],
186
+ "score": result.score.to_dict() if result.score else None,
187
+ "plan": result.plan,
188
+ "artifact": result.artifact,
189
+ }
190
+
191
  def _result(
192
  self,
193
  normalized_text: str,
hackathon_advisor/asr_runtime.py CHANGED
@@ -1,6 +1,7 @@
1
  from __future__ import annotations
2
 
3
  from dataclasses import dataclass
 
4
  import os
5
  from pathlib import Path
6
  import shutil
@@ -12,6 +13,10 @@ from typing import Any
12
  DEFAULT_ASR_MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
13
  DEFAULT_ASR_BACKEND = "nemo-asr"
14
  DEFAULT_ASR_SAMPLE_RATE = 16_000
 
 
 
 
15
 
16
 
17
  @dataclass(frozen=True)
@@ -47,22 +52,32 @@ class AsrStatus:
47
 
48
 
49
  class NemotronAsrTranscriber:
 
 
 
 
 
 
50
  backend = DEFAULT_ASR_BACKEND
51
 
52
  def __init__(
53
  self,
54
  model_id: str = DEFAULT_ASR_MODEL_ID,
55
  sample_rate: int = DEFAULT_ASR_SAMPLE_RATE,
 
56
  ) -> None:
57
  self.model_id = model_id.strip() or DEFAULT_ASR_MODEL_ID
58
  self.sample_rate = sample_rate
59
- self._model = None
 
 
 
60
 
61
  def status(self) -> AsrStatus:
62
  return AsrStatus(
63
- backend=self.backend,
64
- model_id=self.model_id,
65
- loaded=self._model is not None,
66
  sample_rate=self.sample_rate,
67
  )
68
 
@@ -71,23 +86,41 @@ class NemotronAsrTranscriber:
71
  if not source.is_file():
72
  raise RuntimeError("Voice note was not saved before transcription.")
73
  self._ensure_loaded()
 
74
  with tempfile.TemporaryDirectory(prefix="advisor-asr-") as directory:
75
  wav_path = Path(directory) / "voice.wav"
76
  normalize_audio_for_asr(source, wav_path, self.sample_rate)
77
- outputs = self._model.transcribe([str(wav_path)], batch_size=1)
78
- transcript = extract_transcript(outputs).strip()
 
 
 
79
  if not transcript:
80
- raise RuntimeError("Nemotron ASR returned an empty transcript.")
81
  return AsrTranscript(
82
  transcript=transcript,
83
- model_id=self.model_id,
84
- backend=self.backend,
85
  sample_rate=self.sample_rate,
86
  )
87
 
88
  def _ensure_loaded(self) -> None:
89
- if self._model is not None:
 
 
 
 
90
  return
 
 
 
 
 
 
 
 
 
 
91
  try:
92
  import torch
93
  import nemo.collections.asr as nemo_asr
@@ -97,12 +130,33 @@ class NemotronAsrTranscriber:
97
  "before enabling voice transcription."
98
  ) from error
99
  model = nemo_asr.models.ASRModel.from_pretrained(model_name=self.model_id)
100
- device = os.environ.get("ADVISOR_ASR_DEVICE", "").strip()
101
- if not device:
102
- device = "cuda" if torch.cuda.is_available() else "cpu"
103
  model.to(device)
104
  model.eval()
105
- self._model = model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
 
108
  def create_asr_transcriber() -> NemotronAsrTranscriber:
@@ -112,9 +166,54 @@ def create_asr_transcriber() -> NemotronAsrTranscriber:
112
  return NemotronAsrTranscriber(
113
  model_id=os.environ.get("ADVISOR_ASR_MODEL_ID", DEFAULT_ASR_MODEL_ID),
114
  sample_rate=sample_rate,
 
115
  )
116
 
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def normalize_audio_for_asr(source: Path, target: Path, sample_rate: int = DEFAULT_ASR_SAMPLE_RATE) -> None:
119
  ffmpeg = shutil.which("ffmpeg")
120
  if not ffmpeg:
 
1
  from __future__ import annotations
2
 
3
  from dataclasses import dataclass
4
+ import logging
5
  import os
6
  from pathlib import Path
7
  import shutil
 
13
  DEFAULT_ASR_MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
14
  DEFAULT_ASR_BACKEND = "nemo-asr"
15
  DEFAULT_ASR_SAMPLE_RATE = 16_000
16
+ DEFAULT_WHISPER_MODEL_ID = "openai/whisper-small.en"
17
+ WHISPER_BACKEND = "whisper-transformers"
18
+
19
+ _logger = logging.getLogger("hackathon_advisor")
20
 
21
 
22
  @dataclass(frozen=True)
 
52
 
53
 
54
  class NemotronAsrTranscriber:
55
+ """Nemotron voice input. Its declared identity (status, model id) is the deployed Space
56
+ backend — NVIDIA NeMo ASR. When NeMo is not installed (e.g. local development on a Mac,
57
+ where NeMo does not install cleanly), transcription transparently falls back to a local
58
+ Whisper model through transformers so voice still works; the returned transcript reports
59
+ whichever engine actually ran."""
60
+
61
  backend = DEFAULT_ASR_BACKEND
62
 
63
  def __init__(
64
  self,
65
  model_id: str = DEFAULT_ASR_MODEL_ID,
66
  sample_rate: int = DEFAULT_ASR_SAMPLE_RATE,
67
+ whisper_model_id: str = DEFAULT_WHISPER_MODEL_ID,
68
  ) -> None:
69
  self.model_id = model_id.strip() or DEFAULT_ASR_MODEL_ID
70
  self.sample_rate = sample_rate
71
+ self.whisper_model_id = whisper_model_id.strip() or DEFAULT_WHISPER_MODEL_ID
72
+ self._engine: tuple[str, Any] | None = None
73
+ self._active_backend = ""
74
+ self._active_model_id = ""
75
 
76
  def status(self) -> AsrStatus:
77
  return AsrStatus(
78
+ backend=self._active_backend or self.backend,
79
+ model_id=self._active_model_id or self.model_id,
80
+ loaded=self._engine is not None,
81
  sample_rate=self.sample_rate,
82
  )
83
 
 
86
  if not source.is_file():
87
  raise RuntimeError("Voice note was not saved before transcription.")
88
  self._ensure_loaded()
89
+ kind, engine = self._engine # type: ignore[misc]
90
  with tempfile.TemporaryDirectory(prefix="advisor-asr-") as directory:
91
  wav_path = Path(directory) / "voice.wav"
92
  normalize_audio_for_asr(source, wav_path, self.sample_rate)
93
+ if kind == "nemo":
94
+ outputs = engine.transcribe([str(wav_path)], batch_size=1)
95
+ transcript = extract_transcript(outputs).strip()
96
+ else:
97
+ transcript = _whisper_transcribe(engine, wav_path, self.sample_rate).strip()
98
  if not transcript:
99
+ raise RuntimeError(f"{self._active_backend or self.backend} returned an empty transcript.")
100
  return AsrTranscript(
101
  transcript=transcript,
102
+ model_id=self._active_model_id or self.model_id,
103
+ backend=self._active_backend or self.backend,
104
  sample_rate=self.sample_rate,
105
  )
106
 
107
  def _ensure_loaded(self) -> None:
108
+ if self._engine is not None:
109
+ return
110
+ preference = os.environ.get("ADVISOR_ASR_BACKEND", "auto").strip().lower()
111
+ if preference in ("whisper", WHISPER_BACKEND):
112
+ self._load_whisper()
113
  return
114
+ try:
115
+ self._load_nemo()
116
+ return
117
+ except RuntimeError:
118
+ if preference in ("nemo", "nemo-asr", "nemotron"):
119
+ raise # explicit Nemotron request: do not silently fall back
120
+ _logger.warning("NeMo ASR unavailable; falling back to local Whisper (%s).", self.whisper_model_id)
121
+ self._load_whisper()
122
+
123
+ def _load_nemo(self) -> None:
124
  try:
125
  import torch
126
  import nemo.collections.asr as nemo_asr
 
130
  "before enabling voice transcription."
131
  ) from error
132
  model = nemo_asr.models.ASRModel.from_pretrained(model_name=self.model_id)
133
+ device = os.environ.get("ADVISOR_ASR_DEVICE", "").strip() or ("cuda" if torch.cuda.is_available() else "cpu")
 
 
134
  model.to(device)
135
  model.eval()
136
+ self._engine = ("nemo", model)
137
+ self._active_backend = self.backend
138
+ self._active_model_id = self.model_id
139
+
140
+ def _load_whisper(self) -> None:
141
+ try:
142
+ import torch
143
+ from transformers import WhisperForConditionalGeneration, WhisperProcessor
144
+ except ImportError as error:
145
+ raise RuntimeError(
146
+ "Local voice fallback requires transformers and torch. Install runtime "
147
+ "requirements before enabling voice transcription."
148
+ ) from error
149
+ device = _resolve_asr_device(torch)
150
+ if device == "mps":
151
+ os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
152
+ processor = WhisperProcessor.from_pretrained(self.whisper_model_id)
153
+ model = WhisperForConditionalGeneration.from_pretrained(self.whisper_model_id)
154
+ model.to(device)
155
+ model.eval()
156
+ self._engine = ("whisper", (processor, model))
157
+ self._active_backend = WHISPER_BACKEND
158
+ self._active_model_id = self.whisper_model_id
159
+ _logger.info("Whisper ASR loaded | model=%s device=%s", self.whisper_model_id, device)
160
 
161
 
162
  def create_asr_transcriber() -> NemotronAsrTranscriber:
 
166
  return NemotronAsrTranscriber(
167
  model_id=os.environ.get("ADVISOR_ASR_MODEL_ID", DEFAULT_ASR_MODEL_ID),
168
  sample_rate=sample_rate,
169
+ whisper_model_id=os.environ.get("ADVISOR_ASR_WHISPER_MODEL", DEFAULT_WHISPER_MODEL_ID),
170
  )
171
 
172
 
173
def _resolve_asr_device(torch: Any) -> str:
    """Pick the device string for ASR.

    A non-empty ``ADVISOR_ASR_DEVICE`` wins (stripped and lower-cased). Otherwise CUDA is
    preferred, then Apple MPS, then CPU. A probe that raises is treated as "not available".
    """
    override = os.environ.get("ADVISOR_ASR_DEVICE", "").strip().lower()
    if override:
        return override
    # Probes are wrapped in lambdas so attribute lookups happen inside the try below.
    probes = (
        ("cuda", lambda: torch.cuda.is_available()),
        ("mps", lambda: torch.backends.mps.is_available()),
    )
    for device_name, is_available in probes:
        try:
            if is_available():
                return device_name
        except Exception:  # pragma: no cover - device dependent
            continue
    return "cpu"
188
+
189
+
190
def _whisper_transcribe(engine: tuple[Any, Any], wav_path: Path, sample_rate: int) -> str:
    """Transcribe ``wav_path`` with a ``(processor, model)`` Whisper pair.

    Returns the first decoded string, or ``""`` when decoding produced nothing. Generation is
    capped at 128 new tokens.
    """
    import torch

    processor, model = engine
    waveform = _read_wav_mono_float32(wav_path)
    encoded = processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
    # Move the features onto the model's device before generating.
    input_features = encoded.input_features.to(model.device)
    with torch.inference_mode():
        token_ids = model.generate(input_features, max_new_tokens=128)
    texts = processor.batch_decode(token_ids, skip_special_tokens=True)
    if not texts:
        return ""
    return texts[0]
201
+
202
+
203
def _read_wav_mono_float32(wav_path: Path) -> Any:
    """Read a PCM WAV file into a mono ``float32`` NumPy array scaled to roughly [-1, 1).

    Supports 8-bit (unsigned), 16-bit and 32-bit integer PCM. Samples are decoded as
    little-endian, as WAV stores them, regardless of host byte order. Multi-channel audio is
    averaged down to one channel.

    Raises:
        RuntimeError: If the file uses a sample width other than 1, 2 or 4 bytes.
    """
    import wave

    import numpy as np

    with wave.open(str(wav_path), "rb") as wav:
        channels = wav.getnchannels()
        sample_width = wav.getsampwidth()
        frames = wav.readframes(wav.getnframes())

    # Decode according to the declared sample width instead of assuming 16-bit samples.
    if sample_width == 2:
        audio = np.frombuffer(frames, dtype="<i2").astype(np.float32) / 32768.0
    elif sample_width == 1:
        # 8-bit WAV PCM is unsigned with its midpoint at 128.
        audio = (np.frombuffer(frames, dtype=np.uint8).astype(np.float32) - 128.0) / 128.0
    elif sample_width == 4:
        audio = np.frombuffer(frames, dtype="<i4").astype(np.float32) / 2147483648.0
    else:
        raise RuntimeError(f"Unsupported WAV sample width: {sample_width} bytes.")

    if channels > 1:
        audio = audio.reshape(-1, channels).mean(axis=1)
    return audio
215
+
216
+
217
  def normalize_audio_for_asr(source: Path, target: Path, sample_rate: int = DEFAULT_ASR_SAMPLE_RATE) -> None:
218
  ffmpeg = shutil.which("ffmpeg")
219
  if not ffmpeg:
hackathon_advisor/data.py CHANGED
@@ -344,6 +344,15 @@ class ProjectIndex:
344
  tuple(float(value) for value in document["vector"])
345
  for document in index_payload["documents"]
346
  ]
 
 
 
 
 
 
 
 
 
347
 
348
  @classmethod
349
  def from_file(cls, path: Path, query_embedder: EmbeddingFunction | None = None) -> "ProjectIndex":
 
344
  tuple(float(value) for value in document["vector"])
345
  for document in index_payload["documents"]
346
  ]
347
+ self._vector_by_id = {
348
+ project.id: vector for project, vector in zip(self.projects, self._vectors)
349
+ }
350
+
351
+ def vector_for(self, project_id: str) -> tuple[float, ...] | None:
352
+ return self._vector_by_id.get(project_id)
353
+
354
+ def embed_query(self, text: str) -> tuple[float, ...]:
355
+ return tuple(normalize_vector(self._embed_query(text)))
356
 
357
  @classmethod
358
  def from_file(cls, path: Path, query_embedder: EmbeddingFunction | None = None) -> "ProjectIndex":
hackathon_advisor/model_runtime.py CHANGED
@@ -1,18 +1,25 @@
1
  from __future__ import annotations
2
 
 
3
  from contextlib import nullcontext
4
  from dataclasses import dataclass
 
5
  import os
6
  import re
 
7
  from typing import Any, Protocol
8
 
9
  from hackathon_advisor.tools import idea_from_text
10
  from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
 
 
 
11
 
12
 
13
  DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
14
  DEFAULT_ADAPTER_ID = "build-small-hackathon/hackathon-advisor-minicpm5-lora"
15
  DEFAULT_BACKEND = "rules"
 
16
 
17
 
18
  class ToolPlanner(Protocol):
@@ -24,6 +31,11 @@ class ToolPlanner(Protocol):
24
  def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
25
  ...
26
 
 
 
 
 
 
27
 
28
  @dataclass(frozen=True)
29
  class RuntimeStatus:
@@ -33,6 +45,7 @@ class RuntimeStatus:
33
  adapter_revision: str
34
  loaded: bool
35
  tool_count: int
 
36
 
37
  def to_dict(self) -> dict[str, Any]:
38
  return {
@@ -42,6 +55,7 @@ class RuntimeStatus:
42
  "adapter_revision": self.adapter_revision,
43
  "loaded": self.loaded,
44
  "tool_count": self.tool_count,
 
45
  }
46
 
47
 
@@ -61,13 +75,13 @@ class RuleBasedPlanner:
61
  output = '<function name="list_projects">{"sort":"likes"}</function>'
62
  elif project_id:
63
  output = f'<function name="get_project">{{"id":{_json_string(project_id)}}}</function>'
64
- elif any(term in lower for term in ("compare", "choose", "rank")):
65
  output = '<function name="compare_ideas">{}</function>'
66
- elif any(term in lower for term in ("plan", "roadmap", "next step", "milestone")):
67
  output = '<function name="make_plan">{}</function>'
68
- elif any(term in lower for term in ("whitespace", "original", "new", "bolder", "unwritten", "gap")):
69
  output = '<function name="find_whitespace">{}</function>'
70
- elif any(term in lower for term in ("search", "similar", "already", "existing", "overlap", "echo")):
71
  output = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
72
  else:
73
  title, pitch = idea_from_text(text)
@@ -78,6 +92,9 @@ class RuleBasedPlanner:
78
  )
79
  return resolve_tool_call(output, fallback_query=text)
80
 
 
 
 
81
 
82
  class MiniCPMTransformersPlanner:
83
  backend = "minicpm-transformers"
@@ -87,19 +104,34 @@ class MiniCPMTransformersPlanner:
87
  model_id: str = DEFAULT_MODEL_ID,
88
  adapter_id: str = "",
89
  adapter_revision: str = "",
 
90
  ) -> None:
91
  self.model_id = model_id.strip() or DEFAULT_MODEL_ID
92
  self.adapter_id = adapter_id.strip()
93
  self.adapter_revision = adapter_revision.strip()
 
 
94
  self._tokenizer = None
95
  self._model = None
96
  self._inference_mode = None
97
 
98
  def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
 
 
 
 
 
 
 
 
99
  self._ensure_loaded()
100
  prompt = render_context(message, state)
101
- output = self._generate_tool_call(prompt)
102
- return resolve_tool_call(output, fallback_query=message)
 
 
 
 
103
 
104
  def _ensure_loaded(self) -> None:
105
  if self._model is not None and self._tokenizer is not None:
@@ -121,26 +153,60 @@ class MiniCPMTransformersPlanner:
121
  adapter_config = PeftConfig.from_pretrained(self.adapter_id, **adapter_kwargs)
122
  base_model_id = str(adapter_config.base_model_name_or_path or base_model_id)
123
 
 
 
 
124
  self._tokenizer = AutoTokenizer.from_pretrained(
125
  tokenizer_id,
126
  trust_remote_code=True,
127
  **(adapter_kwargs if self.adapter_id else {}),
128
  )
129
- model = AutoModelForCausalLM.from_pretrained(
130
- base_model_id,
131
- dtype="auto",
132
- device_map="auto",
133
- trust_remote_code=True,
134
  )
135
  if self.adapter_id:
136
  model = PeftModel.from_pretrained(model, self.adapter_id, **adapter_kwargs)
 
 
137
  model.eval()
138
  _disable_sampling_generation_defaults(model)
139
  self._model = model
140
  if hasattr(torch, "inference_mode"):
141
  self._inference_mode = torch.inference_mode
 
 
 
 
 
 
142
 
143
- def _generate_tool_call(self, prompt: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  assert self._tokenizer is not None
145
  assert self._model is not None
146
  messages = [
@@ -156,19 +222,88 @@ class MiniCPMTransformersPlanner:
156
  return_tensors="pt",
157
  ).to(next(self._model.parameters()).device)
158
  _strip_unused_generation_inputs(inputs)
159
- context = self._inference_mode() if self._inference_mode is not None else nullcontext()
160
- with context:
161
- generated = self._model.generate(
162
- **inputs,
163
- max_new_tokens=180,
164
- do_sample=False,
165
- )
166
- new_tokens = generated[:, inputs["input_ids"].shape[-1] :]
167
- decoded = self._tokenizer.decode(new_tokens[0], skip_special_tokens=True).strip()
168
- return _normalize_xml_tool_output(decoded)
169
 
 
 
170
 
171
- def create_tool_planner() -> ToolPlanner:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
173
  if backend in ("", "rules"):
174
  return RuleBasedPlanner()
@@ -177,11 +312,13 @@ def create_tool_planner() -> ToolPlanner:
177
  os.environ.get("ADVISOR_MODEL_ID", DEFAULT_MODEL_ID),
178
  os.environ.get("ADVISOR_ADAPTER_ID", ""),
179
  os.environ.get("ADVISOR_ADAPTER_REVISION", ""),
 
180
  )
181
  raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
182
 
183
 
184
  def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
 
185
  return RuntimeStatus(
186
  backend=planner.backend,
187
  model_id=planner.model_id,
@@ -189,6 +326,7 @@ def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
189
  adapter_revision=planner.adapter_revision,
190
  loaded=not isinstance(planner, MiniCPMTransformersPlanner) or planner._model is not None,
191
  tool_count=len(tool_schemas()),
 
192
  )
193
 
194
 
@@ -274,6 +412,13 @@ def _wants_project_list(lower_text: str) -> bool:
274
  return lower_text in exact_phrases or any(lower_text.startswith(prefix) for prefix in command_prefixes)
275
 
276
 
 
 
 
 
 
 
 
277
  def _project_reference_id(text: str) -> str:
278
  prefixes = (
279
  "read project ",
 
1
  from __future__ import annotations
2
 
3
+ from collections.abc import Iterator
4
  from contextlib import nullcontext
5
  from dataclasses import dataclass
6
+ import logging
7
  import os
8
  import re
9
+ import threading
10
  from typing import Any, Protocol
11
 
12
  from hackathon_advisor.tools import idea_from_text
13
  from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
14
+ from hackathon_advisor.zerogpu import zero_gpu_enabled
15
+
16
+ _logger = logging.getLogger("hackathon_advisor")
17
 
18
 
19
  DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
20
  DEFAULT_ADAPTER_ID = "build-small-hackathon/hackathon-advisor-minicpm5-lora"
21
  DEFAULT_BACKEND = "rules"
22
+ MAX_TOOL_CALL_TOKENS = 180
23
 
24
 
25
  class ToolPlanner(Protocol):
 
31
  def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
32
  ...
33
 
34
+ def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
35
+ """Yield {"type": "model_progress", "tokens": int} events while planning, then a
36
+ final {"type": "resolved", "resolution": ToolResolution} event."""
37
+ ...
38
+
39
 
40
  @dataclass(frozen=True)
41
  class RuntimeStatus:
 
45
  adapter_revision: str
46
  loaded: bool
47
  tool_count: int
48
+ device: str = ""
49
 
50
  def to_dict(self) -> dict[str, Any]:
51
  return {
 
55
  "adapter_revision": self.adapter_revision,
56
  "loaded": self.loaded,
57
  "tool_count": self.tool_count,
58
+ "device": self.device,
59
  }
60
 
61
 
 
75
  output = '<function name="list_projects">{"sort":"likes"}</function>'
76
  elif project_id:
77
  output = f'<function name="get_project">{{"id":{_json_string(project_id)}}}</function>'
78
+ elif _has_command_term(lower, ("compare", "choose", "rank")):
79
  output = '<function name="compare_ideas">{}</function>'
80
+ elif _has_command_term(lower, ("plan", "roadmap", "next step", "milestone")):
81
  output = '<function name="make_plan">{}</function>'
82
+ elif _has_command_term(lower, ("whitespace", "original", "new", "bolder", "unwritten", "gap")):
83
  output = '<function name="find_whitespace">{}</function>'
84
+ elif _has_command_term(lower, ("search", "similar", "already", "existing", "overlap", "echo")):
85
  output = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
86
  else:
87
  title, pitch = idea_from_text(text)
 
92
  )
93
  return resolve_tool_call(output, fallback_query=text)
94
 
95
+ def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
96
+ yield {"type": "resolved", "resolution": self.plan(message, state)}
97
+
98
 
99
  class MiniCPMTransformersPlanner:
100
  backend = "minicpm-transformers"
 
104
  model_id: str = DEFAULT_MODEL_ID,
105
  adapter_id: str = "",
106
  adapter_revision: str = "",
107
+ device: str = "auto",
108
  ) -> None:
109
  self.model_id = model_id.strip() or DEFAULT_MODEL_ID
110
  self.adapter_id = adapter_id.strip()
111
  self.adapter_revision = adapter_revision.strip()
112
+ self.device = (device or "auto").strip().lower() or "auto"
113
+ self.resolved_device = ""
114
  self._tokenizer = None
115
  self._model = None
116
  self._inference_mode = None
117
 
118
  def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
119
+ resolution: ToolResolution | None = None
120
+ for event in self.plan_iter(message, state):
121
+ if event.get("type") == "resolved":
122
+ resolution = event["resolution"]
123
+ assert resolution is not None
124
+ return resolution
125
+
126
+ def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
127
  self._ensure_loaded()
128
  prompt = render_context(message, state)
129
+ pieces: list[str] = []
130
+ for tokens, piece in self._stream_tool_call(prompt):
131
+ pieces.append(piece)
132
+ yield {"type": "model_progress", "tokens": tokens, "max_tokens": MAX_TOOL_CALL_TOKENS}
133
+ output = _normalize_xml_tool_output("".join(pieces).strip())
134
+ yield {"type": "resolved", "resolution": resolve_tool_call(output, fallback_query=message)}
135
 
136
  def _ensure_loaded(self) -> None:
137
  if self._model is not None and self._tokenizer is not None:
 
153
  adapter_config = PeftConfig.from_pretrained(self.adapter_id, **adapter_kwargs)
154
  base_model_id = str(adapter_config.base_model_name_or_path or base_model_id)
155
 
156
+ target = _resolve_torch_device(self.device, torch)
157
+ self.resolved_device = target
158
+
159
  self._tokenizer = AutoTokenizer.from_pretrained(
160
  tokenizer_id,
161
  trust_remote_code=True,
162
  **(adapter_kwargs if self.adapter_id else {}),
163
  )
164
+ model = self._load_model_on_device(
165
+ AutoModelForCausalLM, base_model_id, target, torch
 
 
 
166
  )
167
  if self.adapter_id:
168
  model = PeftModel.from_pretrained(model, self.adapter_id, **adapter_kwargs)
169
+ if target not in ("auto", "cpu"):
170
+ model = model.to(target)
171
  model.eval()
172
  _disable_sampling_generation_defaults(model)
173
  self._model = model
174
  if hasattr(torch, "inference_mode"):
175
  self._inference_mode = torch.inference_mode
176
+ _logger.info(
177
+ "MiniCPM loaded | requested_device=%s resolved_device=%s adapter=%s",
178
+ self.device,
179
+ self.resolved_device,
180
+ self.adapter_id or "(none)",
181
+ )
182
 
183
+ def _load_model_on_device(self, model_cls: Any, base_model_id: str, target: str, torch: Any) -> Any:
184
+ if target == "auto":
185
+ return model_cls.from_pretrained(
186
+ base_model_id, dtype="auto", device_map="auto", trust_remote_code=True
187
+ )
188
+ if target == "cpu":
189
+ return model_cls.from_pretrained(
190
+ base_model_id, dtype=torch.float32, device_map={"": "cpu"}, trust_remote_code=True
191
+ )
192
+ # mps / cuda: load on CPU first (no accelerate dispatch), then move to the device.
193
+ if target == "mps":
194
+ os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
195
+ try:
196
+ model = model_cls.from_pretrained(
197
+ base_model_id, dtype=torch.float16, trust_remote_code=True
198
+ )
199
+ return model.to(target)
200
+ except Exception as error: # noqa: BLE001 - keep the turn runnable on CPU
201
+ if target == "mps":
202
+ _logger.warning("MPS load failed (%r); falling back to CPU float32.", error)
203
+ self.resolved_device = "cpu"
204
+ return model_cls.from_pretrained(
205
+ base_model_id, dtype=torch.float32, device_map={"": "cpu"}, trust_remote_code=True
206
+ )
207
+ raise
208
+
209
+ def _prepare_inputs(self, prompt: str) -> Any:
210
  assert self._tokenizer is not None
211
  assert self._model is not None
212
  messages = [
 
222
  return_tensors="pt",
223
  ).to(next(self._model.parameters()).device)
224
  _strip_unused_generation_inputs(inputs)
225
+ return inputs
 
 
 
 
 
 
 
 
 
226
 
227
+ def _stream_tool_call(self, prompt: str) -> Iterator[tuple[int, str]]:
228
+ from transformers import TextIteratorStreamer
229
 
230
+ assert self._tokenizer is not None
231
+ assert self._model is not None
232
+ inputs = self._prepare_inputs(prompt)
233
+ streamer = TextIteratorStreamer(
234
+ self._tokenizer, skip_prompt=True, skip_special_tokens=True
235
+ )
236
+ generation_kwargs = {
237
+ **inputs,
238
+ "max_new_tokens": MAX_TOOL_CALL_TOKENS,
239
+ "do_sample": False,
240
+ "streamer": streamer,
241
+ }
242
+ errors: list[BaseException] = []
243
+
244
+ def _run() -> None:
245
+ context = self._inference_mode() if self._inference_mode is not None else nullcontext()
246
+ try:
247
+ with context:
248
+ self._model.generate(**generation_kwargs)
249
+ except BaseException as error: # surfaced after the streamer drains
250
+ errors.append(error)
251
+ # generate() never reached its end sentinel, so wake the consumer instead of
252
+ # letting it block forever, then re-raise from the main thread below.
253
+ streamer.end()
254
+
255
+ worker = threading.Thread(target=_run, daemon=True)
256
+ worker.start()
257
+ tokens = 0
258
+ for piece in streamer:
259
+ if not piece:
260
+ continue
261
+ tokens += 1
262
+ yield tokens, piece
263
+ worker.join()
264
+ if errors:
265
+ raise errors[0]
266
+
267
+
268
+ def _device_available(device: str, torch: Any) -> bool:
269
+ try:
270
+ if device == "cuda":
271
+ return bool(torch.cuda.is_available())
272
+ if device == "mps":
273
+ backend = getattr(torch.backends, "mps", None)
274
+ return bool(backend is not None and backend.is_available())
275
+ except Exception: # pragma: no cover - device dependent
276
+ return False
277
+ return False
278
+
279
+
280
def _best_local_device(torch: Any) -> str:
    """Return the first available device in the order cuda, mps, falling back to ``"cpu"``."""
    # Avoid touching CUDA inside a ZeroGPU main process — there is no local GPU there, and
    # probing it can disturb the ZeroGPU allocator.
    probe_order = ("mps",) if zero_gpu_enabled() else ("cuda", "mps")
    for name in probe_order:
        if _device_available(name, torch):
            return name
    return "cpu"
288
+
289
+
290
+ def _resolve_torch_device(preference: str, torch: Any) -> str:
291
+ """Map a configured device preference to a concrete torch device.
292
+
293
+ "auto" stays "auto" (accelerate device_map handles ZeroGPU/CUDA/CPU placement). "local"
294
+ picks the best on-machine accelerator: CUDA -> MPS (Apple Silicon) -> CPU. An explicit
295
+ cuda/mps that is unavailable degrades to the best available local device."""
296
+ pref = (preference or "auto").strip().lower()
297
+ if pref == "auto":
298
+ return "auto"
299
+ if pref == "cpu":
300
+ return "cpu"
301
+ if pref in ("cuda", "mps"):
302
+ return pref if _device_available(pref, torch) else _best_local_device(torch)
303
+ return _best_local_device(torch)
304
+
305
+
306
+ def create_tool_planner(device: str = "auto") -> ToolPlanner:
307
  backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
308
  if backend in ("", "rules"):
309
  return RuleBasedPlanner()
 
312
  os.environ.get("ADVISOR_MODEL_ID", DEFAULT_MODEL_ID),
313
  os.environ.get("ADVISOR_ADAPTER_ID", ""),
314
  os.environ.get("ADVISOR_ADAPTER_REVISION", ""),
315
+ device=device,
316
  )
317
  raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
318
 
319
 
320
  def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
321
+ device = getattr(planner, "resolved_device", "") or getattr(planner, "device", "")
322
  return RuntimeStatus(
323
  backend=planner.backend,
324
  model_id=planner.model_id,
 
326
  adapter_revision=planner.adapter_revision,
327
  loaded=not isinstance(planner, MiniCPMTransformersPlanner) or planner._model is not None,
328
  tool_count=len(tool_schemas()),
329
+ device=str(device),
330
  )
331
 
332
 
 
412
  return lower_text in exact_phrases or any(lower_text.startswith(prefix) for prefix in command_prefixes)
413
 
414
 
415
+ def _has_command_term(lower_text: str, terms: tuple[str, ...]) -> bool:
416
+ return any(
417
+ re.search(rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])", lower_text)
418
+ for term in terms
419
+ )
420
+
421
+
422
  def _project_reference_id(text: str) -> str:
423
  prefixes = (
424
  "read project ",
hackathon_advisor/profiling.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Lightweight logging and per-turn profiling for the advisor runtime.
2
+
3
+ The numbers here are debug/operations signal only — they are written to logs, never to the
4
+ UI. Stage timings are measured by *observing the turn event stream from the main process*, so
5
+ they stay correct even when the model itself runs inside a ZeroGPU fork (where a module-global
6
+ counter would reset on every call).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ import logging
13
+ import os
14
+ import platform
15
+ import sys
16
+ import threading
17
+ import time
18
+ from typing import Any
19
+
20
+ logger = logging.getLogger("hackathon_advisor")
21
+
22
+ _counter_lock = threading.Lock()
23
+ _messages_processed = 0
24
+
25
+
26
def configure_logging() -> None:
    """Attach a stream handler once, honoring ADVISOR_LOG_LEVEL (default INFO).

    The environment value is stripped and upper-cased, then looked up on the
    ``logging`` module. Anything that does not resolve to an integer level falls
    back to INFO, so a mistyped value cannot raise from ``setLevel``.
    """
    level_name = os.environ.get("ADVISOR_LOG_LEVEL", "INFO").strip().upper()
    level = getattr(logging, level_name, None)
    # getattr can resolve to non-level attributes of the logging module (for example
    # "ROOT" or "LOGGER"); Logger.setLevel rejects those with TypeError.
    if not isinstance(level, int):
        level = logging.INFO
    logger.setLevel(level)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s"))
        logger.addHandler(handler)
    logger.propagate = False
35
+
36
+
37
def next_message_index() -> int:
    """Bump the module-level processed-message counter under its lock and return the new value."""
    global _messages_processed
    with _counter_lock:
        updated = _messages_processed + 1
        _messages_processed = updated
    return updated
43
+
44
+
45
def messages_processed() -> int:
    """Return the current value of the module-level processed-message counter."""
    count = _messages_processed
    return count
47
+
48
+
49
+ def _ms(seconds: float) -> float:
50
+ return round(seconds * 1000.0, 1)
51
+
52
+
53
def resource_snapshot() -> dict[str, Any]:
    """Sample process resource usage on a best-effort basis.

    Reads ``getrusage(RUSAGE_SELF)`` from the stdlib ``resource`` module and adds
    whatever ``_torch_memory_snapshot`` reports. A failure while sampling rusage
    is swallowed, so the result simply omits the fields that could not be read.
    """
    stats: dict[str, Any] = {}
    try:
        import resource

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is bytes on macOS, kilobytes on Linux.
        on_macos = platform.system() == "Darwin"
        units_per_mb = 1024 * 1024 if on_macos else 1024
        stats["rss_mb"] = round(rusage.ru_maxrss / units_per_mb, 1)
        stats["cpu_user_s"] = round(rusage.ru_utime, 3)
        stats["cpu_sys_s"] = round(rusage.ru_stime, 3)
    except Exception:  # pragma: no cover - platform dependent
        pass
    stats.update(_torch_memory_snapshot())
    return stats
70
+
71
+
72
+ def _torch_memory_snapshot() -> dict[str, Any]:
73
+ out: dict[str, Any] = {}
74
+ torch = sys.modules.get("torch") # do not import torch just to profile
75
+ if torch is None:
76
+ return out
77
+ try:
78
+ if torch.cuda.is_available():
79
+ out["cuda_alloc_mb"] = round(torch.cuda.memory_allocated() / 1e6, 1)
80
+ out["cuda_peak_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1)
81
+ except Exception: # pragma: no cover - device dependent
82
+ pass
83
+ try:
84
+ mps = getattr(torch, "mps", None)
85
+ current = getattr(mps, "current_allocated_memory", None)
86
+ if current is not None:
87
+ out["mps_alloc_mb"] = round(current() / 1e6, 1)
88
+ except Exception: # pragma: no cover - device dependent
89
+ pass
90
+ return out
91
+
92
+
93
@dataclass
class TurnProfiler:
    """Time one advisor turn by watching the events it emits.

    Feed every emitted event dict to ``observe``; call ``log_summary`` when the
    turn ends (in a finally block, so partial turns still get logged).
    """

    message_index: int
    compute: str
    backend: str
    device: str = ""
    message_chars: int = 0
    started: float = field(default_factory=time.perf_counter)  # perf_counter at creation
    stage_at: dict[str, float] = field(default_factory=dict)  # first-seen time per stage name
    ended: float | None = None  # time of the latest "done" event, if any
    tokens: int = 0  # highest "tokens" value reported by model_progress events
    tool_count: int = 0
    fell_back: bool = False
    logged: bool = False  # guards log_summary against double logging

    def log_start(self) -> None:
        """Log the turn header line at INFO level."""
        logger.info(
            "turn #%d start | compute=%s backend=%s message_chars=%d",
            self.message_index,
            self.compute,
            self.backend,
            self.message_chars,
        )

    def observe(self, event: dict[str, Any]) -> None:
        """Fold one event into the profile; unrecognised event types are ignored."""
        timestamp = time.perf_counter()
        kind = event.get("type")
        if kind == "stage":
            stage_name = str(event.get("stage"))
            # Only the first occurrence of a stage is recorded.
            if stage_name not in self.stage_at:
                self.stage_at[stage_name] = timestamp
            return
        if kind == "model_progress":
            reported = int(event.get("tokens") or 0)
            if reported > self.tokens:
                self.tokens = reported
            return
        if kind == "tool_event":
            self.tool_count += 1
            return
        if kind == "fallback":
            self.fell_back = True
            return
        if kind == "done":
            self.ended = timestamp

    def durations(self) -> dict[str, float]:
        """Return timings in milliseconds; stage timings appear only when both ends were seen."""
        finish = time.perf_counter() if self.ended is None else self.ended
        timings: dict[str, float] = {"total_ms": _ms(finish - self.started)}
        planning_at = self.stage_at.get("planning")
        running_at = self.stage_at.get("running_tool")
        writing_at = self.stage_at.get("writing")
        if planning_at is not None and running_at is not None:
            timings["decode_ms"] = _ms(running_at - planning_at)
        if running_at is not None and writing_at is not None:
            timings["tools_ms"] = _ms(writing_at - running_at)
        if writing_at is not None:
            timings["write_ms"] = _ms(finish - writing_at)
        return timings

    def log_summary(self, error: BaseException | None = None) -> None:
        """Log the one-line turn summary once: WARNING when ``error`` is given, else INFO."""
        if self.logged:
            return
        self.logged = True
        timing = " ".join(f"{key}={value}" for key, value in self.durations().items())
        resources = " ".join(f"{key}={value}" for key, value in resource_snapshot().items())
        status = "done" if error is None else "error"
        message = (
            f"turn #{self.message_index} {status} | {timing} | "
            f"tokens={self.tokens} tools={self.tool_count} compute={self.compute} "
            f"device={self.device or '?'} backend={self.backend} fallback={self.fell_back} | {resources}"
        )
        if error is None:
            logger.info(message)
        else:
            logger.warning("%s | exception=%r", message, error)
hackathon_advisor/wood_map.py CHANGED
@@ -11,9 +11,13 @@ from hackathon_advisor.tools import Idea
11
  def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
12
  echoes = list(score.echoes)
13
  background = _background_projects(index, echoes)
14
- dots = [_project_dot(project, "inked") for project in background]
15
- dots.extend(_echo_dot(hit) for hit in echoes[:5])
16
- dots.append(_idea_dot(idea, score, echoes))
 
 
 
 
17
  return {
18
  "caption": _caption(score, echoes),
19
  "dots": _dedupe_dots(dots),
@@ -26,8 +30,8 @@ def _background_projects(index: ProjectIndex, echoes: list[SearchHit]) -> list[P
26
  return projects[:16]
27
 
28
 
29
- def _project_dot(project: Project, kind: str) -> dict[str, Any]:
30
- x, y = _point(project.id)
31
  return {
32
  "id": project.id,
33
  "kind": kind,
@@ -39,8 +43,8 @@ def _project_dot(project: Project, kind: str) -> dict[str, Any]:
39
  }
40
 
41
 
42
- def _echo_dot(hit: SearchHit) -> dict[str, Any]:
43
- dot = _project_dot(hit.project, "echo")
44
  dot["score"] = round(hit.score, 3)
45
  dot["matched_terms"] = list(hit.matched_terms)
46
  dot["page_number"] = hit.page_number
@@ -48,13 +52,8 @@ def _echo_dot(hit: SearchHit) -> dict[str, Any]:
48
  return dot
49
 
50
 
51
- def _idea_dot(idea: Idea, score: ScoreCard, echoes: list[SearchHit]) -> dict[str, Any]:
52
- if echoes and not score.verdict.startswith("UNWRITTEN"):
53
- lead_x, lead_y = _point(echoes[0].project.id)
54
- x = _clamp(lead_x + 7, 8, 92)
55
- y = _clamp(lead_y - 5, 8, 92)
56
- else:
57
- x, y = _point(f"idea:{idea.id}:{idea.title}")
58
  return {
59
  "id": idea.id,
60
  "kind": "idea",
@@ -67,6 +66,67 @@ def _idea_dot(idea: Idea, score: ScoreCard, echoes: list[SearchHit]) -> dict[str
67
  }
68
 
69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
71
  if score.verdict.startswith("UNWRITTEN"):
72
  return "Your page sits in a pale margin beyond the nearest inked clusters."
@@ -81,10 +141,6 @@ def _point(key: str) -> tuple[int, int]:
81
  return x, y
82
 
83
 
84
- def _clamp(value: int, low: int, high: int) -> int:
85
- return max(low, min(high, value))
86
-
87
-
88
  def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
89
  seen: set[tuple[str, str]] = set()
90
  deduped: list[dict[str, Any]] = []
 
11
  def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
12
  echoes = list(score.echoes)
13
  background = _background_projects(index, echoes)
14
+ echo_projects = [hit.project for hit in echoes[:5]]
15
+
16
+ layout, idea_xy = _layout(index, idea, background + echo_projects)
17
+
18
+ dots = [_project_dot(project, "inked", layout) for project in background]
19
+ dots.extend(_echo_dot(hit, layout) for hit in echoes[:5])
20
+ dots.append(_idea_dot(idea, score, idea_xy))
21
  return {
22
  "caption": _caption(score, echoes),
23
  "dots": _dedupe_dots(dots),
 
30
  return projects[:16]
31
 
32
 
33
+ def _project_dot(project: Project, kind: str, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
34
+ x, y = layout.get(project.id) or _point(project.id)
35
  return {
36
  "id": project.id,
37
  "kind": kind,
 
43
  }
44
 
45
 
46
+ def _echo_dot(hit: SearchHit, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
47
+ dot = _project_dot(hit.project, "echo", layout)
48
  dot["score"] = round(hit.score, 3)
49
  dot["matched_terms"] = list(hit.matched_terms)
50
  dot["page_number"] = hit.page_number
 
52
  return dot
53
 
54
 
55
+ def _idea_dot(idea: Idea, score: ScoreCard, idea_xy: tuple[int, int]) -> dict[str, Any]:
56
+ x, y = idea_xy
 
 
 
 
 
57
  return {
58
  "id": idea.id,
59
  "kind": "idea",
 
66
  }
67
 
68
 
69
def _layout(
    index: ProjectIndex,
    idea: Idea,
    projects: list[Project],
) -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
    """Compute map coordinates for the given projects and for the idea.

    Normally each project's stored vector and the embedded idea text
    (``idea.pitch`` or, if empty, ``idea.title``) are projected to 2D via
    ``_pca_project``. The deterministic ``_point`` hash scatter is used instead
    when fewer than three projects are given, when any project has no vector, or
    when embedding/projection raises.
    """
    project_ids = [project.id for project in projects]
    embeddings = [index.vector_for(project.id) for project in projects]
    hash_scatter = (
        {project_id: _point(project_id) for project_id in project_ids},
        _point(f"idea:{idea.id}:{idea.title}"),
    )
    if len(embeddings) < 3:
        return hash_scatter
    if any(embedding is None for embedding in embeddings):
        return hash_scatter
    try:
        idea_embedding = index.embed_query(idea.pitch or idea.title)
        project_points, idea_point = _pca_project(embeddings, idea_embedding)
    except Exception:  # noqa: BLE001 - any projection failure degrades to the hash scatter
        return hash_scatter
    return dict(zip(project_ids, project_points)), idea_point
89
+
90
+
91
def _pca_project(
    vectors: list[tuple[float, ...]],
    idea_vector: tuple[float, ...],
) -> tuple[list[tuple[int, int]], tuple[int, int]]:
    """Project the project vectors and the idea vector onto the cloud's top two PCA axes.

    The axes come from an SVD of the mean-centred project vectors; the idea is
    centred with the same mean and projected onto the same axes. All points are
    then rescaled together by ``_scale_to_canvas`` and rounded to integers.
    Returns the project coordinates (in input order) and the idea coordinate.
    """
    import numpy as np

    cloud = np.asarray(vectors, dtype=np.float64)
    idea = np.asarray(idea_vector, dtype=np.float64)
    centroid = cloud.mean(axis=0)
    offsets = cloud - centroid
    # Rows of the third SVD factor are the principal directions, strongest first.
    _, _, directions = np.linalg.svd(offsets, full_matrices=False)
    axes = directions[:2].T
    planar = np.vstack([offsets @ axes, (idea - centroid) @ axes])
    on_canvas = _scale_to_canvas(planar)
    rounded = [(int(round(x)), int(round(y))) for x, y in on_canvas]
    return rounded[:-1], rounded[-1]
112
+
113
+
114
+ def _scale_to_canvas(points: Any, low: float = 10.0, high: float = 90.0) -> Any:
115
+ import numpy as np
116
+
117
+ scaled = np.empty_like(points)
118
+ for axis in range(points.shape[1]):
119
+ column = points[:, axis]
120
+ lo = float(column.min())
121
+ hi = float(column.max())
122
+ span = hi - lo
123
+ if span < 1e-9:
124
+ scaled[:, axis] = (low + high) / 2.0
125
+ else:
126
+ scaled[:, axis] = low + (column - lo) / span * (high - low)
127
+ return scaled
128
+
129
+
130
  def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
131
  if score.verdict.startswith("UNWRITTEN"):
132
  return "Your page sits in a pale margin beyond the nearest inked clusters."
 
141
  return x, y
142
 
143
 
 
 
 
 
144
  def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
145
  seen: set[tuple[str, str]] = set()
146
  deduped: list[dict[str, Any]] = []
hackathon_advisor/zerogpu.py CHANGED
@@ -41,3 +41,17 @@ def gpu_task(function: Callable[P, R]) -> Callable[P, R]:
41
  "Install runtime requirements before enabling ZeroGPU."
42
  ) from error
43
  return spaces.GPU(duration=zero_gpu_duration_seconds())(function)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  "Install runtime requirements before enabling ZeroGPU."
42
  ) from error
43
  return spaces.GPU(duration=zero_gpu_duration_seconds())(function)
44
+
45
+
46
+ QUOTA_ERROR_HINTS = ("quota", "gpu task aborted", "no gpu", "exceeded", "gpu is not available")
47
+
48
+
49
def is_gpu_quota_error(error: BaseException) -> bool:
    """Heuristically decide whether ``error`` is a ZeroGPU allocation/quota failure.

    True when the exception class name contains "quota" or "gpu", or when the
    lower-cased message contains any of ``QUOTA_ERROR_HINTS``. Callers use this
    to fall back to a CPU run; per the original note it is meant for the
    exception surfacing from the first pull of the GPU generator.
    """
    class_name = type(error).__name__.lower()
    if any(marker in class_name for marker in ("quota", "gpu")):
        return True
    text = str(error).lower()
    for hint in QUOTA_ERROR_HINTS:
        if hint in text:
            return True
    return False
scripts/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Marks the repository's scripts/ as a regular package so it resolves ahead of any top-level
2
+ # "scripts" package that a dependency (e.g. nemo-toolkit) installs into site-packages.
static/app.js CHANGED
@@ -29,8 +29,16 @@ const resetButton = document.querySelector("#reset-session");
29
  const recordVoiceButton = document.querySelector("#record-voice");
30
  const uploadVoiceButton = document.querySelector("#upload-voice");
31
  const voiceFileInput = document.querySelector("#voice-file");
 
 
 
 
 
 
 
32
 
33
  const SESSION_STORAGE_KEY = "hackathon-advisor-session-v2";
 
34
  const FIELD_NOTES_FILENAME = "hackathon-advisor-field-notes.md";
35
  const CHAPTER_FILENAME = "hackathon-advisor-chapter.md";
36
  const PNG_EXPORT_LABEL = "PNG";
@@ -51,6 +59,8 @@ let voiceRecorder = null;
51
  let voiceStream = null;
52
  let voiceChunks = [];
53
  let voiceRecordingState = "idle";
 
 
54
 
55
  setVoiceRecordingState("idle");
56
  bootstrap().catch(handleBootstrapError);
@@ -168,6 +178,7 @@ async function runTurn(message) {
168
  corrections.textContent = "";
169
  planEl.innerHTML = "";
170
  delete session.ui_status;
 
171
  startTurnWatchdog();
172
 
173
  let completed = false;
@@ -194,6 +205,7 @@ async function runTurn(message) {
194
  ink.classList.add("bleed");
195
  } finally {
196
  clearTurnWatchdog();
 
197
  submit.disabled = false;
198
  setSessionControlsDisabled(false);
199
  setCommandDisabled(false);
@@ -810,6 +822,26 @@ function handleEvent(event) {
810
  return;
811
  }
812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
813
  if (event.type === "token") {
814
  markFirstTokenSeen();
815
  ink.textContent += event.text;
@@ -817,6 +849,9 @@ function handleEvent(event) {
817
  }
818
 
819
  if (event.type === "done") {
 
 
 
820
  if (!sawTurnToken) {
821
  clearTurnWatchdog();
822
  ink.textContent = event.response || ink.textContent;
@@ -1023,8 +1058,9 @@ function renderWoodMap(map) {
1023
  field.className = "wood";
1024
  for (const dot of map.dots) {
1025
  const marker = document.createElement(dot.url ? "a" : "span");
1026
- const verdictClass = dot.kind === "idea" && String(dot.verdict || "").startsWith("ECHO") ? "bleed" : "";
1027
- marker.className = `wood-dot ${dot.kind || "inked"} ${verdictClass}`.trim();
 
1028
  marker.style.left = `${boundedPercent(dot.x)}%`;
1029
  marker.style.top = `${boundedPercent(dot.y)}%`;
1030
  const radius = Math.max(3, Math.min(10, Number(dot.radius || 4)));
@@ -1168,6 +1204,99 @@ function clearTurnWatchdog() {
1168
  }
1169
  }
1170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1171
  function syncCurrentIdeaGoals() {
1172
  const currentId = session.current_idea_id;
1173
  if (!currentId || !Array.isArray(session.ideas)) return;
 
29
  const recordVoiceButton = document.querySelector("#record-voice");
30
  const uploadVoiceButton = document.querySelector("#upload-voice");
31
  const voiceFileInput = document.querySelector("#voice-file");
32
+ const turnProgressEl = document.querySelector("#turn-progress");
33
+ const turnStageIconEl = document.querySelector("#turn-stage-icon");
34
+ const turnStageTextEl = document.querySelector("#turn-stage-text");
35
+ const turnTokensEl = document.querySelector("#turn-tokens");
36
+ const turnEtaEl = document.querySelector("#turn-eta");
37
+ const turnBarFillEl = document.querySelector("#turn-bar-fill");
38
+ const toolChipsEl = document.querySelector("#tool-chips");
39
 
40
  const SESSION_STORAGE_KEY = "hackathon-advisor-session-v2";
41
+ const STAGE_ICONS = { planning: "🪶", running_tool: "🔧", writing: "✍️" };
42
  const FIELD_NOTES_FILENAME = "hackathon-advisor-field-notes.md";
43
  const CHAPTER_FILENAME = "hackathon-advisor-chapter.md";
44
  const PNG_EXPORT_LABEL = "PNG";
 
59
  let voiceStream = null;
60
  let voiceChunks = [];
61
  let voiceRecordingState = "idle";
62
+ let decodeStartedAt = 0;
63
+ let turnProgressTimer = null;
64
 
65
  setVoiceRecordingState("idle");
66
  bootstrap().catch(handleBootstrapError);
 
178
  corrections.textContent = "";
179
  planEl.innerHTML = "";
180
  delete session.ui_status;
181
+ resetTurnProgress();
182
  startTurnWatchdog();
183
 
184
  let completed = false;
 
205
  ink.classList.add("bleed");
206
  } finally {
207
  clearTurnWatchdog();
208
+ hideTurnProgress();
209
  submit.disabled = false;
210
  setSessionControlsDisabled(false);
211
  setCommandDisabled(false);
 
822
  return;
823
  }
824
 
825
+ if (event.type === "stage") {
826
+ setTurnStage(event.stage, event.label);
827
+ return;
828
+ }
829
+
830
+ if (event.type === "model_progress") {
831
+ renderModelProgress(event.tokens, event.max_tokens);
832
+ return;
833
+ }
834
+
835
+ if (event.type === "tool_event") {
836
+ addToolChip(event);
837
+ return;
838
+ }
839
+
840
+ if (event.type === "fallback") {
841
+ renderComputeFallback(event);
842
+ return;
843
+ }
844
+
845
  if (event.type === "token") {
846
  markFirstTokenSeen();
847
  ink.textContent += event.text;
 
849
  }
850
 
851
  if (event.type === "done") {
852
+ setTurnBar(100);
853
+ if (turnEtaEl) turnEtaEl.textContent = "";
854
+ hideTurnProgress();
855
  if (!sawTurnToken) {
856
  clearTurnWatchdog();
857
  ink.textContent = event.response || ink.textContent;
 
1058
  field.className = "wood";
1059
  for (const dot of map.dots) {
1060
  const marker = document.createElement(dot.url ? "a" : "span");
1061
+ // Namespace the kind class (wood-idea/wood-echo/wood-inked) so it never collides with the
1062
+ // global .idea/.echo card styles. The "you" dot stays green regardless of verdict.
1063
+ marker.className = `wood-dot wood-${dot.kind || "inked"}`;
1064
  marker.style.left = `${boundedPercent(dot.x)}%`;
1065
  marker.style.top = `${boundedPercent(dot.y)}%`;
1066
  const radius = Math.max(3, Math.min(10, Number(dot.radius || 4)));
 
1204
  }
1205
  }
1206
 
1207
+ // Coarse overall completion per stage, so the bar always advances even when token-level
1208
+ // progress is unknown (e.g. the rules backend, or the fast tool/writing stages).
1209
+ const STAGE_PROGRESS = { planning: 8, running_tool: 85, writing: 95 };
1210
+
1211
function resetTurnProgress() {
  if (!turnProgressEl) return;
  // Keep the panel hidden at submit time. It is revealed either when token decoding
  // starts or when the delayed timer below fires; a turn that finishes first never
  // shows the bar.
  turnProgressEl.hidden = true;
  decodeStartedAt = 0;
  if (toolChipsEl) toolChipsEl.innerHTML = "";
  for (const textEl of [turnTokensEl, turnEtaEl]) {
    if (textEl) textEl.textContent = "";
  }
  setTurnBar(4);
  setTurnStageContent("planning", "Thinking");
  clearTurnProgressTimer();
  turnProgressTimer = window.setTimeout(revealTurnProgress, 450);
}
1226
+
1227
// Un-hide the turn progress panel, if the element exists.
function revealTurnProgress() {
  if (!turnProgressEl) return;
  turnProgressEl.hidden = false;
}
1230
+
1231
// Cancel the pending delayed-reveal timer, if one is scheduled.
function clearTurnProgressTimer() {
  if (!turnProgressTimer) return;
  window.clearTimeout(turnProgressTimer);
  turnProgressTimer = null;
}
1237
+
1238
// Cancel any pending reveal and hide the turn progress panel.
function hideTurnProgress() {
  clearTurnProgressTimer();
  if (!turnProgressEl) return;
  turnProgressEl.hidden = true;
}
1242
+
1243
// Set the progress bar fill width to `percent`, clamped to the 0–100 range.
function setTurnBar(percent) {
  if (!turnBarFillEl) return;
  const bounded = Math.min(100, Math.max(0, percent));
  turnBarFillEl.style.width = `${bounded}%`;
}
1248
+
1249
// Update the stage icon and label, move the bar to the stage's coarse progress value
// when one is defined, and clear the ETA for any stage other than "planning".
function setTurnStageContent(stage, label) {
  if (turnStageIconEl) {
    turnStageIconEl.textContent = STAGE_ICONS[stage] || "🪶";
  }
  if (turnStageTextEl) {
    turnStageTextEl.textContent = label || "Thinking";
  }
  if (stage in STAGE_PROGRESS) {
    setTurnBar(STAGE_PROGRESS[stage]);
  }
  const pastPlanning = stage && stage !== "planning";
  if (pastPlanning && turnEtaEl) {
    turnEtaEl.textContent = "";
  }
}
1255
+
1256
// Handle a "stage" event: clear the turn watchdog, then show the new stage.
function setTurnStage(stage, label) {
  clearTurnWatchdog();
  setTurnStageContent(stage, label);
}
1260
+
1261
// Handle a "model_progress" event: show the decoded count, advance the bar within its
// 8%–80% band, and estimate remaining time from the observed decode rate.
function renderModelProgress(tokens, maxTokens) {
  clearTurnWatchdog();
  revealTurnProgress(); // real token decoding is unambiguous execution — show it now
  const decoded = Number(tokens) || 0;
  if (turnTokensEl) {
    turnTokensEl.textContent = decoded ? `· decoded ${decoded} tokens` : "";
  }
  if (!decoded) return;
  if (!decodeStartedAt) decodeStartedAt = performance.now();

  const tokenCap = Number(maxTokens) || 0;
  if (tokenCap > 0) {
    // Map token decode into the 8%–80% band of the overall bar.
    const fraction = Math.min(1, decoded / tokenCap);
    setTurnBar(8 + fraction * 72);
  }

  // The ETA targets the token cap, so it is an upper bound on the time remaining.
  const elapsedSeconds = (performance.now() - decodeStartedAt) / 1000;
  if (turnEtaEl && tokenCap > 0 && elapsedSeconds > 0.3) {
    const tokensPerSecond = decoded / elapsedSeconds;
    const secondsLeft = Math.max(0, tokenCap - decoded) / Math.max(tokensPerSecond, 0.1);
    turnEtaEl.textContent = secondsLeft >= 1 ? `~${Math.ceil(secondsLeft)}s left` : "almost done";
  }
}
1281
+
1282
// Handle a "tool_event": append a chip showing the tool name and a check mark.
function addToolChip(event) {
  if (!toolChipsEl) return;
  const label = event.name || event.tool || "tool";
  const chip = document.createElement("span");
  chip.className = "tool-chip";
  if (event.summary) {
    chip.title = event.summary;
  }
  // The markup is a fixed template; the tool name is written via textContent below.
  chip.innerHTML = `<span class="tc-name"></span><span class="tc-check">✓</span>`;
  const nameEl = chip.querySelector(".tc-name");
  nameEl.textContent = label;
  toolChipsEl.append(chip);
}
1292
+
1293
// Handle a "fallback" event: show its reason in the stage text and the corrections area.
function renderComputeFallback(event) {
  // Acceleration is automatic; a fallback is informational only (no control to flip).
  const reason = event.reason || "Running on CPU (slower).";
  for (const target of [turnStageTextEl, corrections]) {
    if (target) target.textContent = reason;
  }
}
1299
+
1300
  function syncCurrentIdeaGoals() {
1301
  const currentId = session.current_idea_id;
1302
  if (!currentId || !Array.isArray(session.ideas)) return;
static/index.html CHANGED
@@ -222,6 +222,19 @@
222
 
223
  <div id="corrections" class="marginalia" aria-live="polite"></div>
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  <article class="fate">
226
  <span id="verdict-stamp" class="verdict-stamp verdict-ready">
227
  <span class="seal-dot"></span>
 
222
 
223
  <div id="corrections" class="marginalia" aria-live="polite"></div>
224
 
225
+ <div id="turn-progress" class="turn-progress" hidden aria-live="polite">
226
+ <div class="turn-stage">
227
+ <span id="turn-stage-icon" class="ts-icon">🪶</span>
228
+ <span id="turn-stage-text" class="ts-text">Thinking</span>
229
+ <span id="turn-tokens" class="ts-tokens"></span>
230
+ <span id="turn-eta" class="ts-eta"></span>
231
+ </div>
232
+ <div class="turn-bar" role="progressbar" aria-label="Turn progress">
233
+ <div id="turn-bar-fill" class="turn-bar-fill"></div>
234
+ </div>
235
+ <div id="tool-chips" class="tool-chips"></div>
236
+ </div>
237
+
238
  <article class="fate">
239
  <span id="verdict-stamp" class="verdict-stamp verdict-ready">
240
  <span class="seal-dot"></span>
static/styles.css CHANGED
@@ -738,30 +738,23 @@ textarea:disabled {
738
  transition: opacity 0.5s;
739
  }
740
 
741
- .wood-dot.inked {
742
  background: rgba(73, 49, 22, 0.34);
743
  }
744
 
745
- .wood-dot.echo {
746
  background: var(--oxblood);
747
  box-shadow: 0 0 0 2px rgba(255, 240, 181, 0.5);
748
  animation: echo-pulse 2.4s ease-in-out infinite;
749
  }
750
 
751
- .wood-dot.idea {
752
  z-index: 2;
 
753
  background: var(--leaf);
754
  box-shadow:
755
  0 0 0 3px #fff0b5,
756
- 0 0 20px rgba(47, 107, 65, 0.5);
757
- }
758
-
759
- .wood-dot.idea.bleed,
760
- .wood-dot.idea.echo-idea {
761
- background: var(--oxblood);
762
- box-shadow:
763
- 0 0 0 3px #fff0b5,
764
- 0 0 20px rgba(154, 43, 34, 0.5);
765
  }
766
 
767
  @keyframes echo-pulse {
@@ -1298,3 +1291,114 @@ textarea:disabled {
1298
  transition-duration: 0.001ms !important;
1299
  }
1300
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
  transition: opacity 0.5s;
739
  }
740
 
741
+ .wood-dot.wood-inked {
742
  background: rgba(73, 49, 22, 0.34);
743
  }
744
 
745
+ .wood-dot.wood-echo {
746
  background: var(--oxblood);
747
  box-shadow: 0 0 0 2px rgba(255, 240, 181, 0.5);
748
  animation: echo-pulse 2.4s ease-in-out infinite;
749
  }
750
 
751
+ .wood-dot.wood-idea {
752
  z-index: 2;
753
+ border-radius: 50%;
754
  background: var(--leaf);
755
  box-shadow:
756
  0 0 0 3px #fff0b5,
757
+ 0 0 20px rgba(47, 107, 65, 0.6);
 
 
 
 
 
 
 
 
758
  }
759
 
760
  @keyframes echo-pulse {
 
1291
  transition-duration: 0.001ms !important;
1292
  }
1293
  }
1294
+
1295
+ /* Live turn progress (stage + token count + tool chips) */
1296
+ .turn-progress {
1297
+ display: flex;
1298
+ flex-direction: column;
1299
+ gap: 0.4rem;
1300
+ margin: 0.2rem 0 0.4rem;
1301
+ padding: 0.5rem 0.7rem;
1302
+ border: 1px solid var(--rule-soft);
1303
+ border-left: 3px solid var(--gold);
1304
+ border-radius: 8px;
1305
+ background: rgba(216, 162, 38, 0.06);
1306
+ }
1307
+
1308
+ .turn-stage {
1309
+ display: flex;
1310
+ align-items: center;
1311
+ gap: 0.45rem;
1312
+ font-family: var(--label);
1313
+ font-size: 0.78rem;
1314
+ color: var(--ink-soft);
1315
+ }
1316
+
1317
+ .turn-stage .ts-icon {
1318
+ font-size: 0.95rem;
1319
+ line-height: 1;
1320
+ animation: ts-pulse 1.6s ease-in-out infinite;
1321
+ }
1322
+
1323
+ .turn-stage .ts-text {
1324
+ font-weight: 600;
1325
+ color: var(--ink);
1326
+ }
1327
+
1328
+ .turn-stage .ts-tokens {
1329
+ color: var(--ink-faint);
1330
+ font-variant-numeric: tabular-nums;
1331
+ }
1332
+
1333
+ .turn-stage .ts-eta {
1334
+ margin-left: auto;
1335
+ color: var(--ink-faint);
1336
+ font-variant-numeric: tabular-nums;
1337
+ }
1338
+
1339
+ .turn-bar {
1340
+ height: 5px;
1341
+ border-radius: 999px;
1342
+ background: var(--rule-soft);
1343
+ overflow: hidden;
1344
+ }
1345
+
1346
+ .turn-bar-fill {
1347
+ height: 100%;
1348
+ width: 0%;
1349
+ border-radius: 999px;
1350
+ background: linear-gradient(90deg, var(--gold), var(--gold-2));
1351
+ transition: width 0.3s ease;
1352
+ }
1353
+
1354
+ @keyframes ts-pulse {
1355
+ 0%,
1356
+ 100% {
1357
+ opacity: 0.55;
1358
+ transform: translateY(0);
1359
+ }
1360
+ 50% {
1361
+ opacity: 1;
1362
+ transform: translateY(-1px);
1363
+ }
1364
+ }
1365
+
1366
+ .tool-chips {
1367
+ display: flex;
1368
+ flex-wrap: wrap;
1369
+ gap: 0.35rem;
1370
+ }
1371
+
1372
+ .tool-chips:empty {
1373
+ display: none;
1374
+ }
1375
+
1376
+ .tool-chip {
1377
+ display: inline-flex;
1378
+ align-items: center;
1379
+ gap: 0.3rem;
1380
+ font-family: var(--label);
1381
+ font-size: 0.68rem;
1382
+ font-weight: 600;
1383
+ color: var(--leaf);
1384
+ background: rgba(47, 107, 65, 0.1);
1385
+ border: 1px solid rgba(47, 107, 65, 0.28);
1386
+ border-radius: 999px;
1387
+ padding: 0.12rem 0.55rem;
1388
+ animation: chip-in 0.22s ease;
1389
+ }
1390
+
1391
+ .tool-chip .tc-check {
1392
+ font-size: 0.66rem;
1393
+ }
1394
+
1395
+ @keyframes chip-in {
1396
+ from {
1397
+ opacity: 0;
1398
+ transform: translateY(2px);
1399
+ }
1400
+ to {
1401
+ opacity: 1;
1402
+ transform: translateY(0);
1403
+ }
1404
+ }
tests/test_agent.py CHANGED
@@ -291,3 +291,49 @@ def test_planner_score_idea_scores_current_idea() -> None:
291
 
292
  assert scored.score is not None
293
  assert scored.artifact["title"] == first.artifact["title"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  assert scored.score is not None
293
  assert scored.artifact["title"] == first.artifact["title"]
294
+
295
+
296
+ def test_turn_stream_emits_ordered_progress_events() -> None:
297
+ index = load_test_index()
298
+ engine = AdvisorEngine(index)
299
+
300
+ events = list(engine.turn_stream("A local-first archive cartographer for family photos", {}))
301
+ types = [event["type"] for event in events]
302
+
303
+ assert types[0] == "start"
304
+ assert types[-1] == "done"
305
+ assert "token" in types
306
+ # the planning stage is announced before any tool runs, and tools stream as they execute
307
+ assert types.index("stage") < types.index("tool_event")
308
+ tool_events = [event for event in events if event["type"] == "tool_event"]
309
+ assert [event["name"] for event in tool_events] == ["save_idea", "search_projects", "score_idea"]
310
+ assert events[-1]["state"]["ideas"]
311
+
312
+
313
+ def test_turn_stream_done_matches_blocking_turn() -> None:
314
+ # idea ids are randomly generated, so compare the deterministic surface of the turn.
315
+ index = load_test_index()
316
+ streamed = list(AdvisorEngine(index).turn_stream("write bolder and find whitespace", {}))
317
+ done = next(event for event in streamed if event["type"] == "done")
318
+ blocking = AdvisorEngine(index).turn("write bolder and find whitespace", {})
319
+
320
+ assert done["response"] == blocking.response
321
+ assert done["score"] == (blocking.score.to_dict() if blocking.score else None)
322
+ assert done["plan"] == blocking.plan
323
+ assert [item["label"] for item in done["whitespace"]] == [
324
+ item.label for item in blocking.whitespace
325
+ ]
326
+ assert [idea["title"] for idea in done["state"]["ideas"]] == [
327
+ idea["title"] for idea in blocking.state["ideas"]
328
+ ]
329
+
330
+
331
+ def test_turn_accepts_injected_resolution() -> None:
332
+ index = load_test_index()
333
+ engine = AdvisorEngine(index, planner=StaticPlanner(ToolCall("score_idea", {})))
334
+ injected = ToolResolution(status="valid", call=ToolCall("list_projects", {"sort": "likes"}), errors=())
335
+
336
+ result = engine.turn("score it", {}, resolution=injected)
337
+
338
+ # the injected list_projects call wins over the planner's score_idea call
339
+ assert result.state["last_tool_resolution"]["call"]["name"] == "list_projects"
tests/test_app.py CHANGED
@@ -109,6 +109,38 @@ def test_agent_turn_stream_endpoint_exports_ndjson_events() -> None:
109
  assert lines[-1]["state"]["ideas"]
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
113
  captured = {}
114
 
 
109
  assert lines[-1]["state"]["ideas"]
110
 
111
 
112
+ def test_agent_turn_stream_streams_stage_and_tool_events() -> None:
113
+ response = agent_turn_stream(
114
+ {
115
+ "message": "A local-first archive cartographer for family photos",
116
+ "session_json": "{}",
117
+ }
118
+ )
119
+ payload = asyncio.run(_read_streaming_response(response))
120
+ lines = [json.loads(line) for line in payload.splitlines()]
121
+ types = [line["type"] for line in lines]
122
+
123
+ assert "stage" in types
124
+ assert any(line["type"] == "tool_event" and line.get("name") for line in lines)
125
+ assert types.index("stage") < types.index("token")
126
+
127
+
128
+ def test_agent_turn_stream_runs_on_cpu_compute() -> None:
129
+ response = agent_turn_stream(
130
+ {
131
+ "message": "A local-first archive cartographer for family photos",
132
+ "session_json": "{}",
133
+ "compute": "cpu",
134
+ }
135
+ )
136
+ payload = asyncio.run(_read_streaming_response(response))
137
+ lines = [json.loads(line) for line in payload.splitlines()]
138
+
139
+ assert lines[0]["type"] == "start"
140
+ assert lines[-1]["type"] == "done"
141
+ assert lines[-1]["state"]["ideas"]
142
+
143
+
144
  def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
145
  captured = {}
146
 
tests/test_model_runtime.py CHANGED
@@ -8,13 +8,26 @@ from hackathon_advisor.model_runtime import (
8
  render_context,
9
  runtime_status,
10
  system_prompt,
 
11
  _disable_sampling_generation_defaults,
12
  _normalize_xml_tool_output,
 
13
  _strip_unused_generation_inputs,
14
  )
15
  from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled
16
 
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  def test_rule_planner_emits_valid_search_call() -> None:
19
  planner = RuleBasedPlanner()
20
 
@@ -81,6 +94,18 @@ def test_rule_planner_keeps_project_words_inside_ideas() -> None:
81
  assert resolution.call.name == "save_idea"
82
 
83
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  def test_rule_planner_splits_explicit_idea_pitch() -> None:
85
  planner = RuleBasedPlanner()
86
 
@@ -215,3 +240,39 @@ def test_model_xml_fragment_is_normalized() -> None:
215
  output = 'name="save_idea">{"title":"A","pitch":"B"}'
216
 
217
  assert _normalize_xml_tool_output(output) == '<function name="save_idea">{"title":"A","pitch":"B"}</function>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  render_context,
9
  runtime_status,
10
  system_prompt,
11
+ _best_local_device,
12
  _disable_sampling_generation_defaults,
13
  _normalize_xml_tool_output,
14
+ _resolve_torch_device,
15
  _strip_unused_generation_inputs,
16
  )
17
  from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled
18
 
19
 
20
+ class FakeBackends:
21
+ def __init__(self, mps: bool) -> None:
22
+ self.mps = type("MPS", (), {"is_available": staticmethod(lambda: mps)})()
23
+
24
+
25
+ class FakeTorch:
26
+ def __init__(self, cuda: bool = False, mps: bool = False) -> None:
27
+ self.cuda = type("CUDA", (), {"is_available": staticmethod(lambda: cuda)})()
28
+ self.backends = FakeBackends(mps)
29
+
30
+
31
  def test_rule_planner_emits_valid_search_call() -> None:
32
  planner = RuleBasedPlanner()
33
 
 
94
  assert resolution.call.name == "save_idea"
95
 
96
 
97
+ def test_rule_planner_does_not_match_commands_inside_idea_words() -> None:
98
+ planner = RuleBasedPlanner()
99
+
100
+ resolution = planner.plan(
101
+ "A neighborhood seed swap archive that reminds gardeners when to plant shared seeds",
102
+ {},
103
+ )
104
+
105
+ assert resolution.status == "valid"
106
+ assert resolution.call.name == "save_idea"
107
+
108
+
109
  def test_rule_planner_splits_explicit_idea_pitch() -> None:
110
  planner = RuleBasedPlanner()
111
 
 
240
  output = 'name="save_idea">{"title":"A","pitch":"B"}'
241
 
242
  assert _normalize_xml_tool_output(output) == '<function name="save_idea">{"title":"A","pitch":"B"}</function>'
243
+
244
+
245
+ def test_resolve_device_keeps_auto_and_explicit_cpu() -> None:
246
+ assert _resolve_torch_device("auto", FakeTorch()) == "auto"
247
+ assert _resolve_torch_device("cpu", FakeTorch(cuda=True, mps=True)) == "cpu"
248
+
249
+
250
+ def test_resolve_device_prefers_cuda_then_mps_then_cpu(monkeypatch) -> None:
251
+ monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)
252
+
253
+ assert _best_local_device(FakeTorch(cuda=True, mps=True)) == "cuda"
254
+ assert _best_local_device(FakeTorch(cuda=False, mps=True)) == "mps"
255
+ assert _best_local_device(FakeTorch(cuda=False, mps=False)) == "cpu"
256
+ # "local" resolves through the same ladder
257
+ assert _resolve_torch_device("local", FakeTorch(cuda=False, mps=True)) == "mps"
258
+
259
+
260
+ def test_resolve_device_unavailable_request_degrades_gracefully(monkeypatch) -> None:
261
+ monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)
262
+
263
+ # asking for cuda on an MPS-only box lands on mps, not a crash
264
+ assert _resolve_torch_device("cuda", FakeTorch(cuda=False, mps=True)) == "mps"
265
+
266
+
267
+ def test_resolve_device_skips_cuda_under_zero_gpu(monkeypatch) -> None:
268
+ # In a ZeroGPU main process there is no local CUDA, and probing it is avoided.
269
+ monkeypatch.setenv("ADVISOR_ZERO_GPU", "1")
270
+
271
+ assert _best_local_device(FakeTorch(cuda=True, mps=False)) == "cpu"
272
+
273
+
274
+ def test_runtime_status_reports_configured_device() -> None:
275
+ planner = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", device="local")
276
+
277
+ assert runtime_status(planner).to_dict()["device"] == "local"
278
+ assert runtime_status(RuleBasedPlanner()).to_dict()["device"] == ""
tests/test_profiling.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
+ from hackathon_advisor.profiling import (
4
+ TurnProfiler,
5
+ configure_logging,
6
+ messages_processed,
7
+ next_message_index,
8
+ resource_snapshot,
9
+ )
10
+
11
+
12
+ def _turn_events() -> list[dict]:
13
+ return [
14
+ {"type": "start"},
15
+ {"type": "stage", "stage": "planning"},
16
+ {"type": "model_progress", "tokens": 5, "max_tokens": 180},
17
+ {"type": "model_progress", "tokens": 12, "max_tokens": 180},
18
+ {"type": "stage", "stage": "running_tool"},
19
+ {"type": "tool_event", "name": "save_idea"},
20
+ {"type": "tool_event", "name": "score_idea"},
21
+ {"type": "stage", "stage": "writing"},
22
+ {"type": "token", "text": "hello "},
23
+ {"type": "done"},
24
+ ]
25
+
26
+
27
+ def test_profiler_observes_tokens_tools_and_stage_durations() -> None:
28
+ profiler = TurnProfiler(message_index=1, compute="cpu", backend="minicpm-transformers")
29
+ for event in _turn_events():
30
+ profiler.observe(event)
31
+
32
+ durations = profiler.durations()
33
+
34
+ assert profiler.tokens == 12
35
+ assert profiler.tool_count == 2
36
+ assert profiler.fell_back is False
37
+ assert set(durations) >= {"total_ms", "decode_ms", "tools_ms", "write_ms"}
38
+ assert all(value >= 0 for value in durations.values())
39
+
40
+
41
+ def test_profiler_logs_start_and_summary() -> None:
42
+ configure_logging() # the advisor logger does not propagate, so capture it directly
43
+ logger = logging.getLogger("hackathon_advisor")
44
+ messages: list[str] = []
45
+ handler = logging.Handler()
46
+ handler.emit = lambda record: messages.append(record.getMessage())
47
+ logger.addHandler(handler)
48
+ try:
49
+ profiler = TurnProfiler(message_index=7, compute="gpu", backend="rules", message_chars=42)
50
+ profiler.log_start()
51
+ for event in _turn_events():
52
+ profiler.observe(event)
53
+ profiler.log_summary()
54
+ profiler.log_summary() # idempotent: a second call must not log again
55
+ finally:
56
+ logger.removeHandler(handler)
57
+
58
+ summaries = [message for message in messages if "turn #7" in message]
59
+ assert any("start" in message for message in summaries)
60
+ assert sum("done" in message for message in summaries) == 1 # log_summary is idempotent
61
+
62
+
63
+ def test_profiler_marks_fallback() -> None:
64
+ profiler = TurnProfiler(message_index=2, compute="gpu", backend="minicpm-transformers")
65
+ profiler.observe({"type": "fallback", "to": "cpu"})
66
+
67
+ assert profiler.fell_back is True
68
+
69
+
70
+ def test_resource_snapshot_is_best_effort_dict() -> None:
71
+ snapshot = resource_snapshot()
72
+
73
+ assert isinstance(snapshot, dict)
74
+ # rss is available on the platforms we run on; never raises regardless.
75
+ assert "rss_mb" in snapshot
76
+
77
+
78
+ def test_message_counter_increments() -> None:
79
+ start = messages_processed()
80
+ first = next_message_index()
81
+ second = next_message_index()
82
+
83
+ assert second == first + 1
84
+ assert messages_processed() >= start + 2
uv.lock ADDED
The diff for this file is too large to render. See raw diff