Spaces:
Running on Zero
Running on Zero
feat: stream advisor progress
Browse files
Co-authored-by: Codex <noreply@openai.com>
- app.py +97 -31
- hackathon_advisor/agent.py +57 -3
- hackathon_advisor/asr_runtime.py +113 -14
- hackathon_advisor/data.py +9 -0
- hackathon_advisor/model_runtime.py +168 -23
- hackathon_advisor/profiling.py +165 -0
- hackathon_advisor/wood_map.py +74 -18
- hackathon_advisor/zerogpu.py +14 -0
- scripts/__init__.py +2 -0
- static/app.js +131 -2
- static/index.html +13 -0
- static/styles.css +116 -12
- tests/test_agent.py +46 -0
- tests/test_app.py +32 -0
- tests/test_model_runtime.py +61 -0
- tests/test_profiling.py +84 -0
- uv.lock +0 -0
app.py
CHANGED
|
@@ -16,6 +16,12 @@ from hackathon_advisor.asr_runtime import create_asr_transcriber
|
|
| 16 |
from hackathon_advisor.chapter import build_chapter_markdown
|
| 17 |
from hackathon_advisor.data import ProjectIndex
|
| 18 |
from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
from hackathon_advisor.field_notes import build_field_notes_markdown
|
| 20 |
from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
|
| 21 |
from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
|
|
@@ -26,9 +32,10 @@ from hackathon_advisor.submission_packet import build_submission_packet_markdown
|
|
| 26 |
from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
|
| 27 |
from hackathon_advisor.tools import GOALS, goal_profiles
|
| 28 |
from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
|
| 29 |
-
from hackathon_advisor.zerogpu import gpu_task
|
| 30 |
|
| 31 |
|
|
|
|
| 32 |
install_asyncio_cleanup_hook()
|
| 33 |
|
| 34 |
ROOT = Path(__file__).parent
|
|
@@ -40,18 +47,33 @@ MAX_AUDIO_UPLOAD_BYTES = 25 * 1024 * 1024
|
|
| 40 |
AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
|
| 41 |
|
| 42 |
index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
| 44 |
voice_transcriber = create_asr_transcriber()
|
| 45 |
app = Server()
|
| 46 |
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def _json_event(payload: dict) -> str:
|
| 49 |
return json.dumps(payload, ensure_ascii=False)
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
@gpu_task
|
| 53 |
-
def
|
| 54 |
-
|
| 55 |
|
| 56 |
|
| 57 |
@gpu_task
|
|
@@ -72,33 +94,71 @@ def _session_from_payload(payload: dict[str, Any] | None) -> dict[str, Any]:
|
|
| 72 |
return _session_from_json(str(payload.get("session_json") or "{}"))
|
| 73 |
|
| 74 |
|
| 75 |
-
def
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
"type": "start",
|
| 81 |
-
"corrections": [correction.to_dict() for correction in result.corrections],
|
| 82 |
-
"normalized_text": result.normalized_text,
|
| 83 |
-
"tool_events": [event.to_dict() for event in result.tool_events],
|
| 84 |
-
}
|
| 85 |
-
)
|
| 86 |
|
| 87 |
-
for chunk in result.stream_chunks():
|
| 88 |
-
yield _json_event({"type": "token", "text": chunk})
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
}
|
| 101 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
|
| 104 |
@app.get("/", response_class=HTMLResponse)
|
|
@@ -197,14 +257,20 @@ def agent_turn_stream(payload: dict[str, Any] | None = Body(default=None)) -> St
|
|
| 197 |
payload = payload or {}
|
| 198 |
message = str(payload.get("message") or "")
|
| 199 |
session_json = str(payload.get("session_json") or "{}")
|
|
|
|
| 200 |
|
| 201 |
def stream() -> Iterator[str]:
|
| 202 |
-
for event in _agent_turn_events(message, session_json):
|
| 203 |
yield f"{event}\n"
|
| 204 |
|
| 205 |
return StreamingResponse(stream(), media_type="application/x-ndjson")
|
| 206 |
|
| 207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
@app.post("/api/transcribe")
|
| 209 |
async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
|
| 210 |
content_type = str(audio.content_type or "")
|
|
@@ -347,8 +413,8 @@ def submission_packet_artifact(session_json: str = "{}") -> str:
|
|
| 347 |
|
| 348 |
|
| 349 |
@app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
|
| 350 |
-
def agent_turn(message: str, session_json: str = "{}") -> Iterator[str]:
|
| 351 |
-
yield from _agent_turn_events(message, session_json)
|
| 352 |
|
| 353 |
|
| 354 |
if __name__ == "__main__":
|
|
|
|
| 16 |
from hackathon_advisor.chapter import build_chapter_markdown
|
| 17 |
from hackathon_advisor.data import ProjectIndex
|
| 18 |
from hackathon_advisor.demo_rehearsal import build_demo_rehearsal
|
| 19 |
+
from hackathon_advisor.model_runtime import create_tool_planner
|
| 20 |
+
from hackathon_advisor.profiling import (
|
| 21 |
+
TurnProfiler,
|
| 22 |
+
configure_logging,
|
| 23 |
+
next_message_index,
|
| 24 |
+
)
|
| 25 |
from hackathon_advisor.field_notes import build_field_notes_markdown
|
| 26 |
from hackathon_advisor.lora_dataset import build_lora_dataset_jsonl
|
| 27 |
from hackathon_advisor.lora_training_kit import TRAINING_KIT_FILENAME, build_lora_training_kit_zip
|
|
|
|
| 32 |
from hackathon_advisor.tool_contracts import resolve_tool_call, tool_schemas
|
| 33 |
from hackathon_advisor.tools import GOALS, goal_profiles
|
| 34 |
from hackathon_advisor.trace_export import build_trace_jsonl, trace_metadata
|
| 35 |
+
from hackathon_advisor.zerogpu import gpu_task, is_gpu_quota_error, zero_gpu_enabled
|
| 36 |
|
| 37 |
|
| 38 |
+
configure_logging()
|
| 39 |
install_asyncio_cleanup_hook()
|
| 40 |
|
| 41 |
ROOT = Path(__file__).parent
|
|
|
|
| 47 |
AUDIO_UPLOAD_SUFFIXES = {".aac", ".aif", ".aiff", ".flac", ".m4a", ".mp3", ".oga", ".ogg", ".opus", ".wav", ".webm"}
|
| 48 |
|
| 49 |
index = ProjectIndex.from_files(DATA_PATH, INDEX_PATH)
|
| 50 |
+
# Acceleration is automatic: on a ZeroGPU Space the GPU path uses accelerate device_map inside
|
| 51 |
+
# the @spaces.GPU fork; locally the device resolves CUDA -> Apple MPS -> CPU. CPU is only used
|
| 52 |
+
# as an explicit override or a quota fallback.
|
| 53 |
+
engine = AdvisorEngine(index, create_tool_planner(device="auto" if zero_gpu_enabled() else "local"))
|
| 54 |
voice_transcriber = create_asr_transcriber()
|
| 55 |
app = Server()
|
| 56 |
|
| 57 |
+
_cpu_engine: AdvisorEngine | None = None
|
| 58 |
+
|
| 59 |
|
| 60 |
def _json_event(payload: dict) -> str:
|
| 61 |
return json.dumps(payload, ensure_ascii=False)
|
| 62 |
|
| 63 |
|
| 64 |
+
def _cpu_engine_instance() -> AdvisorEngine:
    """Return the shared CPU-pinned advisor engine, building it on first use.

    The engine backs both the explicit "cpu" override and the fallback taken when a
    ZeroGPU allocation is denied. Construction is deferred so the CPU planner is only
    created once a turn actually needs it."""
    global _cpu_engine
    if _cpu_engine is not None:
        return _cpu_engine
    _cpu_engine = AdvisorEngine(index, create_tool_planner(device="cpu"))
    return _cpu_engine
|
| 72 |
+
|
| 73 |
+
|
| 74 |
@gpu_task
def _engine_turn_stream_gpu(message: str, session: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Stream a turn from the primary engine inside the ``gpu_task`` wrapper.

    NOTE(review): ``gpu_task`` is presumably the ZeroGPU (``spaces.GPU``) decorator from
    hackathon_advisor.zerogpu — confirm there."""
    events = engine.turn_stream(message, session)
    yield from events
|
| 77 |
|
| 78 |
|
| 79 |
@gpu_task
|
|
|
|
| 94 |
return _session_from_json(str(payload.get("session_json") or "{}"))
|
| 95 |
|
| 96 |
|
| 97 |
+
def _primary_turn_stream(message: str, session: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Stream a turn from the primary engine, going through the GPU-task wrapper when
    ZeroGPU is enabled and calling the engine directly otherwise."""
    source = _engine_turn_stream_gpu if zero_gpu_enabled() else engine.turn_stream
    yield from source(message, session)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
|
|
|
|
|
|
| 103 |
|
| 104 |
+
def _agent_turn_events(
    message: str,
    session_json: str = "{}",
    compute: str = "gpu",
) -> Iterator[str]:
    """Yield every event of one advisor turn as a JSON string, profiling the turn as it runs.

    Args:
        message: The user's message for this turn.
        session_json: JSON-encoded session state; defaults to an empty object.
        compute: "cpu" to force the CPU engine, anything else for the primary path.

    Yields:
        One JSON-encoded event per event produced by ``_profiled_turn_events``.

    Raises:
        Exception: Whatever the turn raised, re-raised after the failure summary is logged.
    """
    profiler = TurnProfiler(
        message_index=next_message_index(),
        compute=compute,
        backend=str(engine.runtime_status().get("backend", "")),
        message_chars=len(message),
    )
    profiler.log_start()
    try:
        for event in _profiled_turn_events(message, session_json, compute):
            profiler.observe(event)
            yield _json_event(event)
    except Exception as error:  # noqa: BLE001 - log timing/resources even when a turn fails
        profiler.device = _active_device(compute)
        profiler.log_summary(error)
        raise
    else:
        # Success-path bookkeeping lives outside the try body so that a failure inside the
        # profiler itself is not caught above and logged a second time as a turn failure.
        profiler.device = _active_device(compute)
        profiler.log_summary()
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def _active_device(compute: str) -> str:
    """Report the "device" entry of the runtime status for the engine selected by
    ``compute`` (the CPU engine for "cpu", the primary engine otherwise).

    Returns an empty string when the CPU engine has not been created yet or when reading
    the status fails, so profiling can never break a turn."""
    chosen = engine if compute != "cpu" else _cpu_engine
    if chosen is None:
        return ""
    try:
        return str(chosen.runtime_status().get("device", ""))
    except Exception:  # noqa: BLE001 - profiling must never break a turn
        return ""
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def _profiled_turn_events(
    message: str,
    session_json: str,
    compute: str,
) -> Iterator[dict[str, Any]]:
    """Yield the raw event dicts for one turn, choosing between the primary and CPU engines.

    Unless ``compute`` is "cpu", the primary stream is tried first. If it fails with a GPU
    quota error before emitting anything, a "fallback" event is yielded and the turn is
    re-run on the CPU engine. Any other failure, or a failure after events were already
    emitted, is re-raised unchanged."""
    session = _session_from_json(session_json)
    if compute != "cpu":
        emitted_any = False
        try:
            for item in _primary_turn_stream(message, session):
                emitted_any = True
                yield item
        except Exception as error:  # noqa: BLE001 - fall back to local on a clean quota failure
            # A partially streamed turn cannot be restarted without duplicating output.
            if emitted_any:
                raise
            if not is_gpu_quota_error(error):
                raise
            yield {
                "type": "fallback",
                "to": "cpu",
                "reason": "ZeroGPU quota reached — running this turn locally (slower).",
            }
        else:
            return

    cpu_events = _cpu_engine_instance().turn_stream(message, session)
    for item in cpu_events:
        yield item
|
| 162 |
|
| 163 |
|
| 164 |
@app.get("/", response_class=HTMLResponse)
|
|
|
|
| 257 |
payload = payload or {}
|
| 258 |
message = str(payload.get("message") or "")
|
| 259 |
session_json = str(payload.get("session_json") or "{}")
|
| 260 |
+
compute = _normalize_compute(payload.get("compute"))
|
| 261 |
|
| 262 |
def stream() -> Iterator[str]:
|
| 263 |
+
for event in _agent_turn_events(message, session_json, compute):
|
| 264 |
yield f"{event}\n"
|
| 265 |
|
| 266 |
return StreamingResponse(stream(), media_type="application/x-ndjson")
|
| 267 |
|
| 268 |
|
| 269 |
+
def _normalize_compute(value: Any) -> str:
|
| 270 |
+
# Acceleration is automatic; "cpu" is the only manual override (not surfaced in the UI).
|
| 271 |
+
return "cpu" if str(value or "").strip().lower() == "cpu" else "gpu"
|
| 272 |
+
|
| 273 |
+
|
| 274 |
@app.post("/api/transcribe")
|
| 275 |
async def transcribe_audio(audio: UploadFile = File(...)) -> dict[str, Any]:
|
| 276 |
content_type = str(audio.content_type or "")
|
|
|
|
| 413 |
|
| 414 |
|
| 415 |
@app.api(name="agent_turn", concurrency_limit=4, stream_every=0.04)
def agent_turn(message: str, session_json: str = "{}", compute: str = "gpu") -> Iterator[str]:
    """API entry point for one advisor turn: normalizes ``compute`` and streams the turn's
    JSON-encoded events."""
    mode = _normalize_compute(compute)
    yield from _agent_turn_events(message, session_json, mode)
|
| 418 |
|
| 419 |
|
| 420 |
if __name__ == "__main__":
|
hackathon_advisor/agent.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
| 3 |
from dataclasses import dataclass
|
| 4 |
from dataclasses import replace
|
| 5 |
from typing import Any
|
|
@@ -8,7 +9,7 @@ from hackathon_advisor.aliases import Correction, normalize_text
|
|
| 8 |
from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
|
| 9 |
from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
|
| 10 |
from hackathon_advisor.scoring import ScoreCard
|
| 11 |
-
from hackathon_advisor.tool_contracts import ToolCall
|
| 12 |
from hackathon_advisor.tools import (
|
| 13 |
GOALS,
|
| 14 |
AdvisorTools,
|
|
@@ -58,13 +59,20 @@ class AdvisorEngine:
|
|
| 58 |
def runtime_status(self) -> dict[str, Any]:
|
| 59 |
return runtime_status(self.planner).to_dict()
|
| 60 |
|
| 61 |
-
def turn(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
state = dict(state or {})
|
| 63 |
state.setdefault("ideas", [])
|
| 64 |
state.setdefault("profile", {})
|
| 65 |
state.setdefault("goals", GOALS[:3])
|
| 66 |
normalized, corrections = normalize_text(message)
|
| 67 |
-
resolution
|
|
|
|
| 68 |
state["last_tool_resolution"] = resolution.to_dict()
|
| 69 |
tool_events: list[ToolEvent] = []
|
| 70 |
projects: list[Project] = []
|
|
@@ -134,6 +142,52 @@ class AdvisorEngine:
|
|
| 134 |
|
| 135 |
return self._idea_research_turn(call, normalized, corrections, state, tool_events)
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
def _result(
|
| 138 |
self,
|
| 139 |
normalized_text: str,
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
from collections.abc import Iterator
|
| 4 |
from dataclasses import dataclass
|
| 5 |
from dataclasses import replace
|
| 6 |
from typing import Any
|
|
|
|
| 9 |
from hackathon_advisor.data import Project, ProjectIndex, WhitespaceItem
|
| 10 |
from hackathon_advisor.model_runtime import ToolPlanner, create_tool_planner, runtime_status
|
| 11 |
from hackathon_advisor.scoring import ScoreCard
|
| 12 |
+
from hackathon_advisor.tool_contracts import ToolCall, ToolResolution
|
| 13 |
from hackathon_advisor.tools import (
|
| 14 |
GOALS,
|
| 15 |
AdvisorTools,
|
|
|
|
| 59 |
def runtime_status(self) -> dict[str, Any]:
    """Return the planner's runtime status as a plain dict."""
    # Inside the method body the bare name resolves to the module-level runtime_status().
    status = runtime_status(self.planner)
    return status.to_dict()
|
| 61 |
|
| 62 |
+
def turn(
|
| 63 |
+
self,
|
| 64 |
+
message: str,
|
| 65 |
+
state: dict[str, Any] | None = None,
|
| 66 |
+
*,
|
| 67 |
+
resolution: ToolResolution | None = None,
|
| 68 |
+
) -> TurnResult:
|
| 69 |
state = dict(state or {})
|
| 70 |
state.setdefault("ideas", [])
|
| 71 |
state.setdefault("profile", {})
|
| 72 |
state.setdefault("goals", GOALS[:3])
|
| 73 |
normalized, corrections = normalize_text(message)
|
| 74 |
+
if resolution is None:
|
| 75 |
+
resolution = self.planner.plan(normalized, state)
|
| 76 |
state["last_tool_resolution"] = resolution.to_dict()
|
| 77 |
tool_events: list[ToolEvent] = []
|
| 78 |
projects: list[Project] = []
|
|
|
|
| 142 |
|
| 143 |
return self._idea_research_turn(call, normalized, corrections, state, tool_events)
|
| 144 |
|
| 145 |
+
def turn_stream(self, message: str, state: dict[str, Any] | None = None) -> Iterator[dict[str, Any]]:
    """Run a turn while yielding plain-dict progress events, so a caller can stream the
    real work (tool-call decoding, tool execution, response) instead of replaying a
    finished string. Every yielded value is JSON-serializable so it can cross a ZeroGPU
    process boundary.

    Event order: "start", a "planning" stage, any planner events other than "resolved",
    a "running_tool" stage, one "tool_event" per executed tool, a "writing" stage, one
    "token" per response chunk, and finally "done" with the full turn result."""
    state = dict(state or {})
    # Seed the same defaults turn() applies before it plans, so the planner is handed an
    # identical state on the streaming and non-streaming paths.
    state.setdefault("ideas", [])
    state.setdefault("profile", {})
    state.setdefault("goals", GOALS[:3])
    normalized, corrections = normalize_text(message)
    yield {
        "type": "start",
        "corrections": [correction.to_dict() for correction in corrections],
        "normalized_text": normalized,
    }
    yield {"type": "stage", "stage": "planning", "label": "Choosing the next move"}

    resolution: ToolResolution | None = None
    for event in self.planner.plan_iter(normalized, state):
        if event.get("type") == "resolved":
            # Kept internal: the resolution object is handed to turn() below, not streamed.
            resolution = event["resolution"]
        else:
            yield event
    tool_name = resolution.call.name if resolution is not None else ""
    yield {
        "type": "stage",
        "stage": "running_tool",
        "tool": tool_name,
        "label": f"Calling {tool_name}" if tool_name else "Running tools",
    }

    # Pass the raw message: turn() normalizes its input itself, so handing it the
    # already-normalized text would run normalization twice. If the planner produced no
    # resolution, turn() falls back to planning on its own.
    result = self.turn(message, state, resolution=resolution)
    for event in result.tool_events:
        yield {"type": "tool_event", **event.to_dict()}

    yield {"type": "stage", "stage": "writing", "label": "Writing the page"}
    for chunk in result.stream_chunks():
        yield {"type": "token", "text": chunk}
    yield {
        "type": "done",
        "state": result.state,
        "response": result.response,
        "projects": [project.to_public_dict() for project in result.projects],
        "whitespace": [item.to_dict() for item in result.whitespace],
        "score": result.score.to_dict() if result.score else None,
        "plan": result.plan,
        "artifact": result.artifact,
    }
|
| 190 |
+
|
| 191 |
def _result(
|
| 192 |
self,
|
| 193 |
normalized_text: str,
|
hackathon_advisor/asr_runtime.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
from dataclasses import dataclass
|
|
|
|
| 4 |
import os
|
| 5 |
from pathlib import Path
|
| 6 |
import shutil
|
|
@@ -12,6 +13,10 @@ from typing import Any
|
|
| 12 |
DEFAULT_ASR_MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
|
| 13 |
DEFAULT_ASR_BACKEND = "nemo-asr"
|
| 14 |
DEFAULT_ASR_SAMPLE_RATE = 16_000
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
@dataclass(frozen=True)
|
|
@@ -47,22 +52,32 @@ class AsrStatus:
|
|
| 47 |
|
| 48 |
|
| 49 |
class NemotronAsrTranscriber:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
backend = DEFAULT_ASR_BACKEND
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
model_id: str = DEFAULT_ASR_MODEL_ID,
|
| 55 |
sample_rate: int = DEFAULT_ASR_SAMPLE_RATE,
|
|
|
|
| 56 |
) -> None:
|
| 57 |
self.model_id = model_id.strip() or DEFAULT_ASR_MODEL_ID
|
| 58 |
self.sample_rate = sample_rate
|
| 59 |
-
self.
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
def status(self) -> AsrStatus:
|
| 62 |
return AsrStatus(
|
| 63 |
-
backend=self.backend,
|
| 64 |
-
model_id=self.model_id,
|
| 65 |
-
loaded=self.
|
| 66 |
sample_rate=self.sample_rate,
|
| 67 |
)
|
| 68 |
|
|
@@ -71,23 +86,41 @@ class NemotronAsrTranscriber:
|
|
| 71 |
if not source.is_file():
|
| 72 |
raise RuntimeError("Voice note was not saved before transcription.")
|
| 73 |
self._ensure_loaded()
|
|
|
|
| 74 |
with tempfile.TemporaryDirectory(prefix="advisor-asr-") as directory:
|
| 75 |
wav_path = Path(directory) / "voice.wav"
|
| 76 |
normalize_audio_for_asr(source, wav_path, self.sample_rate)
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
| 79 |
if not transcript:
|
| 80 |
-
raise RuntimeError("
|
| 81 |
return AsrTranscript(
|
| 82 |
transcript=transcript,
|
| 83 |
-
model_id=self.model_id,
|
| 84 |
-
backend=self.backend,
|
| 85 |
sample_rate=self.sample_rate,
|
| 86 |
)
|
| 87 |
|
| 88 |
def _ensure_loaded(self) -> None:
|
| 89 |
-
if self.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
try:
|
| 92 |
import torch
|
| 93 |
import nemo.collections.asr as nemo_asr
|
|
@@ -97,12 +130,33 @@ class NemotronAsrTranscriber:
|
|
| 97 |
"before enabling voice transcription."
|
| 98 |
) from error
|
| 99 |
model = nemo_asr.models.ASRModel.from_pretrained(model_name=self.model_id)
|
| 100 |
-
device = os.environ.get("ADVISOR_ASR_DEVICE", "").strip()
|
| 101 |
-
if not device:
|
| 102 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 103 |
model.to(device)
|
| 104 |
model.eval()
|
| 105 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
def create_asr_transcriber() -> NemotronAsrTranscriber:
|
|
@@ -112,9 +166,54 @@ def create_asr_transcriber() -> NemotronAsrTranscriber:
|
|
| 112 |
return NemotronAsrTranscriber(
|
| 113 |
model_id=os.environ.get("ADVISOR_ASR_MODEL_ID", DEFAULT_ASR_MODEL_ID),
|
| 114 |
sample_rate=sample_rate,
|
|
|
|
| 115 |
)
|
| 116 |
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
def normalize_audio_for_asr(source: Path, target: Path, sample_rate: int = DEFAULT_ASR_SAMPLE_RATE) -> None:
|
| 119 |
ffmpeg = shutil.which("ffmpeg")
|
| 120 |
if not ffmpeg:
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
from dataclasses import dataclass
|
| 4 |
+
import logging
|
| 5 |
import os
|
| 6 |
from pathlib import Path
|
| 7 |
import shutil
|
|
|
|
| 13 |
DEFAULT_ASR_MODEL_ID = "nvidia/nemotron-speech-streaming-en-0.6b"
|
| 14 |
DEFAULT_ASR_BACKEND = "nemo-asr"
|
| 15 |
DEFAULT_ASR_SAMPLE_RATE = 16_000
|
| 16 |
+
DEFAULT_WHISPER_MODEL_ID = "openai/whisper-small.en"
|
| 17 |
+
WHISPER_BACKEND = "whisper-transformers"
|
| 18 |
+
|
| 19 |
+
_logger = logging.getLogger("hackathon_advisor")
|
| 20 |
|
| 21 |
|
| 22 |
@dataclass(frozen=True)
|
|
|
|
| 52 |
|
| 53 |
|
| 54 |
class NemotronAsrTranscriber:
|
| 55 |
+
"""Nemotron voice input. Its declared identity (status, model id) is the deployed Space
|
| 56 |
+
backend — NVIDIA NeMo ASR. When NeMo is not installed (e.g. local development on a Mac,
|
| 57 |
+
where NeMo does not install cleanly), transcription transparently falls back to a local
|
| 58 |
+
Whisper model through transformers so voice still works; the returned transcript reports
|
| 59 |
+
whichever engine actually ran."""
|
| 60 |
+
|
| 61 |
backend = DEFAULT_ASR_BACKEND
|
| 62 |
|
| 63 |
def __init__(
    self,
    model_id: str = DEFAULT_ASR_MODEL_ID,
    sample_rate: int = DEFAULT_ASR_SAMPLE_RATE,
    whisper_model_id: str = DEFAULT_WHISPER_MODEL_ID,
) -> None:
    """Record the configured model ids and sample rate; no model is loaded here.

    Blank (whitespace-only) ids fall back to the module defaults. The engine slot and the
    "active" backend/model fields stay empty until a loader fills them in."""
    primary_id = model_id.strip()
    self.model_id = primary_id if primary_id else DEFAULT_ASR_MODEL_ID
    self.sample_rate = sample_rate
    fallback_id = whisper_model_id.strip()
    self.whisper_model_id = fallback_id if fallback_id else DEFAULT_WHISPER_MODEL_ID
    # (kind, engine) pair set by a loader: ("nemo", model) or ("whisper", (processor, model)).
    self._engine: tuple[str, Any] | None = None
    self._active_backend = ""
    self._active_model_id = ""
|
| 75 |
|
| 76 |
def status(self) -> AsrStatus:
    """Describe the transcriber: the engine that actually loaded if there is one,
    otherwise the declared backend and model id."""
    backend_name = self._active_backend or self.backend
    model_name = self._active_model_id or self.model_id
    is_loaded = self._engine is not None
    return AsrStatus(
        backend=backend_name,
        model_id=model_name,
        loaded=is_loaded,
        sample_rate=self.sample_rate,
    )
|
| 83 |
|
|
|
|
| 86 |
if not source.is_file():
|
| 87 |
raise RuntimeError("Voice note was not saved before transcription.")
|
| 88 |
self._ensure_loaded()
|
| 89 |
+
kind, engine = self._engine # type: ignore[misc]
|
| 90 |
with tempfile.TemporaryDirectory(prefix="advisor-asr-") as directory:
|
| 91 |
wav_path = Path(directory) / "voice.wav"
|
| 92 |
normalize_audio_for_asr(source, wav_path, self.sample_rate)
|
| 93 |
+
if kind == "nemo":
|
| 94 |
+
outputs = engine.transcribe([str(wav_path)], batch_size=1)
|
| 95 |
+
transcript = extract_transcript(outputs).strip()
|
| 96 |
+
else:
|
| 97 |
+
transcript = _whisper_transcribe(engine, wav_path, self.sample_rate).strip()
|
| 98 |
if not transcript:
|
| 99 |
+
raise RuntimeError(f"{self._active_backend or self.backend} returned an empty transcript.")
|
| 100 |
return AsrTranscript(
|
| 101 |
transcript=transcript,
|
| 102 |
+
model_id=self._active_model_id or self.model_id,
|
| 103 |
+
backend=self._active_backend or self.backend,
|
| 104 |
sample_rate=self.sample_rate,
|
| 105 |
)
|
| 106 |
|
| 107 |
def _ensure_loaded(self) -> None:
    """Load an ASR engine once, honoring the ADVISOR_ASR_BACKEND preference.

    "whisper" (or the Whisper backend name) loads Whisper directly. Otherwise NeMo is
    tried first; if it raises RuntimeError, an explicit NeMo preference re-raises, while
    any other preference logs a warning and falls back to Whisper."""
    if self._engine is not None:
        return
    requested = os.environ.get("ADVISOR_ASR_BACKEND", "auto").strip().lower()
    wants_whisper = requested in ("whisper", WHISPER_BACKEND)
    if not wants_whisper:
        try:
            self._load_nemo()
        except RuntimeError:
            if requested in ("nemo", "nemo-asr", "nemotron"):
                raise  # explicit Nemotron request: do not silently fall back
            _logger.warning("NeMo ASR unavailable; falling back to local Whisper (%s).", self.whisper_model_id)
        else:
            return
    self._load_whisper()
|
| 122 |
+
|
| 123 |
+
def _load_nemo(self) -> None:
|
| 124 |
try:
|
| 125 |
import torch
|
| 126 |
import nemo.collections.asr as nemo_asr
|
|
|
|
| 130 |
"before enabling voice transcription."
|
| 131 |
) from error
|
| 132 |
model = nemo_asr.models.ASRModel.from_pretrained(model_name=self.model_id)
|
| 133 |
+
device = os.environ.get("ADVISOR_ASR_DEVICE", "").strip() or ("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
| 134 |
model.to(device)
|
| 135 |
model.eval()
|
| 136 |
+
self._engine = ("nemo", model)
|
| 137 |
+
self._active_backend = self.backend
|
| 138 |
+
self._active_model_id = self.model_id
|
| 139 |
+
|
| 140 |
+
def _load_whisper(self) -> None:
    """Load the Whisper processor and model through transformers and make them the
    active engine.

    Raises:
        RuntimeError: If torch or transformers cannot be imported.
    """
    try:
        import torch
        from transformers import WhisperForConditionalGeneration, WhisperProcessor
    except ImportError as error:
        raise RuntimeError(
            "Local voice fallback requires transformers and torch. Install runtime "
            "requirements before enabling voice transcription."
        ) from error
    target = _resolve_asr_device(torch)
    if target == "mps":
        # NOTE(review): this is set after torch has been imported — confirm PyTorch still
        # honors the variable at this point.
        os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
    checkpoint = self.whisper_model_id
    processor = WhisperProcessor.from_pretrained(checkpoint)
    whisper = WhisperForConditionalGeneration.from_pretrained(checkpoint)
    whisper.to(target)
    whisper.eval()
    self._engine = ("whisper", (processor, whisper))
    self._active_backend = WHISPER_BACKEND
    self._active_model_id = checkpoint
    _logger.info("Whisper ASR loaded | model=%s device=%s", checkpoint, target)
|
| 160 |
|
| 161 |
|
| 162 |
def create_asr_transcriber() -> NemotronAsrTranscriber:
|
|
|
|
| 166 |
return NemotronAsrTranscriber(
|
| 167 |
model_id=os.environ.get("ADVISOR_ASR_MODEL_ID", DEFAULT_ASR_MODEL_ID),
|
| 168 |
sample_rate=sample_rate,
|
| 169 |
+
whisper_model_id=os.environ.get("ADVISOR_ASR_WHISPER_MODEL", DEFAULT_WHISPER_MODEL_ID),
|
| 170 |
)
|
| 171 |
|
| 172 |
|
| 173 |
+
def _resolve_asr_device(torch: Any) -> str:
|
| 174 |
+
forced = os.environ.get("ADVISOR_ASR_DEVICE", "").strip().lower()
|
| 175 |
+
if forced:
|
| 176 |
+
return forced
|
| 177 |
+
try:
|
| 178 |
+
if torch.cuda.is_available():
|
| 179 |
+
return "cuda"
|
| 180 |
+
except Exception: # pragma: no cover - device dependent
|
| 181 |
+
pass
|
| 182 |
+
try:
|
| 183 |
+
if torch.backends.mps.is_available():
|
| 184 |
+
return "mps"
|
| 185 |
+
except Exception: # pragma: no cover - device dependent
|
| 186 |
+
pass
|
| 187 |
+
return "cpu"
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
def _whisper_transcribe(engine: tuple[Any, Any], wav_path: Path, sample_rate: int) -> str:
    """Transcribe one WAV file with a loaded (processor, model) Whisper pair.

    Returns the first decoded string, or "" when decoding yields nothing. Generation is
    capped at 128 new tokens."""
    import torch

    processor, model = engine
    waveform = _read_wav_mono_float32(wav_path)
    encoded = processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
    # Move the features to wherever the model lives before generating.
    features = encoded.input_features.to(model.device)
    with torch.inference_mode():
        token_ids = model.generate(features, max_new_tokens=128)
    texts = processor.batch_decode(token_ids, skip_special_tokens=True)
    if not texts:
        return ""
    return texts[0]
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def _read_wav_mono_float32(wav_path: Path) -> Any:
|
| 204 |
+
import wave
|
| 205 |
+
|
| 206 |
+
import numpy as np
|
| 207 |
+
|
| 208 |
+
with wave.open(str(wav_path), "rb") as wav:
|
| 209 |
+
channels = wav.getnchannels()
|
| 210 |
+
frames = wav.readframes(wav.getnframes())
|
| 211 |
+
audio = np.frombuffer(frames, dtype=np.int16).astype(np.float32) / 32768.0
|
| 212 |
+
if channels > 1:
|
| 213 |
+
audio = audio.reshape(-1, channels).mean(axis=1)
|
| 214 |
+
return audio
|
| 215 |
+
|
| 216 |
+
|
| 217 |
def normalize_audio_for_asr(source: Path, target: Path, sample_rate: int = DEFAULT_ASR_SAMPLE_RATE) -> None:
|
| 218 |
ffmpeg = shutil.which("ffmpeg")
|
| 219 |
if not ffmpeg:
|
hackathon_advisor/data.py
CHANGED
|
@@ -344,6 +344,15 @@ class ProjectIndex:
|
|
| 344 |
tuple(float(value) for value in document["vector"])
|
| 345 |
for document in index_payload["documents"]
|
| 346 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
|
| 348 |
@classmethod
|
| 349 |
def from_file(cls, path: Path, query_embedder: EmbeddingFunction | None = None) -> "ProjectIndex":
|
|
|
|
| 344 |
tuple(float(value) for value in document["vector"])
|
| 345 |
for document in index_payload["documents"]
|
| 346 |
]
|
| 347 |
+
self._vector_by_id = {
|
| 348 |
+
project.id: vector for project, vector in zip(self.projects, self._vectors)
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
def vector_for(self, project_id: str) -> tuple[float, ...] | None:
    """Return the stored vector for ``project_id``, or None when the id is unknown."""
    lookup = self._vector_by_id
    return lookup[project_id] if project_id in lookup else None
|
| 353 |
+
|
| 354 |
+
def embed_query(self, text: str) -> tuple[float, ...]:
    """Embed ``text`` with the index's query embedder and return the result of
    ``normalize_vector`` as a tuple."""
    raw = self._embed_query(text)
    normalized = normalize_vector(raw)
    return tuple(normalized)
|
| 356 |
|
| 357 |
@classmethod
|
| 358 |
def from_file(cls, path: Path, query_embedder: EmbeddingFunction | None = None) -> "ProjectIndex":
|
hackathon_advisor/model_runtime.py
CHANGED
|
@@ -1,18 +1,25 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
|
|
|
| 3 |
from contextlib import nullcontext
|
| 4 |
from dataclasses import dataclass
|
|
|
|
| 5 |
import os
|
| 6 |
import re
|
|
|
|
| 7 |
from typing import Any, Protocol
|
| 8 |
|
| 9 |
from hackathon_advisor.tools import idea_from_text
|
| 10 |
from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
|
| 14 |
DEFAULT_ADAPTER_ID = "build-small-hackathon/hackathon-advisor-minicpm5-lora"
|
| 15 |
DEFAULT_BACKEND = "rules"
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
class ToolPlanner(Protocol):
|
|
@@ -24,6 +31,11 @@ class ToolPlanner(Protocol):
|
|
| 24 |
def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
|
| 25 |
...
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
@dataclass(frozen=True)
|
| 29 |
class RuntimeStatus:
|
|
@@ -33,6 +45,7 @@ class RuntimeStatus:
|
|
| 33 |
adapter_revision: str
|
| 34 |
loaded: bool
|
| 35 |
tool_count: int
|
|
|
|
| 36 |
|
| 37 |
def to_dict(self) -> dict[str, Any]:
|
| 38 |
return {
|
|
@@ -42,6 +55,7 @@ class RuntimeStatus:
|
|
| 42 |
"adapter_revision": self.adapter_revision,
|
| 43 |
"loaded": self.loaded,
|
| 44 |
"tool_count": self.tool_count,
|
|
|
|
| 45 |
}
|
| 46 |
|
| 47 |
|
|
@@ -61,13 +75,13 @@ class RuleBasedPlanner:
|
|
| 61 |
output = '<function name="list_projects">{"sort":"likes"}</function>'
|
| 62 |
elif project_id:
|
| 63 |
output = f'<function name="get_project">{{"id":{_json_string(project_id)}}}</function>'
|
| 64 |
-
elif
|
| 65 |
output = '<function name="compare_ideas">{}</function>'
|
| 66 |
-
elif
|
| 67 |
output = '<function name="make_plan">{}</function>'
|
| 68 |
-
elif
|
| 69 |
output = '<function name="find_whitespace">{}</function>'
|
| 70 |
-
elif
|
| 71 |
output = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
|
| 72 |
else:
|
| 73 |
title, pitch = idea_from_text(text)
|
|
@@ -78,6 +92,9 @@ class RuleBasedPlanner:
|
|
| 78 |
)
|
| 79 |
return resolve_tool_call(output, fallback_query=text)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
class MiniCPMTransformersPlanner:
|
| 83 |
backend = "minicpm-transformers"
|
|
@@ -87,19 +104,34 @@ class MiniCPMTransformersPlanner:
|
|
| 87 |
model_id: str = DEFAULT_MODEL_ID,
|
| 88 |
adapter_id: str = "",
|
| 89 |
adapter_revision: str = "",
|
|
|
|
| 90 |
) -> None:
|
| 91 |
self.model_id = model_id.strip() or DEFAULT_MODEL_ID
|
| 92 |
self.adapter_id = adapter_id.strip()
|
| 93 |
self.adapter_revision = adapter_revision.strip()
|
|
|
|
|
|
|
| 94 |
self._tokenizer = None
|
| 95 |
self._model = None
|
| 96 |
self._inference_mode = None
|
| 97 |
|
| 98 |
def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
self._ensure_loaded()
|
| 100 |
prompt = render_context(message, state)
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
def _ensure_loaded(self) -> None:
|
| 105 |
if self._model is not None and self._tokenizer is not None:
|
|
@@ -121,26 +153,60 @@ class MiniCPMTransformersPlanner:
|
|
| 121 |
adapter_config = PeftConfig.from_pretrained(self.adapter_id, **adapter_kwargs)
|
| 122 |
base_model_id = str(adapter_config.base_model_name_or_path or base_model_id)
|
| 123 |
|
|
|
|
|
|
|
|
|
|
| 124 |
self._tokenizer = AutoTokenizer.from_pretrained(
|
| 125 |
tokenizer_id,
|
| 126 |
trust_remote_code=True,
|
| 127 |
**(adapter_kwargs if self.adapter_id else {}),
|
| 128 |
)
|
| 129 |
-
model =
|
| 130 |
-
base_model_id,
|
| 131 |
-
dtype="auto",
|
| 132 |
-
device_map="auto",
|
| 133 |
-
trust_remote_code=True,
|
| 134 |
)
|
| 135 |
if self.adapter_id:
|
| 136 |
model = PeftModel.from_pretrained(model, self.adapter_id, **adapter_kwargs)
|
|
|
|
|
|
|
| 137 |
model.eval()
|
| 138 |
_disable_sampling_generation_defaults(model)
|
| 139 |
self._model = model
|
| 140 |
if hasattr(torch, "inference_mode"):
|
| 141 |
self._inference_mode = torch.inference_mode
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
assert self._tokenizer is not None
|
| 145 |
assert self._model is not None
|
| 146 |
messages = [
|
|
@@ -156,19 +222,88 @@ class MiniCPMTransformersPlanner:
|
|
| 156 |
return_tensors="pt",
|
| 157 |
).to(next(self._model.parameters()).device)
|
| 158 |
_strip_unused_generation_inputs(inputs)
|
| 159 |
-
|
| 160 |
-
with context:
|
| 161 |
-
generated = self._model.generate(
|
| 162 |
-
**inputs,
|
| 163 |
-
max_new_tokens=180,
|
| 164 |
-
do_sample=False,
|
| 165 |
-
)
|
| 166 |
-
new_tokens = generated[:, inputs["input_ids"].shape[-1] :]
|
| 167 |
-
decoded = self._tokenizer.decode(new_tokens[0], skip_special_tokens=True).strip()
|
| 168 |
-
return _normalize_xml_tool_output(decoded)
|
| 169 |
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
|
| 173 |
if backend in ("", "rules"):
|
| 174 |
return RuleBasedPlanner()
|
|
@@ -177,11 +312,13 @@ def create_tool_planner() -> ToolPlanner:
|
|
| 177 |
os.environ.get("ADVISOR_MODEL_ID", DEFAULT_MODEL_ID),
|
| 178 |
os.environ.get("ADVISOR_ADAPTER_ID", ""),
|
| 179 |
os.environ.get("ADVISOR_ADAPTER_REVISION", ""),
|
|
|
|
| 180 |
)
|
| 181 |
raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
|
| 182 |
|
| 183 |
|
| 184 |
def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
|
|
|
|
| 185 |
return RuntimeStatus(
|
| 186 |
backend=planner.backend,
|
| 187 |
model_id=planner.model_id,
|
|
@@ -189,6 +326,7 @@ def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
|
|
| 189 |
adapter_revision=planner.adapter_revision,
|
| 190 |
loaded=not isinstance(planner, MiniCPMTransformersPlanner) or planner._model is not None,
|
| 191 |
tool_count=len(tool_schemas()),
|
|
|
|
| 192 |
)
|
| 193 |
|
| 194 |
|
|
@@ -274,6 +412,13 @@ def _wants_project_list(lower_text: str) -> bool:
|
|
| 274 |
return lower_text in exact_phrases or any(lower_text.startswith(prefix) for prefix in command_prefixes)
|
| 275 |
|
| 276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
def _project_reference_id(text: str) -> str:
|
| 278 |
prefixes = (
|
| 279 |
"read project ",
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
+
from collections.abc import Iterator
|
| 4 |
from contextlib import nullcontext
|
| 5 |
from dataclasses import dataclass
|
| 6 |
+
import logging
|
| 7 |
import os
|
| 8 |
import re
|
| 9 |
+
import threading
|
| 10 |
from typing import Any, Protocol
|
| 11 |
|
| 12 |
from hackathon_advisor.tools import idea_from_text
|
| 13 |
from hackathon_advisor.tool_contracts import ToolResolution, resolve_tool_call, tool_schemas
|
| 14 |
+
from hackathon_advisor.zerogpu import zero_gpu_enabled
|
| 15 |
+
|
| 16 |
+
_logger = logging.getLogger("hackathon_advisor")
|
| 17 |
|
| 18 |
|
| 19 |
DEFAULT_MODEL_ID = "openbmb/MiniCPM5-1B"
|
| 20 |
DEFAULT_ADAPTER_ID = "build-small-hackathon/hackathon-advisor-minicpm5-lora"
|
| 21 |
DEFAULT_BACKEND = "rules"
|
| 22 |
+
MAX_TOOL_CALL_TOKENS = 180
|
| 23 |
|
| 24 |
|
| 25 |
class ToolPlanner(Protocol):
|
|
|
|
| 31 |
def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
|
| 32 |
...
|
| 33 |
|
| 34 |
+
def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
|
| 35 |
+
"""Yield {"type": "model_progress", "tokens": int} events while planning, then a
|
| 36 |
+
final {"type": "resolved", "resolution": ToolResolution} event."""
|
| 37 |
+
...
|
| 38 |
+
|
| 39 |
|
| 40 |
@dataclass(frozen=True)
|
| 41 |
class RuntimeStatus:
|
|
|
|
| 45 |
adapter_revision: str
|
| 46 |
loaded: bool
|
| 47 |
tool_count: int
|
| 48 |
+
device: str = ""
|
| 49 |
|
| 50 |
def to_dict(self) -> dict[str, Any]:
|
| 51 |
return {
|
|
|
|
| 55 |
"adapter_revision": self.adapter_revision,
|
| 56 |
"loaded": self.loaded,
|
| 57 |
"tool_count": self.tool_count,
|
| 58 |
+
"device": self.device,
|
| 59 |
}
|
| 60 |
|
| 61 |
|
|
|
|
| 75 |
output = '<function name="list_projects">{"sort":"likes"}</function>'
|
| 76 |
elif project_id:
|
| 77 |
output = f'<function name="get_project">{{"id":{_json_string(project_id)}}}</function>'
|
| 78 |
+
elif _has_command_term(lower, ("compare", "choose", "rank")):
|
| 79 |
output = '<function name="compare_ideas">{}</function>'
|
| 80 |
+
elif _has_command_term(lower, ("plan", "roadmap", "next step", "milestone")):
|
| 81 |
output = '<function name="make_plan">{}</function>'
|
| 82 |
+
elif _has_command_term(lower, ("whitespace", "original", "new", "bolder", "unwritten", "gap")):
|
| 83 |
output = '<function name="find_whitespace">{}</function>'
|
| 84 |
+
elif _has_command_term(lower, ("search", "similar", "already", "existing", "overlap", "echo")):
|
| 85 |
output = f'<function name="search_projects">{{"query":{_json_string(text)}}}</function>'
|
| 86 |
else:
|
| 87 |
title, pitch = idea_from_text(text)
|
|
|
|
| 92 |
)
|
| 93 |
return resolve_tool_call(output, fallback_query=text)
|
| 94 |
|
| 95 |
+
def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Yield the single terminal event for a rule-based turn.

    The work is delegated to ``self.plan``; its return value is wrapped in one
    ``{"type": "resolved", "resolution": ...}`` event. No ``model_progress`` events
    are emitted by this planner.
    """
    resolution = self.plan(message, state)
    yield {"type": "resolved", "resolution": resolution}
|
| 97 |
+
|
| 98 |
|
| 99 |
class MiniCPMTransformersPlanner:
|
| 100 |
backend = "minicpm-transformers"
|
|
|
|
| 104 |
model_id: str = DEFAULT_MODEL_ID,
|
| 105 |
adapter_id: str = "",
|
| 106 |
adapter_revision: str = "",
|
| 107 |
+
device: str = "auto",
|
| 108 |
) -> None:
|
| 109 |
self.model_id = model_id.strip() or DEFAULT_MODEL_ID
|
| 110 |
self.adapter_id = adapter_id.strip()
|
| 111 |
self.adapter_revision = adapter_revision.strip()
|
| 112 |
+
self.device = (device or "auto").strip().lower() or "auto"
|
| 113 |
+
self.resolved_device = ""
|
| 114 |
self._tokenizer = None
|
| 115 |
self._model = None
|
| 116 |
self._inference_mode = None
|
| 117 |
|
| 118 |
def plan(self, message: str, state: dict[str, Any]) -> ToolResolution:
    """Run a full planning turn and return only its final resolution.

    Drains ``self.plan_iter`` to completion, ignoring every event whose ``type`` is
    not ``"resolved"``. If several resolved events appear, the last one wins.

    Raises:
        AssertionError: if the event stream ends without a ``"resolved"`` event.
    """
    final: ToolResolution | None = None
    for event in self.plan_iter(message, state):
        if event.get("type") != "resolved":
            continue
        final = event["resolution"]
    assert final is not None
    return final
|
| 125 |
+
|
| 126 |
+
def plan_iter(self, message: str, state: dict[str, Any]) -> Iterator[dict[str, Any]]:
    """Stream planning progress for one turn, then the resolved tool call.

    Ensures the model is loaded, renders the prompt from ``message`` and ``state``,
    and forwards each ``(count, text)`` pair produced by ``self._stream_tool_call``
    as a ``model_progress`` event (``tokens`` is that running count, ``max_tokens``
    is ``MAX_TOOL_CALL_TOKENS``). After the stream ends, the concatenated text is
    stripped, normalised and resolved into the final ``resolved`` event, with
    ``message`` as the fallback query.
    """
    self._ensure_loaded()
    prompt = render_context(message, state)
    chunks: list[str] = []
    for count, text in self._stream_tool_call(prompt):
        chunks.append(text)
        progress = {"type": "model_progress", "tokens": count, "max_tokens": MAX_TOOL_CALL_TOKENS}
        yield progress
    raw_output = "".join(chunks).strip()
    tool_call = _normalize_xml_tool_output(raw_output)
    resolution = resolve_tool_call(tool_call, fallback_query=message)
    yield {"type": "resolved", "resolution": resolution}
|
| 135 |
|
| 136 |
def _ensure_loaded(self) -> None:
|
| 137 |
if self._model is not None and self._tokenizer is not None:
|
|
|
|
| 153 |
adapter_config = PeftConfig.from_pretrained(self.adapter_id, **adapter_kwargs)
|
| 154 |
base_model_id = str(adapter_config.base_model_name_or_path or base_model_id)
|
| 155 |
|
| 156 |
+
target = _resolve_torch_device(self.device, torch)
|
| 157 |
+
self.resolved_device = target
|
| 158 |
+
|
| 159 |
self._tokenizer = AutoTokenizer.from_pretrained(
|
| 160 |
tokenizer_id,
|
| 161 |
trust_remote_code=True,
|
| 162 |
**(adapter_kwargs if self.adapter_id else {}),
|
| 163 |
)
|
| 164 |
+
model = self._load_model_on_device(
|
| 165 |
+
AutoModelForCausalLM, base_model_id, target, torch
|
|
|
|
|
|
|
|
|
|
| 166 |
)
|
| 167 |
if self.adapter_id:
|
| 168 |
model = PeftModel.from_pretrained(model, self.adapter_id, **adapter_kwargs)
|
| 169 |
+
if target not in ("auto", "cpu"):
|
| 170 |
+
model = model.to(target)
|
| 171 |
model.eval()
|
| 172 |
_disable_sampling_generation_defaults(model)
|
| 173 |
self._model = model
|
| 174 |
if hasattr(torch, "inference_mode"):
|
| 175 |
self._inference_mode = torch.inference_mode
|
| 176 |
+
_logger.info(
|
| 177 |
+
"MiniCPM loaded | requested_device=%s resolved_device=%s adapter=%s",
|
| 178 |
+
self.device,
|
| 179 |
+
self.resolved_device,
|
| 180 |
+
self.adapter_id or "(none)",
|
| 181 |
+
)
|
| 182 |
|
| 183 |
+
def _load_model_on_device(self, model_cls: Any, base_model_id: str, target: str, torch: Any) -> Any:
|
| 184 |
+
if target == "auto":
|
| 185 |
+
return model_cls.from_pretrained(
|
| 186 |
+
base_model_id, dtype="auto", device_map="auto", trust_remote_code=True
|
| 187 |
+
)
|
| 188 |
+
if target == "cpu":
|
| 189 |
+
return model_cls.from_pretrained(
|
| 190 |
+
base_model_id, dtype=torch.float32, device_map={"": "cpu"}, trust_remote_code=True
|
| 191 |
+
)
|
| 192 |
+
# mps / cuda: load on CPU first (no accelerate dispatch), then move to the device.
|
| 193 |
+
if target == "mps":
|
| 194 |
+
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
|
| 195 |
+
try:
|
| 196 |
+
model = model_cls.from_pretrained(
|
| 197 |
+
base_model_id, dtype=torch.float16, trust_remote_code=True
|
| 198 |
+
)
|
| 199 |
+
return model.to(target)
|
| 200 |
+
except Exception as error: # noqa: BLE001 - keep the turn runnable on CPU
|
| 201 |
+
if target == "mps":
|
| 202 |
+
_logger.warning("MPS load failed (%r); falling back to CPU float32.", error)
|
| 203 |
+
self.resolved_device = "cpu"
|
| 204 |
+
return model_cls.from_pretrained(
|
| 205 |
+
base_model_id, dtype=torch.float32, device_map={"": "cpu"}, trust_remote_code=True
|
| 206 |
+
)
|
| 207 |
+
raise
|
| 208 |
+
|
| 209 |
+
def _prepare_inputs(self, prompt: str) -> Any:
|
| 210 |
assert self._tokenizer is not None
|
| 211 |
assert self._model is not None
|
| 212 |
messages = [
|
|
|
|
| 222 |
return_tensors="pt",
|
| 223 |
).to(next(self._model.parameters()).device)
|
| 224 |
_strip_unused_generation_inputs(inputs)
|
| 225 |
+
return inputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
+
def _stream_tool_call(self, prompt: str) -> Iterator[tuple[int, str]]:
    """Generate the tool call on a worker thread and yield its text as it arrives.

    Yields ``(count, text)`` pairs, where ``count`` is the running number of
    non-empty text pieces received from the streamer so far. Generation is greedy
    (``do_sample=False``) and capped at ``MAX_TOOL_CALL_TOKENS`` new tokens.

    Raises:
        BaseException: the first error raised inside ``generate()``, re-raised on
            the consuming thread once the streamer has been drained.
    """
    from transformers import TextIteratorStreamer

    assert self._tokenizer is not None
    assert self._model is not None
    model_inputs = self._prepare_inputs(prompt)
    streamer = TextIteratorStreamer(
        self._tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = {
        **model_inputs,
        "max_new_tokens": MAX_TOOL_CALL_TOKENS,
        "do_sample": False,
        "streamer": streamer,
    }
    failures: list[BaseException] = []

    def _generate_in_background() -> None:
        guard = nullcontext() if self._inference_mode is None else self._inference_mode()
        try:
            with guard:
                self._model.generate(**generate_kwargs)
        except BaseException as error:  # surfaced after the streamer drains
            failures.append(error)
            # generate() never reached its end sentinel, so wake the consumer instead of
            # letting it block forever, then re-raise from the main thread below.
            streamer.end()

    worker = threading.Thread(target=_generate_in_background, daemon=True)
    worker.start()
    emitted = 0
    for text in streamer:
        if text:
            emitted += 1
            yield emitted, text
    worker.join()
    if failures:
        raise failures[0]
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def _device_available(device: str, torch: Any) -> bool:
|
| 269 |
+
try:
|
| 270 |
+
if device == "cuda":
|
| 271 |
+
return bool(torch.cuda.is_available())
|
| 272 |
+
if device == "mps":
|
| 273 |
+
backend = getattr(torch.backends, "mps", None)
|
| 274 |
+
return bool(backend is not None and backend.is_available())
|
| 275 |
+
except Exception: # pragma: no cover - device dependent
|
| 276 |
+
return False
|
| 277 |
+
return False
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def _best_local_device(torch: Any) -> str:
    """Pick the preferred on-machine device: CUDA, then MPS, then CPU.

    CUDA is only considered when ZeroGPU is not enabled.
    """
    # Avoid touching CUDA inside a ZeroGPU main process — there is no local GPU there, and
    # probing it can disturb the ZeroGPU allocator.
    cuda_allowed = not zero_gpu_enabled()
    if cuda_allowed and _device_available("cuda", torch):
        return "cuda"
    return "mps" if _device_available("mps", torch) else "cpu"
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def _resolve_torch_device(preference: str, torch: Any) -> str:
|
| 291 |
+
"""Map a configured device preference to a concrete torch device.
|
| 292 |
+
|
| 293 |
+
"auto" stays "auto" (accelerate device_map handles ZeroGPU/CUDA/CPU placement). "local"
|
| 294 |
+
picks the best on-machine accelerator: CUDA -> MPS (Apple Silicon) -> CPU. An explicit
|
| 295 |
+
cuda/mps that is unavailable degrades to the best available local device."""
|
| 296 |
+
pref = (preference or "auto").strip().lower()
|
| 297 |
+
if pref == "auto":
|
| 298 |
+
return "auto"
|
| 299 |
+
if pref == "cpu":
|
| 300 |
+
return "cpu"
|
| 301 |
+
if pref in ("cuda", "mps"):
|
| 302 |
+
return pref if _device_available(pref, torch) else _best_local_device(torch)
|
| 303 |
+
return _best_local_device(torch)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def create_tool_planner(device: str = "auto") -> ToolPlanner:
|
| 307 |
backend = os.environ.get("ADVISOR_MODEL_BACKEND", DEFAULT_BACKEND).strip().lower()
|
| 308 |
if backend in ("", "rules"):
|
| 309 |
return RuleBasedPlanner()
|
|
|
|
| 312 |
os.environ.get("ADVISOR_MODEL_ID", DEFAULT_MODEL_ID),
|
| 313 |
os.environ.get("ADVISOR_ADAPTER_ID", ""),
|
| 314 |
os.environ.get("ADVISOR_ADAPTER_REVISION", ""),
|
| 315 |
+
device=device,
|
| 316 |
)
|
| 317 |
raise RuntimeError(f"Unsupported ADVISOR_MODEL_BACKEND={backend!r}")
|
| 318 |
|
| 319 |
|
| 320 |
def runtime_status(planner: ToolPlanner) -> RuntimeStatus:
|
| 321 |
+
device = getattr(planner, "resolved_device", "") or getattr(planner, "device", "")
|
| 322 |
return RuntimeStatus(
|
| 323 |
backend=planner.backend,
|
| 324 |
model_id=planner.model_id,
|
|
|
|
| 326 |
adapter_revision=planner.adapter_revision,
|
| 327 |
loaded=not isinstance(planner, MiniCPMTransformersPlanner) or planner._model is not None,
|
| 328 |
tool_count=len(tool_schemas()),
|
| 329 |
+
device=str(device),
|
| 330 |
)
|
| 331 |
|
| 332 |
|
|
|
|
| 412 |
return lower_text in exact_phrases or any(lower_text.startswith(prefix) for prefix in command_prefixes)
|
| 413 |
|
| 414 |
|
| 415 |
+
def _has_command_term(lower_text: str, terms: tuple[str, ...]) -> bool:
|
| 416 |
+
return any(
|
| 417 |
+
re.search(rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])", lower_text)
|
| 418 |
+
for term in terms
|
| 419 |
+
)
|
| 420 |
+
|
| 421 |
+
|
| 422 |
def _project_reference_id(text: str) -> str:
|
| 423 |
prefixes = (
|
| 424 |
"read project ",
|
hackathon_advisor/profiling.py
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Lightweight logging and per-turn profiling for the advisor runtime.
|
| 2 |
+
|
| 3 |
+
The numbers here are debug/operations signal only — they are written to logs, never to the
|
| 4 |
+
UI. Stage timings are measured by *observing the turn event stream from the main process*, so
|
| 5 |
+
they stay correct even when the model itself runs inside a ZeroGPU fork (where a module-global
|
| 6 |
+
counter would reset on every call).
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from dataclasses import dataclass, field
|
| 12 |
+
import logging
|
| 13 |
+
import os
|
| 14 |
+
import platform
|
| 15 |
+
import sys
|
| 16 |
+
import threading
|
| 17 |
+
import time
|
| 18 |
+
from typing import Any
|
| 19 |
+
|
| 20 |
+
logger = logging.getLogger("hackathon_advisor")
|
| 21 |
+
|
| 22 |
+
_counter_lock = threading.Lock()
|
| 23 |
+
_messages_processed = 0
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def configure_logging() -> None:
    """Attach a stream handler once, honoring ADVISOR_LOG_LEVEL (default INFO).

    The variable is looked up as an upper-cased attribute of the ``logging`` module.
    Only integer level constants are accepted. Any other value, whether an unknown
    name or a non-level attribute such as ``BASIC_FORMAT``, falls back to INFO
    rather than making ``setLevel`` raise during startup.
    """
    level_name = os.environ.get("ADVISOR_LOG_LEVEL", "INFO").strip().upper()
    level = getattr(logging, level_name, None)
    if isinstance(level, bool) or not isinstance(level, int):
        level = logging.INFO
    logger.setLevel(level)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s"))
        logger.addHandler(handler)
        # This logger now has its own handler; do not also emit through the root logger.
        logger.propagate = False
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def next_message_index() -> int:
    """Bump the lifetime count of processed advisor messages and return the new value.

    The increment and the read both happen under ``_counter_lock``.
    """
    global _messages_processed
    with _counter_lock:
        updated = _messages_processed + 1
        _messages_processed = updated
        return updated
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def messages_processed() -> int:
    """Return the current lifetime count of processed messages (read without the lock)."""
    processed_so_far = _messages_processed
    return processed_so_far
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _ms(seconds: float) -> float:
|
| 50 |
+
return round(seconds * 1000.0, 1)
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def resource_snapshot() -> dict[str, Any]:
    """Sample process resource usage on a best-effort basis; never raises.

    Uses the stdlib ``resource`` module for ``rss_mb`` (taken from ``ru_maxrss``),
    ``cpu_user_s`` and ``cpu_sys_s``, then merges in whatever
    ``_torch_memory_snapshot`` reports. Keys that could not be sampled are omitted.
    """
    sampled: dict[str, Any] = {}
    try:
        import resource

        rusage = resource.getrusage(resource.RUSAGE_SELF)
        # ru_maxrss is bytes on macOS, kilobytes on Linux.
        on_macos = platform.system() == "Darwin"
        rss_divisor = 1024 * 1024 if on_macos else 1024
        sampled["rss_mb"] = round(rusage.ru_maxrss / rss_divisor, 1)
        sampled["cpu_user_s"] = round(rusage.ru_utime, 3)
        sampled["cpu_sys_s"] = round(rusage.ru_stime, 3)
    except Exception:  # pragma: no cover - platform dependent
        pass
    sampled.update(_torch_memory_snapshot())
    return sampled
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _torch_memory_snapshot() -> dict[str, Any]:
|
| 73 |
+
out: dict[str, Any] = {}
|
| 74 |
+
torch = sys.modules.get("torch") # do not import torch just to profile
|
| 75 |
+
if torch is None:
|
| 76 |
+
return out
|
| 77 |
+
try:
|
| 78 |
+
if torch.cuda.is_available():
|
| 79 |
+
out["cuda_alloc_mb"] = round(torch.cuda.memory_allocated() / 1e6, 1)
|
| 80 |
+
out["cuda_peak_mb"] = round(torch.cuda.max_memory_allocated() / 1e6, 1)
|
| 81 |
+
except Exception: # pragma: no cover - device dependent
|
| 82 |
+
pass
|
| 83 |
+
try:
|
| 84 |
+
mps = getattr(torch, "mps", None)
|
| 85 |
+
current = getattr(mps, "current_allocated_memory", None)
|
| 86 |
+
if current is not None:
|
| 87 |
+
out["mps_alloc_mb"] = round(current() / 1e6, 1)
|
| 88 |
+
except Exception: # pragma: no cover - device dependent
|
| 89 |
+
pass
|
| 90 |
+
return out
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
@dataclass
class TurnProfiler:
    """Timer for one advisor turn, driven by the turn's own event stream.

    Feed every emitted event dict to ``observe``; call ``log_summary`` once the turn
    is over (from a ``finally`` block, so interrupted turns are logged as well).
    """

    message_index: int
    compute: str
    backend: str
    device: str = ""
    message_chars: int = 0
    # perf_counter reading taken when the profiler is created.
    started: float = field(default_factory=time.perf_counter)
    # First perf_counter reading seen for each stage name.
    stage_at: dict[str, float] = field(default_factory=dict)
    # perf_counter reading of the latest "done" event, if any.
    ended: float | None = None
    tokens: int = 0
    tool_count: int = 0
    fell_back: bool = False
    # Set by log_summary so the summary is written at most once.
    logged: bool = False

    def log_start(self) -> None:
        """Log the turn header: index, compute mode, backend and message length."""
        logger.info(
            "turn #%d start | compute=%s backend=%s message_chars=%d",
            self.message_index,
            self.compute,
            self.backend,
            self.message_chars,
        )

    def observe(self, event: dict[str, Any]) -> None:
        """Update the counters and timestamps from one event dict.

        Unknown event types are ignored.
        """
        observed_at = time.perf_counter()
        kind = event.get("type")
        if kind == "stage":
            stage_name = str(event.get("stage"))
            # Only the first sighting of a stage is kept.
            if stage_name not in self.stage_at:
                self.stage_at[stage_name] = observed_at
            return
        if kind == "model_progress":
            reported = int(event.get("tokens") or 0)
            if reported > self.tokens:
                self.tokens = reported
            return
        if kind == "tool_event":
            self.tool_count += 1
            return
        if kind == "fallback":
            self.fell_back = True
            return
        if kind == "done":
            self.ended = observed_at

    def durations(self) -> dict[str, float]:
        """Return the elapsed times in milliseconds.

        ``total_ms`` is always present and runs to the "done" event, or to now if
        none was seen. ``decode_ms``, ``tools_ms`` and ``write_ms`` appear only when
        the stage timestamps they are derived from were observed.
        """
        finish = time.perf_counter() if self.ended is None else self.ended
        planning_at = self.stage_at.get("planning")
        running_at = self.stage_at.get("running_tool")
        writing_at = self.stage_at.get("writing")

        result: dict[str, float] = {"total_ms": _ms(finish - self.started)}
        if planning_at is not None and running_at is not None:
            result["decode_ms"] = _ms(running_at - planning_at)
        if running_at is not None and writing_at is not None:
            result["tools_ms"] = _ms(writing_at - running_at)
        if writing_at is not None:
            result["write_ms"] = _ms(finish - writing_at)
        return result

    def log_summary(self, error: BaseException | None = None) -> None:
        """Write the one-line turn summary; repeat calls are no-ops.

        Logged at INFO normally, or at WARNING with the exception appended when
        ``error`` is given.
        """
        if self.logged:
            return
        self.logged = True
        timing = " ".join(f"{key}={value}" for key, value in self.durations().items())
        resources = " ".join(f"{key}={value}" for key, value in resource_snapshot().items())
        status = "done" if error is None else "error"
        message = (
            f"turn #{self.message_index} {status} | {timing} | "
            f"tokens={self.tokens} tools={self.tool_count} compute={self.compute} "
            f"device={self.device or '?'} backend={self.backend} fallback={self.fell_back} | {resources}"
        )
        if error is None:
            logger.info(message)
            return
        logger.warning("%s | exception=%r", message, error)
|
hackathon_advisor/wood_map.py
CHANGED
|
@@ -11,9 +11,13 @@ from hackathon_advisor.tools import Idea
|
|
| 11 |
def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
|
| 12 |
echoes = list(score.echoes)
|
| 13 |
background = _background_projects(index, echoes)
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
return {
|
| 18 |
"caption": _caption(score, echoes),
|
| 19 |
"dots": _dedupe_dots(dots),
|
|
@@ -26,8 +30,8 @@ def _background_projects(index: ProjectIndex, echoes: list[SearchHit]) -> list[P
|
|
| 26 |
return projects[:16]
|
| 27 |
|
| 28 |
|
| 29 |
-
def _project_dot(project: Project, kind: str) -> dict[str, Any]:
|
| 30 |
-
x, y = _point(project.id)
|
| 31 |
return {
|
| 32 |
"id": project.id,
|
| 33 |
"kind": kind,
|
|
@@ -39,8 +43,8 @@ def _project_dot(project: Project, kind: str) -> dict[str, Any]:
|
|
| 39 |
}
|
| 40 |
|
| 41 |
|
| 42 |
-
def _echo_dot(hit: SearchHit) -> dict[str, Any]:
|
| 43 |
-
dot = _project_dot(hit.project, "echo")
|
| 44 |
dot["score"] = round(hit.score, 3)
|
| 45 |
dot["matched_terms"] = list(hit.matched_terms)
|
| 46 |
dot["page_number"] = hit.page_number
|
|
@@ -48,13 +52,8 @@ def _echo_dot(hit: SearchHit) -> dict[str, Any]:
|
|
| 48 |
return dot
|
| 49 |
|
| 50 |
|
| 51 |
-
def _idea_dot(idea: Idea, score: ScoreCard,
|
| 52 |
-
|
| 53 |
-
lead_x, lead_y = _point(echoes[0].project.id)
|
| 54 |
-
x = _clamp(lead_x + 7, 8, 92)
|
| 55 |
-
y = _clamp(lead_y - 5, 8, 92)
|
| 56 |
-
else:
|
| 57 |
-
x, y = _point(f"idea:{idea.id}:{idea.title}")
|
| 58 |
return {
|
| 59 |
"id": idea.id,
|
| 60 |
"kind": "idea",
|
|
@@ -67,6 +66,67 @@ def _idea_dot(idea: Idea, score: ScoreCard, echoes: list[SearchHit]) -> dict[str
|
|
| 67 |
}
|
| 68 |
|
| 69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
|
| 71 |
if score.verdict.startswith("UNWRITTEN"):
|
| 72 |
return "Your page sits in a pale margin beyond the nearest inked clusters."
|
|
@@ -81,10 +141,6 @@ def _point(key: str) -> tuple[int, int]:
|
|
| 81 |
return x, y
|
| 82 |
|
| 83 |
|
| 84 |
-
def _clamp(value: int, low: int, high: int) -> int:
|
| 85 |
-
return max(low, min(high, value))
|
| 86 |
-
|
| 87 |
-
|
| 88 |
def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
| 89 |
seen: set[tuple[str, str]] = set()
|
| 90 |
deduped: list[dict[str, Any]] = []
|
|
|
|
| 11 |
def build_wood_map(index: ProjectIndex, idea: Idea, score: ScoreCard) -> dict[str, Any]:
|
| 12 |
echoes = list(score.echoes)
|
| 13 |
background = _background_projects(index, echoes)
|
| 14 |
+
echo_projects = [hit.project for hit in echoes[:5]]
|
| 15 |
+
|
| 16 |
+
layout, idea_xy = _layout(index, idea, background + echo_projects)
|
| 17 |
+
|
| 18 |
+
dots = [_project_dot(project, "inked", layout) for project in background]
|
| 19 |
+
dots.extend(_echo_dot(hit, layout) for hit in echoes[:5])
|
| 20 |
+
dots.append(_idea_dot(idea, score, idea_xy))
|
| 21 |
return {
|
| 22 |
"caption": _caption(score, echoes),
|
| 23 |
"dots": _dedupe_dots(dots),
|
|
|
|
| 30 |
return projects[:16]
|
| 31 |
|
| 32 |
|
| 33 |
+
def _project_dot(project: Project, kind: str, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
|
| 34 |
+
x, y = layout.get(project.id) or _point(project.id)
|
| 35 |
return {
|
| 36 |
"id": project.id,
|
| 37 |
"kind": kind,
|
|
|
|
| 43 |
}
|
| 44 |
|
| 45 |
|
| 46 |
+
def _echo_dot(hit: SearchHit, layout: dict[str, tuple[int, int]]) -> dict[str, Any]:
|
| 47 |
+
dot = _project_dot(hit.project, "echo", layout)
|
| 48 |
dot["score"] = round(hit.score, 3)
|
| 49 |
dot["matched_terms"] = list(hit.matched_terms)
|
| 50 |
dot["page_number"] = hit.page_number
|
|
|
|
| 52 |
return dot
|
| 53 |
|
| 54 |
|
| 55 |
+
def _idea_dot(idea: Idea, score: ScoreCard, idea_xy: tuple[int, int]) -> dict[str, Any]:
|
| 56 |
+
x, y = idea_xy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
return {
|
| 58 |
"id": idea.id,
|
| 59 |
"kind": "idea",
|
|
|
|
| 66 |
}
|
| 67 |
|
| 68 |
|
| 69 |
+
def _layout(
    index: ProjectIndex,
    idea: Idea,
    projects: list[Project],
) -> tuple[dict[str, tuple[int, int]], tuple[int, int]]:
    """Compute map coordinates for ``projects`` and for the idea.

    Preferred path: project the stored embedding vectors, plus the embedded idea
    text (``idea.pitch``, or ``idea.title`` when the pitch is empty), into 2D via
    ``_pca_project``.

    Fallback path: the deterministic ``_point`` scatter, keyed by project id and by
    ``idea:<id>:<title>``. It is used when there are fewer than three projects, when
    any project has no stored vector, or when embedding/projection raises.

    Returns:
        ``(positions_by_project_id, idea_position)``.
    """
    ids = [project.id for project in projects]
    vectors = [index.vector_for(project_id) for project_id in ids]
    hashed_positions = {project_id: _point(project_id) for project_id in ids}
    hashed_idea = _point(f"idea:{idea.id}:{idea.title}")
    fallback = (hashed_positions, hashed_idea)

    has_enough_points = len(vectors) >= 3
    if not has_enough_points or any(vector is None for vector in vectors):
        return fallback
    try:
        idea_vector = index.embed_query(idea.pitch or idea.title)
        coords, idea_xy = _pca_project(vectors, idea_vector)
    except Exception:  # noqa: BLE001 - any projection failure degrades to the hash scatter
        return fallback
    return dict(zip(ids, coords)), idea_xy
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _pca_project(
    vectors: list[tuple[float, ...]],
    idea_vector: tuple[float, ...],
) -> tuple[list[tuple[int, int]], tuple[int, int]]:
    """Project the project vectors and the idea vector onto the cloud's top-2 PCA axes.

    The basis comes from an SVD of the mean-centered project vectors only. The idea
    is centered with the same mean and projected onto that basis. All points,
    including the idea, are then rescaled together by ``_scale_to_canvas`` and
    rounded to integer coordinates.

    Returns:
        ``(project_coords, idea_coord)``, with ``project_coords`` in input order.
    """
    import numpy as np

    def to_cell(point: Any) -> tuple[int, int]:
        return (int(round(point[0])), int(round(point[1])))

    cloud = np.asarray(vectors, dtype=np.float64)
    idea_point = np.asarray(idea_vector, dtype=np.float64)
    centroid = cloud.mean(axis=0)
    centered = cloud - centroid
    # Top-2 principal directions of the project cloud define the map; the idea is projected
    # onto that same basis so its position reflects true embedding similarity.
    _, _, components = np.linalg.svd(centered, full_matrices=False)
    basis = components[:2]
    cloud_2d = centered @ basis.T
    idea_2d = (idea_point - centroid) @ basis.T
    # The idea goes last so it can be split off again after scaling.
    on_canvas = _scale_to_canvas(np.vstack([cloud_2d, idea_2d]))
    coords = [to_cell((x, y)) for x, y in on_canvas[:-1]]
    return coords, to_cell(on_canvas[-1])
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def _scale_to_canvas(points: Any, low: float = 10.0, high: float = 90.0) -> Any:
|
| 115 |
+
import numpy as np
|
| 116 |
+
|
| 117 |
+
scaled = np.empty_like(points)
|
| 118 |
+
for axis in range(points.shape[1]):
|
| 119 |
+
column = points[:, axis]
|
| 120 |
+
lo = float(column.min())
|
| 121 |
+
hi = float(column.max())
|
| 122 |
+
span = hi - lo
|
| 123 |
+
if span < 1e-9:
|
| 124 |
+
scaled[:, axis] = (low + high) / 2.0
|
| 125 |
+
else:
|
| 126 |
+
scaled[:, axis] = low + (column - lo) / span * (high - low)
|
| 127 |
+
return scaled
|
| 128 |
+
|
| 129 |
+
|
| 130 |
def _caption(score: ScoreCard, echoes: list[SearchHit]) -> str:
|
| 131 |
if score.verdict.startswith("UNWRITTEN"):
|
| 132 |
return "Your page sits in a pale margin beyond the nearest inked clusters."
|
|
|
|
| 141 |
return x, y
|
| 142 |
|
| 143 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
def _dedupe_dots(dots: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
| 145 |
seen: set[tuple[str, str]] = set()
|
| 146 |
deduped: list[dict[str, Any]] = []
|
hackathon_advisor/zerogpu.py
CHANGED
|
@@ -41,3 +41,17 @@ def gpu_task(function: Callable[P, R]) -> Callable[P, R]:
|
|
| 41 |
"Install runtime requirements before enabling ZeroGPU."
|
| 42 |
) from error
|
| 43 |
return spaces.GPU(duration=zero_gpu_duration_seconds())(function)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
"Install runtime requirements before enabling ZeroGPU."
|
| 42 |
) from error
|
| 43 |
return spaces.GPU(duration=zero_gpu_duration_seconds())(function)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
QUOTA_ERROR_HINTS = ("quota", "gpu task aborted", "no gpu", "exceeded", "gpu is not available")


def is_gpu_quota_error(error: BaseException) -> bool:
    """Heuristically decide whether ``error`` looks like a GPU allocation/quota failure.

    The check is purely textual and case-insensitive: the exception counts as a
    quota error when its class name contains ``"quota"`` or ``"gpu"``, or when its
    message contains any entry of ``QUOTA_ERROR_HINTS``. Callers can use the result
    to decide whether to retry the work on CPU.
    """
    class_name = type(error).__name__.lower()
    for marker in ("quota", "gpu"):
        if marker in class_name:
            return True

    # Only stringify the exception when the class name was inconclusive.
    text = str(error).lower()
    for hint in QUOTA_ERROR_HINTS:
        if hint in text:
            return True
    return False
|
scripts/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Marks the repository's scripts/ as a regular package so it resolves ahead of any top-level
|
| 2 |
+
# "scripts" package that a dependency (e.g. nemo-toolkit) installs into site-packages.
|
static/app.js
CHANGED
|
@@ -29,8 +29,16 @@ const resetButton = document.querySelector("#reset-session");
|
|
| 29 |
const recordVoiceButton = document.querySelector("#record-voice");
|
| 30 |
const uploadVoiceButton = document.querySelector("#upload-voice");
|
| 31 |
const voiceFileInput = document.querySelector("#voice-file");
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
const SESSION_STORAGE_KEY = "hackathon-advisor-session-v2";
|
|
|
|
| 34 |
const FIELD_NOTES_FILENAME = "hackathon-advisor-field-notes.md";
|
| 35 |
const CHAPTER_FILENAME = "hackathon-advisor-chapter.md";
|
| 36 |
const PNG_EXPORT_LABEL = "PNG";
|
|
@@ -51,6 +59,8 @@ let voiceRecorder = null;
|
|
| 51 |
let voiceStream = null;
|
| 52 |
let voiceChunks = [];
|
| 53 |
let voiceRecordingState = "idle";
|
|
|
|
|
|
|
| 54 |
|
| 55 |
setVoiceRecordingState("idle");
|
| 56 |
bootstrap().catch(handleBootstrapError);
|
|
@@ -168,6 +178,7 @@ async function runTurn(message) {
|
|
| 168 |
corrections.textContent = "";
|
| 169 |
planEl.innerHTML = "";
|
| 170 |
delete session.ui_status;
|
|
|
|
| 171 |
startTurnWatchdog();
|
| 172 |
|
| 173 |
let completed = false;
|
|
@@ -194,6 +205,7 @@ async function runTurn(message) {
|
|
| 194 |
ink.classList.add("bleed");
|
| 195 |
} finally {
|
| 196 |
clearTurnWatchdog();
|
|
|
|
| 197 |
submit.disabled = false;
|
| 198 |
setSessionControlsDisabled(false);
|
| 199 |
setCommandDisabled(false);
|
|
@@ -810,6 +822,26 @@ function handleEvent(event) {
|
|
| 810 |
return;
|
| 811 |
}
|
| 812 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
if (event.type === "token") {
|
| 814 |
markFirstTokenSeen();
|
| 815 |
ink.textContent += event.text;
|
|
@@ -817,6 +849,9 @@ function handleEvent(event) {
|
|
| 817 |
}
|
| 818 |
|
| 819 |
if (event.type === "done") {
|
|
|
|
|
|
|
|
|
|
| 820 |
if (!sawTurnToken) {
|
| 821 |
clearTurnWatchdog();
|
| 822 |
ink.textContent = event.response || ink.textContent;
|
|
@@ -1023,8 +1058,9 @@ function renderWoodMap(map) {
|
|
| 1023 |
field.className = "wood";
|
| 1024 |
for (const dot of map.dots) {
|
| 1025 |
const marker = document.createElement(dot.url ? "a" : "span");
|
| 1026 |
-
|
| 1027 |
-
|
|
|
|
| 1028 |
marker.style.left = `${boundedPercent(dot.x)}%`;
|
| 1029 |
marker.style.top = `${boundedPercent(dot.y)}%`;
|
| 1030 |
const radius = Math.max(3, Math.min(10, Number(dot.radius || 4)));
|
|
@@ -1168,6 +1204,99 @@ function clearTurnWatchdog() {
|
|
| 1168 |
}
|
| 1169 |
}
|
| 1170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1171 |
function syncCurrentIdeaGoals() {
|
| 1172 |
const currentId = session.current_idea_id;
|
| 1173 |
if (!currentId || !Array.isArray(session.ideas)) return;
|
|
|
|
| 29 |
const recordVoiceButton = document.querySelector("#record-voice");
|
| 30 |
const uploadVoiceButton = document.querySelector("#upload-voice");
|
| 31 |
const voiceFileInput = document.querySelector("#voice-file");
|
| 32 |
+
const turnProgressEl = document.querySelector("#turn-progress");
|
| 33 |
+
const turnStageIconEl = document.querySelector("#turn-stage-icon");
|
| 34 |
+
const turnStageTextEl = document.querySelector("#turn-stage-text");
|
| 35 |
+
const turnTokensEl = document.querySelector("#turn-tokens");
|
| 36 |
+
const turnEtaEl = document.querySelector("#turn-eta");
|
| 37 |
+
const turnBarFillEl = document.querySelector("#turn-bar-fill");
|
| 38 |
+
const toolChipsEl = document.querySelector("#tool-chips");
|
| 39 |
|
| 40 |
const SESSION_STORAGE_KEY = "hackathon-advisor-session-v2";
|
| 41 |
+
const STAGE_ICONS = { planning: "🪶", running_tool: "🔧", writing: "✍️" };
|
| 42 |
const FIELD_NOTES_FILENAME = "hackathon-advisor-field-notes.md";
|
| 43 |
const CHAPTER_FILENAME = "hackathon-advisor-chapter.md";
|
| 44 |
const PNG_EXPORT_LABEL = "PNG";
|
|
|
|
| 59 |
let voiceStream = null;
|
| 60 |
let voiceChunks = [];
|
| 61 |
let voiceRecordingState = "idle";
|
| 62 |
+
let decodeStartedAt = 0;
|
| 63 |
+
let turnProgressTimer = null;
|
| 64 |
|
| 65 |
setVoiceRecordingState("idle");
|
| 66 |
bootstrap().catch(handleBootstrapError);
|
|
|
|
| 178 |
corrections.textContent = "";
|
| 179 |
planEl.innerHTML = "";
|
| 180 |
delete session.ui_status;
|
| 181 |
+
resetTurnProgress();
|
| 182 |
startTurnWatchdog();
|
| 183 |
|
| 184 |
let completed = false;
|
|
|
|
| 205 |
ink.classList.add("bleed");
|
| 206 |
} finally {
|
| 207 |
clearTurnWatchdog();
|
| 208 |
+
hideTurnProgress();
|
| 209 |
submit.disabled = false;
|
| 210 |
setSessionControlsDisabled(false);
|
| 211 |
setCommandDisabled(false);
|
|
|
|
| 822 |
return;
|
| 823 |
}
|
| 824 |
|
| 825 |
+
if (event.type === "stage") {
|
| 826 |
+
setTurnStage(event.stage, event.label);
|
| 827 |
+
return;
|
| 828 |
+
}
|
| 829 |
+
|
| 830 |
+
if (event.type === "model_progress") {
|
| 831 |
+
renderModelProgress(event.tokens, event.max_tokens);
|
| 832 |
+
return;
|
| 833 |
+
}
|
| 834 |
+
|
| 835 |
+
if (event.type === "tool_event") {
|
| 836 |
+
addToolChip(event);
|
| 837 |
+
return;
|
| 838 |
+
}
|
| 839 |
+
|
| 840 |
+
if (event.type === "fallback") {
|
| 841 |
+
renderComputeFallback(event);
|
| 842 |
+
return;
|
| 843 |
+
}
|
| 844 |
+
|
| 845 |
if (event.type === "token") {
|
| 846 |
markFirstTokenSeen();
|
| 847 |
ink.textContent += event.text;
|
|
|
|
| 849 |
}
|
| 850 |
|
| 851 |
if (event.type === "done") {
|
| 852 |
+
setTurnBar(100);
|
| 853 |
+
if (turnEtaEl) turnEtaEl.textContent = "";
|
| 854 |
+
hideTurnProgress();
|
| 855 |
if (!sawTurnToken) {
|
| 856 |
clearTurnWatchdog();
|
| 857 |
ink.textContent = event.response || ink.textContent;
|
|
|
|
| 1058 |
field.className = "wood";
|
| 1059 |
for (const dot of map.dots) {
|
| 1060 |
const marker = document.createElement(dot.url ? "a" : "span");
|
| 1061 |
+
// Namespace the kind class (wood-idea/wood-echo/wood-inked) so it never collides with the
|
| 1062 |
+
// global .idea/.echo card styles. The "you" dot stays green regardless of verdict.
|
| 1063 |
+
marker.className = `wood-dot wood-${dot.kind || "inked"}`;
|
| 1064 |
marker.style.left = `${boundedPercent(dot.x)}%`;
|
| 1065 |
marker.style.top = `${boundedPercent(dot.y)}%`;
|
| 1066 |
const radius = Math.max(3, Math.min(10, Number(dot.radius || 4)));
|
|
|
|
| 1204 |
}
|
| 1205 |
}
|
| 1206 |
|
| 1207 |
+
// Coarse overall completion per stage, so the bar always advances even when token-level
|
| 1208 |
+
// progress is unknown (e.g. the rules backend, or the fast tool/writing stages).
|
| 1209 |
+
const STAGE_PROGRESS = { planning: 8, running_tool: 85, writing: 95 };
|
| 1210 |
+
|
| 1211 |
+
// Reset the turn-progress panel for a new turn and arm its delayed reveal.
function resetTurnProgress() {
  if (!turnProgressEl) {
    return;
  }

  // Keep the panel hidden on submit. It is revealed either when real token decoding
  // starts or when the 450 ms timer below fires; a fast turn finishes (and cancels
  // the timer) first, so the bar never flashes.
  turnProgressEl.hidden = true;
  decodeStartedAt = 0;

  if (toolChipsEl) {
    toolChipsEl.innerHTML = "";
  }
  for (const textEl of [turnTokensEl, turnEtaEl]) {
    if (textEl) {
      textEl.textContent = "";
    }
  }

  setTurnBar(4);
  setTurnStageContent("planning", "Thinking");

  clearTurnProgressTimer();
  turnProgressTimer = window.setTimeout(revealTurnProgress, 450);
}
|
| 1226 |
+
|
| 1227 |
+
// Make the turn-progress panel visible (no-op when the element is missing).
function revealTurnProgress() {
  if (!turnProgressEl) {
    return;
  }
  turnProgressEl.hidden = false;
}
|
| 1230 |
+
|
| 1231 |
+
// Cancel the pending delayed-reveal timer, if one is armed.
function clearTurnProgressTimer() {
  if (!turnProgressTimer) {
    return;
  }
  window.clearTimeout(turnProgressTimer);
  turnProgressTimer = null;
}
|
| 1237 |
+
|
| 1238 |
+
// Hide the turn-progress panel and make sure a pending reveal cannot re-show it.
function hideTurnProgress() {
  clearTurnProgressTimer();
  if (!turnProgressEl) {
    return;
  }
  turnProgressEl.hidden = true;
}
|
| 1242 |
+
|
| 1243 |
+
// Set the progress bar fill to `percent`, clamped to the 0–100 range.
// A value that is not a finite number is treated as 0 so the width never becomes
// the invalid CSS value "NaN%". The rounded value is also mirrored to
// `aria-valuenow` on the enclosing role="progressbar" element (when present) so
// assistive technology sees a determinate bar.
function setTurnBar(percent) {
  if (!turnBarFillEl) return;

  const numeric = Number(percent);
  const clamped = Number.isFinite(numeric) ? Math.max(0, Math.min(100, numeric)) : 0;
  turnBarFillEl.style.width = `${clamped}%`;

  const bar = turnBarFillEl.closest('[role="progressbar"]');
  if (bar) bar.setAttribute("aria-valuenow", String(Math.round(clamped)));
}
|
| 1248 |
+
|
| 1249 |
+
// Render the stage icon and label, and move the bar to the stage's coarse progress.
// `stage` arrives from streamed events, so both lookup tables are consulted with
// own-property checks: a key such as "constructor" or "toString" must not resolve
// through Object.prototype and be rendered as an icon or passed to the bar.
function setTurnStageContent(stage, label) {
  const hasStage = (table) => Object.prototype.hasOwnProperty.call(table, stage);

  if (turnStageIconEl) {
    const icon = hasStage(STAGE_ICONS) ? STAGE_ICONS[stage] : "";
    turnStageIconEl.textContent = icon || "🪶";
  }
  if (turnStageTextEl) turnStageTextEl.textContent = label || "Thinking";
  if (hasStage(STAGE_PROGRESS)) setTurnBar(STAGE_PROGRESS[stage]);

  // The ETA is only cleared once the turn has moved past the planning stage.
  if (stage && stage !== "planning" && turnEtaEl) turnEtaEl.textContent = "";
}
|
| 1255 |
+
|
| 1256 |
+
// Handle a streamed stage change: cancel the turn watchdog, then render the stage.
function setTurnStage(stage, label) {
  clearTurnWatchdog();
  setTurnStageContent(stage, label);
}
|
| 1260 |
+
|
| 1261 |
+
// Render token-level decode progress: token count, bar position and a rough ETA.
function renderModelProgress(tokens, maxTokens) {
  clearTurnWatchdog();
  // Real token decoding is unambiguous execution, so show the panel immediately.
  revealTurnProgress();

  const decoded = Number(tokens) || 0;
  if (turnTokensEl) {
    turnTokensEl.textContent = decoded ? `· decoded ${decoded} tokens` : "";
  }
  if (!decoded) {
    return;
  }
  if (!decodeStartedAt) {
    decodeStartedAt = performance.now();
  }

  const limit = Number(maxTokens) || 0;
  if (!(limit > 0)) {
    // Without a token cap neither the bar position nor the ETA can be derived.
    return;
  }

  // Map token decode into the 8%–80% band of the overall bar.
  const fraction = Math.min(1, decoded / limit);
  setTurnBar(8 + fraction * 72);

  // Estimate remaining time from the live decode rate toward the token cap (an upper bound).
  const elapsedSeconds = (performance.now() - decodeStartedAt) / 1000;
  if (!turnEtaEl || !(elapsedSeconds > 0.3)) {
    return;
  }
  const tokensPerSecond = decoded / elapsedSeconds;
  const secondsLeft = Math.max(0, limit - decoded) / Math.max(tokensPerSecond, 0.1);
  if (secondsLeft >= 1) {
    turnEtaEl.textContent = `~${Math.ceil(secondsLeft)}s left`;
  } else {
    turnEtaEl.textContent = "almost done";
  }
}
|
| 1281 |
+
|
| 1282 |
+
// Append a chip for a streamed tool event: the tool name followed by a check mark.
// The name is written through textContent, so it is never parsed as markup.
function addToolChip(event) {
  if (!toolChipsEl) {
    return;
  }

  const chip = document.createElement("span");
  chip.className = "tool-chip";
  if (event.summary) {
    chip.title = event.summary;
  }

  const nameEl = document.createElement("span");
  nameEl.className = "tc-name";
  nameEl.textContent = event.name || event.tool || "tool";

  const checkEl = document.createElement("span");
  checkEl.className = "tc-check";
  checkEl.textContent = "✓";

  chip.append(nameEl, checkEl);
  toolChipsEl.append(chip);
}
|
| 1292 |
+
|
| 1293 |
+
// Show why the turn fell back to slower compute, in both the stage line and the
// corrections margin. Acceleration is automatic, so this is informational only
// (there is no control to flip).
function renderComputeFallback(event) {
  const message = event.reason || "Running on CPU (slower).";
  for (const target of [turnStageTextEl, corrections]) {
    if (target) {
      target.textContent = message;
    }
  }
}
|
| 1299 |
+
|
| 1300 |
function syncCurrentIdeaGoals() {
|
| 1301 |
const currentId = session.current_idea_id;
|
| 1302 |
if (!currentId || !Array.isArray(session.ideas)) return;
|
static/index.html
CHANGED
|
@@ -222,6 +222,19 @@
|
|
| 222 |
|
| 223 |
<div id="corrections" class="marginalia" aria-live="polite"></div>
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
<article class="fate">
|
| 226 |
<span id="verdict-stamp" class="verdict-stamp verdict-ready">
|
| 227 |
<span class="seal-dot"></span>
|
|
|
|
| 222 |
|
| 223 |
<div id="corrections" class="marginalia" aria-live="polite"></div>
|
| 224 |
|
| 225 |
+
<div id="turn-progress" class="turn-progress" hidden aria-live="polite">
|
| 226 |
+
<div class="turn-stage">
|
| 227 |
+
<span id="turn-stage-icon" class="ts-icon">🪶</span>
|
| 228 |
+
<span id="turn-stage-text" class="ts-text">Thinking</span>
|
| 229 |
+
<span id="turn-tokens" class="ts-tokens"></span>
|
| 230 |
+
<span id="turn-eta" class="ts-eta"></span>
|
| 231 |
+
</div>
|
| 232 |
+
<div class="turn-bar" role="progressbar" aria-label="Turn progress">
|
| 233 |
+
<div id="turn-bar-fill" class="turn-bar-fill"></div>
|
| 234 |
+
</div>
|
| 235 |
+
<div id="tool-chips" class="tool-chips"></div>
|
| 236 |
+
</div>
|
| 237 |
+
|
| 238 |
<article class="fate">
|
| 239 |
<span id="verdict-stamp" class="verdict-stamp verdict-ready">
|
| 240 |
<span class="seal-dot"></span>
|
static/styles.css
CHANGED
|
@@ -738,30 +738,23 @@ textarea:disabled {
|
|
| 738 |
transition: opacity 0.5s;
|
| 739 |
}
|
| 740 |
|
| 741 |
-
.wood-dot.inked {
|
| 742 |
background: rgba(73, 49, 22, 0.34);
|
| 743 |
}
|
| 744 |
|
| 745 |
-
.wood-dot.echo {
|
| 746 |
background: var(--oxblood);
|
| 747 |
box-shadow: 0 0 0 2px rgba(255, 240, 181, 0.5);
|
| 748 |
animation: echo-pulse 2.4s ease-in-out infinite;
|
| 749 |
}
|
| 750 |
|
| 751 |
-
.wood-dot.idea {
|
| 752 |
z-index: 2;
|
|
|
|
| 753 |
background: var(--leaf);
|
| 754 |
box-shadow:
|
| 755 |
0 0 0 3px #fff0b5,
|
| 756 |
-
0 0 20px rgba(47, 107, 65, 0.
|
| 757 |
-
}
|
| 758 |
-
|
| 759 |
-
.wood-dot.idea.bleed,
|
| 760 |
-
.wood-dot.idea.echo-idea {
|
| 761 |
-
background: var(--oxblood);
|
| 762 |
-
box-shadow:
|
| 763 |
-
0 0 0 3px #fff0b5,
|
| 764 |
-
0 0 20px rgba(154, 43, 34, 0.5);
|
| 765 |
}
|
| 766 |
|
| 767 |
@keyframes echo-pulse {
|
|
@@ -1298,3 +1291,114 @@ textarea:disabled {
|
|
| 1298 |
transition-duration: 0.001ms !important;
|
| 1299 |
}
|
| 1300 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
transition: opacity 0.5s;
|
| 739 |
}
|
| 740 |
|
| 741 |
+
.wood-dot.wood-inked {
|
| 742 |
background: rgba(73, 49, 22, 0.34);
|
| 743 |
}
|
| 744 |
|
| 745 |
+
.wood-dot.wood-echo {
|
| 746 |
background: var(--oxblood);
|
| 747 |
box-shadow: 0 0 0 2px rgba(255, 240, 181, 0.5);
|
| 748 |
animation: echo-pulse 2.4s ease-in-out infinite;
|
| 749 |
}
|
| 750 |
|
| 751 |
+
.wood-dot.wood-idea {
|
| 752 |
z-index: 2;
|
| 753 |
+
border-radius: 50%;
|
| 754 |
background: var(--leaf);
|
| 755 |
box-shadow:
|
| 756 |
0 0 0 3px #fff0b5,
|
| 757 |
+
0 0 20px rgba(47, 107, 65, 0.6);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
}
|
| 759 |
|
| 760 |
@keyframes echo-pulse {
|
|
|
|
| 1291 |
transition-duration: 0.001ms !important;
|
| 1292 |
}
|
| 1293 |
}
|
| 1294 |
+
|
| 1295 |
+
/* Live turn progress (stage + token count + tool chips) */
|
| 1296 |
+
.turn-progress {
|
| 1297 |
+
display: flex;
|
| 1298 |
+
flex-direction: column;
|
| 1299 |
+
gap: 0.4rem;
|
| 1300 |
+
margin: 0.2rem 0 0.4rem;
|
| 1301 |
+
padding: 0.5rem 0.7rem;
|
| 1302 |
+
border: 1px solid var(--rule-soft);
|
| 1303 |
+
border-left: 3px solid var(--gold);
|
| 1304 |
+
border-radius: 8px;
|
| 1305 |
+
background: rgba(216, 162, 38, 0.06);
|
| 1306 |
+
}
|
| 1307 |
+
|
| 1308 |
+
.turn-stage {
|
| 1309 |
+
display: flex;
|
| 1310 |
+
align-items: center;
|
| 1311 |
+
gap: 0.45rem;
|
| 1312 |
+
font-family: var(--label);
|
| 1313 |
+
font-size: 0.78rem;
|
| 1314 |
+
color: var(--ink-soft);
|
| 1315 |
+
}
|
| 1316 |
+
|
| 1317 |
+
.turn-stage .ts-icon {
|
| 1318 |
+
font-size: 0.95rem;
|
| 1319 |
+
line-height: 1;
|
| 1320 |
+
animation: ts-pulse 1.6s ease-in-out infinite;
|
| 1321 |
+
}
|
| 1322 |
+
|
| 1323 |
+
.turn-stage .ts-text {
|
| 1324 |
+
font-weight: 600;
|
| 1325 |
+
color: var(--ink);
|
| 1326 |
+
}
|
| 1327 |
+
|
| 1328 |
+
.turn-stage .ts-tokens {
|
| 1329 |
+
color: var(--ink-faint);
|
| 1330 |
+
font-variant-numeric: tabular-nums;
|
| 1331 |
+
}
|
| 1332 |
+
|
| 1333 |
+
.turn-stage .ts-eta {
|
| 1334 |
+
margin-left: auto;
|
| 1335 |
+
color: var(--ink-faint);
|
| 1336 |
+
font-variant-numeric: tabular-nums;
|
| 1337 |
+
}
|
| 1338 |
+
|
| 1339 |
+
.turn-bar {
|
| 1340 |
+
height: 5px;
|
| 1341 |
+
border-radius: 999px;
|
| 1342 |
+
background: var(--rule-soft);
|
| 1343 |
+
overflow: hidden;
|
| 1344 |
+
}
|
| 1345 |
+
|
| 1346 |
+
.turn-bar-fill {
|
| 1347 |
+
height: 100%;
|
| 1348 |
+
width: 0%;
|
| 1349 |
+
border-radius: 999px;
|
| 1350 |
+
background: linear-gradient(90deg, var(--gold), var(--gold-2));
|
| 1351 |
+
transition: width 0.3s ease;
|
| 1352 |
+
}
|
| 1353 |
+
|
| 1354 |
+
@keyframes ts-pulse {
|
| 1355 |
+
0%,
|
| 1356 |
+
100% {
|
| 1357 |
+
opacity: 0.55;
|
| 1358 |
+
transform: translateY(0);
|
| 1359 |
+
}
|
| 1360 |
+
50% {
|
| 1361 |
+
opacity: 1;
|
| 1362 |
+
transform: translateY(-1px);
|
| 1363 |
+
}
|
| 1364 |
+
}
|
| 1365 |
+
|
| 1366 |
+
.tool-chips {
|
| 1367 |
+
display: flex;
|
| 1368 |
+
flex-wrap: wrap;
|
| 1369 |
+
gap: 0.35rem;
|
| 1370 |
+
}
|
| 1371 |
+
|
| 1372 |
+
.tool-chips:empty {
|
| 1373 |
+
display: none;
|
| 1374 |
+
}
|
| 1375 |
+
|
| 1376 |
+
.tool-chip {
|
| 1377 |
+
display: inline-flex;
|
| 1378 |
+
align-items: center;
|
| 1379 |
+
gap: 0.3rem;
|
| 1380 |
+
font-family: var(--label);
|
| 1381 |
+
font-size: 0.68rem;
|
| 1382 |
+
font-weight: 600;
|
| 1383 |
+
color: var(--leaf);
|
| 1384 |
+
background: rgba(47, 107, 65, 0.1);
|
| 1385 |
+
border: 1px solid rgba(47, 107, 65, 0.28);
|
| 1386 |
+
border-radius: 999px;
|
| 1387 |
+
padding: 0.12rem 0.55rem;
|
| 1388 |
+
animation: chip-in 0.22s ease;
|
| 1389 |
+
}
|
| 1390 |
+
|
| 1391 |
+
.tool-chip .tc-check {
|
| 1392 |
+
font-size: 0.66rem;
|
| 1393 |
+
}
|
| 1394 |
+
|
| 1395 |
+
@keyframes chip-in {
|
| 1396 |
+
from {
|
| 1397 |
+
opacity: 0;
|
| 1398 |
+
transform: translateY(2px);
|
| 1399 |
+
}
|
| 1400 |
+
to {
|
| 1401 |
+
opacity: 1;
|
| 1402 |
+
transform: translateY(0);
|
| 1403 |
+
}
|
| 1404 |
+
}
|
tests/test_agent.py
CHANGED
|
@@ -291,3 +291,49 @@ def test_planner_score_idea_scores_current_idea() -> None:
|
|
| 291 |
|
| 292 |
assert scored.score is not None
|
| 293 |
assert scored.artifact["title"] == first.artifact["title"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
assert scored.score is not None
|
| 293 |
assert scored.artifact["title"] == first.artifact["title"]
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def test_turn_stream_emits_ordered_progress_events() -> None:
    """A streamed turn is framed by start/done and streams stage and tool events in order."""
    engine = AdvisorEngine(load_test_index())

    stream = list(engine.turn_stream("A local-first archive cartographer for family photos", {}))
    kinds = [item["type"] for item in stream]

    assert kinds[0] == "start"
    assert kinds[-1] == "done"
    assert "token" in kinds
    # the planning stage is announced before any tool runs, and tools stream as they execute
    assert kinds.index("stage") < kinds.index("tool_event")
    tool_names = [item["name"] for item in stream if item["type"] == "tool_event"]
    assert tool_names == ["save_idea", "search_projects", "score_idea"]
    final_event = stream[-1]
    assert final_event["state"]["ideas"]
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def test_turn_stream_done_matches_blocking_turn() -> None:
    """The final ``done`` event of a streamed turn mirrors the blocking ``turn`` result."""
    # idea ids are randomly generated, so compare the deterministic surface of the turn.
    prompt = "write bolder and find whitespace"
    index = load_test_index()

    stream = list(AdvisorEngine(index).turn_stream(prompt, {}))
    done = next(item for item in stream if item["type"] == "done")
    blocking = AdvisorEngine(index).turn(prompt, {})

    expected_score = blocking.score.to_dict() if blocking.score else None
    streamed_labels = [entry["label"] for entry in done["whitespace"]]
    blocking_labels = [entry.label for entry in blocking.whitespace]
    streamed_titles = [entry["title"] for entry in done["state"]["ideas"]]
    blocking_titles = [entry["title"] for entry in blocking.state["ideas"]]

    assert done["response"] == blocking.response
    assert done["score"] == expected_score
    assert done["plan"] == blocking.plan
    assert streamed_labels == blocking_labels
    assert streamed_titles == blocking_titles
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def test_turn_accepts_injected_resolution() -> None:
    """A resolution passed to ``turn`` takes precedence over the planner's own call."""
    planner = StaticPlanner(ToolCall("score_idea", {}))
    engine = AdvisorEngine(load_test_index(), planner=planner)
    forced_call = ToolCall("list_projects", {"sort": "likes"})
    injected = ToolResolution(status="valid", call=forced_call, errors=())

    outcome = engine.turn("score it", {}, resolution=injected)

    # the injected list_projects call wins over the planner's score_idea call
    recorded = outcome.state["last_tool_resolution"]
    assert recorded["call"]["name"] == "list_projects"
|
tests/test_app.py
CHANGED
|
@@ -109,6 +109,38 @@ def test_agent_turn_stream_endpoint_exports_ndjson_events() -> None:
|
|
| 109 |
assert lines[-1]["state"]["ideas"]
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
|
| 113 |
captured = {}
|
| 114 |
|
|
|
|
| 109 |
assert lines[-1]["state"]["ideas"]
|
| 110 |
|
| 111 |
|
| 112 |
+
def test_agent_turn_stream_streams_stage_and_tool_events() -> None:
    """The streaming endpoint emits a stage event before tokens and at least one named tool event."""
    request = {
        "message": "A local-first archive cartographer for family photos",
        "session_json": "{}",
    }
    body = asyncio.run(_read_streaming_response(agent_turn_stream(request)))
    records = [json.loads(raw) for raw in body.splitlines()]
    kinds = [record["type"] for record in records]

    assert "stage" in kinds
    assert any(record["type"] == "tool_event" and record.get("name") for record in records)
    assert kinds.index("stage") < kinds.index("token")
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def test_agent_turn_stream_runs_on_cpu_compute() -> None:
    """A request with ``compute`` set to ``"cpu"`` still streams a complete start-to-done turn."""
    request = {
        "message": "A local-first archive cartographer for family photos",
        "session_json": "{}",
        "compute": "cpu",
    }
    body = asyncio.run(_read_streaming_response(agent_turn_stream(request)))
    records = [json.loads(raw) for raw in body.splitlines()]
    first, last = records[0], records[-1]

    assert first["type"] == "start"
    assert last["type"] == "done"
    assert last["state"]["ideas"]
|
| 142 |
+
|
| 143 |
+
|
| 144 |
def test_transcribe_audio_endpoint_saves_audio(monkeypatch) -> None:
|
| 145 |
captured = {}
|
| 146 |
|
tests/test_model_runtime.py
CHANGED
|
@@ -8,13 +8,26 @@ from hackathon_advisor.model_runtime import (
|
|
| 8 |
render_context,
|
| 9 |
runtime_status,
|
| 10 |
system_prompt,
|
|
|
|
| 11 |
_disable_sampling_generation_defaults,
|
| 12 |
_normalize_xml_tool_output,
|
|
|
|
| 13 |
_strip_unused_generation_inputs,
|
| 14 |
)
|
| 15 |
from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled
|
| 16 |
|
| 17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
def test_rule_planner_emits_valid_search_call() -> None:
|
| 19 |
planner = RuleBasedPlanner()
|
| 20 |
|
|
@@ -81,6 +94,18 @@ def test_rule_planner_keeps_project_words_inside_ideas() -> None:
|
|
| 81 |
assert resolution.call.name == "save_idea"
|
| 82 |
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def test_rule_planner_splits_explicit_idea_pitch() -> None:
|
| 85 |
planner = RuleBasedPlanner()
|
| 86 |
|
|
@@ -215,3 +240,39 @@ def test_model_xml_fragment_is_normalized() -> None:
|
|
| 215 |
output = 'name="save_idea">{"title":"A","pitch":"B"}'
|
| 216 |
|
| 217 |
assert _normalize_xml_tool_output(output) == '<function name="save_idea">{"title":"A","pitch":"B"}</function>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
render_context,
|
| 9 |
runtime_status,
|
| 10 |
system_prompt,
|
| 11 |
+
_best_local_device,
|
| 12 |
_disable_sampling_generation_defaults,
|
| 13 |
_normalize_xml_tool_output,
|
| 14 |
+
_resolve_torch_device,
|
| 15 |
_strip_unused_generation_inputs,
|
| 16 |
)
|
| 17 |
from hackathon_advisor.zerogpu import gpu_task, zero_gpu_duration_seconds, zero_gpu_enabled
|
| 18 |
|
| 19 |
|
| 20 |
+
class FakeBackends:
    """Stand-in for ``torch.backends`` exposing only ``mps.is_available()``."""

    def __init__(self, mps: bool) -> None:
        class MPS:
            @staticmethod
            def is_available():
                # Report exactly the availability flag this fake was built with.
                return mps

        self.mps = MPS()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class FakeTorch:
    """Stand-in for the ``torch`` module exposing ``cuda.is_available()`` and ``backends.mps``."""

    def __init__(self, cuda: bool = False, mps: bool = False) -> None:
        class CUDA:
            @staticmethod
            def is_available():
                # Report exactly the availability flag this fake was built with.
                return cuda

        self.cuda = CUDA()
        self.backends = FakeBackends(mps)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
def test_rule_planner_emits_valid_search_call() -> None:
|
| 32 |
planner = RuleBasedPlanner()
|
| 33 |
|
|
|
|
| 94 |
assert resolution.call.name == "save_idea"
|
| 95 |
|
| 96 |
|
| 97 |
+
def test_rule_planner_does_not_match_commands_inside_idea_words() -> None:
    """This sentence-long idea pitch resolves to a valid ``save_idea`` call."""
    pitch = "A neighborhood seed swap archive that reminds gardeners when to plant shared seeds"

    outcome = RuleBasedPlanner().plan(pitch, {})

    assert outcome.status == "valid"
    assert outcome.call.name == "save_idea"
|
| 107 |
+
|
| 108 |
+
|
| 109 |
def test_rule_planner_splits_explicit_idea_pitch() -> None:
|
| 110 |
planner = RuleBasedPlanner()
|
| 111 |
|
|
|
|
| 240 |
output = 'name="save_idea">{"title":"A","pitch":"B"}'
|
| 241 |
|
| 242 |
assert _normalize_xml_tool_output(output) == '<function name="save_idea">{"title":"A","pitch":"B"}</function>'
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
def test_resolve_device_keeps_auto_and_explicit_cpu() -> None:
    """``"auto"`` and an explicit ``"cpu"`` are returned unchanged, whatever hardware is reported."""
    no_accelerator = FakeTorch()
    every_accelerator = FakeTorch(cuda=True, mps=True)

    assert _resolve_torch_device("auto", no_accelerator) == "auto"
    assert _resolve_torch_device("cpu", every_accelerator) == "cpu"
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def test_resolve_device_prefers_cuda_then_mps_then_cpu(monkeypatch) -> None:
    """With ``ADVISOR_ZERO_GPU`` unset, the best local device is cuda, then mps, then cpu."""
    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)

    ladder = (
        (True, True, "cuda"),
        (False, True, "mps"),
        (False, False, "cpu"),
    )
    for has_cuda, has_mps, expected in ladder:
        assert _best_local_device(FakeTorch(cuda=has_cuda, mps=has_mps)) == expected

    # "local" resolves through the same ladder
    mps_only = FakeTorch(cuda=False, mps=True)
    assert _resolve_torch_device("local", mps_only) == "mps"
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def test_resolve_device_unavailable_request_degrades_gracefully(monkeypatch) -> None:
    """Requesting a device that is not available resolves to one that is."""
    monkeypatch.delenv("ADVISOR_ZERO_GPU", raising=False)
    mps_only = FakeTorch(cuda=False, mps=True)

    # asking for cuda on an MPS-only box lands on mps, not a crash
    resolved = _resolve_torch_device("cuda", mps_only)
    assert resolved == "mps"
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def test_resolve_device_skips_cuda_under_zero_gpu(monkeypatch) -> None:
    """With ``ADVISOR_ZERO_GPU=1`` a reported CUDA device is ignored and cpu is chosen."""
    # In a ZeroGPU main process there is no local CUDA, and probing it is avoided.
    monkeypatch.setenv("ADVISOR_ZERO_GPU", "1")
    cuda_only = FakeTorch(cuda=True, mps=False)

    assert _best_local_device(cuda_only) == "cpu"
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def test_runtime_status_reports_configured_device() -> None:
    """``runtime_status`` reports the planner's configured device, or ``""`` for the rule planner."""
    model_planner = MiniCPMTransformersPlanner("openbmb/MiniCPM5-1B", device="local")
    model_status = runtime_status(model_planner).to_dict()
    assert model_status["device"] == "local"

    rules_status = runtime_status(RuleBasedPlanner()).to_dict()
    assert rules_status["device"] == ""
|
tests/test_profiling.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
from hackathon_advisor.profiling import (
|
| 4 |
+
TurnProfiler,
|
| 5 |
+
configure_logging,
|
| 6 |
+
messages_processed,
|
| 7 |
+
next_message_index,
|
| 8 |
+
resource_snapshot,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _turn_events() -> list[dict]:
|
| 13 |
+
return [
|
| 14 |
+
{"type": "start"},
|
| 15 |
+
{"type": "stage", "stage": "planning"},
|
| 16 |
+
{"type": "model_progress", "tokens": 5, "max_tokens": 180},
|
| 17 |
+
{"type": "model_progress", "tokens": 12, "max_tokens": 180},
|
| 18 |
+
{"type": "stage", "stage": "running_tool"},
|
| 19 |
+
{"type": "tool_event", "name": "save_idea"},
|
| 20 |
+
{"type": "tool_event", "name": "score_idea"},
|
| 21 |
+
{"type": "stage", "stage": "writing"},
|
| 22 |
+
{"type": "token", "text": "hello "},
|
| 23 |
+
{"type": "done"},
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_profiler_observes_tokens_tools_and_stage_durations() -> None:
    """Feeding a full turn into the profiler yields token, tool and timing stats."""
    profiler = TurnProfiler(message_index=1, compute="cpu", backend="minicpm-transformers")
    for event in _turn_events():
        profiler.observe(event)

    durations = profiler.durations()

    # The token count matches the last model_progress event (12), not the sum of both.
    assert profiler.tokens == 12
    assert profiler.tool_count == 2
    assert profiler.fell_back is False
    assert set(durations) >= {"total_ms", "decode_ms", "tools_ms", "write_ms"}
    assert all(value >= 0 for value in durations.values())
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def test_profiler_logs_start_and_summary() -> None:
    """The profiler logs one start line and exactly one summary line per turn."""

    class _Collector(logging.Handler):
        """Record the formatted message of every log record it receives."""

        def emit(self, record: logging.LogRecord) -> None:
            messages.append(record.getMessage())

    configure_logging()  # the advisor logger does not propagate, so capture it directly
    logger = logging.getLogger("hackathon_advisor")
    messages: list[str] = []
    handler = _Collector()
    logger.addHandler(handler)
    try:
        profiler = TurnProfiler(message_index=7, compute="gpu", backend="rules", message_chars=42)
        profiler.log_start()
        for event in _turn_events():
            profiler.observe(event)
        profiler.log_summary()
        profiler.log_summary()  # idempotent: a second call must not log again
    finally:
        # Always detach so later tests do not inherit this handler.
        logger.removeHandler(handler)

    turn_messages = [message for message in messages if "turn #7" in message]
    assert any("start" in message for message in turn_messages)
    assert sum("done" in message for message in turn_messages) == 1  # log_summary is idempotent
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def test_profiler_marks_fallback() -> None:
    """Observing a ``fallback`` event sets ``fell_back`` on the profiler."""
    profiler = TurnProfiler(message_index=2, compute="gpu", backend="minicpm-transformers")
    profiler.observe({"type": "fallback", "to": "cpu"})

    assert profiler.fell_back is True
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def test_resource_snapshot_is_best_effort_dict() -> None:
    """``resource_snapshot`` returns a dict that includes an ``rss_mb`` entry."""
    snapshot = resource_snapshot()

    assert isinstance(snapshot, dict)
    # rss is available on the platforms we run on; never raises regardless.
    assert "rss_mb" in snapshot
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def test_message_counter_increments() -> None:
    """Consecutive ``next_message_index`` calls advance the counter by one each."""
    start = messages_processed()
    first = next_message_index()
    second = next_message_index()

    assert second == first + 1
    # ">=" rather than "==": presumably tolerates other code bumping the counter - TODO confirm.
    assert messages_processed() >= start + 2
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|