Spaces:
Running
Running
| """Puck daemon — gradio.Server doubles as the HF Space backend and the local daemon. | |
| Dumb pipes by design: this process receives wire events, validates them against | |
| the shared schema (/schema/event.schema.json), queues them, and serves the built | |
| frontend. All policy — scoring, interruption taste, personality — lives in the | |
| TS engine running in the browser/webview. Keep it that way, or the eventual | |
| Tauri/Rust swap stops being ~150 lines. | |
| """ | |
| import asyncio | |
| import base64 | |
| import json | |
| import os | |
| import re | |
| import shutil | |
| import subprocess | |
| import sys | |
| import threading | |
| import time | |
| import uuid | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from fastapi import Request, Response | |
| from fastapi.responses import FileResponse, JSONResponse | |
| from gradio import Server | |
| from jsonschema import Draft202012Validator | |
def _load_env_local() -> None:
    """Load ``KEY=VALUE`` pairs from the project's ``.env.local`` into ``os.environ``.

    The project keeps URLs/secrets in .env.local (not .env). This has to run
    before importing brain, which reads PUCK_* env at import time. ``setdefault``
    is used so a real exported env var always wins over the file.

    Blank lines, ``#`` comments, lines without ``=`` and lines whose key is empty
    are skipped. A missing file is a no-op.
    """
    env = Path(__file__).resolve().parent.parent / ".env.local"
    if not env.exists():
        return
    # explicit encoding — don't let the platform locale decide how the file decodes
    for raw in env.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        if not key:
            # a stray "=value" line would make os.environ reject the empty name
            # and abort the whole daemon at import time
            continue
        os.environ.setdefault(key, value.strip())
| _load_env_local() | |
| import brain # noqa: E402 — must follow the env load so it sees the configured URLs | |
| import ocr # noqa: E402 — Vision OCR: terminal-CLI id + topical grounding (region-local) | |
| import recognizer # noqa: E402 — visual fingerprint matcher (ONNX CLIP), for site labels | |
# Repository root: two levels up from this file.
ROOT = Path(__file__).resolve().parent.parent
# The shared wire-event schema, parsed once at import and wrapped in a validator.
SCHEMA = json.loads((ROOT / "schema" / "event.schema.json").read_text())
VALIDATOR = Draft202012Validator(SCHEMA)
# Built frontend bundle served by the SPA routes.
FRONTEND_DIST = ROOT / "frontend" / "dist"
# Append-only JSONL files counted by molt_status (traces are written by post_traces).
TRACES_PATH = ROOT / "server" / "data" / "traces.jsonl"
SFT_PATH = ROOT / "molt" / "data" / "sft.jsonl"
# curated rated traces that must accumulate before a molt (LoRA retrain from base)
# is eligible; the molt itself is eval-gated in molt/, not triggered here.
MOLT_THRESHOLD = 200
app = Server()
# in-memory event queue: watchers POST in, the frontend drains. Survives nothing —
# real persistence arrives with the memory store, not here.
_events: list[dict] = []
# guards _events (and the traces file append in post_traces)
_lock = threading.Lock()
async def post_event(request: Request) -> JSONResponse:
    """Accept one wire event, validate it against the schema, and enqueue it.

    Responses: 400 when the body is not JSON, 422 (with one detail string per
    violation) when it fails schema validation, 202 with the event id once queued.
    """
    try:
        event = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)

    problems = []
    for violation in VALIDATOR.iter_errors(event):
        problems.append(f"{violation.json_path}: {violation.message}")
    if problems:
        # loud contract failure — a watcher drifted from event.schema.json
        return JSONResponse(
            {"error": "schema validation failed", "details": problems},
            status_code=422,
        )

    # server-side defaults, applied only when the sender left them out
    event.setdefault("id", f"evt_{uuid.uuid4().hex[:12]}")
    event.setdefault("ts", datetime.now(timezone.utc).isoformat())

    with _lock:
        _events.append(event)
        # backstop against an unattended queue growing forever — keep the newest 200
        del _events[:-200]

    return JSONResponse({"ok": True, "id": event["id"]}, status_code=202)
def pending_events() -> JSONResponse:
    """Hand every queued event to the caller and empty the queue.

    The frontend polls this; because the queue is cleared under the same lock
    that snapshots it, each event is delivered once.
    """
    with _lock:
        batch = list(_events)
        _events.clear()
    return JSONResponse({"events": batch})
async def post_traces(request: Request) -> JSONResponse:
    """Night Bloom ships the day's decision traces here — the molt's training data.

    Append-only JSONL; the dataset builder reads it later. Dumb storage, no policy.

    Expects ``{"records": [object, ...]}``. Returns 400 for a non-JSON body or any
    other shape, 202 with the number of appended records on success.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    # a JSON array/string/number has no .get — treat it as a shape error, not a 500
    records = body.get("records") if isinstance(body, dict) else None
    if not isinstance(records, list) or not all(isinstance(r, dict) for r in records):
        return JSONResponse({"error": "expected {records: object[]}"}, status_code=400)
    # serialise before touching the file so a failure cannot leave a partial append
    lines = [json.dumps(r, separators=(",", ":")) + "\n" for r in records]
    TRACES_PATH.parent.mkdir(parents=True, exist_ok=True)
    with _lock, TRACES_PATH.open("a", encoding="utf-8") as f:
        f.writelines(lines)
    return JSONResponse({"ok": True, "appended": len(records)}, status_code=202)
async def brain_see(request: Request) -> JSONResponse:
    """Puck's eyes: a screenshot (data URL) → wire events for what he notices.

    Perceived events run the SAME schema validation + queue as hook events, so
    the engine scores vision and hooks identically — vision is just another source.

    Expects ``{"image": data-url, "fairy_state"?: ...}``. Returns 400 for a
    non-JSON or mis-shaped body, 503 when the brain raises ``BrainError``, and
    otherwise the observed/queued counts plus the queued events.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        # same contract as post_event: malformed JSON is a 400, not a 500
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    image = body.get("image") if isinstance(body, dict) else None
    if not isinstance(image, str) or not image.startswith("data:image"):
        return JSONResponse({"error": "expected {image: data-url, fairy_state?}"}, status_code=400)
    try:
        observed = await asyncio.to_thread(brain.see, image, body.get("fairy_state", {}))
    except brain.BrainError as e:
        return _brain_503(e)
    queued = []
    with _lock:
        for ev in observed:
            if not isinstance(ev, dict):
                continue  # not even an object — dropped like any off-contract perception
            ev.setdefault("id", f"evt_{uuid.uuid4().hex[:12]}")
            ev["ts"] = datetime.now(timezone.utc).isoformat()
            ev.setdefault("payload", {"via": "vision"})  # tag perceived events (external sources pre-tag their own)
            if not list(VALIDATOR.iter_errors(ev)):  # off-contract perceptions are dropped, not queued
                _events.append(ev)
                queued.append(ev)
        # same backstop as post_event — keep only the newest 200
        _events[:] = _events[-200:]
    return JSONResponse({"observed": len(observed), "queued": len(queued), "events": queued})
# Where peek screenshots and their log.jsonl index are stored.
PEEKS_DIR = ROOT / "server" / "data" / "peeks"
def _log_peek(
    image_data_url: str, quip: str, fairy_state: dict, context: str = "", emotion: str = ""
) -> None:
    """Persist one peek for later tuning: the screenshot plus a JSONL log row.

    The image bytes are decoded from the data URL and written under PEEKS_DIR
    with a UTC-timestamp file name; a row with the context, quip, emotion and
    fairy state is appended to ``log.jsonl`` beside it. Best-effort: any failure
    is printed and swallowed.
    """
    try:
        PEEKS_DIR.mkdir(parents=True, exist_ok=True)
        meta, _sep, payload = image_data_url.partition(",")
        suffix = "png"
        if "jpeg" in meta:
            suffix = "jpg"
        stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S-%f")
        filename = f"{stamp}.{suffix}"
        (PEEKS_DIR / filename).write_bytes(base64.b64decode(payload))
        entry = {
            "ts": stamp,
            "image": filename,
            "context": context,
            "quip": quip,
            "emotion": emotion,
            "fairy": fairy_state,
        }
        with (PEEKS_DIR / "log.jsonl").open("a") as log:
            log.write(json.dumps(entry, separators=(",", ":")) + "\n")
    except Exception as e:  # noqa: BLE001 — logging must never break a peek
        print(f"peek-log failed: {e}")
def _recent_peeks(n: int) -> list[dict]:
    """Puck's memory = his recent observations (the tail of the peek log).

    Returns up to the last ``n`` log rows, oldest first. Rows that are not valid
    JSON objects are skipped. A missing log file or a non-positive ``n`` yields
    an empty list.
    """
    if n <= 0:
        # lines[-0:] is the WHOLE list, so "give me zero" must be handled explicitly
        return []
    try:
        lines = (PEEKS_DIR / "log.jsonl").read_text().splitlines()
    except FileNotFoundError:
        return []
    out = []
    for ln in lines[-n:]:
        try:
            rec = json.loads(ln)
        except json.JSONDecodeError:
            continue
        if isinstance(rec, dict):  # callers use .get(); a bare number/string row would crash them
            out.append(rec)
    return out
def memories(limit: int = 30) -> dict:
    """Return the latest peek log rows, newest first, for the companion's memory tab."""
    newest_first = _recent_peeks(limit)[::-1]
    return {"memories": newest_first}
# Fingerprint reference library: one sub-directory per label.
RECOGNIZE_REFS = ROOT / "recognize" / "refs"
def peek_image(name: str) -> Response:
    """Return a stored peek screenshot by file name (Memory tab's labeling view).

    Names containing a slash or ``..`` are rejected with 400; anything that does
    not resolve to a regular file inside PEEKS_DIR is a 404.
    """
    has_separator = "/" in name
    if has_separator or ".." in name:
        return JSONResponse({"error": "bad name"}, status_code=400)

    candidate = (PEEKS_DIR / name).resolve()
    root = PEEKS_DIR.resolve()
    if root not in candidate.parents or not candidate.is_file():
        return JSONResponse({"error": "not found"}, status_code=404)
    return FileResponse(candidate)
def recognize_labels() -> dict:
    """List the reference labels already on disk so the labeling UI can offer reuse.

    A label is any directory directly under RECOGNIZE_REFS; names come back
    sorted. If the library directory does not exist the list is empty.
    """
    names = []
    if RECOGNIZE_REFS.exists():
        names = [entry.name for entry in RECOGNIZE_REFS.iterdir() if entry.is_dir()]
        names.sort()
    return {"labels": names}
async def recognize_label(request: Request) -> JSONResponse:
    """User taught Puck what a peek is → enroll that screenshot as a fingerprint
    reference under recognize/refs/<label>/ (the Touch-ID-style 'it asked, I told it').

    Expects ``{"image": <peek file name>, "label": <text>}``. The label is
    lower-cased and spaces become dashes. Returns 400 for a malformed body or an
    unusable label, 404 when the peek file does not exist, otherwise the label
    and the number of entries now in its directory.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse({"error": "expected {image, label}"}, status_code=400)
    image = body.get("image")
    raw_label = body.get("label")
    if not isinstance(image, str) or "/" in image or not isinstance(raw_label, str):
        return JSONResponse({"error": "expected {image, label}"}, status_code=400)
    label = raw_label.strip().lower().replace(" ", "-")
    # "." and ".." match the character class but name the refs dir itself / its
    # parent — accepting them would copy the file outside the label library
    if label in {".", ".."} or not re.fullmatch(r"[a-z0-9._-]+", label):
        return JSONResponse({"error": "expected {image, label}"}, status_code=400)
    src = (PEEKS_DIR / image).resolve()
    if PEEKS_DIR.resolve() not in src.parents or not src.is_file():
        return JSONResponse({"error": "no such peek"}, status_code=404)
    dst = RECOGNIZE_REFS / label
    dst.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dst / image)
    # re-embed the library so the new fingerprint is live (background — don't block)
    threading.Thread(target=recognizer.build_index, daemon=True).start()
    return JSONResponse({"ok": True, "label": label, "count": len(list(dst.glob("*")))})
async def recognize_build() -> dict:
    """Rebuild the fingerprint index on a worker thread and return its result.

    (Re)embeds the whole fingerprint library. Downloads the model on first run.
    """
    outcome = await asyncio.to_thread(recognizer.build_index)
    return outcome
async def brain_bloom(request: Request) -> JSONResponse:
    """Night Bloom: distill the day's peek observations into durable garden memories.

    Expects a JSON object (``fairy_state`` is optional). The quips of the last 40
    peek log rows are handed to ``brain.bloom`` on a worker thread. Returns 400
    for a non-JSON or non-object body and 503 when the brain raises ``BrainError``.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    if not isinstance(body, dict):
        return JSONResponse({"error": "expected {fairy_state?}"}, status_code=400)
    # skip rows that are not objects so one odd log line cannot fail the request
    quips = [p["quip"] for p in _recent_peeks(40) if isinstance(p, dict) and p.get("quip")]
    try:
        mems = await asyncio.to_thread(brain.bloom, quips, body.get("fairy_state", {}))
    except brain.BrainError as e:
        return _brain_503(e)
    return JSONResponse({"memories": mems})
# Frontmost-app names for which _active_context also asks for the current tab URL.
_BROWSERS = {"Safari", "Google Chrome", "Arc", "Brave Browser", "Microsoft Edge"}
# where the visual recognizer earns its keep — app name alone can't tell the CLI tools apart
# (matched as lowercase substrings of the frontmost app name in brain_peek)
_TERMINAL_APPS = ("terminal", "iterm", "ghostty", "warp", "kitty", "alacritty", "wezterm", "tabby")
def _osa(script: str) -> str:
    """Run one AppleScript snippet through ``osascript`` (2 s timeout).

    Returns the stripped stdout, or an empty string if anything at all goes wrong.
    """
    command = ["osascript", "-e", script]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=2)
    except Exception:  # noqa: BLE001 — context is best-effort
        return ""
    return completed.stdout.strip()
def _active_context() -> str:
    """Frontmost app + window title (+ browser URL) as TEXT grounding for a peek,
    so the VLM can name the actual app/site (claude vs codex, a github page) rather
    than guess from pixels. macOS only; needs Accessibility/Automation permission
    (prompts once). Best-effort — empty string if unavailable.

    Returns the parts joined with `` · ``, e.g. ``app=… · window=… · url=…``.
    """
    if sys.platform != "darwin":
        return ""
    app = _osa('tell application "System Events" to name of first process whose frontmost is true')
    if not app:
        return ""

    def quoted(value: str) -> str:
        # AppleScript string literal: a raw `"` or `\` in the app name would end
        # the literal early and change what the script does
        return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'

    bits = [f"app={app}"]
    title = _osa(f'tell application "System Events" to tell process {quoted(app)} to name of front window')
    if title:
        bits.append(f"window={title}")
    if app in _BROWSERS:
        # Safari names the property differently from the other listed browsers
        tab = "current tab" if app == "Safari" else "active tab"
        url = _osa(f'tell application {quoted(app)} to get URL of {tab} of front window')
        if url:
            bits.append(f"url={url}")
    return " · ".join(bits)
async def brain_peek(request: Request) -> JSONResponse:
    """The companion loop: Puck peeks at a small screen patch → ONE in-character
    line (the VLM sees and voices in one call). No events, no queue — just a quip.

    Expects ``{"image": data-url, "fairy_state"?: ...}``. Returns 400 for a
    non-JSON or mis-shaped body, 503 when the brain raises ``BrainError``,
    otherwise ``{"quip", "emotion"}``. Every successful peek is also logged.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    # non-object JSON (array, string, …) has no .get — report it as a shape error
    image = body.get("image") if isinstance(body, dict) else None
    if not isinstance(image, str) or not image.startswith("data:image"):
        return JSONResponse({"error": "expected {image, fairy_state?}"}, status_code=400)
    fairy = body.get("fairy_state", {})
    context = await asyncio.to_thread(_active_context)  # frontmost app/window/url
    # OCR the peek crop (region-local, so it reads what's under the sprite — unlike the
    # window title, which lies under tabbed Ghostty/Chrome). Two payoffs: name the CLI
    # from its prompt/status text (beats CLIP on dark, small-text screens), and feed the
    # real on-screen words back so the quip is topical instead of guessed from pixels.
    ocr_lines = await asyncio.to_thread(ocr.ocr_lines, image)
    # pull the lowercase app name back out of the "app=… · …" context string
    app = context[4:].split(" · ", 1)[0].lower() if context.startswith("app=") else ""
    if any(t in app for t in _TERMINAL_APPS):
        tool = ocr.detect_tool(ocr_lines)
        if tool:
            context += f" · recognized={tool}"
    excerpt = ocr.topical_excerpt(ocr_lines)
    if excerpt:
        context += f' · screen="{excerpt}"'
    try:
        result = await asyncio.to_thread(brain.comment, image, fairy, context)
    except brain.BrainError as e:
        return _brain_503(e)
    quip, emotion = result["quip"], result["emotion"]
    await asyncio.to_thread(_log_peek, image, quip, fairy, context, emotion)  # tuning dataset
    return JSONResponse({"quip": quip, "emotion": emotion})
def brain_vision_health() -> dict:
    """Pass through whatever ``brain.vision_health()`` reports."""
    report = brain.vision_health()
    return report
def brain_warm() -> dict:
    """Start ``brain.warm`` on a daemon thread and return immediately.

    Kicks the (possibly cold, scale-to-zero) vision backend awake without blocking
    the caller — the frontend pings this on load so Modal is warming before the
    first peek.
    """
    warmer = threading.Thread(target=brain.warm, daemon=True)
    warmer.start()
    return {"ok": True, "warming": True}
def health() -> dict:
    """Liveness probe: a fixed service name plus the current epoch time."""
    now = time.time()
    return {"ok": True, "service": "puck-daemon", "time": now}
# the WKWebView overlay's inspector is unusable behind a fullscreen always-on-top
# transparent window, so the frontend forwards console here → a file we can tail
# (debug_log appends one line per forwarded message)
FRONTEND_LOG = Path("/tmp/puck-frontend.log")
async def debug_log(request: Request) -> JSONResponse:
    """Append one forwarded frontend console message to FRONTEND_LOG.

    Expects ``{"level"?: ..., "msg"?: ...}``; the message is truncated to 2000
    characters and prefixed with a UTC ``HH:MM:SS`` stamp and the level. Returns
    400 when the body is not a JSON object.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "bad json"}, status_code=400)
    if not isinstance(body, dict):
        # valid JSON but not an object — .get below would raise
        return JSONResponse({"error": "bad json"}, status_code=400)
    stamp = datetime.now(timezone.utc).strftime("%H:%M:%S")
    line = f"{stamp} [{body.get('level', 'log')}] {str(body.get('msg', ''))[:2000]}"
    # explicit UTF-8: console text is arbitrary and must not depend on the locale
    with FRONTEND_LOG.open("a", encoding="utf-8") as f:
        f.write(line + "\n")
    return JSONResponse({"ok": True})
# ---- native voice (macOS `say`) ---------------------------------------------
# The Tauri overlay is a WKWebView where browser TTS (Kokoro/speechSynthesis) is
# broken, so the daemon speaks for it: `say` plays straight out of the Mac's
# speakers, sidestepping the webview's audio entirely. Local/macOS only — on the
# HF Space (Linux) this 501s and the frontend keeps its browser voice.
# The most recently started `say` process (None until the first utterance).
_say_proc: subprocess.Popen | None = None
# Guards reads/writes of _say_proc in _run_say and voice_stop.
_say_lock = threading.Lock()
# Parsed voice list, filled on the first voice_voices call.
_say_voices_cache: list[dict] | None = None
def _say_available() -> bool:
    """True only on macOS when a ``say`` executable is on PATH."""
    if sys.platform != "darwin":
        return False
    return shutil.which("say") is not None
def _run_say(args: list[str]) -> None:
    """Start the given command as the current utterance and block until it exits.

    Under the lock, any still-running previous process is terminated (a new
    utterance supersedes the one in flight) and the new one is recorded in the
    module global. The wait happens outside the lock, on this call's own process
    handle, because the global may be reassigned by a newer call meanwhile.
    """
    global _say_proc
    with _say_lock:
        previous = _say_proc
        if previous and previous.poll() is None:
            previous.terminate()
        mine = _say_proc = subprocess.Popen(args)
    mine.wait()
async def voice_say(request: Request) -> JSONResponse:
    """Speak ``text`` through the native ``say`` command.

    Expects ``{"text": str, "voice"?: str, "rate"?: number}`` where rate is words
    per minute. Returns 501 when native voice is unavailable, 400 for a non-JSON
    or mis-shaped body, and ``{"ok": true}`` once speech ends (or is superseded).
    """
    if not _say_available():
        return JSONResponse({"error": "native voice unavailable (non-macOS)"}, status_code=501)
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    text = body.get("text") if isinstance(body, dict) else None
    if not isinstance(text, str) or not text.strip():
        return JSONResponse({"error": "expected {text, voice?, rate?}"}, status_code=400)
    args = ["say"]
    voice = body.get("voice")
    if isinstance(voice, str) and voice:
        args += ["-v", voice]
    rate = body.get("rate")  # words per minute
    # bool is an int subclass — `true` must not turn into "-r 1"
    if isinstance(rate, (int, float)) and not isinstance(rate, bool):
        try:
            wpm = int(rate)
        except (ValueError, OverflowError):  # NaN / Infinity slipped through lenient JSON
            wpm = 0
        if wpm > 0:
            args += ["-r", str(wpm)]
    # "--" ends option parsing so text beginning with "-" is spoken, not read as a flag
    args.append("--")
    args.append(text[:600])  # cap runaway narration; list form = no shell injection
    await asyncio.to_thread(_run_say, args)  # returns when speech ends (or is superseded)
    return JSONResponse({"ok": True})
def voice_stop() -> dict:
    """Terminate the running utterance, if there is one (mute / interrupt)."""
    with _say_lock:
        current = _say_proc
        if current and current.poll() is None:
            current.terminate()
    return {"ok": True}
def voice_voices() -> dict:
    """List the English ``say`` voices for the Settings picker.

    The output of ``say -v ?`` is parsed once and kept in a module-level cache.
    When native voice is unavailable the list is empty.
    """
    global _say_voices_cache
    if not _say_available():
        return {"voices": []}
    if _say_voices_cache is None:
        listing = subprocess.run(["say", "-v", "?"], capture_output=True, text=True).stdout
        # "<name>   en_XX   # sample sentence" — keep the name and the language tag
        hits = (re.match(r"^(.+?)\s+(en[_-][A-Za-z]+)\s+#", row) for row in listing.splitlines())
        _say_voices_cache = [{"id": hit.group(1).strip(), "lang": hit.group(2)} for hit in hits if hit]
    return {"voices": _say_voices_cache}
def _count_lines(path: Path) -> int:
    """Count the non-blank lines in ``path``; a missing file counts as zero."""
    count = 0
    try:
        with path.open() as handle:
            for row in handle:
                if row.strip():
                    count += 1
    except FileNotFoundError:
        return 0
    return count
def molt_status() -> dict:
    """Report molt readiness for the Night Bloom screen.

    Counts the accumulated traces and the SFT base rows and compares the trace
    count with MOLT_THRESHOLD. Reporting only — the molt (retrain + eval gate)
    lives in molt/, never runs from a web request.
    """
    trace_count = _count_lines(TRACES_PATH)
    sft_count = _count_lines(SFT_PATH)
    is_ready = trace_count >= MOLT_THRESHOLD
    return {
        "traces": trace_count,
        "threshold": MOLT_THRESHOLD,
        "sftBase": sft_count,
        "ready": is_ready,
    }
| # ---- brain proxy ------------------------------------------------------------ | |
| # The frontend treats these as optional: 503 → fall back to the scripted engine. | |
def _brain_503(e: brain.BrainError) -> JSONResponse:
    """Turn a brain failure into a 503 response carrying the error text."""
    payload = {"error": str(e)}
    return JSONResponse(payload, status_code=503)
def brain_health() -> dict:
    """Pass through whatever ``brain.health()`` reports."""
    report = brain.health()
    return report
async def brain_narrate(request: Request) -> JSONResponse:
    """Ask the brain to narrate one event.

    Expects ``{"event": object, "fairy_state"?: ...}``. Returns 400 for a
    non-JSON or mis-shaped body, 503 when the brain raises ``BrainError``,
    otherwise ``{"text": ...}``.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    # non-object JSON has no .get — report it as a shape error, not a 500
    event = body.get("event") if isinstance(body, dict) else None
    if not isinstance(event, dict):
        return JSONResponse({"error": "expected {event, fairy_state}"}, status_code=400)
    try:
        # brain.py uses blocking urllib; a thread keeps the event loop free for other requests
        text = await asyncio.to_thread(brain.narrate, event, body.get("fairy_state", {}))
    except brain.BrainError as e:
        return _brain_503(e)
    return JSONResponse({"text": text})
async def brain_chat(request: Request) -> JSONResponse:
    """Chat with Puck, grounded in his recent peek memory.

    Expects ``{"message": str, "fairy_state"?: ..., "history"?: ...}``. Returns
    400 for a non-JSON or mis-shaped body, 503 when the brain raises
    ``BrainError``, otherwise ``{"text": ...}``.
    """
    try:
        body = await request.json()
    except json.JSONDecodeError:
        return JSONResponse({"error": "body must be JSON"}, status_code=400)
    # non-object JSON has no .get — report it as a shape error, not a 500
    message = body.get("message") if isinstance(body, dict) else None
    if not isinstance(message, str) or not message.strip():
        return JSONResponse({"error": "expected {message, fairy_state, history?}"}, status_code=400)
    # ground the reply in what Puck has recently SEEN (his peek memory)
    memory = [p["quip"] for p in _recent_peeks(12) if isinstance(p, dict) and p.get("quip")]
    try:
        text = await asyncio.to_thread(
            brain.chat, message, body.get("fairy_state", {}), body.get("history"), memory
        )
    except brain.BrainError as e:
        return _brain_503(e)
    return JSONResponse({"text": text})
| # Serve the built frontend (run `bun run build` in frontend/ first). | |
| # NB: a StaticFiles mount at "/" is greedy and shadows gradio's own | |
| # /gradio_api/* routes, which makes gradio's launch self-check 404 and the | |
| # daemon exit. So serve via scoped routes — "/" and "/assets/{path}" can't | |
| # shadow /gradio_api/*. Vite (:5173) is still the live-reload dev path. | |
def _spa_file(name: str) -> Response:
    """Serve one file from the built frontend, by path relative to FRONTEND_DIST.

    Returns the file when it resolves to a regular file inside the dist
    directory. Otherwise: a 503 with a build hint for ``index.html`` (the
    frontend has not been built), a 404 for anything else.
    """
    # resolve BOTH sides: comparing a resolved target against an unresolved root
    # fails whenever dist (or a parent) is a symlink, and nothing would be served
    dist = FRONTEND_DIST.resolve()
    target = (dist / name).resolve()
    # stay inside dist — never serve a path that escapes it
    if dist in target.parents and target.is_file():
        return FileResponse(target)
    if name == "index.html":
        return JSONResponse(
            {"hint": "frontend not built — run `bun run build` in frontend/, or use vite :5173"},
            status_code=503,
        )
    return JSONResponse({"error": "not found"}, status_code=404)
def spa_index() -> Response:
    """Serve the built frontend's entry page."""
    entry_page = "index.html"
    return _spa_file(entry_page)
def spa_assets(path: str) -> Response:
    """Serve a file from the built frontend's ``assets/`` directory."""
    relative = f"assets/{path}"
    return _spa_file(relative)
def spa_favicon() -> Response:
    """Serve the built frontend's favicon."""
    icon = "favicon.svg"
    return _spa_file(icon)
if __name__ == "__main__":
    # Local dev binds loopback:7777; a container (HF Space) sets PUCK_HOST=0.0.0.0 and
    # PORT to the Space's app_port (7860) so the platform can route to it.
    bind_host = os.environ.get("PUCK_HOST", "127.0.0.1")
    bind_port = int(os.environ.get("PORT", "7777"))
    app.launch(server_name=bind_host, server_port=bind_port)