"""macOS Vision OCR for peeks — shells out to the compiled `recognize/bin/ocr`. Two jobs: (1) identify a coding-CLI from the text in its prompt/status line — reliable on dark, small-text terminal screens where CLIP fingerprinting is fuzzy; (2) ground Puck's quip in the actual on-screen text so it's topical instead of a guess from pixels. Recognition is REGION-LOCAL (the OCR runs on Puck's peek crop), so it matches what's under the sprite — unlike a window title, which is window-global and lies under tabbed terminals (Ghostty) and browsers (Chrome). """ import base64 import os import subprocess import tempfile from pathlib import Path ROOT = Path(__file__).resolve().parent.parent OCR_BIN = ROOT / "recognize" / "bin" / "ocr" # Distinctive CONTENT markers (lowercased) from each CLI's prompt/status — NOT the window # title. codex & pi both surface "gpt-5.5", so neither uses it: pi is pinned by its # "(openai-codex)" backend tag / "pi v0." banner, codex by the SPACED "openai codex". _TOOL_MARKERS = { "claude-code": ["claude code", "claude max", "auto mode on", "for agents", "/release-notes"], "codex": ["openai codex", "/model to change", "codex app", "/fast to enable"], "opencode": ["opencode", "glm-5", "z.ai coding", "esc interrupt"], "pi": ["pi v0.", "openai-codex", "/272k", "ctrl+c/ctrl+d"], "amp": ["welcome to amp", "ctrl+o for help", "- smart -", "— smart —"], } # shell/login chrome that's noise for a topical quip _NOISE = ("last login", "cd /", "exec ", "ttys", "fnm_version") def available() -> bool: return OCR_BIN.exists() def ocr_lines(image_data_url: str, timeout: float = 8.0) -> list[str]: """Recognized text lines from a data-URL image (empty list on any failure).""" if not OCR_BIN.exists(): return [] _, _, b64 = image_data_url.partition(",") with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f: f.write(base64.b64decode(b64)) tmp = f.name try: out = subprocess.run( [str(OCR_BIN), tmp], capture_output=True, text=True, timeout=timeout ) return [ln.strip() for ln in out.stdout.splitlines() if ln.strip()] except Exception: # noqa: BLE001 — OCR is best-effort; a failure just means no hint return [] finally: os.unlink(tmp) def detect_tool(lines: list[str]) -> str | None: """Best-scoring tool by distinctive marker hits; None if nothing matched.""" blob = "\n".join(lines).lower() best, best_score = None, 0 for label, markers in _TOOL_MARKERS.items(): score = sum(1 for m in markers if m in blob) if score > best_score: best, best_score = label, score return best def _is_noise(line: str) -> bool: """Shell/terminal furniture that crowds out real content in the quip's excerpt.""" low = line.lower() if any(n in low for n in _NOISE): return True if len(line) <= 3: # OCR crumbs of the shell powerline ("Evuln", time glyphs) return True if "×" in line: # terminal title-bar dimensions, e.g. "140×43" return True return False def topical_excerpt(lines: list[str], cap: int = 240) -> str: """A short, denoised snippet of on-screen text to anchor the quip in real words. Drops shell/title-bar furniture so content (prompt, output, status) leads the budget.""" return " · ".join(ln for ln in lines if not _is_noise(ln))[:cap]