File size: 3,443 Bytes
3c124f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""macOS Vision OCR for peeks — shells out to the compiled `recognize/bin/ocr`.

Two jobs: (1) identify a coding-CLI from the text in its prompt/status line — reliable on
dark, small-text terminal screens where CLIP fingerprinting is fuzzy; (2) ground Puck's
quip in the actual on-screen text so it's topical instead of a guess from pixels.

Recognition is REGION-LOCAL (the OCR runs on Puck's peek crop), so it matches what's under
the sprite — unlike a window title, which is window-global and lies under tabbed terminals
(Ghostty) and browsers (Chrome).
"""
import base64
import os
import subprocess
import tempfile
from pathlib import Path

# Directory two levels above this file; the OCR helper path is resolved relative to it.
ROOT = Path(__file__).resolve().parent.parent
# Compiled OCR executable that this module shells out to.
OCR_BIN = ROOT.joinpath("recognize", "bin", "ocr")

# Lowercased substrings taken from each CLI's own prompt/status CONTENT (never the window
# title). A tool's score is the number of its markers found in the OCR text.
# "gpt-5.5" is deliberately absent: both codex and pi display it, so it cannot tell them
# apart. pi is keyed on its hyphenated "(openai-codex)" backend tag and "pi v0." banner,
# while codex is keyed on the space-separated "openai codex".
_TOOL_MARKERS = {
    "claude-code": [
        "claude code",
        "claude max",
        "auto mode on",
        "for agents",
        "/release-notes",
    ],
    "codex": [
        "openai codex",
        "/model to change",
        "codex app",
        "/fast to enable",
    ],
    "opencode": [
        "opencode",
        "glm-5",
        "z.ai coding",
        "esc interrupt",
    ],
    "pi": [
        "pi v0.",
        "openai-codex",
        "/272k",
        "ctrl+c/ctrl+d",
    ],
    "amp": [
        "welcome to amp",
        "ctrl+o for help",
        "- smart -",
        "— smart —",
    ],
}
# Lowercased fragments of shell/login chrome; a line containing any of them is treated
# as noise and kept out of the topical excerpt.
_NOISE = (
    "last login",
    "cd /",
    "exec ",
    "ttys",
    "fnm_version",
)


def available() -> bool:
    """Report whether the compiled OCR helper is present at ``OCR_BIN``.

    Only existence of the path is checked; nothing is executed.
    """
    helper_present = OCR_BIN.exists()
    return helper_present


def ocr_lines(image_data_url: str, timeout: float = 8.0) -> list[str]:
    """Recognized text lines from a data-URL image (empty list on any failure).

    The base64 payload (everything after the first comma of the data URL) is decoded,
    written to a temporary ``.png`` file, and passed to the ``OCR_BIN`` helper; the
    helper's stdout is split into lines.

    Args:
        image_data_url: Image encoded as ``<header>,<base64 payload>``.
        timeout: Seconds to wait for the OCR helper before giving up.

    Returns:
        The helper's stdout lines, stripped, with blank lines dropped. An empty list
        when the helper is missing, the payload is empty or not valid base64, the
        helper cannot be run or times out, or any other error occurs.
    """
    if not OCR_BIN.exists():
        return []
    _, _, b64 = image_data_url.partition(",")
    tmp = None
    try:
        # Decode inside the guarded region: a malformed payload must yield [] rather
        # than raise, and must not leave a temp file behind.
        payload = base64.b64decode(b64)
        if not payload:  # no image bytes (e.g. no comma in the URL) — nothing to OCR
            return []
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
            tmp = f.name  # record the path first so cleanup runs even if write fails
            f.write(payload)
        out = subprocess.run(
            [str(OCR_BIN), tmp], capture_output=True, text=True, timeout=timeout
        )
        return [ln.strip() for ln in out.stdout.splitlines() if ln.strip()]
    except Exception:  # noqa: BLE001 — OCR is best-effort; a failure just means no hint
        return []
    finally:
        if tmp is not None:
            try:
                os.unlink(tmp)
            except OSError:
                # Cleanup is best-effort too; never let it mask the result.
                pass


def detect_tool(lines: list[str]) -> str | None:
    """Best-scoring tool by distinctive marker hits; None if nothing matched."""
    blob = "\n".join(lines).lower()
    best, best_score = None, 0
    for label, markers in _TOOL_MARKERS.items():
        score = sum(1 for m in markers if m in blob)
        if score > best_score:
            best, best_score = label, score
    return best


def _is_noise(line: str) -> bool:
    """Tell whether a line is shell/terminal furniture rather than real content.

    A line is noise when it is at most 3 characters long (short OCR crumbs), contains
    "×" (title-bar dimensions such as "140×43"), or contains any ``_NOISE`` fragment,
    compared case-insensitively.
    """
    if len(line) <= 3 or "×" in line:
        return True
    lowered = line.lower()
    return any(fragment in lowered for fragment in _NOISE)


def topical_excerpt(lines: list[str], cap: int = 240) -> str:
    """Build a short, denoised snippet of on-screen text for the quip to draw on.

    Lines flagged by ``_is_noise`` are dropped, the rest are joined with " · " in their
    original order, and the result is cut to its first ``cap`` characters.
    """
    kept = [text for text in lines if not _is_noise(text)]
    joined = " · ".join(kept)
    return joined[:cap]