File size: 2,107 Bytes
"""Encode images as base64 data URIs for llama.cpp vision chat handlers.

Shared by the Mac collector (attachments) and the UI (manual upload).
"""
from __future__ import annotations
import base64
import mimetypes
from pathlib import Path
# Files larger than this are skipped so payloads and model context stay sane.
MAX_BYTES = 4 * 1024**2  # 4 MB

# MIME types the helpers in this module accept as images.
IMAGE_MIMES = {
    "image/gif",
    "image/heic",
    "image/jpeg",
    "image/png",
    "image/webp",
}
def is_image(path: str) -> bool:
    """Return True when *path* is named like a supported image.

    Only the file name is inspected; the file is never opened.

    Args:
        path: File name or path to classify.

    Returns:
        True for a ``.heic``/``.heif`` suffix (any letter case) or when the
        guessed MIME type is in IMAGE_MIMES, otherwise False.
    """
    # HEIC/HEIF is matched by suffix as well as by MIME type: ".heif" maps to
    # "image/heif", which is not in IMAGE_MIMES, and the mimetypes table in
    # use may not map these suffixes at all.
    if str(path).lower().endswith((".heic", ".heif")):
        return True
    mime, _ = mimetypes.guess_type(path)
    return mime in IMAGE_MIMES
def _heic_to_jpeg(p: Path) -> bytes | None:
"""Transcode HEIC/HEIF to JPEG bytes (pillow-heif), or None if unavailable.
llama.cpp's clip handler can't decode HEIC, so raw pass-through would fail
or waste context — and iPhone attachments are predominantly HEIC."""
try:
import io
import pillow_heif
from PIL import Image
pillow_heif.register_heif_opener()
img = Image.open(p).convert("RGB")
buf = io.BytesIO()
img.save(buf, format="JPEG", quality=88)
return buf.getvalue()
except Exception: # noqa: BLE001 no pillow-heif / corrupt file -> skip
return None
def to_data_uri(path: str) -> str | None:
"""Return a `data:<mime>;base64,...` URI, or None if not a usable image.
HEIC is transcoded to JPEG (the vision stack can't decode HEIC); when
transcoding isn't available the file is skipped, never sent undecodable."""
p = Path(path)
if not p.exists() or p.stat().st_size > MAX_BYTES:
return None
mime, _ = mimetypes.guess_type(str(p))
if mime not in IMAGE_MIMES:
return None
if mime == "image/heic" or p.suffix.lower() in (".heic", ".heif"):
jpeg = _heic_to_jpeg(p)
if jpeg is None:
return None
return "data:image/jpeg;base64," + base64.b64encode(jpeg).decode("ascii")
b64 = base64.b64encode(p.read_bytes()).decode("ascii")
return f"data:{mime};base64,{b64}"
def paths_to_data_uris(paths: list[str]) -> list[str]:
    """Encode each path with to_data_uri, keeping only the successful results.

    Args:
        paths: Candidate file paths; a falsy value (None or empty) is treated
            as no paths.

    Returns:
        The data URIs of the paths that could be encoded, in input order.
    """
    uris: list[str] = []
    for candidate in paths or []:
        uri = to_data_uri(candidate)
        if uri:
            uris.append(uri)
    return uris
|