"""Private Gemini vision adapter. This module keeps provider details out of the public toolbox API. """ from __future__ import annotations import os from pathlib import Path from typing import List from google import genai from google.genai import types def _guess_mime(path: Path) -> str: suffix = path.suffix.lower() if suffix == ".png": return "image/png" if suffix in {".jpg", ".jpeg"}: return "image/jpeg" if suffix == ".webp": return "image/webp" if suffix == ".gif": return "image/gif" return "image/png" def gemini_vision_chat(prompt: str, image_paths: List[str]) -> str: """Run a Gemini vision call and return plain text output.""" project = os.getenv("EVAL_TOOLBOX_GCP_PROJECT", "research-01-268019") location = os.getenv("EVAL_TOOLBOX_GCP_LOCATION", "global") model = os.getenv("EVAL_TOOLBOX_VISION_MODEL", "gemini-3-flash-preview") system_instruction = os.getenv( "EVAL_TOOLBOX_VISION_SYSTEM", "You are a visual analysis assistant. Return concise factual output.", ) client = genai.Client(vertexai=True, project=project, location=location) parts = [types.Part.from_text(text=prompt)] for img in image_paths: p = Path(img) if not p.exists(): continue with open(p, "rb") as f: data = f.read() parts.append(types.Part.from_bytes(data=data, mime_type=_guess_mime(p))) if len(parts) == 1: return "TOOL_ERROR: no_valid_images" conversation = [types.Content(role="user", parts=parts)] config = types.GenerateContentConfig(system_instruction=system_instruction) resp = client.models.generate_content(model=model, contents=conversation, config=config) return resp.text or ""