| """Private Gemini vision adapter. |
| |
| This module keeps provider details out of the public toolbox API. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import os |
| from pathlib import Path |
| from typing import List |
|
|
| from google import genai |
| from google.genai import types |
|
|
|
|
| def _guess_mime(path: Path) -> str: |
| suffix = path.suffix.lower() |
| if suffix == ".png": |
| return "image/png" |
| if suffix in {".jpg", ".jpeg"}: |
| return "image/jpeg" |
| if suffix == ".webp": |
| return "image/webp" |
| if suffix == ".gif": |
| return "image/gif" |
| return "image/png" |
|
|
|
|
def gemini_vision_chat(prompt: str, image_paths: List[str]) -> str:
    """Run a Gemini vision call and return plain text output.

    Configuration is read from environment variables, each with a built-in
    default: ``EVAL_TOOLBOX_GCP_PROJECT``, ``EVAL_TOOLBOX_GCP_LOCATION``,
    ``EVAL_TOOLBOX_VISION_MODEL`` and ``EVAL_TOOLBOX_VISION_SYSTEM`` (the
    system instruction).

    Args:
        prompt: Text sent as the first part of the user message.
        image_paths: Paths of images to attach after the prompt. Entries
            that are not existing regular files are skipped. ``None`` is
            treated as an empty list.

    Returns:
        The response text (empty string when the response has no text), or
        the literal ``"TOOL_ERROR: no_valid_images"`` when none of the
        given paths could be used. In that case no client is created and
        no request is sent.
    """
    # Read the images first so the no-image error path needs neither
    # credentials nor any SDK objects.
    images = []
    for img in image_paths or ():
        p = Path(img)
        # is_file() rather than exists(): a directory exists but cannot be
        # read as image bytes.
        if not p.is_file():
            continue
        images.append((p.read_bytes(), _guess_mime(p)))

    if not images:
        return "TOOL_ERROR: no_valid_images"

    # NOTE(review): the default project id is baked into the code; confirm
    # that this is intended for every deployment.
    project = os.getenv("EVAL_TOOLBOX_GCP_PROJECT", "research-01-268019")
    location = os.getenv("EVAL_TOOLBOX_GCP_LOCATION", "global")
    model = os.getenv("EVAL_TOOLBOX_VISION_MODEL", "gemini-3-flash-preview")
    system_instruction = os.getenv(
        "EVAL_TOOLBOX_VISION_SYSTEM",
        "You are a visual analysis assistant. Return concise factual output.",
    )

    client = genai.Client(vertexai=True, project=project, location=location)

    # The prompt goes first, followed by the images in the order given.
    parts = [types.Part.from_text(text=prompt)]
    parts.extend(
        types.Part.from_bytes(data=data, mime_type=mime)
        for data, mime in images
    )

    conversation = [types.Content(role="user", parts=parts)]
    config = types.GenerateContentConfig(system_instruction=system_instruction)
    resp = client.models.generate_content(
        model=model, contents=conversation, config=config
    )
    # Normalise a missing/empty text payload to an empty string.
    return resp.text or ""
|
|
|
|