# Provenance (hosting-page residue kept as a comment so the module parses):
# uploaded by JustinTX, "Add files using upload-large-folder tool",
# commit 3f6526a (verified).
"""Private Gemini vision adapter.
This module keeps provider details out of the public toolbox API.
"""
from __future__ import annotations
import os
from pathlib import Path
from typing import List
from google import genai
from google.genai import types
def _guess_mime(path: Path) -> str:
suffix = path.suffix.lower()
if suffix == ".png":
return "image/png"
if suffix in {".jpg", ".jpeg"}:
return "image/jpeg"
if suffix == ".webp":
return "image/webp"
if suffix == ".gif":
return "image/gif"
return "image/png"
def gemini_vision_chat(prompt: str, image_paths: List[str]) -> str:
    """Run a Gemini vision call and return plain text output.

    Args:
        prompt: Text sent as the first part of the single user message.
        image_paths: Filesystem paths of the images to attach after the
            prompt. Entries that are not regular files (missing paths,
            directories) are skipped silently.

    Returns:
        The response's ``text``, or ``""`` when that is empty or ``None``.
        If no entry of ``image_paths`` could be attached, the sentinel
        string ``"TOOL_ERROR: no_valid_images"`` is returned and no client
        is created and no request is made.

    Raises:
        OSError: If a path is a regular file but cannot be read.

    Configuration comes from environment variables, each with a built-in
    default: ``EVAL_TOOLBOX_GCP_PROJECT``, ``EVAL_TOOLBOX_GCP_LOCATION``,
    ``EVAL_TOOLBOX_VISION_MODEL`` and ``EVAL_TOOLBOX_VISION_SYSTEM``.
    """
    # Load images before touching the SDK so the "no valid images" case
    # returns without constructing a Vertex AI client.
    images = []
    for img in image_paths:
        p = Path(img)
        # is_file() rather than exists(): a directory exists but cannot be
        # opened for reading, and should be skipped like a missing path.
        if not p.is_file():
            continue
        images.append((p.read_bytes(), _guess_mime(p)))

    if not images:
        return "TOOL_ERROR: no_valid_images"

    # NOTE(review): the default project id is hard-coded; presumably it is
    # deployment-specific -- confirm it should ship as a default.
    project = os.getenv("EVAL_TOOLBOX_GCP_PROJECT", "research-01-268019")
    location = os.getenv("EVAL_TOOLBOX_GCP_LOCATION", "global")
    model = os.getenv("EVAL_TOOLBOX_VISION_MODEL", "gemini-3-flash-preview")
    system_instruction = os.getenv(
        "EVAL_TOOLBOX_VISION_SYSTEM",
        "You are a visual analysis assistant. Return concise factual output.",
    )

    client = genai.Client(vertexai=True, project=project, location=location)

    # The prompt goes first, followed by the images in the order given.
    parts = [types.Part.from_text(text=prompt)]
    parts.extend(
        types.Part.from_bytes(data=data, mime_type=mime) for data, mime in images
    )

    conversation = [types.Content(role="user", parts=parts)]
    config = types.GenerateContentConfig(system_instruction=system_instruction)
    resp = client.models.generate_content(
        model=model, contents=conversation, config=config
    )
    return resp.text or ""