"""LLM provider wrappers (OpenAI + Gemini 3) with a unified analyze interface.""" from __future__ import annotations import base64 import json import logging from typing import List, Sequence from openai import OpenAI from google import genai from google.genai import types as genai_types from google.genai import errors as genai_errors from settings import Settings LOGGER = logging.getLogger("llm") # Model identifiers exposed to the UI OPENAI_GPT5 = "gpt-5" OPENAI_GPT5_MINI = "gpt-5-mini" # Gemini 3 multimodal text-out model (supports image+text input, text output) GEMINI_3_VISION = "gemini-3-pro-preview" class LLMError(RuntimeError): pass def _encode_image_to_data_url(image_bytes: bytes, mime: str = "image/png") -> str: b64 = base64.b64encode(image_bytes).decode("utf-8") return f"data:{mime};base64,{b64}" def _collect_openai_messages(system_prompt: str, user_prompt: str, images: Sequence[bytes]): system = {"role": "system", "content": [{"type": "input_text", "text": system_prompt}]} user_content = [{"type": "input_text", "text": user_prompt}] for img in images: user_content.append({"type": "input_image", "image_url": _encode_image_to_data_url(img)}) user = {"role": "user", "content": user_content} return [system, user] def run_openai( images: Sequence[bytes], system_prompt: str, user_prompt: str, model: str, settings: Settings, ) -> str: if not settings.openai_api_key: raise LLMError("OPENAI_API_KEY is missing") client = OpenAI(api_key=settings.openai_api_key) messages = _collect_openai_messages(system_prompt, user_prompt, images) kwargs = {} if settings.openai_reasoning_effort: kwargs["reasoning"] = {"effort": settings.openai_reasoning_effort} LOGGER.info( "Calling OpenAI model=%s reasoning=%s images=%s total_bytes=%s", model, settings.openai_reasoning_effort, len(images), sum(len(i) for i in images), ) resp = client.responses.create(model=model, input=messages, **kwargs) text = getattr(resp, "output_text", None) or str(resp) LOGGER.info("OpenAI response (truncated 500 chars): %s", text[:500]) return text def run_gemini( images: Sequence[bytes], system_prompt: str, user_prompt: str, model: str, settings: Settings, ) -> str: # Two modes: # - Vertex (preferred when GOOGLE_GENAI_USE_VERTEXAI=True): uses ADC / gcloud auth # - API key (Studio): uses GEMINI_API_KEY if settings.google_genai_use_vertexai: client = genai.Client( vertexai=True, project=settings.google_cloud_project, location=settings.google_cloud_location or "us-central1", ) else: if not settings.gemini_api_key: raise LLMError("GEMINI_API_KEY is missing and vertex mode is disabled") client = genai.Client(api_key=settings.gemini_api_key) parts: List[genai_types.Part | str] = [system_prompt] for img in images: parts.append(genai_types.Part.from_bytes(data=img, mime_type="image/png")) parts.append(user_prompt) LOGGER.info( "Calling Gemini model=%s vertex=%s images=%s total_bytes=%s", model, settings.google_genai_use_vertexai, len(images), sum(len(i) for i in images), ) try: response = client.models.generate_content( model=model, contents=parts, config=genai_types.GenerateContentConfig(response_modalities=["text"]), ) except genai_errors.ClientError as exc: # Provide clearer guidance for common auth/model issues. raise LLMError( "Gemini request failed. " "If using Vertex, ensure the model exists in your project/location and ADC is active (`gcloud auth application-default login`). " "If using Studio/API key (e.g., on HuggingFace), set GOOGLE_GENAI_USE_VERTEXAI=false and provide GEMINI_API_KEY. " f"Details: {exc}" ) from exc # Prefer `.text`; fallback to concatenated text parts if getattr(response, "text", None): text = response.text if getattr(response, "parts", None): text_parts = [p.text for p in response.parts if getattr(p, "text", None)] if text_parts: text = "\n".join(text_parts) if "text" not in locals(): text = str(response) LOGGER.info("Gemini response (truncated 500 chars): %s", text[:500]) return text def analyze( images: Sequence[bytes], system_prompt: str, user_prompt: str, model_choice: str, settings: Settings, ) -> str: """Dispatch to the correct provider based on model_choice.""" if model_choice in {OPENAI_GPT5, OPENAI_GPT5_MINI}: return run_openai(images, system_prompt, user_prompt, model_choice, settings) if model_choice.startswith("gemini"): return run_gemini(images, system_prompt, user_prompt, model_choice, settings) raise LLMError(f"Unsupported model choice: {model_choice}")