Spaces:
Sleeping
Sleeping
| """LLM provider wrappers (OpenAI + Gemini 3) with a unified analyze interface.""" | |
| from __future__ import annotations | |
| import base64 | |
| import json | |
| import logging | |
| from typing import List, Sequence | |
| from openai import OpenAI | |
| from google import genai | |
| from google.genai import types as genai_types | |
| from google.genai import errors as genai_errors | |
| from settings import Settings | |
# Module-level logger shared by all provider calls in this file.
LOGGER = logging.getLogger("llm")
# Model identifiers exposed to the UI
OPENAI_GPT5 = "gpt-5"
OPENAI_GPT5_MINI = "gpt-5-mini"
# Gemini 3 multimodal text-out model (supports image+text input, text output)
GEMINI_3_VISION = "gemini-3-pro-preview"
class LLMError(RuntimeError):
    """Raised when a provider call cannot be made (missing key) or fails."""
| def _encode_image_to_data_url(image_bytes: bytes, mime: str = "image/png") -> str: | |
| b64 = base64.b64encode(image_bytes).decode("utf-8") | |
| return f"data:{mime};base64,{b64}" | |
def _collect_openai_messages(system_prompt: str, user_prompt: str, images: Sequence[bytes]):
    """Build the two-message Responses-API input: a system turn, then one user
    turn carrying the prompt text followed by every image as an inline data URL."""
    image_items = [
        {"type": "input_image", "image_url": _encode_image_to_data_url(img)}
        for img in images
    ]
    user_content = [{"type": "input_text", "text": user_prompt}, *image_items]
    return [
        {"role": "system", "content": [{"type": "input_text", "text": system_prompt}]},
        {"role": "user", "content": user_content},
    ]
def run_openai(
    images: Sequence[bytes],
    system_prompt: str,
    user_prompt: str,
    model: str,
    settings: Settings,
) -> str:
    """Send the prompts and images to an OpenAI model and return its text output.

    Raises:
        LLMError: when no OpenAI API key is configured.
    """
    if not settings.openai_api_key:
        raise LLMError("OPENAI_API_KEY is missing")

    # Reasoning effort is forwarded only when configured in settings.
    extra = (
        {"reasoning": {"effort": settings.openai_reasoning_effort}}
        if settings.openai_reasoning_effort
        else {}
    )
    LOGGER.info(
        "Calling OpenAI model=%s reasoning=%s images=%s total_bytes=%s",
        model,
        settings.openai_reasoning_effort,
        len(images),
        sum(len(i) for i in images),
    )
    client = OpenAI(api_key=settings.openai_api_key)
    payload = _collect_openai_messages(system_prompt, user_prompt, images)
    response = client.responses.create(model=model, input=payload, **extra)
    # `output_text` is the SDK convenience accessor; fall back to repr() if absent.
    text = getattr(response, "output_text", None) or str(response)
    LOGGER.info("OpenAI response (truncated 500 chars): %s", text[:500])
    return text
def run_gemini(
    images: Sequence[bytes],
    system_prompt: str,
    user_prompt: str,
    model: str,
    settings: Settings,
) -> str:
    """Send the prompts and images to a Gemini model and return its text output.

    Two auth modes:
      - Vertex (preferred when GOOGLE_GENAI_USE_VERTEXAI=True): uses ADC / gcloud auth.
      - API key (Studio): uses GEMINI_API_KEY.

    Raises:
        LLMError: when credentials are missing or the request fails client-side.
    """
    if settings.google_genai_use_vertexai:
        client = genai.Client(
            vertexai=True,
            project=settings.google_cloud_project,
            location=settings.google_cloud_location or "us-central1",
        )
    else:
        if not settings.gemini_api_key:
            raise LLMError("GEMINI_API_KEY is missing and vertex mode is disabled")
        client = genai.Client(api_key=settings.gemini_api_key)

    # Content order: system prompt first, then images, then the user prompt.
    parts: List[genai_types.Part | str] = [system_prompt]
    for img in images:
        parts.append(genai_types.Part.from_bytes(data=img, mime_type="image/png"))
    parts.append(user_prompt)

    LOGGER.info(
        "Calling Gemini model=%s vertex=%s images=%s total_bytes=%s",
        model,
        settings.google_genai_use_vertexai,
        len(images),
        sum(len(i) for i in images),
    )
    try:
        response = client.models.generate_content(
            model=model,
            contents=parts,
            # NOTE(review): the official docs spell the modality "TEXT"; lowercase
            # appears to be tolerated by the SDK — confirm against google-genai docs.
            config=genai_types.GenerateContentConfig(response_modalities=["text"]),
        )
    except genai_errors.ClientError as exc:
        # Provide clearer guidance for common auth/model issues.
        raise LLMError(
            "Gemini request failed. "
            "If using Vertex, ensure the model exists in your project/location and ADC is active (`gcloud auth application-default login`). "
            "If using Studio/API key (e.g., on HuggingFace), set GOOGLE_GENAI_USE_VERTEXAI=false and provide GEMINI_API_KEY. "
            f"Details: {exc}"
        ) from exc

    # Prefer `.text`; fall back to concatenated text parts, then to repr().
    # Fixes the original, which (a) let a non-empty `.parts` OVERRIDE a
    # non-empty `.text`, contradicting the stated preference, and (b) relied
    # on a fragile `"text" not in locals()` check to detect the unbound case.
    text = getattr(response, "text", None)
    if not text:
        response_parts = getattr(response, "parts", None) or []
        fragments = [p.text for p in response_parts if getattr(p, "text", None)]
        text = "\n".join(fragments) if fragments else str(response)
    LOGGER.info("Gemini response (truncated 500 chars): %s", text[:500])
    return text
def analyze(
    images: Sequence[bytes],
    system_prompt: str,
    user_prompt: str,
    model_choice: str,
    settings: Settings,
) -> str:
    """Dispatch to the correct provider based on model_choice."""
    openai_models = {OPENAI_GPT5, OPENAI_GPT5_MINI}
    if model_choice in openai_models:
        return run_openai(images, system_prompt, user_prompt, model_choice, settings)
    if not model_choice.startswith("gemini"):
        raise LLMError(f"Unsupported model choice: {model_choice}")
    return run_gemini(images, system_prompt, user_prompt, model_choice, settings)