"""Lightweight OpenAI GPT-5 client for orchestration steps (4–9 prompts, etc.).""" from __future__ import annotations import base64 import json import logging import os from pathlib import Path from typing import Optional from openai import OpenAI from constants import ( ROOT, DEFAULT_SETTINGS, LLM_SETTING_KEYS as SETTING_KEYS, DEFAULT_MODEL, DEFAULT_REASONING, ) def load_settings(path: Path | None) -> dict: """Load settings.json (or a provided path) and keep only recognized keys.""" path = path or DEFAULT_SETTINGS if not path.exists(): return {} data = json.loads(path.read_text(encoding="utf-8")) return {k: v for k, v in data.items() if k in SETTING_KEYS and v} def resolve_api_key(settings: dict) -> str: """Resolve OPENAI_API_KEY preferring env over settings; exit if missing.""" if os.environ.get("OPENAI_API_KEY"): return os.environ["OPENAI_API_KEY"] if settings.get("OPENAI_API_KEY"): return settings["OPENAI_API_KEY"] raise SystemExit("OPENAI_API_KEY is not set (env or settings.json)") def resolve_model(settings: dict, cli_model: Optional[str]) -> str: """Pick the model from CLI override, env, settings, or fallback default.""" return cli_model or os.environ.get("OPENAI_MODEL") or settings.get("OPENAI_MODEL") or DEFAULT_MODEL def resolve_reasoning(settings: dict, cli_reasoning: Optional[str]) -> Optional[str]: """Pick the reasoning effort from CLI override, env, settings, or default.""" return cli_reasoning or os.environ.get("OPENAI_REASONING_EFFORT") or settings.get("OPENAI_REASONING_EFFORT") or DEFAULT_REASONING def run(prompt: object, model: str, reasoning: Optional[str], api_key: str) -> str: """ Use the newer Responses API (per OpenAI 2025 guidelines). Accepts: - str prompt - list/tuple [text, image_bytes] for multimodal """ client = OpenAI(api_key=api_key) kwargs = {} if reasoning: kwargs["reasoning"] = {"effort": reasoning} # Build input payload if isinstance(prompt, (list, tuple)) and len(prompt) == 2 and isinstance(prompt[0], str) and isinstance(prompt[1], (bytes, bytearray)): b64 = base64.b64encode(prompt[1]).decode("utf-8") logging.info("[llm] multimodal input: text_len=%s image_bytes=%s", len(prompt[0]), len(prompt[1])) input_payload = [ { "role": "user", "content": [ {"type": "input_text", "text": prompt[0]}, {"type": "input_image", "image_url": f"data:image/png;base64,{b64}"}, ], } ] else: text_prompt = prompt if isinstance(prompt, str) else str(prompt) input_payload = [ { "role": "user", "content": [{"type": "input_text", "text": text_prompt}], } ] # Debug log full payload for traceability logging.info("[llm] model=%s reasoning=%s payload=%s", model, reasoning, input_payload) resp = client.responses.create( model=model, input=input_payload, **kwargs, ) return getattr(resp, "output_text", None) or str(resp)