"""Real Modal backends: Gemma (vLLM OpenAI endpoint) + FLUX (resident pipeline).

Config via env (set after deploying the two Modal apps in serve/):
  COMIC_GEMMA_URL    - base URL of the vLLM OpenAI server, ending in /v1
  COMIC_GEMMA_MODEL  - served model name (default "gemma-comic")
  COMIC_GEMMA_KEY    - token if the endpoint uses proxy-auth (else "EMPTY")
  COMIC_FLUX_APP     - Modal app name for FLUX (default comic-flux)
  COMIC_FLUX_CLS     - Modal class name (default FluxRenderer)

Pattern mirrors the wisdom2 reference deployment: an OpenAI client to the vLLM /v1
endpoint for the writer, and a modal.Cls handle for the artist. The generous timeout
lets the first call after scale-to-zero ride through the GPU cold boot.
"""

from __future__ import annotations

import os

from .backends import WriterBackend, ArtistBackend

# Fallbacks for the Gemma writer, used by ModalWriter when neither a constructor
# argument nor the matching COMIC_GEMMA_* environment variable supplies a value.
# The URL is the base of the OpenAI-compatible server and ends in /v1.
DEFAULT_GEMMA_URL = "https://keshav-public07--comic-gemma-serve.modal.run/v1"
# Model name sent in the `model` field of every chat completion request.
DEFAULT_GEMMA_MODEL = "gemma-comic"


class ModalWriter(WriterBackend):
    """Writer backend that calls the Gemma server through an OpenAI client.

    Each setting is resolved in the order: explicit constructor argument,
    then the matching ``COMIC_GEMMA_*`` environment variable, then a built-in
    default. An environment variable that is set but empty is treated as
    unset, so it never overrides the default with an empty string.
    """

    def __init__(self, base_url=None, model=None, api_key=None,
                 temperature=0.9, max_tokens=16384, timeout=900):
        """Build the OpenAI client pointed at the Gemma endpoint.

        Args:
            base_url: Base URL of the server (ending in /v1). Falls back to
                ``COMIC_GEMMA_URL``, then ``DEFAULT_GEMMA_URL``.
            model: Served model name. Falls back to ``COMIC_GEMMA_MODEL``,
                then ``DEFAULT_GEMMA_MODEL``.
            api_key: API key for the endpoint. Falls back to
                ``COMIC_GEMMA_KEY``, then the placeholder ``"EMPTY"``.
            temperature: Sampling temperature sent with every request.
            max_tokens: Output token cap sent with every request.
            timeout: Client timeout passed to ``OpenAI`` (seconds).
        """
        # Imported here rather than at module level, so the openai package is
        # only required once a ModalWriter is actually constructed.
        from openai import OpenAI

        # `environ.get(name) or default` (not `environ.get(name, default)`):
        # the two-argument form returns "" for a variable that is exported
        # but empty, which would silently replace the default with "".
        base_url = base_url or os.environ.get("COMIC_GEMMA_URL") or DEFAULT_GEMMA_URL
        self.model = (
            model or os.environ.get("COMIC_GEMMA_MODEL") or DEFAULT_GEMMA_MODEL
        )
        api_key = api_key or os.environ.get("COMIC_GEMMA_KEY") or "EMPTY"

        self.temperature = temperature
        # Bibles/panel batches are large JSON; give plenty of output room.
        self.max_tokens = max_tokens
        self._client = OpenAI(
            base_url=base_url,
            api_key=api_key,
            # Default 900s = 15 min: absorbs the ~850s first-deploy cold boot
            # without timing out.
            timeout=timeout,
        )

    def chat(self, messages: list) -> str:
        """Send one non-streaming chat completion and return its text.

        Args:
            messages: Passed through unchanged as the ``messages`` argument
                of ``chat.completions.create``.

        Returns:
            The content of the first choice, or ``""`` when that content is
            ``None`` or empty.
        """
        resp = self._client.chat.completions.create(
            model=self.model,
            messages=messages,
            stream=False,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            # Request a JSON object from the server directly; the prompts also
            # demand strict JSON, so this is belt-and-braces.
            response_format={"type": "json_object"},
        )
        return resp.choices[0].message.content or ""

    def warm(self) -> bool:
        """Probe the endpoint by listing models.

        Best-effort by design: any exception is reported as ``False`` instead
        of propagating to the caller.

        Returns:
            ``True`` if the ``models.list()`` call completed, else ``False``.
        """
        try:
            self._client.models.list()
            return True
        except Exception:
            return False


class ModalArtist(ArtistBackend):
    """Artist backend that forwards render calls to a deployed Modal class.

    The Modal app and class names are resolved in the order: explicit
    constructor argument, then ``COMIC_FLUX_APP`` / ``COMIC_FLUX_CLS``, then
    the built-in defaults. An environment variable that is set but empty is
    treated as unset.
    """

    def __init__(self, app=None, cls=None):
        """Look up the deployed Modal class and create a handle to it.

        Args:
            app: Modal app name. Falls back to ``COMIC_FLUX_APP``, then
                ``"comic-flux"``.
            cls: Modal class name. Falls back to ``COMIC_FLUX_CLS``, then
                ``"FluxRenderer"``.
        """
        # Imported here rather than at module level, so the modal package is
        # only required once a ModalArtist is actually constructed.
        import modal

        # `environ.get(name) or default` (not `environ.get(name, default)`):
        # the two-argument form returns "" for a variable that is exported
        # but empty, and that empty name would be handed to the Modal lookup.
        app = app or os.environ.get("COMIC_FLUX_APP") or "comic-flux"
        cls = cls or os.environ.get("COMIC_FLUX_CLS") or "FluxRenderer"
        Renderer = modal.Cls.from_name(app, cls)
        self._obj = Renderer()

    def render(self, prompt: str, seed: int = 0) -> bytes:
        """Run the remote ``render`` method for one prompt.

        Args:
            prompt: Text prompt forwarded to the remote method.
            seed: Seed forwarded to the remote method.

        Returns:
            Whatever the remote ``render`` returns (annotated as bytes;
            presumably encoded image data -- confirm against the serve app).
        """
        return self._obj.render.remote(prompt, seed)

    def render_batch(self, prompts: list, seeds: list) -> list:
        """Run the remote ``render_batch`` method for several prompts.

        Args:
            prompts: Prompts to render.
            seeds: Seeds forwarded with the prompts (presumably one per
                prompt; no length check is done here).

        Returns:
            Whatever the remote ``render_batch`` returns.
        """
        # Both arguments are materialized as plain lists before being sent.
        return self._obj.render_batch.remote(list(prompts), list(seeds))

    def warm(self) -> bool:
        """Call the remote ``warm`` method and report its truthiness.

        Best-effort by design: any exception is reported as ``False`` instead
        of propagating to the caller.

        Returns:
            ``bool()`` of the remote result, or ``False`` if the call raised.
        """
        try:
            return bool(self._obj.warm.remote())
        except Exception:
            return False