"""Real Modal backends: Gemma (vLLM OpenAI endpoint) + FLUX (resident pipeline). Config via env (set after deploying the two Modal apps in serve/): COMIC_GEMMA_URL - base URL of the vLLM OpenAI server, ending in /v1 COMIC_GEMMA_MODEL - served model name (default "gemma-comic") COMIC_GEMMA_KEY - token if the endpoint uses proxy-auth (else "EMPTY") COMIC_FLUX_APP - Modal app name for FLUX (default comic-flux) COMIC_FLUX_CLS - Modal class name (default FluxRenderer) Pattern mirrors the wisdom2 reference deployment: an OpenAI client to the vLLM /v1 endpoint for the writer, and a modal.Cls handle for the artist. The generous timeout lets the first call after scale-to-zero ride through the GPU cold boot. """ from __future__ import annotations import os from .backends import WriterBackend, ArtistBackend DEFAULT_GEMMA_URL = "https://keshav-public07--comic-gemma-serve.modal.run/v1" DEFAULT_GEMMA_MODEL = "gemma-comic" class ModalWriter(WriterBackend): def __init__(self, base_url=None, model=None, api_key=None, temperature=0.9, max_tokens=16384, timeout=900): from openai import OpenAI base_url = base_url or os.environ.get("COMIC_GEMMA_URL", DEFAULT_GEMMA_URL) self.model = model or os.environ.get("COMIC_GEMMA_MODEL", DEFAULT_GEMMA_MODEL) self.temperature = temperature # Bibles/panel batches are large JSON; give plenty of output room. self.max_tokens = max_tokens # 15 min: absorbs the ~850s first-deploy cold boot without timing out. self._client = OpenAI( base_url=base_url, api_key=api_key or os.environ.get("COMIC_GEMMA_KEY", "EMPTY"), timeout=timeout, ) def chat(self, messages: list) -> str: resp = self._client.chat.completions.create( model=self.model, messages=messages, stream=False, temperature=self.temperature, max_tokens=self.max_tokens, # Ask vLLM for a JSON object directly when the server supports it; the # prompts also demand strict JSON, so this is belt-and-braces. response_format={"type": "json_object"}, ) return resp.choices[0].message.content or "" def warm(self) -> bool: try: self._client.models.list() return True except Exception: return False class ModalArtist(ArtistBackend): def __init__(self, app=None, cls=None): import modal app = app or os.environ.get("COMIC_FLUX_APP", "comic-flux") cls = cls or os.environ.get("COMIC_FLUX_CLS", "FluxRenderer") Renderer = modal.Cls.from_name(app, cls) self._obj = Renderer() def render(self, prompt: str, seed: int = 0) -> bytes: return self._obj.render.remote(prompt, seed) def render_batch(self, prompts: list, seeds: list) -> list: return self._obj.render_batch.remote(list(prompts), list(seeds)) def warm(self) -> bool: try: return bool(self._obj.warm.remote()) except Exception: return False