"""Swappable model backends behind two tiny interfaces. WriterBackend.chat(messages) -> str # Gemma: returns the full reply text ArtistBackend.render(prompt, seed) -> bytes # FLUX: returns JPEG/PNG image bytes Selected by COMIC_BACKEND (or an explicit arg): "mock" -> offline stand-ins (no GPU, no cost) for building/testing the whole flow. "modal" -> the real Gemma vLLM OpenAI endpoint + the resident FLUX endpoint on Modal. The app, engine and tests only ever see these two interfaces, so deploying real models is a config flip, not a code change. """ from __future__ import annotations import os from abc import ABC, abstractmethod class WriterBackend(ABC): @abstractmethod def chat(self, messages: list) -> str: """Return the model's full text reply for an OpenAI-style message list.""" raise NotImplementedError def warm(self) -> bool: """Best-effort: nudge the backend so a later call is warm. Never raises.""" return True class ArtistBackend(ABC): @abstractmethod def render(self, prompt: str, seed: int = 0) -> bytes: """Return image bytes (JPEG/PNG) for a text-free image prompt.""" raise NotImplementedError def render_batch(self, prompts: list, seeds: list) -> list: """Render several prompts. Default: one at a time. Real backends override to batch through the GPU in a single pass for much higher throughput.""" seeds = list(seeds) + [0] * (len(prompts) - len(seeds)) return [self.render(p, s) for p, s in zip(prompts, seeds)] def warm(self) -> bool: return True # ── factory ────────────────────────────────────────────────────────────────── def make_backends(kind: str | None = None): """(writer, artist) pair. kind defaults to $COMIC_BACKEND or 'mock'.""" kind = (kind or os.environ.get("COMIC_BACKEND", "mock")).lower() if kind == "modal": from .modal_backend import ModalWriter, ModalArtist return ModalWriter(), ModalArtist() if kind == "mock": from .mock_backend import MockWriter, MockArtist return MockWriter(), MockArtist() raise ValueError(f"Unknown COMIC_BACKEND: {kind!r} (expected 'mock' or 'modal')")