Spaces:
Running on Zero
Running on Zero
"""Real Modal backends: Gemma (vLLM OpenAI endpoint) + FLUX (resident pipeline).

Config via env (set after deploying the two Modal apps in serve/):
    COMIC_GEMMA_URL   - base URL of the vLLM OpenAI server, ending in /v1
    COMIC_GEMMA_MODEL - served model name (default "gemma-comic")
    COMIC_GEMMA_KEY   - token if the endpoint uses proxy-auth (else "EMPTY")
    COMIC_FLUX_APP    - Modal app name for FLUX (default "comic-flux")
    COMIC_FLUX_CLS    - Modal class name (default "FluxRenderer")

Pattern mirrors the wisdom2 reference deployment: an OpenAI client to the vLLM /v1
endpoint for the writer, and a modal.Cls handle for the artist. The generous timeout
lets the first call after scale-to-zero ride through the GPU cold boot.
"""
| from __future__ import annotations | |
| import os | |
| from .backends import WriterBackend, ArtistBackend | |
| # Fallbacks ModalWriter uses when neither a constructor argument nor the COMIC_GEMMA_URL / COMIC_GEMMA_MODEL env var supplies a value. | |
| DEFAULT_GEMMA_URL = "https://keshav-public07--comic-gemma-serve.modal.run/v1" | |
| DEFAULT_GEMMA_MODEL = "gemma-comic" | |
class ModalWriter(WriterBackend):
    """Writer backend that calls the Gemma vLLM server through an OpenAI client.

    Each setting is resolved in this order: explicit constructor argument,
    then environment variable, then built-in default. An environment variable
    that is set but empty is treated as unset, so a blank ``COMIC_GEMMA_URL``
    falls back to ``DEFAULT_GEMMA_URL`` instead of handing the client an empty
    base URL.
    """

    def __init__(self, base_url=None, model=None, api_key=None,
                 temperature=0.9, max_tokens=16384, timeout=900):
        """Create the OpenAI client pointed at the vLLM endpoint.

        Args:
            base_url: Server base URL. Falls back to ``COMIC_GEMMA_URL``,
                then ``DEFAULT_GEMMA_URL``.
            model: Served model name. Falls back to ``COMIC_GEMMA_MODEL``,
                then ``DEFAULT_GEMMA_MODEL``.
            api_key: Token sent to the endpoint. Falls back to
                ``COMIC_GEMMA_KEY``, then the placeholder ``"EMPTY"``.
            temperature: Sampling temperature forwarded on every chat call.
            max_tokens: Output token cap forwarded on every chat call.
            timeout: Request timeout handed to the OpenAI client.
        """
        # Imported here rather than at module top, so openai is only needed
        # once a ModalWriter is actually constructed.
        from openai import OpenAI

        # `or` chains (instead of environ.get(name, default)) so that a
        # variable set to "" does not shadow the default.
        resolved_url = (
            base_url or os.environ.get("COMIC_GEMMA_URL") or DEFAULT_GEMMA_URL
        )
        resolved_key = api_key or os.environ.get("COMIC_GEMMA_KEY") or "EMPTY"

        self.model = (
            model or os.environ.get("COMIC_GEMMA_MODEL") or DEFAULT_GEMMA_MODEL
        )
        self.temperature = temperature
        # Bibles/panel batches are large JSON; give plenty of output room.
        self.max_tokens = max_tokens
        # 15 min: absorbs the ~850s first-deploy cold boot without timing out.
        self._client = OpenAI(
            base_url=resolved_url,
            api_key=resolved_key,
            timeout=timeout,
        )

    def chat(self, messages: list) -> str:
        """Send ``messages`` as one non-streaming chat completion.

        Args:
            messages: Chat messages, passed unchanged to
                ``chat.completions.create``.

        Returns:
            The content of the first choice, or ``""`` when that content is
            empty or ``None``.
        """
        resp = self._client.chat.completions.create(
            model=self.model,
            messages=messages,
            stream=False,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            # Ask vLLM for a JSON object directly when the server supports it; the
            # prompts also demand strict JSON, so this is belt-and-braces.
            response_format={"type": "json_object"},
        )
        return resp.choices[0].message.content or ""

    def warm(self) -> bool:
        """Probe the endpoint by listing its models.

        Returns:
            ``True`` if the listing call succeeds, ``False`` if it raises any
            ``Exception``. Failures are deliberately not propagated: this is a
            best-effort readiness check.
        """
        try:
            self._client.models.list()
        except Exception:
            return False
        return True
class ModalArtist(ArtistBackend):
    """Artist backend that forwards render calls to a deployed Modal class.

    The Modal app and class names are resolved in this order: explicit
    constructor argument, then environment variable, then built-in default.
    An environment variable that is set but empty is treated as unset, so a
    blank ``COMIC_FLUX_APP`` falls back to ``"comic-flux"`` instead of being
    used as the lookup name.
    """

    def __init__(self, app=None, cls=None):
        """Look up the deployed Modal class and instantiate a handle to it.

        Args:
            app: Modal app name. Falls back to ``COMIC_FLUX_APP``, then
                ``"comic-flux"``.
            cls: Modal class name. Falls back to ``COMIC_FLUX_CLS``, then
                ``"FluxRenderer"``.
        """
        # Imported here rather than at module top, so modal is only needed
        # once a ModalArtist is actually constructed.
        import modal

        # `or` chains (instead of environ.get(name, default)) so that a
        # variable set to "" does not shadow the default.
        app_name = app or os.environ.get("COMIC_FLUX_APP") or "comic-flux"
        cls_name = cls or os.environ.get("COMIC_FLUX_CLS") or "FluxRenderer"

        renderer_cls = modal.Cls.from_name(app_name, cls_name)
        self._obj = renderer_cls()

    def render(self, prompt: str, seed: int = 0) -> bytes:
        """Render one prompt remotely and return the remote call's result.

        Args:
            prompt: Text prompt forwarded to the remote ``render`` method.
            seed: Seed forwarded alongside the prompt.

        Returns:
            Whatever the remote ``render`` returns (annotated as ``bytes``;
            presumably encoded image data - confirm against the Modal app).
        """
        return self._obj.render.remote(prompt, seed)

    def render_batch(self, prompts: list, seeds: list) -> list:
        """Render several prompts in one remote call.

        Both arguments are copied into plain lists before being sent.

        Args:
            prompts: Prompts forwarded to the remote ``render_batch`` method.
            seeds: Seeds forwarded with them (presumably one per prompt -
                verify against the Modal app).

        Returns:
            Whatever the remote ``render_batch`` returns.
        """
        return self._obj.render_batch.remote(list(prompts), list(seeds))

    def warm(self) -> bool:
        """Call the remote ``warm`` method as a best-effort readiness probe.

        Returns:
            The truthiness of the remote result, or ``False`` if the call
            raises any ``Exception``.
        """
        try:
            return bool(self._obj.warm.remote())
        except Exception:
            return False