Spaces:
Running on Zero
Running on Zero
| """Gemma prompting + parsing for the comic writer. | |
| Two prompt families, both demanding STRICT JSON so parsing is reliable: | |
| build_bible_messages(idea) -> Gemma call #1: safety gate + story bible. | |
| build_panel_messages(bible, pages..) -> Gemma calls #2..N: the actual panels for a | |
| small batch of pages, given the bible and a | |
| recap of the story so far (continuity). | |
| This module is backend-agnostic: it only builds message lists and parses replies. | |
| The engine owns the model calls. Robust JSON extraction tolerates a stray ```json | |
| fence, leftover <think> blocks, or prose around the object (belt-and-braces on top of | |
| vLLM's --reasoning-parser, which already strips the thinking channel). | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import re | |
| from typing import List, Optional | |
| from .schema import ( | |
| ComicBible, Panel, PageSynopsis, PAGES, PANELS_PER_PAGE, | |
| ) | |
# How many pages to script per panel call. 5 pages = 10 panels/call -> 5 calls for a
# 25-page (50-panel) comic. Small enough that each JSON reply stays well within the
# token budget (even with thinking) and the model keeps full continuity context; big
# enough to keep round-trips down.
# NOTE(review): the arithmetic above assumes PAGES == 25 and PANELS_PER_PAGE == 2 in
# .schema -- confirm if those constants change. This value is the default batch size
# used by batches() below.
PANEL_BATCH_PAGES = 5
# ── JSON extraction ──────────────────────────────────────────────────────────
# Complete <think>...</think> blocks (case-insensitive, may span several lines).
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
# A Markdown code fence, with or without a `json` language tag; group 1 is its body.
_FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)


def _first_json_object(text: str, start: int = 0) -> Optional[str]:
    """Return the first balanced {...} span in `text` at or after `start`.

    The scan is string/escape aware: braces inside double-quoted strings are
    ignored, and a backslash escapes the following character. Returns None when
    there is no opening brace or the first one is never closed.
    """
    start = text.find("{", start)
    if start < 0:
        return None
    depth = 0
    in_str = False
    esc = False
    for i in range(start, len(text)):
        c = text[i]
        if in_str:
            if esc:
                esc = False
            elif c == "\\":
                esc = True
            elif c == '"':
                in_str = False
            continue
        if c == '"':
            in_str = True
        elif c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return text[start:i + 1]
    return None


def _balanced_objects(text: str):
    """Yield successive top-level balanced {...} spans of `text`, left to right.

    Stops at the first opener that is never closed: a truncated object must not
    cause one of its nested fragments to be yielded as if it were the answer.
    """
    pos = 0
    while True:
        blob = _first_json_object(text, pos)
        if blob is None:
            return
        yield blob
        # `blob` starts at the first "{" at/after `pos`; resume right after it.
        pos = text.find("{", pos) + len(blob)


def _loads_dict(blob: str) -> Optional[dict]:
    """Parse `blob` as JSON and return it only if it decodes to a dict."""
    try:
        obj = json.loads(blob)
    except json.JSONDecodeError:
        return None
    return obj if isinstance(obj, dict) else None


def extract_json(text: str) -> dict:
    """Parse the model reply into a dict, tolerating fences/prose/thinking leftovers.

    Strategy: strip complete <think> blocks, then try (1) the body of the first
    code fence and (2) the whole cleaned reply. Each candidate is parsed as-is,
    and failing that, every top-level balanced {...} span in it is tried in order.
    Trying later spans matters when prose containing braces (e.g. "{name}")
    precedes the real object.

    Raises ValueError if nothing JSON-like is found, so callers can surface a clear
    error (and retry) rather than silently producing an empty comic.
    """
    if not text or not text.strip():
        raise ValueError("empty model reply")
    cleaned = _THINK_RE.sub("", text).strip()
    # Prefer a fenced block if present, else the raw text.
    candidates = []
    fence = _FENCE_RE.search(cleaned)
    if fence:
        candidates.append(fence.group(1))
    candidates.append(cleaned)
    for cand in candidates:
        obj = _loads_dict(cand)
        if obj is not None:
            return obj
        for blob in _balanced_objects(cand):
            obj = _loads_dict(blob)
            if obj is not None:
                return obj
    raise ValueError("no JSON object found in model reply")
# ── Call #1: gatekeeper + story bible ────────────────────────────────────────
# System prompt for call #1. It asks the model to act as content gatekeeper and, on
# approval, to emit the story bible as strict JSON in the schema spelled out at the
# end. Page/panel counts are interpolated from the schema constants at import time.
# The separator after "STRICT JSON ONLY" is an em dash; a mis-encoded character in
# its place would be sent to the model verbatim.
BIBLE_SYSTEM = (
    "You are a professional comic-book writer and art director. From a reader's "
    "request you design a complete comic of exactly "
    f"{PAGES} pages, {PANELS_PER_PAGE} panels per page ({PAGES * PANELS_PER_PAGE} "
    "panels total). You are ALSO the content gatekeeper.\n\n"
    "SAFETY FIRST. Refuse (approved=false) only if the request asks for: sexual "
    "content involving minors; real, named people in sexual or defamatory scenes; "
    "extreme gore or cruelty for shock value; hateful or harassing content toward a "
    "protected group; or instructions that enable real-world harm. Ordinary fictional "
    "adventure, action, peril, rivalry, mystery, romance, horror and comedy ARE allowed. "
    "When you refuse, give one short, polite sentence and leave the other fields empty.\n\n"
    "If approved, design a story BIBLE:\n"
    "- a punchy title and a one-sentence logline;\n"
    "- a FIXED cast of 1 to 4 main characters. Each gets a name and a single vivid, "
    "concrete VISUAL description (species/build, age, hair, face, signature clothing, "
    "colors, props) of about 25-40 words. This description is reused verbatim in every "
    "image, so it must be self-contained and unambiguous;\n"
    "- one global art_style line (medium, linework, shading) and one palette line, both "
    "constant for the whole comic;\n"
    f"- a {PAGES}-page synopsis: one vivid sentence per page, together forming a full arc "
    "(setup, rising action, midpoint turn, climax, resolution).\n\n"
    "Output STRICT JSON ONLY — no markdown, no commentary. Schema:\n"
    "{\n"
    ' "approved": true,\n'
    ' "refusal_reason": "",\n'
    ' "title": "...",\n'
    ' "logline": "...",\n'
    ' "art_style": "...",\n'
    ' "palette": "...",\n'
    ' "characters": [ {"name": "...", "appearance": "..."} ],\n'
    f' "pages": [ {{"page": 1, "synopsis": "..."}} ... exactly {PAGES} items ]\n'
    "}"
)
def build_bible_messages(idea: str) -> list:
    """Build the two-message chat (system + user) for call #1.

    The reader's idea is whitespace-stripped and embedded between triple quotes
    in the user turn; the system turn is the fixed BIBLE_SYSTEM prompt.
    """
    request = idea.strip()
    user_prompt = "".join([
        f"Reader's request:\n\"\"\"\n{request}\n\"\"\"\n\n",
        f"Decide if it is allowed, then (if allowed) design the full {PAGES}-page comic ",
        "bible. Remember: character appearance descriptions are reused verbatim in every ",
        "panel image, so make them detailed and consistent. Output strict JSON only.",
    ])
    system_msg = {"role": "system", "content": BIBLE_SYSTEM}
    user_msg = {"role": "user", "content": user_prompt}
    return [system_msg, user_msg]
def parse_bible(reply: str) -> ComicBible:
    """Turn the call #1 reply into a ComicBible.

    A refused bible is returned as parsed. An approved one has its page list
    cut or padded (with empty synopses) to exactly PAGES entries and renumbered
    1..PAGES, so downstream batching can index pages by position.
    """
    bible = ComicBible.from_dict(extract_json(reply))
    if not bible.approved:
        return bible

    normalised = bible.pages[:PAGES]
    # Fill any shortfall with blank placeholders.
    for number in range(len(normalised) + 1, PAGES + 1):
        normalised.append(PageSynopsis(page=number, synopsis=""))
    # Position is authoritative: overwrite whatever numbering the model gave.
    for position, entry in enumerate(normalised, start=1):
        entry.page = position
    bible.pages = normalised
    return bible
# ── Calls #2..N: panel script for a batch of pages ───────────────────────────
# System prompt for calls #2..N. It tells the model what each panel must contain
# (scene, caption, characters) and fixes the strict-JSON reply shape. The panels-per-
# page count is interpolated from the schema constant at import time. The separator
# after "in the frame" is an em dash; a mis-encoded character in its place would be
# sent to the model verbatim.
PANEL_SYSTEM = (
    "You are the same comic-book writer, now scripting individual panels. You are given "
    "the story bible (title, fixed cast with appearances, art style, palette, and the "
    "full page-by-page synopsis) and a recap of the panels written so far. You write the "
    f"{PANELS_PER_PAGE} panels for each requested page, continuing the story coherently.\n\n"
    "For every panel produce:\n"
    "- scene: a purely VISUAL description of what we see in the frame — camera/shot, which "
    "named characters are present and what they are doing, setting and mood. No dialogue "
    "or words that should appear AS text in the image (the image must be text-free).\n"
    "- caption: the reader-facing text shown UNDER the panel: 1-2 short sentences of "
    "narration, optionally one short line of spoken dialogue in quotes. This carries the "
    "story between images.\n"
    "- characters: the list of cast names present in the panel (use the bible's exact "
    "names so their look stays consistent).\n\n"
    "Keep continuity with the recap and the synopsis. Output STRICT JSON ONLY:\n"
    '{ "panels": [ {"page": N, "panel": 1, "scene": "...", "caption": "...", '
    '"characters": ["..."]} , ... ] }'
)
def _bible_brief(bible: ComicBible) -> str:
    """Render the bible as a plain-text brief for the panel-writing prompt.

    Lists title, logline, art style and palette, then one line per cast member
    and one line per page synopsis.
    """
    cast_lines = [f" - {member.name}: {member.appearance}" for member in bible.characters]
    page_lines = [f" Page {entry.page}: {entry.synopsis}" for entry in bible.pages]
    sections = [
        f"TITLE: {bible.title}",
        f"LOGLINE: {bible.logline}",
        f"ART STYLE: {bible.art_style}",
        f"PALETTE: {bible.palette}",
        "CAST (fixed appearances):",
        "\n".join(cast_lines),
        f"FULL {PAGES}-PAGE SYNOPSIS:",
        "\n".join(page_lines),
    ]
    return "\n".join(sections)
def build_panel_messages(bible: ComicBible, pages: List[int], recap: str) -> list:
    """Build the two-message chat (system + user) for one panel-batch call.

    Args:
        bible: the approved story bible; `bible.pages[n - 1]` is read for each
            requested page `n`, so pages are expected to be stored in order
            starting at page 1.
        pages: 1-based page numbers to script in this call, in order.
        recap: summary of the panels written so far; blank for the first batch.

    Returns:
        A list of two role/content dicts: the PANEL_SYSTEM prompt and a user turn
        holding the bible brief, the recap, and the pages to write.
    """
    page_lines = "\n".join(
        f" Page {n}: {bible.pages[n - 1].synopsis}" for n in pages
    )
    # Placeholder shown to the model when there is no story yet. The separator is
    # an em dash; a mis-encoded character here would reach the model verbatim.
    recap_block = recap.strip() or "(this is the opening — nothing has happened yet)"
    user = (
        f"{_bible_brief(bible)}\n\n"
        f"STORY SO FAR (panels already written):\n{recap_block}\n\n"
        f"NOW WRITE the {PANELS_PER_PAGE} panels for EACH of these pages, in order:\n"
        f"{page_lines}\n\n"
        f"Return exactly {len(pages) * PANELS_PER_PAGE} panels as strict JSON."
    )
    return [
        {"role": "system", "content": PANEL_SYSTEM},
        {"role": "user", "content": user},
    ]
def parse_panels(reply: str, pages: List[int]) -> List[Panel]:
    """Decode a panel-batch reply and pin each panel to its expected grid slot.

    The expected grid is every (page, panel) pair for `pages`, in order. Reply
    entries are paired with grid slots by position; the model's own page/panel
    numbers are overwritten. Entries that are not dicts are dropped (their slot
    is not reused), and surplus entries beyond the grid are ignored. If the
    reply has no "panels" list, the whole object is treated as a single panel.
    """
    payload = extract_json(reply)
    entries = payload.get("panels")
    if not isinstance(entries, list):
        # Tolerate a bare single panel object.
        entries = [payload]

    grid = [
        (page_no, panel_no)
        for page_no in pages
        for panel_no in range(1, PANELS_PER_PAGE + 1)
    ]

    result: List[Panel] = []
    for (page_no, panel_no), entry in zip(grid, entries):
        if isinstance(entry, dict):
            panel = Panel.from_dict(entry, default_page=page_no, default_panel=panel_no)
            # Position wins over whatever numbering the model produced.
            panel.page, panel.panel = page_no, panel_no
            result.append(panel)
    return result
def batches(pages_per_batch: int = PANEL_BATCH_PAGES) -> List[List[int]]:
    """Split page numbers 1..PAGES into consecutive batches.

    Each batch holds `pages_per_batch` pages except possibly the last, which
    holds the remainder. For example, with PAGES == 10 and pages_per_batch == 4
    the result is [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10]].

    Raises:
        ValueError: if `pages_per_batch` is less than 1. A negative size would
            otherwise yield no batches at all, i.e. a silently empty comic.
    """
    if pages_per_batch < 1:
        raise ValueError(f"pages_per_batch must be >= 1, got {pages_per_batch}")
    return [
        list(range(start, min(start + pages_per_batch, PAGES + 1)))
        for start in range(1, PAGES + 1, pages_per_batch)
    ]
def recap_from_panels(panels: List[Panel], last: int = 16) -> str:
    """Build a compact running summary used as continuity context for the next batch.

    Panels are ordered by their `index` attribute and only the most recent `last`
    are kept. Each contributes one line with its page, panel number and caption;
    newlines in the caption become spaces and captions longer than 160 characters
    are cut to 157 plus "...".

    Args:
        panels: panels written so far, in any order.
        last: how many trailing panels to include. Zero or a negative value
            yields an empty recap.

    Returns:
        The recap lines joined by newlines, or "" when there is nothing to show.
    """
    # Guard explicitly: `seq[-0:]` is the WHOLE sequence, not an empty one, and a
    # negative `last` would slice from the wrong end.
    if last <= 0:
        return ""
    ordered = sorted(panels, key=lambda x: x.index)[-last:]
    lines = []
    for p in ordered:
        cap = p.caption.replace("\n", " ").strip()
        if len(cap) > 160:
            cap = cap[:157] + "..."
        lines.append(f" Page {p.page} panel {p.panel}: {cap}")
    return "\n".join(lines)