"""Gemma prompting + parsing for the comic writer. Two prompt families, both demanding STRICT JSON so parsing is reliable: build_bible_messages(idea) -> Gemma call #1: safety gate + story bible. build_panel_messages(bible, pages..) -> Gemma calls #2..N: the actual panels for a small batch of pages, given the bible and a recap of the story so far (continuity). This module is backend-agnostic: it only builds message lists and parses replies. The engine owns the model calls. Robust JSON extraction tolerates a stray ```json fence, leftover blocks, or prose around the object (belt-and-braces on top of vLLM's --reasoning-parser, which already strips the thinking channel). """ from __future__ import annotations import json import re from typing import List, Optional from .schema import ( ComicBible, Panel, PageSynopsis, PAGES, PANELS_PER_PAGE, ) # How many pages to script per panel call. 5 pages = 10 panels/call -> 5 calls for a # 25-page (50-panel) comic. Small enough that each JSON reply stays well within the # token budget (even with thinking) and the model keeps full continuity context; big # enough to keep round-trips down. PANEL_BATCH_PAGES = 5 # ── JSON extraction ────────────────────────────────────────────────────────── _THINK_RE = re.compile(r".*?", re.DOTALL | re.IGNORECASE) _FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE) def _first_json_object(text: str) -> Optional[str]: """Return the first balanced {...} object in `text` (string/escape aware).""" start = text.find("{") if start < 0: return None depth = 0 in_str = False esc = False for i in range(start, len(text)): c = text[i] if in_str: if esc: esc = False elif c == "\\": esc = True elif c == '"': in_str = False continue if c == '"': in_str = True elif c == "{": depth += 1 elif c == "}": depth -= 1 if depth == 0: return text[start:i + 1] return None def extract_json(text: str) -> dict: """Parse the model reply into a dict, tolerating fences/prose/thinking leftovers. Raises ValueError if nothing JSON-like is found, so callers can surface a clear error (and retry) rather than silently producing an empty comic. """ if not text or not text.strip(): raise ValueError("empty model reply") cleaned = _THINK_RE.sub("", text).strip() # Prefer a fenced block if present, else the raw text. candidates = [] m = _FENCE_RE.search(cleaned) if m: candidates.append(m.group(1)) candidates.append(cleaned) for cand in candidates: for blob in (cand, _first_json_object(cand)): if not blob: continue try: obj = json.loads(blob) if isinstance(obj, dict): return obj except json.JSONDecodeError: continue raise ValueError("no JSON object found in model reply") # ── Call #1: gatekeeper + story bible ──────────────────────────────────────── BIBLE_SYSTEM = ( "You are a professional comic-book writer and art director. From a reader's " "request you design a complete comic of exactly " f"{PAGES} pages, {PANELS_PER_PAGE} panels per page ({PAGES * PANELS_PER_PAGE} " "panels total). You are ALSO the content gatekeeper.\n\n" "SAFETY FIRST. Refuse (approved=false) only if the request asks for: sexual " "content involving minors; real, named people in sexual or defamatory scenes; " "extreme gore or cruelty for shock value; hateful or harassing content toward a " "protected group; or instructions that enable real-world harm. Ordinary fictional " "adventure, action, peril, rivalry, mystery, romance, horror and comedy ARE allowed. " "When you refuse, give one short, polite sentence and leave the other fields empty.\n\n" "If approved, design a story BIBLE:\n" "- a punchy title and a one-sentence logline;\n" "- a FIXED cast of 1 to 4 main characters. Each gets a name and a single vivid, " "concrete VISUAL description (species/build, age, hair, face, signature clothing, " "colors, props) of about 25-40 words. This description is reused verbatim in every " "image, so it must be self-contained and unambiguous;\n" "- one global art_style line (medium, linework, shading) and one palette line, both " "constant for the whole comic;\n" f"- a {PAGES}-page synopsis: one vivid sentence per page, together forming a full arc " "(setup, rising action, midpoint turn, climax, resolution).\n\n" "Output STRICT JSON ONLY — no markdown, no commentary. Schema:\n" "{\n" ' "approved": true,\n' ' "refusal_reason": "",\n' ' "title": "...",\n' ' "logline": "...",\n' ' "art_style": "...",\n' ' "palette": "...",\n' ' "characters": [ {"name": "...", "appearance": "..."} ],\n' f' "pages": [ {{"page": 1, "synopsis": "..."}} ... exactly {PAGES} items ]\n' "}" ) def build_bible_messages(idea: str) -> list: user = ( f"Reader's request:\n\"\"\"\n{idea.strip()}\n\"\"\"\n\n" f"Decide if it is allowed, then (if allowed) design the full {PAGES}-page comic " "bible. Remember: character appearance descriptions are reused verbatim in every " "panel image, so make them detailed and consistent. Output strict JSON only." ) return [ {"role": "system", "content": BIBLE_SYSTEM}, {"role": "user", "content": user}, ] def parse_bible(reply: str) -> ComicBible: """Parse call #1. On approval, pad/truncate pages to exactly PAGES entries.""" bible = ComicBible.from_dict(extract_json(reply)) if bible.approved: # Normalise to exactly PAGES synopses so downstream batching is clean. pages = bible.pages[:PAGES] while len(pages) < PAGES: pages.append(PageSynopsis(page=len(pages) + 1, synopsis="")) for i, p in enumerate(pages, start=1): p.page = i bible.pages = pages return bible # ── Calls #2..N: panel script for a batch of pages ─────────────────────────── PANEL_SYSTEM = ( "You are the same comic-book writer, now scripting individual panels. You are given " "the story bible (title, fixed cast with appearances, art style, palette, and the " "full page-by-page synopsis) and a recap of the panels written so far. You write the " f"{PANELS_PER_PAGE} panels for each requested page, continuing the story coherently.\n\n" "For every panel produce:\n" "- scene: a purely VISUAL description of what we see in the frame — camera/shot, which " "named characters are present and what they are doing, setting and mood. No dialogue " "or words that should appear AS text in the image (the image must be text-free).\n" "- caption: the reader-facing text shown UNDER the panel: 1-2 short sentences of " "narration, optionally one short line of spoken dialogue in quotes. This carries the " "story between images.\n" "- characters: the list of cast names present in the panel (use the bible's exact " "names so their look stays consistent).\n\n" "Keep continuity with the recap and the synopsis. Output STRICT JSON ONLY:\n" '{ "panels": [ {"page": N, "panel": 1, "scene": "...", "caption": "...", ' '"characters": ["..."]} , ... ] }' ) def _bible_brief(bible: ComicBible) -> str: cast = "\n".join(f" - {c.name}: {c.appearance}" for c in bible.characters) synopsis = "\n".join(f" Page {p.page}: {p.synopsis}" for p in bible.pages) return ( f"TITLE: {bible.title}\n" f"LOGLINE: {bible.logline}\n" f"ART STYLE: {bible.art_style}\n" f"PALETTE: {bible.palette}\n" f"CAST (fixed appearances):\n{cast}\n" f"FULL {PAGES}-PAGE SYNOPSIS:\n{synopsis}" ) def build_panel_messages(bible: ComicBible, pages: List[int], recap: str) -> list: page_lines = "\n".join( f" Page {n}: {bible.pages[n - 1].synopsis}" for n in pages ) recap_block = recap.strip() or "(this is the opening — nothing has happened yet)" user = ( f"{_bible_brief(bible)}\n\n" f"STORY SO FAR (panels already written):\n{recap_block}\n\n" f"NOW WRITE the {PANELS_PER_PAGE} panels for EACH of these pages, in order:\n" f"{page_lines}\n\n" f"Return exactly {len(pages) * PANELS_PER_PAGE} panels as strict JSON." ) return [ {"role": "system", "content": PANEL_SYSTEM}, {"role": "user", "content": user}, ] def parse_panels(reply: str, pages: List[int]) -> List[Panel]: """Parse a panel-batch reply into Panels, coercing to the expected page/panel grid. Defensive: if the model returns the wrong count or scrambled page/panel numbers, we slot the panels into the expected (page, panel) order so the comic stays whole. """ obj = extract_json(reply) raw = obj.get("panels") if not isinstance(raw, list): raw = [obj] # tolerate a bare single panel object expected = [(pg, pn) for pg in pages for pn in range(1, PANELS_PER_PAGE + 1)] panels: List[Panel] = [] for slot, item in zip(expected, raw): if not isinstance(item, dict): continue p = Panel.from_dict(item, default_page=slot[0], default_panel=slot[1]) # Force onto the expected grid slot — trust position over the model's numbering. p.page, p.panel = slot panels.append(p) return panels def batches(pages_per_batch: int = PANEL_BATCH_PAGES) -> List[List[int]]: """Page-number batches covering 1..PAGES, e.g. [[1,2],[3,4],...,[9,10]].""" out = [] for start in range(1, PAGES + 1, pages_per_batch): out.append(list(range(start, min(start + pages_per_batch, PAGES + 1)))) return out def recap_from_panels(panels: List[Panel], last: int = 16) -> str: """A compact running summary fed back as continuity context for the next batch. Only the most recent `last` panels are included — the full-arc page synopsis is always in the prompt, so this just needs the immediate lead-in. Keeps prompts lean and fast across a 50-panel comic. """ ordered = sorted(panels, key=lambda x: x.index)[-last:] lines = [] for p in ordered: cap = p.caption.replace("\n", " ").strip() if len(cap) > 160: cap = cap[:157] + "..." lines.append(f" Page {p.page} panel {p.panel}: {cap}") return "\n".join(lines)