comicx / comic /writer.py
ASTRALK's picture
Upload comic/writer.py with huggingface_hub
e3fafda verified
Raw
History Blame Contribute Delete
11 kB
"""Gemma prompting + parsing for the comic writer.
Two prompt families, both demanding STRICT JSON so parsing is reliable:
    build_bible_messages(idea)                -> Gemma call #1: safety gate + story bible.
    build_panel_messages(bible, pages, recap) -> Gemma calls #2..N: the actual panels for a
                                                 small batch of pages, given the bible and a
                                                 recap of the story so far (continuity).
This module is backend-agnostic: it only builds message lists and parses replies.
The engine owns the model calls. Robust JSON extraction tolerates a stray ```json
fence, leftover <think> blocks, or prose around the object (belt-and-braces on top of
vLLM's --reasoning-parser, which already strips the thinking channel).
"""
from __future__ import annotations
import json
import re
from typing import List, Optional
from .schema import (
ComicBible, Panel, PageSynopsis, PAGES, PANELS_PER_PAGE,
)
# How many pages to script per panel call. 5 pages = 10 panels/call -> 5 calls for a
# 25-page (50-panel) comic. Small enough that each JSON reply stays well within the
# token budget (even with thinking) and the model keeps full continuity context; big
# enough to keep round-trips down.
# NOTE(review): the arithmetic above assumes PAGES == 25 and PANELS_PER_PAGE == 2 in
# .schema -- confirm if either constant changes. This value is the default batch size
# of batches().
PANEL_BATCH_PAGES = 5
# ── JSON extraction ──────────────────────────────────────────────────────────
# Paired reasoning blocks some models leak into the reply; removed before parsing.
_THINK_RE = re.compile(r"<think>.*?</think>", re.DOTALL | re.IGNORECASE)
# A markdown code fence, optionally tagged `json`; group(1) is the fenced payload.
_FENCE_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)


def _balanced_object_at(text: str, start: int) -> Optional[str]:
    """Return the balanced {...} span that opens at text[start], else None.

    Braces inside double-quoted strings (escaped quotes included) are ignored.
    None means the span never closes, which usually indicates a truncated reply.
    """
    depth = 0
    in_str = False
    esc = False
    for i in range(start, len(text)):
        c = text[i]
        if in_str:
            # Inside a string only escapes and the closing quote matter.
            if esc:
                esc = False
            elif c == "\\":
                esc = True
            elif c == '"':
                in_str = False
            continue
        if c == '"':
            in_str = True
        elif c == "{":
            depth += 1
        elif c == "}":
            depth -= 1
            if depth == 0:
                return text[start:i + 1]
    return None


def _first_json_object(text: str) -> Optional[str]:
    """Return the first balanced {...} object in `text` (string/escape aware)."""
    start = text.find("{")
    if start < 0:
        return None
    return _balanced_object_at(text, start)


def _top_level_objects(text: str) -> List[str]:
    """Return every top-level balanced {...} span in `text`, left to right.

    Scanning stops at the first "{" that never closes: that is the signature of a
    truncated reply, and digging into its nested fragments would hand the caller a
    partial object instead of the error it needs in order to retry.
    """
    found: List[str] = []
    pos = text.find("{")
    while pos >= 0:
        blob = _balanced_object_at(text, pos)
        if blob is None:
            break
        found.append(blob)
        # Resume after this span so nested objects are never offered on their own.
        pos = text.find("{", pos + len(blob))
    return found


def extract_json(text: str) -> dict:
    """Parse the model reply into a dict, tolerating fences/prose/thinking leftovers.

    Candidates are tried in order: the payload of each ``` fence, then the whole
    reply with <think> blocks removed. Each candidate is parsed as-is first and then
    span by span over its top-level {...} objects, so brace-bearing prose ahead of
    the real object (e.g. "fill the {slots}: {...}") no longer hides it.

    Returns:
        The first candidate that decodes to a JSON object (dict).

    Raises:
        ValueError: if the reply is empty or nothing decodes to a JSON object, so
            callers can surface a clear error (and retry) rather than silently
            producing an empty comic.
    """
    if not text or not text.strip():
        raise ValueError("empty model reply")
    cleaned = _THINK_RE.sub("", text).strip()
    # Prefer fenced blocks (all of them, in order) over the raw text.
    candidates = [m.group(1) for m in _FENCE_RE.finditer(cleaned)]
    candidates.append(cleaned)
    for cand in candidates:
        for blob in (cand, *_top_level_objects(cand)):
            if not blob:
                continue
            try:
                obj = json.loads(blob)
            except json.JSONDecodeError:
                continue
            if isinstance(obj, dict):
                return obj
    raise ValueError("no JSON object found in model reply")
# ── Call #1: gatekeeper + story bible ────────────────────────────────────────
# System prompt for Gemma call #1. It asks the model to act as both content gatekeeper
# and writer, and to answer with one strict-JSON object (approved / refusal_reason /
# title / logline / art_style / palette / characters / pages). The page and panel
# counts are interpolated from the schema constants at import time.
BIBLE_SYSTEM = (
    "You are a professional comic-book writer and art director. From a reader's "
    "request you design a complete comic of exactly "
    f"{PAGES} pages, {PANELS_PER_PAGE} panels per page ({PAGES * PANELS_PER_PAGE} "
    "panels total). You are ALSO the content gatekeeper.\n\n"
    "SAFETY FIRST. Refuse (approved=false) only if the request asks for: sexual "
    "content involving minors; real, named people in sexual or defamatory scenes; "
    "extreme gore or cruelty for shock value; hateful or harassing content toward a "
    "protected group; or instructions that enable real-world harm. Ordinary fictional "
    "adventure, action, peril, rivalry, mystery, romance, horror and comedy ARE allowed. "
    "When you refuse, give one short, polite sentence and leave the other fields empty.\n\n"
    "If approved, design a story BIBLE:\n"
    "- a punchy title and a one-sentence logline;\n"
    "- a FIXED cast of 1 to 4 main characters. Each gets a name and a single vivid, "
    "concrete VISUAL description (species/build, age, hair, face, signature clothing, "
    "colors, props) of about 25-40 words. This description is reused verbatim in every "
    "image, so it must be self-contained and unambiguous;\n"
    "- one global art_style line (medium, linework, shading) and one palette line, both "
    "constant for the whole comic;\n"
    f"- a {PAGES}-page synopsis: one vivid sentence per page, together forming a full arc "
    "(setup, rising action, midpoint turn, climax, resolution).\n\n"
    # The dash below was mojibake ("β€”") in the prompt; restored to a real em dash.
    "Output STRICT JSON ONLY — no markdown, no commentary. Schema:\n"
    "{\n"
    ' "approved": true,\n'
    ' "refusal_reason": "",\n'
    ' "title": "...",\n'
    ' "logline": "...",\n'
    ' "art_style": "...",\n'
    ' "palette": "...",\n'
    ' "characters": [ {"name": "...", "appearance": "..."} ],\n'
    f' "pages": [ {{"page": 1, "synopsis": "..."}} ... exactly {PAGES} items ]\n'
    "}"
)
def build_bible_messages(idea: str) -> list:
    """Build the chat messages for Gemma call #1 (safety gate + story bible).

    The reader's idea is whitespace-trimmed and quoted inside triple quotes in the
    user turn; BIBLE_SYSTEM is sent as the system turn.

    Returns:
        A two-item list of {"role", "content"} dicts: system first, then user.
    """
    request = idea.strip()
    prompt = (
        f"Reader's request:\n\"\"\"\n{request}\n\"\"\"\n\n"
        f"Decide if it is allowed, then (if allowed) design the full {PAGES}-page comic "
        "bible. Remember: character appearance descriptions are reused verbatim in every "
        "panel image, so make them detailed and consistent. Output strict JSON only."
    )
    system_msg = {"role": "system", "content": BIBLE_SYSTEM}
    user_msg = {"role": "user", "content": prompt}
    return [system_msg, user_msg]
def parse_bible(reply: str) -> ComicBible:
    """Parse the reply to call #1 into a ComicBible.

    A refused bible is returned untouched. An approved one has its page list cut or
    padded (with empty synopses) to exactly PAGES entries and renumbered 1..PAGES by
    position, so downstream batching can index pages directly.

    Raises:
        ValueError: propagated from extract_json when the reply holds no JSON object.
    """
    bible = ComicBible.from_dict(extract_json(reply))
    if not bible.approved:
        return bible

    kept = bible.pages[:PAGES]
    # Fill any shortfall with blank synopses for the missing page numbers.
    kept.extend(
        PageSynopsis(page=number, synopsis="")
        for number in range(len(kept) + 1, PAGES + 1)
    )
    # Position wins over whatever page numbers the model wrote.
    for number, entry in enumerate(kept, start=1):
        entry.page = number
    bible.pages = kept
    return bible
# ── Calls #2..N: panel script for a batch of pages ───────────────────────────
# System prompt for Gemma calls #2..N. It asks for scene / caption / characters per
# panel and a strict-JSON {"panels": [...]} reply; the panels-per-page count is
# interpolated from the schema constant at import time.
PANEL_SYSTEM = (
    "You are the same comic-book writer, now scripting individual panels. You are given "
    "the story bible (title, fixed cast with appearances, art style, palette, and the "
    "full page-by-page synopsis) and a recap of the panels written so far. You write the "
    f"{PANELS_PER_PAGE} panels for each requested page, continuing the story coherently.\n\n"
    "For every panel produce:\n"
    # The dash below was mojibake ("β€”") in the prompt; restored to a real em dash.
    "- scene: a purely VISUAL description of what we see in the frame — camera/shot, which "
    "named characters are present and what they are doing, setting and mood. No dialogue "
    "or words that should appear AS text in the image (the image must be text-free).\n"
    "- caption: the reader-facing text shown UNDER the panel: 1-2 short sentences of "
    "narration, optionally one short line of spoken dialogue in quotes. This carries the "
    "story between images.\n"
    "- characters: the list of cast names present in the panel (use the bible's exact "
    "names so their look stays consistent).\n\n"
    "Keep continuity with the recap and the synopsis. Output STRICT JSON ONLY:\n"
    '{ "panels": [ {"page": N, "panel": 1, "scene": "...", "caption": "...", '
    '"characters": ["..."]} , ... ] }'
)
def _bible_brief(bible: ComicBible) -> str:
    """Render the bible as the plain-text brief that opens every panel prompt.

    One line each for title, logline, art style and palette, then the cast (one
    line per character with its appearance) and the page-by-page synopsis.
    """
    cast_lines = [f" - {member.name}: {member.appearance}" for member in bible.characters]
    page_lines = [f" Page {entry.page}: {entry.synopsis}" for entry in bible.pages]
    sections = [
        f"TITLE: {bible.title}",
        f"LOGLINE: {bible.logline}",
        f"ART STYLE: {bible.art_style}",
        f"PALETTE: {bible.palette}",
        "CAST (fixed appearances):",
        "\n".join(cast_lines),
        f"FULL {PAGES}-PAGE SYNOPSIS:",
        "\n".join(page_lines),
    ]
    return "\n".join(sections)
def build_panel_messages(bible: ComicBible, pages: List[int], recap: str) -> list:
    """Build the chat messages for one panel-scripting call (Gemma calls #2..N).

    Args:
        bible: the approved story bible; its `pages` list is indexed by page number.
        pages: 1-based page numbers to script in this batch, in order.
        recap: summary of the panels written so far; empty (or None) for the first
            batch, in which case a placeholder line is sent instead.

    Returns:
        A two-item list of {"role", "content"} dicts: system first, then user.

    Raises:
        IndexError: if a requested page number is outside 1..len(bible.pages).
            Checked explicitly because a page number of 0 or below would otherwise
            wrap around through negative indexing and quote the wrong synopsis.
    """
    total = len(bible.pages)
    for n in pages:
        if not 1 <= n <= total:
            raise IndexError(f"page {n} is outside the bible's 1..{total} synopsis range")
    page_lines = "\n".join(
        f" Page {n}: {bible.pages[n - 1].synopsis}" for n in pages
    )
    # The placeholder's dash was mojibake ("β€”"); restored to a real em dash.
    recap_block = (recap or "").strip() or "(this is the opening — nothing has happened yet)"
    user = (
        f"{_bible_brief(bible)}\n\n"
        f"STORY SO FAR (panels already written):\n{recap_block}\n\n"
        f"NOW WRITE the {PANELS_PER_PAGE} panels for EACH of these pages, in order:\n"
        f"{page_lines}\n\n"
        f"Return exactly {len(pages) * PANELS_PER_PAGE} panels as strict JSON."
    )
    return [
        {"role": "system", "content": PANEL_SYSTEM},
        {"role": "user", "content": user},
    ]
def parse_panels(reply: str, pages: List[int]) -> List[Panel]:
    """Parse a panel-batch reply into Panels, coerced onto the expected page/panel grid.

    The expected grid is every (page, panel) pair for `pages`, in order, with
    PANELS_PER_PAGE panels per page. Panel objects from the reply are assigned to
    those slots by position; the model's own page/panel numbers are overwritten.
    Entries that are not JSON objects are discarded before slotting, so a stray
    string or null in the list cannot shift or drop the valid panels after it.

    Surplus panels are ignored. A short reply yields fewer panels than slots (no
    padding is invented here), so callers should compare the result length with
    len(pages) * PANELS_PER_PAGE if they need a complete batch.

    Raises:
        ValueError: propagated from extract_json when the reply holds no JSON object.
    """
    obj = extract_json(reply)
    raw = obj.get("panels")
    if not isinstance(raw, list):
        raw = [obj]  # tolerate a bare single panel object
    items = [item for item in raw if isinstance(item, dict)]
    expected = [(pg, pn) for pg in pages for pn in range(1, PANELS_PER_PAGE + 1)]
    panels: List[Panel] = []
    for (page, panel_no), item in zip(expected, items):
        p = Panel.from_dict(item, default_page=page, default_panel=panel_no)
        # Force onto the expected grid slot — trust position over the model's numbering.
        p.page, p.panel = page, panel_no
        panels.append(p)
    return panels
def batches(pages_per_batch: int = PANEL_BATCH_PAGES) -> List[List[int]]:
    """Split page numbers 1..PAGES into consecutive batches.

    Example: with PAGES == 10 and pages_per_batch == 2 the result is
    [[1, 2], [3, 4], ..., [9, 10]]. The final batch is shorter when PAGES is not a
    multiple of `pages_per_batch`.

    Raises:
        ValueError: if `pages_per_batch` is below 1. A negative size would otherwise
            return no batches at all, i.e. silently script an empty comic.
    """
    if pages_per_batch < 1:
        raise ValueError("pages_per_batch must be >= 1")
    return [
        list(range(start, min(start + pages_per_batch, PAGES + 1)))
        for start in range(1, PAGES + 1, pages_per_batch)
    ]
def recap_from_panels(panels: List[Panel], last: int = 16) -> str:
    """A compact running summary fed back as continuity context for the next batch.

    Only the most recent `last` panels (ordered by their `index`) are included; the
    full-arc page synopsis is always in the prompt, so this just needs the immediate
    lead-in and keeps prompts lean.

    Args:
        panels: panels written so far, in any order.
        last: how many of the most recent panels to include. Zero or a negative
            value yields an empty recap.

    Returns:
        One line per panel ("Page P panel N: caption"), newline-joined; captions are
        flattened to a single line and cut to 160 characters.
    """
    # Guard first: a slice of [-0:] is the WHOLE list, and a negative `last` would
    # slice from the front, so neither can be left to the slice below.
    if last <= 0:
        return ""
    ordered = sorted(panels, key=lambda x: x.index)[-last:]
    lines = []
    for p in ordered:
        cap = p.caption.replace("\n", " ").strip()
        if len(cap) > 160:
            cap = cap[:157] + "..."
        lines.append(f" Page {p.page} panel {p.panel}: {cap}")
    return "\n".join(lines)