# anime-gen-api/app/utils/prompt_builder.py
# AswinMathew's picture
# Hybrid pollen-paced video gen, BYOP, background consistency, browser notifications
# c4dfe24 verified
"""Build prompts for image generation with character consistency."""
# Last-resort appearance presets keyed by (role, gender) — the final safety
# net so that a placeholder such as "unknown" never reaches image generation.
MANHWA_DEFAULTS = {
    # --- protagonists ---
    ("protagonist", "male"): {
        "hair_color": "jet black",
        "hair_style": "short messy with side-swept bangs",
        "eye_color": "deep crimson",
        "skin_tone": "fair",
        "build": "athletic",
        "clothing": "dark fitted jacket over white shirt, black pants",
    },
    ("protagonist", "female"): {
        "hair_color": "dark chestnut brown",
        "hair_style": "long flowing with gentle waves",
        "eye_color": "bright amber",
        "skin_tone": "warm ivory",
        "build": "slim",
        "clothing": "elegant dark blazer with pleated skirt, ribbon tie",
    },
    # --- antagonists ---
    ("antagonist", "male"): {
        "hair_color": "silver white",
        "hair_style": "slicked back undercut",
        "eye_color": "cold ice blue",
        "skin_tone": "pale ivory",
        "build": "tall athletic",
        "clothing": "long black coat with high collar, dark gloves",
    },
    ("antagonist", "female"): {
        "hair_color": "deep violet",
        "hair_style": "long straight with sharp bangs",
        "eye_color": "piercing gold",
        "skin_tone": "porcelain",
        "build": "slim tall",
        "clothing": "form-fitting dark dress with ornate accessories",
    },
    # --- supporting cast ---
    ("supporting", "male"): {
        "hair_color": "warm brown",
        "hair_style": "medium length tousled",
        "eye_color": "forest green",
        "skin_tone": "warm tan",
        "build": "average",
        "clothing": "casual layered outfit, open jacket over graphic tee",
    },
    ("supporting", "female"): {
        "hair_color": "honey blonde",
        "hair_style": "shoulder-length with side part",
        "eye_color": "soft blue",
        "skin_tone": "light peach",
        "build": "petite",
        "clothing": "light cardigan over blouse, simple accessories",
    },
    # --- minor characters ---
    ("minor", "male"): {
        "hair_color": "dark brown",
        "hair_style": "short neat",
        "eye_color": "dark brown",
        "skin_tone": "medium",
        "build": "average",
        "clothing": "plain dark shirt, simple pants",
    },
    ("minor", "female"): {
        "hair_color": "auburn",
        "hair_style": "tied in a ponytail",
        "eye_color": "hazel",
        "skin_tone": "fair",
        "build": "average",
        "clothing": "simple blouse and skirt",
    },
}
# Catch-all preset used when a (role, gender) pair has no entry in the map.
_GENERIC_DEFAULT = {
    "hair_color": "dark",
    "hair_style": "medium length",
    "eye_color": "dark brown",
    "skin_tone": "medium",
    "build": "average",
    "clothing": "casual dark clothing",
}
# Art-style constants.
# The prefix is the comma-separated list of style tags placed at the very
# start of a prompt.
MANHWA_STYLE_PREFIX = ", ".join((
    "manhwa style",
    "korean webtoon art",
    "sharp detailed lineart",
    "cel-shaded coloring",
    "vibrant saturated colors",
    "dramatic lighting with strong contrast",
    "clean digital art",
    "detailed backgrounds",
))

# Suffix appended verbatim to a finished prompt; it starts with ", " so it can
# be concatenated directly.
NEGATIVE_GUIDANCE = (
    ", no 3d render, no photorealistic, no blurry"
    ", no deformed hands, no extra fingers"
)
def _str(val) -> str:
    """Render ``val`` as text.

    A list is rendered as its items, each passed through ``str``, joined with
    ``", "``. Any other value is returned as ``str(val)``.
    """
    if not isinstance(val, list):
        return str(val)
    return ", ".join(map(str, val))
def _get_field(char_data: dict, field: str, role: str = "supporting", gender: str = "male") -> str:
    """Return a usable value for ``field``, substituting a preset when needed.

    The value stored in ``char_data`` is used when it is non-empty and is not
    one of the placeholder words ("unknown", "none", "n/a", "not described",
    "not mentioned"; compared case-insensitively). Otherwise the value comes
    from ``MANHWA_DEFAULTS[(role, gender)]``, then from ``_GENERIC_DEFAULT``,
    and finally falls back to ``""``.

    Args:
        char_data: Mapping of character attributes.
        field: Attribute name to read, e.g. ``"hair_color"``.
        role: Character role used to select the preset.
        gender: Character gender used to select the preset.

    Returns:
        The stripped field text, or the preset value for that field.
    """
    raw = char_data.get(field, "")
    # Coerce to text BEFORE inspecting it: a non-empty list or a number has no
    # ``.lower()``, so checking the raw value raised AttributeError.
    text = _str(raw).strip() if raw else ""
    if text and text.lower() not in ("unknown", "none", "n/a", "not described", "not mentioned"):
        return text

    # Preset keys are lower-case; normalise so "Male" / " Protagonist " still
    # select the specific preset instead of silently degrading to the generic one.
    key = (str(role).strip().lower(), str(gender).strip().lower())
    preset = MANHWA_DEFAULTS.get(key, _GENERIC_DEFAULT)
    return preset.get(field, _GENERIC_DEFAULT.get(field, ""))
# Words that mean "no information" and must never be copied into a prompt.
_PLACEHOLDER_VALUES = frozenset({"unknown", "none", "n/a", "not described", "not mentioned"})


def _meaningful(value) -> str:
    """Return ``value`` as stripped text, or "" if it is empty or a placeholder."""
    if not value:
        return ""
    text = _str(value).strip()
    return "" if text.lower() in _PLACEHOLDER_VALUES else text


def build_character_visual_prompt(character_data: dict) -> str:
    """Build a comma-separated, prompt-ready visual description of a character.

    Segments are emitted in a fixed order: age + gender, hair, eyes, skin,
    build, clothing, then distinctive features (only when present). Missing
    or placeholder appearance fields are filled in by ``_get_field``.

    Args:
        character_data: Mapping of character attributes. Keys read here:
            ``role``, ``gender``, ``age_category``, ``hair_color``,
            ``hair_style``, ``eye_color``, ``skin_tone``, ``build``,
            ``clothing``, ``distinctive_features`` and the legacy free-text
            keys ``hair`` and ``eyes``.

    Returns:
        The segments joined with ``", "``.
    """
    # A placeholder role/gender is treated exactly like a missing key, so text
    # such as "unknown" or "None" is never written into the prompt.
    role = _meaningful(character_data.get("role")) or "supporting"
    gender = _meaningful(character_data.get("gender")) or "male"

    # Core identity
    age_map = {
        "child": "young child",
        "teen": "teenage",
        "young_adult": "young adult",
        "adult": "adult",
        "elder": "elderly",
    }
    age_key = _meaningful(character_data.get("age_category")).lower()
    parts = [f"{age_map.get(age_key, 'young adult')} {gender}"]

    # Hair: real data from the legacy free-text field beats a role preset, so
    # it is used whenever the new-format colour is absent or a placeholder.
    legacy_hair = _meaningful(character_data.get("hair"))
    if legacy_hair and not _meaningful(character_data.get("hair_color")):
        parts.append(f"{legacy_hair} hair")
    else:
        hair_color = _get_field(character_data, "hair_color", role, gender)
        hair_style = _get_field(character_data, "hair_style", role, gender)
        parts.append(f"{hair_color} {hair_style} hair")

    # Eyes: same precedence rule as hair.
    legacy_eyes = _meaningful(character_data.get("eyes"))
    if legacy_eyes and not _meaningful(character_data.get("eye_color")):
        parts.append(f"{legacy_eyes} eyes")
    else:
        eye_color = _get_field(character_data, "eye_color", role, gender)
        parts.append(f"{eye_color} eyes")

    skin_tone = _get_field(character_data, "skin_tone", role, gender)
    parts.append(f"{skin_tone} skin")

    # Build
    build = _get_field(character_data, "build", role, gender)
    parts.append(f"{build} build")

    # Clothing
    clothing = _get_field(character_data, "clothing", role, gender)
    parts.append(f"wearing {clothing}")

    # Distinctive features are optional: there is no preset for them.
    features = _meaningful(character_data.get("distinctive_features"))
    if features:
        parts.append(features)

    return ", ".join(parts)
def _build_identity_anchor(visual_prompt: str) -> str:
    """Extract identity-only details from a full visual prompt.

    The prompt is split on commas and each segment is filtered:

    * Kept: hair, eyes, skin and clothing descriptors, plus any other segment
      that matches none of the filters below.
    * Dropped: segments mentioning "expression", age/gender identifiers, or
      "build" — those are cut-specific and controlled by the storyboard's
      image_prompt.

    A segment that starts with "wearing" is clothing, so it is exempt from
    the age/gender and build filters (e.g. "wearing female school uniform"
    is kept). Empty segments are discarded.

    Args:
        visual_prompt: Comma-separated visual description.

    Returns:
        The surviving segments joined with ``", "``; if nothing survives,
        ``visual_prompt`` is returned unchanged.
    """
    age_gender_terms = (
        "young adult", "teenage", "adult male", "adult female",
        "young child", "elderly", "male", "female",
    )
    kept = []
    for segment in visual_prompt.split(","):
        segment = segment.strip()
        if not segment:
            # Stray or trailing commas would otherwise leave dangling ", ".
            continue
        lower = segment.lower()
        # Expression/pose is controlled per cut, even when phrased as
        # "wearing a ... expression", so this check comes first.
        if "expression" in lower:
            continue
        # Clothing is part of the identity; gendered or "build"-like words
        # inside it must not cause the whole garment to be dropped.
        if lower.startswith("wearing "):
            kept.append(segment)
            continue
        if "build" in lower or any(term in lower for term in age_gender_terms):
            continue
        kept.append(segment)
    return ", ".join(kept) if kept else visual_prompt
# Shot-type keys mapped to the framing phrase written into the prompt.
_SHOT_TYPE_PHRASES = {
    "establishing": "wide establishing shot",
    "wide": "wide shot",
    "medium": "medium shot",
    "medium_wide": "medium wide shot",
    "medium_close_up": "medium close-up shot",
    "close_up": "close-up shot",
    "extreme_close_up": "extreme close-up",
    "over_shoulder": "over-the-shoulder shot",
    "reaction": "reaction shot",
    "pov": "first-person POV",
    "action": "dynamic action shot",
    "dutch_angle": "Dutch angle tilted shot",
    "low_angle": "low angle shot looking up",
    "high_angle": "high angle shot looking down",
    "birds_eye": "bird's eye view from above",
}

# Effect keys mapped to the hint written into the prompt; unlisted effects
# are ignored.
_EFFECT_PHRASES = {
    "rain": "rain",
    "snow": "snow",
    "glow": "glowing effects",
    "lens_flare": "lens flare",
    "bloom": "bloom lighting",
    "dust": "floating dust particles",
    "sparkle": "sparkle effects",
    "vignette": "vignette darkened edges",
    "impact_flash": "bright impact flash",
    "screen_shake": "dynamic motion blur",
    "speed_lines": "speed lines",
    "chromatic_aberration": "chromatic aberration",
}


def _as_list(value) -> list:
    """Normalise a cut field to a list: empty/None -> [], lone string -> [string]."""
    if not value:
        return []
    if isinstance(value, str):
        return [value]
    return list(value)


def build_image_prompt(
    cut: dict,
    characters: dict[str, str],  # name -> visual_prompt
    style_prefix: str = MANHWA_STYLE_PREFIX,
) -> str:
    """Build a complete image generation prompt for a cut/shot.

    Layer order (earlier = higher priority for image models):
      1. Style prefix — locks the art style.
      2. Shot type — framing cue.
      3. image_prompt — COMPOSITION MASTER from the storyboard director.
         It controls pose, camera angle, lighting, expression, background.
      3b. Scene setting — "[background: ...]" anchor so cuts in the same
         scene share a background.
      4. Character identity anchors — MINIMAL reinforcement for consistency:
         one "(...)" group per character present in the cut.
      5. Effects hints.
      6. Negative guidance (always appended).

    Args:
        cut: Storyboard cut. Keys read: ``shot_type``, ``image_prompt``,
            ``_scene_setting``, ``characters_present``, ``effects``. Any of
            them may be missing, empty or ``None``.
        characters: Mapping of character name to its full visual prompt.
        style_prefix: Art-style text placed first; skipped when empty.

    Returns:
        The layers joined with ``", "`` followed by ``NEGATIVE_GUIDANCE``.
    """
    parts = [style_prefix] if style_prefix else []

    # --- Layer 2: Shot type ---
    # ``or`` rather than a ``.get`` default so an explicit None/"" also means "medium".
    shot_type = str(cut.get("shot_type") or "").strip() or "medium"
    lookup_key = shot_type.lower().replace("-", "_").replace(" ", "_")
    shot_phrase = _SHOT_TYPE_PHRASES.get(lookup_key)
    if shot_phrase is None:
        # Unknown type: make it readable, but do not produce "... shot shot".
        shot_phrase = shot_type.replace("_", " ")
        if not shot_phrase.lower().endswith("shot"):
            shot_phrase += " shot"
    parts.append(shot_phrase)

    # --- Layer 3: image_prompt (composition master) ---
    image_prompt = cut.get("image_prompt")
    if image_prompt:
        parts.append(str(image_prompt))

    # --- Layer 3b: Scene setting anchor (background consistency) ---
    scene_setting = cut.get("_scene_setting")
    if scene_setting:
        parts.append(f"[background: {scene_setting}]")

    # --- Layer 4: Character identity anchors (consistency reinforcement) ---
    known_characters = characters or {}
    already_anchored = set()
    for char_name in _as_list(cut.get("characters_present")):
        visual_prompt = known_characters.get(char_name)
        # Skip unknown names, empty prompts, and names listed twice.
        if not visual_prompt or char_name in already_anchored:
            continue
        already_anchored.add(char_name)
        anchor = _build_identity_anchor(visual_prompt)
        if anchor:  # never emit an empty "()" group
            parts.append(f"({anchor})")

    # --- Layer 5: Effects hints ---
    for effect in _as_list(cut.get("effects")):
        effect_phrase = _EFFECT_PHRASES.get(effect)
        if effect_phrase:
            parts.append(effect_phrase)

    # --- Layer 6: Negative guidance ---
    return ", ".join(parts) + NEGATIVE_GUIDANCE