# composer / composer_dynamic.py
# factorstudios's picture
# Upload 9 files
# 735a97b verified
# Raw
# History Blame Contribute Delete
# 22 kB
"""
Dynamic Video Composer - Generates SCENE_CONFIG from Manifest
Converts manifest labels to uppercase and creates video without hardcoded config
"""
import os
import subprocess
from pathlib import Path
from typing import Dict, List, Any

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageEnhance, ImageOps
# ---------------------------------------------------------------------------
# GLOBALS
# ---------------------------------------------------------------------------
# Output frame size in pixels; unpacked as (width, height) throughout the module.
RESOLUTION = (1080, 1920) # W x H
# Frames per second; used to turn scene durations into frame counts and
# passed to the video writer.
FPS = 30
# NOTE(review): not referenced by the functions in this module; the render
# entry point takes the image directory as an argument instead.
SELECTED_DIR = "selected"
# Font files tried, in this order, by get_font().
# NOTE(review): both constants currently point at the same file, so the
# second one is not a distinct fallback -- confirm whether a regular-weight
# font was intended here.
FONT_PATH = "asset/TR Impact.TTF"
FONT_PATH_REG = "asset/TR Impact.TTF"
# Global colour-balance offsets applied to every image by grade_image()
# before the scene-specific grade.
GLOBAL_TEMP = +8
GLOBAL_TINT = -5
# RGBA colours for the caption fill, outline and drop shadow; the alpha
# component is scaled by the caption opacity in draw_text_stroked().
TEXT_WHITE = (255, 255, 255, 255)
TEXT_STROKE = (0, 0, 0, 210)
TEXT_SHADOW = (0, 0, 0, 60)
# Largest outline radius, in pixels, used when stamping the caption stroke.
STROKE_W = 8
# Easing
def ease_out(t):
    """Cubic ease-out: fast start that decelerates towards t == 1."""
    remaining = 1 - t
    return 1 - remaining ** 3
def ease_in_out(t):
    """Smoothstep easing: slow at both ends, fastest in the middle."""
    squared = t * t
    return squared * (3 - 2 * t)
def ease_in(t):
    """Cubic ease-in: slow start that accelerates towards t == 1."""
    squared = t * t
    return squared * t
def lerp(a, b, t):
    """Linearly interpolate from a (at t == 0) to b (at t == 1)."""
    span = b - a
    return a + span * t
# ---------------------------------------------------------------------------
# DYNAMIC SCENE CONFIG GENERATION
# ---------------------------------------------------------------------------
# Default motion, text, grade, transition configs per scene type
# Per-position scene presets consumed by generate_scene_config():
#   "intro"   -> first scene (idx 0)
#   "final"   -> last scene of a multi-scene manifest
#   "default" -> every scene in between
# Each preset carries:
#   motion     -- camera move; "type" selects the branch in get_motion_frame()
#   text       -- caption animation type, first visible frame and font size
#   grade      -- colour-grade options; generate_scene_config() only takes the
#                 grade from the template for idx 0, later scenes cycle
#                 through GRADE_VARIATIONS instead
#   transition -- transition type and its length in frames
#   duration_s -- scene length in seconds (converted to frames with FPS)
DEFAULT_SCENE_TEMPLATES = {
    "intro": {
        "motion": {"type": "slow_push_in", "scale_start": 1.0, "scale_end": 1.08},
        "text": {"type": "center_stroke_pop", "entry_frame": 2, "font_size": 95},
        "grade": {"crush_blacks": 15, "contrast": 1.15},
        "transition": {"type": "hard_cut", "frames": 1},
        "duration_s": 4.7,
    },
    "default": {
        "motion": {"type": "snap_zoom", "scale_start": 1.0, "scale_end": 1.12},
        "text": {"type": "center_pop", "entry_frame": 0, "font_size": 110},
        "grade": {"warm_tint": True, "lift_mids": 10},
        "transition": {"type": "whip_pan_right", "frames": 4},
        "duration_s": 2.3,
    },
    "final": {
        "motion": {"type": "static"},
        "text": {"type": "center_fade_pop", "entry_frame": 2, "font_size": 110},
        "grade": {"warm_indoor": True, "soft_glow": True, "lift_mids": 12},
        "transition": {"type": "end_fade_black", "frames": 30},
        "duration_s": 2.3,
    }
}
# Grade variations for different scenes
# Colour-grade presets cycled through by generate_scene_config() for every
# scene after the first: scene idx picks entry ``idx % len(GRADE_VARIATIONS)``.
# Keys are the option names read by grade_image(); truthy flags switch a look
# on, numeric values are amounts.
GRADE_VARIATIONS = [
    {"warm_tint": True, "lift_mids": 10}, # Scene 1: Warm
    {"desaturate": True, "lift_blacks": 5}, # Scene 2: Desaturated
    {"cool_tint": True, "highlights": -15}, # Scene 3: Cool
    {"soft_pink": True, "lift_mids": 15}, # Scene 4: Pink
    {"indoor_warm": True, "lift_shadows": 8}, # Scene 5: Warm indoor
    {"teal_orange": True, "crush_blacks": 10}, # Scene 6: Teal/orange
    {"dark_moody": True, "crush_blacks": 20, "desaturate": 15}, # Scene 7: Moody
]
def generate_scene_config(manifest: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Generate SCENE_CONFIG dynamically from manifest.

    The first scene uses the "intro" template, the last scene of a
    multi-scene manifest uses "final", and everything in between uses
    "default". Labels are upper-cased; a missing or ``None`` label falls back
    to ``"SCENE <idx>"``.

    Args:
        manifest: Dictionary whose "scenes" entry is a list of scene dicts,
            each optionally carrying a "label". A missing or ``None``
            "scenes" entry is treated as an empty list.

    Returns:
        One config dict per scene with the keys "idx", "label", "duration_s",
        "motion", "text", "grade" and "transition". Nested dicts are copies,
        so callers may mutate them without touching the templates.
    """
    # ``or []`` also covers an explicit ``"scenes": None``.
    scenes = manifest.get("scenes") or []
    last_idx = len(scenes) - 1
    config = []
    for idx, scene_data in enumerate(scenes):
        # Extract label and convert to uppercase; str() keeps numeric labels
        # from crashing on .upper().
        raw_label = scene_data.get("label")
        if raw_label is None:
            raw_label = f"SCENE {idx}"
        label = str(raw_label).upper()

        # Determine scene type (idx 0 wins when there is only one scene).
        if idx == 0:
            template = DEFAULT_SCENE_TEMPLATES["intro"]
        elif idx == last_idx:
            template = DEFAULT_SCENE_TEMPLATES["final"]
        else:
            template = DEFAULT_SCENE_TEMPLATES["default"]

        duration_s = template.get("duration_s", 2.3)
        total_frames = int(duration_s * FPS)

        # The caption holds until 6 frames before the end of the scene so its
        # fade-out finishes inside the scene; never let the hold go negative.
        entry_frame = template["text"].get("entry_frame", 2)
        hold_frames = max(0, total_frames - entry_frame - 6)

        # Only the first scene keeps its template grade; later scenes cycle
        # through GRADE_VARIATIONS for visual variety.
        if idx > 0:
            grade = GRADE_VARIATIONS[idx % len(GRADE_VARIATIONS)].copy()
        else:
            grade = template["grade"].copy()

        config.append({
            "idx": idx,
            "label": label,
            "duration_s": duration_s,
            "motion": template["motion"].copy(),
            "text": {
                **template["text"],
                "hold_frames": hold_frames,
                "align": "center",
            },
            "grade": grade,
            "transition": template["transition"].copy(),
        })
    return config
# ---------------------------------------------------------------------------
# COLOUR GRADE
# ---------------------------------------------------------------------------
def grade_image(img: Image.Image, grade: dict) -> Image.Image:
    """Colour-grade an image: global warm balance, then scene options.

    Adjustments are applied per channel in a fixed order, each one clamped
    to 0..255 before the next runs, so the order below is significant.

    Args:
        img: Source image; converted to RGB if it is in another mode.
        grade: Option dict. Truthy flags (``warm_tint``, ``cool_tint``,
            ``teal_orange``, ``soft_pink``, ``indoor_warm`` / ``warm_indoor``,
            ``soft_glow``, ``dark_moody``) switch a look on. Numeric options
            (``boost_reds``, ``crush_blacks``, ``contrast``, ``lift_shadows``,
            ``desaturate``, ``lift_blacks``, ``lift_mids``, ``highlights``)
            give an amount. ``crush_blacks`` and ``desaturate`` may also be
            passed as ``True`` to request their default amount of 10.

    Returns:
        A new graded RGB image.
    """
    def clamp(value):
        # Keep lookup-table entries inside the valid 8-bit range explicitly
        # instead of relying on the imaging library to clip them.
        return max(0, min(255, value))

    def shift(channel, delta):
        return channel.point(lambda p: clamp(p + delta))

    def amount(key, default):
        # A bare ``True`` means "use the default strength"; without this,
        # True would be read as the number 1 (e.g. 1% desaturation).
        value = grade[key]
        return default if value is True else value

    if img.mode != "RGB":
        img = img.convert("RGB")
    r, g, b = img.split()

    # Global warm grade
    r = shift(r, int(GLOBAL_TEMP * 1.2))
    g = shift(g, int(GLOBAL_TEMP * 0.35))
    b = shift(b, -int(GLOBAL_TEMP * 0.8))
    g = shift(g, int(GLOBAL_TINT * 0.5))

    # Scene-specific
    if grade.get("boost_reds"):
        v = grade["boost_reds"]
        r = shift(r, v)
        g = shift(g, int(v * 0.25))
    if grade.get("crush_blacks"):
        v = amount("crush_blacks", 10)

        def crush(p):
            # Only the darkest tones (below 55) are pushed down.
            return max(0, p - v) if p < 55 else p

        r, g, b = (ch.point(crush) for ch in (r, g, b))
    if grade.get("contrast"):
        v = grade["contrast"]

        def stretch(p):
            # Scale around mid-grey (128).
            return clamp(int((p - 128) * v + 128))

        r, g, b = (ch.point(stretch) for ch in (r, g, b))
    if grade.get("lift_shadows"):
        v = grade["lift_shadows"]
        r, g, b = (shift(ch, v) for ch in (r, g, b))
    if grade.get("warm_tint"):
        r = shift(r, 8)
        g = shift(g, 3)
    if grade.get("cool_tint"):
        b = shift(b, 8)
        r = shift(r, -5)
    if grade.get("desaturate"):
        v = amount("desaturate", 10)
        merged = Image.merge("RGB", (r, g, b))
        # v is a percentage: 10 -> colour factor 0.9.
        merged = ImageEnhance.Color(merged).enhance(max(0, 1.0 - v / 100.0))
        r, g, b = merged.split()
    if grade.get("lift_blacks"):
        v = grade["lift_blacks"]
        r, g, b = (shift(ch, v) for ch in (r, g, b))
    if grade.get("lift_mids"):
        v = grade["lift_mids"]

        def lift_mid(p):
            # Triangular weight: full lift at 128, none at 0 and 256.
            return clamp(int(p + v * (1 - abs(p - 128) / 128)))

        r, g, b = (ch.point(lift_mid) for ch in (r, g, b))
    if grade.get("highlights"):
        # The sign is ignored: highlights are always pulled down.
        v = abs(grade["highlights"])

        def tame(p):
            return clamp(p - int((p / 255) ** 2.2 * v))

        r, g, b = (ch.point(tame) for ch in (r, g, b))
    if grade.get("teal_orange"):
        r = shift(r, 5)
        b = shift(b, -8)
    if grade.get("soft_pink"):
        r = shift(r, 10)
        b = shift(b, 5)
    # "warm_indoor" is the spelling used by the final-scene template; accept
    # it as an alias so that grade is not silently ignored.
    if grade.get("indoor_warm") or grade.get("warm_indoor"):
        r = shift(r, 12)
        g = shift(g, 5)
    if grade.get("soft_glow"):
        merged = Image.merge("RGB", (r, g, b))
        merged = ImageEnhance.Brightness(merged).enhance(1.05)
        r, g, b = merged.split()
    if grade.get("dark_moody"):
        r = shift(r, -15)
        g = shift(g, -15)
        b = shift(b, -10)

    img = Image.merge("RGB", (r, g, b))
    # Small overall brightness lift applied to every graded image.
    img = ImageEnhance.Brightness(img).enhance(1.02)
    return img
# ---------------------------------------------------------------------------
# IMAGE LOAD + CROP-TO-FILL
# ---------------------------------------------------------------------------
def load_scene_image(idx: int, selected_dir: str) -> Image.Image:
    """Load ``scene_<idx>.jpg`` from ``selected_dir`` and crop it to RESOLUTION.

    Args:
        idx: Scene index; formatted as two digits in the file name.
        selected_dir: Directory containing the scene images.

    Returns:
        An RGB image exactly RESOLUTION in size.

    Raises:
        OSError: If the file is missing or cannot be decoded.
    """
    path = os.path.join(selected_dir, f"scene_{idx:02d}.jpg")
    # convert() returns an independent, fully loaded copy, so the file handle
    # can be closed immediately instead of lingering until garbage collection.
    with Image.open(path) as source:
        img = source.convert("RGB")
    return crop_to_fill(img, *RESOLUTION)
def crop_to_fill(img: Image.Image, target_w: int, target_h: int) -> Image.Image:
    """Scale ``img`` to cover the target box, then centre-crop to that box.

    Aspect ratio is preserved; whichever dimension overflows is trimmed
    equally on both sides.

    Args:
        img: Source image.
        target_w: Output width in pixels.
        target_h: Output height in pixels.

    Returns:
        A new image of exactly ``(target_w, target_h)``.
    """
    iw, ih = img.size
    scale = max(target_w / iw, target_h / ih)
    # Floating-point error can make iw * scale land a hair under the target
    # (e.g. 1079.9999...), which plain truncation would turn into an image
    # one pixel too small and a crop box reaching outside it. Round, and
    # never go below the target size.
    new_w = max(target_w, round(iw * scale))
    new_h = max(target_h, round(ih * scale))
    img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
    left = (new_w - target_w) // 2
    top = (new_h - target_h) // 2
    return img.crop((left, top, left + target_w, top + target_h))
# ---------------------------------------------------------------------------
# MOTION
# ---------------------------------------------------------------------------
def get_motion_frame(base: Image.Image, motion: dict, t: float) -> Image.Image:
    """Return the camera-move frame of ``base`` at normalised time ``t``.

    "snap_zoom" zooms with an ease-out curve and requires both scale keys;
    "slow_push_in" zooms with an ease-in-out curve and defaults to 1.0 -> 1.08.
    Any other motion type returns ``base`` itself, unchanged.
    """
    kind = motion["type"]
    if kind == "snap_zoom":
        scale_from = motion["scale_start"]
        scale_to = motion["scale_end"]
        eased = ease_out(t)
    elif kind == "slow_push_in":
        scale_from = motion.get("scale_start", 1.0)
        scale_to = motion.get("scale_end", 1.08)
        eased = ease_in_out(t)
    else:
        # static or others
        return base

    out_w, out_h = RESOLUTION
    factor = lerp(scale_from, scale_to, eased)
    zoom_w = int(out_w * factor)
    zoom_h = int(out_h * factor)
    zoomed = base.resize((zoom_w, zoom_h), Image.Resampling.BILINEAR)

    # Centre the output window inside the zoomed image, kept within bounds.
    slack_x = zoom_w - out_w
    slack_y = zoom_h - out_h
    x0 = max(0, min(slack_x // 2, slack_x))
    y0 = max(0, min(slack_y // 2, slack_y))
    return zoomed.crop((x0, y0, x0 + out_w, y0 + out_h))
# ---------------------------------------------------------------------------
# FONT CACHE
# ---------------------------------------------------------------------------
# Loaded fonts keyed by point size, so each size is read from disk only once.
_font_cache = {}


def get_font(size: int) -> ImageFont.FreeTypeFont:
    """Return the caption font at ``size``, loading and caching it on first use.

    FONT_PATH is tried first, then FONT_PATH_REG. If neither can be loaded
    the library's built-in default font is used, at the requested size where
    the installed Pillow supports that.

    Args:
        size: Font size in points.

    Returns:
        The cached font object for ``size``.
    """
    cached = _font_cache.get(size)
    if cached is not None:
        return cached

    font = None
    # dict.fromkeys drops a duplicate path while keeping the order, so an
    # identical fallback path is not retried after it has already failed.
    for path in dict.fromkeys((FONT_PATH, FONT_PATH_REG)):
        try:
            font = ImageFont.truetype(path, size)
            break
        except Exception:
            # Deliberate best-effort: any load failure moves on to the next
            # candidate rather than aborting the render.
            continue

    if font is None:
        try:
            # Newer Pillow releases accept a size here and return a scalable
            # font, so captions keep their intended size.
            font = ImageFont.load_default(size)
        except Exception:
            # Older releases take no argument (fixed-size bitmap font).
            font = ImageFont.load_default()

    _font_cache[size] = font
    return font
# ---------------------------------------------------------------------------
# TEXT DRAWING
# ---------------------------------------------------------------------------
def draw_text_stroked(draw, text, pos, font, align="left", opacity=1.0):
    """White text with stroke, drop shadow, and opacity.

    Args:
        draw: Drawing context for the target image.
        text: Caption; "\\n" separates lines.
        pos: ``(x, y)`` anchor. ``y`` is the top of the first line; ``x`` is
            the left edge, centre or right edge depending on ``align``.
        font: Font used for measuring and drawing.
        align: "left" (default), "center" or "right".
        opacity: Multiplier applied to the alpha of fill, stroke and shadow.
    """
    x, y = pos
    lines = text.split("\n")
    boxes = [draw.textbbox((0, 0), line, font=font) for line in lines]
    line_widths = [bb[2] - bb[0] for bb in boxes]

    # Bitmap fallback fonts have no ``size`` attribute; use the tallest
    # measured line instead of raising AttributeError.
    font_size = getattr(font, "size", None)
    if font_size is None:
        font_size = max(bb[3] - bb[1] for bb in boxes)
    line_spacing = int(font_size * 1.25)

    # Colours depend only on opacity, so compute them once for all lines.
    stroke_col = TEXT_STROKE[:3] + (int(TEXT_STROKE[3] * opacity),)
    white_col = TEXT_WHITE[:3] + (int(TEXT_WHITE[3] * opacity),)
    shadow_col = TEXT_SHADOW[:3] + (int(TEXT_SHADOW[3] * opacity),)

    # The outline is built by stamping the text at offsets inside discs of
    # decreasing radius; the offsets are the same for every line.
    stroke_offsets = []
    for sw in [STROKE_W, STROKE_W - 2, STROKE_W - 4, 2]:
        step = max(1, sw // 3)
        for ax in range(-sw, sw + 1, step):
            for ay in range(-sw, sw + 1, step):
                if ax * ax + ay * ay <= sw * sw:
                    stroke_offsets.append((ax, ay))

    for i, line in enumerate(lines):
        lw = line_widths[i]
        ly = y + i * line_spacing
        if align == "center":
            lx = x - lw // 2
        elif align == "right":
            lx = x - lw
        else:
            lx = x
        # Drop shadow
        draw.text((lx + 4, ly + 4), line, font=font, fill=shadow_col)
        # Stroke layers
        for ax, ay in stroke_offsets:
            draw.text((lx + ax, ly + ay), line, font=font, fill=stroke_col)
        # White fill
        draw.text((lx, ly), line, font=font, fill=white_col)
# ---------------------------------------------------------------------------
# TEXT ANIMATIONS
# ---------------------------------------------------------------------------
def render_text_frame(cfg: dict, frame: int, total_frames: int) -> Image.Image:
    """Render the caption overlay for one frame of a scene.

    Returns a transparent RGBA layer the size of RESOLUTION. For the four
    known animation types the caption pops in over 6 frames from
    ``entry_frame``, holds at full opacity for ``hold_frames``, then fades
    out over 4 frames. Unknown types yield an empty layer.
    ``total_frames`` is accepted but not used.
    """
    text_cfg = cfg["text"]
    kind = text_cfg["type"]
    caption = cfg["label"]
    width, height = RESOLUTION
    overlay = Image.new("RGBA", (width, height), (0, 0, 0, 0))
    pen = ImageDraw.Draw(overlay)
    typeface = get_font(text_cfg["font_size"])

    # All supported animation types share the same pop / hold / fade timing.
    supported = (
        "quick_center_pop",
        "center_stroke_pop",
        "center_pop",
        "center_fade_pop",
    )
    if kind not in supported:
        return overlay

    first_visible = text_cfg["entry_frame"]
    hold = text_cfg["hold_frames"]
    fade_from = first_visible + hold

    if frame < first_visible:
        # Caption has not appeared yet.
        return overlay

    if frame < first_visible + 6:  # 0.2s pop-in
        alpha = min(1.0, ease_out((frame - first_visible) / 6) * 1.5)
    elif frame < fade_from:
        alpha = 1.0
    else:
        alpha = 1.0 - min(1.0, (frame - fade_from) / 4)

    anchor = (width // 2, height // 2)
    draw_text_stroked(pen, caption, anchor, typeface, align="center", opacity=alpha)
    return overlay
# ---------------------------------------------------------------------------
# TRANSITION
# ---------------------------------------------------------------------------
def apply_transition(frame_a: Image.Image, frame_b: Image.Image, ttype: str, progress: float) -> Image.Image:
    """Apply dynamic transition between frames.

    Args:
        frame_a: Outgoing frame.
        frame_b: Incoming frame (not used by "end_fade_black").
        ttype: "whip_pan_right", "flash" or "end_fade_black"; any other
            value is treated as a hard cut.
        progress: Transition position; clamped to the range 0..1.

    Returns:
        The composed frame. A hard cut returns ``frame_a`` or ``frame_b``
        itself rather than a copy.
    """
    w, h = RESOLUTION
    # Blend factors outside 0..1 would extrapolate rather than mix.
    progress = max(0.0, min(1.0, progress))

    if ttype == "whip_pan_right":
        # The incoming frame slides in from the right edge over frame_a.
        offset = int(w * (1 - ease_out(progress)))
        result = Image.new("RGB", (w, h))
        result.paste(frame_a, (0, 0))
        result.paste(frame_b, (offset, 0))
        return result

    if ttype == "flash":
        if progress < 0.3:
            # Ramp up: brighten frame_a to 1.5x while mixing in up to 50%
            # of frame_b.
            ramp = progress / 0.3
            flash_intensity = ramp * 0.5
            alpha = min(0.5, ramp * 0.5)
        else:
            # Ramp down from exactly where the first phase ended (1.5x,
            # 50% mix) so there is no visible jump back to frame_a at 0.3.
            tail = (progress - 0.3) / 0.7
            flash_intensity = 0.5 * (1.0 - tail)
            alpha = 0.5 + 0.5 * tail
        brightened = ImageEnhance.Brightness(frame_a).enhance(1.0 + flash_intensity)
        return Image.blend(brightened, frame_b, alpha)

    if ttype == "end_fade_black":
        black = Image.new("RGB", (w, h), (0, 0, 0))
        return Image.blend(frame_a, black, progress)

    # Default to hard cut
    return frame_b if progress >= 0.5 else frame_a
# ---------------------------------------------------------------------------
# MAIN RENDER FROM MANIFEST
# ---------------------------------------------------------------------------
def render_video_from_manifest(manifest_dict: Dict[str, Any], selected_dir: str, output_path: str) -> Dict[str, Any]:
    """
    Render video from manifest without hardcoded SCENE_CONFIG.

    Frames are written to a temporary ``<name>_raw`` file next to the output
    with OpenCV, then re-encoded to H.264 by the ``ffmpeg`` executable. The
    temporary file is removed on success and kept if ffmpeg fails.

    Args:
        manifest_dict: Dictionary with 'scenes' key containing scene data
        selected_dir: Path to directory with selected scene images
        output_path: Path to output video file

    Returns:
        Dictionary with success status, duration, and metadata. On success:
        "success", "output_path", "duration_s", "size_mb". On failure:
        "success" (False), "error", "duration_s".
    """
    w, h = RESOLUTION
    out_file = Path(output_path)
    out_file.parent.mkdir(parents=True, exist_ok=True)

    # Generate dynamic SCENE_CONFIG from manifest
    SCENE_CONFIG = generate_scene_config(manifest_dict)
    if not SCENE_CONFIG:
        return {
            "success": False,
            "error": "No scenes in manifest",
            "duration_s": 0
        }

    # Derive the temp name from the file stem so it can never equal the
    # output path, even when output_path has no ".mp4" in it.
    tmp_path = out_file.with_name(f"{out_file.stem}_raw{out_file.suffix or '.mp4'}")

    print(f"\n{'='*55}")
    print(" Dynamic Video Composition")
    print(f" {len(SCENE_CONFIG)} scenes | {FPS}fps | {w}x{h}")
    print(f"{'='*55}\n")

    # Preload and grade all base images before opening the writer, so a
    # missing image does not leave an open writer or an empty temp file.
    print("[1/3] Loading + grading images...")
    base_images = []
    for cfg in SCENE_CONFIG:
        try:
            raw = load_scene_image(cfg["idx"], selected_dir)
            base_images.append(grade_image(raw, cfg["grade"]))
        except Exception as e:
            print(f" [ERROR] Failed to load scene {cfg['idx']}: {e}")
            return {"success": False, "error": str(e), "duration_s": 0}
    print(" [OK] Done\n")

    writer = cv2.VideoWriter(
        str(tmp_path),
        cv2.VideoWriter_fourcc(*"mp4v"),
        FPS,
        (w, h),
    )
    if not writer.isOpened():
        writer.release()
        error = f"Could not open video writer for {tmp_path}"
        print(f" [ERROR] {error}")
        return {"success": False, "error": error, "duration_s": 0}

    # Render scenes
    print("[2/3] Rendering frames...")
    total_scenes = len(SCENE_CONFIG)
    frames_written = 0
    try:
        for scene_i, cfg in enumerate(SCENE_CONFIG):
            total_frames = int(cfg["duration_s"] * FPS)
            trans_cfg = cfg["transition"]
            trans_frames = trans_cfg["frames"]
            base = base_images[scene_i]

            has_next = scene_i + 1 < total_scenes
            # First frame of the next scene; built lazily and reused for
            # every frame of this scene's transition.
            next_frame = None

            print(f" Scene {cfg['idx']:02d} -- {cfg['label'][:40]} ({total_frames}f, {cfg['duration_s']}s)")
            for frame in range(total_frames):
                # Motion frame
                t_motion = frame / max(total_frames - 1, 1)
                img = get_motion_frame(base, cfg["motion"], t_motion)

                # Text layer
                text_layer = render_text_frame(cfg, frame, total_frames)
                img = Image.alpha_composite(img.convert("RGBA"), text_layer).convert("RGB")

                # Transition blend at end of scene
                frames_into_trans = frame - (total_frames - trans_frames)
                if frames_into_trans >= 0:
                    trans_t = frames_into_trans / max(trans_frames - 1, 1)
                    if has_next:
                        if next_frame is None:
                            next_frame = get_motion_frame(
                                base_images[scene_i + 1],
                                SCENE_CONFIG[scene_i + 1]["motion"],
                                0.0,
                            )
                        img = apply_transition(img, next_frame, trans_cfg["type"], trans_t)
                    elif trans_cfg["type"] == "end_fade_black":
                        # The closing fade needs no following scene; the
                        # incoming frame is ignored by this transition type.
                        img = apply_transition(img, img, "end_fade_black", trans_t)

                # Write frame (cv2 expects BGR)
                writer.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
                frames_written += 1
            print(f" [OK] {total_frames} frames")

        # Hard cut to black at end
        print("\n Hard cut to black")
        black_frame = np.zeros((h, w, 3), dtype=np.uint8)
        for _ in range(2):
            writer.write(black_frame)
            frames_written += 1
        print(" [OK] 2 frames")
    finally:
        # Always finalise the container, even if a frame failed to render.
        writer.release()

    duration_s = frames_written / FPS
    print(f"\n Total frames written: {frames_written} (~{duration_s:.1f}s)\n")

    # Re-encode with ffmpeg. An argument list (no shell) keeps paths with
    # spaces or shell metacharacters intact.
    print("[3/3] Encoding H.264 MP4 via ffmpeg...")
    cmd = [
        "ffmpeg", "-y", "-i", str(tmp_path),
        "-vcodec", "libx264", "-crf", "20", "-preset", "fast",
        "-pix_fmt", "yuv420p",
        "-movflags", "+faststart",
        str(out_file),
    ]
    try:
        # stderr is merged into stdout so ffmpeg's log appears in one stream.
        ret = subprocess.run(cmd, stderr=subprocess.STDOUT).returncode
    except OSError as e:
        print(f" [ERROR] ffmpeg could not be started ({e}). Raw file kept: {tmp_path}")
        return {
            "success": False,
            "error": f"ffmpeg could not be started: {e}",
            "duration_s": duration_s
        }

    if ret == 0:
        os.remove(tmp_path)
        size_mb = os.path.getsize(output_path) / (1024 * 1024)
        print(f"\n [OK] Output: {output_path}")
        print(f" [OK] Size : {size_mb:.1f} MB")
        print(f"\n{'='*55}\n")
        return {
            "success": True,
            "output_path": output_path,
            "duration_s": duration_s,
            "size_mb": size_mb
        }

    print(f" [ERROR] ffmpeg failed (code {ret}). Raw file kept: {tmp_path}")
    return {
        "success": False,
        "error": f"ffmpeg failed with code {ret}",
        "duration_s": duration_s
    }
if __name__ == "__main__":
    # Example usage: render a three-scene demo from images in "selected".
    demo_labels = (
        "which type of anger do you have?",
        "shouting",
        "revenge",
    )
    manifest = {"scenes": [{"label": text} for text in demo_labels]}
    result = render_video_from_manifest(manifest, "selected", "renders/test_dynamic.mp4")
    print(f"Result: {result}")