"""
Dynamic Video Composer - Generates SCENE_CONFIG from Manifest

Converts manifest labels to uppercase and creates video without hardcoded config
"""

import os
import subprocess
from pathlib import Path
from typing import Any, Dict, List

import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, ImageEnhance, ImageOps

# ---------------------------------------------------------------------------
# GLOBALS
# ---------------------------------------------------------------------------
RESOLUTION = (1080, 1920)  # W x H
FPS = 30
SELECTED_DIR = "selected"
FONT_PATH = "asset/TR Impact.TTF"
FONT_PATH_REG = "asset/TR Impact.TTF"
GLOBAL_TEMP = +8
GLOBAL_TINT = -5
TEXT_WHITE = (255, 255, 255, 255)
TEXT_STROKE = (0, 0, 0, 210)
TEXT_SHADOW = (0, 0, 0, 60)
STROKE_W = 8

# Text animation timing, in frames (6 frames == 0.2s at 30 fps).
TEXT_POP_FRAMES = 6
TEXT_FADE_FRAMES = 4

# Desaturation percentage used when a grade only says ``"desaturate": True``.
DEFAULT_DESATURATE = 10


# Easing
def ease_out(t):
    """Cubic ease-out: fast start, slow finish."""
    return 1 - (1 - t) ** 3


def ease_in_out(t):
    """Smoothstep ease-in-out."""
    return t * t * (3 - 2 * t)


def ease_in(t):
    """Cubic ease-in: slow start, fast finish."""
    return t * t * t


def lerp(a, b, t):
    """Linearly interpolate from ``a`` to ``b`` by factor ``t``."""
    return a + (b - a) * t


# ---------------------------------------------------------------------------
# DYNAMIC SCENE CONFIG GENERATION
# ---------------------------------------------------------------------------
# Default motion, text, grade, transition configs per scene type
DEFAULT_SCENE_TEMPLATES = {
    "intro": {
        "motion": {"type": "slow_push_in", "scale_start": 1.0, "scale_end": 1.08},
        "text": {"type": "center_stroke_pop", "entry_frame": 2, "font_size": 95},
        "grade": {"crush_blacks": 15, "contrast": 1.15},
        "transition": {"type": "hard_cut", "frames": 1},
        "duration_s": 4.7,
    },
    "default": {
        "motion": {"type": "snap_zoom", "scale_start": 1.0, "scale_end": 1.12},
        "text": {"type": "center_pop", "entry_frame": 0, "font_size": 110},
        "grade": {"warm_tint": True, "lift_mids": 10},
        "transition": {"type": "whip_pan_right", "frames": 4},
        "duration_s": 2.3,
    },
    "final": {
        "motion": {"type": "static"},
        "text": {"type": "center_fade_pop", "entry_frame": 2, "font_size": 110},
        "grade": {"warm_indoor": True, "soft_glow": True, "lift_mids": 12},
        "transition": {"type": "end_fade_black", "frames": 30},
        "duration_s": 2.3,
    },
}

# Grade variations for different scenes
GRADE_VARIATIONS = [
    {"warm_tint": True, "lift_mids": 10},  # Scene 1: Warm
    {"desaturate": True, "lift_blacks": 5},  # Scene 2: Desaturated
    {"cool_tint": True, "highlights": -15},  # Scene 3: Cool
    {"soft_pink": True, "lift_mids": 15},  # Scene 4: Pink
    {"indoor_warm": True, "lift_shadows": 8},  # Scene 5: Warm indoor
    {"teal_orange": True, "crush_blacks": 10},  # Scene 6: Teal/orange
    {"dark_moody": True, "crush_blacks": 20, "desaturate": 15},  # Scene 7: Moody
]


def generate_scene_config(manifest: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Generate SCENE_CONFIG dynamically from manifest.

    Converts labels to uppercase and assigns motion/grade/transition configs.
    The first scene uses the "intro" template, the last scene (when there is
    more than one) uses "final", and everything in between uses "default".

    Args:
        manifest: Dictionary with a 'scenes' list; each entry may carry a
            'label'. A missing or ``None`` label falls back to "SCENE <idx>".

    Returns:
        One config dict per scene with the keys idx, label, duration_s,
        motion, text, grade and transition. Empty list when there are no
        scenes.
    """
    scenes = manifest.get("scenes") or []
    last_idx = len(scenes) - 1
    config = []

    for idx, scene_data in enumerate(scenes):
        # Extract label and convert to uppercase (None is treated as missing)
        raw_label = scene_data.get("label")
        if raw_label is None:
            raw_label = f"SCENE {idx}"
        label = str(raw_label).upper()

        # Determine scene type
        if idx == 0:
            template = DEFAULT_SCENE_TEMPLATES["intro"]
        elif idx == last_idx:
            template = DEFAULT_SCENE_TEMPLATES["final"]
        else:
            template = DEFAULT_SCENE_TEMPLATES["default"]

        duration_s = template.get("duration_s", 2.3)
        total_frames = int(duration_s * FPS)
        entry_frame = template["text"].get("entry_frame", 2)

        # Scene 0 keeps the intro grade; scenes 1..N walk GRADE_VARIATIONS
        # starting at its first entry (the table is labelled "Scene 1..7").
        if idx > 0:
            grade = GRADE_VARIATIONS[(idx - 1) % len(GRADE_VARIATIONS)].copy()
        else:
            grade = template["grade"].copy()

        config.append(
            {
                "idx": idx,
                "label": label,
                "duration_s": duration_s,
                "motion": template["motion"].copy(),
                "text": {
                    **template["text"],
                    # Leave room at the end of the scene for the text fade-out.
                    "hold_frames": max(0, total_frames - entry_frame - TEXT_POP_FRAMES),
                    "align": "center",
                },
                "grade": grade,
                "transition": template["transition"].copy(),
            }
        )

    return config


# ---------------------------------------------------------------------------
# COLOUR GRADE
# ---------------------------------------------------------------------------
def _clamp8(value):
    """Clamp a channel value to the 0..255 range."""
    return max(0, min(255, value))


def _shift(band: Image.Image, delta) -> Image.Image:
    """Add ``delta`` to every pixel of a single band, clamped to 0..255."""
    return band.point(lambda p: _clamp8(p + delta))


def _map_bands(bands, fn):
    """Apply the per-pixel function ``fn`` to each band and return a tuple."""
    return tuple(band.point(fn) for band in bands)


def grade_image(img: Image.Image, grade: dict) -> Image.Image:
    """
    Apply the global warm grade plus the scene-specific ``grade`` options.

    Recognised keys (applied in this order): boost_reds, crush_blacks,
    contrast, lift_shadows, warm_tint, cool_tint, desaturate, lift_blacks,
    lift_mids, highlights, teal_orange, soft_pink, indoor_warm (alias
    warm_indoor), soft_glow, dark_moody. Unknown keys are ignored.

    Args:
        img: Source image; converted to RGB if it is in another mode.
        grade: Mapping of grade option name to its amount or flag.

    Returns:
        A new RGB image with the grade applied.
    """
    if img.mode != "RGB":
        img = img.convert("RGB")
    r, g, b = img.split()

    # Global warm grade
    r = _shift(r, int(GLOBAL_TEMP * 1.2))
    g = _shift(g, int(GLOBAL_TEMP * 0.35))
    b = _shift(b, -int(GLOBAL_TEMP * 0.8))
    g = _shift(g, int(GLOBAL_TINT * 0.5))

    # Scene-specific
    if grade.get("boost_reds"):
        v = grade["boost_reds"]
        r = _shift(r, v)
        g = _shift(g, int(v * 0.25))

    if grade.get("crush_blacks"):
        v = grade["crush_blacks"]
        # Only values below 55 are pushed down.
        r, g, b = _map_bands((r, g, b), lambda p: _clamp8(p - v) if p < 55 else p)

    if grade.get("contrast"):
        v = grade["contrast"]
        r, g, b = _map_bands((r, g, b), lambda p: _clamp8(int((p - 128) * v + 128)))

    if grade.get("lift_shadows"):
        v = grade["lift_shadows"]
        # Implemented as a flat offset on all three channels.
        r, g, b = _map_bands((r, g, b), lambda p: _clamp8(p + v))

    if grade.get("warm_tint"):
        r = _shift(r, 8)
        g = _shift(g, 3)

    if grade.get("cool_tint"):
        b = _shift(b, 8)
        r = _shift(r, -5)

    if grade.get("desaturate"):
        v = grade["desaturate"]
        # A bare ``True`` flag means "use the default amount", not 1%.
        amount = DEFAULT_DESATURATE if v is True else v
        merged = Image.merge("RGB", (r, g, b))
        merged = ImageEnhance.Color(merged).enhance(max(0, 1.0 - amount / 100.0))
        r, g, b = merged.split()

    if grade.get("lift_blacks"):
        v = grade["lift_blacks"]
        # Implemented as a flat offset on all three channels.
        r, g, b = _map_bands((r, g, b), lambda p: _clamp8(p + v))

    if grade.get("lift_mids"):
        v = grade["lift_mids"]
        # Triangular weight: full lift at 128, none at 0 and 255.
        r, g, b = _map_bands(
            (r, g, b),
            lambda p: _clamp8(int(p + v * (1 - abs(p - 128) / 128))),
        )

    if grade.get("highlights"):
        v = abs(grade["highlights"])
        # Pull down bright values; the 2.2 power keeps shadows untouched.
        r, g, b = _map_bands(
            (r, g, b),
            lambda p: _clamp8(p - int((p / 255) ** 2.2 * v)),
        )

    if grade.get("teal_orange"):
        r = _shift(r, 5)
        b = _shift(b, -8)

    if grade.get("soft_pink"):
        r = _shift(r, 10)
        b = _shift(b, 5)

    # The "final" template spells this key "warm_indoor"; accept both.
    if grade.get("indoor_warm") or grade.get("warm_indoor"):
        r = _shift(r, 12)
        g = _shift(g, 5)

    if grade.get("soft_glow"):
        merged = Image.merge("RGB", (r, g, b))
        merged = ImageEnhance.Brightness(merged).enhance(1.05)
        r, g, b = merged.split()

    if grade.get("dark_moody"):
        r = _shift(r, -15)
        g = _shift(g, -15)
        b = _shift(b, -10)

    img = Image.merge("RGB", (r, g, b))
    img = ImageEnhance.Brightness(img).enhance(1.02)
    return img


# ---------------------------------------------------------------------------
# IMAGE LOAD + CROP-TO-FILL
# ---------------------------------------------------------------------------
def load_scene_image(idx: int, selected_dir: str) -> Image.Image:
    """
    Load ``scene_<idx>.jpg`` (two-digit index) from ``selected_dir``.

    Returns:
        The image converted to RGB and cropped to fill RESOLUTION.

    Raises:
        OSError: If the file is missing or cannot be decoded.
    """
    path = os.path.join(selected_dir, f"scene_{idx:02d}.jpg")
    # convert() loads the pixel data, so the file can be closed right after.
    with Image.open(path) as src:
        img = src.convert("RGB")
    return crop_to_fill(img, *RESOLUTION)


def crop_to_fill(img: Image.Image, target_w: int, target_h: int) -> Image.Image:
    """
    Scale ``img`` so it covers ``target_w`` x ``target_h``, then centre-crop.

    The aspect ratio is preserved; whatever overflows the target is cut off
    equally on both sides.
    """
    iw, ih = img.size
    scale = max(target_w / iw, target_h / ih)
    # Float truncation can land one pixel short of the target, which would
    # leave a black edge after cropping; never go below the target size.
    new_w = max(target_w, round(iw * scale))
    new_h = max(target_h, round(ih * scale))
    img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
    left = (new_w - target_w) // 2
    top = (new_h - target_h) // 2
    return img.crop((left, top, left + target_w, top + target_h))


# ---------------------------------------------------------------------------
# MOTION
# ---------------------------------------------------------------------------
# motion type -> (easing function, default scale_end)
_MOTION_PROFILES = {
    "snap_zoom": (ease_out, 1.12),
    "slow_push_in": (ease_in_out, 1.08),
}


def get_motion_frame(base: Image.Image, motion: dict, t: float) -> Image.Image:
    """
    Return the frame of ``base`` at normalised time ``t`` for ``motion``.

    "snap_zoom" and "slow_push_in" scale the image about its centre between
    ``scale_start`` and ``scale_end`` and crop back to RESOLUTION; any other
    type (including "static") returns ``base`` unchanged.

    Args:
        base: Graded scene image.
        motion: Motion config with a "type" and optional scale bounds.
        t: Progress through the scene; clamped to 0..1.
    """
    profile = _MOTION_PROFILES.get(motion.get("type"))
    if profile is None:  # static or others
        return base

    easing, default_end = profile
    w, h = RESOLUTION
    t = max(0.0, min(1.0, t))
    scale = lerp(
        motion.get("scale_start", 1.0),
        motion.get("scale_end", default_end),
        easing(t),
    )
    nw = int(w * scale)
    nh = int(h * scale)
    if (nw, nh) == (w, h) and base.size == (w, h):
        # Nothing to scale or crop.
        return base

    scaled = base.resize((nw, nh), Image.Resampling.BILINEAR)
    left = max(0, min((nw - w) // 2, nw - w))
    top = max(0, min((nh - h) // 2, nh - h))
    return scaled.crop((left, top, left + w, top + h))


# ---------------------------------------------------------------------------
# FONT CACHE
# ---------------------------------------------------------------------------
_font_cache = {}


def get_font(size: int) -> ImageFont.FreeTypeFont:
    """
    Return the display font at ``size``, cached per size.

    Tries FONT_PATH, then FONT_PATH_REG, and finally falls back to Pillow's
    built-in default font so rendering still works without the asset.
    """
    if size not in _font_cache:
        font = None
        for candidate in (FONT_PATH, FONT_PATH_REG):
            try:
                font = ImageFont.truetype(candidate, size)
                break
            except (OSError, ImportError, ValueError):
                # Missing or unreadable font, no FreeType, or bad size.
                continue
        _font_cache[size] = font if font is not None else ImageFont.load_default()
    return _font_cache[size]


# ---------------------------------------------------------------------------
# TEXT DRAWING
# ---------------------------------------------------------------------------
def _stroke_offsets():
    """Return the (dx, dy) offsets used to build the layered text stroke."""
    offsets = []
    for sw in [STROKE_W, STROKE_W - 2, STROKE_W - 4, 2]:
        step = max(1, sw // 3)
        for ax in range(-sw, sw + 1, step):
            for ay in range(-sw, sw + 1, step):
                if ax * ax + ay * ay <= sw * sw:
                    offsets.append((ax, ay))
    return offsets


def draw_text_stroked(draw, text, pos, font, align="left", opacity=1.0):
    """
    White text with stroke, drop shadow, and opacity.

    Args:
        draw: ImageDraw instance to draw on.
        text: Text to render; "\\n" separates lines.
        pos: (x, y) anchor. ``y`` is the top of the first line; ``x`` is the
            left edge, centre or right edge depending on ``align``.
        font: Font to draw with.
        align: "left", "center" or "right".
        opacity: 0..1 multiplier applied to all colour alphas.
    """
    x, y = pos
    lines = text.split("\n")

    # Colours and stroke offsets are the same for every line.
    stroke_col = TEXT_STROKE[:3] + (int(TEXT_STROKE[3] * opacity),)
    white_col = TEXT_WHITE[:3] + (int(TEXT_WHITE[3] * opacity),)
    shadow_col = TEXT_SHADOW[:3] + (int(TEXT_SHADOW[3] * opacity),)
    offsets = _stroke_offsets()

    # The legacy bitmap fallback font has no ``size`` attribute.
    line_spacing = int(getattr(font, "size", 10) * 1.25)

    for i, line in enumerate(lines):
        bbox = draw.textbbox((0, 0), line, font=font)
        lw = bbox[2] - bbox[0]
        ly = y + i * line_spacing

        if align == "center":
            lx = x - lw // 2
        elif align == "right":
            lx = x - lw
        else:
            lx = x

        # Drop shadow
        draw.text((lx + 4, ly + 4), line, font=font, fill=shadow_col)

        # Stroke layers
        for ax, ay in offsets:
            draw.text((lx + ax, ly + ay), line, font=font, fill=stroke_col)

        # White fill
        draw.text((lx, ly), line, font=font, fill=white_col)


# ---------------------------------------------------------------------------
# TEXT ANIMATIONS
# ---------------------------------------------------------------------------
# All supported text types share the same pop-in / hold / fade-out animation.
_CENTER_TEXT_TYPES = frozenset(
    {"quick_center_pop", "center_stroke_pop", "center_pop", "center_fade_pop"}
)


def _text_opacity(frame: int, entry_f: int, hold_f: int) -> float:
    """Return the label opacity (0..1) at ``frame`` of the text animation."""
    if frame < entry_f:
        return 0.0
    if frame < entry_f + TEXT_POP_FRAMES:  # 0.2s pop-in
        progress = ease_out((frame - entry_f) / TEXT_POP_FRAMES)
        return min(1.0, progress * 1.5)
    fade_start = entry_f + hold_f
    if frame < fade_start:
        return 1.0
    return 1.0 - min(1.0, (frame - fade_start) / TEXT_FADE_FRAMES)


def render_text_frame(cfg: dict, frame: int, total_frames: int) -> Image.Image:
    """
    Render the transparent RGBA text layer for ``frame`` of a scene.

    Args:
        cfg: Scene config with "label" and a "text" section (type,
            font_size, entry_frame, hold_frames).
        frame: Frame index within the scene.
        total_frames: Scene length in frames (kept for interface
            compatibility; the timing comes from the text config).

    Returns:
        A RESOLUTION-sized RGBA layer; fully transparent before the text
        enters, after it has faded out, or for an unknown text type.
    """
    tcfg = cfg["text"]
    w, h = RESOLUTION
    layer = Image.new("RGBA", (w, h), (0, 0, 0, 0))

    if tcfg["type"] not in _CENTER_TEXT_TYPES:
        return layer

    opacity = _text_opacity(frame, tcfg["entry_frame"], tcfg["hold_frames"])
    if opacity <= 0:
        # Fully transparent text adds nothing visible; skip the costly draw.
        return layer

    draw = ImageDraw.Draw(layer)
    font = get_font(tcfg["font_size"])
    draw_text_stroked(
        draw, cfg["label"], (w // 2, h // 2), font, align="center", opacity=opacity
    )
    return layer


# ---------------------------------------------------------------------------
# TRANSITION
# ---------------------------------------------------------------------------
def apply_transition(
    frame_a: Image.Image, frame_b: Image.Image, ttype: str, progress: float
) -> Image.Image:
    """
    Apply dynamic transition between frames.

    Args:
        frame_a: Outgoing frame.
        frame_b: Incoming frame (not used by "end_fade_black").
        ttype: "whip_pan_right", "flash", "end_fade_black"; anything else is
            treated as a hard cut at the halfway point.
        progress: Transition progress; clamped to 0..1.
    """
    w, h = RESOLUTION
    # Image.blend extrapolates outside 0..1, so keep progress in range.
    progress = max(0.0, min(1.0, progress))

    if ttype == "whip_pan_right":
        offset = int(w * (1 - ease_out(progress)))
        result = Image.new("RGB", (w, h))
        result.paste(frame_a, (0, 0))
        result.paste(frame_b, (offset, 0))
        return result

    if ttype == "flash":
        if progress < 0.3:
            flash_intensity = (progress / 0.3) * 0.5
            brightened = ImageEnhance.Brightness(frame_a).enhance(1.0 + flash_intensity)
            alpha = min(0.5, flash_intensity)
            return Image.blend(brightened, frame_b, alpha)
        blend_t = (progress - 0.3) / 0.7
        return Image.blend(frame_a, frame_b, blend_t)

    if ttype == "end_fade_black":
        black = Image.new("RGB", (w, h), (0, 0, 0))
        return Image.blend(frame_a, black, progress)

    # Default to hard cut
    return frame_b if progress >= 0.5 else frame_a


# ---------------------------------------------------------------------------
# MAIN RENDER FROM MANIFEST
# ---------------------------------------------------------------------------
def _encode_h264(src_path: Path, dst_path: Path) -> int:
    """
    Re-encode ``src_path`` to H.264 at ``dst_path`` with ffmpeg.

    The command is passed as an argument list (no shell), so paths with
    spaces or shell metacharacters are handled safely.

    Returns:
        ffmpeg's exit code, or -1 when ffmpeg could not be started.
    """
    cmd = [
        "ffmpeg", "-y",
        "-i", str(src_path),
        "-vcodec", "libx264",
        "-crf", "20",
        "-preset", "fast",
        "-pix_fmt", "yuv420p",
        "-movflags", "+faststart",
        str(dst_path),
    ]
    try:
        return subprocess.run(cmd, check=False).returncode
    except OSError as exc:
        print(f" [ERROR] Could not run ffmpeg: {exc}")
        return -1


def render_video_from_manifest(
    manifest_dict: Dict[str, Any], selected_dir: str, output_path: str
) -> Dict[str, Any]:
    """
    Render video from manifest without hardcoded SCENE_CONFIG.

    Frames are first written to a temporary "<name>_raw.mp4" next to the
    output file with OpenCV, then re-encoded to H.264 with ffmpeg.

    Args:
        manifest_dict: Dictionary with 'scenes' key containing scene data
        selected_dir: Path to directory with selected scene images
        output_path: Path to output video file

    Returns:
        Dictionary with success status, duration, and metadata
    """
    w, h = RESOLUTION
    out_path = Path(output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Generate dynamic SCENE_CONFIG from manifest
    SCENE_CONFIG = generate_scene_config(manifest_dict)
    if not SCENE_CONFIG:
        return {"success": False, "error": "No scenes in manifest", "duration_s": 0}

    # Always distinct from the output path, whatever its extension is.
    tmp_path = out_path.with_name(f"{out_path.stem}_raw.mp4")

    print(f"\n{'='*55}")
    print(f" Dynamic Video Composition")
    print(f" {len(SCENE_CONFIG)} scenes | {FPS}fps | {w}x{h}")
    print(f"{'='*55}\n")

    # Preload and grade all base images
    print("[1/3] Loading + grading images...")
    base_images = []
    for cfg in SCENE_CONFIG:
        try:
            raw = load_scene_image(cfg["idx"], selected_dir)
            base_images.append(grade_image(raw, cfg["grade"]))
        except Exception as e:
            print(f" [ERROR] Failed to load scene {cfg['idx']}: {e}")
            return {"success": False, "error": str(e), "duration_s": 0}
    print(" [OK] Done\n")

    # Open the writer only once all inputs are known to be good, so the
    # early return above cannot leak it.
    writer = cv2.VideoWriter(
        str(tmp_path),
        cv2.VideoWriter_fourcc(*"mp4v"),
        FPS,
        (w, h),
    )
    if not writer.isOpened():
        writer.release()
        message = f"Could not open video writer for {tmp_path}"
        print(f" [ERROR] {message}")
        return {"success": False, "error": message, "duration_s": 0}

    # Render scenes
    print("[2/3] Rendering frames...")
    total_scenes = len(SCENE_CONFIG)
    frames_written = 0
    try:
        for scene_i, cfg in enumerate(SCENE_CONFIG):
            total_frames = int(cfg["duration_s"] * FPS)
            trans_cfg = cfg["transition"]
            trans_type = trans_cfg["type"]
            trans_frames = trans_cfg["frames"]
            base = base_images[scene_i]

            # First frame of the next scene, used as the transition target.
            # It does not change during the scene, so compute it once.
            if scene_i + 1 < total_scenes:
                next_cfg = SCENE_CONFIG[scene_i + 1]
                next_frame = get_motion_frame(
                    base_images[scene_i + 1], next_cfg["motion"], 0.0
                )
            else:
                next_frame = None

            print(f" Scene {cfg['idx']:02d} -- {cfg['label'][:40]} ({total_frames}f, {cfg['duration_s']}s)")

            for frame in range(total_frames):
                # Motion frame
                t_motion = frame / max(total_frames - 1, 1)
                img = get_motion_frame(base, cfg["motion"], t_motion)

                # Text layer
                text_layer = render_text_frame(cfg, frame, total_frames)
                img_rgba = Image.alpha_composite(img.convert("RGBA"), text_layer)
                img = img_rgba.convert("RGB")

                # Transition blend at end of scene
                frames_into_trans = frame - (total_frames - trans_frames)
                if frames_into_trans >= 0:
                    trans_t = frames_into_trans / max(trans_frames - 1, 1)
                    if next_frame is not None:
                        img = apply_transition(img, next_frame, trans_type, trans_t)
                    elif trans_type == "end_fade_black":
                        # The closing fade needs no next scene.
                        img = apply_transition(img, img, trans_type, trans_t)

                # Write frame (cv2 expects BGR)
                arr = np.array(img)
                writer.write(cv2.cvtColor(arr, cv2.COLOR_RGB2BGR))
                frames_written += 1

            print(f" [OK] {total_frames} frames")

        # Hard cut to black at end
        print(f"\n Hard cut to black")
        black_frame = np.zeros((h, w, 3), dtype=np.uint8)
        for _ in range(2):
            writer.write(black_frame)
            frames_written += 1
        print(f" [OK] 2 frames")
    finally:
        writer.release()

    duration_s = frames_written / FPS
    print(f"\n Total frames written: {frames_written} (~{duration_s:.1f}s)\n")

    # Re-encode with ffmpeg
    print("[3/3] Encoding H.264 MP4 via ffmpeg...")
    ret = _encode_h264(tmp_path, out_path)

    if ret != 0:
        print(f" [ERROR] ffmpeg failed (code {ret}). Raw file kept: {tmp_path}")
        return {
            "success": False,
            "error": f"ffmpeg failed with code {ret}",
            "duration_s": duration_s,
        }

    os.remove(tmp_path)
    size_mb = os.path.getsize(output_path) / (1024 * 1024)
    print(f"\n [OK] Output: {output_path}")
    print(f" [OK] Size : {size_mb:.1f} MB")
    print(f"\n{'='*55}\n")
    return {
        "success": True,
        "output_path": output_path,
        "duration_s": duration_s,
        "size_mb": size_mb,
    }


if __name__ == "__main__":
    # Example usage
    manifest = {
        "scenes": [
            {"label": "which type of anger do you have?"},
            {"label": "shouting"},
            {"label": "revenge"},
        ]
    }
    result = render_video_from_manifest(manifest, SELECTED_DIR, "renders/test_dynamic.mp4")
    print(f"Result: {result}")