Spaces:

factorstudios
/

composer

Sleeping

App Files Files Community

composer / composer_dynamic.py

factorstudios

Upload 9 files

735a97b verified 29 days ago

Raw

History Blame Contribute Delete

22 kB

	"""
	Dynamic Video Composer - Generates SCENE_CONFIG from Manifest
	Converts manifest labels to uppercase and creates video without hardcoded config
	"""

	import os
	import numpy as np
	import cv2
	from pathlib import Path
	from PIL import Image, ImageDraw, ImageFont, ImageEnhance, ImageOps
	from typing import Dict, List, Any

	# ---------------------------------------------------------------------------
	# GLOBALS
	# ---------------------------------------------------------------------------

	# Output frame size in pixels. Unpacked throughout the file as (width, height).
	RESOLUTION = (1080, 1920) # W x H
	# Frames per second: converts scene durations (seconds) into frame counts and
	# is passed to the video writer.
	FPS = 30
	# Name of the directory holding the selected scene images.
	# NOTE(review): not referenced by the functions visible in this file; the
	# render entry point receives the directory as an argument instead.
	SELECTED_DIR = "selected"
	# Primary TrueType font for on-screen labels (loaded by get_font).
	FONT_PATH = "asset/TR Impact.TTF"
	# Fallback font tried by get_font when FONT_PATH fails to load.
	# NOTE(review): currently the same file as FONT_PATH, so the fallback cannot
	# succeed where the primary failed.
	FONT_PATH_REG = "asset/TR Impact.TTF"

	# Global colour-temperature offset applied to every scene by grade_image:
	# scaled amounts are added to red and green and subtracted from blue.
	GLOBAL_TEMP = +8
	# Global tint offset; grade_image adds half of it to the green channel.
	GLOBAL_TINT = -5

	# RGBA colours used by draw_text_stroked; each alpha is scaled by the
	# per-frame opacity before drawing.
	TEXT_WHITE = (255, 255, 255, 255)
	TEXT_STROKE = (0, 0, 0, 210)
	TEXT_SHADOW = (0, 0, 0, 60)
	# Outermost stroke radius in pixels; draw_text_stroked derives the thinner
	# stroke passes from this value.
	STROKE_W = 8

	# Easing
	def ease_out(t):
	    """Cubic ease-out: moves quickly at first and decelerates towards t = 1."""
	    remaining = 1 - t
	    return 1 - remaining ** 3


	def ease_in_out(t):
	    """Smoothstep easing: slow at both ends, fastest around t = 0.5."""
	    squared = t * t
	    return squared * (3 - 2 * t)


	def ease_in(t):
	    """Cubic ease-in: starts slowly and accelerates towards t = 1."""
	    squared = t * t
	    return squared * t


	def lerp(a, b, t):
	    """Linearly interpolate from ``a`` (t = 0) to ``b`` (t = 1)."""
	    span = b - a
	    return a + span * t

	# ---------------------------------------------------------------------------
	# DYNAMIC SCENE CONFIG GENERATION
	# ---------------------------------------------------------------------------

	# Default motion, text, grade, transition configs per scene type
	DEFAULT_SCENE_TEMPLATES = {
	    # Opening scene: long hold with a gentle push-in.
	    "intro": {
	        "motion": {
	            "type": "slow_push_in",
	            "scale_start": 1.0,
	            "scale_end": 1.08,
	        },
	        "text": {
	            "type": "center_stroke_pop",
	            "entry_frame": 2,
	            "font_size": 95,
	        },
	        "grade": {
	            "crush_blacks": 15,
	            "contrast": 1.15,
	        },
	        "transition": {
	            "type": "hard_cut",
	            "frames": 1,
	        },
	        "duration_s": 4.7,
	    },
	    # Every scene between the first and the last.
	    "default": {
	        "motion": {
	            "type": "snap_zoom",
	            "scale_start": 1.0,
	            "scale_end": 1.12,
	        },
	        "text": {
	            "type": "center_pop",
	            "entry_frame": 0,
	            "font_size": 110,
	        },
	        "grade": {
	            "warm_tint": True,
	            "lift_mids": 10,
	        },
	        "transition": {
	            "type": "whip_pan_right",
	            "frames": 4,
	        },
	        "duration_s": 2.3,
	    },
	    # Closing scene: static image that fades out to black.
	    "final": {
	        "motion": {
	            "type": "static",
	        },
	        "text": {
	            "type": "center_fade_pop",
	            "entry_frame": 2,
	            "font_size": 110,
	        },
	        "grade": {
	            "warm_indoor": True,
	            "soft_glow": True,
	            "lift_mids": 12,
	        },
	        "transition": {
	            "type": "end_fade_black",
	            "frames": 30,
	        },
	        "duration_s": 2.3,
	    },
	}

	# Grade variations for different scenes
	# Looks cycled through by generate_scene_config: a scene with index idx > 0
	# uses entry ``idx % len(GRADE_VARIATIONS)``; scene 0 uses its template grade.
	GRADE_VARIATIONS = [
	    # 0: warm
	    {
	        "warm_tint": True,
	        "lift_mids": 10,
	    },
	    # 1: desaturated
	    {
	        "desaturate": True,
	        "lift_blacks": 5,
	    },
	    # 2: cool
	    {
	        "cool_tint": True,
	        "highlights": -15,
	    },
	    # 3: pink
	    {
	        "soft_pink": True,
	        "lift_mids": 15,
	    },
	    # 4: warm indoor
	    {
	        "indoor_warm": True,
	        "lift_shadows": 8,
	    },
	    # 5: teal / orange
	    {
	        "teal_orange": True,
	        "crush_blacks": 10,
	    },
	    # 6: moody
	    {
	        "dark_moody": True,
	        "crush_blacks": 20,
	        "desaturate": 15,
	    },
	]


	def generate_scene_config(manifest: Dict[str, Any]) -> List[Dict[str, Any]]:
	    """
	    Generate SCENE_CONFIG dynamically from manifest.

	    The first scene uses the "intro" template, the last scene (when it is not
	    also the first) uses "final", and everything in between uses "default".
	    Labels are converted to uppercase; a missing or ``None`` label falls back
	    to ``"SCENE <idx>"``.

	    Args:
	        manifest: Dictionary whose "scenes" entry is a list of scene dicts.
	            A missing or ``None`` "scenes" entry is treated as no scenes.

	    Returns:
	        One config dict per scene with the keys "idx", "label", "duration_s",
	        "motion", "text", "grade" and "transition".
	    """
	    # ``or []`` also covers an explicit ``"scenes": None`` (e.g. JSON null),
	    # which ``.get("scenes", [])`` would pass through and crash on.
	    scenes = manifest.get("scenes") or []
	    last_idx = len(scenes) - 1
	    config = []

	    for idx, scene_data in enumerate(scenes):
	        # Tolerate a null scene entry or a null / non-string label.
	        raw_label = (scene_data or {}).get("label")
	        if raw_label is None:
	            raw_label = f"SCENE {idx}"
	        label = str(raw_label).upper()

	        # Determine scene type (idx == 0 wins for a single-scene manifest).
	        if idx == 0:
	            template = DEFAULT_SCENE_TEMPLATES["intro"]
	        elif idx == last_idx:
	            template = DEFAULT_SCENE_TEMPLATES["final"]
	        else:
	            template = DEFAULT_SCENE_TEMPLATES["default"]

	        duration_s = template.get("duration_s", 2.3)
	        total_frames = int(duration_s * FPS)

	        # Hold the text for the whole scene minus its entry delay and the
	        # 6 frames subtracted by the original formula; never negative.
	        entry_frame = template["text"].get("entry_frame", 2)
	        hold_frames = max(0, total_frames - entry_frame - 6)

	        # Scene 0 keeps its template grade; later scenes cycle through
	        # GRADE_VARIATIONS (so the "final" template grade is not used).
	        if idx > 0:
	            grade = GRADE_VARIATIONS[idx % len(GRADE_VARIATIONS)].copy()
	        else:
	            grade = template["grade"].copy()

	        config.append({
	            "idx": idx,
	            "label": label,
	            "duration_s": duration_s,
	            "motion": template["motion"].copy(),
	            "text": {
	                **template["text"],
	                "hold_frames": hold_frames,
	                "align": "center",
	            },
	            "grade": grade,
	            "transition": template["transition"].copy(),
	        })

	    return config


	# ---------------------------------------------------------------------------
	# COLOUR GRADE
	# ---------------------------------------------------------------------------

	def grade_image(img: Image.Image, grade: dict) -> Image.Image:
	    """
	    Apply the global warm grade plus the scene-specific ``grade`` options.

	    The image is split into its three channels, each enabled option adjusts
	    the channels in a fixed order, and the result is merged back and given a
	    final slight brightness lift.

	    Args:
	        img: Three-band image to grade (it is split into exactly r, g, b).
	        grade: Option dict; an option is applied only when its value is truthy.

	    Returns:
	        The graded image.
	    """

	    def raise_band(band, amount):
	        # Add ``amount`` to every pixel value, capped at 255.
	        return band.point(lambda px: min(255, px + amount))

	    def lower_band(band, amount):
	        # Subtract ``amount`` from every pixel value, floored at 0.
	        return band.point(lambda px: max(0, px - amount))

	    def remap(bands, curve):
	        # Apply the same per-pixel curve to each band.
	        return tuple(band.point(curve) for band in bands)

	    red, green, blue = img.split()

	    # Global warm grade
	    red = raise_band(red, int(GLOBAL_TEMP * 1.2))
	    green = raise_band(green, int(GLOBAL_TEMP * 0.35))
	    blue = lower_band(blue, int(GLOBAL_TEMP * 0.8))
	    tint_shift = int(GLOBAL_TINT * 0.5)
	    green = green.point(lambda px: max(0, min(255, px + tint_shift)))

	    # Scene-specific options, in their fixed application order
	    boost = grade.get("boost_reds")
	    if boost:
	        red = raise_band(red, boost)
	        green = raise_band(green, int(boost * 0.25))

	    crush = grade.get("crush_blacks")
	    if crush:
	        # Only pixel values below 55 are pulled down.
	        red, green, blue = remap(
	            (red, green, blue),
	            lambda px: max(0, px - crush) if px < 55 else px,
	        )

	    contrast = grade.get("contrast")
	    if contrast:
	        # Scale the distance from mid-grey (128).
	        red, green, blue = remap(
	            (red, green, blue),
	            lambda px: int((px - 128) * contrast + 128),
	        )

	    shadow_lift = grade.get("lift_shadows")
	    if shadow_lift:
	        red = raise_band(red, shadow_lift)
	        green = raise_band(green, shadow_lift)
	        blue = raise_band(blue, shadow_lift)

	    if grade.get("warm_tint"):
	        red = raise_band(red, 8)
	        green = raise_band(green, 3)

	    if grade.get("cool_tint"):
	        blue = raise_band(blue, 8)
	        red = lower_band(red, 5)

	    desat = grade.get("desaturate")
	    if desat:
	        # ``desat`` is a percentage; a bare True therefore removes only 1%.
	        colour = Image.merge("RGB", (red, green, blue))
	        colour = ImageEnhance.Color(colour).enhance(max(0, 1.0 - desat / 100.0))
	        red, green, blue = colour.split()

	    black_lift = grade.get("lift_blacks")
	    if black_lift:
	        red = raise_band(red, black_lift)
	        green = raise_band(green, black_lift)
	        blue = raise_band(blue, black_lift)

	    mid_lift = grade.get("lift_mids")
	    if mid_lift:
	        # Strongest at 128, tapering linearly to nothing at 0 and 256.
	        red, green, blue = remap(
	            (red, green, blue),
	            lambda px: int(px + mid_lift * (1 - abs(px - 128) / 128)),
	        )

	    if grade.get("highlights"):
	        pull = abs(grade["highlights"])
	        # Power curve so bright values are reduced the most.
	        red, green, blue = remap(
	            (red, green, blue),
	            lambda px: px - int((px / 255) ** 2.2 * pull),
	        )

	    if grade.get("teal_orange"):
	        red = raise_band(red, 5)
	        blue = lower_band(blue, 8)

	    if grade.get("soft_pink"):
	        red = raise_band(red, 10)
	        blue = raise_band(blue, 5)

	    if grade.get("indoor_warm"):
	        red = raise_band(red, 12)
	        green = raise_band(green, 5)

	    if grade.get("soft_glow"):
	        glow = Image.merge("RGB", (red, green, blue))
	        glow = ImageEnhance.Brightness(glow).enhance(1.05)
	        red, green, blue = glow.split()

	    if grade.get("dark_moody"):
	        red = lower_band(red, 15)
	        green = lower_band(green, 15)
	        blue = lower_band(blue, 10)

	    graded = Image.merge("RGB", (red, green, blue))
	    return ImageEnhance.Brightness(graded).enhance(1.02)

	# ---------------------------------------------------------------------------
	# IMAGE LOAD + CROP-TO-FILL
	# ---------------------------------------------------------------------------

	def load_scene_image(idx: int, selected_dir: str) -> Image.Image:
	    """
	    Load ``scene_<idx>.jpg`` from ``selected_dir`` as an RGB frame-sized image.

	    Args:
	        idx: Scene index; formatted as two digits in the file name.
	        selected_dir: Directory containing the scene images.

	    Returns:
	        The image converted to RGB and cropped to fill RESOLUTION.

	    Raises:
	        OSError: If the file is missing or cannot be read as an image.
	    """
	    path = os.path.join(selected_dir, f"scene_{idx:02d}.jpg")
	    # convert() produces a fully loaded, independent image, so the source
	    # file handle can be closed immediately instead of lingering until GC.
	    with Image.open(path) as src:
	        img = src.convert("RGB")
	    return crop_to_fill(img, *RESOLUTION)

	def crop_to_fill(img: Image.Image, target_w: int, target_h: int) -> Image.Image:
	    """
	    Scale ``img`` so it fully covers the target size, then centre-crop it.

	    Args:
	        img: Source image.
	        target_w: Output width in pixels.
	        target_h: Output height in pixels.

	    Returns:
	        A ``target_w`` x ``target_h`` image with no padding.
	    """
	    iw, ih = img.size
	    scale = max(target_w / iw, target_h / ih)
	    # Float rounding can make int(dim * scale) land one pixel short of the
	    # target (e.g. 1079.9999 -> 1079), which would make the crop box extend
	    # past the image and pad a black edge. Never go below the target.
	    new_w = max(target_w, int(iw * scale))
	    new_h = max(target_h, int(ih * scale))
	    resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
	    left = (new_w - target_w) // 2
	    top = (new_h - target_h) // 2
	    return resized.crop((left, top, left + target_w, top + target_h))

	# ---------------------------------------------------------------------------
	# MOTION
	# ---------------------------------------------------------------------------

	def get_motion_frame(base: Image.Image, motion: dict, t: float) -> Image.Image:
	    """
	    Return ``base`` with the scene's camera motion applied at progress ``t``.

	    "snap_zoom" and "slow_push_in" both scale the image about its centre and
	    crop back to RESOLUTION; they differ only in easing curve and default end
	    scale. Any other (or missing) motion type returns ``base`` unchanged.

	    Args:
	        base: Frame-sized source image.
	        motion: Motion config with "type" and optional "scale_start" /
	            "scale_end".
	        t: Progress through the scene; the renderer passes 0.0 to 1.0.

	    Returns:
	        A RESOLUTION-sized image for this frame.
	    """
	    # (easing curve, default end scale) per zoom-style motion.
	    zoom_profiles = {
	        "snap_zoom": (ease_out, 1.12),
	        "slow_push_in": (ease_in_out, 1.08),
	    }
	    profile = zoom_profiles.get(motion.get("type"))
	    if profile is None:  # static or others
	        return base

	    easing, default_end = profile
	    w, h = RESOLUTION

	    # Both motions now fall back to defaults; previously only slow_push_in
	    # did and snap_zoom raised KeyError on a missing scale.
	    s_start = motion.get("scale_start", 1.0)
	    s_end = motion.get("scale_end", default_end)
	    scale = lerp(s_start, s_end, easing(t))

	    nw = int(w * scale)
	    nh = int(h * scale)
	    scaled = base.resize((nw, nh), Image.Resampling.BILINEAR)

	    # Centre the crop window; clamp to 0 when the scaled image is smaller
	    # than the frame (scale < 1), in which case the crop pads the remainder.
	    left = max(0, (nw - w) // 2)
	    top = max(0, (nh - h) // 2)
	    return scaled.crop((left, top, left + w, top + h))

	# ---------------------------------------------------------------------------
	# FONT CACHE
	# ---------------------------------------------------------------------------

	# Loaded fonts keyed by point size, so each size is read from disk once.
	_font_cache = {}


	def get_font(size: int) -> ImageFont.FreeTypeFont:
	    """
	    Return the label font at ``size``, loading and caching it on first use.

	    FONT_PATH is tried first, then FONT_PATH_REG (skipped when it is the same
	    path). If neither loads, a warning is printed and Pillow's built-in
	    default font is used, at the requested size where Pillow supports that.

	    Args:
	        size: Font size passed to the font loader.

	    Returns:
	        The cached font object for this size.
	    """
	    cached = _font_cache.get(size)
	    if cached is not None:
	        return cached

	    font = None
	    # dict.fromkeys de-duplicates while keeping order, so an identical
	    # fallback path is not pointlessly retried.
	    for path in dict.fromkeys((FONT_PATH, FONT_PATH_REG)):
	        try:
	            font = ImageFont.truetype(path, size)
	            break
	        except (OSError, ImportError, ValueError):
	            # Unreadable file, FreeType support missing, or a size the
	            # loader rejects: try the next candidate / the default font.
	            continue

	    if font is None:
	        print(f" [WARN] Could not load font '{FONT_PATH}' at size {size}; using Pillow default font")
	        try:
	            # Newer Pillow releases accept a size for the default font.
	            font = ImageFont.load_default(size)
	        except (TypeError, ValueError, OSError):
	            # Older Pillow (no size argument) or a size it rejects.
	            font = ImageFont.load_default()

	    _font_cache[size] = font
	    return font

	# ---------------------------------------------------------------------------
	# TEXT DRAWING
	# ---------------------------------------------------------------------------

	def draw_text_stroked(draw, text, pos, font, align="left", opacity=1.0):
	    """
	    Draw white text with a layered dark stroke and a drop shadow.

	    ``pos`` is the anchor: its x is the left edge, centre or right edge of
	    each line depending on ``align``, and its y is where the first line is
	    drawn. Further lines (split on newlines) step down by 1.25 x font size.
	    ``opacity`` scales the alpha of the shadow, stroke and fill colours.
	    """
	    anchor_x, anchor_y = pos
	    rows = text.split("\n")

	    # Measure every row up front; only the widths are needed for alignment.
	    row_widths = []
	    for row in rows:
	        box = draw.textbbox((0, 0), row, font=font)
	        row_widths.append(box[2] - box[0])

	    row_step = int(font.size * 1.25)

	    # Colours depend only on opacity, so build them once.
	    shadow_fill = TEXT_SHADOW[:3] + (int(TEXT_SHADOW[3] * opacity),)
	    stroke_fill = TEXT_STROKE[:3] + (int(TEXT_STROKE[3] * opacity),)
	    text_fill = TEXT_WHITE[:3] + (int(TEXT_WHITE[3] * opacity),)

	    # Offsets for the stroke passes: a grid sampled inside a circle of each
	    # radius, from the widest ring to the tightest.
	    stroke_offsets = [
	        (dx, dy)
	        for radius in (STROKE_W, STROKE_W - 2, STROKE_W - 4, 2)
	        for dx in range(-radius, radius + 1, max(1, radius // 3))
	        for dy in range(-radius, radius + 1, max(1, radius // 3))
	        if dx * dx + dy * dy <= radius * radius
	    ]

	    for row_no, (row, width) in enumerate(zip(rows, row_widths)):
	        row_y = anchor_y + row_no * row_step

	        if align == "center":
	            row_x = anchor_x - width // 2
	        elif align == "right":
	            row_x = anchor_x - width
	        else:
	            row_x = anchor_x

	        # Drop shadow
	        draw.text((row_x + 4, row_y + 4), row, font=font, fill=shadow_fill)

	        # Stroke layers
	        for dx, dy in stroke_offsets:
	            draw.text((row_x + dx, row_y + dy), row, font=font, fill=stroke_fill)

	        # White fill
	        draw.text((row_x, row_y), row, font=font, fill=text_fill)

	# ---------------------------------------------------------------------------
	# TEXT ANIMATIONS
	# ---------------------------------------------------------------------------

	def render_text_frame(cfg: dict, frame: int, total_frames: int) -> Image.Image:
	    """
	    Render the scene label for ``frame`` onto a transparent RGBA layer.

	    All four supported animation types share one timeline: nothing before
	    the entry frame, a 6-frame pop-in, full opacity until the hold ends,
	    then a 4-frame fade-out. Unknown types produce an empty layer.
	    ``total_frames`` is accepted for interface compatibility and not used.
	    """
	    text_cfg = cfg["text"]
	    animation = text_cfg["type"]
	    caption = cfg["label"]
	    width, height = RESOLUTION

	    layer = Image.new("RGBA", (width, height), (0, 0, 0, 0))
	    canvas = ImageDraw.Draw(layer)
	    font = get_font(text_cfg["font_size"])

	    supported = (
	        "quick_center_pop",
	        "center_stroke_pop",
	        "center_pop",
	        "center_fade_pop",
	    )
	    if animation not in supported:
	        return layer

	    start = text_cfg["entry_frame"]
	    fade_start = start + text_cfg["hold_frames"]

	    if frame < start:
	        # Text has not entered yet.
	        return layer

	    if frame < start + 6:
	        # 0.2s pop-in; opacity saturates before the ease finishes.
	        opacity = min(1.0, ease_out((frame - start) / 6) * 1.5)
	    elif frame < fade_start:
	        opacity = 1.0
	    else:
	        opacity = 1.0 - min(1.0, (frame - fade_start) / 4)

	    anchor = (width // 2, height // 2)
	    draw_text_stroked(canvas, caption, anchor, font, align="center", opacity=opacity)
	    return layer

	# ---------------------------------------------------------------------------
	# TRANSITION
	# ---------------------------------------------------------------------------

	def apply_transition(frame_a: Image.Image, frame_b: Image.Image, ttype: str, progress: float) -> Image.Image:
	    """
	    Apply dynamic transition between frames.

	    Args:
	        frame_a: Outgoing frame.
	        frame_b: Incoming frame (ignored by "end_fade_black").
	        ttype: "whip_pan_right", "flash" or "end_fade_black"; anything else
	            is treated as a hard cut.
	        progress: Transition progress; clamped to the range 0.0 to 1.0.

	    Returns:
	        The blended RESOLUTION-sized frame.
	    """
	    w, h = RESOLUTION
	    # Keep blend factors inside [0, 1] even if the caller overshoots.
	    progress = max(0.0, min(1.0, progress))

	    if ttype == "whip_pan_right":
	        # Incoming frame slides in from the right over the outgoing one.
	        offset = int(w * (1 - ease_out(progress)))
	        result = Image.new("RGB", (w, h))
	        result.paste(frame_a, (0, 0))
	        result.paste(frame_b, (offset, 0))
	        return result

	    if ttype == "flash":
	        # Two phases that meet at progress 0.3 (brightness 1.5, mix 0.5).
	        # The previous version restarted the second phase from pure frame_a,
	        # causing a visible jump back at the phase boundary.
	        if progress < 0.3:
	            phase = progress / 0.3
	            brightness = 1.0 + 0.5 * phase
	            mix = 0.5 * phase
	        else:
	            phase = (progress - 0.3) / 0.7
	            brightness = 1.5 - 0.5 * phase
	            mix = 0.5 + 0.5 * phase
	        flashed = ImageEnhance.Brightness(frame_a).enhance(brightness)
	        return Image.blend(flashed, frame_b, mix)

	    if ttype == "end_fade_black":
	        black = Image.new("RGB", (w, h), (0, 0, 0))
	        return Image.blend(frame_a, black, progress)

	    # Default to hard cut
	    return frame_b if progress >= 0.5 else frame_a

	# ---------------------------------------------------------------------------
	# MAIN RENDER FROM MANIFEST
	# ---------------------------------------------------------------------------

	def render_video_from_manifest(manifest_dict: Dict[str, Any], selected_dir: str, output_path: str) -> Dict[str, Any]:
	    """
	    Render video from manifest without hardcoded SCENE_CONFIG.

	    Frames are first written to an intermediate "<name>_raw.mp4" next to the
	    output, which is then re-encoded to H.264 with ffmpeg and removed on
	    success.

	    Args:
	        manifest_dict: Dictionary with 'scenes' key containing scene data
	        selected_dir: Path to directory with selected scene images
	        output_path: Path to output video file

	    Returns:
	        Dictionary with "success" and "duration_s"; on success also
	        "output_path" and "size_mb", on failure an "error" message.
	    """
	    # Local import keeps this block self-contained; the module does not
	    # import subprocess at the top.
	    import subprocess

	    w, h = RESOLUTION
	    out_file = Path(output_path)
	    out_file.parent.mkdir(parents=True, exist_ok=True)

	    # Generate dynamic SCENE_CONFIG from manifest
	    scene_config = generate_scene_config(manifest_dict)

	    if not scene_config:
	        return {
	            "success": False,
	            "error": "No scenes in manifest",
	            "duration_s": 0
	        }

	    # Derive the temp name from the stem so it can never equal output_path
	    # (str.replace left it unchanged for names without ".mp4").
	    tmp_path = str(out_file.with_name(out_file.stem + "_raw.mp4"))

	    print(f"\n{'='*55}")
	    print(f" Dynamic Video Composition")
	    print(f" {len(scene_config)} scenes | {FPS}fps | {w}x{h}")
	    print(f"{'='*55}\n")

	    # Preload and grade all base images before opening the writer, so a
	    # missing image does not leave an open writer and an empty raw file.
	    print("[1/3] Loading + grading images...")
	    base_images = []
	    for cfg in scene_config:
	        try:
	            raw = load_scene_image(cfg["idx"], selected_dir)
	            base_images.append(grade_image(raw, cfg["grade"]))
	        except Exception as e:
	            print(f" [ERROR] Failed to load scene {cfg['idx']}: {e}")
	            return {"success": False, "error": str(e), "duration_s": 0}

	    print(" [OK] Done\n")

	    writer = cv2.VideoWriter(
	        tmp_path,
	        cv2.VideoWriter_fourcc(*"mp4v"),
	        FPS,
	        (w, h),
	    )
	    if not writer.isOpened():
	        writer.release()
	        print(f" [ERROR] Could not open video writer: {tmp_path}")
	        return {
	            "success": False,
	            "error": f"Could not open video writer for {tmp_path}",
	            "duration_s": 0
	        }

	    # Render scenes
	    print("[2/3] Rendering frames...")
	    total_scenes = len(scene_config)
	    frames_written = 0

	    try:
	        for scene_i, cfg in enumerate(scene_config):
	            total_frames = int(cfg["duration_s"] * FPS)
	            trans_cfg = cfg["transition"]
	            trans_type = trans_cfg["type"]
	            trans_frames = trans_cfg["frames"]
	            trans_start = total_frames - trans_frames
	            base = base_images[scene_i]

	            # The incoming frame is always the next scene at t = 0, so
	            # compute it once per scene instead of once per blended frame.
	            if scene_i + 1 < total_scenes:
	                next_first = get_motion_frame(
	                    base_images[scene_i + 1],
	                    scene_config[scene_i + 1]["motion"],
	                    0.0,
	                )
	            else:
	                next_first = None

	            # A fade to black needs no incoming frame, so it must also run
	            # on the last scene (where it was previously skipped).
	            fades_to_black = trans_type == "end_fade_black"

	            print(f" Scene {cfg['idx']:02d} -- {cfg['label'][:40]} ({total_frames}f, {cfg['duration_s']}s)")

	            for frame in range(total_frames):
	                # Motion frame
	                t_motion = frame / max(total_frames - 1, 1)
	                img = get_motion_frame(base, cfg["motion"], t_motion)

	                # Text layer
	                text_layer = render_text_frame(cfg, frame, total_frames)
	                img = Image.alpha_composite(img.convert("RGBA"), text_layer).convert("RGB")

	                # Transition blend at end of scene
	                frames_into_trans = frame - trans_start
	                if frames_into_trans >= 0 and (next_first is not None or fades_to_black):
	                    trans_t = frames_into_trans / max(trans_frames - 1, 1)
	                    incoming = next_first if next_first is not None else img
	                    img = apply_transition(img, incoming, trans_type, trans_t)

	                # Write frame (cv2 expects BGR)
	                writer.write(cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
	                frames_written += 1

	            print(f" [OK] {total_frames} frames")

	        # Hard cut to black at end
	        print(f"\n Hard cut to black")
	        black_frame = np.zeros((h, w, 3), dtype=np.uint8)
	        for _ in range(2):
	            writer.write(black_frame)
	            frames_written += 1
	        print(f" [OK] 2 frames")
	    finally:
	        # Always finalise the raw file, even if rendering raised.
	        writer.release()

	    duration_s = frames_written / FPS
	    print(f"\n Total frames written: {frames_written} (~{duration_s:.1f}s)\n")

	    # Re-encode with ffmpeg. An argument list (no shell) keeps paths with
	    # spaces or shell metacharacters intact.
	    print("[3/3] Encoding H.264 MP4 via ffmpeg...")
	    cmd = [
	        "ffmpeg", "-y", "-i", tmp_path,
	        "-vcodec", "libx264", "-crf", "20", "-preset", "fast",
	        "-pix_fmt", "yuv420p",
	        "-movflags", "+faststart",
	        output_path,
	    ]
	    try:
	        # stderr is merged into stdout, as the old "2>&1" did.
	        ret = subprocess.run(cmd, stderr=subprocess.STDOUT).returncode
	    except OSError as e:
	        # Typically: ffmpeg is not installed or not on PATH.
	        print(f" [ERROR] ffmpeg could not be started ({e}). Raw file kept: {tmp_path}")
	        return {
	            "success": False,
	            "error": f"ffmpeg could not be started: {e}",
	            "duration_s": duration_s
	        }

	    if ret != 0:
	        print(f" [ERROR] ffmpeg failed (code {ret}). Raw file kept: {tmp_path}")
	        return {
	            "success": False,
	            "error": f"ffmpeg failed with code {ret}",
	            "duration_s": duration_s
	        }

	    os.remove(tmp_path)
	    size_mb = os.path.getsize(output_path) / (1024 * 1024)
	    print(f"\n [OK] Output: {output_path}")
	    print(f" [OK] Size : {size_mb:.1f} MB")
	    print(f"\n{'='*55}\n")

	    return {
	        "success": True,
	        "output_path": output_path,
	        "duration_s": duration_s,
	        "size_mb": size_mb
	    }


	if __name__ == "__main__":
	    # Example usage: render a three-scene demo from images in "selected".
	    demo_labels = (
	        "which type of anger do you have?",
	        "shouting",
	        "revenge",
	    )
	    manifest = {"scenes": [{"label": text} for text in demo_labels]}

	    result = render_video_from_manifest(
	        manifest,
	        "selected",
	        "renders/test_dynamic.mp4",
	    )
	    print(f"Result: {result}")