# auto_cliper/core/styles.py
"""
Video Styles β€” YouTube Shorts Production Engine
SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
All class/method names kept identical for drop-in integration.
"""
from abc import ABC, abstractmethod
import os
import cv2
import numpy as np
import moviepy.editor as mpe
from .config import Config
from .logger import Logger
from .subtitle_manager import SubtitleManager
logger = Logger.get_logger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Gradient Mask Helpers
# ─────────────────────────────────────────────────────────────────────────────
def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
"""
Returns a 1-D float32 array [0..1] of given length.
fade_from_zero=True β†’ 0 β†’ 1 (clip fades IN at this edge)
fade_from_zero=False β†’ 1 β†’ 0 (clip fades OUT at this edge)
"""
arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
return arr if fade_from_zero else arr[::-1]
def _make_vertical_mask(clip_w: int, clip_h: int,
blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
"""
Float32 mask (clip_h Γ— clip_w) in [0,1].
blend_top β†’ pixels from top that fade in (0β†’1)
blend_bottom β†’ pixels from bottom that fade out (1β†’0)
"""
mask = np.ones((clip_h, clip_w), dtype=np.float32)
if blend_top > 0:
grad = _linear_gradient(blend_top, fade_from_zero=True)
mask[:blend_top, :] = grad[:, np.newaxis]
if blend_bottom > 0:
grad = _linear_gradient(blend_bottom, fade_from_zero=False)
mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
return mask
def _make_horizontal_mask(clip_w: int, clip_h: int,
blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
"""
Float32 mask (clip_h Γ— clip_w) in [0,1].
blend_left β†’ pixels from left that fade in (0β†’1)
blend_right β†’ pixels from right that fade out (1β†’0)
"""
mask = np.ones((clip_h, clip_w), dtype=np.float32)
if blend_left > 0:
grad = _linear_gradient(blend_left, fade_from_zero=True)
mask[:, :blend_left] = grad[np.newaxis, :]
if blend_right > 0:
grad = _linear_gradient(blend_right, fade_from_zero=False)
mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
return mask
def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> np.ndarray and mpe.VideoClip:
    """Attach a static float32 numpy mask to *clip* and return the masked clip."""
    mask_clip = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    masked = clip.set_mask(mask_clip)
    return masked
def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Scale *clip* so its width equals target_w, preserving aspect ratio."""
    scaled = clip.resize(width=target_w)
    return scaled
def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Scale *clip* so its height equals target_h, preserving aspect ratio."""
    scaled = clip.resize(height=target_h)
    return scaled
def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force *clip* to exactly *duration*: loop it if too short, trim otherwise."""
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)
# ─────────────────────────────────────────────────────────────────────────────
# Smart Face Cropper
# ─────────────────────────────────────────────────────────────────────────────
class SmartFaceCropper:
    """Crop landscape frames to a portrait window that follows the largest
    detected face, with exponential smoothing to avoid a jittery crop."""

    def __init__(self, output_size=(1080, 1920)):
        # Target (width, height) of the cropped output frames.
        self.output_size = output_size
        # Frontal-face Haar cascade bundled with OpenCV.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.last_coords = None   # last detected face box (x, y, w, h), full-res
        self.smoothed_x = None    # EMA-smoothed horizontal crop center
        self.smoothing = 0.2      # EMA weight given to the newest observation
        self.frame_count = 0      # frames processed; used to skip detections

    def get_crop_coordinates(self, frame):
        """Return a (left, top, right, bottom) crop box for *frame*.

        Detection runs on a half-resolution grayscale copy for speed; the
        detected box is scaled back (x2) to full resolution. The crop spans
        the full frame height and a width matching the output aspect ratio.
        """
        h, w = frame.shape[:2]
        # Width of a crop window that matches the output aspect ratio.
        target_w = int(h * self.output_size[0] / self.output_size[1])
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))
        if len(faces) > 0:
            # Track the largest face by area.
            faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
            fx, fy, fw, fh = [v * 2 for v in faces[0]]  # rescale to full res
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No face this frame: hold the previous center, or frame center.
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            # Exponential moving average keeps the crop from jumping.
            self.smoothed_x = (
                self.smoothed_x * (1 - self.smoothing)
                + current_center_x * self.smoothing
            )
        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))  # clamp inside the frame
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return *clip* with the smart portrait crop applied per frame.

        Face detection runs only every ``frame_skip`` frames; intermediate
        frames reuse the smoothed center for speed.
        """
        frame_skip = 5

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1
            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                left, _, right, _ = self.get_crop_coordinates(frame)
            else:
                h, w = frame.shape[:2]
                target_w = int(h * self.output_size[0] / self.output_size[1])
                # BUGFIX: compare against None instead of truthiness — a
                # smoothed center of 0 is a valid position and must not
                # fall back to the frame center.
                if self.smoothed_x is not None:
                    left = int(self.smoothed_x - target_w // 2)
                else:
                    left = w // 2 - target_w // 2
                left = max(0, min(left, w - target_w))
                right = left + target_w
            return cv2.resize(frame[:, left:right], self.output_size)

        return clip.fl(filter_frame)
# ─────────────────────────────────────────────────────────────────────────────
# Base Style
# ─────────────────────────────────────────────────────────────────────────────
class BaseStyle(ABC):
    """Abstract base for all video styles.

    Subclasses implement :meth:`apply`; caption overlay logic is shared here.
    """

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        # Canvas (width, height) every style renders into.
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Transform *clip* into the styled layout; implemented by subclasses."""
        pass

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", caption_style="classic", **kwargs):
        """Apply the style, then overlay captions built from *transcript_data*."""
        styled = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled
        captions = self._create_caption_clips(
            transcript_data, language, caption_mode, caption_style
        )
        if not captions:
            return styled
        # Flatten an existing composite so captions join the same layer stack.
        if isinstance(styled, mpe.CompositeVideoClip):
            layers = list(styled.clips)
        else:
            layers = [styled]
        return mpe.CompositeVideoClip(layers + captions, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None,
                              caption_mode="sentence", caption_style="classic"):
        """Build standalone caption clips sized for this style's canvas."""
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
            caption_style=caption_style,
        )
# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Style
# ─────────────────────────────────────────────────────────────────────────────
class CinematicStyle(BaseStyle):
    """Center the main clip over a full-canvas background (image, video, or black)."""

    def apply(self, clip, background_path=None, **kwargs):
        out_w, out_h = self.output_size
        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            if ext in {".mp4", ".avi", ".mov", ".mkv", ".webm"}:
                # Video background: strip audio, scale to canvas height,
                # then loop or trim to the main clip's duration.
                src = mpe.VideoFileClip(background_path).without_audio()
                bg = _loop_or_cut(src.resize(height=out_h), clip.duration)
            else:
                # Still-image background held for the whole duration.
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=out_h)
                )
            # Fill the canvas width: crop overshoot, stretch undershoot.
            if bg.w > out_w:
                bg = bg.crop(x_center=bg.w / 2, width=out_w)
            else:
                bg = bg.resize(width=out_w)
        else:
            # No usable background path: plain black canvas.
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)
        # Fit the main clip by width; fall back to height-fit if it overflows.
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")
        return mpe.CompositeVideoClip([bg, main], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Blur Style
# ─────────────────────────────────────────────────────────────────────────────
class CinematicBlurStyle(BaseStyle):
    """Main clip centered over a heavily blurred, semi-transparent copy of itself."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size
        bg = clip.resize(height=out_h)
        if bg.w < out_w:
            bg = clip.resize(width=out_w)

        def make_blur(get_frame, t):
            # Downsample to 16x16 then upscale to canvas size: a cheap stand-in
            # for a very large Gaussian blur, finished with a smoothing pass.
            tiny = cv2.resize(get_frame(t), (16, 16))
            upscaled = cv2.resize(
                tiny, (out_w, out_h), interpolation=cv2.INTER_LINEAR
            )
            return cv2.GaussianBlur(upscaled, (21, 21), 0)

        bg_blurred = bg.fl(make_blur).set_opacity(0.6)
        # Fit the main clip by width; fall back to height-fit if it overflows.
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")
        return mpe.CompositeVideoClip([bg_blurred, main], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Split Vertical (top / bottom, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────
class SplitVerticalStyle(BaseStyle):
    """
    Split the Shorts canvas (1080 x 1920) into top and bottom segments.

    Layout
    ------
    * Top segment   : 58% of canvas height (~1113 px on a 1920-high canvas)
    * Bottom segment: fills the rest, plus the overlap zone
    * Blend zone    : 120 px overlap where the two clips cross-fade via
      gradient masks, so no hard dividing line is visible.

    The gradient is a subtle linear alpha ramp: it does not destroy content
    near the seam, it just dissolves one clip into the other.
    """
    # Fraction of the total canvas height given to the top segment.
    SPLIT_RATIO : float = 0.58
    # Height of the overlap / cross-fade zone, in pixels.
    BLEND_PX : int = 120

    def apply(self, clip, playground_path=None, **kwargs):
        """Compose *clip* (top segment) over a playground video (bottom segment).

        If *playground_path* is missing or does not exist, a dimmed copy of
        *clip* itself fills the bottom segment instead.
        """
        W, H = self.output_size  # e.g. 1080 x 1920
        blend = self.BLEND_PX
        h_top_seg = int(H * self.SPLIT_RATIO)  # ~1113 for H=1920
        h_bot_seg = H - h_top_seg + blend      # bottom height incl. the overlap

        # -- Prepare main clip for top segment -------------------------------
        top_clip = _fit_to_width(clip, W)
        # Crop to the top portion we need (+ half the blend zone for headroom)
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))
        # Gradient: fade out the bottom `blend` rows -> seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # -- Prepare playground / fallback clip for bottom segment -----------
        if playground_path and os.path.exists(playground_path):
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: a dimmed copy of the same source
            bot_src = clip.set_opacity(0.85)
        bot_clip = _fit_to_width(bot_src, W)
        # We want the lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)
        bot_clip = bot_clip.resize((W, h_bot_seg))
        # Gradient: fade in the top `blend` rows -> seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y = h_top_seg - blend  # overlaps the top segment by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))

        # Top is composited last so its fade-out edge sits above the bottom's
        # fade-in edge inside the shared blend zone.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Split Horizontal (left / right, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────
class SplitHorizontalStyle(BaseStyle):
    """
    Split the Shorts canvas (1080 x 1920) into left and right panels.

    Layout
    ------
    * Each panel fills the full canvas height.
    * Left panel : 52% of canvas width (~561 px on a 1080-wide canvas)
    * Right panel: fills the rest, plus the overlap zone
    * Blend zone : 80 px overlap with cross-fade gradient masks.

    Each panel shows a vertical slice of its (height-fitted) source, blended
    at the seam so no dividing line is visible.
    """
    # Fraction of the total canvas width given to the left panel.
    SPLIT_RATIO : float = 0.52
    # Width of the horizontal overlap / cross-fade zone, in pixels.
    BLEND_PX : int = 80

    def apply(self, clip, playground_path=None, **kwargs):
        """Compose *clip* (left panel) beside a playground video (right panel).

        If *playground_path* is missing or does not exist, a dimmed copy of
        *clip* itself fills the right panel instead.
        """
        W, H = self.output_size  # e.g. 1080 x 1920
        blend = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)  # ~561 for W=1080
        w_right_seg = W - w_left_seg + blend    # right width incl. the overlap

        # -- Left panel from main clip ---------------------------------------
        left_src = _fit_to_height(clip, H)
        lw = left_src.w
        # Slice width: segment width plus blend headroom, capped at source width
        crop_w_l = min(lw, w_left_seg + blend)
        # NOTE(review): this slice ends at the source's horizontal center
        # (x2 = lw // 2), i.e. it shows the left-of-center region rather than
        # a centered crop — confirm this framing is intended.
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))
        # Gradient: fade out the rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # -- Right panel from playground or fallback -------------------------
        if playground_path and os.path.exists(playground_path):
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: a dimmed copy of the same source
            right_src = clip.set_opacity(0.85)
        right_full = _fit_to_height(right_src, H)
        rw = right_full.w
        # Slice width for the right panel, capped at source width
        crop_w_r = min(rw, w_right_seg + blend)
        # NOTE(review): x2 = rw // 2 + crop_w_r can exceed the source width rw,
        # so the actual slice may be narrower than crop_w_r — verify against
        # moviepy's crop behavior for out-of-range bounds.
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))
        # Gradient: fade in the leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x = w_left_seg - blend  # overlaps the left panel by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))

        # Left is composited last so its fade-out edge sits above the right's
        # fade-in edge inside the shared blend zone.
        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Vertical Full Style
# ─────────────────────────────────────────────────────────────────────────────
class VerticalFullStyle(BaseStyle):
    """Full-canvas portrait crop that follows the detected face."""

    def apply(self, clip, **kwargs):
        # Fresh cropper per call so smoothing state is private to this clip.
        face_cropper = SmartFaceCropper(output_size=self.output_size)
        return face_cropper.apply_to_clip(clip)
# ─────────────────────────────────────────────────────────────────────────────
# Style Factory (unchanged API)
# ─────────────────────────────────────────────────────────────────────────────
class StyleFactory:
    """Registry mapping style-name strings to style classes.

    Unknown names fall back to :class:`CinematicBlurStyle`.
    """

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate and return the style registered under *style_name*."""
        chosen = StyleFactory._styles.get(style_name, CinematicBlurStyle)
        return chosen()