"""
Video Styles — YouTube Shorts Production Engine

SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
All class/method names kept identical for drop-in integration.
"""

from abc import ABC, abstractmethod
import os

import cv2
import numpy as np
import moviepy.editor as mpe

from .config import Config
from .logger import Logger
from .subtitle_manager import SubtitleManager

logger = Logger.get_logger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# Gradient Mask Helpers
# ─────────────────────────────────────────────────────────────────────────────

def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
    """
    Return a 1-D float32 ramp of ``length`` samples in [0, 1].

    fade_from_zero=True  → 0 → 1  (clip fades IN at this edge)
    fade_from_zero=False → 1 → 0  (clip fades OUT at this edge)
    """
    ramp = np.linspace(0.0, 1.0, length, dtype=np.float32)
    return ramp if fade_from_zero else ramp[::-1]


def _make_vertical_mask(clip_w: int, clip_h: int,
                        blend_top: int = 0,
                        blend_bottom: int = 0) -> np.ndarray:
    """
    Build a float32 mask of shape (clip_h, clip_w) with values in [0, 1].

    blend_top    → number of rows from the top that fade in  (0 → 1)
    blend_bottom → number of rows from the bottom that fade out (1 → 0)

    Blend sizes are clamped to ``[0, clip_h]`` so a blend zone larger than
    the clip no longer raises a broadcasting error.  The fades are multiplied
    into the mask, so if the two zones overlap they combine instead of the
    second one overwriting the first.
    """
    mask = np.ones((clip_h, clip_w), dtype=np.float32)
    top_rows = min(max(blend_top, 0), clip_h)
    bottom_rows = min(max(blend_bottom, 0), clip_h)
    if top_rows > 0:
        fade_in = _linear_gradient(top_rows, fade_from_zero=True)
        mask[:top_rows, :] *= fade_in[:, np.newaxis]
    if bottom_rows > 0:
        fade_out = _linear_gradient(bottom_rows, fade_from_zero=False)
        mask[clip_h - bottom_rows:, :] *= fade_out[:, np.newaxis]
    return mask


def _make_horizontal_mask(clip_w: int, clip_h: int,
                          blend_left: int = 0,
                          blend_right: int = 0) -> np.ndarray:
    """
    Build a float32 mask of shape (clip_h, clip_w) with values in [0, 1].

    blend_left  → number of columns from the left that fade in  (0 → 1)
    blend_right → number of columns from the right that fade out (1 → 0)

    Blend sizes are clamped to ``[0, clip_w]``; overlapping fades are
    multiplied together (see ``_make_vertical_mask``).
    """
    mask = np.ones((clip_h, clip_w), dtype=np.float32)
    left_cols = min(max(blend_left, 0), clip_w)
    right_cols = min(max(blend_right, 0), clip_w)
    if left_cols > 0:
        fade_in = _linear_gradient(left_cols, fade_from_zero=True)
        mask[:, :left_cols] *= fade_in[np.newaxis, :]
    if right_cols > 0:
        fade_out = _linear_gradient(right_cols, fade_from_zero=False)
        mask[:, clip_w - right_cols:] *= fade_out[np.newaxis, :]
    return mask


def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> mpe.VideoClip:
    """
    Attach a static float32 numpy mask to a video clip.

    NOTE: ``set_mask`` replaces any mask the clip already carries (for
    example one created by ``set_opacity``); fold such opacity into
    ``mask_array`` before calling this helper.
    """
    mask_clip = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    return clip.set_mask(mask_clip)


def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Resize clip so width == target_w, keeping aspect ratio."""
    return clip.resize(width=target_w)


def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Resize clip so height == target_h, keeping aspect ratio."""
    return clip.resize(height=target_h)


def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Loop a too-short clip up to ``duration``, otherwise trim it to it."""
    if clip.duration < duration:
        return clip.loop(duration=duration)
    return clip.subclip(0, duration)


# ─────────────────────────────────────────────────────────────────────────────
# Smart Face Cropper
# ─────────────────────────────────────────────────────────────────────────────

class SmartFaceCropper:
    """
    Crops frames to the output aspect ratio, horizontally tracking the
    largest detected face with exponential smoothing.
    """

    def __init__(self, output_size=(1080, 1920)):
        self.output_size = output_size
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.last_coords = None   # (x, y, w, h) of the last detected face
        self.smoothed_x = None    # smoothed horizontal centre of the crop
        self.smoothing = 0.2      # weight given to the newest observation
        self.frame_count = 0

    def _crop_window(self, frame_w, frame_h):
        """Return ``(left, right)`` of the crop window for the current state."""
        target_w = int(frame_h * self.output_size[0] / self.output_size[1])
        # ``is not None`` rather than truthiness: 0.0 is a valid centre.
        centre = self.smoothed_x if self.smoothed_x is not None else frame_w // 2
        left = int(centre - target_w // 2)
        left = max(0, min(left, frame_w - target_w))
        return left, left + target_w

    def get_crop_coordinates(self, frame):
        """
        Run face detection on ``frame`` and update the smoothed crop centre.

        Returns ``(left, top, right, bottom)`` of the crop rectangle; the
        rectangle always spans the full frame height.
        """
        h, w = frame.shape[:2]
        # moviepy hands frames over in RGB channel order, so convert with the
        # RGB code (the BGR code swaps the red/blue luminance weights).
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        # Detect on a half-size image for speed; results are scaled back ×2.
        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))

        if len(faces) > 0:
            biggest = max(faces, key=lambda f: f[2] * f[3])
            fx, fy, fw, fh = [v * 2 for v in biggest]
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No face: hold the current position (or start from the centre).
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x

        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            self.smoothed_x = (
                self.smoothed_x * (1 - self.smoothing)
                + current_center_x * self.smoothing
            )

        left, right = self._crop_window(w, h)
        return left, 0, right, h

    def apply_to_clip(self, clip):
        """Return ``clip`` with every frame face-cropped to ``output_size``."""
        frame_skip = 5

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1
            # Detection runs on every frame until a first face has been
            # found, then only on every ``frame_skip``-th frame.
            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                left, _, right, _ = self.get_crop_coordinates(frame)
            else:
                h, w = frame.shape[:2]
                left, right = self._crop_window(w, h)
            return cv2.resize(frame[:, left:right], self.output_size)

        return clip.fl(filter_frame)


# ─────────────────────────────────────────────────────────────────────────────
# Base Style
# ─────────────────────────────────────────────────────────────────────────────

class BaseStyle(ABC):
    """Common interface for all layout styles."""

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Return ``clip`` laid out on the output canvas."""
        pass

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", caption_style="classic",
                            **kwargs):
        """
        Apply the style, then overlay caption clips when a transcript is given.

        The result keeps the styled clip's duration so captions whose timing
        runs past the end of the video cannot lengthen the output.
        """
        styled_clip = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled_clip

        caption_clips = self._create_caption_clips(
            transcript_data, language, caption_mode, caption_style
        )
        if not caption_clips:
            return styled_clip

        # Flatten an existing composite so captions join the same layer stack.
        if isinstance(styled_clip, mpe.CompositeVideoClip):
            layers = list(styled_clip.clips)
        else:
            layers = [styled_clip]

        composite = mpe.CompositeVideoClip(
            layers + list(caption_clips), size=self.output_size
        )
        if styled_clip.duration is not None:
            composite = composite.set_duration(styled_clip.duration)
        return composite

    def add_captions(self, clip, transcript_data, language=None,
                     caption_mode="sentence"):
        """Kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip,
            transcript_data,
            size=self.output_size,
            language=language,
            caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None,
                              caption_mode="sentence", caption_style="classic"):
        """Delegate caption-clip creation to ``SubtitleManager``."""
        return SubtitleManager.create_caption_clips(
            transcript_data,
            size=self.output_size,
            language=language,
            caption_mode=caption_mode,
            caption_style=caption_style,
        )


# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Style
# ─────────────────────────────────────────────────────────────────────────────

class CinematicStyle(BaseStyle):
    """Main clip centred over an image/video background (black by default)."""

    def apply(self, clip, background_path=None, **kwargs):
        out_w, out_h = self.output_size

        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            video_ext = {".mp4", ".avi", ".mov", ".mkv", ".webm"}
            if ext in video_ext:
                bg = _loop_or_cut(
                    mpe.VideoFileClip(background_path).without_audio()
                    .resize(height=out_h),
                    clip.duration,
                )
            else:
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=out_h)
                )

            # Cover the canvas, centre-cropping whichever axis overflows.
            if bg.w > out_w:
                bg = bg.crop(x_center=bg.w / 2, width=out_w)
            else:
                bg = bg.resize(width=out_w)
                if bg.h > out_h:
                    # Previously the overflow was left top-aligned; crop it
                    # around the centre like the horizontal case above.
                    bg = bg.crop(y_center=bg.h / 2, height=out_h)
        else:
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        # Fit the main clip inside the canvas (letterbox or pillarbox).
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([bg, main], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Blur Style
# ─────────────────────────────────────────────────────────────────────────────

class CinematicBlurStyle(BaseStyle):
    """Main clip centred over a dimmed, heavily blurred copy of itself."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size

        def make_blur(get_frame, t):
            # Shrink to a 16×16 thumbnail, stretch it over the canvas and
            # smooth it: a cheap but very strong blur.
            frame = get_frame(t)
            small = cv2.resize(frame, (16, 16))
            blurred = cv2.resize(
                small, (out_w, out_h), interpolation=cv2.INTER_LINEAR,
            )
            return cv2.GaussianBlur(blurred, (21, 21), 0)

        # The background must be silent: CompositeVideoClip sums the audio of
        # all layers, so leaving the track on would play it twice.  No
        # full-resolution pre-resize is needed because the frame is reduced
        # to 16×16 straight away.
        bg_blurred = clip.without_audio().fl(make_blur).set_opacity(0.6)

        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([bg_blurred, main], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Split Vertical  (top / bottom, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────

class SplitVerticalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into top and bottom segments.

    Layout
    ──────
    • Top segment   : 58 % of canvas height            → 1113 px
    • Bottom segment: the rest plus the blend overlap  → 927 px
    • Blend zone    : 120 px overlap in which the top clip dissolves into
                      the bottom clip — no hard dividing line visible.

    The top clip is composited over the bottom clip and only the top clip
    carries the fade-out gradient.  The bottom clip stays opaque underneath,
    which yields a true linear cross-fade (fading both layers would darken
    the seam).
    """

    SPLIT_RATIO: float = 0.58       # top segment fraction of total height
    BLEND_PX: int = 120             # overlap / blend zone height in pixels
    FALLBACK_OPACITY: float = 0.85  # dimming of the bottom panel when it
                                    # reuses the main clip

    def apply(self, clip, playground_path=None, **kwargs):
        W, H = self.output_size                   # 1080 × 1920
        blend = self.BLEND_PX

        h_top_seg = int(H * self.SPLIT_RATIO)     # 1113
        h_bot_seg = H - h_top_seg + blend         # 927 (includes overlap)

        # ── Main clip for the top segment ───────────────────────────────────
        top_clip = _fit_to_width(clip, W)
        # Take the top portion (+ half a blend zone so the gradient has room).
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))

        # Gradient: fade out the bottom `blend` rows → seamless merge.
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # ── Playground / fallback clip for the bottom segment ───────────────
        if playground_path and os.path.exists(playground_path):
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
            bot_opacity = 1.0
        else:
            # Fallback: a dimmed copy of the same source.  It must be silent,
            # otherwise the composite would mix the audio track twice.
            bot_src = clip.without_audio()
            bot_opacity = self.FALLBACK_OPACITY

        bot_clip = _fit_to_width(bot_src, W)
        # Use the lower portion of the source for the bottom panel.
        if bot_clip.h > h_bot_seg:
            y_start = bot_clip.h - h_bot_seg
            bot_clip = bot_clip.crop(x1=0, y1=y_start, x2=W, y2=bot_clip.h)
        bot_clip = bot_clip.resize((W, h_bot_seg))

        if bot_opacity < 1.0:
            # Applied as a constant mask at the very end so that no later
            # ``set_mask`` call can discard it.
            bot_clip = _apply_mask(
                bot_clip, np.full((h_bot_seg, W), bot_opacity, dtype=np.float32)
            )

        bot_y = h_top_seg - blend                 # overlaps by `blend` px
        bot_clip = bot_clip.set_position((0, bot_y))

        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Split Horizontal  (left / right, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────

class SplitHorizontalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into left and right panels.

    Layout
    ──────
    • Each panel fills the full 1920 px height.
    • Left panel : 52 % of canvas width              → 561 px
    • Right panel: the rest plus the blend overlap   → 599 px
    • Blend zone : 80 px overlap in which the left panel dissolves into the
                   right panel — no visible dividing line.

    The left panel is cut from the area just left of the source's centre,
    the right panel from the area just right of it.  The left panel is
    composited over the right one and is the only layer carrying a gradient,
    which yields a true linear cross-fade at the seam.
    """

    SPLIT_RATIO: float = 0.52       # left panel fraction of total width
    BLEND_PX: int = 80              # horizontal overlap / blend zone
    FALLBACK_OPACITY: float = 0.85  # dimming of the right panel when it
                                    # reuses the main clip

    def apply(self, clip, playground_path=None, **kwargs):
        W, H = self.output_size                   # 1080 × 1920
        blend = self.BLEND_PX

        w_left_seg = int(W * self.SPLIT_RATIO)    # 561
        w_right_seg = W - w_left_seg + blend      # 599 (includes overlap)

        # ── Left panel from the main clip ───────────────────────────────────
        left_src = _fit_to_height(clip, H)
        lw = left_src.w
        # Crop the strip that ends at the horizontal centre of the source.
        crop_w_l = min(lw, w_left_seg + blend)
        left_clip = left_src.crop(
            x1=max(0, lw // 2 - crop_w_l), y1=0, x2=lw // 2, y2=H
        )
        left_clip = left_clip.resize((w_left_seg, H))

        # Gradient: fade out the rightmost `blend` columns.
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # ── Right panel from playground or fallback ─────────────────────────
        if playground_path and os.path.exists(playground_path):
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
            right_opacity = 1.0
        else:
            # Fallback: a dimmed, silent copy of the main clip (a second
            # audio-bearing layer would double the sound in the composite).
            right_src = clip.without_audio()
            right_opacity = self.FALLBACK_OPACITY

        right_full = _fit_to_height(right_src, H)
        rw = right_full.w
        # Crop the strip that starts at the horizontal centre of the source;
        # a window running past the right edge is clipped by the frame slice.
        crop_w_r = min(rw, w_right_seg + blend)
        right_clip = right_full.crop(
            x1=rw // 2, y1=0, x2=rw // 2 + crop_w_r, y2=H
        )
        right_clip = right_clip.resize((w_right_seg, H))

        if right_opacity < 1.0:
            # Applied as a constant mask at the very end so that no later
            # ``set_mask`` call can discard it.
            right_clip = _apply_mask(
                right_clip, np.full((H, w_right_seg), right_opacity, dtype=np.float32)
            )

        right_x = w_left_seg - blend              # overlaps by `blend` px
        right_clip = right_clip.set_position((right_x, 0))

        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Vertical Full Style
# ─────────────────────────────────────────────────────────────────────────────

class VerticalFullStyle(BaseStyle):
    """Full-canvas portrait crop that follows the largest detected face."""

    def apply(self, clip, **kwargs):
        # A fresh cropper per call keeps tracking state from leaking
        # between clips.
        cropper = SmartFaceCropper(output_size=self.output_size)
        return cropper.apply_to_clip(clip)


# ─────────────────────────────────────────────────────────────────────────────
# Style Factory  (unchanged API)
# ─────────────────────────────────────────────────────────────────────────────

class StyleFactory:
    """Maps style names to style classes."""

    _styles = {
        "cinematic":        CinematicStyle,
        "cinematic_blur":   CinematicBlurStyle,
        "split_vertical":   SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full":    VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """
        Return a new style instance for ``style_name``.

        Unknown names fall back to ``CinematicBlurStyle``; the fallback is
        logged so a misspelled style name does not go unnoticed.
        """
        style_class = StyleFactory._styles.get(style_name)
        if style_class is None:
            logger.warning(
                f"Unknown style {style_name!r}; falling back to 'cinematic_blur'"
            )
            style_class = CinematicBlurStyle
        return style_class()