Fix: Ensure caption_style is passed correctly in styles.py, and secure firebase credentials.
834dd13 | """ | |
| Video Styles β YouTube Shorts Production Engine | |
| SplitVertical & SplitHorizontal rebuilt with seamless gradient blending. | |
| All class/method names kept identical for drop-in integration. | |
| """ | |
| from abc import ABC, abstractmethod | |
| import os | |
| import cv2 | |
| import numpy as np | |
| import moviepy.editor as mpe | |
| from .config import Config | |
| from .logger import Logger | |
| from .subtitle_manager import SubtitleManager | |
| logger = Logger.get_logger(__name__) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Gradient Mask Helpers | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
    """
    Build a 1-D float32 ramp of `length` samples in [0, 1].

    fade_from_zero=True  -> values rise 0 -> 1 (clip fades IN at this edge)
    fade_from_zero=False -> values fall 1 -> 0 (clip fades OUT at this edge)
    """
    ramp = np.linspace(0.0, 1.0, length, dtype=np.float32)
    # Reversing the same ramp keeps the two directions bit-for-bit symmetric.
    return ramp if fade_from_zero else ramp[::-1]
def _make_vertical_mask(clip_w: int, clip_h: int,
                        blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
    """
    Build a float32 alpha mask of shape (clip_h, clip_w), values in [0, 1].

    blend_top    -> that many top rows fade in    (0 -> 1)
    blend_bottom -> that many bottom rows fade out (1 -> 0)
    Rows outside the blend zones are fully opaque (1.0).
    """
    alpha = np.ones((clip_h, clip_w), dtype=np.float32)
    if blend_top > 0:
        # Rising ramp broadcast across every column of the top rows.
        ramp = np.linspace(0.0, 1.0, blend_top, dtype=np.float32)
        alpha[:blend_top, :] = ramp[:, None]
    if blend_bottom > 0:
        # Same ramp reversed: the bottom rows dissolve to transparent.
        ramp = np.linspace(0.0, 1.0, blend_bottom, dtype=np.float32)
        alpha[clip_h - blend_bottom:, :] = ramp[::-1][:, None]
    return alpha
def _make_horizontal_mask(clip_w: int, clip_h: int,
                          blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
    """
    Build a float32 alpha mask of shape (clip_h, clip_w), values in [0, 1].

    blend_left  -> that many left columns fade in   (0 -> 1)
    blend_right -> that many right columns fade out (1 -> 0)
    Columns outside the blend zones are fully opaque (1.0).
    """
    alpha = np.ones((clip_h, clip_w), dtype=np.float32)
    if blend_left > 0:
        # Rising ramp broadcast down every row of the left columns.
        ramp = np.linspace(0.0, 1.0, blend_left, dtype=np.float32)
        alpha[:, :blend_left] = ramp[None, :]
    if blend_right > 0:
        # Same ramp reversed: rightmost columns dissolve to transparent.
        ramp = np.linspace(0.0, 1.0, blend_right, dtype=np.float32)
        alpha[:, clip_w - blend_right:] = ramp[::-1][None, :]
    return alpha
def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> mpe.VideoClip:
    """Attach a static float32 numpy mask to a video clip.

    mask_array: 2-D alpha array in [0, 1]; ismask=True tells moviepy to
    treat it as an alpha channel rather than image data. The mask holds
    for the clip's whole duration.
    """
    mask_clip = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    return clip.set_mask(mask_clip)
def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Resize clip so width == target_w, keeping aspect ratio (height follows)."""
    return clip.resize(width=target_w)
def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Resize clip so height == target_h, keeping aspect ratio (width follows)."""
    return clip.resize(height=target_h)
def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force `clip` to exactly `duration` seconds: trim long clips, loop short ones."""
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Smart Face Cropper | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class SmartFaceCropper:
    """Crops frames to a vertical window that tracks the largest detected face.

    Haar-cascade detection runs on a half-resolution grayscale copy, and the
    crop centre is exponentially smoothed so the window glides instead of
    jumping frame to frame.
    """

    def __init__(self, output_size=(1080, 1920)):
        # Target (width, height) of emitted frames.
        self.output_size = output_size
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.last_coords = None  # last raw face box (x, y, w, h) in full-frame coords
        self.smoothed_x = None   # EMA-smoothed horizontal centre of the crop window
        self.smoothing = 0.2     # EMA weight given to each new detection
        self.frame_count = 0     # frames processed; used to skip detections

    def get_crop_coordinates(self, frame):
        """Detect the dominant face and return a (left, top, right, bottom) crop box.

        The box spans the full frame height and has the output aspect ratio.
        """
        h, w = frame.shape[:2]
        # Crop-window width that matches the output aspect ratio at full height.
        target_w = int(h * self.output_size[0] / self.output_size[1])
        # NOTE(review): moviepy delivers RGB frames; COLOR_BGR2GRAY still yields
        # a usable grayscale for detection (channel weights merely swapped).
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect at half resolution for speed; scale boxes back up by 2 below.
        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))
        if len(faces) > 0:
            # Track the largest face by area.
            faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
            fx, fy, fw, fh = [v * 2 for v in faces[0]]
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No face this frame: hold the previous centre, else frame centre.
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            # Exponential moving average keeps the window from jittering.
            self.smoothed_x = (
                self.smoothed_x * (1 - self.smoothing)
                + current_center_x * self.smoothing
            )
        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))  # clamp the window inside the frame
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return `clip` with the face-tracking crop and output resize applied."""
        frame_skip = 5  # run the detector only every 5th frame

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1
            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                left, _, right, _ = self.get_crop_coordinates(frame)
            else:
                # Between detections, reuse the smoothed centre.
                h, w = frame.shape[:2]
                target_w = int(h * self.output_size[0] / self.output_size[1])
                # BUG FIX: compare against None, not truthiness — a smoothed
                # centre of 0 (face at the left edge) is a valid value and must
                # not fall through to the frame-centre default.
                if self.smoothed_x is not None:
                    left = int(self.smoothed_x - target_w // 2)
                else:
                    left = w // 2 - target_w // 2
                left = max(0, min(left, w - target_w))
                right = left + target_w
            return cv2.resize(frame[:, left:right], self.output_size)

        return clip.fl(filter_frame)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Base Style | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class BaseStyle(ABC):
    """Abstract base for all video styles.

    Subclasses implement `apply`; this base adds optional caption rendering
    on top of the styled result.
    """

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        # (width, height) of the final canvas, e.g. (1080, 1920).
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Return the styled clip. Must be implemented by subclasses.

        FIX: previously a silent no-op returning None despite `abstractmethod`
        being imported; now properly abstract so a missing override fails fast.
        """
        raise NotImplementedError

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", caption_style="classic", **kwargs):
        """Apply the style, then composite caption clips on top (if any).

        transcript_data: caption source; when falsy, the styled clip is
        returned unchanged. caption_style is forwarded to the subtitle layer.
        """
        styled_clip = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled_clip
        caption_clips = self._create_caption_clips(
            transcript_data, language, caption_mode, caption_style
        )
        if not caption_clips:
            return styled_clip
        # Flatten an existing composite so captions join the same layer stack
        # instead of nesting composites (saves a rendering pass).
        if isinstance(styled_clip, mpe.CompositeVideoClip):
            return mpe.CompositeVideoClip(
                list(styled_clip.clips) + caption_clips, size=self.output_size
            )
        return mpe.CompositeVideoClip([styled_clip] + caption_clips, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility: burn captions via SubtitleManager."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None,
                              caption_mode="sentence", caption_style="classic"):
        """Build the raw caption overlay clips (not yet composited)."""
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
            caption_style=caption_style,
        )
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Cinematic Style | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class CinematicStyle(BaseStyle):
    """Main clip centered over a full-canvas background (image, video, or black)."""

    def apply(self, clip, background_path=None, **kwargs):
        """Style `clip` over `background_path` (optional image/video file)."""
        backdrop = self._build_background(background_path, clip.duration)
        foreground = clip.resize(width=self.output_size[0]).set_position("center")
        if foreground.h > self.output_size[1]:
            # Too tall after width-fit: fit by height instead.
            foreground = clip.resize(height=self.output_size[1]).set_position("center")
        return mpe.CompositeVideoClip([backdrop, foreground], size=self.output_size)

    def _build_background(self, background_path, duration):
        """Background layer: video loops/cuts to `duration`, image holds, else black."""
        target_w, target_h = self.output_size
        if not (background_path and os.path.exists(background_path)):
            return mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(duration)
        ext = os.path.splitext(background_path)[1].lower()
        if ext in {".mp4", ".avi", ".mov", ".mkv", ".webm"}:
            backdrop = _loop_or_cut(
                mpe.VideoFileClip(background_path).without_audio()
                .resize(height=target_h),
                duration,
            )
        else:
            backdrop = (
                mpe.ImageClip(background_path)
                .set_duration(duration)
                .resize(height=target_h)
            )
        # After the height fit, either center-crop excess width or stretch up.
        if backdrop.w > target_w:
            return backdrop.crop(x_center=backdrop.w / 2, width=target_w)
        return backdrop.resize(width=target_w)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Cinematic Blur Style | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class CinematicBlurStyle(BaseStyle):
    """Main clip centered over a heavily blurred, dimmed copy of itself."""

    def apply(self, clip, **kwargs):
        """Style `clip` with a blurred self-background filling the canvas."""
        target_w, target_h = self.output_size
        backdrop = clip.resize(height=target_h)
        if backdrop.w < target_w:
            backdrop = clip.resize(width=target_w)

        def blur_frame(get_frame, t):
            # Cheap heavy blur: collapse to 16x16, stretch back up, then smooth
            # the upscaling artifacts with a Gaussian pass.
            tiny = cv2.resize(get_frame(t), (16, 16))
            stretched = cv2.resize(
                tiny, (target_w, target_h),
                interpolation=cv2.INTER_LINEAR,
            )
            return cv2.GaussianBlur(stretched, (21, 21), 0)

        backdrop = backdrop.fl(blur_frame).set_opacity(0.6)
        foreground = clip.resize(width=target_w).set_position("center")
        if foreground.h > target_h:
            # Too tall after width-fit: fit by height instead.
            foreground = clip.resize(height=target_h).set_position("center")
        return mpe.CompositeVideoClip([backdrop, foreground], size=self.output_size)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Split Vertical (top / bottom, seamless gradient blend) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class SplitVerticalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 x 1920) into top and bottom segments.

    Layout
    ------
    * Top segment   : 58 % of canvas height -> ~1114 px
    * Bottom segment: fills the rest        -> ~926 px (includes overlap)
    * Blend zone    : 120 px overlap where the two clips cross-fade via
      gradient masks -> no hard dividing line visible.

    The gradient is very subtle (linear alpha), so it doesn't destroy
    content near the seam, it just dissolves one clip into the other.
    """
    SPLIT_RATIO : float = 0.58   # top segment fraction of total height
    BLEND_PX    : int = 120      # overlap / blend zone height in pixels

    def apply(self, clip, playground_path=None, **kwargs):
        """Compose `clip` (top) over a playground clip (bottom) with a blended seam.

        playground_path: optional filler video for the bottom panel; falls
        back to a dimmed copy of `clip` when missing or not a real file.
        """
        W, H = self.output_size                  # 1080 x 1920
        blend = self.BLEND_PX
        h_top_seg = int(H * self.SPLIT_RATIO)    # ~1114
        h_bot_seg = H - h_top_seg + blend        # ~926 (includes overlap)
        # -- Prepare main clip for top segment --------------------------------
        top_clip = _fit_to_width(clip, W)
        # Crop to the top portion we need (+ half a blend zone so the gradient
        # has real content to dissolve over), then squeeze to segment height.
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))
        # Gradient: fade out the bottom `blend` rows -> seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))
        # -- Prepare playground / fallback clip for bottom segment ------------
        if playground_path and os.path.exists(playground_path):
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: mirror/tint of the same source
            bot_src = clip.set_opacity(0.85)
        bot_clip = _fit_to_width(bot_src, W)
        # We want the middle/lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)
        bot_clip = bot_clip.resize((W, h_bot_seg))
        # Gradient: fade in the top `blend` rows -> seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y = h_top_seg - blend  # overlaps by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))
        # Top clip listed last so its fading bottom edge dissolves over the
        # bottom panel's fading top edge inside the shared overlap band.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Split Horizontal (left / right, seamless gradient blend) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class SplitHorizontalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 x 1920) into left and right panels.

    Layout
    ------
    * Each panel fills the full 1920 px height.
    * Left panel : 52 % of canvas width -> ~562 px
    * Right panel: fills the rest       -> ~598 px (includes overlap)
    * Blend zone : 80 px overlap with cross-fade gradient masks.

    Both panels are individually cropped to portrait aspect ratio
    (each showing roughly a half-width slice of the source),
    then blended at the seam -> no visible dividing line.
    """
    SPLIT_RATIO : float = 0.52   # left panel fraction of total width
    BLEND_PX    : int = 80       # horizontal overlap / blend zone

    def apply(self, clip, playground_path=None, **kwargs):
        """Compose `clip` (left) beside a playground clip (right) with a blended seam.

        playground_path: optional filler video for the right panel; falls
        back to a dimmed copy of `clip` when missing or not a real file.
        """
        W, H = self.output_size                 # 1080 x 1920
        blend = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)  # ~562
        w_right_seg = W - w_left_seg + blend    # ~598 (includes overlap)
        # -- Left panel from main clip ----------------------------------------
        left_src = _fit_to_height(clip, H)
        lw = left_src.w
        # Crop the left portion (slightly more than half for a natural look);
        # the window ends at the source's horizontal centre.
        crop_w_l = min(lw, w_left_seg + blend)
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))
        # Gradient: fade out rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))
        # -- Right panel from playground or fallback --------------------------
        if playground_path and os.path.exists(playground_path):
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            right_src = clip.set_opacity(0.85)
        right_full = _fit_to_height(right_src, H)
        rw = right_full.w
        # Crop the right portion of the source, starting at its centre.
        # NOTE(review): crop_w_r is clamped by rw, not by rw - rw // 2, so
        # x2 can exceed the source width; numpy slicing clamps it silently,
        # leaving a narrower-than-intended crop — verify with wide sources.
        crop_w_r = min(rw, w_right_seg + blend)
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))
        # Gradient: fade in leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x = w_left_seg - blend  # overlaps by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))
        # Left clip listed last so its fading right edge dissolves over the
        # right panel's fading left edge inside the shared overlap band.
        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Vertical Full Style | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class VerticalFullStyle(BaseStyle):
    """Full-frame vertical style: crop follows the speaker's face."""

    def apply(self, clip, **kwargs):
        """Delegate framing entirely to the face-tracking cropper."""
        return SmartFaceCropper(output_size=self.output_size).apply_to_clip(clip)
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Style Factory (unchanged API) | |
| # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
class StyleFactory:
    """Registry mapping style names to style classes (unchanged API).

    Unknown names fall back to CinematicBlurStyle.
    """

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under `style_name`.

        BUG FIX: declared @staticmethod — it was a bare function in the class
        body, so `StyleFactory().get_style("x")` would have bound the instance
        to `style_name`. Class-level calls behave exactly as before.
        """
        style_class = StyleFactory._styles.get(style_name, CinematicBlurStyle)
        return style_class()