# auto_cliper/core/styles.py
"""
Video Styles β€” YouTube Shorts Production Engine
SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
All class/method names kept identical for drop-in integration.
"""
from abc import ABC, abstractmethod
import os
import cv2
import numpy as np
import moviepy.editor as mpe
from .config import Config
from .logger import Logger
from .subtitle_manager import SubtitleManager
logger = Logger.get_logger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Gradient Mask Helpers
# ─────────────────────────────────────────────────────────────────────────────
def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
"""
Returns a 1-D float32 array [0..1] of given length.
fade_from_zero=True β†’ 0 β†’ 1 (clip fades IN at this edge)
fade_from_zero=False β†’ 1 β†’ 0 (clip fades OUT at this edge)
"""
arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
return arr if fade_from_zero else arr[::-1]
def _make_vertical_mask(clip_w: int, clip_h: int,
blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
"""
Float32 mask (clip_h Γ— clip_w) in [0,1].
blend_top β†’ pixels from top that fade in (0β†’1)
blend_bottom β†’ pixels from bottom that fade out (1β†’0)
"""
mask = np.ones((clip_h, clip_w), dtype=np.float32)
if blend_top > 0:
grad = _linear_gradient(blend_top, fade_from_zero=True)
mask[:blend_top, :] = grad[:, np.newaxis]
if blend_bottom > 0:
grad = _linear_gradient(blend_bottom, fade_from_zero=False)
mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
return mask
def _make_horizontal_mask(clip_w: int, clip_h: int,
blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
"""
Float32 mask (clip_h Γ— clip_w) in [0,1].
blend_left β†’ pixels from left that fade in (0β†’1)
blend_right β†’ pixels from right that fade out (1β†’0)
"""
mask = np.ones((clip_h, clip_w), dtype=np.float32)
if blend_left > 0:
grad = _linear_gradient(blend_left, fade_from_zero=True)
mask[:, :blend_left] = grad[np.newaxis, :]
if blend_right > 0:
grad = _linear_gradient(blend_right, fade_from_zero=False)
mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
return mask
def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> np.ndarray and mpe.VideoClip:
    """Attach a static float32 numpy mask to *clip* and return the masked clip."""
    mask_clip = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    masked = clip.set_mask(mask_clip)
    return masked
def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Scale *clip* so its width equals target_w, preserving aspect ratio."""
    scaled = clip.resize(width=target_w)
    return scaled
def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Scale *clip* so its height equals target_h, preserving aspect ratio."""
    scaled = clip.resize(height=target_h)
    return scaled
def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force *clip* to exactly *duration*: loop it if too short, trim otherwise."""
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)
# ─────────────────────────────────────────────────────────────────────────────
# Smart Face Cropper
# ─────────────────────────────────────────────────────────────────────────────
class SmartFaceCropper:
    """Crop landscape frames to a portrait window that follows the largest
    detected face, with exponential smoothing to avoid a jittery crop."""

    def __init__(self, output_size=(1080, 1920)):
        # Target (width, height) of the cropped output frames.
        self.output_size = output_size
        # Frontal-face Haar cascade bundled with OpenCV.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.last_coords = None   # last detected face box (x, y, w, h), full-res
        self.smoothed_x = None    # EMA-smoothed horizontal crop center
        self.smoothing = 0.2      # EMA weight given to the newest observation
        self.frame_count = 0      # frames processed; used to skip detections

    def get_crop_coordinates(self, frame):
        """Return a (left, top, right, bottom) crop box for *frame*.

        Detection runs on a half-resolution grayscale copy for speed; the
        detected box is scaled back (x2) to full resolution. The crop spans
        the full frame height and a width matching the output aspect ratio.
        """
        h, w = frame.shape[:2]
        # Width of a crop window that matches the output aspect ratio.
        target_w = int(h * self.output_size[0] / self.output_size[1])
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))
        if len(faces) > 0:
            # Track the largest face by area.
            faces = sorted(faces, key=lambda f: f[2] * f[3], reverse=True)
            fx, fy, fw, fh = [v * 2 for v in faces[0]]  # rescale to full res
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No face this frame: hold the previous center, or frame center.
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x
        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            # Exponential moving average keeps the crop from jumping.
            self.smoothed_x = (
                self.smoothed_x * (1 - self.smoothing)
                + current_center_x * self.smoothing
            )
        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))  # clamp inside the frame
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return *clip* with the smart portrait crop applied per frame.

        Face detection runs only every ``frame_skip`` frames; intermediate
        frames reuse the smoothed center for speed.
        """
        frame_skip = 5

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1
            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                left, _, right, _ = self.get_crop_coordinates(frame)
            else:
                h, w = frame.shape[:2]
                target_w = int(h * self.output_size[0] / self.output_size[1])
                # BUGFIX: compare against None instead of truthiness — a
                # smoothed center of 0 is a valid position and must not
                # fall back to the frame center.
                if self.smoothed_x is not None:
                    left = int(self.smoothed_x - target_w // 2)
                else:
                    left = w // 2 - target_w // 2
                left = max(0, min(left, w - target_w))
                right = left + target_w
            return cv2.resize(frame[:, left:right], self.output_size)

        return clip.fl(filter_frame)
# ─────────────────────────────────────────────────────────────────────────────
# Base Style
# ─────────────────────────────────────────────────────────────────────────────
class BaseStyle(ABC):
    """Abstract base for all video styles.

    Subclasses implement :meth:`apply`; caption overlay logic is shared here.
    """

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        # Canvas (width, height) every style renders into.
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Transform *clip* into the styled layout; implemented by subclasses."""
        pass

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", caption_style="classic", **kwargs):
        """Apply the style, then overlay captions built from *transcript_data*."""
        styled = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled
        captions = self._create_caption_clips(
            transcript_data, language, caption_mode, caption_style
        )
        if not captions:
            return styled
        # Flatten an existing composite so captions join the same layer stack.
        if isinstance(styled, mpe.CompositeVideoClip):
            layers = list(styled.clips)
        else:
            layers = [styled]
        return mpe.CompositeVideoClip(layers + captions, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None,
                              caption_mode="sentence", caption_style="classic"):
        """Build standalone caption clips sized for this style's canvas."""
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
            caption_style=caption_style,
        )
# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Style
# ─────────────────────────────────────────────────────────────────────────────
class CinematicStyle(BaseStyle):
    """Center the main clip over a full-canvas background (image, video, or black)."""

    def apply(self, clip, background_path=None, **kwargs):
        out_w, out_h = self.output_size
        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            if ext in {".mp4", ".avi", ".mov", ".mkv", ".webm"}:
                # Video background: strip audio, scale to canvas height,
                # then loop or trim to the main clip's duration.
                src = mpe.VideoFileClip(background_path).without_audio()
                bg = _loop_or_cut(src.resize(height=out_h), clip.duration)
            else:
                # Still-image background held for the whole duration.
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=out_h)
                )
            # Fill the canvas width: crop overshoot, stretch undershoot.
            if bg.w > out_w:
                bg = bg.crop(x_center=bg.w / 2, width=out_w)
            else:
                bg = bg.resize(width=out_w)
        else:
            # No usable background path: plain black canvas.
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)
        # Fit the main clip by width; fall back to height-fit if it overflows.
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")
        return mpe.CompositeVideoClip([bg, main], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Blur Style
# ─────────────────────────────────────────────────────────────────────────────
class CinematicBlurStyle(BaseStyle):
    """Main clip centered over a heavily blurred, semi-transparent copy of itself."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size
        bg = clip.resize(height=out_h)
        if bg.w < out_w:
            bg = clip.resize(width=out_w)

        def make_blur(get_frame, t):
            # Downsample to 16x16 then upscale to canvas size: a cheap stand-in
            # for a very large Gaussian blur, finished with a smoothing pass.
            tiny = cv2.resize(get_frame(t), (16, 16))
            upscaled = cv2.resize(
                tiny, (out_w, out_h), interpolation=cv2.INTER_LINEAR
            )
            return cv2.GaussianBlur(upscaled, (21, 21), 0)

        bg_blurred = bg.fl(make_blur).set_opacity(0.6)
        # Fit the main clip by width; fall back to height-fit if it overflows.
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")
        return mpe.CompositeVideoClip([bg_blurred, main], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Split Vertical (top / bottom, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────
class SplitVerticalStyle(BaseStyle):
    """
    Split the Shorts canvas (1080 x 1920) into top and bottom segments.

    Layout
    ------
    * Top segment   : 58% of canvas height (~1113 px on a 1920-high canvas)
    * Bottom segment: fills the rest, plus the overlap zone
    * Blend zone    : 120 px overlap where the two clips cross-fade via
      gradient masks, so no hard dividing line is visible.

    The gradient is a subtle linear alpha ramp: it does not destroy content
    near the seam, it just dissolves one clip into the other.
    """
    # Fraction of the total canvas height given to the top segment.
    SPLIT_RATIO : float = 0.58
    # Height of the overlap / cross-fade zone, in pixels.
    BLEND_PX : int = 120

    def apply(self, clip, playground_path=None, **kwargs):
        """Compose *clip* (top segment) over a playground video (bottom segment).

        If *playground_path* is missing or does not exist, a dimmed copy of
        *clip* itself fills the bottom segment instead.
        """
        W, H = self.output_size  # e.g. 1080 x 1920
        blend = self.BLEND_PX
        h_top_seg = int(H * self.SPLIT_RATIO)  # ~1113 for H=1920
        h_bot_seg = H - h_top_seg + blend      # bottom height incl. the overlap

        # -- Prepare main clip for top segment -------------------------------
        top_clip = _fit_to_width(clip, W)
        # Crop to the top portion we need (+ half the blend zone for headroom)
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))
        # Gradient: fade out the bottom `blend` rows -> seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # -- Prepare playground / fallback clip for bottom segment -----------
        if playground_path and os.path.exists(playground_path):
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: a dimmed copy of the same source
            bot_src = clip.set_opacity(0.85)
        bot_clip = _fit_to_width(bot_src, W)
        # We want the lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)
        bot_clip = bot_clip.resize((W, h_bot_seg))
        # Gradient: fade in the top `blend` rows -> seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y = h_top_seg - blend  # overlaps the top segment by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))

        # Top is composited last so its fade-out edge sits above the bottom's
        # fade-in edge inside the shared blend zone.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Split Horizontal (left / right, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────
class SplitHorizontalStyle(BaseStyle):
    """
    Split the Shorts canvas (1080 x 1920) into left and right panels.

    Layout
    ------
    * Each panel fills the full canvas height.
    * Left panel : 52% of canvas width (~561 px on a 1080-wide canvas)
    * Right panel: fills the rest, plus the overlap zone
    * Blend zone : 80 px overlap with cross-fade gradient masks.

    Each panel shows a vertical slice of its (height-fitted) source, blended
    at the seam so no dividing line is visible.
    """
    # Fraction of the total canvas width given to the left panel.
    SPLIT_RATIO : float = 0.52
    # Width of the horizontal overlap / cross-fade zone, in pixels.
    BLEND_PX : int = 80

    def apply(self, clip, playground_path=None, **kwargs):
        """Compose *clip* (left panel) beside a playground video (right panel).

        If *playground_path* is missing or does not exist, a dimmed copy of
        *clip* itself fills the right panel instead.
        """
        W, H = self.output_size  # e.g. 1080 x 1920
        blend = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)  # ~561 for W=1080
        w_right_seg = W - w_left_seg + blend    # right width incl. the overlap

        # -- Left panel from main clip ---------------------------------------
        left_src = _fit_to_height(clip, H)
        lw = left_src.w
        # Slice width: segment width plus blend headroom, capped at source width
        crop_w_l = min(lw, w_left_seg + blend)
        # NOTE(review): this slice ends at the source's horizontal center
        # (x2 = lw // 2), i.e. it shows the left-of-center region rather than
        # a centered crop — confirm this framing is intended.
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))
        # Gradient: fade out the rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # -- Right panel from playground or fallback -------------------------
        if playground_path and os.path.exists(playground_path):
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: a dimmed copy of the same source
            right_src = clip.set_opacity(0.85)
        right_full = _fit_to_height(right_src, H)
        rw = right_full.w
        # Slice width for the right panel, capped at source width
        crop_w_r = min(rw, w_right_seg + blend)
        # NOTE(review): x2 = rw // 2 + crop_w_r can exceed the source width rw,
        # so the actual slice may be narrower than crop_w_r — verify against
        # moviepy's crop behavior for out-of-range bounds.
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))
        # Gradient: fade in the leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x = w_left_seg - blend  # overlaps the left panel by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))

        # Left is composited last so its fade-out edge sits above the right's
        # fade-in edge inside the shared blend zone.
        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)
# ─────────────────────────────────────────────────────────────────────────────
# Vertical Full Style
# ─────────────────────────────────────────────────────────────────────────────
class VerticalFullStyle(BaseStyle):
    """Full-canvas portrait crop that follows the detected face."""

    def apply(self, clip, **kwargs):
        # Fresh cropper per call so smoothing state is private to this clip.
        face_cropper = SmartFaceCropper(output_size=self.output_size)
        return face_cropper.apply_to_clip(clip)
# ─────────────────────────────────────────────────────────────────────────────
# Style Factory (unchanged API)
# ─────────────────────────────────────────────────────────────────────────────
class StyleFactory:
    """Registry mapping style-name strings to style classes.

    Unknown names fall back to :class:`CinematicBlurStyle`.
    """

    _styles = {
        "cinematic": CinematicStyle,
        "cinematic_blur": CinematicBlurStyle,
        "split_vertical": SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full": VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate and return the style registered under *style_name*."""
        chosen = StyleFactory._styles.get(style_name, CinematicBlurStyle)
        return chosen()