File size: 20,034 Bytes
0faf659
 
 
 
 
342e0fb
 
 
0faf659
342e0fb
 
 
 
 
 
 
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
 
 
0faf659
 
 
 
 
 
 
342e0fb
 
0faf659
342e0fb
0faf659
 
 
342e0fb
 
0faf659
 
342e0fb
0faf659
342e0fb
 
 
 
 
 
0faf659
 
 
 
342e0fb
 
 
 
 
 
 
0faf659
342e0fb
 
 
 
0faf659
342e0fb
0faf659
342e0fb
0faf659
 
 
 
 
342e0fb
 
0faf659
 
 
 
 
342e0fb
 
 
 
 
 
 
0faf659
 
834dd13
342e0fb
 
 
 
834dd13
 
 
342e0fb
 
 
 
0faf659
 
 
 
342e0fb
 
0faf659
342e0fb
 
 
0faf659
 
342e0fb
0faf659
834dd13
 
342e0fb
0faf659
 
834dd13
342e0fb
 
0faf659
 
 
 
 
342e0fb
 
 
 
0faf659
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
342e0fb
0faf659
342e0fb
 
 
 
0faf659
 
 
 
 
 
 
 
 
 
 
342e0fb
 
 
 
 
 
0faf659
342e0fb
0faf659
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
342e0fb
 
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
 
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
 
 
 
 
 
0faf659
 
 
 
 
342e0fb
 
0faf659
 
 
342e0fb
0faf659
342e0fb
 
 
 
 
0faf659
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
"""
Video Styles β€” YouTube Shorts Production Engine
SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
All class/method names kept identical for drop-in integration.
"""
from abc import ABC, abstractmethod
import os
import cv2
import numpy as np
import moviepy.editor as mpe
from .config import Config
from .logger import Logger
from .subtitle_manager import SubtitleManager

logger = Logger.get_logger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# Gradient Mask Helpers
# ─────────────────────────────────────────────────────────────────────────────

def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
    """
    Returns a 1-D float32 array [0..1] of given length.
    fade_from_zero=True  β†’ 0 β†’ 1  (clip fades IN at this edge)
    fade_from_zero=False β†’ 1 β†’ 0  (clip fades OUT at this edge)
    """
    arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
    return arr if fade_from_zero else arr[::-1]


def _make_vertical_mask(clip_w: int, clip_h: int,
                        blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
    """
    Float32 mask (clip_h Γ— clip_w) in [0,1].
    blend_top    β†’ pixels from top that fade in  (0β†’1)
    blend_bottom β†’ pixels from bottom that fade out (1β†’0)
    """
    mask = np.ones((clip_h, clip_w), dtype=np.float32)
    if blend_top > 0:
        grad = _linear_gradient(blend_top, fade_from_zero=True)
        mask[:blend_top, :] = grad[:, np.newaxis]
    if blend_bottom > 0:
        grad = _linear_gradient(blend_bottom, fade_from_zero=False)
        mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
    return mask


def _make_horizontal_mask(clip_w: int, clip_h: int,
                          blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
    """
    Float32 mask (clip_h Γ— clip_w) in [0,1].
    blend_left  β†’ pixels from left  that fade in  (0β†’1)
    blend_right β†’ pixels from right that fade out (1β†’0)
    """
    mask = np.ones((clip_h, clip_w), dtype=np.float32)
    if blend_left > 0:
        grad = _linear_gradient(blend_left, fade_from_zero=True)
        mask[:, :blend_left] = grad[np.newaxis, :]
    if blend_right > 0:
        grad = _linear_gradient(blend_right, fade_from_zero=False)
        mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
    return mask


def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> np.ndarray and mpe.VideoClip:
    """Return `clip` with a static float32 numpy alpha mask attached."""
    static_mask = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    return clip.set_mask(static_mask)


def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Scale `clip` so its width equals `target_w`; aspect ratio is preserved."""
    return clip.resize(width=target_w)


def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Scale `clip` so its height equals `target_h`; aspect ratio is preserved."""
    return clip.resize(height=target_h)


def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force `clip` to exactly `duration`: trim when long enough, loop otherwise."""
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)


# ─────────────────────────────────────────────────────────────────────────────
# Smart Face Cropper
# ─────────────────────────────────────────────────────────────────────────────

class SmartFaceCropper:
    """
    Crop landscape frames to a portrait window centred on the largest
    detected face, with exponential smoothing so the window doesn't jitter.
    """

    def __init__(self, output_size=(1080, 1920)):
        self.output_size = output_size
        # Haar cascade shipped with OpenCV; cheap enough for per-frame use.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.last_coords  = None   # last raw face box (x, y, w, h), if any
        self.smoothed_x   = None   # exponentially smoothed face-centre x
        self.smoothing    = 0.2    # EMA weight given to the newest observation
        self.frame_count  = 0      # frames seen; used to skip detections

    def get_crop_coordinates(self, frame):
        """Detect a face and return the (left, top, right, bottom) crop window."""
        h, w = frame.shape[:2]
        # Width of a crop matching the output aspect ratio at full frame height,
        # clamped to the frame width so the crop never exceeds the source.
        target_w = min(w, int(h * self.output_size[0] / self.output_size[1]))
        gray  = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect on a half-size image for speed; boxes are scaled back up by 2.
        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))

        if len(faces) > 0:
            # Track the largest face by area (no need to sort the whole list).
            fx, fy, fw, fh = [v * 2 for v in max(faces, key=lambda f: f[2] * f[3])]
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No detection: hold the previous smoothed position (or frame centre).
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x

        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            self.smoothed_x = (
                self.smoothed_x * (1 - self.smoothing)
                + current_center_x * self.smoothing
            )

        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return `clip` with the smart portrait crop applied frame by frame."""
        frame_skip = 5  # run full face detection only every 5th frame

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1
            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                left, _, right, _ = self.get_crop_coordinates(frame)
            else:
                # Between detections, reuse the smoothed centre.
                h, w = frame.shape[:2]
                target_w = min(w, int(h * self.output_size[0] / self.output_size[1]))
                # BUG FIX: the original tested `if self.smoothed_x`, which treats
                # a legitimate centre of 0/0.0 as "no value"; test for None.
                if self.smoothed_x is not None:
                    left = int(self.smoothed_x - target_w // 2)
                else:
                    left = w // 2 - target_w // 2
                left  = max(0, min(left, w - target_w))
                right = left + target_w
            return cv2.resize(frame[:, left:right], self.output_size)

        return clip.fl(filter_frame)


# ─────────────────────────────────────────────────────────────────────────────
# Base Style
# ─────────────────────────────────────────────────────────────────────────────

class BaseStyle(ABC):
    """Common base for all output styles; concrete styles implement `apply`."""

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Return the styled version of `clip`; implemented by subclasses."""

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", caption_style="classic", **kwargs):
        """Run `apply`, then composite caption overlays when a transcript is given."""
        styled = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled

        captions = self._create_caption_clips(
            transcript_data, language, caption_mode, caption_style
        )
        if not captions:
            return styled

        # Flatten an existing composite so captions join the same layer stack.
        if isinstance(styled, mpe.CompositeVideoClip):
            base_layers = list(styled.clips)
        else:
            base_layers = [styled]
        return mpe.CompositeVideoClip(base_layers + captions, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None,
                              caption_mode="sentence", caption_style="classic"):
        """Build standalone caption clips via SubtitleManager."""
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
            caption_style=caption_style,
        )


# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Style
# ─────────────────────────────────────────────────────────────────────────────

class CinematicStyle(BaseStyle):
    """Centre the main clip over a background video/image (black when absent)."""

    def apply(self, clip, background_path=None, **kwargs):
        out_w, out_h = self.output_size

        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            if ext in {".mp4", ".avi", ".mov", ".mkv", ".webm"}:
                raw_bg = (
                    mpe.VideoFileClip(background_path)
                    .without_audio()
                    .resize(height=out_h)
                )
                bg = _loop_or_cut(raw_bg, clip.duration)
            else:
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=out_h)
                )
            # Fill the canvas width: centre-crop when too wide, stretch when narrow.
            if bg.w > out_w:
                bg = bg.crop(x_center=bg.w / 2, width=out_w)
            else:
                bg = bg.resize(width=out_w)
        else:
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        # Fit the main clip inside the canvas, preferring full width.
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([bg, main], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Blur Style
# ─────────────────────────────────────────────────────────────────────────────

class CinematicBlurStyle(BaseStyle):
    """Full-frame heavily blurred copy of the clip behind the centred original."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size

        backdrop = clip.resize(height=out_h)
        if backdrop.w < out_w:
            backdrop = clip.resize(width=out_w)

        def heavy_blur(get_frame, t):
            # Shrink to 16x16 and stretch back: a cheap approximation of a
            # very wide blur, then a Gaussian pass to smooth the upscale.
            tiny = cv2.resize(get_frame(t), (16, 16))
            stretched = cv2.resize(
                tiny, (out_w, out_h),
                interpolation=cv2.INTER_LINEAR,
            )
            return cv2.GaussianBlur(stretched, (21, 21), 0)

        backdrop = backdrop.fl(heavy_blur).set_opacity(0.6)

        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([backdrop, main], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Split Vertical  (top / bottom, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────

class SplitVerticalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into top and bottom segments.

    Layout (for a 1080 × 1920 canvas)
    ──────
    • Top segment   : 58 % of canvas height  → 1113 px
    • Bottom segment: remainder + blend zone → 927 px
    • Blend zone    : 120 px overlap where the two clips cross-fade via
                     gradient masks — no hard dividing line visible.

    The gradient is very subtle (linear alpha), so it doesn't destroy
    content near the seam, it just dissolves one clip into the other.
    """

    SPLIT_RATIO  : float = 0.58   # top segment fraction of total height
    BLEND_PX     : int   = 120    # overlap / blend zone height in pixels

    def apply(self, clip, playground_path=None, **kwargs):
        """
        Compose `clip` on top and a playground video below, blended at the seam.

        playground_path: optional video file for the bottom panel; when absent
        or missing on disk, a dimmed (opacity 0.85) copy of `clip` is used.
        """
        W, H       = self.output_size          # canvas, e.g. 1080 × 1920
        blend      = self.BLEND_PX
        h_top_seg  = int(H * self.SPLIT_RATIO)            # 1113 for H=1920
        h_bot_seg  = H - h_top_seg + blend                # 927 (includes overlap)

        # ── Prepare main clip for top segment ───────────────────────────────
        top_clip = _fit_to_width(clip, W)

        # Crop to the top portion we need (+ blend zone so gradient has room)
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))

        # Gradient: fade out the bottom `blend` rows → seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # ── Prepare playground / fallback clip for bottom segment ────────────
        if playground_path and os.path.exists(playground_path):
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: dimmed copy of the same source clip
            bot_src = clip.set_opacity(0.85)

        bot_clip = _fit_to_width(bot_src, W)

        # Keep the lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)

        bot_clip = bot_clip.resize((W, h_bot_seg))

        # Gradient: fade in the top `blend` rows → seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y    = h_top_seg - blend                      # overlaps by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))

        # Top layer is listed last so it renders above the bottom panel.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Split Horizontal  (left / right, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────

class SplitHorizontalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 Γ— 1920) into left and right panels.

    Layout
    ──────
    β€’ Each panel fills the full 1920 px height.
    β€’ Left  panel: 52 % of canvas width β†’ ~562 px
    β€’ Right panel: fills the rest       β†’ ~518 px
    β€’ Blend zone : 80 px overlap with cross-fade gradient masks.

    Both panels are individually cropped to portrait aspect ratio
    (each showing a 540-wide slice of a 1080-wide source),
    then blended at the seam β€” no visible dividing line.
    """

    SPLIT_RATIO : float = 0.52   # left panel fraction of total width
    BLEND_PX    : int   = 80     # horizontal overlap / blend zone

    def apply(self, clip, playground_path=None, **kwargs):
        W, H      = self.output_size          # 1080 Γ— 1920
        blend     = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)             # ~562
        w_right_seg = W - w_left_seg + blend               # ~598 (includes overlap)

        # ── Left panel from main clip ────────────────────────────────────────
        left_src  = _fit_to_height(clip, H)
        lw        = left_src.w

        # Crop the left portion (slightly more than half for a natural look)
        crop_w_l  = min(lw, w_left_seg + blend)
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))

        # Gradient: fade out rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # ── Right panel from playground or fallback ───────────────────────────
        if playground_path and os.path.exists(playground_path):
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            right_src = clip.set_opacity(0.85)

        right_full = _fit_to_height(right_src, H)
        rw         = right_full.w

        # Crop the right portion of the source
        crop_w_r   = min(rw, w_right_seg + blend)
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))

        # Gradient: fade in leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x    = w_left_seg - blend                    # overlaps by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))

        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Vertical Full Style
# ─────────────────────────────────────────────────────────────────────────────

class VerticalFullStyle(BaseStyle):
    """Full-screen portrait style: face-tracked crop of the entire frame."""

    def apply(self, clip, **kwargs):
        face_cropper = SmartFaceCropper(output_size=self.output_size)
        return face_cropper.apply_to_clip(clip)


# ─────────────────────────────────────────────────────────────────────────────
# Style Factory  (unchanged API)
# ─────────────────────────────────────────────────────────────────────────────

class StyleFactory:
    """Registry mapping style names to style classes (unchanged API)."""

    _styles = {
        "cinematic":        CinematicStyle,
        "cinematic_blur":   CinematicBlurStyle,
        "split_vertical":   SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full":    VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under `style_name`.

        Unknown names fall back to CinematicBlurStyle.
        """
        chosen = StyleFactory._styles.get(style_name, CinematicBlurStyle)
        return chosen()