File size: 20,034 Bytes
0faf659
 
 
 
 
342e0fb
 
 
0faf659
342e0fb
 
 
 
 
 
 
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
 
 
0faf659
 
 
 
 
 
 
342e0fb
 
0faf659
342e0fb
0faf659
 
 
342e0fb
 
0faf659
 
342e0fb
0faf659
342e0fb
 
 
 
 
 
0faf659
 
 
 
342e0fb
 
 
 
 
 
 
0faf659
342e0fb
 
 
 
0faf659
342e0fb
0faf659
342e0fb
0faf659
 
 
 
 
342e0fb
 
0faf659
 
 
 
 
342e0fb
 
 
 
 
 
 
0faf659
 
834dd13
342e0fb
 
 
 
834dd13
 
 
342e0fb
 
 
 
0faf659
 
 
 
342e0fb
 
0faf659
342e0fb
 
 
0faf659
 
342e0fb
0faf659
834dd13
 
342e0fb
0faf659
 
834dd13
342e0fb
 
0faf659
 
 
 
 
342e0fb
 
 
 
0faf659
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
342e0fb
0faf659
342e0fb
 
 
 
0faf659
 
 
 
 
 
 
 
 
 
 
342e0fb
 
 
 
 
 
0faf659
342e0fb
0faf659
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
342e0fb
 
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
 
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
0faf659
 
 
342e0fb
0faf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342e0fb
 
 
 
 
 
0faf659
 
 
 
 
342e0fb
 
0faf659
 
 
342e0fb
0faf659
342e0fb
 
 
 
 
0faf659
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
"""
Video Styles β€” YouTube Shorts Production Engine
SplitVertical & SplitHorizontal rebuilt with seamless gradient blending.
All class/method names kept identical for drop-in integration.
"""
from abc import ABC, abstractmethod
import os
import cv2
import numpy as np
import moviepy.editor as mpe
from .config import Config
from .logger import Logger
from .subtitle_manager import SubtitleManager

logger = Logger.get_logger(__name__)


# ─────────────────────────────────────────────────────────────────────────────
# Gradient Mask Helpers
# ─────────────────────────────────────────────────────────────────────────────

def _linear_gradient(length: int, fade_from_zero: bool) -> np.ndarray:
    """
    Returns a 1-D float32 array [0..1] of given length.
    fade_from_zero=True  β†’ 0 β†’ 1  (clip fades IN at this edge)
    fade_from_zero=False β†’ 1 β†’ 0  (clip fades OUT at this edge)
    """
    arr = np.linspace(0.0, 1.0, length, dtype=np.float32)
    return arr if fade_from_zero else arr[::-1]


def _make_vertical_mask(clip_w: int, clip_h: int,
                        blend_top: int = 0, blend_bottom: int = 0) -> np.ndarray:
    """
    Float32 mask (clip_h Γ— clip_w) in [0,1].
    blend_top    β†’ pixels from top that fade in  (0β†’1)
    blend_bottom β†’ pixels from bottom that fade out (1β†’0)
    """
    mask = np.ones((clip_h, clip_w), dtype=np.float32)
    if blend_top > 0:
        grad = _linear_gradient(blend_top, fade_from_zero=True)
        mask[:blend_top, :] = grad[:, np.newaxis]
    if blend_bottom > 0:
        grad = _linear_gradient(blend_bottom, fade_from_zero=False)
        mask[clip_h - blend_bottom:, :] = grad[:, np.newaxis]
    return mask


def _make_horizontal_mask(clip_w: int, clip_h: int,
                          blend_left: int = 0, blend_right: int = 0) -> np.ndarray:
    """
    Float32 mask (clip_h Γ— clip_w) in [0,1].
    blend_left  β†’ pixels from left  that fade in  (0β†’1)
    blend_right β†’ pixels from right that fade out (1β†’0)
    """
    mask = np.ones((clip_h, clip_w), dtype=np.float32)
    if blend_left > 0:
        grad = _linear_gradient(blend_left, fade_from_zero=True)
        mask[:, :blend_left] = grad[np.newaxis, :]
    if blend_right > 0:
        grad = _linear_gradient(blend_right, fade_from_zero=False)
        mask[:, clip_w - blend_right:] = grad[np.newaxis, :]
    return mask


def _apply_mask(clip: mpe.VideoClip, mask_array: np.ndarray) -> np.ndarray and mpe.VideoClip:
    """Return `clip` with a static float32 numpy alpha mask attached."""
    static_mask = mpe.ImageClip(mask_array, ismask=True, duration=clip.duration)
    return clip.set_mask(static_mask)


def _fit_to_width(clip: mpe.VideoClip, target_w: int) -> mpe.VideoClip:
    """Scale `clip` so its width equals `target_w`; aspect ratio is preserved."""
    return clip.resize(width=target_w)


def _fit_to_height(clip: mpe.VideoClip, target_h: int) -> mpe.VideoClip:
    """Scale `clip` so its height equals `target_h`; aspect ratio is preserved."""
    return clip.resize(height=target_h)


def _loop_or_cut(clip: mpe.VideoClip, duration: float) -> mpe.VideoClip:
    """Force `clip` to exactly `duration`: trim when long enough, loop otherwise."""
    if clip.duration >= duration:
        return clip.subclip(0, duration)
    return clip.loop(duration=duration)


# ─────────────────────────────────────────────────────────────────────────────
# Smart Face Cropper
# ─────────────────────────────────────────────────────────────────────────────

class SmartFaceCropper:
    """
    Crop landscape frames to a portrait window centred on the largest
    detected face, with exponential smoothing so the window doesn't jitter.
    """

    def __init__(self, output_size=(1080, 1920)):
        self.output_size = output_size
        # Haar cascade shipped with OpenCV; cheap enough for per-frame use.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
        )
        self.last_coords  = None   # last raw face box (x, y, w, h), if any
        self.smoothed_x   = None   # exponentially smoothed face-centre x
        self.smoothing    = 0.2    # EMA weight given to the newest observation
        self.frame_count  = 0      # frames seen; used to skip detections

    def get_crop_coordinates(self, frame):
        """Detect a face and return the (left, top, right, bottom) crop window."""
        h, w = frame.shape[:2]
        # Width of a crop matching the output aspect ratio at full frame height,
        # clamped to the frame width so the crop never exceeds the source.
        target_w = min(w, int(h * self.output_size[0] / self.output_size[1]))
        gray  = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Detect on a half-size image for speed; boxes are scaled back up by 2.
        small = cv2.resize(gray, (0, 0), fx=0.5, fy=0.5)
        faces = self.face_cascade.detectMultiScale(small, 1.1, 8, minSize=(50, 50))

        if len(faces) > 0:
            # Track the largest face by area (no need to sort the whole list).
            fx, fy, fw, fh = [v * 2 for v in max(faces, key=lambda f: f[2] * f[3])]
            current_center_x = fx + fw // 2
            self.last_coords = (fx, fy, fw, fh)
        else:
            # No detection: hold the previous smoothed position (or frame centre).
            current_center_x = w // 2 if self.smoothed_x is None else self.smoothed_x

        if self.smoothed_x is None:
            self.smoothed_x = current_center_x
        else:
            self.smoothed_x = (
                self.smoothed_x * (1 - self.smoothing)
                + current_center_x * self.smoothing
            )

        left = int(self.smoothed_x - target_w // 2)
        left = max(0, min(left, w - target_w))
        return left, 0, left + target_w, h

    def apply_to_clip(self, clip):
        """Return `clip` with the smart portrait crop applied frame by frame."""
        frame_skip = 5  # run full face detection only every 5th frame

        def filter_frame(get_frame, t):
            frame = get_frame(t)
            self.frame_count += 1
            if self.frame_count % frame_skip == 0 or self.last_coords is None:
                left, _, right, _ = self.get_crop_coordinates(frame)
            else:
                # Between detections, reuse the smoothed centre.
                h, w = frame.shape[:2]
                target_w = min(w, int(h * self.output_size[0] / self.output_size[1]))
                # BUG FIX: the original tested `if self.smoothed_x`, which treats
                # a legitimate centre of 0/0.0 as "no value"; test for None.
                if self.smoothed_x is not None:
                    left = int(self.smoothed_x - target_w // 2)
                else:
                    left = w // 2 - target_w // 2
                left  = max(0, min(left, w - target_w))
                right = left + target_w
            return cv2.resize(frame[:, left:right], self.output_size)

        return clip.fl(filter_frame)


# ─────────────────────────────────────────────────────────────────────────────
# Base Style
# ─────────────────────────────────────────────────────────────────────────────

class BaseStyle(ABC):
    """Common base for all output styles; concrete styles implement `apply`."""

    def __init__(self, output_size=Config.DEFAULT_SIZE):
        self.output_size = output_size

    @abstractmethod
    def apply(self, clip, **kwargs):
        """Return the styled version of `clip`; implemented by subclasses."""

    def apply_with_captions(self, clip, transcript_data=None, language=None,
                            caption_mode="sentence", caption_style="classic", **kwargs):
        """Run `apply`, then composite caption overlays when a transcript is given."""
        styled = self.apply(clip, **kwargs)
        if not transcript_data:
            return styled

        captions = self._create_caption_clips(
            transcript_data, language, caption_mode, caption_style
        )
        if not captions:
            return styled

        # Flatten an existing composite so captions join the same layer stack.
        if isinstance(styled, mpe.CompositeVideoClip):
            base_layers = list(styled.clips)
        else:
            base_layers = [styled]
        return mpe.CompositeVideoClip(base_layers + captions, size=self.output_size)

    def add_captions(self, clip, transcript_data, language=None, caption_mode="sentence"):
        """Kept for backward compatibility."""
        if not transcript_data:
            return clip
        return SubtitleManager.create_captions(
            clip, transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
        )

    def _create_caption_clips(self, transcript_data, language=None,
                              caption_mode="sentence", caption_style="classic"):
        """Build standalone caption clips via SubtitleManager."""
        return SubtitleManager.create_caption_clips(
            transcript_data, size=self.output_size,
            language=language, caption_mode=caption_mode,
            caption_style=caption_style,
        )


# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Style
# ─────────────────────────────────────────────────────────────────────────────

class CinematicStyle(BaseStyle):
    """Centre the main clip over a background video/image (black when absent)."""

    def apply(self, clip, background_path=None, **kwargs):
        out_w, out_h = self.output_size

        if background_path and os.path.exists(background_path):
            ext = os.path.splitext(background_path)[1].lower()
            if ext in {".mp4", ".avi", ".mov", ".mkv", ".webm"}:
                raw_bg = (
                    mpe.VideoFileClip(background_path)
                    .without_audio()
                    .resize(height=out_h)
                )
                bg = _loop_or_cut(raw_bg, clip.duration)
            else:
                bg = (
                    mpe.ImageClip(background_path)
                    .set_duration(clip.duration)
                    .resize(height=out_h)
                )
            # Fill the canvas width: centre-crop when too wide, stretch when narrow.
            if bg.w > out_w:
                bg = bg.crop(x_center=bg.w / 2, width=out_w)
            else:
                bg = bg.resize(width=out_w)
        else:
            bg = mpe.ColorClip(size=self.output_size, color=(0, 0, 0)).set_duration(clip.duration)

        # Fit the main clip inside the canvas, preferring full width.
        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([bg, main], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Cinematic Blur Style
# ─────────────────────────────────────────────────────────────────────────────

class CinematicBlurStyle(BaseStyle):
    """Full-frame heavily blurred copy of the clip behind the centred original."""

    def apply(self, clip, **kwargs):
        out_w, out_h = self.output_size

        backdrop = clip.resize(height=out_h)
        if backdrop.w < out_w:
            backdrop = clip.resize(width=out_w)

        def heavy_blur(get_frame, t):
            # Shrink to 16x16 and stretch back: a cheap approximation of a
            # very wide blur, then a Gaussian pass to smooth the upscale.
            tiny = cv2.resize(get_frame(t), (16, 16))
            stretched = cv2.resize(
                tiny, (out_w, out_h),
                interpolation=cv2.INTER_LINEAR,
            )
            return cv2.GaussianBlur(stretched, (21, 21), 0)

        backdrop = backdrop.fl(heavy_blur).set_opacity(0.6)

        main = clip.resize(width=out_w).set_position("center")
        if main.h > out_h:
            main = clip.resize(height=out_h).set_position("center")

        return mpe.CompositeVideoClip([backdrop, main], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Split Vertical  (top / bottom, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────

class SplitVerticalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 × 1920) into top and bottom segments.

    Layout (for a 1080 × 1920 canvas)
    ──────
    • Top segment   : 58 % of canvas height  → 1113 px
    • Bottom segment: remainder + blend zone → 927 px
    • Blend zone    : 120 px overlap where the two clips cross-fade via
                     gradient masks — no hard dividing line visible.

    The gradient is very subtle (linear alpha), so it doesn't destroy
    content near the seam, it just dissolves one clip into the other.
    """

    SPLIT_RATIO  : float = 0.58   # top segment fraction of total height
    BLEND_PX     : int   = 120    # overlap / blend zone height in pixels

    def apply(self, clip, playground_path=None, **kwargs):
        """
        Compose `clip` on top and a playground video below, blended at the seam.

        playground_path: optional video file for the bottom panel; when absent
        or missing on disk, a dimmed (opacity 0.85) copy of `clip` is used.
        """
        W, H       = self.output_size          # canvas, e.g. 1080 × 1920
        blend      = self.BLEND_PX
        h_top_seg  = int(H * self.SPLIT_RATIO)            # 1113 for H=1920
        h_bot_seg  = H - h_top_seg + blend                # 927 (includes overlap)

        # ── Prepare main clip for top segment ───────────────────────────────
        top_clip = _fit_to_width(clip, W)

        # Crop to the top portion we need (+ blend zone so gradient has room)
        top_h = min(top_clip.h, h_top_seg + blend // 2)
        top_clip = top_clip.crop(x1=0, y1=0, x2=W, y2=top_h).resize((W, h_top_seg))

        # Gradient: fade out the bottom `blend` rows → seamless merge
        top_mask = _make_vertical_mask(W, h_top_seg, blend_bottom=blend)
        top_clip = _apply_mask(top_clip, top_mask).set_position((0, 0))

        # ── Prepare playground / fallback clip for bottom segment ────────────
        if playground_path and os.path.exists(playground_path):
            bot_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            # Fallback: dimmed copy of the same source clip
            bot_src = clip.set_opacity(0.85)

        bot_clip = _fit_to_width(bot_src, W)

        # Keep the lower portion of the source for the bottom panel
        if bot_clip.h > h_bot_seg:
            y_start = max(0, bot_clip.h - h_bot_seg)
            bot_clip = bot_clip.crop(x1=0, y1=y_start,
                                     x2=W, y2=bot_clip.h)

        bot_clip = bot_clip.resize((W, h_bot_seg))

        # Gradient: fade in the top `blend` rows → seamless merge
        bot_mask = _make_vertical_mask(W, h_bot_seg, blend_top=blend)
        bot_y    = h_top_seg - blend                      # overlaps by `blend` px
        bot_clip = _apply_mask(bot_clip, bot_mask).set_position((0, bot_y))

        # Top layer is listed last so it renders above the bottom panel.
        return mpe.CompositeVideoClip([bot_clip, top_clip], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Split Horizontal  (left / right, seamless gradient blend)
# ─────────────────────────────────────────────────────────────────────────────

class SplitHorizontalStyle(BaseStyle):
    """
    Splits the Shorts canvas (1080 Γ— 1920) into left and right panels.

    Layout
    ──────
    β€’ Each panel fills the full 1920 px height.
    β€’ Left  panel: 52 % of canvas width β†’ ~562 px
    β€’ Right panel: fills the rest       β†’ ~518 px
    β€’ Blend zone : 80 px overlap with cross-fade gradient masks.

    Both panels are individually cropped to portrait aspect ratio
    (each showing a 540-wide slice of a 1080-wide source),
    then blended at the seam β€” no visible dividing line.
    """

    SPLIT_RATIO : float = 0.52   # left panel fraction of total width
    BLEND_PX    : int   = 80     # horizontal overlap / blend zone

    def apply(self, clip, playground_path=None, **kwargs):
        W, H      = self.output_size          # 1080 Γ— 1920
        blend     = self.BLEND_PX
        w_left_seg = int(W * self.SPLIT_RATIO)             # ~562
        w_right_seg = W - w_left_seg + blend               # ~598 (includes overlap)

        # ── Left panel from main clip ────────────────────────────────────────
        left_src  = _fit_to_height(clip, H)
        lw        = left_src.w

        # Crop the left portion (slightly more than half for a natural look)
        crop_w_l  = min(lw, w_left_seg + blend)
        left_clip = left_src.crop(x1=max(0, lw // 2 - crop_w_l),
                                  y1=0, x2=lw // 2, y2=H)
        left_clip = left_clip.resize((w_left_seg, H))

        # Gradient: fade out rightmost `blend` columns
        left_mask = _make_horizontal_mask(w_left_seg, H, blend_right=blend)
        left_clip = _apply_mask(left_clip, left_mask).set_position((0, 0))

        # ── Right panel from playground or fallback ───────────────────────────
        if playground_path and os.path.exists(playground_path):
            right_src = _loop_or_cut(
                mpe.VideoFileClip(playground_path).without_audio(), clip.duration
            )
        else:
            right_src = clip.set_opacity(0.85)

        right_full = _fit_to_height(right_src, H)
        rw         = right_full.w

        # Crop the right portion of the source
        crop_w_r   = min(rw, w_right_seg + blend)
        right_clip = right_full.crop(x1=rw // 2, y1=0,
                                     x2=rw // 2 + crop_w_r, y2=H)
        right_clip = right_clip.resize((w_right_seg, H))

        # Gradient: fade in leftmost `blend` columns
        right_mask = _make_horizontal_mask(w_right_seg, H, blend_left=blend)
        right_x    = w_left_seg - blend                    # overlaps by `blend` px
        right_clip = _apply_mask(right_clip, right_mask).set_position((right_x, 0))

        return mpe.CompositeVideoClip([right_clip, left_clip], size=self.output_size)


# ─────────────────────────────────────────────────────────────────────────────
# Vertical Full Style
# ─────────────────────────────────────────────────────────────────────────────

class VerticalFullStyle(BaseStyle):
    """Full-screen portrait style: face-tracked crop of the entire frame."""

    def apply(self, clip, **kwargs):
        face_cropper = SmartFaceCropper(output_size=self.output_size)
        return face_cropper.apply_to_clip(clip)


# ─────────────────────────────────────────────────────────────────────────────
# Style Factory  (unchanged API)
# ─────────────────────────────────────────────────────────────────────────────

class StyleFactory:
    """Registry mapping style names to style classes (unchanged API)."""

    _styles = {
        "cinematic":        CinematicStyle,
        "cinematic_blur":   CinematicBlurStyle,
        "split_vertical":   SplitVerticalStyle,
        "split_horizontal": SplitHorizontalStyle,
        "vertical_full":    VerticalFullStyle,
    }

    @staticmethod
    def get_style(style_name) -> BaseStyle:
        """Instantiate the style registered under `style_name`.

        Unknown names fall back to CinematicBlurStyle.
        """
        chosen = StyleFactory._styles.get(style_name, CinematicBlurStyle)
        return chosen()