File size: 13,472 Bytes
0c8f7e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
import string
from typing import List, Dict, Tuple
from loguru import logger

from typing import Dict, List


class Caption:
    def is_punctuation(self, text):
        return text in string.punctuation

    def create_subtitle_segments_english(
        self, captions: List[Dict], max_length=80, lines=2
    ):
        """
        Breaks up the captions into segments of max_length characters
        on two lines and merge punctuation with the last word
        """

        if not captions:
            return []

        segments = []
        current_segment_texts = ["" for _ in range(lines)]
        current_line = 0
        segment_start_ts = captions[0]["start_ts"]
        segment_end_ts = captions[0]["end_ts"]

        for caption in captions:
            text = caption["text"]
            start_ts = caption["start_ts"]
            end_ts = caption["end_ts"]

            # Update the segment end timestamp
            segment_end_ts = end_ts

            # If the caption is a punctuation, merge it with the current line
            if self.is_punctuation(text):
                if current_line < lines and current_segment_texts[current_line]:
                    current_segment_texts[current_line] += text
                continue

            # If the line is too long, move to the next one
            if (
                current_line < lines
                and len(current_segment_texts[current_line] + text) > max_length
            ):
                current_line += 1

            # If we've filled all lines, save the current segment and start a new one
            if current_line >= lines:
                segments.append(
                    {
                        "text": current_segment_texts,
                        "start_ts": segment_start_ts,
                        "end_ts": segment_end_ts,
                    }
                )

                # Reset for next segment
                current_segment_texts = ["" for _ in range(lines)]
                current_line = 0
                # Add a small gap (0.05s) between segments to prevent overlap
                segment_start_ts = start_ts + 0.05

            # Add the text to the current segment
            if current_line < lines:
                current_segment_texts[current_line] += (
                    " " if current_segment_texts[current_line] else ""
                )
                current_segment_texts[current_line] += text

        # Add the last segment if there's any content
        if any(current_segment_texts):
            segments.append(
                {
                    "text": current_segment_texts,
                    "start_ts": segment_start_ts,
                    "end_ts": segment_end_ts,
                }
            )

        # Post-processing to ensure no overlaps by adjusting end times if needed
        for i in range(len(segments) - 1):
            if segments[i]["end_ts"] >= segments[i + 1]["start_ts"]:
                segments[i]["end_ts"] = segments[i + 1]["start_ts"] - 0.05

        return segments

    def create_subtitle_segments_international(
        self, captions: List[Dict], max_length=80, lines=2
    ):
        """
        Breaks up international captions (full sentences) into smaller segments that fit
        within max_length characters per line, with proper timing distribution.

        Handles both space-delimited languages like English and character-based languages like Chinese.

        Args:
            captions: List of caption dictionaries with text, start_ts, and end_ts
            max_length: Maximum number of characters per line
            lines: Number of lines per segment

        Returns:
            List of subtitle segments
        """
        if not captions:
            return []

        segments = []

        for caption in captions:
            text = caption["text"].strip()
            start_ts = caption["start_ts"]
            end_ts = caption["end_ts"]
            duration = end_ts - start_ts

            # Check if text is using Chinese/Japanese/Korean characters (CJK)
            # For CJK, we'll split by characters rather than words
            is_cjk = any("\u4e00" <= char <= "\u9fff" for char in text)

            parts = []
            if is_cjk:
                # For CJK languages, process character by character
                current_part = ""
                for char in text:
                    if len(current_part + char) > max_length:
                        parts.append(current_part)
                        current_part = char
                    else:
                        current_part += char

                # Add the last part if not empty
                if current_part:
                    parts.append(current_part)
            else:
                # Original word-based splitting for languages with spaces
                words = text.split()
                current_part = ""

                for word in words:
                    # If adding this word would exceed max_length, start a new part
                    if len(current_part + " " + word) > max_length and current_part:
                        parts.append(current_part.strip())
                        current_part = word
                    else:
                        # Add space if not the first word in the part
                        if current_part:
                            current_part += " "
                        current_part += word

                # Add the last part if not empty
                if current_part:
                    parts.append(current_part.strip())

            # Group parts into segments with 'lines' number of lines per segment
            segment_parts = []
            for i in range(0, len(parts), lines):
                segment_parts.append(parts[i : i + lines])

            # Calculate time proportionally based on segment text length
            total_chars = sum(len("".join(part_group)) for part_group in segment_parts)

            current_time = start_ts
            for i, part_group in enumerate(segment_parts):
                # Get character count for this segment group
                segment_chars = len("".join(part_group))

                # Calculate time proportionally, but ensure at least a minimum duration
                if total_chars > 0:
                    segment_duration = (segment_chars / total_chars) * duration
                    segment_duration = max(
                        segment_duration, 0.5
                    )  # Ensure minimum duration of 0.5s
                else:
                    segment_duration = duration / len(segment_parts)

                segment_start = current_time
                segment_end = segment_start + segment_duration

                # Move current time forward for next segment
                current_time = segment_end

                # Create segment with proper text array format for the subtitle renderer
                segment_text = part_group + [""] * (lines - len(part_group))

                segments.append(
                    {
                        "text": segment_text,
                        "start_ts": segment_start,
                        "end_ts": segment_end,
                    }
                )

        # Ensure no overlaps between segments by adjusting end times if needed
        for i in range(len(segments) - 1):
            if segments[i]["end_ts"] >= segments[i + 1]["start_ts"]:
                segments[i]["end_ts"] = segments[i + 1]["start_ts"] - 0.05

        return segments

    @staticmethod
    def hex_to_ass(hex_color: str, alpha: float = 1.0) -> str:
        """
        Convert a hex color + transparency to ASS &HaaBBGGRR& format.

        :param hex_color: CSS-style color string, e.g. "#FFA07A" or "00ff00"
        :param alpha: transparency from 0.0 (opaque) to 1.0 (fully transparent)
        :return: ASS color string, e.g. "&H8014C8FF&"
        """

        # strip leading '#' if present
        hex_color = hex_color.lstrip('#')

        # support 3-digit shorthand like 'f0a'
        if len(hex_color) == 3:
            hex_color = ''.join([c*2 for c in hex_color])

        if len(hex_color) != 6:
            raise ValueError("hex_color must be in 'RRGGBB' or 'RGB' format")

        # parse RGB
        r = int(hex_color[0:2], 16)
        g = int(hex_color[2:4], 16)
        b = int(hex_color[4:6], 16)

        # ASS alpha is inverted: 00=opaque, FF=transparent
        # so we invert the user's alpha (0.0 = opaque)  
        a = int((1.0 - alpha) * 255)
        a = max(0, min(255, a))

        # build BGR and alpha bytes
        aa = f"{a:02X}"
        bb = f"{b:02X}"
        gg = f"{g:02X}"
        rr = f"{r:02X}"

        return f"&H{aa}{bb}{gg}{rr}"

    def create_subtitle(
        self,
        segments,
        dimensions: Tuple[int, int],
        output_path: str,
        font_size=24, 
        font_color="#fff",
        shadow_color="#000",
        shadow_transparency=0.1,
        shadow_blur=0,
        stroke_color="#000",
        stroke_size=0,
        font_name="Arial",
        font_bold=True,
        font_italic=False,
        subtitle_position="center",
    ):
        width, height = dimensions
        bold_value = -1 if font_bold else 0
        italic_value = -1 if font_italic else 0
        
        position_from_top = 0.2
        if subtitle_position == "center":
            position_from_top = 0.45
        if subtitle_position == "bottom":
            position_from_top = 0.75
        
        ass_content = """[Script Info]
ScriptType: v4.00+
PlayResX: {width}
PlayResY: {height}

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{font_name},{font_size},{font_color},&H000000FF,{stroke_color},&H00000000,{bold},{italic},0,0,100,100,0,0,1,{stroke_size},0,8,20,20,20,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
""".format(
            width=width,
            height=height,
            font_size=font_size,
            font_color=self.hex_to_ass(font_color),
            stroke_color=self.hex_to_ass(stroke_color),
            stroke_size=stroke_size,
            font_name=font_name,
            bold=bold_value,
            italic=italic_value
        )

        pos_x = int(width / 2)
        pos_y = int(height * position_from_top)

        # Process each segment and add to the subtitle file
        for segment in segments:
            start_time = self.format_time(segment["start_ts"])
            end_time = self.format_time(segment["end_ts"])

            # Create text with line breaks
            text_lines = segment["text"]
            formatted_text = ""
            for i, line in enumerate(text_lines):
                if line:  # Only add non-empty lines
                    if i > 0:  # Add line break if not the first line
                        formatted_text += "\\N"
                    formatted_text += line

            # Create shadow if shadow_blur is specified or if we want a drop shadow effect
            if shadow_blur > 0 or shadow_transparency < 1.0:
                # Convert shadow color with transparency
                shadow_color_ass = self.hex_to_ass(shadow_color, shadow_transparency)
                
                # Offset shadow position slightly for drop shadow effect
                shadow_pos_x = pos_x + 2
                shadow_pos_y = pos_y + 2
                
                # For shadow text, use shadow color only for primary color and set proper alpha
                # Only apply shadow color to primary color (\1c) and use alpha for transparency
                shadow_override_tags = f"\\pos({shadow_pos_x},{shadow_pos_y})\\1c{shadow_color_ass}\\bord0"
                
                # Add alpha transparency if needed
                if shadow_transparency > 0:
                    alpha_hex = hex(int((1.0 - shadow_transparency) * 255))[2:].upper().zfill(2)
                    shadow_override_tags += f"\\1a&H{alpha_hex}&"
                
                if shadow_blur > 0:
                    shadow_override_tags += f"\\blur{shadow_blur}"
                
                shadow_formatted_text = f"{{{shadow_override_tags}}}" + formatted_text
                
                # Add shadow dialogue line first (so it appears behind)
                ass_content += f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{shadow_formatted_text}\n"

            # Create main text layer
            main_override_tags = f"\\pos({pos_x},{pos_y})"
            main_formatted_text = f"{{{main_override_tags}}}" + formatted_text
            
            # Add main dialogue line (appears on top)
            ass_content += f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{main_formatted_text}\n"

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(ass_content)

        logger.debug("subtitle (ass) was created with drop shadow")

    def format_time(self, seconds):
        """
        Convert seconds to ASS time format (H:MM:SS.cc)
        """
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        secs = int(seconds % 60)
        centisecs = int((seconds % 1) * 100)

        return f"{hours}:{minutes:02d}:{secs:02d}.{centisecs:02d}"