import string
from typing import Dict, List, Tuple

try:
    from loguru import logger
except ImportError:
    # loguru is only used for one debug message; fall back to the stdlib
    # logger so this module stays importable where loguru is not installed.
    import logging

    logger = logging.getLogger(__name__)


class Caption:
    """Turn timed captions into subtitle segments and render them as an ASS file.

    A *caption* is a dict with ``text``, ``start_ts`` and ``end_ts`` keys
    (timestamps in seconds).  A *segment* has the same keys, but ``text`` is a
    list of exactly ``lines`` strings (one per rendered line, padded with "").
    """

    # Gap (seconds) kept between consecutive segments so they never overlap.
    _SEGMENT_GAP = 0.05
    # Smallest display time (seconds) the international splitter aims for.
    _MIN_SEGMENT_DURATION = 0.5

    # ASS header; placeholders are filled in by ``create_subtitle``.
    _ASS_HEADER_TEMPLATE = "\n".join(
        (
            "[Script Info]",
            "ScriptType: v4.00+",
            "PlayResX: {width}",
            "PlayResY: {height}",
            "",
            "[V4+ Styles]",
            "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, "
            "OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, "
            "ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, "
            "Alignment, MarginL, MarginR, MarginV, Encoding",
            "Style: Default,{font_name},{font_size},{font_color},&H000000FF,"
            "{stroke_color},&H00000000,{bold},{italic},0,0,100,100,0,0,1,"
            "{stroke_size},0,8,20,20,20,1",
            "",
            "[Events]",
            "Format: Layer, Start, End, Style, Name, MarginL, MarginR, "
            "MarginV, Effect, Text",
            "",
        )
    )

    def is_punctuation(self, text):
        """Return True when every character of ``text`` is ASCII punctuation.

        The check is per character, so multi-character tokens such as "..."
        or "!?" count as punctuation (a plain ``text in string.punctuation``
        is a substring test and misses them).  The empty string is still
        reported as punctuation, as it was before.
        """
        return all(char in string.punctuation for char in text)

    @staticmethod
    def _trim_overlaps(segments, gap=0.05):
        """Shorten, in place, every segment that runs into the next one.

        The end time is pulled back to ``gap`` seconds before the following
        segment starts, but never before the segment's own start, so no
        segment ends up with a negative duration.
        """
        for current, following in zip(segments, segments[1:]):
            if current["end_ts"] >= following["start_ts"]:
                current["end_ts"] = max(
                    current["start_ts"], following["start_ts"] - gap
                )

    def create_subtitle_segments_english(
        self, captions: List[Dict], max_length=80, lines=2
    ):
        """Pack word-level captions into segments of ``lines`` lines.

        Each line holds at most ``max_length`` characters (counting the spaces
        between words).  Punctuation-only captions are glued to the preceding
        word without a space and never trigger a line break.  A single word
        longer than ``max_length`` is kept whole on a line of its own.

        Args:
            captions: Caption dicts with ``text``, ``start_ts`` and ``end_ts``.
            max_length: Maximum number of characters per line.
            lines: Number of lines per segment.

        Returns:
            List of segment dicts; empty when ``captions`` is empty.
        """
        if not captions:
            return []

        segments = []
        current_segment_texts = ["" for _ in range(lines)]
        current_line = 0
        segment_start_ts = captions[0]["start_ts"]
        segment_end_ts = captions[0]["end_ts"]

        for caption in captions:
            text = caption["text"]
            start_ts = caption["start_ts"]
            end_ts = caption["end_ts"]

            # NOTE: this runs before the overflow check below, so a flushed
            # segment first carries the end time of the caption that opens
            # the next segment; _trim_overlaps() then pulls it back to just
            # before that next segment starts.
            segment_end_ts = end_ts

            # Punctuation is merged into the current line (dropped when the
            # line is still empty, i.e. there is no word to attach it to).
            if self.is_punctuation(text):
                if current_line < lines and current_segment_texts[current_line]:
                    current_segment_texts[current_line] += text
                continue

            # Wrap only when the line already has content: an over-long word
            # on an empty line cannot be helped by moving to the next line.
            # The "+ 1" accounts for the joining space.
            if current_line < lines:
                line_so_far = current_segment_texts[current_line]
                if line_so_far and len(line_so_far) + 1 + len(text) > max_length:
                    current_line += 1

            # All lines used up: emit the segment and start a fresh one.
            if current_line >= lines:
                segments.append(
                    {
                        "text": current_segment_texts,
                        "start_ts": segment_start_ts,
                        "end_ts": segment_end_ts,
                    }
                )
                current_segment_texts = ["" for _ in range(lines)]
                current_line = 0
                # Small gap between segments to prevent overlap.
                segment_start_ts = start_ts + self._SEGMENT_GAP

            if current_line < lines:
                if current_segment_texts[current_line]:
                    current_segment_texts[current_line] += " "
                current_segment_texts[current_line] += text

        # Emit whatever is left in the buffer.
        if any(current_segment_texts):
            segments.append(
                {
                    "text": current_segment_texts,
                    "start_ts": segment_start_ts,
                    "end_ts": segment_end_ts,
                }
            )

        self._trim_overlaps(segments, self._SEGMENT_GAP)
        return segments

    @staticmethod
    def _split_text(text, max_length):
        """Split ``text`` into parts of at most ``max_length`` characters.

        Text containing CJK unified ideographs (U+4E00..U+9FFF) is split per
        character; anything else is split on whitespace, never breaking a word.
        """
        parts = []
        current_part = ""

        if any("\u4e00" <= char <= "\u9fff" for char in text):
            for char in text:
                if len(current_part) + 1 > max_length:
                    parts.append(current_part)
                    current_part = char
                else:
                    current_part += char
            if current_part:
                parts.append(current_part)
            return parts

        for word in text.split():
            # Start a new part when appending " word" would overflow.
            if current_part and len(current_part) + 1 + len(word) > max_length:
                parts.append(current_part.strip())
                current_part = word
            else:
                current_part = f"{current_part} {word}" if current_part else word
        if current_part:
            parts.append(current_part.strip())
        return parts

    @classmethod
    def _allocate_durations(cls, char_counts, duration):
        """Split ``duration`` across groups in proportion to their length.

        Every share is raised to the minimum segment duration.  When there
        are several groups and the raised shares no longer fit, they are
        scaled back so their sum equals ``duration``; otherwise later groups
        would drift past the caption's end.  A single group keeps the floor
        (any overrun is trimmed against the following segment).
        """
        total_chars = sum(char_counts)
        if total_chars > 0:
            shares = [
                max(count / total_chars * duration, cls._MIN_SEGMENT_DURATION)
                for count in char_counts
            ]
        else:
            shares = [duration / len(char_counts)] * len(char_counts)

        total = sum(shares)
        if len(shares) > 1 and duration > 0 and total > duration:
            scale = duration / total
            shares = [share * scale for share in shares]
        return shares

    def create_subtitle_segments_international(
        self, captions: List[Dict], max_length=80, lines=2
    ):
        """Break sentence-level captions into segments with distributed timing.

        Each caption's text is split into parts of at most ``max_length``
        characters (per character for text containing CJK ideographs, per
        word otherwise), the parts are grouped ``lines`` at a time, and the
        caption's time span is shared between the groups in proportion to
        their character count.

        Args:
            captions: Caption dicts with ``text``, ``start_ts`` and ``end_ts``.
            max_length: Maximum number of characters per line.
            lines: Number of lines per segment.

        Returns:
            List of segment dicts; empty when ``captions`` is empty.
        """
        if not captions:
            return []

        segments = []
        for caption in captions:
            text = caption["text"].strip()
            start_ts = caption["start_ts"]
            duration = caption["end_ts"] - start_ts

            parts = self._split_text(text, max_length)
            groups = [parts[i : i + lines] for i in range(0, len(parts), lines)]
            if not groups:
                continue

            durations = self._allocate_durations(
                [len("".join(group)) for group in groups], duration
            )

            current_time = start_ts
            for group, segment_duration in zip(groups, durations):
                segment_end = current_time + segment_duration
                segments.append(
                    {
                        # Pad to exactly ``lines`` entries for the renderer.
                        "text": group + [""] * (lines - len(group)),
                        "start_ts": current_time,
                        "end_ts": segment_end,
                    }
                )
                current_time = segment_end

        self._trim_overlaps(segments, self._SEGMENT_GAP)
        return segments

    @staticmethod
    def hex_to_ass(hex_color: str, alpha: float = 1.0) -> str:
        """Convert a hex colour plus opacity to an ASS ``&HAABBGGRR`` string.

        :param hex_color: CSS-style colour, e.g. "#FFA07A", "00ff00" or "f0a"
        :param alpha: opacity from 0.0 (fully transparent) to 1.0 (opaque);
            values outside that range are clamped
        :return: ASS colour string without a trailing ampersand,
            e.g. ``hex_to_ass("#FFA07A", 0.5) == "&H7F7AA0FF"``
        :raises ValueError: if ``hex_color`` is not 3 or 6 hex digits
        """
        hex_color = hex_color.lstrip("#")

        # Expand 3-digit shorthand such as 'f0a' to 'ff00aa'.
        if len(hex_color) == 3:
            hex_color = "".join(c * 2 for c in hex_color)

        if len(hex_color) != 6:
            raise ValueError("hex_color must be in 'RRGGBB' or 'RGB' format")

        r = int(hex_color[0:2], 16)
        g = int(hex_color[2:4], 16)
        b = int(hex_color[4:6], 16)

        # ASS alpha is inverted (00 = opaque, FF = transparent), hence 1 - alpha.
        a = max(0, min(255, int((1.0 - alpha) * 255)))

        # ASS stores the channels as alpha, blue, green, red.
        return f"&H{a:02X}{b:02X}{g:02X}{r:02X}"

    def create_subtitle(
        self,
        segments,
        dimensions: Tuple[int, int],
        output_path: str,
        font_size=24,
        font_color="#fff",
        shadow_color="#000",
        shadow_transparency=0.1,
        shadow_blur=0,
        stroke_color="#000",
        stroke_size=0,
        font_name="Arial",
        font_bold=True,
        font_italic=False,
        subtitle_position="center",
    ):
        """Write ``segments`` to ``output_path`` as an ASS subtitle file.

        Every segment becomes one dialogue event positioned at the horizontal
        centre of the frame; when a shadow is requested, a second event offset
        by 2 px is written first so it renders behind the main text.

        Args:
            segments: Segment dicts (``text`` list, ``start_ts``, ``end_ts``).
            dimensions: ``(width, height)`` used for PlayResX / PlayResY.
            output_path: Destination file, written as UTF-8.
            font_size, font_name, font_bold, font_italic: Default style values.
            font_color, stroke_color, shadow_color: Hex colours.
            shadow_transparency: A shadow is drawn when this is below 1.0 or
                ``shadow_blur`` is positive.
                NOTE(review): with the current formula a *larger* value gives
                a more opaque shadow, which the name does not suggest; confirm
                the intended direction with callers.
            shadow_blur: ``\\blur`` strength for the shadow (0 disables blur).
            stroke_size: Outline width of the default style.
            subtitle_position: "center" or "bottom"; anything else places the
                text near the top.
        """
        width, height = dimensions
        position_from_top = {"center": 0.45, "bottom": 0.75}.get(
            subtitle_position, 0.2
        )

        header = self._ASS_HEADER_TEMPLATE.format(
            width=width,
            height=height,
            font_size=font_size,
            font_color=self.hex_to_ass(font_color),
            stroke_color=self.hex_to_ass(stroke_color),
            stroke_size=stroke_size,
            font_name=font_name,
            # ASS uses -1 for "on" and 0 for "off".
            bold=-1 if font_bold else 0,
            italic=-1 if font_italic else 0,
        )

        pos_x = int(width / 2)
        pos_y = int(height * position_from_top)

        draw_shadow = shadow_blur > 0 or shadow_transparency < 1.0
        shadow_override_tags = ""
        if draw_shadow:
            # The shadow tags do not depend on the segment, so build them once.
            shadow_color_ass = self.hex_to_ass(shadow_color, shadow_transparency)
            shadow_override_tags = (
                f"\\pos({pos_x + 2},{pos_y + 2})\\1c{shadow_color_ass}\\bord0"
            )
            if shadow_transparency > 0:
                # Clamp so out-of-range input cannot produce a malformed tag.
                alpha_value = max(
                    0, min(255, int((1.0 - shadow_transparency) * 255))
                )
                shadow_override_tags += f"\\1a&H{alpha_value:02X}&"
            if shadow_blur > 0:
                shadow_override_tags += f"\\blur{shadow_blur}"

        main_override_tags = f"\\pos({pos_x},{pos_y})"

        events = []
        for segment in segments:
            start_time = self.format_time(segment["start_ts"])
            end_time = self.format_time(segment["end_ts"])

            # Join only the non-empty lines so that an empty first line does
            # not produce a leading "\N" (a blank line above the text).
            formatted_text = "\\N".join(line for line in segment["text"] if line)

            prefix = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,"
            if draw_shadow:
                # Shadow first so it is rendered behind the main text.
                events.append(
                    f"{prefix}{{{shadow_override_tags}}}{formatted_text}\n"
                )
            events.append(f"{prefix}{{{main_override_tags}}}{formatted_text}\n")

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(header + "".join(events))
        logger.debug("subtitle (ass) was created with drop shadow")

    def format_time(self, seconds):
        """Convert seconds to the ASS time format ``H:MM:SS.cc``.

        The value is rounded to the nearest centisecond *before* being split
        into fields, so float noise (1.15 stored as 1.1499999...) cannot drop
        a centisecond and a carry propagates correctly (59.999 -> 0:01:00.00).
        Negative input is clamped to zero.
        """
        total_centis = max(0, int(round(seconds * 100)))
        total_secs, centisecs = divmod(total_centis, 100)
        total_minutes, secs = divmod(total_secs, 60)
        hours, minutes = divmod(total_minutes, 60)
        return f"{hours}:{minutes:02d}:{secs:02d}.{centisecs:02d}"