# Spaces: sam12345324/shortsrender (Sleeping)
# File size: 13,472 Bytes
# Revision: 0c8f7e3

import string
from typing import List, Dict, Tuple
from loguru import logger

from typing import Dict, List


class Caption:
    """Turn timed captions into subtitle segments and render them as an ASS file.

    A caption is a dict with the keys ``text``, ``start_ts`` and ``end_ts``
    (timestamps in seconds). A segment is a dict with the same keys, except
    that ``text`` is a list holding one string per subtitle line.
    """

    # Gap (seconds) kept between consecutive segments so they never overlap.
    _SEGMENT_GAP = 0.05
    # Preferred minimum on-screen time (seconds) of a split international segment.
    _MIN_SEGMENT_DURATION = 0.5
    # Vertical anchor as a fraction of the frame height, per subtitle_position.
    _POSITION_FROM_TOP = {"center": 0.45, "bottom": 0.75}
    # Anchor used for any other subtitle_position value (i.e. "top").
    _DEFAULT_POSITION_FROM_TOP = 0.2
    # Code point ranges of scripts written without spaces between words:
    # Hiragana + Katakana, and CJK Unified Ideographs.
    _UNSPACED_RANGES = (("\u3040", "\u30ff"), ("\u4e00", "\u9fff"))
    # Script header, style definition and event format of the generated file.
    _ASS_HEADER = """[Script Info]
ScriptType: v4.00+
PlayResX: {width}
PlayResY: {height}

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{font_name},{font_size},{font_color},&H000000FF,{stroke_color},&H00000000,{bold},{italic},0,0,100,100,0,0,1,{stroke_size},0,8,20,20,20,1

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    def is_punctuation(self, text):
        """
        Return True when text is non-empty and made only of ASCII punctuation.

        Every character is checked individually, so runs such as "..." or
        "?!" count as punctuation while the empty string does not.
        """
        return bool(text) and all(char in string.punctuation for char in text)

    def _separate_segments(self, segments):
        """
        Shorten, in place, every segment that reaches into the following one.

        The adjusted end is placed _SEGMENT_GAP before the next start, but
        never before the segment's own start.
        """
        for current, following in zip(segments, segments[1:]):
            if current["end_ts"] >= following["start_ts"]:
                current["end_ts"] = max(
                    current["start_ts"],
                    following["start_ts"] - self._SEGMENT_GAP,
                )
        return segments

    def create_subtitle_segments_english(
        self, captions: List[Dict], max_length=80, lines=2
    ):
        """
        Group word-level captions into segments of `lines` lines, each at
        most max_length characters long (joining spaces included), merging
        punctuation captions into the preceding word.

        A single word longer than max_length is kept whole on a line of its
        own. Punctuation arriving while the current line is empty is dropped,
        and captions with empty text only extend the segment timing.

        Args:
            captions: List of caption dictionaries with text, start_ts, and end_ts
            max_length: Maximum number of characters per line
            lines: Number of lines per segment

        Returns:
            List of subtitle segments
        """
        if not captions:
            return []

        segments = []
        current_segment_texts = [""] * lines
        current_line = 0
        segment_start_ts = captions[0]["start_ts"]
        segment_end_ts = captions[0]["end_ts"]

        for caption in captions:
            text = caption["text"]
            start_ts = caption["start_ts"]

            # Every caption, rendered or not, extends the running segment.
            segment_end_ts = caption["end_ts"]

            if not text:
                continue

            # Punctuation is glued to the word before it, without a space.
            if self.is_punctuation(text):
                if current_line < lines and current_segment_texts[current_line]:
                    current_segment_texts[current_line] += text
                continue

            # Move to the next line only when this one already holds text and
            # the word (plus its joining space) would not fit. An empty line
            # always accepts the word, so no blank line or segment is created.
            if current_line < lines:
                line_text = current_segment_texts[current_line]
                if line_text and len(line_text) + 1 + len(text) > max_length:
                    current_line += 1

            # All lines are full: close the segment and start a new one.
            if current_line >= lines:
                segments.append(
                    {
                        "text": current_segment_texts,
                        "start_ts": segment_start_ts,
                        "end_ts": segment_end_ts,
                    }
                )
                current_segment_texts = [""] * lines
                current_line = 0
                # Start slightly after the word begins to leave a small gap.
                segment_start_ts = start_ts + self._SEGMENT_GAP

            if current_line < lines:
                line_text = current_segment_texts[current_line]
                current_segment_texts[current_line] = (
                    f"{line_text} {text}" if line_text else text
                )

        # Keep the trailing, partially filled segment.
        if any(current_segment_texts):
            segments.append(
                {
                    "text": current_segment_texts,
                    "start_ts": segment_start_ts,
                    "end_ts": segment_end_ts,
                }
            )

        # A closed segment carries the end time of the word that opened the
        # next one, so its end is pulled back here.
        return self._separate_segments(segments)

    def _uses_unspaced_script(self, text):
        """Return True if text contains kana or CJK ideograph characters."""
        return any(
            low <= char <= high
            for char in text
            for low, high in self._UNSPACED_RANGES
        )

    def _split_into_parts(self, text, max_length):
        """
        Split text into line-sized parts of at most max_length characters.

        Text containing kana or CJK ideographs is cut character by character;
        anything else is cut at whitespace, so a single word longer than
        max_length stays whole.
        """
        parts = []
        current = ""

        if self._uses_unspaced_script(text):
            for char in text:
                if current and len(current) + 1 > max_length:
                    parts.append(current)
                    current = char
                else:
                    current += char
        else:
            for word in text.split():
                if current and len(current) + 1 + len(word) > max_length:
                    parts.append(current)
                    current = word
                else:
                    current = f"{current} {word}" if current else word

        if current:
            parts.append(current)
        return parts

    def _distribute_duration(self, groups, duration, available=None):
        """
        Return one display duration per group, proportional to its text length.

        When a share falls below _MIN_SEGMENT_DURATION the short groups are
        raised to that minimum. The total may then exceed `duration`, but it
        is scaled so that it never exceeds `available` (time until the next
        caption starts) unless `duration` itself is larger.
        """
        weights = [len("".join(group)) for group in groups]
        total_chars = sum(weights)
        if total_chars <= 0:
            return [duration / len(groups)] * len(groups)

        shares = [weight / total_chars * duration for weight in weights]
        if min(shares) >= self._MIN_SEGMENT_DURATION:
            return shares

        boosted = [max(share, self._MIN_SEGMENT_DURATION) for share in shares]
        wanted = sum(boosted)
        budget = wanted if available is None else min(wanted, available)
        budget = max(budget, duration)
        return [share * budget / wanted for share in boosted]

    def create_subtitle_segments_international(
        self, captions: List[Dict], max_length=80, lines=2
    ):
        """
        Breaks up international captions (full sentences) into smaller segments that fit
        within max_length characters per line, with proper timing distribution.

        Handles space-delimited languages as well as Chinese and Japanese
        text, which is split by character. Each caption's time span is shared
        between its segments in proportion to their text length. Segments
        shorter than 0.5s are lengthened where possible, but a caption's
        segments never run past the start of the following caption.

        NOTE(review): captions are assumed to be in chronological order.

        Args:
            captions: List of caption dictionaries with text, start_ts, and end_ts
            max_length: Maximum number of characters per line
            lines: Number of lines per segment

        Returns:
            List of subtitle segments
        """
        if not captions:
            return []

        segments = []

        for index, caption in enumerate(captions):
            text = caption["text"].strip()
            start_ts = caption["start_ts"]
            # A caption whose end precedes its start is treated as instantaneous.
            duration = max(caption["end_ts"] - start_ts, 0.0)

            parts = self._split_into_parts(text, max_length)
            groups = [parts[i : i + lines] for i in range(0, len(parts), lines)]
            if not groups:
                continue

            # Time left before the next caption begins (unbounded for the last).
            available = None
            if index + 1 < len(captions):
                available = captions[index + 1]["start_ts"] - start_ts

            durations = self._distribute_duration(groups, duration, available)

            cursor = start_ts
            for group, span in zip(groups, durations):
                segment_end = cursor + span
                segments.append(
                    {
                        # Pad so every segment has exactly `lines` entries.
                        "text": group + [""] * (lines - len(group)),
                        "start_ts": cursor,
                        "end_ts": segment_end,
                    }
                )
                cursor = segment_end

        # Adjacent segments touch, so this also inserts the small gap.
        return self._separate_segments(segments)

    @staticmethod
    def hex_to_ass(hex_color: str, alpha: float = 1.0) -> str:
        """
        Convert a hex color + opacity to the ASS &HAABBGGRR format.

        :param hex_color: CSS-style color string, e.g. "#FFA07A" or "00ff00";
            the 3-digit shorthand ("f0a") is accepted as well
        :param alpha: opacity from 0.0 (fully transparent) to 1.0 (opaque,
            the default); values outside that range are clamped
        :return: ASS color string, e.g. "&H007AA0FF" for "#FFA07A"
        :raises ValueError: if hex_color is not 3 or 6 hex digits
        """
        hex_color = hex_color.strip().lstrip("#")

        # Expand the 3-digit shorthand: 'f0a' -> 'ff00aa'.
        if len(hex_color) == 3:
            hex_color = "".join(char * 2 for char in hex_color)

        if len(hex_color) != 6:
            raise ValueError("hex_color must be in 'RRGGBB' or 'RGB' format")

        r = int(hex_color[0:2], 16)
        g = int(hex_color[2:4], 16)
        b = int(hex_color[4:6], 16)

        # The ASS alpha byte measures transparency (00=opaque, FF=transparent),
        # so the opacity is inverted before scaling.
        a = max(0, min(255, int((1.0 - alpha) * 255)))

        return f"&H{a:02X}{b:02X}{g:02X}{r:02X}"

    def _build_ass_document(
        self,
        segments,
        dimensions,
        font_size,
        font_color,
        shadow_color,
        shadow_transparency,
        shadow_blur,
        stroke_color,
        stroke_size,
        font_name,
        font_bold,
        font_italic,
        subtitle_position,
    ):
        """Return the complete ASS document for segments as a single string."""
        width, height = dimensions

        header = self._ASS_HEADER.format(
            width=width,
            height=height,
            font_size=font_size,
            font_color=self.hex_to_ass(font_color),
            stroke_color=self.hex_to_ass(stroke_color),
            stroke_size=stroke_size,
            font_name=font_name,
            # ASS styles use -1 for "on" and 0 for "off".
            bold=-1 if font_bold else 0,
            italic=-1 if font_italic else 0,
        )

        position_from_top = self._POSITION_FROM_TOP.get(
            subtitle_position, self._DEFAULT_POSITION_FROM_TOP
        )
        pos_x = int(width / 2)
        pos_y = int(height * position_from_top)

        main_prefix = f"{{\\pos({pos_x},{pos_y})}}"

        # The shadow is a second copy of the text, drawn first and offset by
        # two pixels. Its tags do not depend on the segment, so build them once.
        shadow_prefix = None
        if shadow_blur > 0 or shadow_transparency < 1.0:
            # \1c takes a 6-digit &HBBGGRR& value: drop "&H" and the alpha byte.
            shadow_bgr = self.hex_to_ass(shadow_color)[4:]
            shadow_tags = (
                f"\\pos({pos_x + 2},{pos_y + 2})\\1c&H{shadow_bgr}&\\bord0"
            )
            if shadow_transparency > 0:
                # ASS alpha is a transparency (00=opaque, FF=transparent).
                alpha = max(0, min(255, int(shadow_transparency * 255)))
                shadow_tags += f"\\1a&H{alpha:02X}&"
            if shadow_blur > 0:
                shadow_tags += f"\\blur{shadow_blur}"
            shadow_prefix = f"{{{shadow_tags}}}"

        events = []
        for segment in segments:
            # Join only the non-empty lines so no leading or doubled line
            # break is emitted.
            text = "\\N".join(line for line in segment["text"] if line)
            if not text:
                continue

            start_time = self.format_time(segment["start_ts"])
            end_time = self.format_time(segment["end_ts"])

            if shadow_prefix is not None:
                # Shadow first, so the main text is drawn over it.
                events.append(
                    f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{shadow_prefix}{text}\n"
                )
            events.append(
                f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{main_prefix}{text}\n"
            )

        return header + "".join(events)

    def create_subtitle(
        self,
        segments,
        dimensions: Tuple[int, int],
        output_path: str,
        font_size=24,
        font_color="#fff",
        shadow_color="#000",
        shadow_transparency=0.1,
        shadow_blur=0,
        stroke_color="#000",
        stroke_size=0,
        font_name="Arial",
        font_bold=True,
        font_italic=False,
        subtitle_position="center",
    ):
        """
        Write segments to output_path as an ASS subtitle file (UTF-8).

        Args:
            segments: List of subtitle segments (text lines, start_ts, end_ts)
            dimensions: (width, height) used as the script resolution
            output_path: Path of the file to write
            font_size: Font size of the default style
            font_color: Text color as a hex string
            shadow_color: Drop shadow color as a hex string
            shadow_transparency: 0.0 (opaque shadow) to 1.0 (fully
                transparent); at 1.0 with no blur the shadow is omitted
            shadow_blur: Blur strength applied to the shadow, 0 for none
            stroke_color: Outline color as a hex string
            stroke_size: Outline width
            font_name: Font family name
            font_bold: Whether the text is bold
            font_italic: Whether the text is italic
            subtitle_position: "center" or "bottom"; any other value places
                the text near the top
        """
        ass_content = self._build_ass_document(
            segments,
            dimensions,
            font_size=font_size,
            font_color=font_color,
            shadow_color=shadow_color,
            shadow_transparency=shadow_transparency,
            shadow_blur=shadow_blur,
            stroke_color=stroke_color,
            stroke_size=stroke_size,
            font_name=font_name,
            font_bold=font_bold,
            font_italic=font_italic,
            subtitle_position=subtitle_position,
        )

        with open(output_path, "w", encoding="utf-8") as f:
            f.write(ass_content)

        logger.debug("subtitle (ass) was created with drop shadow")

    def format_time(self, seconds):
        """
        Convert seconds to ASS time format (H:MM:SS.cc)

        The value is rounded to the nearest centisecond before being split
        into fields, and negative values are rendered as 0:00:00.00.
        """
        total_centisecs = max(0, int(round(seconds * 100)))
        hours, remainder = divmod(total_centisecs, 360000)
        minutes, remainder = divmod(remainder, 6000)
        secs, centisecs = divmod(remainder, 100)

        return f"{hours}:{minutes:02d}:{secs:02d}.{centisecs:02d}"