"""Generate ASS subtitles using pysubs2. Supports: word-by-word, sentence, karaoke, fade, pop, typewriter animations. Full ASS spec: font, size, 4-color layers, border, shadow, position, alignment. Handles Thai/Chinese character-level splitting. """ from pathlib import Path from typing import Optional import pysubs2 from pysubs2 import SSAFile, SSAEvent, SSAStyle from loguru import logger # Languages that split by character rather than word CHAR_LEVEL_LANGUAGES = {"th", "zh", "ja", "km", "lo"} # Default font per language DEFAULT_FONTS = { "th": "Noto Sans Thai", "zh": "Noto Sans SC", "zh-tw": "Noto Sans TC", "ja": "Noto Sans JP", "ko": "Noto Sans KR", "en": "Montserrat", "default": "Noto Sans", } # Animation presets (ASS override tags) def _fade_tags(fade_in_ms: int = 200, fade_out_ms: int = 200) -> str: return f"{{\\fade({fade_in_ms},{fade_out_ms})}}" def _pop_tags() -> str: return "{\\t(0,100,\\fscx120\\fscy120)\\t(100,200,\\fscx100\\fscy100)}" def _typewriter_per_char(char: str, delay_ms: int) -> str: return f"{{\\alpha&HFF&\\t({delay_ms},{delay_ms+80},\\alpha&H00&)}}{char}" def _bounce_tags() -> str: return "{\\t(0,150,\\frz-5)\\t(150,300,\\frz5)\\t(300,400,\\frz0)}" def _color_to_ass(hex_color: str, alpha: int = 0) -> str: """Convert #RRGGBB hex to ASS &HAABBGGRR format.""" hex_color = hex_color.lstrip("#") if len(hex_color) == 6: r, g, b = hex_color[0:2], hex_color[2:4], hex_color[4:6] else: r, g, b = "FF", "FF", "FF" aa = f"{alpha:02X}" return f"&H{aa}{b}{g}{r}" def build_style( font_family: str = "Noto Sans", font_size: int = 72, primary_color: str = "#FFFFFF", secondary_color: str = "#FFFF00", outline_color: str = "#000000", shadow_color: str = "#000000", primary_alpha: int = 0, outline_alpha: int = 0, shadow_alpha: int = 80, bold: bool = True, italic: bool = False, underline: bool = False, outline_size: float = 4.0, shadow_size: float = 2.0, alignment: int = 2, # 2=bottom-center, 8=top-center margin_l: int = 40, margin_r: int = 40, margin_v: int = 250, scale_x: int = 100, scale_y: int = 100, spacing: float = 0.0, angle: float = 0.0, ) -> SSAStyle: style = SSAStyle() style.fontname = font_family style.fontsize = font_size style.primarycolor = pysubs2.Color(*_hex_to_rgba(primary_color, primary_alpha)) style.secondarycolor = pysubs2.Color(*_hex_to_rgba(secondary_color, 0)) style.outlinecolor = pysubs2.Color(*_hex_to_rgba(outline_color, outline_alpha)) style.backcolor = pysubs2.Color(*_hex_to_rgba(shadow_color, shadow_alpha)) style.bold = bold style.italic = italic style.underline = underline style.outline = outline_size style.shadow = shadow_size style.alignment = alignment style.marginl = margin_l style.marginr = margin_r style.marginv = margin_v style.scalex = scale_x style.scaley = scale_y style.spacing = spacing style.angle = angle style.borderstyle = 1 # outline + shadow return style def _hex_to_rgba(hex_color: str, alpha_0_255: int = 0): """Convert #RRGGBB to (R, G, B, A) where A=0 is opaque.""" hex_color = hex_color.lstrip("#") if len(hex_color) == 6: r = int(hex_color[0:2], 16) g = int(hex_color[2:4], 16) b = int(hex_color[4:6], 16) else: r, g, b = 255, 255, 255 return r, g, b, alpha_0_255 def generate_subtitles( transcript: dict, output_path: Path, style_config: dict, clip_start_offset: float = 0.0, ) -> Path: """Generate .ass subtitle file from transcript. Args: transcript: Output from whisper.py output_path: Where to save the .ass file style_config: Dict with font/color/animation settings from frontend clip_start_offset: Shift all timestamps (for sub-clips from longer video) """ subs = SSAFile() subs.info["PlayResX"] = "1080" subs.info["PlayResY"] = "1920" subs.info["ScaledBorderAndShadow"] = "yes" subs.info["WrapStyle"] = "0" display_mode = style_config.get("display_mode", "word") # "word" or "sentence" animation = style_config.get("animation", "none") # none|fade|karaoke|pop|typewriter|bounce subtitle_lang = style_config.get("subtitle_language", "en") char_level = transcript.get("char_level", False) or subtitle_lang in CHAR_LEVEL_LANGUAGES font_family = style_config.get("font_family") or DEFAULT_FONTS.get(subtitle_lang, DEFAULT_FONTS["default"]) style = build_style( font_family=font_family, font_size=style_config.get("font_size", 72), primary_color=style_config.get("primary_color", "#FFFFFF"), secondary_color=style_config.get("secondary_color", "#FFFF00"), outline_color=style_config.get("outline_color", "#000000"), shadow_color=style_config.get("shadow_color", "#000000"), primary_alpha=style_config.get("primary_alpha", 0), outline_alpha=style_config.get("outline_alpha", 0), shadow_alpha=style_config.get("shadow_alpha", 80), bold=style_config.get("bold", True), italic=style_config.get("italic", False), underline=style_config.get("underline", False), outline_size=style_config.get("outline_size", 4.0), shadow_size=style_config.get("shadow_size", 2.0), alignment=style_config.get("alignment", 2), margin_l=style_config.get("margin_l", 40), margin_r=style_config.get("margin_r", 40), margin_v=style_config.get("margin_v", 250), scale_x=style_config.get("scale_x", 100), scale_y=style_config.get("scale_y", 100), spacing=style_config.get("spacing", 0.0), angle=style_config.get("angle", 0.0), ) subs.styles["Default"] = style segments = transcript.get("segments", []) for seg in segments: words = seg.get("words", []) seg_end = seg["end"] - clip_start_offset if seg_end <= 0: continue # segment ends before clip starts — skip entirely seg_start = max(0.0, seg["start"] - clip_start_offset) if display_mode == "sentence" or not words: _add_sentence_event(subs, seg["text"], seg_start, seg_end, animation, style_config) else: if animation == "karaoke": _add_karaoke_line(subs, words, seg_start, seg_end, clip_start_offset, char_level) else: _add_word_events(subs, words, seg_start, seg_end, animation, char_level, style_config, clip_start_offset) output_path.parent.mkdir(parents=True, exist_ok=True) subs.save(str(output_path), encoding="utf-8") logger.info(f"Generated {len(subs)} subtitle events → {output_path.name}") return output_path def _add_sentence_event(subs, text, start, end, animation, style_config): tags = "" if animation == "fade": fi = style_config.get("fade_in_ms", 200) fo = style_config.get("fade_out_ms", 200) tags = _fade_tags(fi, fo) elif animation == "pop": tags = _pop_tags() elif animation == "bounce": tags = _bounce_tags() event = SSAEvent( start=pysubs2.make_time(s=start), end=pysubs2.make_time(s=end), text=tags + text.strip(), ) subs.append(event) def _add_word_events(subs, words, seg_start, seg_end, animation, char_level, style_config, clip_offset=0.0): """Add one SSAEvent per word (word-by-word mode).""" unit_list = [] for w in words: if char_level: for ch in w["word"]: unit_list.append({"word": ch, "start": w["start"], "end": w["end"]}) else: unit_list.append(w) for i, unit in enumerate(unit_list): start = unit["start"] - clip_offset end = (unit["end"] - clip_offset) if unit["end"] > unit["start"] else start + 0.3 if start < 0: continue tags = "" if animation == "fade": fi = style_config.get("fade_in_ms", 150) fo = style_config.get("fade_out_ms", 100) tags = _fade_tags(fi, fo) elif animation == "pop": tags = _pop_tags() elif animation == "typewriter": delay = int((start - seg_start) * 1000) tags = _typewriter_per_char("", delay) event = SSAEvent( start=pysubs2.make_time(s=start), end=pysubs2.make_time(s=end), text=tags + unit["word"].strip(), ) subs.append(event) def _add_karaoke_line(subs, words, seg_start, seg_end, clip_offset, char_level): """Add karaoke-style line: full line visible, words highlight in sequence.""" karaoke_text = "" for w in words: duration_cs = int((w["end"] - w["start"]) * 100) word_text = w["word"].strip() if char_level: for ch in word_text: karaoke_text += f"{{\\kf{duration_cs // max(len(word_text), 1)}}}{ch}" else: karaoke_text += f"{{\\kf{duration_cs}}}{word_text} " event = SSAEvent( start=pysubs2.make_time(s=seg_start), end=pysubs2.make_time(s=seg_end), text=karaoke_text.strip(), ) subs.append(event) def update_subtitle_event( ass_path: Path, event_index: int, updates: dict, ) -> Path: """Update a single subtitle event (for editor patches).""" subs = SSAFile.load(str(ass_path)) if event_index >= len(subs): raise IndexError(f"Event index {event_index} out of range") evt = subs[event_index] if "text" in updates: evt.text = updates["text"] if "start" in updates: evt.start = pysubs2.make_time(s=updates["start"]) if "end" in updates: evt.end = pysubs2.make_time(s=updates["end"]) subs.save(str(ass_path), encoding="utf-8") return ass_path def apply_global_style_override(ass_path: Path, style_config: dict) -> Path: """Re-apply global style overrides to all events (for live preview).""" subs = SSAFile.load(str(ass_path)) new_style = build_style(**{k: v for k, v in style_config.items() if k in build_style.__code__.co_varnames}) subs.styles["Default"] = new_style subs.save(str(ass_path), encoding="utf-8") return ass_path