"""Write word-level ASS (Advanced SubStation Alpha) subtitle files."""

import os
from typing import List, Dict

# Static preamble of every generated file: script info, a single "Default"
# style, and the column layout of the [Events] section.
_ASS_HEADER = (
    "[Script Info]\n"
    "Title: Generated by Manimator\n"
    "ScriptType: v4.00+\n\n"
    "[V4+ Styles]\n"
    "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
    "Style: Default,Arial,24,&H00FFFFFF,&H000000FF,&H003C3C3C,&H00000000,0,0,0,0,100,100,0,0,1,1.5,1,2,10,10,15,1\n\n"
    "[Events]\n"
    "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
)


def _ass_time(t: float) -> str:
    """Format a time in seconds as an ASS timestamp (H:MM:SS.cs).

    Negative values are clamped to zero.  The value is rounded to the nearest
    centisecond *before* being split into fields: truncating the fractional
    part separately (``int((t - int(t)) * 100)``) loses a centisecond to float
    error for inputs such as 1.13, and rounding first also lets a carry
    propagate cleanly into seconds/minutes/hours.
    """
    total_cs = max(0, round(t * 100))
    total_seconds, cs = divmod(total_cs, 100)
    h, remainder = divmod(total_seconds, 3600)
    m, s = divmod(remainder, 60)
    return f"{h}:{m:02}:{s:02}.{cs:02}"


def generate_subtitle_file(
    tokens_with_timestamps: List[Dict], output_audio_path: str
) -> str:
    """
    Generates an ASS subtitle file from tokens with absolute timestamps.

    One ``Dialogue`` event is written per token. A token is skipped when its
    'start' or 'end' is missing/None, when its text is missing, None or blank
    after stripping, or when 'end' is not strictly greater than 'start'.

    Args:
        tokens_with_timestamps (list): A list of token dictionaries with
            'text', 'start', and 'end' keys (times in seconds).
        output_audio_path (str): The path to the audio file, used to name the
            subtitle file (same path with the extension replaced by ".ass").

    Returns:
        str: The path to the generated subtitle file.
    """
    subtitle_file_path = os.path.splitext(output_audio_path)[0] + ".ass"

    with open(subtitle_file_path, "w", encoding="utf-8") as f:
        f.write(_ASS_HEADER)

        # Write dialogue entries word-by-word
        for token in tokens_with_timestamps:
            start_time = token.get("start")
            end_time = token.get("end")
            if start_time is None or end_time is None:
                continue

            # `or ""` also covers an explicit None stored under 'text'.
            text = (token.get("text") or "").strip()
            if not text or end_time <= start_time:
                continue

            # Text is the last field of a Dialogue line, so commas need no
            # escaping (a "\," would show up as a literal backslash).  A raw
            # line break, however, would split the record, so it becomes the
            # ASS hard line break "\N".
            text = "\\N".join(text.splitlines())

            f.write(
                f"Dialogue: 0,{_ass_time(start_time)},{_ass_time(end_time)},Default,,0,0,0,,{text}\n"
            )

    return subtitle_file_path