# video-dubbing-agent/services/subtitle_generator.py
# Repository page residue: uploaded by dashhdata using huggingface_hub (commit 4ec3855, verified).
"""
Bonus — Subtitle Generator
Creates .srt subtitle files in both original and target languages.
"""
import logging
from pathlib import Path
from typing import List, Dict
logger = logging.getLogger(__name__)
def generate_subtitles(
    segments: List[Dict],
    output_dir: Path,
    source_language: str,
    target_language: str
) -> Dict[str, Path]:
    """
    Generate .srt subtitle files for the original and translated text.

    Two files are written into ``output_dir``:
    ``subtitles_<source_language>.srt`` (from each segment's ``"text"``) and
    ``subtitles_<target_language>.srt`` (from each segment's
    ``"translated_text"``).

    Args:
        segments: Segment dicts; each is read for ``"start"``, ``"end"``,
            ``"text"`` and ``"translated_text"``.
        output_dir: Directory to write into. It is created (with parents)
            if it does not exist yet; a plain string path is accepted too.
        source_language: Language tag used in the original file name.
        target_language: Language tag used in the translated file name.

    Returns:
        Dict with keys ``"original"`` and ``"translated"`` mapping to the
        paths of the two subtitle files.
    """
    # Coerce so that a str path works with the `/` operator below, and make
    # sure the directory exists before opening files inside it.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    original_srt = output_dir / f"subtitles_{source_language}.srt"
    translated_srt = output_dir / f"subtitles_{target_language}.srt"

    if original_srt == translated_srt:
        # Same language tag on both sides means both writes hit one file and
        # the second write replaces the first; surface that instead of
        # losing the original subtitles silently.
        logger.warning(
            "Source and target language are both %r; %s will only contain "
            "the translated subtitles",
            source_language,
            translated_srt,
        )

    # Original language subtitles
    _write_srt(original_srt, segments, text_key="text")
    logger.info("Original subtitles: %s", original_srt)

    # Translated language subtitles
    _write_srt(translated_srt, segments, text_key="translated_text")
    logger.info("Translated subtitles: %s", translated_srt)

    return {
        "original": original_srt,
        "translated": translated_srt,
    }
def _write_srt(output_path: Path, segments: List[Dict], text_key: str) -> None:
    """Write segments to ``output_path`` in .srt format.

    Each written cue consists of a cue number, a ``start --> end`` timing
    line, the cue text, and a blank separator line.

    Args:
        output_path: File to create or overwrite (written as UTF-8).
        segments: Segment dicts providing ``"start"``, ``"end"`` and the
            text fields.
        text_key: Key holding the text to write. When the key is missing or
            its value is ``None``, the segment's ``"text"`` is used instead.

    Segments whose text is empty after stripping whitespace are skipped.
    """
    with open(output_path, "w", encoding="utf-8") as f:
        # Count written cues separately from the segment position so that
        # skipped (empty) segments do not leave gaps in the cue numbering.
        cue_number = 0
        for seg in segments:
            text = seg.get(text_key)
            if text is None:
                # Missing or null entry: fall back to the original text.
                text = seg.get("text")
            text = (text or "").strip()
            if not text:
                continue

            cue_number += 1
            start = _format_srt_time(seg["start"])
            end = _format_srt_time(seg["end"])
            f.write(f"{cue_number}\n")
            f.write(f"{start} --> {end}\n")
            f.write(f"{text}\n\n")
def _format_srt_time(seconds: float) -> str:
    """Convert seconds to SRT time format: HH:MM:SS,mmm

    The value is rounded to the nearest millisecond. Negative input is
    clamped to ``00:00:00,000``.
    """
    # Convert to whole milliseconds once, with rounding. Truncating the
    # fractional part separately loses a millisecond on values such as
    # 1.001, whose binary float representation is slightly below the
    # decimal value.
    total_millis = max(0, int(round(float(seconds) * 1000)))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"