Spaces:
Running
Running
| import logging | |
| import os | |
| import tempfile | |
| from typing import Iterator | |
| from pysrt import SubRipFile, SubRipItem, SubRipTime | |
| from pytubefix import YouTube | |
| from transcriber import TranscribeResult | |
| logger = logging.getLogger(__name__) | |
def download_youtube_audio(video_id: str) -> "str | None":
    """
    Download the audio track of a YouTube video as an MP3 file.

    The file is written to the system temporary directory and is named
    after the video ID. The download is requested with
    ``skip_existing=True``, so a previously downloaded file for the same
    ID is expected to be reused rather than fetched again.

    Args:
        video_id (str): YouTube video ID.

    Returns:
        str | None: Path to the downloaded audio file, or None when the
        video has no title, exposes no audio-only stream, or the download
        fails for any reason (the failure is logged, not raised).
    """
    url = "https://www.youtube.com/watch?v={}".format(video_id)
    output_dir = tempfile.gettempdir()
    try:
        # The "MWEB" client is requested as a workaround; see
        # https://github.com/JuanBindez/pytubefix/issues/242#issuecomment-2369067929
        vid = YouTube(url, "MWEB")
        if vid.title is None:
            return None

        audio_stream = vid.streams.get_audio_only()
        if audio_stream is None:
            # Report the real cause instead of tripping over an
            # AttributeError on None further down.
            logger.warning(
                "No audio-only stream available for YouTube video %s", video_id
            )
            return None

        audio_stream.download(
            mp3=True,
            filename=video_id,
            output_path=output_dir,
            skip_existing=True,
        )
    except Exception:
        # Deliberate best-effort boundary around the third-party client:
        # callers receive None on failure, but the full traceback goes to
        # the module logger rather than being printed to stdout.
        logger.exception(
            "Failed to download audio for YouTube video %s", video_id
        )
        return None

    # The download is requested with mp3=True and filename=video_id, so the
    # resulting file is "<video_id>.mp3" inside the temporary directory.
    return os.path.join(output_dir, video_id + ".mp3")
def to_srt(results: Iterator["TranscribeResult"]) -> str:
    """
    Convert TranscribeResult objects into SRT-formatted subtitle text.

    Args:
        results: Iterable of transcription results. Each item must expose
            ``start_time`` and ``end_time`` (passed to ``SubRipTime`` as
            seconds) and ``text`` (used as the subtitle body).

    Returns:
        str: The subtitles serialised in SRT format with ``\\n`` line
        endings. Cues are numbered from 1 in iteration order, as the
        SubRip format expects.
    """
    # Function-scope import keeps this block self-contained.
    from io import StringIO

    srt = SubRipFile()
    # SubRip cue numbers conventionally start at 1, not 0.
    for index, result in enumerate(results, start=1):
        srt.append(
            SubRipItem(
                index=index,
                start=SubRipTime(seconds=result.start_time),
                end=SubRipTime(seconds=result.end_time),
                text=result.text,
            )
        )

    # Serialise in memory instead of round-tripping through a fixed
    # "<tmp>/output.srt" path: concurrent calls would overwrite or delete
    # each other's file, and a failed read would leave the file behind.
    buffer = StringIO()
    # Force "\n" so the text matches what reading the saved file back in
    # text mode (universal newlines) used to produce on every platform.
    srt.write_into(buffer, eol="\n")
    return buffer.getvalue()