Spaces:
Running
Running
| import os | |
| os.environ["OMP_NUM_THREADS"] = "1" | |
| os.environ["MKL_NUM_THREADS"] = "1" | |
| import gradio as gr | |
| import pysrt | |
| import requests | |
| import tempfile | |
| from faster_whisper import WhisperModel | |
| from datetime import timedelta | |
| from urllib.parse import urlparse | |
| # ----------------------------- | |
| # Core subtitle generator | |
| # ----------------------------- | |
class LinearSubtitleGenerator:
    """Transcribe audio with faster-whisper and build "linear" SRT cues.

    "Linear" refers to the cue sizes: the first cue holds one word, the
    second two words, the third three, and so on (1, 2, 3, 4, ...).
    """

    def __init__(self, model_size="base"):
        """Create the Whisper model on CPU with int8 compute.

        Args:
            model_size: Value forwarded as the first argument of
                ``WhisperModel`` (e.g. ``"tiny"``, ``"base"``).
        """
        self.model = WhisperModel(
            model_size,
            device="cpu",
            compute_type="int8",
        )

    def transcribe(self, audio_path):
        """Run the model on *audio_path* with word timestamps and VAD on.

        Returns:
            The segments object produced by ``self.model.transcribe``; the
            second element of that call's result is discarded.
        """
        segments, _ = self.model.transcribe(
            audio_path,
            word_timestamps=True,
            vad_filter=True,
        )
        return segments

    def extract_words(self, segments):
        """Flatten *segments* into a list of timed words.

        Segments without words, words missing a start or end time, and
        words that are empty once surrounding whitespace is stripped are
        skipped, so they can neither count toward a cue's size nor leave
        stray spaces in its text.

        Returns:
            A list of ``{"word": str, "start": float, "end": float}`` dicts
            in input order.
        """
        words = []
        for segment in segments:
            for w in segment.words or ():
                if w.start is None or w.end is None:
                    continue
                text = w.word.strip()
                if not text:
                    continue
                words.append({
                    "word": text,
                    "start": float(w.start),
                    "end": float(w.end),
                })
        return words

    @staticmethod
    def _plan_group_sizes(total_words):
        """Return the cue sizes (1, 2, 3, ...) covering *total_words* words.

        If the words left after a cue would be too few to fill the next
        (one word larger) cue, they are absorbed into the current cue,
        which then becomes the last one.
        """
        sizes = []
        consumed = 0
        size = 1
        while consumed < total_words:
            leftover = total_words - (consumed + size)
            # Absorb leftovers to avoid a tiny last subtitle.
            if 0 < leftover < size + 1:
                sizes.append(size + leftover)
                break
            sizes.append(size)
            consumed += size
            size += 1
        return sizes

    def create_linear_subtitles(self, words):
        """Group *words* into cues of growing size and return them.

        Args:
            words: Dicts with ``"word"``, ``"start"`` and ``"end"`` keys, as
                produced by ``extract_words``.

        Returns:
            A ``pysrt.SubRipFile`` whose items are numbered from 1. Each
            item starts at its first word's start and ends at its last
            word's end. An empty *words* list yields an empty file.
        """
        subs = pysrt.SubRipFile()
        cursor = 0
        sizes = self._plan_group_sizes(len(words))
        for number, size in enumerate(sizes, start=1):
            group = words[cursor:cursor + size]
            cursor += size
            subs.append(
                pysrt.SubRipItem(
                    index=number,
                    start=self._to_time(group[0]["start"]),
                    end=self._to_time(group[-1]["end"]),
                    text=" ".join(w["word"] for w in group),
                )
            )
        return subs

    @staticmethod
    def _split_timestamp(seconds):
        """Split *seconds* into ``(hours, minutes, seconds, milliseconds)``.

        Whole days are folded into the hour count (``timedelta.seconds``
        alone excludes them, which would wrap timestamps at 24 h), and
        negative input is clamped to zero.
        """
        td = timedelta(seconds=max(seconds, 0))
        whole_seconds = td.days * 86400 + td.seconds
        hours, rest = divmod(whole_seconds, 3600)
        minutes, secs = divmod(rest, 60)
        return hours, minutes, secs, td.microseconds // 1000

    def _to_time(self, seconds):
        """Convert a time offset in seconds to a ``pysrt.SubRipTime``."""
        hours, minutes, secs, millis = self._split_timestamp(seconds)
        return pysrt.SubRipTime(
            hours=hours,
            minutes=minutes,
            seconds=secs,
            milliseconds=millis,
        )
| # ----------------------------- | |
| # Helper: download audio from URL | |
| # ----------------------------- | |
def download_audio(url: str) -> str:
    """Download the audio at *url* to a temporary file and return its path.

    The file is created with ``delete=False``, so the caller is responsible
    for removing it. Its suffix is the extension of the URL path, or
    ``.wav`` when the path has none.

    Args:
        url: An ``http`` or ``https`` URL.

    Returns:
        Path of the temporary file holding the downloaded bytes.

    Raises:
        ValueError: If the URL scheme is not ``http`` or ``https``.
        requests.RequestException: If the request fails or the server
            answers with an error status.
    """
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        raise ValueError("Invalid URL scheme")

    suffix = os.path.splitext(parsed.path)[1] or ".wav"

    # NOTE(security): the URL is user-supplied and is fetched as-is; there
    # is no host allow-list and no cap on the download size here.
    # The context manager releases the streamed connection on every path.
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        try:
            with tmp:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        tmp.write(chunk)
        except BaseException:
            # Do not leave a partial download behind on failure.
            try:
                os.unlink(tmp.name)
            except OSError:
                pass
            raise
    return tmp.name
| # ----------------------------- | |
| # Gradio callable function | |
| # ----------------------------- | |
def generate_srt(audio_file, audio_url, model_size):
    """Transcribe one audio input and return the path of the generated SRT.

    Args:
        audio_file: Path of an uploaded audio file, or a falsy value.
        audio_url: http/https URL of an audio file, or a falsy value.
            Surrounding whitespace is ignored.
        model_size: Whisper model size passed to ``LinearSubtitleGenerator``.

    Returns:
        Path of a temporary ``.srt`` file encoded as UTF-8.

    Raises:
        gr.Error: If not exactly one of *audio_file* / *audio_url* is
            given, or if the URL cannot be downloaded.
    """
    # A whitespace-only URL counts as "not provided".
    audio_url = (audio_url or "").strip()

    # exactly one input must be provided
    if bool(audio_file) == bool(audio_url):
        raise gr.Error(
            "Please provide EITHER an audio file OR an audio URL (not both)."
        )

    downloaded_path = None
    if audio_url:
        try:
            downloaded_path = download_audio(audio_url)
        except (ValueError, requests.RequestException) as exc:
            # Surface download problems as a readable message in the UI.
            raise gr.Error(f"Could not download audio: {exc}") from exc
        audio_path = downloaded_path
    else:
        audio_path = audio_file

    try:
        generator = LinearSubtitleGenerator(model_size)
        segments = generator.transcribe(audio_path)
        # The segments may be produced lazily (faster-whisper documents a
        # generator), so the audio file must still exist while the words
        # are being collected.
        words = generator.extract_words(segments)
    finally:
        # Only remove the file this function downloaded, never an upload.
        if downloaded_path is not None:
            try:
                os.unlink(downloaded_path)
            except OSError:
                pass

    subs = generator.create_linear_subtitles(words)

    # Reserve a unique path and close the handle before pysrt writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as out:
        srt_path = out.name
    subs.save(srt_path, encoding="utf-8")
    return srt_path
| # ----------------------------- | |
| # Gradio UI (UNCHANGED) | |
| # ----------------------------- | |
# Build the Gradio Blocks app bound to the name `demo`.
# NOTE(review): the original indentation was lost in this copy, so which
# components sit inside the gr.Row() below cannot be determined from here.
| with gr.Blocks(title="Subtitle Generator") as demo: | |
| gr.Markdown( | |
| """ | |
| # srt generator | |
| """ | |
| ) | |
| with gr.Row(): | |
# Audio upload; type="filepath" requests the value as a file path.
| audio_file = gr.Audio( | |
| label="Upload Audio File", | |
| type="filepath" | |
| ) | |
# Alternative input: a URL typed by the user.
| audio_url = gr.Textbox( | |
| label="Audio URL (http/https)", | |
| placeholder="https://example.com/audio.wav" | |
| ) | |
# Model size selector; "base" is preselected.
| model_choice = gr.Dropdown( | |
| choices=["tiny", "base", "small", "medium"], | |
| value="base", | |
| label="Whisper Model" | |
| ) | |
| generate_btn = gr.Button("Generate SRT") | |
| output_file = gr.File(label="Download SRT") | |
# Clicking the button calls generate_srt(audio_file, audio_url, model_choice)
# and sends its return value to the output_file component.
| generate_btn.click( | |
| fn=generate_srt, | |
| inputs=[audio_file, audio_url, model_choice], | |
| outputs=output_file | |
| ) | |
# Launch the app only when run as a script; mcp_server=True is passed to launch().
| if __name__ == "__main__": | |
| demo.launch(mcp_server=True) |