Spaces:

lvvignesh2122
/

Subtitle_Generator

Sleeping

App Files Files Community

lvvignesh2122 committed on Nov 3, 2025

Commit

eb04a70

verified ·

1 Parent(s): b8ec7ca

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -24

app.py CHANGED Viewed

@@ -2,16 +2,17 @@ import os
 import tempfile
 from pathlib import Path
 import gradio as gr
-from faster_whisper import WhisperModel
 import ffmpeg
 # -------- Configuration --------
 MODEL_NAME = "small"  # tiny, base, small, medium, large-v3
-DEVICE = "cuda" if os.environ.get("USE_CUDA", "0") == "1" else "cpu"
 # -------- Load Faster-Whisper --------
 print(f"🚀 Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
-model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type="float16" if DEVICE == "cuda" else "int8")
 # -------- Helper functions --------
 def _format_timestamp(seconds: float) -> str:
@@ -26,19 +27,30 @@ def _format_timestamp(seconds: float) -> str:
     return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
-def transcribe(audio_file):
-    """Transcribe uploaded audio and return text + SRT + file."""
     try:
-        # Convert any format to wav for consistency
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
-            (
-                ffmpeg
-                .input(audio_file)
-                .output(tmp_wav.name, format="wav", acodec="pcm_s16le", ac=1, ar="16k")
-                .overwrite_output()
-                .run(quiet=True)
-            )
-            wav_path = tmp_wav.name
         # Transcribe
         segments, info = model.transcribe(wav_path, beam_size=5)
@@ -50,7 +62,7 @@ def transcribe(audio_file):
             srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
             text_output += segment.text.strip() + " "
-        # Save SRT file
         srt_path = Path(tempfile.mkstemp(suffix=".srt")[1])
         with open(srt_path, "w", encoding="utf-8") as f:
             f.write(srt_output)
@@ -58,7 +70,7 @@ def transcribe(audio_file):
         return text_output.strip(), srt_output, srt_path
     except Exception as e:
-        return f"Error: {str(e)}", "", None
 def clear_outputs():
@@ -70,10 +82,13 @@ def clear_outputs():
 def build_ui():
     with gr.Blocks(title="🎬 Subtitle Generator (Faster-Whisper)") as app:
         gr.Markdown("# 🎧 Fast Subtitle Generator using Faster-Whisper")
-        gr.Markdown("Upload or record an audio file to generate `.srt` subtitles instantly.")
         with gr.Row():
-            audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="🎙️ Audio Input")
         with gr.Row():
             text_output = gr.Textbox(label="📝 Transcribed Text", lines=6)
@@ -81,15 +96,14 @@ def build_ui():
             srt_file = gr.File(label="⬇️ Download .srt File")
         with gr.Row():
-            transcribe_btn = gr.Button("🚀 Generate Subtitles")
             clear_btn = gr.Button("🧹 Clear All")
-        # Button actions
-        transcribe_btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, srt_output, srt_file])
-        clear_btn.click(fn=clear_outputs, inputs=None, outputs=[audio_input, text_output, srt_output, srt_file])
         gr.Markdown("---")
-        gr.Markdown("⚡ Built with **Faster-Whisper** | 🎥 Ideal for Subtitle Generation")
     return app

 import tempfile
 from pathlib import Path
 import gradio as gr
 import ffmpeg
+from faster_whisper import WhisperModel
 # -------- Configuration --------
 MODEL_NAME = "small"  # tiny, base, small, medium, large-v3
+DEVICE = "cpu"  # Force CPU for Hugging Face free tier
+COMPUTE_TYPE = "int8"
 # -------- Load Faster-Whisper --------
 print(f"🚀 Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
+model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type=COMPUTE_TYPE)
 # -------- Helper functions --------
 def _format_timestamp(seconds: float) -> str:
     return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
+def extract_audio(input_file: str) -> str:
+    """Extract the audio track of a media file into a temporary WAV file.
+
+    The input is converted with FFmpeg to a mono, 16 kHz, 16-bit PCM WAV.
+
+    Args:
+        input_file: Path to the source audio or video file.
+
+    Returns:
+        Path to the newly created WAV file. The file is not removed
+        automatically; the caller is responsible for deleting it.
+
+    Raises:
+        RuntimeError: If the FFmpeg conversion fails.
+    """
+    # Only a unique path is needed: FFmpeg writes to the path itself, so the
+    # handle is closed immediately instead of being leaked.
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
+        wav_path = tmp_wav.name
     try:
+        (
+            ffmpeg
+            .input(input_file)
+            .output(wav_path, format="wav", acodec="pcm_s16le", ac=1, ar="16k")
+            .overwrite_output()
+            .run(quiet=True)
+        )
+    except Exception as e:
+        # Do not leave an empty or partial WAV behind when conversion fails.
+        try:
+            os.remove(wav_path)
+        except OSError:
+            pass
+        # Keep the original exception as the cause for easier debugging.
+        raise RuntimeError(f"FFmpeg conversion failed: {e}") from e
+    return wav_path
+def transcribe(file_path):
+    """Transcribe uploaded file (video/audio) and return text + SRT + file."""
+    try:
+        if not file_path:
+            return "⚠️ Please upload a file first.", "", None
+        # Convert any format to WAV
+        wav_path = extract_audio(file_path)
         # Transcribe
         segments, info = model.transcribe(wav_path, beam_size=5)
             srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
             text_output += segment.text.strip() + " "
+        # Save SRT
         srt_path = Path(tempfile.mkstemp(suffix=".srt")[1])
         with open(srt_path, "w", encoding="utf-8") as f:
             f.write(srt_output)
         return text_output.strip(), srt_output, srt_path
     except Exception as e:
+        return f"❌ Error: {str(e)}", "", None
 def clear_outputs():
 def build_ui():
     with gr.Blocks(title="🎬 Subtitle Generator (Faster-Whisper)") as app:
         gr.Markdown("# 🎧 Fast Subtitle Generator using Faster-Whisper")
+        gr.Markdown(
+            "Upload any **audio or video** file — MP3, WAV, MP4, MKV, MOV, etc. "
+            "and generate `.srt` subtitles instantly!"
+        )
         with gr.Row():
+            file_input = gr.File(label="🎥 Upload Video/Audio File", file_types=["audio", "video"])
         with gr.Row():
             text_output = gr.Textbox(label="📝 Transcribed Text", lines=6)
             srt_file = gr.File(label="⬇️ Download .srt File")
         with gr.Row():
+            transcribe_btn = gr.Button("🚀 Generate Subtitles", variant="primary")
             clear_btn = gr.Button("🧹 Clear All")
+        transcribe_btn.click(fn=transcribe, inputs=file_input, outputs=[text_output, srt_output, srt_file])
+        clear_btn.click(fn=clear_outputs, inputs=None, outputs=[file_input, text_output, srt_output, srt_file])
         gr.Markdown("---")
+        gr.Markdown("⚡ Built with **Faster-Whisper** | 🧠 Runs fully on CPU | 🎬 Ideal for Subtitle Generation")
     return app