Spaces:

Ahmadkhan12
/

VideoTranslator

Runtime error

App Files Files Community

Ahmadkhan12 committed on Dec 11, 2025

Commit

880e201

verified ·

1 Parent(s): 1100805

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -16

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import subprocess
-import soundfile as sf
 import numpy as np
 from vosk import Model, KaldiRecognizer
 import json
 from datetime import timedelta
@@ -9,18 +9,19 @@ import os
 # -----------------------------
-# Load Vosk Model (EN)
 # -----------------------------
 if not os.path.exists("model"):
     os.system("wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip")
     os.system("unzip vosk-model-small-en-us-0.15.zip")
     os.system("mv vosk-model-small-en-us-0.15 model")
 model = Model("model")
 # -----------------------------
-# Extract Audio
 # -----------------------------
 def extract_audio(video_path):
     try:
@@ -35,22 +36,34 @@ def extract_audio(video_path):
         subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
         return audio_path, "Audio extracted!"
     except Exception as e:
-        return None, f"FFmpeg Error: {e}"
 # -----------------------------
-# Offline Speech-to-Text (VOSK)
 # -----------------------------
 def transcribe_audio(audio_path):
     try:
-        audio, sr = sf.read(audio_path)
-        audio = (audio * 32767).astype(np.int16)
         rec = KaldiRecognizer(model, 16000)
         rec.SetWords(True)
-        text = ""
-        for chunk in np.array_split(audio, 50):
             rec.AcceptWaveform(chunk.tobytes())
         result = json.loads(rec.FinalResult())
@@ -61,20 +74,20 @@ def transcribe_audio(audio_path):
         return text, "Transcription complete!"
     except Exception as e:
-        return None, f"STT Error: {e}"
 # -----------------------------
-# Build SRT File
 # -----------------------------
 def make_srt(text):
     try:
         words = text.split()
         lines = []
-        chunk = ""
         for w in words:
-            if len(chunk.split()) < 8:
                 chunk += w + " "
             else:
                 lines.append(chunk.strip())
@@ -98,7 +111,7 @@ def make_srt(text):
         return file, "SRT created!"
     except Exception as e:
-        return None, f"SRT Error: {e}"
 # -----------------------------
@@ -123,13 +136,13 @@ def process(video):
 # Gradio App
 # -----------------------------
 with gr.Blocks() as app:
-    gr.Markdown("## 🎬 Offline Subtitle Generator (No Token, No Whisper, No API)")
     video_in = gr.Video(label="Upload Video")
     btn = gr.Button("Generate SRT")
     srt_out = gr.File(label="Download SRT")
-    logs = gr.Textbox(label="Debug Log")
     btn.click(process, inputs=video_in, outputs=[srt_out, logs])

 import gradio as gr
 import subprocess
 import numpy as np
+import wave
 from vosk import Model, KaldiRecognizer
 import json
 from datetime import timedelta
 # -----------------------------
+# Download + Load VOSK model
 # -----------------------------
 if not os.path.exists("model"):
     os.system("wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip")
     os.system("unzip vosk-model-small-en-us-0.15.zip")
     os.system("mv vosk-model-small-en-us-0.15 model")
 model = Model("model")
 # -----------------------------
+# Extract audio as WAV (ffmpeg)
 # -----------------------------
 def extract_audio(video_path):
     try:
         subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
         return audio_path, "Audio extracted!"
     except Exception as e:
+        return None, f"FFmpeg Error:\n{e}"
 # -----------------------------
+# Read WAV using Python's 'wave'
+# -----------------------------
def read_wave(path):
    """Read a 16-bit PCM WAV file and return its samples as an int16 array.

    Args:
        path: Path of the WAV file to read.

    Returns:
        A 1-D ``numpy.ndarray`` of dtype ``int16`` holding every sample in
        the file, in file order (channels stay interleaved when the file has
        more than one channel).

    Raises:
        RuntimeError: If the file cannot be opened or read, or if its sample
            width is not 2 bytes. The message is prefixed with
            ``"WAV Read Error: "`` and the underlying exception is chained.
    """
    try:
        # The context manager closes the file even when reading fails, so a
        # broken upload does not leak the handle.
        with wave.open(path, "rb") as wf:
            sample_width = wf.getsampwidth()
            if sample_width != 2:
                # The bytes are decoded as int16 below; any other width would
                # be silently misread as garbage samples.
                raise ValueError(
                    f"expected 16-bit samples, got {sample_width * 8}-bit"
                )
            frames = wf.readframes(wf.getnframes())
        return np.frombuffer(frames, dtype=np.int16)
    except Exception as e:
        raise RuntimeError(f"WAV Read Error: {e}") from e
+# -----------------------------
+# Vosk Offline Speech-to-Text
 # -----------------------------
 def transcribe_audio(audio_path):
     try:
+        audio = read_wave(audio_path)
         rec = KaldiRecognizer(model, 16000)
         rec.SetWords(True)
+        for chunk in np.array_split(audio, 30):
             rec.AcceptWaveform(chunk.tobytes())
         result = json.loads(rec.FinalResult())
         return text, "Transcription complete!"
     except Exception as e:
+        return None, f"STT Error:\n{e}"
 # -----------------------------
+# Make SRT subtitles
 # -----------------------------
 def make_srt(text):
     try:
         words = text.split()
         lines = []
+        chunk = ""
         for w in words:
+            if len(chunk.split()) < 7:
                 chunk += w + " "
             else:
                 lines.append(chunk.strip())
         return file, "SRT created!"
     except Exception as e:
+        return None, f"SRT Error:\n{e}"
 # -----------------------------
 # Gradio App
 # -----------------------------
 with gr.Blocks() as app:
+    gr.Markdown("## 🎬 Offline Subtitle Generator (No Whisper · No Token · No Soundfile · 100% Free)")
     video_in = gr.Video(label="Upload Video")
     btn = gr.Button("Generate SRT")
     srt_out = gr.File(label="Download SRT")
+    logs = gr.Textbox(label="Debug Logs")
     btn.click(process, inputs=video_in, outputs=[srt_out, logs])