Ahmadkhan12 committed on
Commit
86e09e6
·
verified ·
1 Parent(s): af660ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -59
app.py CHANGED
@@ -1,81 +1,114 @@
1
  import gradio as gr
2
  import subprocess
3
- import os
4
  import traceback
5
- from datetime import timedelta
6
- from faster_whisper import WhisperModel
7
-
8
- # Load tiny whisper model for HF Spaces free tier
9
- model = WhisperModel("tiny", device="cpu", compute_type="int8")
10
-
 
 
 
11
def extract_audio(video_path):
    """Extract mono 16 kHz WAV audio from *video_path* with ffmpeg.

    Returns:
        str: path of the written WAV file ("audio.wav").

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    audio_path = "audio.wav"
    # Argument list + shell=False avoids the shell-injection/quoting bug the
    # previous f-string command had with paths containing quotes or spaces.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # 16 kHz sample rate expected by the STT model
        "-ac", "1",      # mono
        "-f", "wav",
        audio_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Previously ffmpeg failures were silently swallowed and a missing/stale
    # audio.wav was returned; surface the error so callers' try/except sees it.
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {result.stderr.decode(errors='replace')}")
    return audio_path
16
 
17
def format_timestamp(seconds):
    """Render a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    as_td = timedelta(seconds=seconds)
    whole = int(as_td.total_seconds())
    ms = int((as_td.total_seconds() - whole) * 1000)

    # Peel off seconds, then minutes; whatever remains is hours.
    minutes, secs = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)

    return f"{hours:02}:{minutes:02}:{secs:02},{ms:03}"
27
-
28
def generate_srt(video):
    """Transcribe *video* with the module-level Whisper model and write output.srt.

    Returns (srt_path, status_message); on any failure returns
    (None, traceback text) so the UI can display the error.
    """
    try:
        if not video:
            return None, "No file uploaded"

        wav_path = extract_audio(video)
        segments, _info = model.transcribe(wav_path)

        # Assemble SRT cue blocks: index, time range, text, blank separator.
        cues = []
        for number, segment in enumerate(segments, start=1):
            cues.append(str(number))
            cues.append(f"{format_timestamp(segment.start)} --> {format_timestamp(segment.end)}")
            cues.append(segment.text.strip())
            cues.append("")  # Blank line between cues

        srt_path = "output.srt"
        with open(srt_path, "w", encoding="utf-8") as out:
            out.write("\n".join(cues))

        return srt_path, "SRT successfully generated!"

    except Exception:
        return None, f"❌ ERROR:\n{traceback.format_exc()}"
67
-
 
 
 
 
 
 
 
 
 
 
68
 
69
- # Build UI
70
# Wire up the Gradio interface: upload a video, click, download the SRT.
with gr.Blocks() as demo:
    gr.Markdown("## 🎧 Auto SRT Generator (No Token, No API, No Whisper API, 100% Local)")

    uploaded_video = gr.Video(label="Upload Video")
    run_button = gr.Button("Generate SRT")

    srt_file = gr.File(label="Download SRT")
    log_output = gr.Textbox(label="Debug Log", lines=8)

    run_button.click(generate_srt, inputs=uploaded_video, outputs=[srt_file, log_output])

demo.launch()
 
1
  import gradio as gr
2
  import subprocess
 
3
  import traceback
4
+ import os
5
+ from transformers import MarianMTModel, MarianTokenizer
6
+ import torch
7
+ import numpy as np
8
+ import wave
9
+
10
+ # -------------------------------
11
+ # 1. Audio extraction
12
+ # -------------------------------
13
def extract_audio(video_path):
    """Extract mono 16 kHz WAV audio from *video_path* with ffmpeg.

    Returns:
        str: path of the written WAV file ("audio.wav").

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    audio_path = "audio.wav"
    # Argument list + shell=False avoids the shell-injection/quoting bug the
    # previous f-string command had with paths containing quotes or spaces.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # 16 kHz sample rate expected by the STT model
        "-ac", "1",      # mono
        "-f", "wav",
        audio_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Previously ffmpeg failures were silently swallowed and a missing/stale
    # audio.wav was returned; surface the error so callers' try/except sees it.
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {result.stderr.decode(errors='replace')}")
    return audio_path
18
 
19
+ # -------------------------------
20
+ # 2. Read WAV
21
+ # -------------------------------
22
def read_wav(path):
    """Load a PCM WAV file and return its samples as an int16 numpy array.

    NOTE(review): assumes 16-bit samples; other sample widths would be
    misread by the int16 view — confirm against extract_audio's output.
    """
    with wave.open(path, "rb") as wav_file:
        raw = wav_file.readframes(wav_file.getnframes())
    return np.frombuffer(raw, dtype=np.int16)
27
+
28
+ # -------------------------------
29
+ # 3. Simple STT using Silero
30
+ # -------------------------------
31
def stt(audio_path):
    """Transcribe an English WAV file with the Silero STT model on CPU.

    Returns the transcribed text, or an "STT Error: ..." string containing
    the traceback on failure (callers display the return value as text).
    """
    try:
        import torch  # local import: keep module import cheap if STT is unused
        import torchaudio  # noqa: F401 — presumably needed by silero's read_audio; verify

        # Load the pipeline once and memoize it on the function object;
        # torch.hub.load otherwise re-fetches and rebuilds it on every call.
        if not hasattr(stt, "_pipeline"):
            stt._pipeline = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                           model='silero_stt', language='en', device='cpu')
        model, decoder, utils = stt._pipeline
        (read_batch, split_into_batches, read_audio, prepare_model_input) = utils

        audio = read_audio(audio_path)
        batches = split_into_batches(audio, batch_size=16)
        model_input = prepare_model_input(batches)  # renamed: 'input' shadowed the builtin
        output = model(model_input)
        # Decode every example, not just output[0] — decoding only the first
        # row silently truncated transcriptions spanning multiple batch rows.
        return " ".join(decoder(example) for example in output).strip()
    except Exception:
        return f"STT Error: {traceback.format_exc()}"
47
+
48
+ # -------------------------------
49
+ # 4. Translation using MarianMT
50
+ # -------------------------------
51
def translate_text(text, target_lang):
    """Translate English *text* to *target_lang* with a MarianMT opus-mt model.

    Args:
        text: source text (assumed English, as produced by the STT step).
        target_lang: a key of the UI's language map ("original", "en", "ur",
            "hi", "ps", "ar"). "original" and "en" return the text unchanged.

    Returns:
        str: the translated text.
    """
    if target_lang == "original":
        return text

    lang_map = {
        "en": "en",
        "ur": "ur",
        "hi": "hi",
        "ps": "ps",
        "ar": "ar"
    }
    tgt = lang_map.get(target_lang, "en")
    # BUG FIX: "Helsinki-NLP/opus-mt-en-en" does not exist on the Hub, so
    # selecting English previously crashed — English input needs no translation.
    if tgt == "en":
        return text

    model_name = f"Helsinki-NLP/opus-mt-en-{tgt}"
    # Memoize loaded checkpoints on the function object so repeated calls
    # don't re-download and re-instantiate the same model.
    cache = getattr(translate_text, "_models", None)
    if cache is None:
        cache = translate_text._models = {}
    if model_name not in cache:
        cache[model_name] = (MarianTokenizer.from_pretrained(model_name),
                             MarianMTModel.from_pretrained(model_name))
    tokenizer, model = cache[model_name]

    batch = tokenizer([text], return_tensors="pt", padding=True)
    translated = model.generate(**batch)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
71
+
72
+ # -------------------------------
73
+ # 5. Main processing
74
+ # -------------------------------
75
def process_video(video, lang):
    """Gradio callback: extract audio, transcribe it, then translate the text.

    Returns a (text, debug_log) pair; on failure the text is empty and the
    log carries the traceback so the UI can show it.
    """
    try:
        if not video:
            return "", "No video uploaded"

        wav_path = extract_audio(video)
        transcript = stt(wav_path)
        translated = translate_text(transcript, lang)

        return translated, f"Audio extracted!\nTranscribed text length: {len(transcript)}"

    except Exception:
        return "", f"❌ ERROR:\n{traceback.format_exc()}"
89
+
90
+ # -------------------------------
91
+ # 6. Gradio UI
92
+ # -------------------------------
93
# Language codes offered in the UI, mapped to their human-readable names.
languages = {
    "original": "Original",
    "en": "English",
    "ur": "Urdu",
    "hi": "Hindi",
    "ps": "Pashto",
    "ar": "Arabic"
}

with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Video Text Translation (No API, Offline, CPU-friendly)")

    video_input = gr.Video(label="Upload Video")
    # Show the display name but pass the language code to process_video;
    # the bare code list previously left the names in `languages` unused.
    lang_dropdown = gr.Dropdown(
        choices=[(name, code) for code, name in languages.items()],
        value="original",
        label="Translate To",
    )
    btn = gr.Button("Generate Text")

    text_output = gr.Textbox(label="Transcribed / Translated Text", lines=10)
    debug_box = gr.Textbox(label="Debug Logs", lines=8)

    btn.click(process_video, inputs=[video_input, lang_dropdown], outputs=[text_output, debug_box])

demo.launch()