"""Offline video subtitle generator.

Pipeline: uploaded video -> 16 kHz WAV (moviepy) -> text (CMU Sphinx via
SpeechRecognition) -> optional translation (Argos Translate) -> Gradio UI.
"""

import contextlib
import logging
import os
import tempfile
import urllib.request
from typing import Optional, Tuple

import gradio as gr
import speech_recognition as sr
from argostranslate import package, translate
from moviepy.editor import VideoFileClip

logger = logging.getLogger(__name__)

# Local file name and download location of the English->Urdu Argos model.
MODEL_PATH = "en_ur.argosmodel"
MODEL_URL = "https://www.argosopentech.com/argospm/models/en_ur.argosmodel"

# Language the transcription is produced in / translated from.
SOURCE_LANG = "en"
# Dropdown value meaning "do not translate".
NO_TRANSLATION = "original"


# ---------------------------
# Setup Argos Translate (offline)
# ---------------------------
def setup_translation() -> None:
    """Download (if missing) and install the English->Urdu Argos model.

    The model is downloaded to a ``.part`` file and renamed only once the
    download has finished, so an interrupted download never leaves a
    truncated ``en_ur.argosmodel`` that would be mistaken for a valid model
    on the next start.

    Raises:
        Exception: whatever ``urllib`` or ``argostranslate`` raise when the
            download or the installation fails.
    """
    if not os.path.exists(MODEL_PATH):
        partial_path = MODEL_PATH + ".part"
        try:
            urllib.request.urlretrieve(MODEL_URL, partial_path)
            os.replace(partial_path, MODEL_PATH)
        finally:
            # After a successful rename the partial file is already gone.
            with contextlib.suppress(FileNotFoundError):
                os.remove(partial_path)
    package.install_from_path(MODEL_PATH)


# Translation is an optional feature: if the model cannot be set up (for
# example no network on first run) the app still starts and simply returns
# untranslated text.
try:
    setup_translation()
except Exception as exc:  # top-level boundary: log and keep the app usable
    logger.warning(
        "Translation model setup failed (%s); translation will be unavailable.",
        exc,
    )


# ---------------------------
# Extract audio from video
# ---------------------------
def extract_audio(video_path: str) -> str:
    """Write the audio track of *video_path* to a temporary 16 kHz WAV file.

    Args:
        video_path: path of the video file to read.

    Returns:
        Path of the newly created WAV file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: if the video has no audio track.
        Exception: any error raised by moviepy while reading or writing.
    """
    # mkstemp creates the file atomically; mktemp only returns a name and is
    # deprecated because another process can claim that name first.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        clip = VideoFileClip(video_path)
        try:
            if clip.audio is None:
                raise ValueError("video has no audio track")
            clip.audio.write_audiofile(audio_path, fps=16000, codec="pcm_s16le")
        finally:
            # Release the reader resources held by the clip.
            clip.close()
    except Exception:
        # Do not leave an empty or partial WAV behind on failure.
        with contextlib.suppress(OSError):
            os.remove(audio_path)
        raise
    return audio_path


# ---------------------------
# Transcribe audio using CMU Sphinx
# ---------------------------
def transcribe_audio(audio_path: str) -> str:
    """Transcribe the WAV file at *audio_path* with the offline Sphinx engine.

    Args:
        audio_path: path of an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or a bracketed placeholder message when Sphinx
        cannot understand the audio or reports an error.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as source:
        audio = recognizer.record(source)
    try:
        return recognizer.recognize_sphinx(audio)
    except sr.UnknownValueError:
        return "[Could not understand audio]"
    except sr.RequestError as e:
        return f"[Sphinx error: {e}]"


# ---------------------------
# Translate text using Argos Translate
# ---------------------------
def _translate(text: str, target_lang: str) -> Tuple[str, bool]:
    """Translate *text* from English and report whether a model was applied.

    Returns:
        ``(result, applied)``. ``applied`` is False when no installed Argos
        model covers English -> *target_lang*; ``result`` is then *text*
        unchanged.
    """
    installed_languages = translate.get_installed_languages()
    source = next((l for l in installed_languages if l.code == SOURCE_LANG), None)
    target = next((l for l in installed_languages if l.code == target_lang), None)
    if source is None or target is None:
        return text, False
    translation = source.get_translation(target)
    if translation is None:
        return text, False
    return translation.translate(text), True


def translate_text(text: str, target_lang: str) -> str:
    """Translate English *text* into *target_lang*.

    Args:
        text: text to translate.
        target_lang: language code, or ``"original"`` for no translation.

    Returns:
        The translated text, or *text* unchanged when no translation was
        requested, the target is English itself, or no installed model
        covers the requested language.
    """
    if target_lang in (NO_TRANSLATION, SOURCE_LANG):
        return text
    return _translate(text, target_lang)[0]


# ---------------------------
# Gradio Interface
# ---------------------------
# Language code -> display name. Only the codes are used as dropdown choices.
# Only the English->Urdu model is installed by setup_translation(); other
# targets fall back to untranslated text unless their models are installed
# separately.
languages = {
    "original": "No Translation",
    "ur": "Urdu",
    "hi": "Hindi",
    "ps": "Pashto",
    "ar": "Arabic",
    "en": "English",
}


def process_video(video_file, lang: Optional[str]) -> Tuple[str, str]:
    """Gradio callback: extract audio, transcribe it, optionally translate.

    Args:
        video_file: the uploaded video, either as a filepath string or as a
            file-like object exposing ``.name``; ``None`` when nothing was
            uploaded.
        lang: target language code chosen in the dropdown.

    Returns:
        ``(text, log)``: the transcribed/translated text and a step-by-step
        debug log. On extraction or transcription failure ``text`` is empty
        and ``log`` holds the error message.
    """
    if video_file is None:
        return "", "Audio extraction error: no video uploaded"
    # gr.Video passes a path string; tolerate file wrappers that carry .name.
    video_path = getattr(video_file, "name", video_file)

    log = []
    try:
        audio_path = extract_audio(video_path)
        log.append("Audio extracted!")
    except Exception as e:
        return "", f"Audio extraction error: {e}"

    try:
        text = transcribe_audio(audio_path)
        log.append(f"Transcribed text length: {len(text)}")
    except Exception as e:
        return "", f"STT Error: {e}"
    finally:
        # The temporary WAV is only needed for transcription.
        with contextlib.suppress(OSError):
            os.remove(audio_path)

    try:
        if lang in (NO_TRANSLATION, SOURCE_LANG):
            translated = text
            log.append("Translation done!")
        else:
            translated, applied = _translate(text, lang)
            if applied:
                log.append("Translation done!")
            else:
                log.append(
                    f"No installed translation model for {SOURCE_LANG} -> {lang}; "
                    "returning untranslated text."
                )
    except Exception as e:
        translated = text
        log.append(f"Translation error: {e}")

    return translated, "\n".join(log) + "\n"


demo = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Upload Video"),
        gr.Dropdown(list(languages), value="original", label="Translate To"),
    ],
    outputs=[
        gr.Textbox(label="Transcribed / Translated Text", interactive=False),
        gr.Textbox(label="Debug / Error Log", interactive=False),
    ],
    title="Offline Video Subtitle Generator",
    description="Upload a video → Extract audio → Generate subtitles → Optional translation → All offline, token-free",
)

if __name__ == "__main__":
    demo.launch()