File size: 3,434 Bytes
5725cca
9ce4eb9
cc78ef3
9ce4eb9
 
 
86e09e6
9ce4eb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea3e15
9ce4eb9
 
 
62ce611
9ea3e15
9ce4eb9
 
 
65dd572
9ce4eb9
 
 
880e201
9ce4eb9
 
 
 
 
 
86e09e6
9ce4eb9
 
 
 
65dd572
86e09e6
9ce4eb9
 
 
 
 
 
 
65dd572
9ce4eb9
1b673e2
9ce4eb9
 
 
65dd572
9ce4eb9
 
 
 
 
 
65dd572
 
9ce4eb9
 
 
 
 
 
 
77ff116
9ce4eb9
 
 
 
 
 
 
 
 
 
 
 
317ef68
9ce4eb9
317ef68
9ce4eb9
 
 
 
 
 
 
 
 
 
 
 
 
317ef68
62ce611
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
from moviepy.editor import VideoFileClip
import speech_recognition as sr
from argostranslate import package, translate
import os
import tempfile

# ---------------------------
# Set up Argos Translate (model is downloaded once, then reused from disk)
# ---------------------------
def setup_translation():
    """Download and install the English->Urdu Argos Translate model if needed.

    The presence of ``en_ur.argosmodel`` in the current working directory is
    used as the "already set up" marker: when it exists, nothing is done.
    To keep that marker trustworthy, the file is only put in place after a
    complete download and is removed again if installation fails, so the
    next start retries instead of silently skipping setup.

    Raises:
        Any exception raised by ``urllib.request.urlretrieve`` or
        ``package.install_from_path``; nothing is swallowed here.
    """
    model_path = "en_ur.argosmodel"
    if os.path.exists(model_path):
        return

    import urllib.request

    url = "https://www.argosopentech.com/argospm/models/en_ur.argosmodel"
    partial_path = model_path + ".part"
    try:
        # Download under a temporary name so an interrupted transfer never
        # leaves a truncated file that looks like a finished model.
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, model_path)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)

    try:
        package.install_from_path(model_path)
    except Exception:
        # Drop the marker file so the next run downloads and installs again.
        os.remove(model_path)
        raise
# Runs at import time, before the Gradio interface below is built and launched.
setup_translation()

# ---------------------------
# Extract audio from video
# ---------------------------
def extract_audio(video_path):
    """Write the audio track of a video to a temporary WAV file.

    The audio is written with ``fps=16000`` and the ``pcm_s16le`` codec.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly written ``.wav`` file. The file is not deleted
        here; the caller is responsible for removing it.

    Raises:
        ValueError: If the video has no audio track.
    """
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("video has no audio track")

        # mkstemp creates the file atomically, unlike the deprecated mktemp
        # which only returns a name that another process could claim first.
        fd, audio_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            clip.audio.write_audiofile(audio_path, fps=16000, codec="pcm_s16le")
        except Exception:
            # Do not leave an empty or partial file behind on failure.
            os.remove(audio_path)
            raise
    finally:
        # Release the reader resources held by the clip.
        clip.close()
    return audio_path

# ---------------------------
# Transcribe audio using CMU Sphinx
# ---------------------------
def transcribe_audio(audio_path):
    """Transcribe an audio file with the Sphinx recognizer.

    Args:
        audio_path: Path of the audio file to load via ``sr.AudioFile``.

    Returns:
        The recognized text. When recognition fails, a bracketed placeholder
        string is returned instead of raising: one for unintelligible audio
        (``sr.UnknownValueError``) and one carrying the error message for
        ``sr.RequestError``.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_path) as audio_file:
        recording = recognizer.record(audio_file)

    try:
        transcript = recognizer.recognize_sphinx(recording)
    except sr.UnknownValueError:
        return "[Could not understand audio]"
    except sr.RequestError as err:
        return f"[Sphinx error: {err}]"
    return transcript

# ---------------------------
# Translate text using Argos Translate
# ---------------------------
def translate_text(text, target_lang):
    """Translate English text into ``target_lang`` using Argos Translate.

    Args:
        text: The text to translate; treated as English.
        target_lang: Language code to translate to, or ``"original"`` to
            skip translation.

    Returns:
        The translated text, or ``text`` unchanged when no translation is
        requested, the target equals the source language, the text is
        blank, or no usable translation is installed for the language pair.
    """
    source_lang = "en"

    # Nothing to do: translation disabled, or target is the source language.
    if target_lang in ("original", source_lang):
        return text
    # Blank input has nothing to translate; skip loading the models.
    if not text or text.isspace():
        return text

    installed = translate.get_installed_languages()
    source = next((lang for lang in installed if lang.code == source_lang), None)
    target = next((lang for lang in installed if lang.code == target_lang), None)
    if source is None or target is None:
        return text

    translation = source.get_translation(target)
    # NOTE(review): get_translation appears to return None when both
    # languages are installed but no model links them; confirm against the
    # installed argostranslate version.
    if translation is None:
        return text
    return translation.translate(text)

# ---------------------------
# Gradio Interface
# ---------------------------
# Maps the option value passed to translate_text (a language code, or
# "original" to skip translation) to a human-readable name.
# Only the keys are used by the dropdown below; the names are not displayed.
# setup_translation installs only the English->Urdu model, and translate_text
# returns the text unchanged for any language it cannot find installed.
languages = {
    "original": "No Translation",
    "ur": "Urdu",
    "hi": "Hindi",
    "ps": "Pashto",
    "ar": "Arabic",
    "en": "English"
}

def process_video(video_file, lang):
    """Run the full pipeline: extract audio, transcribe, optionally translate.

    Args:
        video_file: The uploaded video, either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object exposing the path
            through a ``name`` attribute. ``None`` means nothing was uploaded.
        lang: Target language code handed to ``translate_text``.

    Returns:
        A ``(text, log)`` tuple. ``text`` is the translated transcript (or
        the untranslated transcript if translation raised), and ``log`` is
        a newline-separated progress/error log. If audio extraction or
        transcription fails, ``text`` is ``""`` and ``log`` holds the error.
    """
    if video_file is None:
        return "", "Audio extraction error: no video file provided"

    # Accept both a plain path and a file-like wrapper carrying ``.name``.
    # Path-like objects are checked first because their ``.name`` is only
    # the basename, not the full path.
    if isinstance(video_file, (str, os.PathLike)):
        video_path = os.fspath(video_file)
    else:
        video_path = video_file.name

    error_log = ""
    try:
        audio_path = extract_audio(video_path)
        error_log += "Audio extracted!\n"
    except Exception as e:
        return "", f"Audio extraction error: {e}"

    try:
        text = transcribe_audio(audio_path)
        error_log += f"Transcribed text length: {len(text)}\n"
    except Exception as e:
        return "", f"STT Error: {e}"
    finally:
        # The extracted audio is only needed for transcription; remove it
        # so repeated requests do not fill the temp directory. Cleanup is
        # best-effort and must not mask the real result.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    try:
        translated = translate_text(text, lang)
        error_log += "Translation done!\n"
    except Exception as e:
        # Fall back to the untranslated transcript rather than losing it.
        translated = text
        error_log += f"Translation error: {e}\n"

    return translated, error_log

# Input widgets: the uploaded video and the target-language selector
# (the dropdown offers the keys of ``languages``).
input_components = [
    gr.Video(label="Upload Video"),
    gr.Dropdown(list(languages), value="original", label="Translate To"),
]

# Output widgets: the resulting text and the accompanying log, both read-only.
output_components = [
    gr.Textbox(label="Transcribed / Translated Text", interactive=False),
    gr.Textbox(label="Debug / Error Log", interactive=False),
]

demo = gr.Interface(
    fn=process_video,
    inputs=input_components,
    outputs=output_components,
    title="Offline Video Subtitle Generator",
    description="Upload a video → Extract audio → Generate subtitles → Optional translation → All offline, token-free",
)

demo.launch()