Ahmadkhan12 committed on
Commit
86e09e6
·
verified ·
1 Parent(s): af660ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -59
app.py CHANGED
@@ -1,81 +1,114 @@
1
  import gradio as gr
2
  import subprocess
3
- import os
4
  import traceback
5
- from datetime import timedelta
6
- from faster_whisper import WhisperModel
7
-
8
- # Load tiny whisper model for HF Spaces free tier
9
- model = WhisperModel("tiny", device="cpu", compute_type="int8")
10
-
 
 
 
11
def extract_audio(video_path):
    """Extract mono 16 kHz WAV audio from *video_path* with ffmpeg.

    Returns:
        str: path of the written WAV file ("audio.wav").

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    audio_path = "audio.wav"
    # Argument list + shell=False avoids the shell-injection/quoting bug the
    # previous f-string command had with paths containing quotes or spaces.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # 16 kHz sample rate expected by the STT model
        "-ac", "1",      # mono
        "-f", "wav",
        audio_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Previously ffmpeg failures were silently swallowed and a missing/stale
    # audio.wav was returned; surface the error so callers' try/except sees it.
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {result.stderr.decode(errors='replace')}")
    return audio_path
16
 
17
def format_timestamp(seconds):
    """Render a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    as_td = timedelta(seconds=seconds)
    whole = int(as_td.total_seconds())
    ms = int((as_td.total_seconds() - whole) * 1000)

    # Peel off seconds, then minutes; whatever remains is hours.
    minutes, secs = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)

    return f"{hours:02}:{minutes:02}:{secs:02},{ms:03}"
27
-
28
def generate_srt(video):
    """Transcribe *video* with the module-level Whisper model and write output.srt.

    Returns (srt_path, status_message); on any failure returns
    (None, traceback text) so the UI can display the error.
    """
    try:
        if not video:
            return None, "No file uploaded"

        wav_path = extract_audio(video)
        segments, _info = model.transcribe(wav_path)

        # Assemble SRT cue blocks: index, time range, text, blank separator.
        cues = []
        for number, segment in enumerate(segments, start=1):
            cues.append(str(number))
            cues.append(f"{format_timestamp(segment.start)} --> {format_timestamp(segment.end)}")
            cues.append(segment.text.strip())
            cues.append("")  # Blank line between cues

        srt_path = "output.srt"
        with open(srt_path, "w", encoding="utf-8") as out:
            out.write("\n".join(cues))

        return srt_path, "SRT successfully generated!"

    except Exception:
        return None, f"❌ ERROR:\n{traceback.format_exc()}"
67
-
 
 
 
 
 
 
 
 
 
 
68
 
69
- # Build UI
70
# Wire up the Gradio interface: upload a video, click, download the SRT.
with gr.Blocks() as demo:
    gr.Markdown("## 🎧 Auto SRT Generator (No Token, No API, No Whisper API, 100% Local)")

    uploaded_video = gr.Video(label="Upload Video")
    run_button = gr.Button("Generate SRT")

    srt_file = gr.File(label="Download SRT")
    log_output = gr.Textbox(label="Debug Log", lines=8)

    run_button.click(generate_srt, inputs=uploaded_video, outputs=[srt_file, log_output])

demo.launch()
 
1
  import gradio as gr
2
  import subprocess
 
3
  import traceback
4
+ import os
5
+ from transformers import MarianMTModel, MarianTokenizer
6
+ import torch
7
+ import numpy as np
8
+ import wave
9
+
10
+ # -------------------------------
11
+ # 1. Audio extraction
12
+ # -------------------------------
13
def extract_audio(video_path):
    """Extract mono 16 kHz WAV audio from *video_path* with ffmpeg.

    Returns:
        str: path of the written WAV file ("audio.wav").

    Raises:
        RuntimeError: if ffmpeg exits with a non-zero status.
    """
    audio_path = "audio.wav"
    # Argument list + shell=False avoids the shell-injection/quoting bug the
    # previous f-string command had with paths containing quotes or spaces.
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",  # 16 kHz sample rate expected by the STT model
        "-ac", "1",      # mono
        "-f", "wav",
        audio_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Previously ffmpeg failures were silently swallowed and a missing/stale
    # audio.wav was returned; surface the error so callers' try/except sees it.
    if result.returncode != 0:
        raise RuntimeError(f"ffmpeg failed: {result.stderr.decode(errors='replace')}")
    return audio_path
18
 
19
+ # -------------------------------
20
+ # 2. Read WAV
21
+ # -------------------------------
22
def read_wav(path):
    """Load a PCM WAV file and return its samples as an int16 numpy array.

    NOTE(review): assumes 16-bit samples; other sample widths would be
    misread by the int16 view — confirm against extract_audio's output.
    """
    with wave.open(path, "rb") as wav_file:
        raw = wav_file.readframes(wav_file.getnframes())
    return np.frombuffer(raw, dtype=np.int16)
27
+
28
+ # -------------------------------
29
+ # 3. Simple STT using Silero
30
+ # -------------------------------
31
def stt(audio_path):
    """Transcribe an English WAV file with the Silero STT model on CPU.

    Returns the transcribed text, or an "STT Error: ..." string containing
    the traceback on failure (callers display the return value as text).
    """
    try:
        import torch  # local import: keep module import cheap if STT is unused
        import torchaudio  # noqa: F401 — presumably needed by silero's read_audio; verify

        # Load the pipeline once and memoize it on the function object;
        # torch.hub.load otherwise re-fetches and rebuilds it on every call.
        if not hasattr(stt, "_pipeline"):
            stt._pipeline = torch.hub.load(repo_or_dir='snakers4/silero-models',
                                           model='silero_stt', language='en', device='cpu')
        model, decoder, utils = stt._pipeline
        (read_batch, split_into_batches, read_audio, prepare_model_input) = utils

        audio = read_audio(audio_path)
        batches = split_into_batches(audio, batch_size=16)
        model_input = prepare_model_input(batches)  # renamed: 'input' shadowed the builtin
        output = model(model_input)
        # Decode every example, not just output[0] — decoding only the first
        # row silently truncated transcriptions spanning multiple batch rows.
        return " ".join(decoder(example) for example in output).strip()
    except Exception:
        return f"STT Error: {traceback.format_exc()}"
47
+
48
+ # -------------------------------
49
+ # 4. Translation using MarianMT
50
+ # -------------------------------
51
def translate_text(text, target_lang):
    """Translate English *text* to *target_lang* with a MarianMT opus-mt model.

    Args:
        text: source text (assumed English, as produced by the STT step).
        target_lang: a key of the UI's language map ("original", "en", "ur",
            "hi", "ps", "ar"). "original" and "en" return the text unchanged.

    Returns:
        str: the translated text.
    """
    if target_lang == "original":
        return text

    lang_map = {
        "en": "en",
        "ur": "ur",
        "hi": "hi",
        "ps": "ps",
        "ar": "ar"
    }
    tgt = lang_map.get(target_lang, "en")
    # BUG FIX: "Helsinki-NLP/opus-mt-en-en" does not exist on the Hub, so
    # selecting English previously crashed — English input needs no translation.
    if tgt == "en":
        return text

    model_name = f"Helsinki-NLP/opus-mt-en-{tgt}"
    # Memoize loaded checkpoints on the function object so repeated calls
    # don't re-download and re-instantiate the same model.
    cache = getattr(translate_text, "_models", None)
    if cache is None:
        cache = translate_text._models = {}
    if model_name not in cache:
        cache[model_name] = (MarianTokenizer.from_pretrained(model_name),
                             MarianMTModel.from_pretrained(model_name))
    tokenizer, model = cache[model_name]

    batch = tokenizer([text], return_tensors="pt", padding=True)
    translated = model.generate(**batch)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
71
+
72
+ # -------------------------------
73
+ # 5. Main processing
74
+ # -------------------------------
75
def process_video(video, lang):
    """Gradio callback: extract audio, transcribe it, then translate the text.

    Returns a (text, debug_log) pair; on failure the text is empty and the
    log carries the traceback so the UI can show it.
    """
    try:
        if not video:
            return "", "No video uploaded"

        wav_path = extract_audio(video)
        transcript = stt(wav_path)
        translated = translate_text(transcript, lang)

        return translated, f"Audio extracted!\nTranscribed text length: {len(transcript)}"

    except Exception:
        return "", f"❌ ERROR:\n{traceback.format_exc()}"
89
+
90
+ # -------------------------------
91
+ # 6. Gradio UI
92
+ # -------------------------------
93
# Language codes offered in the UI, mapped to their human-readable names.
languages = {
    "original": "Original",
    "en": "English",
    "ur": "Urdu",
    "hi": "Hindi",
    "ps": "Pashto",
    "ar": "Arabic"
}

with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Video Text Translation (No API, Offline, CPU-friendly)")

    video_input = gr.Video(label="Upload Video")
    # Show the display name but pass the language code to process_video;
    # the bare code list previously left the names in `languages` unused.
    lang_dropdown = gr.Dropdown(
        choices=[(name, code) for code, name in languages.items()],
        value="original",
        label="Translate To",
    )
    btn = gr.Button("Generate Text")

    text_output = gr.Textbox(label="Transcribed / Translated Text", lines=10)
    debug_box = gr.Textbox(label="Debug Logs", lines=8)

    btn.click(process_video, inputs=[video_input, lang_dropdown], outputs=[text_output, debug_box])

demo.launch()