Ahmadkhan12 committed on
Commit
880e201
verified
1 Parent(s): 1100805

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -16
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import subprocess
3
- import soundfile as sf
4
  import numpy as np
 
5
  from vosk import Model, KaldiRecognizer
6
  import json
7
  from datetime import timedelta
@@ -9,18 +9,19 @@ import os
9
 
10
 
11
  # -----------------------------
12
- # Load Vosk Model (EN)
13
  # -----------------------------
14
  if not os.path.exists("model"):
15
  os.system("wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip")
16
  os.system("unzip vosk-model-small-en-us-0.15.zip")
17
  os.system("mv vosk-model-small-en-us-0.15 model")
18
 
 
19
  model = Model("model")
20
 
21
 
22
  # -----------------------------
23
- # Extract Audio
24
  # -----------------------------
25
  def extract_audio(video_path):
26
  try:
@@ -35,22 +36,34 @@ def extract_audio(video_path):
35
  subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
36
  return audio_path, "Audio extracted!"
37
  except Exception as e:
38
- return None, f"FFmpeg Error: {e}"
39
 
40
 
41
  # -----------------------------
42
- # Offline Speech-to-Text (VOSK)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # -----------------------------
44
  def transcribe_audio(audio_path):
45
  try:
46
- audio, sr = sf.read(audio_path)
47
- audio = (audio * 32767).astype(np.int16)
48
 
49
  rec = KaldiRecognizer(model, 16000)
50
  rec.SetWords(True)
51
 
52
- text = ""
53
- for chunk in np.array_split(audio, 50):
54
  rec.AcceptWaveform(chunk.tobytes())
55
 
56
  result = json.loads(rec.FinalResult())
@@ -61,20 +74,20 @@ def transcribe_audio(audio_path):
61
 
62
  return text, "Transcription complete!"
63
  except Exception as e:
64
- return None, f"STT Error: {e}"
65
 
66
 
67
  # -----------------------------
68
- # Build SRT File
69
  # -----------------------------
70
  def make_srt(text):
71
  try:
72
  words = text.split()
73
  lines = []
74
- chunk = ""
75
 
 
76
  for w in words:
77
- if len(chunk.split()) < 8:
78
  chunk += w + " "
79
  else:
80
  lines.append(chunk.strip())
@@ -98,7 +111,7 @@ def make_srt(text):
98
 
99
  return file, "SRT created!"
100
  except Exception as e:
101
- return None, f"SRT Error: {e}"
102
 
103
 
104
  # -----------------------------
@@ -123,13 +136,13 @@ def process(video):
123
  # Gradio App
124
  # -----------------------------
125
  with gr.Blocks() as app:
126
- gr.Markdown("## 🎬 Offline Subtitle Generator (No Token, No Whisper, No API)")
127
 
128
  video_in = gr.Video(label="Upload Video")
129
  btn = gr.Button("Generate SRT")
130
 
131
  srt_out = gr.File(label="Download SRT")
132
- logs = gr.Textbox(label="Debug Log")
133
 
134
  btn.click(process, inputs=video_in, outputs=[srt_out, logs])
135
 
 
1
  import gradio as gr
2
  import subprocess
 
3
  import numpy as np
4
+ import wave
5
  from vosk import Model, KaldiRecognizer
6
  import json
7
  from datetime import timedelta
 
9
 
10
 
11
  # -----------------------------
12
+ # Download + Load VOSK model
13
  # -----------------------------
14
  if not os.path.exists("model"):
15
  os.system("wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip")
16
  os.system("unzip vosk-model-small-en-us-0.15.zip")
17
  os.system("mv vosk-model-small-en-us-0.15 model")
18
 
19
+
20
  model = Model("model")
21
 
22
 
23
  # -----------------------------
24
+ # Extract audio as WAV (ffmpeg)
25
  # -----------------------------
26
  def extract_audio(video_path):
27
  try:
 
36
  subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
37
  return audio_path, "Audio extracted!"
38
  except Exception as e:
39
+ return None, f"FFmpeg Error:\n{e}"
40
 
41
 
42
  # -----------------------------
43
+ # Read WAV using Python's 'wave'
44
+ # -----------------------------
45
def read_wave(path):
    """Read a 16-bit PCM WAV file and return its samples as an int16 array.

    Args:
        path: Filesystem path of the WAV file to read.

    Returns:
        A one-dimensional ``numpy.ndarray`` of dtype ``int16`` containing
        every frame in the file. If the file has more than one channel the
        samples stay interleaved, exactly as stored.

    Raises:
        RuntimeError: If the file cannot be opened or parsed, or if it is
            not 16-bit PCM. The message is prefixed with "WAV Read Error:"
            and the underlying exception is chained as ``__cause__``.
    """
    try:
        # The context manager closes the file even if reading fails midway.
        with wave.open(path, "rb") as wf:
            sample_width = wf.getsampwidth()
            # The bytes are reinterpreted as int16 below, so any other
            # sample width would silently yield garbage samples.
            if sample_width != 2:
                raise ValueError(
                    f"expected 16-bit PCM audio, got {sample_width * 8}-bit"
                )
            frames = wf.readframes(wf.getnframes())
        return np.frombuffer(frames, dtype=np.int16)
    except Exception as e:
        raise RuntimeError(f"WAV Read Error: {e}") from e
54
+
55
+
56
+ # -----------------------------
57
+ # Vosk Offline Speech-to-Text
58
  # -----------------------------
59
  def transcribe_audio(audio_path):
60
  try:
61
+ audio = read_wave(audio_path)
 
62
 
63
  rec = KaldiRecognizer(model, 16000)
64
  rec.SetWords(True)
65
 
66
+ for chunk in np.array_split(audio, 30):
 
67
  rec.AcceptWaveform(chunk.tobytes())
68
 
69
  result = json.loads(rec.FinalResult())
 
74
 
75
  return text, "Transcription complete!"
76
  except Exception as e:
77
+ return None, f"STT Error:\n{e}"
78
 
79
 
80
  # -----------------------------
81
+ # Make SRT subtitles
82
  # -----------------------------
83
  def make_srt(text):
84
  try:
85
  words = text.split()
86
  lines = []
 
87
 
88
+ chunk = ""
89
  for w in words:
90
+ if len(chunk.split()) < 7:
91
  chunk += w + " "
92
  else:
93
  lines.append(chunk.strip())
 
111
 
112
  return file, "SRT created!"
113
  except Exception as e:
114
+ return None, f"SRT Error:\n{e}"
115
 
116
 
117
  # -----------------------------
 
136
  # Gradio App
137
  # -----------------------------
138
  with gr.Blocks() as app:
139
+ gr.Markdown("## 🎬 Offline Subtitle Generator (No Whisper No Token No Soundfile · 100% Free)")
140
 
141
  video_in = gr.Video(label="Upload Video")
142
  btn = gr.Button("Generate SRT")
143
 
144
  srt_out = gr.File(label="Download SRT")
145
+ logs = gr.Textbox(label="Debug Logs")
146
 
147
  btn.click(process, inputs=video_in, outputs=[srt_out, logs])
148