Ahmadkhan12 committed on
Commit
62ce611
·
verified ·
1 Parent(s): 880e201

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -126
app.py CHANGED
@@ -1,149 +1,66 @@
1
  import gradio as gr
2
  import subprocess
3
- import numpy as np
4
- import wave
5
- from vosk import Model, KaldiRecognizer
6
- import json
7
- from datetime import timedelta
8
  import os
 
 
 
 
9
 
 
 
10
 
11
- # -----------------------------
12
- # Download + Load VOSK model
13
- # -----------------------------
14
- if not os.path.exists("model"):
15
- os.system("wget https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip")
16
- os.system("unzip vosk-model-small-en-us-0.15.zip")
17
- os.system("mv vosk-model-small-en-us-0.15 model")
18
-
19
-
20
- model = Model("model")
21
-
22
-
23
- # -----------------------------
24
- # Extract audio as WAV (ffmpeg)
25
- # -----------------------------
26
  def extract_audio(video_path):
27
- try:
28
- audio_path = "audio.wav"
29
- cmd = [
30
- "ffmpeg", "-y",
31
- "-i", video_path,
32
- "-ac", "1", "-ar", "16000",
33
- audio_path
34
- ]
35
-
36
- subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
37
- return audio_path, "Audio extracted!"
38
- except Exception as e:
39
- return None, f"FFmpeg Error:\n{e}"
40
 
41
-
42
- # -----------------------------
43
- # Read WAV using Python's 'wave'
44
- # -----------------------------
45
- def read_wave(path):
46
  try:
47
- wf = wave.open(path, "rb")
48
- frames = wf.readframes(wf.getnframes())
49
- audio = np.frombuffer(frames, dtype=np.int16)
50
- wf.close()
51
- return audio
52
- except Exception as e:
53
- raise RuntimeError(f"WAV Read Error: {e}")
54
 
 
 
55
 
56
- # -----------------------------
57
- # Vosk Offline Speech-to-Text
58
- # -----------------------------
59
- def transcribe_audio(audio_path):
60
- try:
61
- audio = read_wave(audio_path)
62
-
63
- rec = KaldiRecognizer(model, 16000)
64
- rec.SetWords(True)
65
 
66
- for chunk in np.array_split(audio, 30):
67
- rec.AcceptWaveform(chunk.tobytes())
68
 
69
- result = json.loads(rec.FinalResult())
70
- text = result.get("text", "")
 
 
 
 
 
71
 
72
- if not text:
73
- return None, "No speech detected."
74
 
75
- return text, "Transcription complete!"
76
- except Exception as e:
77
- return None, f"STT Error:\n{e}"
 
78
 
 
79
 
80
- # -----------------------------
81
- # Make SRT subtitles
82
- # -----------------------------
83
- def make_srt(text):
84
- try:
85
- words = text.split()
86
- lines = []
87
-
88
- chunk = ""
89
- for w in words:
90
- if len(chunk.split()) < 7:
91
- chunk += w + " "
92
- else:
93
- lines.append(chunk.strip())
94
- chunk = w + " "
95
-
96
- if chunk:
97
- lines.append(chunk.strip())
98
-
99
- srt_out = ""
100
- for i, caption in enumerate(lines, start=1):
101
- start = timedelta(seconds=(i - 1) * 3)
102
- end = timedelta(seconds=i * 3)
103
-
104
- srt_out += f"{i}\n"
105
- srt_out += f"{str(start)[:-3].replace('.', ',')} --> {str(end)[:-3].replace('.', ',')}\n"
106
- srt_out += caption + "\n\n"
107
-
108
- file = "subtitles.srt"
109
- with open(file, "w", encoding="utf-8") as f:
110
- f.write(srt_out)
111
-
112
- return file, "SRT created!"
113
  except Exception as e:
114
- return None, f"SRT Error:\n{e}"
115
-
116
-
117
- # -----------------------------
118
- # Main Pipeline
119
- # -----------------------------
120
- def process(video):
121
- audio, log1 = extract_audio(video)
122
- if not audio:
123
- return None, log1
124
-
125
- text, log2 = transcribe_audio(audio)
126
- if not text:
127
- return None, log2
128
-
129
- srt_path, log3 = make_srt(text)
130
-
131
- logs = f"{log1}\n{log2}\n{log3}"
132
- return srt_path, logs
133
 
134
 
135
- # -----------------------------
136
- # Gradio App
137
- # -----------------------------
138
- with gr.Blocks() as app:
139
- gr.Markdown("## 🎬 Offline Subtitle Generator (No Whisper · No Token · No Soundfile · 100% Free)")
140
 
141
- video_in = gr.Video(label="Upload Video")
142
- btn = gr.Button("Generate SRT")
143
 
144
- srt_out = gr.File(label="Download SRT")
145
- logs = gr.Textbox(label="Debug Logs")
146
 
147
- btn.click(process, inputs=video_in, outputs=[srt_out, logs])
148
 
149
- app.launch()
 
1
  import gradio as gr
2
  import subprocess
 
 
 
 
 
3
  import os
4
+ import traceback
5
+ import srt
6
+ from datetime import timedelta
7
+ from faster_whisper import WhisperModel
8
 
9
+ # Load tiny model (best for HF free)
10
+ model = WhisperModel("tiny", device="cpu", compute_type="int8")
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def extract_audio(video_path):
13
+ audio_path = "audio.wav"
14
+ cmd = f"ffmpeg -y -i '{video_path}' -ar 16000 -ac 1 -f wav {audio_path}"
15
+ subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
16
+ return audio_path
 
 
 
 
 
 
 
 
 
17
 
18
+ def generate_srt(video):
 
 
 
 
19
  try:
20
+ video_path = video
21
+ if not video_path:
22
+ return None, "No file uploaded"
 
 
 
 
23
 
24
+ # Extract audio
25
+ audio_path = extract_audio(video_path)
26
 
27
+ # Transcribe
28
+ segments, info = model.transcribe(audio_path)
 
 
 
 
 
 
 
29
 
30
+ subs = []
31
+ idx = 1
32
 
33
+ for seg in segments:
34
+ start = timedelta(seconds=seg.start)
35
+ end = timedelta(seconds=seg.end)
36
+ subs.append(
37
+ srt.Subtitle(index=idx, start=start, end=end, content=seg.text)
38
+ )
39
+ idx += 1
40
 
41
+ srt_data = srt.compose(subs)
 
42
 
43
+ # Save file
44
+ output_path = "output.srt"
45
+ with open(output_path, "w", encoding="utf-8") as f:
46
+ f.write(srt_data)
47
 
48
+ return output_path, "SRT successfully generated!"
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  except Exception as e:
51
+ error_text = traceback.format_exc()
52
+ return None, f"❌ ERROR:\n{error_text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
+ with gr.Blocks() as demo:
56
+ gr.Markdown("## 🎧 Auto SRT Generator (No Token, No Whisper API, Fully Local)")
 
 
 
57
 
58
+ video_input = gr.Video(label="Upload Video")
59
+ generate_btn = gr.Button("Generate SRT")
60
 
61
+ srt_output = gr.File(label="Download SRT")
62
+ debug_box = gr.Textbox(label="Debug Log", lines=8)
63
 
64
+ generate_btn.click(generate_srt, inputs=video_input, outputs=[srt_output, debug_box])
65
 
66
+ demo.launch()