lvvignesh2122 committed on
Commit
cc8bbf8
·
verified ·
1 Parent(s): ef2a1d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -129
app.py CHANGED
@@ -2,149 +2,85 @@ import os
2
  import tempfile
3
  from pathlib import Path
4
  import gradio as gr
 
5
  import ffmpeg
6
- import whisper
 
 
 
 
 
 
 
7
 
8
  # -------- Helper functions --------
9
  def _format_timestamp(seconds: float) -> str:
 
 
 
10
  ms = int(round(seconds * 1000))
11
- hours = ms // 3600000
12
- ms %= 3600000
13
- minutes = ms // 60000
14
- ms %= 60000
15
- seconds = ms // 1000
16
- millis = ms % 1000
17
- return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
18
-
19
-
20
- def segments_to_srt(segments: list) -> str:
21
- lines = []
22
- for i, seg in enumerate(segments, start=1):
23
- start_ts = _format_timestamp(seg["start"])
24
- end_ts = _format_timestamp(seg["end"])
25
- text = seg["text"].strip().replace("\n", " ")
26
- if text:
27
- lines.append(f"{i}\n{start_ts} --> {end_ts}\n{text}\n")
28
- return "\n".join(lines)
29
-
30
-
31
- # -------- Config --------
32
- MODEL_NAME = "base"
33
- OUTPUT_DIR = Path("outputs/subtitles")
34
- OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
35
-
36
- print(f"Loading Whisper model '{MODEL_NAME}'...")
37
- model = whisper.load_model(MODEL_NAME)
38
- print("Model loaded successfully!")
39
-
40
-
41
- # -------- Core functions --------
42
- def extract_audio(input_path: str, out_path: str):
43
- """Extract mono 16 kHz WAV using ffmpeg"""
44
- (
45
- ffmpeg
46
- .input(input_path)
47
- .output(out_path, format="wav", acodec="pcm_s16le", ac=1, ar="16000")
48
- .overwrite_output()
49
- .run(quiet=True)
50
- )
51
-
52
-
53
- def transcribe_file_to_srt(file_obj, language="en"):
54
- tmp_dir = Path(tempfile.mkdtemp(prefix="subgen_"))
55
- input_path = tmp_dir / Path(file_obj.name).name
56
- with open(input_path, "wb") as f:
57
- f.write(file_obj.read())
58
-
59
- audio_path = tmp_dir / "audio.wav"
60
- extract_audio(str(input_path), str(audio_path))
61
-
62
- result = model.transcribe(str(audio_path), language=language)
63
- segments = []
64
- for i, seg in enumerate(result["segments"]):
65
- segments.append({
66
- "start": seg["start"],
67
- "end": seg["end"],
68
- "text": seg["text"]
69
- })
70
-
71
- srt_text = segments_to_srt(segments)
72
- output_path = OUTPUT_DIR / f"{Path(file_obj.name).stem}.srt"
73
- output_path.write_text(srt_text, encoding="utf-8")
74
-
75
- return str(output_path), "✅ Subtitles generated successfully!"
76
-
77
-
78
- # -------- UI Styling --------
79
- def build_style(theme="light"):
80
- if theme == "dark":
81
- bg = "#0f2027"
82
- color = "#ffffff"
83
- button = "#00adb5"
84
- else:
85
- bg = "#f0f2f5"
86
- color = "#000000"
87
- button = "#0077ff"
88
-
89
- return f"""
90
- <style>
91
- body {{
92
- background: {bg};
93
- color: {color};
94
- font-family: 'Poppins', sans-serif;
95
- transition: background 0.5s, color 0.5s;
96
- }}
97
- .gr-button {{
98
- background-color: {button} !important;
99
- color: white !important;
100
- font-weight: bold;
101
- border-radius: 10px !important;
102
- }}
103
- .gr-button:hover {{
104
- filter: brightness(1.2);
105
- }}
106
- </style>
107
- """
108
 
109
 
110
  # -------- Gradio UI --------
111
- with gr.Blocks(title="AI Subtitle Generator") as demo:
112
- theme_state = gr.State("light")
113
-
114
- style_html = gr.HTML(build_style("light"))
115
- gr.HTML("<h1 style='text-align:center;'>🎬 AI Subtitle Generator</h1>")
116
- gr.HTML("<p style='text-align:center;'>Upload a video or audio file to generate English <b>.srt</b> subtitles.</p>")
117
-
118
- with gr.Row():
119
- input_file = gr.File(label="Upload video/audio file")
120
- output_file = gr.File(label="Download .srt file")
121
 
122
- status_box = gr.Textbox(label="Status", interactive=False)
 
123
 
124
- with gr.Row():
125
- generate_btn = gr.Button("🚀 Generate Subtitles")
126
- clear_btn = gr.Button("🧹 Clear Chat")
127
- theme_btn = gr.Button("🌗 Toggle Theme")
128
 
129
- # Button logic
130
- def on_generate(file):
131
- if not file:
132
- return None, "⚠️ Please upload a file first!"
133
- srt_path, msg = transcribe_file_to_srt(file)
134
- return srt_path, msg
135
 
136
- def on_clear():
137
- return None, None, ""
138
 
139
- def on_theme(current):
140
- new_theme = "dark" if current == "light" else "light"
141
- return new_theme, gr.update(value=build_style(new_theme))
142
 
143
- generate_btn.click(on_generate, inputs=[input_file], outputs=[output_file, status_box])
144
- clear_btn.click(on_clear, outputs=[input_file, output_file, status_box])
145
- theme_btn.click(on_theme, inputs=[theme_state], outputs=[theme_state, style_html])
146
 
147
- gr.HTML("<p style='text-align:center;font-size:14px;opacity:0.6;'>✨ Built with OpenAI Whisper + Gradio</p>")
148
 
 
149
  if __name__ == "__main__":
150
- demo.launch()
 
 
2
  import tempfile
3
  from pathlib import Path
4
  import gradio as gr
5
+ from faster_whisper import WhisperModel
6
  import ffmpeg
7
+
8
+ # -------- Configuration --------
9
+ MODEL_NAME = "small" # choices: tiny, base, small, medium, large-v3
10
+ DEVICE = "cuda" if os.environ.get("USE_CUDA", "0") == "1" else "cpu"
11
+
12
+ # Load model once
13
+ print(f"🚀 Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
14
+ model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type="float16" if DEVICE == "cuda" else "int8")
15
 
16
  # -------- Helper functions --------
17
  def _format_timestamp(seconds: float) -> str:
18
+ """Return hh:mm:ss,ms timestamp format."""
19
+ if seconds is None:
20
+ return "00:00:00,000"
21
  ms = int(round(seconds * 1000))
22
+ hours = ms // 3_600_000
23
+ minutes = (ms % 3_600_000) // 60_000
24
+ seconds = (ms % 60_000) // 1000
25
+ milliseconds = ms % 1000
26
+ return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
27
+
28
+
29
+ def transcribe(audio_file):
30
+ """Transcribe uploaded audio file and return text + SRT."""
31
+ try:
32
+ # Convert to wav if needed (ensures consistency)
33
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
34
+ (
35
+ ffmpeg
36
+ .input(audio_file)
37
+ .output(tmp_wav.name, format="wav", acodec="pcm_s16le", ac=1, ar="16k")
38
+ .overwrite_output()
39
+ .run(quiet=True)
40
+ )
41
+ wav_path = tmp_wav.name
42
+
43
+ # Run transcription
44
+ segments, info = model.transcribe(wav_path, beam_size=5)
45
+ text_output = ""
46
+ srt_output = ""
47
+
48
+ for i, segment in enumerate(segments, start=1):
49
+ start = _format_timestamp(segment.start)
50
+ end = _format_timestamp(segment.end)
51
+ srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
52
+ text_output += segment.text.strip() + " "
53
+
54
+ return text_output.strip(), srt_output
55
+
56
+ except Exception as e:
57
+ return f"Error: {str(e)}", ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  # -------- Gradio UI --------
61
+ def build_ui():
62
+ with gr.Blocks(title="🎙️ Faster-Whisper Transcriber") as app:
63
+ gr.Markdown("# 🎧 Fast & Accurate Speech-to-Text using Faster-Whisper")
64
+ gr.Markdown("Upload or record an audio file to get instant transcription.")
 
 
 
 
 
 
65
 
66
+ with gr.Row():
67
+ audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input")
68
 
69
+ with gr.Row():
70
+ text_output = gr.Textbox(label="Transcribed Text", lines=6)
71
+ srt_output = gr.Textbox(label="SRT Subtitle", lines=6)
 
72
 
73
+ transcribe_btn = gr.Button("Transcribe")
 
 
 
 
 
74
 
75
+ transcribe_btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, srt_output])
 
76
 
77
+ gr.Markdown("---")
78
+ gr.Markdown("Powered by **Faster-Whisper** ⚡")
 
79
 
80
+ return app
 
 
81
 
 
82
 
83
+ # -------- Launch --------
84
  if __name__ == "__main__":
85
+ app = build_ui()
86
+ app.launch(server_name="0.0.0.0", server_port=7860)