lvvignesh2122 committed on
Commit
eb04a70
·
verified ·
1 Parent(s): b8ec7ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -24
app.py CHANGED
@@ -2,16 +2,17 @@ import os
2
  import tempfile
3
  from pathlib import Path
4
  import gradio as gr
5
- from faster_whisper import WhisperModel
6
  import ffmpeg
 
7
 
8
  # -------- Configuration --------
9
  MODEL_NAME = "small" # tiny, base, small, medium, large-v3
10
- DEVICE = "cuda" if os.environ.get("USE_CUDA", "0") == "1" else "cpu"
 
11
 
12
  # -------- Load Faster-Whisper --------
13
  print(f"πŸš€ Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
14
- model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type="float16" if DEVICE == "cuda" else "int8")
15
 
16
  # -------- Helper functions --------
17
  def _format_timestamp(seconds: float) -> str:
@@ -26,19 +27,30 @@ def _format_timestamp(seconds: float) -> str:
26
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
27
 
28
 
29
- def transcribe(audio_file):
30
- """Transcribe uploaded audio and return text + SRT + file."""
 
31
  try:
32
- # Convert any format to wav for consistency
33
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
34
- (
35
- ffmpeg
36
- .input(audio_file)
37
- .output(tmp_wav.name, format="wav", acodec="pcm_s16le", ac=1, ar="16k")
38
- .overwrite_output()
39
- .run(quiet=True)
40
- )
41
- wav_path = tmp_wav.name
 
 
 
 
 
 
 
 
 
 
42
 
43
  # Transcribe
44
  segments, info = model.transcribe(wav_path, beam_size=5)
@@ -50,7 +62,7 @@ def transcribe(audio_file):
50
  srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
51
  text_output += segment.text.strip() + " "
52
 
53
- # Save SRT file
54
  srt_path = Path(tempfile.mkstemp(suffix=".srt")[1])
55
  with open(srt_path, "w", encoding="utf-8") as f:
56
  f.write(srt_output)
@@ -58,7 +70,7 @@ def transcribe(audio_file):
58
  return text_output.strip(), srt_output, srt_path
59
 
60
  except Exception as e:
61
- return f"Error: {str(e)}", "", None
62
 
63
 
64
  def clear_outputs():
@@ -70,10 +82,13 @@ def clear_outputs():
70
  def build_ui():
71
  with gr.Blocks(title="🎬 Subtitle Generator (Faster-Whisper)") as app:
72
  gr.Markdown("# 🎧 Fast Subtitle Generator using Faster-Whisper")
73
- gr.Markdown("Upload or record an audio file to generate `.srt` subtitles instantly.")
 
 
 
74
 
75
  with gr.Row():
76
- audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="πŸŽ™οΈ Audio Input")
77
 
78
  with gr.Row():
79
  text_output = gr.Textbox(label="πŸ“ Transcribed Text", lines=6)
@@ -81,15 +96,14 @@ def build_ui():
81
  srt_file = gr.File(label="⬇️ Download .srt File")
82
 
83
  with gr.Row():
84
- transcribe_btn = gr.Button("πŸš€ Generate Subtitles")
85
  clear_btn = gr.Button("🧹 Clear All")
86
 
87
- # Button actions
88
- transcribe_btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, srt_output, srt_file])
89
- clear_btn.click(fn=clear_outputs, inputs=None, outputs=[audio_input, text_output, srt_output, srt_file])
90
 
91
  gr.Markdown("---")
92
- gr.Markdown("⚑ Built with **Faster-Whisper** | πŸŽ₯ Ideal for Subtitle Generation")
93
 
94
  return app
95
 
 
2
  import tempfile
3
  from pathlib import Path
4
  import gradio as gr
 
5
  import ffmpeg
6
+ from faster_whisper import WhisperModel
7
 
8
  # -------- Configuration --------
9
  MODEL_NAME = "small" # tiny, base, small, medium, large-v3
10
+ DEVICE = "cpu" # Force CPU for Hugging Face free tier
11
+ COMPUTE_TYPE = "int8"
12
 
13
  # -------- Load Faster-Whisper --------
14
  print(f"πŸš€ Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
15
+ model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type=COMPUTE_TYPE)
16
 
17
  # -------- Helper functions --------
18
  def _format_timestamp(seconds: float) -> str:
 
27
  return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
28
 
29
 
30
+ def extract_audio(input_file: str) -> str:
31
+ """Extract audio track from any video/audio file and return path to WAV."""
32
+ tmp_wav = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
33
  try:
34
+ (
35
+ ffmpeg
36
+ .input(input_file)
37
+ .output(tmp_wav.name, format="wav", acodec="pcm_s16le", ac=1, ar="16k")
38
+ .overwrite_output()
39
+ .run(quiet=True)
40
+ )
41
+ return tmp_wav.name
42
+ except Exception as e:
43
+ raise RuntimeError(f"FFmpeg conversion failed: {e}")
44
+
45
+
46
+ def transcribe(file_path):
47
+ """Transcribe uploaded file (video/audio) and return text + SRT + file."""
48
+ try:
49
+ if not file_path:
50
+ return "⚠️ Please upload a file first.", "", None
51
+
52
+ # Convert any format to WAV
53
+ wav_path = extract_audio(file_path)
54
 
55
  # Transcribe
56
  segments, info = model.transcribe(wav_path, beam_size=5)
 
62
  srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
63
  text_output += segment.text.strip() + " "
64
 
65
+ # Save SRT
66
  srt_path = Path(tempfile.mkstemp(suffix=".srt")[1])
67
  with open(srt_path, "w", encoding="utf-8") as f:
68
  f.write(srt_output)
 
70
  return text_output.strip(), srt_output, srt_path
71
 
72
  except Exception as e:
73
+ return f"❌ Error: {str(e)}", "", None
74
 
75
 
76
  def clear_outputs():
 
82
  def build_ui():
83
  with gr.Blocks(title="🎬 Subtitle Generator (Faster-Whisper)") as app:
84
  gr.Markdown("# 🎧 Fast Subtitle Generator using Faster-Whisper")
85
+ gr.Markdown(
86
+ "Upload any **audio or video** file β€” MP3, WAV, MP4, MKV, MOV, etc. "
87
+ "and generate `.srt` subtitles instantly!"
88
+ )
89
 
90
  with gr.Row():
91
+ file_input = gr.File(label="πŸŽ₯ Upload Video/Audio File", file_types=["audio", "video"])
92
 
93
  with gr.Row():
94
  text_output = gr.Textbox(label="πŸ“ Transcribed Text", lines=6)
 
96
  srt_file = gr.File(label="⬇️ Download .srt File")
97
 
98
  with gr.Row():
99
+ transcribe_btn = gr.Button("πŸš€ Generate Subtitles", variant="primary")
100
  clear_btn = gr.Button("🧹 Clear All")
101
 
102
+ transcribe_btn.click(fn=transcribe, inputs=file_input, outputs=[text_output, srt_output, srt_file])
103
+ clear_btn.click(fn=clear_outputs, inputs=None, outputs=[file_input, text_output, srt_output, srt_file])
 
104
 
105
  gr.Markdown("---")
106
+ gr.Markdown("⚑ Built with **Faster-Whisper** | 🧠 Runs fully on CPU | 🎬 Ideal for Subtitle Generation")
107
 
108
  return app
109