lvvignesh2122 committed on
Commit
e03f714
·
verified ·
1 Parent(s): cc8bbf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -6,10 +6,10 @@ from faster_whisper import WhisperModel
6
  import ffmpeg
7
 
8
  # -------- Configuration --------
9
- MODEL_NAME = "small" # choices: tiny, base, small, medium, large-v3
10
  DEVICE = "cuda" if os.environ.get("USE_CUDA", "0") == "1" else "cpu"
11
 
12
- # Load model once
13
  print(f"🚀 Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
14
  model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type="float16" if DEVICE == "cuda" else "int8")
15
 
@@ -27,9 +27,9 @@ def _format_timestamp(seconds: float) -> str:
27
 
28
 
29
  def transcribe(audio_file):
30
- """Transcribe uploaded audio file and return text + SRT."""
31
  try:
32
- # Convert to wav if needed (ensures consistency)
33
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
34
  (
35
  ffmpeg
@@ -40,10 +40,9 @@ def transcribe(audio_file):
40
  )
41
  wav_path = tmp_wav.name
42
 
43
- # Run transcription
44
  segments, info = model.transcribe(wav_path, beam_size=5)
45
- text_output = ""
46
- srt_output = ""
47
 
48
  for i, segment in enumerate(segments, start=1):
49
  start = _format_timestamp(segment.start)
@@ -51,31 +50,46 @@ def transcribe(audio_file):
51
  srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
52
  text_output += segment.text.strip() + " "
53
 
54
- return text_output.strip(), srt_output
 
 
 
 
 
55
 
56
  except Exception as e:
57
- return f"Error: {str(e)}", ""
 
 
 
 
 
58
 
59
 
60
  # -------- Gradio UI --------
61
  def build_ui():
62
- with gr.Blocks(title="🎙️ Faster-Whisper Transcriber") as app:
63
- gr.Markdown("# 🎧 Fast & Accurate Speech-to-Text using Faster-Whisper")
64
- gr.Markdown("Upload or record an audio file to get instant transcription.")
65
 
66
  with gr.Row():
67
- audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input")
68
 
69
  with gr.Row():
70
- text_output = gr.Textbox(label="Transcribed Text", lines=6)
71
- srt_output = gr.Textbox(label="SRT Subtitle", lines=6)
 
72
 
73
- transcribe_btn = gr.Button("Transcribe")
 
 
74
 
75
- transcribe_btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, srt_output])
 
 
76
 
77
  gr.Markdown("---")
78
- gr.Markdown("Powered by **Faster-Whisper** ⚑")
79
 
80
  return app
81
 
 
6
  import ffmpeg
7
 
8
  # -------- Configuration --------
9
+ MODEL_NAME = "small" # tiny, base, small, medium, large-v3
10
  DEVICE = "cuda" if os.environ.get("USE_CUDA", "0") == "1" else "cpu"
11
 
12
+ # -------- Load Faster-Whisper --------
13
  print(f"🚀 Loading Faster-Whisper model: {MODEL_NAME} on {DEVICE}")
14
  model = WhisperModel(MODEL_NAME, device=DEVICE, compute_type="float16" if DEVICE == "cuda" else "int8")
15
 
 
27
 
28
 
29
  def transcribe(audio_file):
30
+ """Transcribe uploaded audio and return text + SRT + file."""
31
  try:
32
+ # Convert any format to wav for consistency
33
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_wav:
34
  (
35
  ffmpeg
 
40
  )
41
  wav_path = tmp_wav.name
42
 
43
+ # Transcribe
44
  segments, info = model.transcribe(wav_path, beam_size=5)
45
+ text_output, srt_output = "", ""
 
46
 
47
  for i, segment in enumerate(segments, start=1):
48
  start = _format_timestamp(segment.start)
 
50
  srt_output += f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n"
51
  text_output += segment.text.strip() + " "
52
 
53
+ # Save SRT file
54
+ srt_path = Path(tempfile.mkstemp(suffix=".srt")[1])
55
+ with open(srt_path, "w", encoding="utf-8") as f:
56
+ f.write(srt_output)
57
+
58
+ return text_output.strip(), srt_output, srt_path
59
 
60
  except Exception as e:
61
+ return f"Error: {str(e)}", "", None
62
+
63
+
64
+ def clear_outputs():
65
+ """Clear all UI fields."""
66
+ return None, "", "", None
67
 
68
 
69
  # -------- Gradio UI --------
70
  def build_ui():
71
+ with gr.Blocks(title="🎬 Subtitle Generator (Faster-Whisper)") as app:
72
+ gr.Markdown("# 🎧 Fast Subtitle Generator using Faster-Whisper")
73
+ gr.Markdown("Upload or record an audio file to generate `.srt` subtitles instantly.")
74
 
75
  with gr.Row():
76
+ audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="🎙️ Audio Input")
77
 
78
  with gr.Row():
79
+ text_output = gr.Textbox(label="📝 Transcribed Text", lines=6)
80
+ srt_output = gr.Textbox(label="📄 SRT Subtitle", lines=6)
81
+ srt_file = gr.File(label="⬇️ Download .srt File")
82
 
83
+ with gr.Row():
84
+ transcribe_btn = gr.Button("🚀 Generate Subtitles")
85
+ clear_btn = gr.Button("🧹 Clear All")
86
 
87
+ # Button actions
88
+ transcribe_btn.click(fn=transcribe, inputs=audio_input, outputs=[text_output, srt_output, srt_file])
89
+ clear_btn.click(fn=clear_outputs, inputs=None, outputs=[audio_input, text_output, srt_output, srt_file])
90
 
91
  gr.Markdown("---")
92
+ gr.Markdown("⚑ Built with **Faster-Whisper** | 🎥 Ideal for Subtitle Generation")
93
 
94
  return app
95