Whisper Transcriber Bot committed on
Commit
eff77b5
·
1 Parent(s): 3fc26fb

Fix: Redesign UI for Gradio 5.x compatibility with cleaner tab-based layout

Browse files
Files changed (1) hide show
  1. app.py +71 -77
app.py CHANGED
@@ -213,87 +213,81 @@ def create_interface():
213
  gr.Markdown(
214
  """
215
  # 🎀 Whisper Transcriber
216
-
217
  Generate accurate subtitles and transcripts from audio/video files using OpenAI Whisper.
218
-
219
- **Features:**
220
- - 📁 Upload files or paste YouTube/direct URLs
221
- - 🎯 Multiple model sizes (tiny/small/medium)
222
- - 🌍 Auto language detection (99 languages)
223
- - 👥 Optional speaker diarization
224
- - 📝 Multiple formats: SRT, VTT, TXT, JSON
225
  """
226
  )
227
 
228
- with gr.Row():
229
- with gr.Column(scale=1):
230
- gr.Markdown("### Input")
231
-
232
- file_input = gr.File(
233
- label="Upload Audio/Video File",
234
- file_types=[
235
- 'audio/*',
236
- 'video/*',
237
- '.mp3', '.wav', '.m4a', '.flac', '.aac',
238
- '.mp4', '.avi', '.mkv', '.mov', '.webm'
239
- ]
240
- )
241
-
242
- url_input = gr.Textbox(
243
- label="Or Paste URL (YouTube or direct link)",
244
- placeholder="https://www.youtube.com/watch?v=... or https://example.com/audio.mp3",
245
- lines=2
246
- )
247
-
248
- gr.Markdown("### Settings")
249
-
250
- model_size = gr.Dropdown(
251
- choices=model_choices,
252
- value='small',
253
- label="Model Size",
254
- info="Tiny=fastest, Medium=most accurate"
255
- )
256
-
257
- language = gr.Dropdown(
258
- choices=[(f"{v} ({k})", k) for k, v in language_choices.items()],
259
- value='auto',
260
- label="Language",
261
- info="Auto-detect or select specific language"
262
- )
263
-
264
- enable_diarization = gr.Checkbox(
265
- label="Enable Speaker Diarization",
266
- value=False,
267
- info="Identify different speakers (slower, requires HF_TOKEN)"
268
- )
269
-
270
- process_btn = gr.Button("🚀 Generate Transcription", variant="primary", size="lg")
271
-
272
- with gr.Column(scale=1):
273
- gr.Markdown("### Output")
274
-
275
- preview_output = gr.Markdown(label="Preview")
276
-
277
- with gr.Row():
278
- srt_output = gr.File(label="📄 SRT File")
279
- vtt_output = gr.File(label="📄 VTT File")
280
-
281
- with gr.Row():
282
- txt_output = gr.File(label="📄 TXT File")
283
- json_output = gr.File(label="📄 JSON File")
284
-
285
- gr.Markdown(
286
- """
287
- ### 📚 Usage Tips
288
- - **Model Selection:** Tiny for speed, Small for balance, Medium for accuracy
289
- - **Large Files:** Files will be automatically chunked for processing
290
- - **Speaker Diarization:** Requires HF_TOKEN environment variable (get it at huggingface.co/settings/tokens)
291
- - **Supported Formats:** MP3, WAV, M4A, FLAC, MP4, AVI, MKV, MOV, WebM and more
292
-
293
- ### 🔌 API Usage
294
- This Space provides an API endpoint. Click "Use via API" below for details.
295
- """
296
- )
 
 
297
 
298
  # Wire up the button
299
  process_btn.click(
 
213
  gr.Markdown(
214
  """
215
  # 🎀 Whisper Transcriber
 
216
  Generate accurate subtitles and transcripts from audio/video files using OpenAI Whisper.
 
 
 
 
 
 
 
217
  """
218
  )
219
 
220
+ with gr.Tab("Transcribe"):
221
+ with gr.Row():
222
+ with gr.Column():
223
+ file_input = gr.File(
224
+ label="📁 Upload Audio/Video File",
225
+ file_types=['audio', 'video']
226
+ )
227
+
228
+ url_input = gr.Textbox(
229
+ label="🔗 Or Paste URL (YouTube or direct link)",
230
+ placeholder="https://www.youtube.com/watch?v=... or https://example.com/audio.mp3"
231
+ )
232
+
233
+ model_size = gr.Dropdown(
234
+ choices=model_choices,
235
+ value='small',
236
+ label="🎯 Model Size"
237
+ )
238
+
239
+ language = gr.Dropdown(
240
+ choices=[(f"{v} ({k})", k) for k, v in language_choices.items()],
241
+ value='auto',
242
+ label="🌍 Language"
243
+ )
244
+
245
+ enable_diarization = gr.Checkbox(
246
+ label="👥 Enable Speaker Diarization",
247
+ value=False
248
+ )
249
+
250
+ process_btn = gr.Button("🚀 Generate Transcription", variant="primary")
251
+
252
+ with gr.Column():
253
+ preview_output = gr.Markdown(label="📄 Preview")
254
+
255
+ srt_output = gr.File(label="SRT File")
256
+ vtt_output = gr.File(label="VTT File")
257
+ txt_output = gr.File(label="TXT File")
258
+ json_output = gr.File(label="JSON File")
259
+
260
+ with gr.Tab("Help"):
261
+ gr.Markdown(
262
+ """
263
+ ## 📚 How to Use
264
+
265
+ 1. **Upload a file** or **paste a URL** (YouTube or direct media link)
266
+ 2. **Select model size**: Tiny (fast), Small (balanced), Medium (accurate)
267
+ 3. **Choose language**: Auto-detect or select manually
268
+ 4. **Enable diarization** (optional): Identifies different speakers
269
+ 5. Click **Generate Transcription**
270
+ 6. **Download** your preferred format(s)
271
+
272
+ ## 📋 Supported Formats
273
+ **Audio:** MP3, WAV, M4A, FLAC, AAC, OGG, WMA
274
+ **Video:** MP4, AVI, MKV, MOV, WMV, WebM, FLV
275
+
276
+ ## ⚙️ Features
277
+ - ✅ Auto language detection (99+ languages)
278
+ - ✅ Multiple output formats (SRT, VTT, TXT, JSON)
279
+ - ✅ Word-level timestamps in JSON
280
+ - ✅ Large file chunking (30-min segments)
281
+ - ✅ Optional speaker identification
282
+ - ✅ Public API endpoint
283
+
284
+ ## 💡 Tips
285
+ - Use **Small model** for most cases
286
+ - **Diarization** requires HF_TOKEN (Space settings)
287
+ - Large files are automatically chunked
288
+ - Processing time varies by model and file length
289
+ """
290
+ )
291
 
292
  # Wire up the button
293
  process_btn.click(