Spaces:
Sleeping
Sleeping
Whisper Transcriber Bot
committed on
Commit
·
eff77b5
1
Parent(s):
3fc26fb
Fix: Redesign UI for Gradio 5.x compatibility with cleaner tab-based layout
Browse files
app.py
CHANGED
|
@@ -213,87 +213,81 @@ def create_interface():
|
|
| 213 |
gr.Markdown(
|
| 214 |
"""
|
| 215 |
# 🎤 Whisper Transcriber
|
| 216 |
-
|
| 217 |
Generate accurate subtitles and transcripts from audio/video files using OpenAI Whisper.
|
| 218 |
-
|
| 219 |
-
**Features:**
|
| 220 |
-
- π Upload files or paste YouTube/direct URLs
|
| 221 |
-
- 🎯 Multiple model sizes (tiny/small/medium)
|
| 222 |
-
- π Auto language detection (99 languages)
|
| 223 |
-
- 👥 Optional speaker diarization
|
| 224 |
-
- π Multiple formats: SRT, VTT, TXT, JSON
|
| 225 |
"""
|
| 226 |
)
|
| 227 |
|
| 228 |
-
with gr.
|
| 229 |
-
with gr.
|
| 230 |
-
gr.
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
label="
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
|
|
|
|
|
|
| 297 |
|
| 298 |
# Wire up the button
|
| 299 |
process_btn.click(
|
|
|
|
| 213 |
gr.Markdown(
|
| 214 |
"""
|
| 215 |
# 🎤 Whisper Transcriber
|
|
|
|
| 216 |
Generate accurate subtitles and transcripts from audio/video files using OpenAI Whisper.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
"""
|
| 218 |
)
|
| 219 |
|
| 220 |
+
with gr.Tab("Transcribe"):
|
| 221 |
+
with gr.Row():
|
| 222 |
+
with gr.Column():
|
| 223 |
+
file_input = gr.File(
|
| 224 |
+
label="π Upload Audio/Video File",
|
| 225 |
+
file_types=['audio', 'video']
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
url_input = gr.Textbox(
|
| 229 |
+
label="π Or Paste URL (YouTube or direct link)",
|
| 230 |
+
placeholder="https://www.youtube.com/watch?v=... or https://example.com/audio.mp3"
|
| 231 |
+
)
|
| 232 |
+
|
| 233 |
+
model_size = gr.Dropdown(
|
| 234 |
+
choices=model_choices,
|
| 235 |
+
value='small',
|
| 236 |
+
label="🎯 Model Size"
|
| 237 |
+
)
|
| 238 |
+
|
| 239 |
+
language = gr.Dropdown(
|
| 240 |
+
choices=[(f"{v} ({k})", k) for k, v in language_choices.items()],
|
| 241 |
+
value='auto',
|
| 242 |
+
label="π Language"
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
enable_diarization = gr.Checkbox(
|
| 246 |
+
label="👥 Enable Speaker Diarization",
|
| 247 |
+
value=False
|
| 248 |
+
)
|
| 249 |
+
|
| 250 |
+
process_btn = gr.Button("π Generate Transcription", variant="primary")
|
| 251 |
+
|
| 252 |
+
with gr.Column():
|
| 253 |
+
preview_output = gr.Markdown(label="π Preview")
|
| 254 |
+
|
| 255 |
+
srt_output = gr.File(label="SRT File")
|
| 256 |
+
vtt_output = gr.File(label="VTT File")
|
| 257 |
+
txt_output = gr.File(label="TXT File")
|
| 258 |
+
json_output = gr.File(label="JSON File")
|
| 259 |
+
|
| 260 |
+
with gr.Tab("Help"):
|
| 261 |
+
gr.Markdown(
|
| 262 |
+
"""
|
| 263 |
+
## π How to Use
|
| 264 |
+
|
| 265 |
+
1. **Upload a file** or **paste a URL** (YouTube or direct media link)
|
| 266 |
+
2. **Select model size**: Tiny (fast), Small (balanced), Medium (accurate)
|
| 267 |
+
3. **Choose language**: Auto-detect or select manually
|
| 268 |
+
4. **Enable diarization** (optional): Identifies different speakers
|
| 269 |
+
5. Click **Generate Transcription**
|
| 270 |
+
6. **Download** your preferred format(s)
|
| 271 |
+
|
| 272 |
+
## π Supported Formats
|
| 273 |
+
**Audio:** MP3, WAV, M4A, FLAC, AAC, OGG, WMA
|
| 274 |
+
**Video:** MP4, AVI, MKV, MOV, WMV, WebM, FLV
|
| 275 |
+
|
| 276 |
+
## ⚙️ Features
|
| 277 |
+
- ✅ Auto language detection (99+ languages)
|
| 278 |
+
- ✅ Multiple output formats (SRT, VTT, TXT, JSON)
|
| 279 |
+
- ✅ Word-level timestamps in JSON
|
| 280 |
+
- ✅ Large file chunking (30-min segments)
|
| 281 |
+
- ✅ Optional speaker identification
|
| 282 |
+
- ✅ Public API endpoint
|
| 283 |
+
|
| 284 |
+
## 💡 Tips
|
| 285 |
+
- Use **Small model** for most cases
|
| 286 |
+
- **Diarization** requires HF_TOKEN (Space settings)
|
| 287 |
+
- Large files are automatically chunked
|
| 288 |
+
- Processing time varies by model and file length
|
| 289 |
+
"""
|
| 290 |
+
)
|
| 291 |
|
| 292 |
# Wire up the button
|
| 293 |
process_btn.click(
|