Spaces:

xTHExBEASTx
/

Whisper-Transcriber

Sleeping

App Files Community

Whisper Transcriber Bot committed on Dec 29, 2025

Commit

eff77b5

1 Parent(s): 3fc26fb

Fix: Redesign UI for Gradio 5.x compatibility with cleaner tab-based layout

Browse files

Files changed (1) hide show

app.py +71 -77

app.py CHANGED Viewed

@@ -213,87 +213,81 @@ def create_interface():
         gr.Markdown(
             """
             # 🎤 Whisper Transcriber
             Generate accurate subtitles and transcripts from audio/video files using OpenAI Whisper.
-            **Features:**
-            - 📁 Upload files or paste YouTube/direct URLs
-            - 🎯 Multiple model sizes (tiny/small/medium)
-            - 🌍 Auto language detection (99 languages)
-            - 👥 Optional speaker diarization
-            - 📝 Multiple formats: SRT, VTT, TXT, JSON
             """
         )
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### Input")
-                file_input = gr.File(
-                    label="Upload Audio/Video File",
-                    file_types=[
-                        'audio/*',
-                        'video/*',
-                        '.mp3', '.wav', '.m4a', '.flac', '.aac',
-                        '.mp4', '.avi', '.mkv', '.mov', '.webm'
-                    ]
-                )
-                url_input = gr.Textbox(
-                    label="Or Paste URL (YouTube or direct link)",
-                    placeholder="https://www.youtube.com/watch?v=... or https://example.com/audio.mp3",
-                    lines=2
-                )
-                gr.Markdown("### Settings")
-                model_size = gr.Dropdown(
-                    choices=model_choices,
-                    value='small',
-                    label="Model Size",
-                    info="Tiny=fastest, Medium=most accurate"
-                )
-                language = gr.Dropdown(
-                    choices=[(f"{v} ({k})", k) for k, v in language_choices.items()],
-                    value='auto',
-                    label="Language",
-                    info="Auto-detect or select specific language"
-                )
-                enable_diarization = gr.Checkbox(
-                    label="Enable Speaker Diarization",
-                    value=False,
-                    info="Identify different speakers (slower, requires HF_TOKEN)"
-                )
-                process_btn = gr.Button("🚀 Generate Transcription", variant="primary", size="lg")
-            with gr.Column(scale=1):
-                gr.Markdown("### Output")
-                preview_output = gr.Markdown(label="Preview")
-                with gr.Row():
-                    srt_output = gr.File(label="📄 SRT File")
-                    vtt_output = gr.File(label="📄 VTT File")
-                with gr.Row():
-                    txt_output = gr.File(label="📄 TXT File")
-                    json_output = gr.File(label="📄 JSON File")
-        gr.Markdown(
-            """
-            ### 📚 Usage Tips
-            - **Model Selection:** Tiny for speed, Small for balance, Medium for accuracy
-            - **Large Files:** Files will be automatically chunked for processing
-            - **Speaker Diarization:** Requires HF_TOKEN environment variable (get it at huggingface.co/settings/tokens)
-            - **Supported Formats:** MP3, WAV, M4A, FLAC, MP4, AVI, MKV, MOV, WebM and more
-            ### 🔌 API Usage
-            This Space provides an API endpoint. Click "Use via API" below for details.
-            """
-        )
         # Wire up the button
         process_btn.click(

         gr.Markdown(
             """
             # 🎤 Whisper Transcriber
             Generate accurate subtitles and transcripts from audio/video files using OpenAI Whisper.
             """
         )
+        with gr.Tab("Transcribe"):
+            with gr.Row():
+                with gr.Column():
+                    file_input = gr.File(
+                        label="📁 Upload Audio/Video File",
+                        file_types=['audio', 'video']
+                    )
+                    url_input = gr.Textbox(
+                        label="🔗 Or Paste URL (YouTube or direct link)",
+                        placeholder="https://www.youtube.com/watch?v=... or https://example.com/audio.mp3"
+                    )
+                    model_size = gr.Dropdown(
+                        choices=model_choices,
+                        value='small',
+                        label="🎯 Model Size"
+                    )
+                    language = gr.Dropdown(
+                        choices=[(f"{v} ({k})", k) for k, v in language_choices.items()],
+                        value='auto',
+                        label="🌍 Language"
+                    )
+                    enable_diarization = gr.Checkbox(
+                        label="👥 Enable Speaker Diarization",
+                        value=False
+                    )
+                    process_btn = gr.Button("🚀 Generate Transcription", variant="primary")
+                with gr.Column():
+                    preview_output = gr.Markdown(label="📄 Preview")
+                    srt_output = gr.File(label="SRT File")
+                    vtt_output = gr.File(label="VTT File")
+                    txt_output = gr.File(label="TXT File")
+                    json_output = gr.File(label="JSON File")
+        with gr.Tab("Help"):
+            gr.Markdown(
+                """
+                ## 📚 How to Use
+                1. **Upload a file** or **paste a URL** (YouTube or direct media link)
+                2. **Select model size**: Tiny (fast), Small (balanced), Medium (accurate)
+                3. **Choose language**: Auto-detect or select manually
+                4. **Enable diarization** (optional): Identifies different speakers
+                5. Click **Generate Transcription**
+                6. **Download** your preferred format(s)
+                ## 📋 Supported Formats
+                **Audio:** MP3, WAV, M4A, FLAC, AAC, OGG, WMA
+                **Video:** MP4, AVI, MKV, MOV, WMV, WebM, FLV
+                ## ⚙️ Features
+                - ✅ Auto language detection (99+ languages)
+                - ✅ Multiple output formats (SRT, VTT, TXT, JSON)
+                - ✅ Word-level timestamps in JSON
+                - ✅ Large file chunking (30-min segments)
+                - ✅ Optional speaker identification
+                - ✅ Public API endpoint
+                ## 💡 Tips
+                - Use **Small model** for most cases
+                - **Diarization** requires HF_TOKEN (Space settings)
+                - Large files are automatically chunked
+                - Processing time varies by model and file length
+                """
+            )
         # Wire up the button
         process_btn.click(