Realtime-whisper-demo

Sleeping

App Files Community

hyungjoochae committed on Apr 19

Commit

d2d2762

verified ·

1 Parent(s): 0ecc66f

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -25

app.py CHANGED Viewed

@@ -132,32 +132,50 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
         "openai/whisper-large-v3", "distil-whisper/distil-large-v3",
     ]
-    waveform_options=gr.WaveformOptions(
-        waveform_color="#01C6FF",
-        waveform_progress_color="#0066B4",
-        skip_length=2,
-        show_controls=False,
-    )
     with gr.Tab("File Transcription"):
-        simple_transcribe = gr.Interface(
             fn=transcribe_webui_simple_progress,
             inputs=[
-                gr.Dropdown(whisper_models, value="distil-whisper/distil-large-v2", label="Model"),
-                gr.Dropdown(["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language"),
-                gr.Text(label="URL (YouTube, etc.)"),
-                gr.File(label="Upload Files", file_count="multiple"),
-                gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input", waveform_options=waveform_options),
-                gr.Dropdown(["transcribe", "translate"], label="Task", value="transcribe"),
-                gr.Checkbox(label='Flash', info='Use Flash Attention 2'),
-                gr.Number(label='chunk_length_s', value=30),
-                gr.Number(label='batch_size', value=24)
             ],
-            outputs=[
-                gr.File(label="Download"),
-                gr.Text(label="Transcription"),
-                gr.Text(label="Segments")
-            ]
         )
     with gr.Tab("Real-time Transcription"):
@@ -173,7 +191,7 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
             outputs=[st_buffer, txt_rt]
         )
-    # Hugging Face space 최적화 (미리 모델 로딩)
     def load_model():
         global pipe, last_model
         last_model = "distil-whisper/distil-large-v2"
@@ -181,5 +199,6 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
     demo.load(load_model)
-# Hugging Face에서는 아래 launch 설정을 권장
-demo.launch()

         "openai/whisper-large-v3", "distil-whisper/distil-large-v3",
     ]
     with gr.Tab("File Transcription"):
+        with gr.Row():
+            with gr.Column():
+                model_dropdown = gr.Dropdown(
+                    whisper_models,
+                    value="distil-whisper/distil-large-v2",
+                    label="Model"
+                )
+                language_dropdown = gr.Dropdown(
+                    ["Automatic Detection"] + sorted(get_language_names()),
+                    value="Automatic Detection",
+                    label="Language"
+                )
+                url_input = gr.Text(label="URL (YouTube, etc.)")
+                file_input = gr.File(label="Upload Files", file_count="multiple")
+                audio_input = gr.Audio(
+                    sources=["upload", "microphone"],
+                    type="filepath",
+                    label="Audio Input"
+                )
+                task_dropdown = gr.Dropdown(
+                    ["transcribe", "translate"],
+                    label="Task",
+                    value="transcribe"
+                )
+                flash_checkbox = gr.Checkbox(label='Flash', info='Use Flash Attention 2')
+                chunk_length = gr.Number(label='chunk_length_s', value=30)
+                batch_size = gr.Number(label='batch_size', value=24)
+                transcribe_button = gr.Button("Transcribe")
+            with gr.Column():
+                output_files = gr.File(label="Download")
+                output_text = gr.Text(label="Transcription")
+                output_segments = gr.Text(label="Segments")
+        transcribe_button.click(
             fn=transcribe_webui_simple_progress,
             inputs=[
+                model_dropdown, language_dropdown, url_input,
+                file_input, audio_input, task_dropdown,
+                flash_checkbox, chunk_length, batch_size
             ],
+            outputs=[output_files, output_text, output_segments]
         )
     with gr.Tab("Real-time Transcription"):
             outputs=[st_buffer, txt_rt]
         )
+    # Preload model for Hugging Face spaces
     def load_model():
         global pipe, last_model
         last_model = "distil-whisper/distil-large-v2"
     demo.load(load_model)
+# Launch the app
+if __name__ == "__main__":
+    demo.launch()