Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -132,32 +132,50 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
|
|
| 132 |
"openai/whisper-large-v3", "distil-whisper/distil-large-v3",
|
| 133 |
]
|
| 134 |
|
| 135 |
-
waveform_options=gr.WaveformOptions(
|
| 136 |
-
waveform_color="#01C6FF",
|
| 137 |
-
waveform_progress_color="#0066B4",
|
| 138 |
-
skip_length=2,
|
| 139 |
-
show_controls=False,
|
| 140 |
-
)
|
| 141 |
-
|
| 142 |
with gr.Tab("File Transcription"):
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
fn=transcribe_webui_simple_progress,
|
| 145 |
inputs=[
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
gr.File(label="Upload Files", file_count="multiple"),
|
| 150 |
-
gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input", waveform_options=waveform_options),
|
| 151 |
-
gr.Dropdown(["transcribe", "translate"], label="Task", value="transcribe"),
|
| 152 |
-
gr.Checkbox(label='Flash', info='Use Flash Attention 2'),
|
| 153 |
-
gr.Number(label='chunk_length_s', value=30),
|
| 154 |
-
gr.Number(label='batch_size', value=24)
|
| 155 |
],
|
| 156 |
-
outputs=[
|
| 157 |
-
gr.File(label="Download"),
|
| 158 |
-
gr.Text(label="Transcription"),
|
| 159 |
-
gr.Text(label="Segments")
|
| 160 |
-
]
|
| 161 |
)
|
| 162 |
|
| 163 |
with gr.Tab("Real-time Transcription"):
|
|
@@ -173,7 +191,7 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
|
|
| 173 |
outputs=[st_buffer, txt_rt]
|
| 174 |
)
|
| 175 |
|
| 176 |
-
#
|
| 177 |
def load_model():
|
| 178 |
global pipe, last_model
|
| 179 |
last_model = "distil-whisper/distil-large-v2"
|
|
@@ -181,5 +199,6 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
|
|
| 181 |
|
| 182 |
demo.load(load_model)
|
| 183 |
|
| 184 |
-
#
|
| 185 |
-
|
|
|
|
|
|
| 132 |
"openai/whisper-large-v3", "distil-whisper/distil-large-v3",
|
| 133 |
]
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
with gr.Tab("File Transcription"):
|
| 136 |
+
with gr.Row():
|
| 137 |
+
with gr.Column():
|
| 138 |
+
model_dropdown = gr.Dropdown(
|
| 139 |
+
whisper_models,
|
| 140 |
+
value="distil-whisper/distil-large-v2",
|
| 141 |
+
label="Model"
|
| 142 |
+
)
|
| 143 |
+
language_dropdown = gr.Dropdown(
|
| 144 |
+
["Automatic Detection"] + sorted(get_language_names()),
|
| 145 |
+
value="Automatic Detection",
|
| 146 |
+
label="Language"
|
| 147 |
+
)
|
| 148 |
+
url_input = gr.Text(label="URL (YouTube, etc.)")
|
| 149 |
+
file_input = gr.File(label="Upload Files", file_count="multiple")
|
| 150 |
+
audio_input = gr.Audio(
|
| 151 |
+
sources=["upload", "microphone"],
|
| 152 |
+
type="filepath",
|
| 153 |
+
label="Audio Input"
|
| 154 |
+
)
|
| 155 |
+
task_dropdown = gr.Dropdown(
|
| 156 |
+
["transcribe", "translate"],
|
| 157 |
+
label="Task",
|
| 158 |
+
value="transcribe"
|
| 159 |
+
)
|
| 160 |
+
flash_checkbox = gr.Checkbox(label='Flash', info='Use Flash Attention 2')
|
| 161 |
+
chunk_length = gr.Number(label='chunk_length_s', value=30)
|
| 162 |
+
batch_size = gr.Number(label='batch_size', value=24)
|
| 163 |
+
|
| 164 |
+
transcribe_button = gr.Button("Transcribe")
|
| 165 |
+
|
| 166 |
+
with gr.Column():
|
| 167 |
+
output_files = gr.File(label="Download")
|
| 168 |
+
output_text = gr.Text(label="Transcription")
|
| 169 |
+
output_segments = gr.Text(label="Segments")
|
| 170 |
+
|
| 171 |
+
transcribe_button.click(
|
| 172 |
fn=transcribe_webui_simple_progress,
|
| 173 |
inputs=[
|
| 174 |
+
model_dropdown, language_dropdown, url_input,
|
| 175 |
+
file_input, audio_input, task_dropdown,
|
| 176 |
+
flash_checkbox, chunk_length, batch_size
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
],
|
| 178 |
+
outputs=[output_files, output_text, output_segments]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
)
|
| 180 |
|
| 181 |
with gr.Tab("Real-time Transcription"):
|
|
|
|
| 191 |
outputs=[st_buffer, txt_rt]
|
| 192 |
)
|
| 193 |
|
| 194 |
+
# Preload model for Hugging Face spaces
|
| 195 |
def load_model():
|
| 196 |
global pipe, last_model
|
| 197 |
last_model = "distil-whisper/distil-large-v2"
|
|
|
|
| 199 |
|
| 200 |
demo.load(load_model)
|
| 201 |
|
| 202 |
+
# Launch the app
|
| 203 |
+
if __name__ == "__main__":
|
| 204 |
+
demo.launch()
|