hyungjoochae committed on
Commit
d2d2762
·
verified ·
1 Parent(s): 0ecc66f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -25
app.py CHANGED
@@ -132,32 +132,50 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
132
  "openai/whisper-large-v3", "distil-whisper/distil-large-v3",
133
  ]
134
 
135
- waveform_options=gr.WaveformOptions(
136
- waveform_color="#01C6FF",
137
- waveform_progress_color="#0066B4",
138
- skip_length=2,
139
- show_controls=False,
140
- )
141
-
142
  with gr.Tab("File Transcription"):
143
- simple_transcribe = gr.Interface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  fn=transcribe_webui_simple_progress,
145
  inputs=[
146
- gr.Dropdown(whisper_models, value="distil-whisper/distil-large-v2", label="Model"),
147
- gr.Dropdown(["Automatic Detection"] + sorted(get_language_names()), value="Automatic Detection", label="Language"),
148
- gr.Text(label="URL (YouTube, etc.)"),
149
- gr.File(label="Upload Files", file_count="multiple"),
150
- gr.Audio(sources=["upload", "microphone"], type="filepath", label="Audio Input", waveform_options=waveform_options),
151
- gr.Dropdown(["transcribe", "translate"], label="Task", value="transcribe"),
152
- gr.Checkbox(label='Flash', info='Use Flash Attention 2'),
153
- gr.Number(label='chunk_length_s', value=30),
154
- gr.Number(label='batch_size', value=24)
155
  ],
156
- outputs=[
157
- gr.File(label="Download"),
158
- gr.Text(label="Transcription"),
159
- gr.Text(label="Segments")
160
- ]
161
  )
162
 
163
  with gr.Tab("Real-time Transcription"):
@@ -173,7 +191,7 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
173
  outputs=[st_buffer, txt_rt]
174
  )
175
 
176
- # Hugging Face space 최적화 (미리 모델 로딩)
177
  def load_model():
178
  global pipe, last_model
179
  last_model = "distil-whisper/distil-large-v2"
@@ -181,5 +199,6 @@ with gr.Blocks(title="Insanely Fast Whisper") as demo:
181
 
182
  demo.load(load_model)
183
 
184
- # Hugging Face에서는 아래 launch 설정을 권장
185
- demo.launch()
 
 
132
  "openai/whisper-large-v3", "distil-whisper/distil-large-v3",
133
  ]
134
 
 
 
 
 
 
 
 
135
  with gr.Tab("File Transcription"):
136
+ with gr.Row():
137
+ with gr.Column():
138
+ model_dropdown = gr.Dropdown(
139
+ whisper_models,
140
+ value="distil-whisper/distil-large-v2",
141
+ label="Model"
142
+ )
143
+ language_dropdown = gr.Dropdown(
144
+ ["Automatic Detection"] + sorted(get_language_names()),
145
+ value="Automatic Detection",
146
+ label="Language"
147
+ )
148
+ url_input = gr.Text(label="URL (YouTube, etc.)")
149
+ file_input = gr.File(label="Upload Files", file_count="multiple")
150
+ audio_input = gr.Audio(
151
+ sources=["upload", "microphone"],
152
+ type="filepath",
153
+ label="Audio Input"
154
+ )
155
+ task_dropdown = gr.Dropdown(
156
+ ["transcribe", "translate"],
157
+ label="Task",
158
+ value="transcribe"
159
+ )
160
+ flash_checkbox = gr.Checkbox(label='Flash', info='Use Flash Attention 2')
161
+ chunk_length = gr.Number(label='chunk_length_s', value=30)
162
+ batch_size = gr.Number(label='batch_size', value=24)
163
+
164
+ transcribe_button = gr.Button("Transcribe")
165
+
166
+ with gr.Column():
167
+ output_files = gr.File(label="Download")
168
+ output_text = gr.Text(label="Transcription")
169
+ output_segments = gr.Text(label="Segments")
170
+
171
+ transcribe_button.click(
172
  fn=transcribe_webui_simple_progress,
173
  inputs=[
174
+ model_dropdown, language_dropdown, url_input,
175
+ file_input, audio_input, task_dropdown,
176
+ flash_checkbox, chunk_length, batch_size
 
 
 
 
 
 
177
  ],
178
+ outputs=[output_files, output_text, output_segments]
 
 
 
 
179
  )
180
 
181
  with gr.Tab("Real-time Transcription"):
 
191
  outputs=[st_buffer, txt_rt]
192
  )
193
 
194
+ # Preload model for Hugging Face spaces
195
  def load_model():
196
  global pipe, last_model
197
  last_model = "distil-whisper/distil-large-v2"
 
199
 
200
  demo.load(load_model)
201
 
202
+ # Launch the app
203
+ if __name__ == "__main__":
204
+ demo.launch()