Spaces:
Running
Running
Commit
·
6dbf680
1
Parent(s):
f374409
fix: add clear transcription functionality for real-time and file transcription, update UI elements
Browse files
app.py
CHANGED
|
@@ -159,6 +159,7 @@ def transcribe_file(audio_file, model_name="nvidia/parakeet-tdt-0.6b-v2"):
|
|
| 159 |
return "No audio file provided. Please upload an audio file."
|
| 160 |
|
| 161 |
try:
|
|
|
|
| 162 |
model = load_model(model_name)
|
| 163 |
|
| 164 |
print(f"Processing file: {audio_file}")
|
|
@@ -209,7 +210,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 209 |
label="Speak into your microphone"
|
| 210 |
)
|
| 211 |
|
| 212 |
-
|
| 213 |
|
| 214 |
with gr.Column(scale=3):
|
| 215 |
text_output = gr.Textbox(
|
|
@@ -232,7 +233,9 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 232 |
type="filepath",
|
| 233 |
label="Record or upload audio file"
|
| 234 |
)
|
| 235 |
-
|
|
|
|
|
|
|
| 236 |
|
| 237 |
with gr.Column(scale=3):
|
| 238 |
file_transcription = gr.Textbox(
|
|
@@ -281,24 +284,45 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 281 |
fn=transcribe_file,
|
| 282 |
inputs=[audio_recorder, model_dropdown],
|
| 283 |
outputs=[file_transcription]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
)
|
| 285 |
|
| 286 |
# Update the main text output when the state changes
|
| 287 |
state.change(
|
| 288 |
fn=lambda s: s,
|
| 289 |
inputs=[state],
|
| 290 |
-
outputs=[text_output]
|
| 291 |
-
|
|
|
|
| 292 |
|
| 293 |
-
gr.Markdown("## Instructions")
|
| 294 |
-
gr.Markdown("""
|
| 295 |
### Real-time Transcription:
|
| 296 |
1. Select an ASR model from the dropdown menu
|
| 297 |
2. Click 'Load Selected Model' to load the model
|
| 298 |
3. Click the microphone button to start recording
|
| 299 |
4. Speak clearly into your microphone
|
| 300 |
5. The transcription will appear in real-time
|
| 301 |
-
6. Click 'Clear Transcript' to
|
| 302 |
|
| 303 |
### File Transcription:
|
| 304 |
1. Select an ASR model from the dropdown menu
|
|
@@ -307,6 +331,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 307 |
4. Record audio by clicking the microphone button or upload an existing audio file
|
| 308 |
5. Click 'Transcribe Audio File' to process the recording
|
| 309 |
6. The complete transcription will appear in the text box
|
|
|
|
| 310 |
""")
|
| 311 |
|
| 312 |
# Launch the app
|
|
|
|
| 159 |
return "No audio file provided. Please upload an audio file."
|
| 160 |
|
| 161 |
try:
|
| 162 |
+
global model
|
| 163 |
model = load_model(model_name)
|
| 164 |
|
| 165 |
print(f"Processing file: {audio_file}")
|
|
|
|
| 210 |
label="Speak into your microphone"
|
| 211 |
)
|
| 212 |
|
| 213 |
+
clear_btn = gr.Button("Clear Transcript", variant="secondary")
|
| 214 |
|
| 215 |
with gr.Column(scale=3):
|
| 216 |
text_output = gr.Textbox(
|
|
|
|
| 233 |
type="filepath",
|
| 234 |
label="Record or upload audio file"
|
| 235 |
)
|
| 236 |
+
with gr.Row():
|
| 237 |
+
transcribe_btn = gr.Button("Transcribe Audio File", variant="primary")
|
| 238 |
+
clear_file_btn = gr.Button("Clear Transcript", variant="secondary")
|
| 239 |
|
| 240 |
with gr.Column(scale=3):
|
| 241 |
file_transcription = gr.Textbox(
|
|
|
|
| 284 |
fn=transcribe_file,
|
| 285 |
inputs=[audio_recorder, model_dropdown],
|
| 286 |
outputs=[file_transcription]
|
| 287 |
+
) # Clear the real-time transcription
|
| 288 |
+
def clear_transcription():
|
| 289 |
+
print("Clearing real-time transcription")
|
| 290 |
+
return "", "", None, 0 # Clear state, streaming_text, audio_buffer, and last_processed_time
|
| 291 |
+
|
| 292 |
+
# Clear the file transcription
|
| 293 |
+
def clear_file_transcription():
|
| 294 |
+
print("Clearing file transcription")
|
| 295 |
+
return "" # Clear file_transcription
|
| 296 |
+
|
| 297 |
+
# Set up clear button event handlers
|
| 298 |
+
clear_btn.click(
|
| 299 |
+
fn=clear_transcription,
|
| 300 |
+
inputs=[],
|
| 301 |
+
outputs=[state, streaming_text, audio_buffer, last_processed_time]
|
| 302 |
+
)
|
| 303 |
+
|
| 304 |
+
clear_file_btn.click(
|
| 305 |
+
fn=clear_file_transcription,
|
| 306 |
+
inputs=[],
|
| 307 |
+
outputs=[file_transcription]
|
| 308 |
)
|
| 309 |
|
| 310 |
# Update the main text output when the state changes
|
| 311 |
state.change(
|
| 312 |
fn=lambda s: s,
|
| 313 |
inputs=[state],
|
| 314 |
+
outputs=[text_output] )
|
| 315 |
+
|
| 316 |
+
|
| 317 |
|
| 318 |
+
gr.Markdown("## Instructions")gr.Markdown("""
|
|
|
|
| 319 |
### Real-time Transcription:
|
| 320 |
1. Select an ASR model from the dropdown menu
|
| 321 |
2. Click 'Load Selected Model' to load the model
|
| 322 |
3. Click the microphone button to start recording
|
| 323 |
4. Speak clearly into your microphone
|
| 324 |
5. The transcription will appear in real-time
|
| 325 |
+
6. Click 'Clear Transcript' to reset the transcription
|
| 326 |
|
| 327 |
### File Transcription:
|
| 328 |
1. Select an ASR model from the dropdown menu
|
|
|
|
| 331 |
4. Record audio by clicking the microphone button or upload an existing audio file
|
| 332 |
5. Click 'Transcribe Audio File' to process the recording
|
| 333 |
6. The complete transcription will appear in the text box
|
| 334 |
+
7. Click 'Clear Transcript' to reset the file transcription
|
| 335 |
""")
|
| 336 |
|
| 337 |
# Launch the app
|