Spaces:

GavinHuang
/

asr-demo

Running

App Files Files Community

GavinHuang committed on May 5

Commit

6dbf680

1 Parent(s): f374409

fix: add clear transcription functionality for real-time and file transcription, update UI elements

Browse files

Files changed (1) hide show

app.py +32 -7

app.py CHANGED Viewed

@@ -159,6 +159,7 @@ def transcribe_file(audio_file, model_name="nvidia/parakeet-tdt-0.6b-v2"):
         return "No audio file provided. Please upload an audio file."
     try:
         model = load_model(model_name)
         print(f"Processing file: {audio_file}")
@@ -209,7 +210,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
                         label="Speak into your microphone"
                     )
-                    # clear_btn = gr.Button("Clear Transcript")
                 with gr.Column(scale=3):
                     text_output = gr.Textbox(
@@ -232,7 +233,9 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
                         type="filepath",
                         label="Record or upload audio file"
                     )
-                    transcribe_btn = gr.Button("Transcribe Audio File")
                 with gr.Column(scale=3):
                     file_transcription = gr.Textbox(
@@ -281,24 +284,45 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
         fn=transcribe_file,
         inputs=[audio_recorder, model_dropdown],
         outputs=[file_transcription]
     )
     # Update the main text output when the state changes
     state.change(
         fn=lambda s: s,
         inputs=[state],
-        outputs=[text_output]
-    )
-    gr.Markdown("## 📝 Instructions")
-    gr.Markdown("""
     ### Real-time Transcription:
     1. Select an ASR model from the dropdown menu
     2. Click 'Load Selected Model' to load the model
     3. Click the microphone button to start recording
     4. Speak clearly into your microphone
     5. The transcription will appear in real-time
-    6. Click 'Clear Transcript' to start a new transcription
     ### File Transcription:
     1. Select an ASR model from the dropdown menu
@@ -307,6 +331,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
     4. Record audio by clicking the microphone button or upload an existing audio file
     5. Click 'Transcribe Audio File' to process the recording
     6. The complete transcription will appear in the text box
     """)
 # Launch the app

         return "No audio file provided. Please upload an audio file."
     try:
+        global model
         model = load_model(model_name)
         print(f"Processing file: {audio_file}")
                         label="Speak into your microphone"
                     )
+                clear_btn = gr.Button("Clear Transcript", variant="secondary")
                 with gr.Column(scale=3):
                     text_output = gr.Textbox(
                         type="filepath",
                         label="Record or upload audio file"
                     )
+                    with gr.Row():
+                        transcribe_btn = gr.Button("Transcribe Audio File", variant="primary")
+                        clear_file_btn = gr.Button("Clear Transcript", variant="secondary")
                 with gr.Column(scale=3):
                     file_transcription = gr.Textbox(
         fn=transcribe_file,
         inputs=[audio_recorder, model_dropdown],
         outputs=[file_transcription]
+    )    # Clear the real-time transcription
+    def clear_transcription():
+        print("Clearing real-time transcription")
+        return "", "", None, 0  # Clear state, streaming_text, audio_buffer, and last_processed_time
+    # Clear the file transcription
+    def clear_file_transcription():
+        print("Clearing file transcription")
+        return ""  # Clear file_transcription
+    # Set up clear button event handlers
+    clear_btn.click(
+        fn=clear_transcription,
+        inputs=[],
+        outputs=[state, streaming_text, audio_buffer, last_processed_time]
+    )
+    clear_file_btn.click(
+        fn=clear_file_transcription,
+        inputs=[],
+        outputs=[file_transcription]
     )
     # Update the main text output when the state changes
     state.change(
         fn=lambda s: s,
         inputs=[state],
+        outputs=[text_output]    )
+    gr.Markdown("## 📝 Instructions")
+    gr.Markdown("""
     ### Real-time Transcription:
     1. Select an ASR model from the dropdown menu
     2. Click 'Load Selected Model' to load the model
     3. Click the microphone button to start recording
     4. Speak clearly into your microphone
     5. The transcription will appear in real-time
+    6. Click 'Clear Transcript' to reset the transcription
     ### File Transcription:
     1. Select an ASR model from the dropdown menu
     4. Record audio by clicking the microphone button or upload an existing audio file
     5. Click 'Transcribe Audio File' to process the recording
     6. The complete transcription will appear in the text box
+    7. Click 'Clear Transcript' to reset the file transcription
     """)
 # Launch the app