Spaces:

csukuangfj
/

test

Runtime error

App Files Community

csukuangfj committed on Dec 6, 2022

Commit

9194752

1 Parent(s): 0e6190d

small fixes

Browse files

Files changed (3) hide show

app.py +18 -11
examples.py +47 -27
test_wavs/tal_csasr/0.wav +0 -0

app.py CHANGED Viewed

@@ -60,7 +60,11 @@ def process_uploaded_file(
     in_filename: str,
 ):
     if in_filename is None or in_filename == "":
-        return ""
     logging.info(f"Processing uploaded file: {in_filename}")
     try:
@@ -73,7 +77,7 @@ def process_uploaded_file(
         )
     except Exception as e:
         logging.info(str(e))
-        return str(e)
 def process_microphone(
@@ -84,10 +88,11 @@ def process_microphone(
     in_filename: str,
 ):
     if in_filename is None or in_filename == "":
-        return (
             "Please first click 'Record from microphone', speak, "
             "click 'Stop recording', and then "
-            "click the button 'submit for recognition'"
         )
     logging.info(f"Processing microphone: {in_filename}")
@@ -101,7 +106,7 @@ def process_microphone(
         )
     except Exception as e:
         logging.info(str(e))
-        return str(e)
 @torch.no_grad()
@@ -136,7 +141,7 @@ def process(
     s.accept_wave_file(filename)
     recognizer.decode_stream(s)
-    text = s.result.text.strip()
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
     end = time.time()
@@ -161,7 +166,7 @@ def process(
     logging.info(info)
     logging.info(f"\nrepo_id: {repo_id}\nhyp: {text}")
-    return text
 title = "# Automatic Speech Recognition with Next-gen Kaldi"
@@ -246,6 +251,7 @@ with demo:
             )
             upload_button = gr.Button("Submit for recognition")
             uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
             gr.Examples(
                 examples=examples,
@@ -256,7 +262,7 @@ with demo:
                     num_active_paths_slider,
                     uploaded_file,
                 ],
-                outputs=[uploaded_output],
                 fn=process_uploaded_file,
             )
@@ -270,6 +276,7 @@ with demo:
             record_button = gr.Button("Submit for recognition")
             recorded_output = gr.Textbox(label="Recognized speech from recordings")
             gr.Examples(
                 examples=examples,
@@ -280,7 +287,7 @@ with demo:
                     num_active_paths_slider,
                     microphone,
                 ],
-                outputs=[recorded_output],
                 fn=process_microphone,
             )
@@ -293,7 +300,7 @@ with demo:
                 num_active_paths_slider,
                 uploaded_file,
             ],
-            outputs=[uploaded_output],
         )
         record_button.click(
@@ -305,7 +312,7 @@ with demo:
                 num_active_paths_slider,
                 microphone,
             ],
-            outputs=[recorded_output],
         )
     gr.Markdown(description)

     in_filename: str,
 ):
     if in_filename is None or in_filename == "":
+        return "", build_html_output(
+            "Please first upload a file and then click "
+            'the button "submit for recognition"',
+            "result_item_error",
+        )
     logging.info(f"Processing uploaded file: {in_filename}")
     try:
         )
     except Exception as e:
         logging.info(str(e))
+        return "", build_html_output(str(e), "result_item_error")
 def process_microphone(
     in_filename: str,
 ):
     if in_filename is None or in_filename == "":
+        return "", build_html_output(
             "Please first click 'Record from microphone', speak, "
             "click 'Stop recording', and then "
+            "click the button 'submit for recognition'",
+            "result_item_error",
         )
     logging.info(f"Processing microphone: {in_filename}")
         )
     except Exception as e:
         logging.info(str(e))
+        return "", build_html_output(str(e), "result_item_error")
 @torch.no_grad()
     s.accept_wave_file(filename)
     recognizer.decode_stream(s)
+    text = s.result.text
     date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
     end = time.time()
     logging.info(info)
     logging.info(f"\nrepo_id: {repo_id}\nhyp: {text}")
+    return text, build_html_output(info)
 title = "# Automatic Speech Recognition with Next-gen Kaldi"
             )
             upload_button = gr.Button("Submit for recognition")
             uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
+            uploaded_html_info = gr.HTML(label="Info")
             gr.Examples(
                 examples=examples,
                     num_active_paths_slider,
                     uploaded_file,
                 ],
+                outputs=[uploaded_output, uploaded_html_info],
                 fn=process_uploaded_file,
             )
             record_button = gr.Button("Submit for recognition")
             recorded_output = gr.Textbox(label="Recognized speech from recordings")
+            recorded_html_info = gr.HTML(label="Info")
             gr.Examples(
                 examples=examples,
                     num_active_paths_slider,
                     microphone,
                 ],
+                outputs=[recorded_output, recorded_html_info],
                 fn=process_microphone,
             )
                 num_active_paths_slider,
                 uploaded_file,
             ],
+            outputs=[uploaded_output, uploaded_html_info],
         )
         record_button.click(
                 num_active_paths_slider,
                 microphone,
             ],
+            outputs=[recorded_output, recorded_html_info],
         )
     gr.Markdown(description)

examples.py CHANGED Viewed

@@ -16,6 +16,48 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 examples = [
     # librispeech
     # https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
     [
@@ -154,57 +196,42 @@ examples = [
         "./test_wavs/aidatatang_200zh/T0055G0036S0004.wav",
     ],
     # tal_csasr
-    # https://huggingface.co/luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5/tree/main/test_wavs
     [
         "Chinese+English",
-        "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
         "greedy_search",
         4,
         "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_132.wav",
     ],
     [
         "Chinese+English",
-        "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
         "greedy_search",
         4,
         "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_138.wav",
     ],
     [
         "Chinese+English",
-        "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
         "greedy_search",
         4,
         "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
     ],
     [
         "Tibetan",
-        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
         "greedy_search",
         4,
         "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
     ],
     [
         "Tibetan",
-        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
-        "greedy_search",
-        4,
-        "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
-    ],
-    [
-        "Tibetan",
-        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
         "greedy_search",
         4,
         "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
     ],
     # arabic
-    [
-        "Arabic",
-        "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
-        "greedy_search",
-        4,
-        "./test_wavs/arabic/a.wav",
-    ],
     [
         "Arabic",
         "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
@@ -226,11 +253,4 @@ examples = [
         4,
         "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
     ],
-    [
-        "German",
-        "csukuangfj/wav2vec2.0-torchaudio",
-        "greedy_search",
-        4,
-        "./test_wavs/german/20170517-0900-PLENARY-16-de_20170517.wav",
-    ],
 ]

 # See the License for the specific language governing permissions and
 # limitations under the License.
 examples = [
+    [
+        "Chinese+English",
+        "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
+        "greedy_search",
+        4,
+        "./test_wavs/tal_csasr/0.wav",
+    ],
+    [
+        "English",
+        "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13",
+        "greedy_search",
+        4,
+        "./test_wavs/librispeech/1089-134686-0001.wav",
+    ],
+    [
+        "Chinese",
+        "luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2",
+        "greedy_search",
+        4,
+        "./test_wavs/wenetspeech/DEV_T0000000000.opus",
+    ],
+    [
+        "German",
+        "csukuangfj/wav2vec2.0-torchaudio",
+        "greedy_search",
+        4,
+        "./test_wavs/german/20170517-0900-PLENARY-16-de_20170517.wav",
+    ],
+    [
+        "Arabic",
+        "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
+        "greedy_search",
+        4,
+        "./test_wavs/arabic/a.wav",
+    ],
+    [
+        "Tibetan",
+        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
+        "greedy_search",
+        4,
+        "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
+    ],
     # librispeech
     # https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
     [
         "./test_wavs/aidatatang_200zh/T0055G0036S0004.wav",
     ],
     # tal_csasr
     [
         "Chinese+English",
+        "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
         "greedy_search",
         4,
         "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_132.wav",
     ],
     [
         "Chinese+English",
+        "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
         "greedy_search",
         4,
         "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_138.wav",
     ],
     [
         "Chinese+English",
+        "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
         "greedy_search",
         4,
         "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
     ],
     [
         "Tibetan",
+        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
         "greedy_search",
         4,
         "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
     ],
     [
         "Tibetan",
+        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
         "greedy_search",
         4,
         "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
     ],
     # arabic
     [
         "Arabic",
         "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
         4,
         "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
     ],
 ]

test_wavs/tal_csasr/0.wav ADDED Viewed

Binary file (778 kB). View file