Yuekai Zhang
committed on
Commit
·
96f87e0
1
Parent(s):
c92992e
add streaming support
Browse files
test/test_riva_wfst_decoder.py
CHANGED
|
@@ -2,7 +2,7 @@ import numpy as np
|
|
| 2 |
import time
|
| 3 |
import torch
|
| 4 |
import os
|
| 5 |
-
from riva.asrlib.decoder.python_decoder import BatchedMappedDecoderCuda, BatchedMappedDecoderCudaConfig
|
| 6 |
from typing import List
|
| 7 |
from test_frame_reducer import FrameReducer
|
| 8 |
|
|
@@ -30,7 +30,7 @@ class RivaWFSTDecoder:
|
|
| 30 |
config.online_opts.decoder_opts.max_active = 7000
|
| 31 |
config.online_opts.determinize_lattice = True
|
| 32 |
config.online_opts.max_batch_size = 100
|
| 33 |
-
config.online_opts.num_channels =
|
| 34 |
config.online_opts.frame_shift_seconds = 0.04
|
| 35 |
config.online_opts.lattice_postprocessor_opts.lm_scale = 5.0
|
| 36 |
config.online_opts.lattice_postprocessor_opts.word_ins_penalty = 0.0
|
|
@@ -38,12 +38,20 @@ class RivaWFSTDecoder:
|
|
| 38 |
config.online_opts.num_post_processing_worker_threads = 16
|
| 39 |
config.online_opts.num_decoder_copy_threads = 4
|
| 40 |
|
|
|
|
|
|
|
| 41 |
config.online_opts.lattice_postprocessor_opts.nbest = beam_size
|
| 42 |
|
| 43 |
self.decoder = BatchedMappedDecoderCuda(
|
| 44 |
config, os.path.join(tlg_dir, "TLG.fst"),
|
| 45 |
os.path.join(tlg_dir, "words.txt"), vocab_size
|
| 46 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
self.word_id_to_word_str = load_word_symbols(os.path.join(tlg_dir, "words.txt"))
|
| 48 |
self.nbest = beam_size
|
| 49 |
self.vocab_size = vocab_size
|
|
@@ -91,7 +99,7 @@ if __name__ == "__main__":
|
|
| 91 |
char_dict = load_word_symbols('./data/words.txt')
|
| 92 |
|
| 93 |
beam_size = 10
|
| 94 |
-
batch_size =
|
| 95 |
counts = 10
|
| 96 |
|
| 97 |
# ctc_log_probs [1,103,4233]
|
|
@@ -116,4 +124,20 @@ if __name__ == "__main__":
|
|
| 116 |
# total_hyps = riva_decoder.decode_nbest(ctc_log_probs, encoder_out_lens)
|
| 117 |
# print('nbest', total_hyps)
|
| 118 |
decode_end = time.perf_counter() - decode_start
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
print(f"Decode {ctc_log_probs.shape[0] * counts} sentences, cost {decode_end} seconds")
|
|
|
|
| 2 |
import time
|
| 3 |
import torch
|
| 4 |
import os
|
| 5 |
+
from riva.asrlib.decoder.python_decoder import BatchedMappedDecoderCuda, BatchedMappedOnlineDecoderCuda, BatchedMappedDecoderCudaConfig
|
| 6 |
from typing import List
|
| 7 |
from test_frame_reducer import FrameReducer
|
| 8 |
|
|
|
|
| 30 |
config.online_opts.decoder_opts.max_active = 7000
|
| 31 |
config.online_opts.determinize_lattice = True
|
| 32 |
config.online_opts.max_batch_size = 100
|
| 33 |
+
config.online_opts.num_channels = config.online_opts.max_batch_size * 2
|
| 34 |
config.online_opts.frame_shift_seconds = 0.04
|
| 35 |
config.online_opts.lattice_postprocessor_opts.lm_scale = 5.0
|
| 36 |
config.online_opts.lattice_postprocessor_opts.word_ins_penalty = 0.0
|
|
|
|
| 38 |
config.online_opts.num_post_processing_worker_threads = 16
|
| 39 |
config.online_opts.num_decoder_copy_threads = 4
|
| 40 |
|
| 41 |
+
#config.online_opts.decoder_opts.ntokens_pre_allocated = 10_000_000
|
| 42 |
+
|
| 43 |
config.online_opts.lattice_postprocessor_opts.nbest = beam_size
|
| 44 |
|
| 45 |
self.decoder = BatchedMappedDecoderCuda(
|
| 46 |
config, os.path.join(tlg_dir, "TLG.fst"),
|
| 47 |
os.path.join(tlg_dir, "words.txt"), vocab_size
|
| 48 |
)
|
| 49 |
+
|
| 50 |
+
self.online_decoder = BatchedMappedOnlineDecoderCuda(
|
| 51 |
+
config.online_opts, os.path.join(tlg_dir, "TLG.fst"),
|
| 52 |
+
os.path.join(tlg_dir, "words.txt"), vocab_size
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
self.word_id_to_word_str = load_word_symbols(os.path.join(tlg_dir, "words.txt"))
|
| 56 |
self.nbest = beam_size
|
| 57 |
self.vocab_size = vocab_size
|
|
|
|
| 99 |
char_dict = load_word_symbols('./data/words.txt')
|
| 100 |
|
| 101 |
beam_size = 10
|
| 102 |
+
batch_size = 10
|
| 103 |
counts = 10
|
| 104 |
|
| 105 |
# ctc_log_probs [1,103,4233]
|
|
|
|
| 124 |
# total_hyps = riva_decoder.decode_nbest(ctc_log_probs, encoder_out_lens)
|
| 125 |
# print('nbest', total_hyps)
|
| 126 |
decode_end = time.perf_counter() - decode_start
|
| 127 |
+
#chunk_size = 32
|
| 128 |
+
ctc_log_probs_list, is_first_chunk, is_last_chunk = [], [True] * batch_size, [True] * batch_size
|
| 129 |
+
corr_ids = list(range(batch_size))
|
| 130 |
+
for corr_id in corr_ids:
|
| 131 |
+
success = riva_decoder.online_decoder.try_init_corr_id(corr_id)
|
| 132 |
+
assert success
|
| 133 |
+
for i in range(batch_size):
|
| 134 |
+
#ctc_log_probs_list.append(ctc_log_probs[i,:chunk_size,:])
|
| 135 |
+
ctc_log_probs_list.append(ctc_log_probs[i,:,:])
|
| 136 |
+
channels, partial_hypotheses = \
|
| 137 |
+
riva_decoder.online_decoder.decode_batch(corr_ids, ctc_log_probs_list,
|
| 138 |
+
is_first_chunk, is_last_chunk)
|
| 139 |
+
|
| 140 |
+
for j, ph in enumerate(partial_hypotheses):
|
| 141 |
+
print(j, ph.words, ph.score, ph.ilabels)
|
| 142 |
+
|
| 143 |
print(f"Decode {ctc_log_probs.shape[0] * counts} sentences, cost {decode_end} seconds")
|