AXERA-TECH
/

WeNet

Model card | Files and versions

xet

Community

yangrongzhao committed on Jun 1

Commit

78224f0

1 Parent(s): 249d07c

Add provider, Update README, add RTF

Browse files

Files changed (3) hide show

README.md +13 -15
ax_common.py +10 -6
run_ax.py +5 -0

README.md CHANGED Viewed

@@ -1,14 +1,3 @@
----
-license: mit
-language:
-- zh
-pipeline_tag: automatic-speech-recognition
-tags:
-- wenet
-- axera
-- speech-recognition
----
 # wenet.axera
 WeNet on Axera.
@@ -140,23 +129,32 @@ python run_ort.py \
 Offline CTC：
 ```bash
-python3 run_ax.py -i demo.wav --mode ctc_prefix_beam_search
 ```
 Online CTC：
 ```bash
-python3 run_ax.py -i demo.wav --online --mode ctc_prefix_beam_search
 ```
 Offline attention rescoring：
 ```bash
-python3 run_ax.py -i demo.wav --mode attention_rescoring
 ```
 Online attention rescoring：
 ```bash
-python3 run_ax.py -i demo.wav --online --mode attention_rescoring
 ```

 # wenet.axera
 WeNet on Axera.
 Offline CTC：
 ```bash
+python3 run_ax.py -i demo.wav --mode ctc_prefix_beam_search --provider AxEngineExecutionProvider
 ```
 Online CTC：
 ```bash
+python3 run_ax.py -i demo.wav --online --mode ctc_prefix_beam_search --provider AxEngineExecutionProvider
 ```
 Offline attention rescoring：
 ```bash
+python3 run_ax.py -i demo.wav --mode attention_rescoring --provider AxEngineExecutionProvider
 ```
 Online attention rescoring：
 ```bash
+python3 run_ax.py -i demo.wav --online --mode attention_rescoring --provider AxEngineExecutionProvider
 ```
+RTF 测试结果，`demo.wav` 时长 4.204s，repeat 5，不含模型加载：
+| 模式 | 平均耗时 | RTF |
+| --- | ---: | ---: |
+| offline CTC | 0.5202s | 0.1237 |
+| online CTC | 0.5582s | 0.1328 |
+| offline attention rescoring | 0.5266s | 0.1253 |
+| online attention rescoring | 0.5626s | 0.1338 |

ax_common.py CHANGED Viewed

@@ -468,10 +468,10 @@ def update_online_state(state, outputs):
 class AxModel:
-    def __init__(self, path):
         from axengine import InferenceSession
-        self.session = InferenceSession(path)
         self.output_names = [item.name for item in self.session.get_outputs()]
     def run(self, input_feed):
@@ -491,7 +491,8 @@ class WenetAXRunner:
                  decoder_len=32,
                  decoding_chunk_size=16,
                  num_decoding_left_chunks=5,
-                 batch_size=1):
         self.config_path = config_path
         self.vocab_path = vocab_path
         self.encoder_offline_path = encoder_offline_path
@@ -502,6 +503,7 @@ class WenetAXRunner:
         self.decoding_chunk_size = decoding_chunk_size
         self.num_decoding_left_chunks = num_decoding_left_chunks
         self.batch_size = batch_size
         self.configs = load_config(config_path)
         self.vocabulary, self.char_dict = load_vocab(vocab_path)
@@ -514,19 +516,21 @@ class WenetAXRunner:
     @property
     def offline_encoder(self):
         if self._offline_encoder is None:
-            self._offline_encoder = AxModel(self.encoder_offline_path)
         return self._offline_encoder
     @property
     def online_encoder(self):
         if self._online_encoder is None:
-            self._online_encoder = AxModel(self.encoder_online_path)
         return self._online_encoder
     @property
     def decoder(self):
         if self._decoder is None:
-            self._decoder = AxModel(self.decoder_path)
         return self._decoder
     def compute_feats(self, audio_file):

 class AxModel:
+    def __init__(self, path, provider="AxEngineExecutionProvider"):
         from axengine import InferenceSession
+        self.session = InferenceSession(path, providers=[provider])
         self.output_names = [item.name for item in self.session.get_outputs()]
     def run(self, input_feed):
                  decoder_len=32,
                  decoding_chunk_size=16,
                  num_decoding_left_chunks=5,
+                 batch_size=1,
+                 provider="AxEngineExecutionProvider"):
         self.config_path = config_path
         self.vocab_path = vocab_path
         self.encoder_offline_path = encoder_offline_path
         self.decoding_chunk_size = decoding_chunk_size
         self.num_decoding_left_chunks = num_decoding_left_chunks
         self.batch_size = batch_size
+        self.provider = provider
         self.configs = load_config(config_path)
         self.vocabulary, self.char_dict = load_vocab(vocab_path)
     @property
     def offline_encoder(self):
         if self._offline_encoder is None:
+            self._offline_encoder = AxModel(self.encoder_offline_path,
+                                            self.provider)
         return self._offline_encoder
     @property
     def online_encoder(self):
         if self._online_encoder is None:
+            self._online_encoder = AxModel(self.encoder_online_path,
+                                           self.provider)
         return self._online_encoder
     @property
     def decoder(self):
         if self._decoder is None:
+            self._decoder = AxModel(self.decoder_path, self.provider)
         return self._decoder
     def compute_feats(self, audio_file):

run_ax.py CHANGED Viewed

@@ -29,6 +29,9 @@ def get_args():
     parser.add_argument("--decoder_len", type=int, default=32)
     parser.add_argument("--decoding_chunk_size", type=int, default=16)
     parser.add_argument("--num_decoding_left_chunks", type=int, default=5)
     parser.add_argument("--mode",
                         choices=[
                             "ctc_greedy_search", "ctc_prefix_beam_search",
@@ -43,6 +46,7 @@ def main():
     args = get_args()
     print(f"online: {args.online}")
     print(f"mode: {args.mode}")
     runner = WenetAXRunner(
         args.config,
@@ -54,6 +58,7 @@ def main():
         decoder_len=args.decoder_len,
         decoding_chunk_size=args.decoding_chunk_size,
         num_decoding_left_chunks=args.num_decoding_left_chunks,
     )
     result = runner.transcribe(args.input,
                                online=args.online,

     parser.add_argument("--decoder_len", type=int, default=32)
     parser.add_argument("--decoding_chunk_size", type=int, default=16)
     parser.add_argument("--num_decoding_left_chunks", type=int, default=5)
+    parser.add_argument("--provider",
+                        type=str,
+                        default="AxEngineExecutionProvider")
     parser.add_argument("--mode",
                         choices=[
                             "ctc_greedy_search", "ctc_prefix_beam_search",
     args = get_args()
     print(f"online: {args.online}")
     print(f"mode: {args.mode}")
+    print(f"provider: {args.provider}")
     runner = WenetAXRunner(
         args.config,
         decoder_len=args.decoder_len,
         decoding_chunk_size=args.decoding_chunk_size,
         num_decoding_left_chunks=args.num_decoding_left_chunks,
+        provider=args.provider,
     )
     result = runner.transcribe(args.input,
                                online=args.online,