AXERA-TECH
/

FireRedASR-AED

Model card · Files and versions

xet

Community

inoryQwQ committed on Oct 28, 2025

Commit

40049a2

1 Parent(s): 136adf2

fix pipeline

Browse files

Files changed (1) hide show

test_ax_model.py +10 -2

test_ax_model.py CHANGED Viewed

@@ -228,8 +228,8 @@ class FireRedASROnnxModel:
                 # self.decoder_main.get_inputs()[1].name: n_layer_self_k_cache,
                 "n_layer_cross_k": n_layer_cross_k_cache,
                 "n_layer_cross_v": n_layer_cross_v_cache,
-                "pe": pe,
-                "self_attn_mask": self_attn_mask,
                 "cross_attn_mask": cross_attn_mask,
                 # self.decoder_main.get_inputs()[7].name: cross_attn_mask,
             }
@@ -356,6 +356,9 @@ class FireRedASROnnxModel:
             n_layer_cross_v = to_numpy(n_layer_cross_v)
             cross_attn_mask = to_numpy(cross_attn_mask)
             # for name, npy in zip(
             #     ["tokens", "n_layer_self_k_cache", "n_layer_self_v_cache", "n_layer_cross_k", "n_layer_cross_v", "pe", "self_attn_mask", "cross_attn_mask"],
             #     [tokens, n_layer_self_k_cache, n_layer_self_v_cache, n_layer_cross_k, n_layer_cross_v, self.pe[offset], self_attn_mask, cross_attn_mask]
@@ -365,6 +368,7 @@ class FireRedASROnnxModel:
             #     np.save(os.path.join(file_path, f"{i}.npy"), npy)
             if i == 0:
                 logits, n_layer_self_k_cache, n_layer_self_v_cache = self.decode_main_one_token(
                     to_numpy(tokens),
                     to_numpy(n_layer_self_k_cache),
@@ -375,7 +379,9 @@ class FireRedASROnnxModel:
                     self_attn_mask,
                     to_numpy(cross_attn_mask)
                 )
             else:
                 logits, n_layer_self_k_cache, n_layer_self_v_cache = self.decode_loop_one_token(
                     to_numpy(tokens),
                     to_numpy(n_layer_self_k_cache),
@@ -386,6 +392,7 @@ class FireRedASROnnxModel:
                     self_attn_mask,
                     to_numpy(cross_attn_mask)
                 )
             offset += 1
             logits = torch.from_numpy(logits)
@@ -513,6 +520,7 @@ class FireRedASROnnxModel:
             to_numpy(feats),
             to_numpy(lengths)
         )
         nbest_hyps = self.run_decoder(n_layer_cross_k,
                                       n_layer_cross_v,
                                       cross_attn_mask,

                 # self.decoder_main.get_inputs()[1].name: n_layer_self_k_cache,
                 "n_layer_cross_k": n_layer_cross_k_cache,
                 "n_layer_cross_v": n_layer_cross_v_cache,
+                # "pe": pe,
+                # "self_attn_mask": self_attn_mask,
                 "cross_attn_mask": cross_attn_mask,
                 # self.decoder_main.get_inputs()[7].name: cross_attn_mask,
             }
             n_layer_cross_v = to_numpy(n_layer_cross_v)
             cross_attn_mask = to_numpy(cross_attn_mask)
+            self_attn_mask = np.zeros((batch_size * beam_size, 1, self.decode_max_len), dtype=np.float32)
+            self_attn_mask[:, :, :self.decode_max_len - offset[0] - 1] = -np.inf
             # for name, npy in zip(
             #     ["tokens", "n_layer_self_k_cache", "n_layer_self_v_cache", "n_layer_cross_k", "n_layer_cross_v", "pe", "self_attn_mask", "cross_attn_mask"],
             #     [tokens, n_layer_self_k_cache, n_layer_self_v_cache, n_layer_cross_k, n_layer_cross_v, self.pe[offset], self_attn_mask, cross_attn_mask]
             #     np.save(os.path.join(file_path, f"{i}.npy"), npy)
             if i == 0:
+                start_time = time.time()
                 logits, n_layer_self_k_cache, n_layer_self_v_cache = self.decode_main_one_token(
                     to_numpy(tokens),
                     to_numpy(n_layer_self_k_cache),
                     self_attn_mask,
                     to_numpy(cross_attn_mask)
                 )
+                print(f"run decoder_main take {(time.time() - start_time) * 1000}ms")
             else:
+                start_time = time.time()
                 logits, n_layer_self_k_cache, n_layer_self_v_cache = self.decode_loop_one_token(
                     to_numpy(tokens),
                     to_numpy(n_layer_self_k_cache),
                     self_attn_mask,
                     to_numpy(cross_attn_mask)
                 )
+                print(f"run decoder_loop take {(time.time() - start_time) * 1000}ms")
             offset += 1
             logits = torch.from_numpy(logits)
             to_numpy(feats),
             to_numpy(lengths)
         )
+        print(f"run encoder take {(time.time() - start_time) * 1000}ms")
         nbest_hyps = self.run_decoder(n_layer_cross_k,
                                       n_layer_cross_v,
                                       cross_attn_mask,