odg123 committed on Jan 26

Commit

e8237cb

verified ·

1 Parent(s): d8d48c5

Upload 30 files

Browse files

Files changed (31) hide show

.gitattributes +1 -0
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-04-45-26-checkpoint +264 -0
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-05-16-checkpoint +111 -0
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-20-04-checkpoint +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-29-29-checkpoint +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/hyp-ihm.txt +0 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-25-15-47-40 +32 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-43-42 +45 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-44-36 +45 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-45-26 +0 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-57-24 +57 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-58-20 +32 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-59-21 +111 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-02-37 +32 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-03-42 +32 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-05-16 +529 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-13-05 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-14-59 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-17-40 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-20-04 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-29-29 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-48-19 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-50-10 +22 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-54-32 +28 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-02-34 +77 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-04-30 +72 -0
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-07-36 +0 -0
egs/ami/ASR/xlsr_transducer/inference_results/metrics-ihm.txt +16 -0
egs/ami/ASR/xlsr_transducer/inference_results/ref-ihm.txt +0 -0
egs/ami/ASR/xlsr_transducer/log/log-train-2026-01-25-02-57-28 +3 -0
egs/ami/ASR/xlsr_transducer/tensorboard/events.out.tfevents.1769309848.3edaabdb707c.1028020.0 +3 -0

.gitattributes CHANGED Viewed

@@ -55,3 +55,4 @@ egs/libricss/SURT/heat.png filter=lfs diff=lfs merge=lfs -text
 egs/libricss/SURT/surt.png filter=lfs diff=lfs merge=lfs -text
 egs/librispeech/WSASR/figures/otc_training_graph.drawio.png filter=lfs diff=lfs merge=lfs -text
 egs/speech_llm/ASR_LLM/assets/framework.png filter=lfs diff=lfs merge=lfs -text

 egs/libricss/SURT/surt.png filter=lfs diff=lfs merge=lfs -text
 egs/librispeech/WSASR/figures/otc_training_graph.drawio.png filter=lfs diff=lfs merge=lfs -text
 egs/speech_llm/ASR_LLM/assets/framework.png filter=lfs diff=lfs merge=lfs -text
+egs/ami/ASR/xlsr_transducer/log/log-train-2026-01-25-02-57-28 filter=lfs diff=lfs merge=lfs -text

egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-04-45-26-checkpoint ADDED Viewed

	@@ -0,0 +1,264 @@

+2026-01-26 04:45:26,876 INFO [inference.py:419] ================================================================================
+2026-01-26 04:45:26,877 INFO [inference.py:420] XLSR-Transducer Inference on AMI
+2026-01-26 04:45:26,877 INFO [inference.py:421] ================================================================================
+2026-01-26 04:45:26,877 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
+2026-01-26 04:45:26,877 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 04:45:26,877 INFO [inference.py:424] Test set: ihm
+2026-01-26 04:45:26,877 INFO [inference.py:425] Decoding method: greedy_search
+2026-01-26 04:45:26,877 INFO [inference.py:431] Device: cpu
+2026-01-26 04:45:26,877 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 04:45:26,878 INFO [inference.py:442] Vocabulary size: 500
+2026-01-26 04:45:26,879 INFO [inference.py:443] Blank ID: 0
+2026-01-26 04:45:26,879 INFO [inference.py:446] Creating model
+2026-01-26 04:45:28,435 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 04:45:28,436 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 04:45:34,301 INFO [inference.py:482] Number of model parameters: 317,511,772
+2026-01-26 04:45:34,302 INFO [inference.py:485] Loading test data
+2026-01-26 04:45:34,302 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 04:45:35,453 INFO [inference.py:496] Number of test utterances: 6676
+2026-01-26 04:45:35,453 INFO [inference.py:499] Starting inference...
+2026-01-26 04:45:36,710 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 04:45:36,713 INFO [inference.py:319] Audio range: [-0.090, 0.104]
+2026-01-26 04:45:36,715 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 04:45:46,614 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 04:45:46,615 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 04:45:46,615 INFO [inference.py:343] Encoder out range: [-13.684, 12.764]
+2026-01-26 04:45:55,306 INFO [inference.py:353] Number of hypotheses: 6
+2026-01-26 04:45:55,307 INFO [inference.py:355] First hypothesis: [290, 289, 20, 262, 20, 262, 20, 262, 20, 262, 20, 262, 20, 262, 240, 199, 35, 8, 14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 13, 77, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 15, 83, 7, 8, 56, 12, 10, 74, 74, 19, 46, 74, 19, 46, 74, 19, 46, 74, 19, 190, 162, 5, 14, 43, 8, 119, 5, 14, 43, 8, 5, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 23, 4, 2, 48, 33, 48, 33, 48, 33, 48, 33, 48, 33, 10, 33, 10, 33, 10, 33, 10, 33, 10, 33, 10, 38, 14, 29, 5, 52, 15, 7, 27, 154, 38, 4, 185, 16, 95, 10, 10, 118, 231, 10, 13, 160, 202]
+2026-01-26 04:45:55,323 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 04:45:55,324 INFO [inference.py:319] Audio range: [-0.401, 0.443]
+2026-01-26 04:45:55,325 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 04:46:02,522 INFO [inference.py:341] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 04:46:02,524 INFO [inference.py:342] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 04:46:02,524 INFO [inference.py:343] Encoder out range: [-12.514, 12.004]
+2026-01-26 04:46:14,119 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:46:14,119 INFO [inference.py:355] First hypothesis: [10, 7, 5, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 4, 5, 21, 306, 210, 96, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 6, 4, 24, 9, 102, 9, 102, 9, 102, 9, 102, 9, 102, 9, 102, 130, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 18, 18, 27, 60, 30, 14, 60, 60, 70, 60, 60, 28, 60, 60, 28, 60, 60, 28, 60, 60, 28, 24, 14, 36, 75, 41, 86, 97, 25, 75, 14, 43, 8, 4, 14, 4, 7, 197, 4, 7, 197, 4, 7, 197, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 17, 169, 17, 169, 17, 169, 17, 169, 17, 169, 17, 147, 169, 147, 169, 147, 169, 147, 169, 147, 169, 25, 147, 147, 147, 147, 147, 147, 147, 147, 147, 4, 7, 197, 112, 4, 7, 197, 112, 4, 7, 197, 112, 4, 7, 197, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 17, 169, 17, 169, 17, 169, 17, 169, 17, 169, 25, 75, 8, 54, 44, 24, 363, 328, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 199, 205, 199, 199, 92, 14, 199, 199, 92, 14, 8, 67, 67, 32, 26, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 27, 8, 8, 119, 8, 19, 201, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 234, 16, 16, 234, 16, 16, 234, 16, 16, 234, 58, 55, 43, 30, 55, 43, 30, 55, 43, 30, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 36, 22, 41, 80, 219, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 15, 72]
+2026-01-26 04:46:14,135 INFO [inference.py:318] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 04:46:14,136 INFO [inference.py:319] Audio range: [-0.439, 0.480]
+2026-01-26 04:46:14,137 INFO [inference.py:320] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)
+2026-01-26 04:46:21,443 INFO [inference.py:341] Encoder out shape: torch.Size([39, 126, 1024])
+2026-01-26 04:46:21,499 INFO [inference.py:342] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100,  94,  89,  88,  87,  73,
+         71,  71,  69,  68,  68,  65,  62,  62,  59,  59,  58,  56,  51,  45,
+         42,  40,  38,  36,  35,  33,  29,  28,  24,  18,  17])
+2026-01-26 04:46:21,500 INFO [inference.py:343] Encoder out range: [-11.444, 10.811]
+2026-01-26 04:46:29,134 INFO [inference.py:353] Number of hypotheses: 39
+2026-01-26 04:46:29,134 INFO [inference.py:355] First hypothesis: [11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 64, 10, 7, 5, 51, 13, 227, 211, 120, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 17, 7, 5, 17, 113, 9, 113, 9, 113, 9, 113, 13, 4, 5, 8, 5, 136, 40, 5, 16, 136, 40, 30, 5, 8, 136, 30, 8, 14, 5, 310, 23, 177, 63, 54, 136, 40, 30, 8, 14, 5, 16, 272, 16, 272, 16, 272, 16, 272]
+2026-01-26 04:46:29,199 INFO [inference.py:318] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
+2026-01-26 04:46:29,200 INFO [inference.py:319] Audio range: [-0.314, 0.332]
+2026-01-26 04:46:29,201 INFO [inference.py:320] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
+        56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
+        46079, 45280, 44960], dtype=torch.int32)
+2026-01-26 04:46:36,615 INFO [inference.py:341] Encoder out shape: torch.Size([23, 212, 1024])
+2026-01-26 04:46:36,617 INFO [inference.py:342] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
+        170, 166, 164, 164, 147, 144, 143, 141, 140])
+2026-01-26 04:46:36,617 INFO [inference.py:343] Encoder out range: [-13.261, 11.090]
+2026-01-26 04:46:45,499 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:46:45,500 INFO [inference.py:355] First hypothesis: [31, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 20, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 32, 28, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 130, 4, 2, 11]
+2026-01-26 04:46:45,516 INFO [inference.py:318] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
+2026-01-26 04:46:45,516 INFO [inference.py:319] Audio range: [-0.323, 0.414]
+2026-01-26 04:46:45,517 INFO [inference.py:320] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
+2026-01-26 04:46:53,506 INFO [inference.py:341] Encoder out shape: torch.Size([5, 991, 1024])
+2026-01-26 04:46:53,507 INFO [inference.py:342] Encoder out lens: tensor([991, 931, 931, 920, 913])
+2026-01-26 04:46:53,507 INFO [inference.py:343] Encoder out range: [-14.241, 14.344]
+2026-01-26 04:47:07,903 INFO [inference.py:353] Number of hypotheses: 5
+2026-01-26 04:47:07,903 INFO [inference.py:355] First hypothesis: [37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 4, 2, 12, 23, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 7, 69, 7, 69, 7, 69, 7, 69, 7, 69, 7, 5, 116, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 82, 98, 27, 267, 63, 137, 27, 267, 63, 137, 27, 267, 63, 137, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 19, 5, 14, 5, 12, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 31, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 24, 28, 257, 24, 28, 257, 24, 28, 257, 24, 28, 24, 14, 24, 24, 5, 147, 17, 147, 61, 4, 28, 30, 8, 25, 4, 28, 30, 8, 25, 4, 28, 28, 30, 8, 25, 4, 28, 28, 30, 8, 25, 4, 28, 28, 159, 13, 30, 8, 28, 28, 30, 8, 28, 28, 30, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 24, 24, 21, 24, 24, 21, 24, 24, 21, 24, 24, 21, 24, 21, 24, 21, 24, 21, 24, 21, 24, 21, 8, 21, 5, 14, 21, 5, 14, 21, 5, 14, 5, 5, 71, 14, 43, 8, 5, 119, 55, 14, 43, 8, 119, 55, 80, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 96, 40, 275, 32, 5, 275, 32, 5, 275, 32, 5, 275, 32, 5, 24, 325, 5, 43, 40, 43, 40, 43, 40, 43, 40, 43, 40, 43, 40, 43, 40, 19, 190, 40, 137, 43, 40, 19, 40, 43, 40, 19, 40, 43, 30, 27, 14, 29, 14, 43, 19, 29, 14, 43, 19, 29, 8, 103, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 20, 98, 30, 82, 82, 4, 223, 82, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 28, 4, 223, 28, 4, 223, 28, 4, 223, 190, 5, 20, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 119, 53, 21, 29, 29, 119, 53, 21, 29, 29, 119, 5, 14, 190, 8, 5, 20, 20, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 18, 7, 27, 220, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 24, 138, 67, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 47, 7, 8, 4, 5, 23, 36, 63, 46, 12, 6, 4, 5, 93, 4, 5, 93, 4, 5, 93, 4, 5, 93, 93, 210, 4, 5, 93, 93, 
210, 4, 5, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 91, 5, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 98, 98, 30, 82, 82, 82, 82, 82, 82, 82, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 82, 30, 60, 30, 36, 82, 30, 60, 30, 36, 82, 30, 36, 63, 79, 14, 190, 8, 30, 43, 8, 30, 43, 8, 30, 43, 8, 5, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 53, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 14, 43, 8, 5, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 13, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 30, 36, 63, 4, 28, 28, 30, 36, 63, 22, 30, 30, 22, 19, 28, 22, 30, 30, 22, 19, 28, 30, 30, 22, 19, 28, 30, 30, 22, 19, 28, 30, 8, 28, 28, 30, 8, 28, 28, 30, 8, 28, 28, 22, 70, 25, 13, 30, 8, 28, 29, 14, 25, 29, 29, 21, 19, 21, 42, 41, 19, 21, 19, 40, 19, 41, 4, 2, 45, 78, 62]
+2026-01-26 04:47:08,602 INFO [inference.py:318] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
+2026-01-26 04:47:08,603 INFO [inference.py:319] Audio range: [-0.274, 0.362]
+2026-01-26 04:47:08,604 INFO [inference.py:320] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
+        33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
+        20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320,  7040,
+         6560,  6400,  5760,  5760,  5120,  4800,  4800,  4640,  4480,  3360],
+       dtype=torch.int32)
+2026-01-26 04:47:15,213 INFO [inference.py:341] Encoder out shape: torch.Size([40, 122, 1024])
+2026-01-26 04:47:15,214 INFO [inference.py:342] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103,  91,  91,
+         85,  77,  77,  73,  67,  67,  64,  52,  50,  50,  49,  48,  46,  41,
+         38,  21,  20,  19,  17,  17,  15,  14,  14,  14,  13,  10])
+2026-01-26 04:47:15,214 INFO [inference.py:343] Encoder out range: [-11.784, 11.570]
+2026-01-26 04:47:22,413 INFO [inference.py:353] Number of hypotheses: 40
+2026-01-26 04:47:22,414 INFO [inference.py:355] First hypothesis: [45, 78, 62, 4, 2, 45, 78, 62, 4, 2, 31, 4, 2, 11, 4, 2, 11]
+2026-01-26 04:47:22,425 INFO [inference.py:318] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
+2026-01-26 04:47:22,426 INFO [inference.py:319] Audio range: [-0.514, 0.393]
+2026-01-26 04:47:22,427 INFO [inference.py:320] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
+        51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
+        44640, 44320, 44160], dtype=torch.int32)
+2026-01-26 04:47:29,228 INFO [inference.py:341] Encoder out shape: torch.Size([23, 208, 1024])
+2026-01-26 04:47:29,229 INFO [inference.py:342] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
+        154, 150, 150, 149, 146, 143, 139, 138, 137])
+2026-01-26 04:47:29,229 INFO [inference.py:343] Encoder out range: [-12.152, 11.060]
+2026-01-26 04:47:36,938 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:47:36,938 INFO [inference.py:355] First hypothesis: [4, 5, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 4, 2, 11, 15, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 58, 40, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 21, 30, 36, 19, 5, 30, 14, 222, 5, 26, 5, 26, 5, 26, 5, 26, 5, 26, 5, 26, 58, 30, 36, 63, 22, 54, 240, 20, 20, 4, 5, 8, 136, 21, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 347, 265, 147, 265, 20, 10, 7, 5, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 22, 104, 108, 8, 183, 25, 10, 33, 10, 7, 5, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 113, 5, 14, 5, 5, 46, 4, 96, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 17, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 68, 71, 26, 5]
+2026-01-26 04:47:37,011 INFO [inference.py:318] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
+2026-01-26 04:47:37,011 INFO [inference.py:319] Audio range: [-0.416, 0.458]
+2026-01-26 04:47:37,012 INFO [inference.py:320] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
+        50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
+        45920, 44640, 43040, 42720], dtype=torch.int32)
+2026-01-26 04:47:44,400 INFO [inference.py:341] Encoder out shape: torch.Size([24, 204, 1024])
+2026-01-26 04:47:44,401 INFO [inference.py:342] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
+        151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
+2026-01-26 04:47:44,401 INFO [inference.py:343] Encoder out range: [-12.007, 11.624]
+2026-01-26 04:47:51,732 INFO [inference.py:353] Number of hypotheses: 24
+2026-01-26 04:47:51,732 INFO [inference.py:355] First hypothesis: [11, 4, 2, 11, 17, 7, 5, 59, 19, 75, 8, 164, 21, 27, 4, 2, 9, 49, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 15, 7, 27, 154, 34, 16, 125, 10, 13, 24, 19, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 20, 84, 18, 7, 85, 34, 13, 227, 211, 120, 4, 2, 64, 18, 7, 85, 116, 73, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 66, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 18, 34, 16, 34, 13, 227, 211, 120, 25, 6, 24, 9, 7, 24, 68, 218, 52, 15, 34, 16, 34, 13, 74, 19, 5, 8, 41, 19, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 369]
+2026-01-26 04:47:51,743 INFO [inference.py:318] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
+2026-01-26 04:47:51,744 INFO [inference.py:319] Audio range: [-0.135, 0.191]
+2026-01-26 04:47:51,745 INFO [inference.py:320] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
+       dtype=torch.int32)
+2026-01-26 04:48:00,501 INFO [inference.py:341] Encoder out shape: torch.Size([9, 552, 1024])
+2026-01-26 04:48:00,502 INFO [inference.py:342] Encoder out lens: tensor([552, 533, 513, 486, 482, 474, 474, 472, 459])
+2026-01-26 04:48:00,502 INFO [inference.py:343] Encoder out range: [-13.325, 12.083]
+2026-01-26 04:48:10,617 INFO [inference.py:353] Number of hypotheses: 9
+2026-01-26 04:48:10,617 INFO [inference.py:355] First hypothesis: [89, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 21, 94, 221, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 17, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 33, 113, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 4, 5, 21, 5, 97, 5, 5, 19, 5, 14, 379, 5, 292, 379, 5, 292, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 130, 101, 101, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 29, 119, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 21, 29, 29, 28, 29, 29, 28, 29, 29, 119, 149, 29, 29, 119, 149, 29, 29, 119, 149, 29, 14, 43, 8, 21, 29, 29, 119, 149, 29, 14, 43, 8, 119, 55, 18, 77, 77, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 81, 7, 27, 12, 52, 10, 52, 10, 52, 10, 52, 10, 52, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 324, 115, 54, 10, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 
93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 71, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 53, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 134, 36, 58, 140, 36, 58, 140, 36, 58, 140, 36, 119, 149, 22, 18, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 4, 63, 19, 41, 4, 63, 19, 41, 4, 63, 21, 82, 19, 70, 4, 185, 4, 185, 4, 185, 4, 185, 82, 185, 88, 100, 16, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 140, 36, 58, 140, 36, 58, 140, 36, 58, 134, 36, 121, 46, 74, 58, 140, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 79, 14, 43, 8, 119, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 29, 29, 53, 21, 29, 29, 119, 53, 21, 29, 29, 119, 53, 21, 29, 29, 14, 53, 21, 29, 29, 14, 53, 21, 29, 29, 21, 29, 29, 21, 29, 29, 21, 79, 14, 29, 14, 21, 29, 14, 21, 29, 14, 43, 8, 119, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 84, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 
81, 7, 27, 10, 10, 53, 28, 29, 29, 90, 53, 28, 29, 29, 90, 53, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 28, 30, 29, 28, 30, 29, 28, 30, 29, 28, 28, 29, 29, 28, 28, 29, 29, 28, 28, 29, 29, 14, 43, 8, 30, 29, 29, 14, 43, 8, 5, 29, 14, 43, 8, 5, 29, 14, 43, 8, 5, 23, 17, 7, 5, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 205, 205, 5, 17, 13, 43, 43, 28, 133, 180, 43, 30, 24, 180, 8, 86, 180, 8, 142, 19, 21, 42, 26, 35, 87, 17, 174, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 12]
+2026-01-26 04:48:10,629 INFO [inference.py:318] Audio shape: torch.Size([14, 112320]), dtype: torch.float32
+2026-01-26 04:48:10,630 INFO [inference.py:319] Audio range: [-0.469, 0.457]
+2026-01-26 04:48:10,630 INFO [inference.py:320] Audio lengths: tensor([112320, 105920, 105439, 104000, 103840, 101920,  98720,  98400,  96960,
+         96800,  96320,  95680,  93760,  93600], dtype=torch.int32)
+2026-01-26 04:48:18,040 INFO [inference.py:341] Encoder out shape: torch.Size([14, 350, 1024])
+2026-01-26 04:48:18,040 INFO [inference.py:342] Encoder out lens: tensor([350, 330, 329, 324, 324, 318, 308, 307, 302, 302, 300, 298, 292, 292])
+2026-01-26 04:48:18,041 INFO [inference.py:343] Encoder out range: [-14.286, 11.940]
+2026-01-26 04:48:29,123 INFO [inference.py:353] Number of hypotheses: 14
+2026-01-26 04:48:29,123 INFO [inference.py:355] First hypothesis: [39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 11, 31, 4, 24, 4, 32, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 12, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 142, 19, 22, 22, 142, 19, 22, 22, 142, 19, 22, 262, 20, 35, 32, 28, 67, 8, 26, 35, 32, 28, 8, 86, 22, 142, 117, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 21, 8, 8, 21, 8, 8, 21, 8, 8, 142, 86, 22, 142, 67, 22, 142, 67, 22, 142, 67, 22, 142, 67, 22, 108, 22, 142, 67, 22, 108, 22, 142, 67, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 32, 67, 14, 32, 67, 14, 32, 67, 14, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 85, 13, 13, 13, 13, 13, 13, 13, 13, 22, 22, 4, 224, 4, 224, 4, 224, 4, 224, 4, 224, 30, 60, 30, 183, 51, 4, 224, 30, 60, 30, 183, 56, 15, 51, 56, 15, 51, 56, 15, 51, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 56, 15, 56, 56, 15, 56, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 148, 4, 27, 148, 4, 27, 148, 4, 27, 148, 4, 27, 148, 4, 27, 21, 104, 19, 22, 14, 54, 19, 149, 22, 54, 54, 80, 10, 191, 32, 7, 8, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 4, 5, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 26, 52, 18, 52, 
52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 49, 26, 130, 15, 7, 27, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 82, 4, 28, 82, 4, 28, 82, 4, 28, 82, 82, 70, 25, 4, 28, 82, 82, 70, 25, 4, 223, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 104, 19, 82, 104, 19, 82, 104, 19, 82, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 263, 80, 48, 33, 48, 33, 262, 446, 446, 446, 446, 446, 446, 446, 446, 446, 446, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 20, 262, 20, 262, 20, 262, 20, 262, 20, 262, 31, 20, 31, 20, 31, 20, 31, 20, 31, 20, 31, 262, 31, 262, 31, 262, 31, 262, 31, 262, 31]
+2026-01-26 04:48:29,127 INFO [inference.py:544] Processed 206 utterances in 10 batches
+2026-01-26 04:48:29,135 INFO [inference.py:318] Audio shape: torch.Size([38, 41440]), dtype: torch.float32
+2026-01-26 04:48:29,136 INFO [inference.py:319] Audio range: [-0.272, 0.322]
+2026-01-26 04:48:29,137 INFO [inference.py:320] Audio lengths: tensor([41440, 41120, 40160, 35680, 33120, 32960, 32800, 31520, 31040, 30880,
+        30239, 29920, 29120, 27360, 25279, 24480, 23520, 22720, 22720, 21600,
+        20800, 20320, 19840, 19840, 17600, 15520, 13120, 12480, 12320, 11040,
+        10560,  9600,  8640,  7520,  5440,  5120,  5120,  4640],
+       dtype=torch.int32)
+2026-01-26 04:48:35,710 INFO [inference.py:341] Encoder out shape: torch.Size([38, 129, 1024])
+2026-01-26 04:48:35,710 INFO [inference.py:342] Encoder out lens: tensor([129, 128, 125, 111, 103, 102, 102,  98,  96,  96,  94,  93,  90,  85,
+         78,  76,  73,  70,  70,  67,  64,  63,  61,  61,  54,  48,  40,  38,
+         38,  34,  32,  29,  26,  23,  16,  15,  15,  14])
+2026-01-26 04:48:35,711 INFO [inference.py:343] Encoder out range: [-13.512, 11.822]
+2026-01-26 04:48:43,120 INFO [inference.py:353] Number of hypotheses: 38
+2026-01-26 04:48:43,121 INFO [inference.py:355] First hypothesis: [9, 7, 85, 13, 4, 27, 5, 30, 27, 4, 27, 5, 30, 27, 4, 27, 5, 30, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 5, 14, 43, 19, 5, 14, 43, 19, 5, 43, 19, 5, 43, 19, 5, 43, 19, 5, 43, 21, 29, 14, 43, 19, 21, 43, 8, 19, 201, 14, 190, 19, 201, 14, 190, 19, 201, 14, 190, 8, 19, 201, 14, 8, 119, 55, 80, 118, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 10, 4, 14, 199, 199, 202, 199, 199, 202, 199, 199, 202, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 71, 14, 14, 75, 75, 14, 14, 75, 75, 14, 43, 8, 26, 130, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11]
+2026-01-26 04:48:43,132 INFO [inference.py:318] Audio shape: torch.Size([38, 41280]), dtype: torch.float32
+2026-01-26 04:48:43,133 INFO [inference.py:319] Audio range: [-0.080, 0.105]
+2026-01-26 04:48:43,133 INFO [inference.py:320] Audio lengths: tensor([41280, 40320, 36800, 35680, 34880, 34879, 34080, 34080, 32000, 30400,
+        29280, 29280, 28320, 24000, 23040, 20960, 20960, 20960, 20160, 16960,
+        14080, 13280, 12640, 12160, 10720,  9440,  8640,  6240,  6080,  5440,
+         5440,  5120,  4800,  4800,  4640,  4480,  4320,  4160],
+       dtype=torch.int32)
+2026-01-26 04:48:50,029 INFO [inference.py:341] Encoder out shape: torch.Size([38, 128, 1024])
+2026-01-26 04:48:50,030 INFO [inference.py:342] Encoder out lens: tensor([128, 125, 114, 111, 108, 108, 106, 106,  99,  94,  91,  91,  88,  74,
+         71,  65,  65,  65,  62,  52,  43,  41,  39,  37,  33,  29,  26,  19,
+         18,  16,  16,  15,  14,  14,  14,  13,  13,  12])
+2026-01-26 04:48:50,030 INFO [inference.py:343] Encoder out range: [-11.071, 11.522]
+2026-01-26 04:48:56,638 INFO [inference.py:353] Number of hypotheses: 38
+2026-01-26 04:48:56,638 INFO [inference.py:355] First hypothesis: [4, 62, 4, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 68, 4, 2, 20, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 2, 20, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 4, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 5, 65, 19, 36, 63, 54, 17, 6, 124, 32, 28, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 19, 70, 77, 71, 19, 70, 77, 71, 19, 70, 22, 22, 70, 22, 22, 70, 22, 22, 70, 22, 22, 148, 148, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 24, 185, 25, 13, 82, 185, 64]
+2026-01-26 04:48:56,707 INFO [inference.py:318] Audio shape: torch.Size([38, 41760]), dtype: torch.float32
+2026-01-26 04:48:56,707 INFO [inference.py:319] Audio range: [-0.246, 0.340]
+2026-01-26 04:48:56,708 INFO [inference.py:320] Audio lengths: tensor([41760, 39680, 38880, 36799, 36639, 36000, 34559, 34240, 33120, 31840,
+        30720, 30560, 29760, 29280, 24640, 24160, 22720, 21759, 21600, 20960,
+        16320, 14400, 13600, 11360, 10880, 10399, 10400,  9760,  9440,  9280,
+         8320,  8320,  7680,  7360,  6880,  6880,  6240,  6240],
+       dtype=torch.int32)
+2026-01-26 04:49:03,412 INFO [inference.py:341] Encoder out shape: torch.Size([38, 130, 1024])
+2026-01-26 04:49:03,412 INFO [inference.py:342] Encoder out lens: tensor([130, 123, 121, 114, 114, 112, 107, 106, 103,  99,  95,  95,  92,  91,
+         76,  75,  70,  67,  67,  65,  50,  44,  42,  35,  33,  32,  32,  30,
+         29,  28,  25,  25,  23,  22,  21,  21,  19,  19])
+2026-01-26 04:49:03,413 INFO [inference.py:343] Encoder out range: [-11.967, 11.229]
+2026-01-26 04:49:09,503 INFO [inference.py:353] Number of hypotheses: 38
+2026-01-26 04:49:09,503 INFO [inference.py:355] First hypothesis: [145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 4, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 154, 44, 13, 24, 28, 27, 170, 205, 5, 130, 4, 2, 11, 39]
+2026-01-26 04:49:09,512 INFO [inference.py:318] Audio shape: torch.Size([9, 170400]), dtype: torch.float32
+2026-01-26 04:49:09,513 INFO [inference.py:319] Audio range: [-0.370, 0.393]
+2026-01-26 04:49:09,513 INFO [inference.py:320] Audio lengths: tensor([170400, 166559, 165919, 164800, 156800, 152480, 147520, 146559, 145759],
+       dtype=torch.int32)
+2026-01-26 04:49:17,349 INFO [inference.py:341] Encoder out shape: torch.Size([9, 532, 1024])
+2026-01-26 04:49:17,349 INFO [inference.py:342] Encoder out lens: tensor([532, 520, 518, 514, 489, 476, 460, 457, 455])
+2026-01-26 04:49:17,350 INFO [inference.py:343] Encoder out range: [-12.221, 14.348]
+2026-01-26 04:49:28,539 INFO [inference.py:353] Number of hypotheses: 9
+2026-01-26 04:49:28,539 INFO [inference.py:355] First hypothesis: [37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 197, 4, 2, 4, 7, 197, 4, 2, 4, 7, 197, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 18, 7, 69, 4, 2, 23, 17, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 51, 13, 43, 8, 232, 80, 4, 2, 23, 57, 18, 7, 85, 116, 55, 5, 28, 36, 30, 55, 80, 17, 17, 7, 5, 13, 43, 8, 5, 17, 7, 5, 13, 43, 8, 5, 17, 7, 5, 13, 43, 8, 5, 25, 6, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 39, 9, 46, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 6, 74, 67, 14, 233, 6, 74, 67, 14, 233, 6, 74, 67, 14, 9, 24, 29, 70, 173, 17, 81, 7, 27, 154, 38, 13, 227, 211, 25, 6, 4, 27, 43, 8, 232, 4, 2, 274, 5, 156, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 4, 5, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 23, 15, 47, 122, 6, 226, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 127, 33, 4, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 5, 19, 5, 8, 40, 19, 5, 8, 40, 19, 5, 8, 40, 19, 5, 8, 40, 19, 5, 8, 5, 19, 5, 19, 5, 19, 5, 19, 5, 19, 5, 8, 19, 24, 19, 5, 8, 19, 24, 19, 5, 14, 69, 40, 183, 25, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 31, 20, 31, 20, 31, 20, 31, 20, 31, 20, 31, 9, 83, 7, 8, 34, 13, 31, 13, 239, 25, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 4, 2, 64, 18, 7, 85, 116, 123, 64, 18, 123, 64, 18, 123, 64, 18, 123, 64, 9, 47, 7, 8, 72, 52, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 4, 2, 64, 4, 2, 64, 4, 2, 64, 4, 2, 64, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 100, 57, 100, 57, 100, 57, 100, 57, 100, 57, 371, 208, 387, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 74, 67, 14, 12]
+2026-01-26 04:49:28,610 INFO [inference.py:318] Audio shape: torch.Size([5, 315520]), dtype: torch.float32
+2026-01-26 04:49:28,611 INFO [inference.py:319] Audio range: [-0.297, 0.334]
+2026-01-26 04:49:28,612 INFO [inference.py:320] Audio lengths: tensor([315520, 301440, 294399, 292480, 289919], dtype=torch.int32)
+2026-01-26 04:49:36,428 INFO [inference.py:341] Encoder out shape: torch.Size([5, 985, 1024])
+2026-01-26 04:49:36,429 INFO [inference.py:342] Encoder out lens: tensor([985, 941, 919, 913, 905])
+2026-01-26 04:49:36,429 INFO [inference.py:343] Encoder out range: [-12.260, 13.635]
+2026-01-26 04:49:53,832 INFO [inference.py:353] Number of hypotheses: 5
+2026-01-26 04:49:53,833 INFO [inference.py:355] First hypothesis: [31, 53, 27, 8, 119, 55, 80, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 113, 5, 113, 5, 14, 5, 5, 23, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 315, 15, 7, 69, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 25, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 7, 5, 87, 7, 5, 87, 7, 5, 87, 7, 5, 51, 272, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 6, 17, 17, 106, 21, 96, 204, 9, 204, 204, 204, 204, 204, 204, 204, 204, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 204, 9, 204, 9, 204, 9, 204, 9, 204, 9, 258, 10, 7, 5, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 7, 5, 154, 231, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 8, 54, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 106, 40, 275, 67, 279, 359, 275, 67, 279, 359, 275, 67, 
279, 359, 275, 67, 279, 359, 275, 67, 279, 359, 275, 67, 172, 359, 42, 26, 170, 24, 24, 170, 8, 19, 86, 22, 142, 19, 24, 19, 36, 108, 32, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 14, 24, 19, 24, 14, 24, 19, 24, 14, 24, 19, 24, 24, 19, 24, 24, 19, 24, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 5, 19, 5, 14, 36, 19, 5, 14, 36, 19, 5, 14, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 14, 15, 4, 42, 15, 4, 42, 15, 4, 27, 4, 14, 4, 14, 4, 14, 4, 14, 4, 24, 30, 24, 24, 92, 173, 115, 54, 16, 25, 4, 96, 4, 96, 4, 96, 4, 96, 4, 96, 4, 96, 108, 209, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 5, 25, 6, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 125, 10, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 24, 20, 84, 9, 24, 24, 20, 9, 24, 24, 20, 9, 24, 24, 98, 27, 267, 153, 267, 40, 30, 27, 267, 153, 267, 153, 267, 153, 267, 153, 5, 41, 5, 92, 5, 41, 5, 92, 5, 41, 5, 90, 42, 41, 26, 20, 18, 7, 27, 18, 7, 27, 18, 7, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 113, 10, 100, 10, 33, 10, 114, 32, 7, 8, 72, 15, 72, 15, 72, 15, 72, 32, 26, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 30, 96, 244, 20, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 21, 29, 35, 369, 38, 86, 38, 35, 32, 28, 104, 108, 8, 26, 189, 19, 5, 162, 33, 10, 33, 5, 30, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 52, 15, 7, 27, 177, 28, 30, 96, 71, 19, 29, 71, 21, 29, 14, 25, 
75, 25, 10, 7, 5, 13, 29, 29, 14, 43, 8, 232, 4, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 10, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 16, 8, 16, 16, 16, 16, 16, 16, 16, 16, 136, 19, 22, 54, 16, 56, 136, 54, 16, 56, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 10, 7, 5, 13, 16, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 19, 36, 58, 140, 21, 43, 36, 19, 36, 58, 63, 40, 19, 41, 69, 40, 69, 75, 75, 14, 131, 13, 9, 13, 9, 13, 9, 13, 9, 13, 9, 7, 85, 13, 4, 14, 70, 13, 13, 74, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 18, 7, 27, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 47, 154, 154, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 101, 6, 24, 35, 6, 35, 82, 70, 182, 182, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 4, 5, 23, 195, 25, 13, 22, 104, 19, 22, 14, 10, 7, 5, 10, 7, 5, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 30, 5, 159, 31, 46, 46, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 18, 18, 34, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 28, 28, 29, 28, 28, 29, 28, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 119, 5, 14, 190, 8, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 159, 33, 4, 96, 40, 275, 32, 5, 24, 86, 22, 142, 5, 43, 40, 19, 60, 40, 79, 137, 43, 40, 19, 60, 137, 43, 40, 19, 60, 137, 43, 40, 19, 60, 137, 43, 40, 19, 60, 137, 19, 60, 137, 19, 60, 137, 19, 60, 137, 19, 82, 19, 40, 19, 82, 19, 40, 19, 82, 19, 29, 14, 21, 29, 14, 21, 29, 14, 21, 29, 29, 14, 43, 8, 5, 29, 14, 29, 29, 14, 29, 29, 14, 29, 5, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 159, 33, 5, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 16, 22, 16, 394, 333, 172, 20, 9, 7, 85, 34, 199, 25, 6, 35, 22, 19, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 25, 6, 24, 5, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 24, 24, 63, 42, 33, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5]
+2026-01-26 04:49:53,846 INFO [inference.py:318] Audio shape: torch.Size([6, 239520]), dtype: torch.float32
+2026-01-26 04:49:53,847 INFO [inference.py:319] Audio range: [-0.116, 0.111]
+2026-01-26 04:49:53,848 INFO [inference.py:320] Audio lengths: tensor([239519, 234240, 223840, 223360, 219679, 215680], dtype=torch.int32)
+2026-01-26 04:50:02,225 INFO [inference.py:341] Encoder out shape: torch.Size([6, 748, 1024])
+2026-01-26 04:50:02,226 INFO [inference.py:342] Encoder out lens: tensor([748, 731, 699, 697, 686, 673])
+2026-01-26 04:50:02,226 INFO [inference.py:343] Encoder out range: [-13.591, 10.919]
+2026-01-26 04:50:10,299 INFO [inference.py:353] Number of hypotheses: 6
+2026-01-26 04:50:10,299 INFO [inference.py:355] First hypothesis: [49, 4, 2, 20, 84, 18, 7, 27, 154, 38, 13, 227, 211, 120, 412, 23, 23, 4, 2, 31, 20, 84, 18, 7, 27, 154, 34, 16, 34, 13, 4, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 80, 4, 2, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 4, 2, 9, 49, 9, 7, 24, 68, 218, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 18, 7, 27, 154, 34, 16, 34, 13, 4, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 
70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 25, 321, 18, 7, 85, 116, 334, 6, 24, 61, 110, 46, 17, 88, 38, 120, 289, 39, 18, 7, 69, 38, 86, 98, 30, 22, 233, 6, 221, 18, 18, 241, 13, 160, 202, 25, 6, 24, 5, 25, 6, 24, 5, 25, 6, 24, 5, 25, 6, 24, 33, 10, 13, 239, 25, 31]
+2026-01-26 04:50:10,309 INFO [inference.py:318] Audio shape: torch.Size([5, 315200]), dtype: torch.float32
+2026-01-26 04:50:10,310 INFO [inference.py:319] Audio range: [-0.082, 0.158]
+2026-01-26 04:50:10,311 INFO [inference.py:320] Audio lengths: tensor([315200, 310560, 300000, 299680, 296959], dtype=torch.int32)
+2026-01-26 04:50:18,933 INFO [inference.py:341] Encoder out shape: torch.Size([5, 984, 1024])
+2026-01-26 04:50:18,933 INFO [inference.py:342] Encoder out lens: tensor([984, 970, 937, 936, 927])
+2026-01-26 04:50:18,934 INFO [inference.py:343] Encoder out range: [-14.589, 11.647]
+2026-01-26 04:50:32,710 INFO [inference.py:353] Number of hypotheses: 5
+2026-01-26 04:50:32,710 INFO [inference.py:355] First hypothesis: [59, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 23, 15, 7, 27, 249, 56, 15, 249, 56, 15, 249, 56, 15, 249, 56, 15, 7, 27, 310, 310, 310, 310, 310, 310, 310, 18, 7, 27, 38, 26, 35, 331, 4, 32, 67, 32, 67, 32, 67, 32, 67, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 264, 205, 264, 264, 264, 264, 264, 264, 264, 264, 128, 23, 9, 34, 64, 64, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 56, 217, 217, 36, 16, 259, 16, 56, 217, 36, 19, 55, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 81, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 76, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 245, 36, 245, 134, 46, 46, 131, 34, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 14, 21, 4, 28, 4, 30, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 30, 8, 28, 28, 30, 8, 28, 28, 30, 8, 28, 29, 29, 8, 8, 137, 8, 40, 19, 60, 137, 8, 96, 63, 209, 8, 137, 8, 96, 63, 60, 41, 149, 60, 137, 8, 96, 63, 60, 119, 8, 30, 201, 14, 8, 14, 43, 8, 155, 30, 24, 443, 155, 22, 30, 36, 8, 19, 36, 22, 41, 55, 70, 22, 36, 30, 36, 63, 105, 50, 20, 50, 20, 20, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 152, 25, 274, 
5, 20, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 74, 46, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 36, 29, 29, 36, 53, 36, 29, 29, 36, 53, 36, 30, 36, 53, 36, 30, 36, 53, 36, 30, 36, 30, 36, 30, 36, 30, 36, 30, 36, 30, 60, 60, 70, 32, 26, 35, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 36, 53, 36, 29, 80, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 26, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 76, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 49, 26, 33, 35, 13, 30, 27, 20, 4, 27, 55, 19, 60, 30, 183, 20, 9, 100, 51, 51, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 21, 96, 14, 14, 96, 455, 106, 14, 96, 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 189, 43, 21, 8, 96, 14, 14, 43, 8, 204, 80, 95, 33, 13, 33, 13, 33, 13, 33, 13, 33, 13, 33, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 4, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 5, 19, 5, 19, 5, 19, 5, 19, 5, 19, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 8, 28, 8, 8, 28, 8, 8, 28, 8, 8, 28, 24, 8, 28, 24, 8, 28, 24, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 24, 24, 29, 36, 8, 14, 29, 29, 54, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 76, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 22, 41, 54, 117, 6, 16, 6, 16, 6, 16, 6, 16, 6, 16, 6, 16, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 71, 132, 21, 132, 19, 131, 370, 132, 21, 370, 132, 21, 370, 132, 21, 370, 132, 19, 131, 370, 41, 13, 13, 4, 70, 4, 140, 82, 4, 70, 69, 36, 4, 140, 82, 4, 70, 69, 36, 69, 185, 267, 153, 158, 158, 158, 
158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 30, 70, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 194, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 47, 152, 25, 124, 32, 28, 8, 86, 32, 28, 8, 86, 13, 32, 28, 8, 21, 24, 14, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 63, 4, 28, 4, 27, 21, 13, 4, 27, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 29, 29, 28, 29, 14, 13, 29, 29, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 36, 53, 36, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 26, 4, 27, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 50, 73, 59, 59, 19, 75, 19, 286, 25, 59, 28, 59, 28, 59, 28, 36, 25, 105, 25, 105, 25, 105, 25, 105, 25, 105, 25, 6, 74, 153, 54, 17, 6, 39, 39, 10, 7, 5, 13, 22, 22, 54, 16, 6, 4, 499, 30, 28, 43, 43, 90, 188, 22, 14, 36, 22, 22, 70, 22, 22, 70, 22, 22, 70, 22, 22, 86, 22, 86, 20, 6, 24, 4, 24, 4, 24, 4, 24, 4, 24, 4, 24, 30, 24, 155, 8, 30, 24, 63, 8, 173, 214, 101, 214, 101, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 26, 35, 6, 35, 32, 28, 42, 26, 15, 87, 81, 81, 76, 87, 76, 87, 76, 87, 76, 87, 76, 87, 76, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 43, 43, 90, 188, 22, 14, 70, 43, 36, 30, 22, 14, 70, 43, 8, 54, 38, 56, 18, 56, 18, 56, 18, 56, 18, 56, 18, 56, 10, 35, 5, 19, 5, 93, 170, 24, 24, 19, 24, 24, 19, 24, 24, 19, 24, 24, 19, 24, 24, 63, 19, 24, 14, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 29, 29, 14, 43, 8, 38, 53, 30, 82, 38, 38, 38, 38, 38, 38, 38, 38, 38, 35, 6, 165, 32, 30, 24, 67, 32, 153, 32, 226, 331, 33, 59, 19, 75, 75, 19, 5, 75, 75, 19, 5, 75, 75, 19, 104, 19, 75, 75, 19, 104, 19, 75, 75, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 19, 104, 19, 75, 19, 104, 19, 75, 19, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 22, 30, 97, 15, 51, 95, 15, 51, 95, 15, 51, 95, 15, 51, 95, 10, 33, 51, 33, 51, 33, 51, 33, 51, 44, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 4, 5, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 15, 122, 32, 135, 225, 225, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 7, 27, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 72, 35, 5, 8, 5, 93, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 125, 10, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 30, 29, 28, 28, 29, 14, 147, 147, 147, 147, 31, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 26, 16, 269, 30, 24, 29, 36, 108, 79, 243, 243, 391, 274, 156, 180, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 24, 28, 24, 4, 24, 4, 24, 4, 24, 4, 24, 29, 36, 5, 29, 29, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 29, 19, 97, 29, 19, 97, 29, 19, 97, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 71, 19, 29, 71, 19, 29, 71, 19, 29, 8, 54, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 
87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 7, 5, 31, 12, 31]
+2026-01-26 04:50:32,722 INFO [inference.py:318] Audio shape: torch.Size([6, 237280]), dtype: torch.float32
+2026-01-26 04:50:32,723 INFO [inference.py:319] Audio range: [-0.130, 0.131]
+2026-01-26 04:50:32,723 INFO [inference.py:320] Audio lengths: tensor([237280, 228159, 220639, 220480, 219359, 213119], dtype=torch.int32)
+2026-01-26 04:50:40,430 INFO [inference.py:341] Encoder out shape: torch.Size([6, 741, 1024])
+2026-01-26 04:50:40,430 INFO [inference.py:342] Encoder out lens: tensor([741, 712, 689, 688, 685, 665])
+2026-01-26 04:50:40,435 INFO [inference.py:343] Encoder out range: [-13.120, 12.506]
+2026-01-26 04:50:52,401 INFO [inference.py:353] Number of hypotheses: 6
+2026-01-26 04:50:52,402 INFO [inference.py:355] First hypothesis: [105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 4, 257, 24, 92, 60, 92, 7, 8, 326, 116, 4, 257, 24, 92, 60, 4, 257, 24, 92, 60, 4, 257, 24, 92, 60, 28, 37, 4, 7, 295, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 31, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 91, 18, 7, 27, 154, 259, 16, 16, 16, 16, 16, 16, 16, 16, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 204, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 245, 14, 43, 8, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 112, 283, 283, 283, 283, 283, 283, 283, 283, 283, 18, 7, 27, 13, 43, 43, 90, 58, 21, 58, 21, 58, 21, 58, 21, 58, 21, 43, 43, 90, 22, 41, 275, 32, 41, 275, 32, 41, 275, 32, 41, 275, 32, 41, 327, 4, 27, 5, 30, 27, 5, 19, 5, 162, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 18, 7, 85, 18, 18, 204, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 43, 8, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 113, 5, 14, 5, 5, 26, 4, 5, 26, 4, 5, 26, 4, 5, 26, 248, 130, 128, 26, 122, 32, 248, 130, 128, 26, 130, 4, 223, 75, 59, 28, 29, 59, 28, 29, 59, 28, 29, 59, 28, 36, 329, 329, 329, 329, 329, 329, 
329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 159, 33, 16, 136, 19, 22, 54, 16, 136, 19, 22, 54, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 323, 16, 323, 16, 323, 16, 323, 16, 323, 16, 6, 157, 33, 13, 196, 5, 94, 271, 16, 6, 4, 5, 29, 29, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 4, 5, 4, 2, 11, 17, 7, 5, 13, 4, 5, 4, 2, 64, 9, 7, 85, 34, 13, 239, 25, 105, 25, 6, 91, 109, 5, 61, 335, 15, 7, 27, 105, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 2, 31, 23, 17, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 52, 10, 52, 10, 52, 10, 52, 10, 52, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 49, 193, 81, 49, 49, 101, 220, 50, 31, 50, 6, 98, 21, 22, 332, 243, 243, 5, 25, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 98, 30, 82, 82, 40, 98, 30, 82, 40, 30, 82, 40, 30, 82, 40, 30, 82, 30, 36, 63, 113, 366, 113, 64, 113, 366, 113, 64, 113, 10, 113, 
13, 113, 13, 113, 13, 113, 79, 14, 5, 5, 183, 5, 25, 6, 4, 5, 21, 97, 49, 193, 49, 193, 49, 193, 49, 193, 49, 193, 49, 193, 49, 10, 7, 5, 10, 7, 5, 154, 38, 35, 6, 35, 22, 14, 5, 35, 6, 35, 22, 19, 28, 168, 94, 14, 4, 2, 23, 98, 30, 42, 233, 98, 30, 42, 233, 98, 30, 42, 215, 165, 32, 22, 233, 98, 36, 67, 98, 36, 67, 98, 36, 67, 98, 36, 67, 98, 36, 67, 14, 9, 102, 9, 7, 24, 154, 34, 124, 202, 20, 101, 101, 31, 9, 7, 85, 34, 13, 169, 25, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 73, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 98, 30, 82, 82, 70, 101, 150, 101, 17, 101, 17, 101, 17, 101, 17, 101, 17, 101, 17, 101, 6, 150, 17, 52, 15, 234, 16, 6, 106, 141, 19, 5, 14, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 13, 211, 25, 75, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 329, 185, 57, 18, 7, 85, 184, 105, 6, 329, 123, 38, 13, 227, 211, 25, 75, 90, 8, 325, 25, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 27, 60, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 24, 29, 70, 173, 5, 15, 47, 56, 15, 49, 18, 114, 38, 35, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 296, 380, 5, 11]
+2026-01-26 04:50:52,416 INFO [inference.py:318] Audio shape: torch.Size([17, 91040]), dtype: torch.float32
+2026-01-26 04:50:52,417 INFO [inference.py:319] Audio range: [-0.574, 0.629]
+2026-01-26 04:50:52,418 INFO [inference.py:320] Audio lengths: tensor([91040, 90240, 89119, 88480, 87520, 86079, 83680, 82880, 81120, 79520,
+        79520, 78079, 76800, 76480, 73760, 73600, 73599], dtype=torch.int32)
+2026-01-26 04:50:59,721 INFO [inference.py:341] Encoder out shape: torch.Size([17, 284, 1024])
+2026-01-26 04:50:59,722 INFO [inference.py:342] Encoder out lens: tensor([284, 281, 278, 276, 273, 268, 261, 258, 253, 248, 248, 243, 239, 238,
+        230, 229, 229])
+2026-01-26 04:50:59,722 INFO [inference.py:343] Encoder out range: [-13.703, 11.821]
+2026-01-26 04:51:09,014 INFO [inference.py:353] Number of hypotheses: 17
+2026-01-26 04:51:09,014 INFO [inference.py:355] First hypothesis: [50, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 16, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 98, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 24, 67, 205, 205, 5, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 259, 16, 259, 259, 16, 259, 259, 16, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 101, 15, 72, 15, 72, 15, 72, 15, 72, 15, 72, 101, 6, 4, 5, 8, 136, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 147, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 205, 5, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 34, 13, 4, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 8, 41, 5, 92, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 24, 19, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 22, 14, 5, 5, 19, 22, 14, 5, 5, 19, 22, 14, 5, 5, 19, 24, 94, 5, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 79, 14, 5, 
5, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 29, 71, 19, 29, 71, 19, 29, 71, 21, 29, 5, 8, 40, 19, 29, 71, 21, 29, 5, 4, 2, 9, 49, 23, 9, 49, 9, 7, 85, 34, 13, 74, 19, 5, 14, 36, 75, 19, 80]
+2026-01-26 04:51:09,027 INFO [inference.py:318] Audio shape: torch.Size([23, 68960]), dtype: torch.float32
+2026-01-26 04:51:09,030 INFO [inference.py:319] Audio range: [-0.269, 0.266]
+2026-01-26 04:51:09,031 INFO [inference.py:320] Audio lengths: tensor([68959, 66880, 64800, 64479, 61920, 59680, 54400, 53440, 52479, 52319,
+        51840, 46880, 46559, 45120, 44480, 43360, 43360, 43360, 43040, 43040,
+        43040, 42880, 42560], dtype=torch.int32)
+2026-01-26 04:51:16,407 INFO [inference.py:341] Encoder out shape: torch.Size([23, 215, 1024])
+2026-01-26 04:51:16,408 INFO [inference.py:342] Encoder out lens: tensor([215, 208, 202, 201, 193, 186, 169, 166, 163, 163, 161, 146, 145, 140,
+        138, 135, 135, 135, 134, 134, 134, 133, 132])
+2026-01-26 04:51:16,408 INFO [inference.py:343] Encoder out range: [-13.477, 12.445]
+2026-01-26 04:51:24,735 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:51:24,735 INFO [inference.py:355] First hypothesis: [4, 7, 295, 4, 2, 11, 4, 2, 9, 7, 85, 151, 4, 28, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 4, 2, 9, 7, 24, 51, 34, 13, 4, 5, 21, 69, 5, 8, 41, 5, 92, 55, 490, 86, 97, 4, 2, 31, 20, 4, 133, 133, 22, 5, 51, 4, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]
+2026-01-26 04:51:24,800 INFO [inference.py:544] Processed 391 utterances in 20 batches
+2026-01-26 04:51:24,806 INFO [inference.py:318] Audio shape: torch.Size([17, 92320]), dtype: torch.float32
+2026-01-26 04:51:24,807 INFO [inference.py:319] Audio range: [-0.234, 0.300]
+2026-01-26 04:51:24,808 INFO [inference.py:320] Audio lengths: tensor([92320, 91200, 91200, 90560, 89120, 84000, 83840, 83360, 82880, 82079,
+        79840, 79520, 76800, 73760, 73280, 70079, 69600], dtype=torch.int32)
+2026-01-26 04:51:32,739 INFO [inference.py:341] Encoder out shape: torch.Size([17, 288, 1024])
+2026-01-26 04:51:32,741 INFO [inference.py:342] Encoder out lens: tensor([288, 284, 284, 282, 278, 262, 261, 260, 258, 256, 249, 248, 239, 230,
+        228, 218, 217])
+2026-01-26 04:51:32,741 INFO [inference.py:343] Encoder out range: [-13.483, 12.297]
+2026-01-26 04:51:45,129 INFO [inference.py:353] Number of hypotheses: 17
+2026-01-26 04:51:45,129 INFO [inference.py:355] First hypothesis: [39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 191, 56, 56, 196, 104, 19, 22, 30, 70, 22, 435, 55, 185, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 19, 36, 58, 36, 19, 36, 58, 36, 19, 36, 121, 54, 121, 5, 121, 54, 121, 5, 121, 54, 121, 5, 15, 7, 85, 274, 50, 180, 43, 92, 7, 8, 274, 50, 180, 43, 92, 50, 8, 28, 24, 5, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 83, 14, 84, 84, 84, 84, 84, 84, 84, 84, 84, 83, 14, 83, 84, 84, 83, 14, 83, 84, 84, 83, 7, 8, 83, 14, 84, 83, 7, 8, 83, 14, 83, 14, 83, 14, 83, 14, 83, 14, 83, 7, 8, 217, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 56, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 43, 30, 5, 14, 190, 189, 43, 30, 5, 14, 190, 4, 190, 189, 97, 5, 5, 29, 14, 245, 14, 43, 8, 245, 21, 43, 245, 14, 43, 8, 349, 245, 21, 43, 8, 349, 245, 21, 43, 8, 119, 21, 29, 14, 43, 19, 201, 14, 190, 8, 119, 8, 54, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, 150, 20, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84]
+2026-01-26 04:51:45,142 INFO [inference.py:318] Audio shape: torch.Size([23, 68800]), dtype: torch.float32
+2026-01-26 04:51:45,143 INFO [inference.py:319] Audio range: [-0.321, 0.370]
+2026-01-26 04:51:45,144 INFO [inference.py:320] Audio lengths: tensor([68799, 66720, 62560, 62240, 61919, 60160, 59840, 58080, 57920, 57280,
+        53920, 52960, 51040, 50080, 49920, 49280, 48160, 48160, 47680, 47200,
+        44800, 44000, 42560], dtype=torch.int32)
+2026-01-26 04:51:52,920 INFO [inference.py:341] Encoder out shape: torch.Size([23, 214, 1024])
+2026-01-26 04:51:52,921 INFO [inference.py:342] Encoder out lens: tensor([214, 208, 195, 194, 193, 187, 186, 181, 180, 178, 168, 165, 159, 156,
+        155, 153, 150, 150, 148, 147, 139, 137, 132])
+2026-01-26 04:51:52,921 INFO [inference.py:343] Encoder out range: [-11.273, 12.003]
+2026-01-26 04:52:00,812 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:52:00,813 INFO [inference.py:355] First hypothesis: [218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 4, 2, 11, 17, 7, 5, 13, 4, 5, 21, 69, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 5, 8, 40, 19, 29, 8, 5, 17, 7, 5, 13, 211, 120, 412, 20, 265]
+2026-01-26 04:52:00,823 INFO [inference.py:318] Audio shape: torch.Size([38, 42080]), dtype: torch.float32
+2026-01-26 04:52:00,824 INFO [inference.py:319] Audio range: [-0.400, 0.452]
+2026-01-26 04:52:00,825 INFO [inference.py:320] Audio lengths: tensor([42080, 39200, 37439, 36960, 35520, 34560, 34079, 33599, 33600, 33280,
+        31520, 31200, 29760, 28160, 28000, 27200, 26720, 25600, 25120, 23200,
+        22880, 21280, 20800, 20000, 19680, 19520, 19200, 18080, 17600, 17600,
+        16320, 13120, 12320, 11680,  8000,  6400,  5120,  3840],
+       dtype=torch.int32)
+2026-01-26 04:52:07,931 INFO [inference.py:341] Encoder out shape: torch.Size([38, 131, 1024])
+2026-01-26 04:52:07,932 INFO [inference.py:342] Encoder out lens: tensor([131, 122, 116, 115, 110, 107, 106, 104, 104, 103,  98,  97,  92,  87,
+         87,  84,  83,  79,  78,  72,  71,  66,  64,  62,  61,  60,  59,  56,
+         54,  54,  50,  40,  38,  36,  24,  19,  15,  11])
+2026-01-26 04:52:07,932 INFO [inference.py:343] Encoder out range: [-11.872, 11.798]

egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-05-16-checkpoint ADDED Viewed

	@@ -0,0 +1,111 @@

+2026-01-26 05:05:16,888 INFO [inference.py:410] ================================================================================
+2026-01-26 05:05:16,888 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 05:05:16,888 INFO [inference.py:412] ================================================================================
+2026-01-26 05:05:16,888 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:05:16,888 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:05:16,888 INFO [inference.py:415] Test set: ihm
+2026-01-26 05:05:16,888 INFO [inference.py:416] Decoding method: modified_beam_search
+2026-01-26 05:05:16,888 INFO [inference.py:422] Device: cpu
+2026-01-26 05:05:16,888 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:05:16,890 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 05:05:16,890 INFO [inference.py:434] Blank ID: 0
+2026-01-26 05:05:16,890 INFO [inference.py:437] Creating model
+2026-01-26 05:05:18,544 INFO [inference.py:444] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:05:18,544 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:05:23,319 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 05:05:23,320 INFO [inference.py:476] Loading test data
+2026-01-26 05:05:23,320 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:05:24,403 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 05:05:24,403 INFO [inference.py:490] Starting inference...
+2026-01-26 05:05:25,573 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 05:05:25,576 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 05:05:25,579 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 05:05:34,838 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 05:05:34,839 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 05:05:34,839 INFO [inference.py:334] Encoder out range: [-13.684, 12.764]
+2026-01-26 05:05:35,536 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:05:35,537 INFO [inference.py:346] First hypothesis: [171]
+2026-01-26 05:05:35,546 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 05:05:35,547 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 05:05:35,547 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 05:05:43,001 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 05:05:43,003 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 05:05:43,003 INFO [inference.py:334] Encoder out range: [-12.514, 12.004]
+2026-01-26 05:05:43,905 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:05:43,905 INFO [inference.py:346] First hypothesis: [23, 51, 156, 6, 205, 18, 116, 113, 363]
+2026-01-26 05:05:43,925 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 05:05:43,926 INFO [inference.py:310] Audio range: [-0.439, 0.480]
+2026-01-26 05:05:43,926 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)
+2026-01-26 05:05:51,027 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
+2026-01-26 05:05:51,028 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100,  94,  89,  88,  87,  73,
+         71,  71,  69,  68,  68,  65,  62,  62,  59,  59,  58,  56,  51,  45,
+         42,  40,  38,  36,  35,  33,  29,  28,  24,  18,  17])
+2026-01-26 05:05:51,028 INFO [inference.py:334] Encoder out range: [-11.444, 10.811]
+2026-01-26 05:05:51,620 INFO [inference.py:344] Number of hypotheses: 39
+2026-01-26 05:05:51,620 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:05:51,628 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
+2026-01-26 05:05:51,629 INFO [inference.py:310] Audio range: [-0.314, 0.332]
+2026-01-26 05:05:51,629 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
+        56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
+        46079, 45280, 44960], dtype=torch.int32)
+2026-01-26 05:05:59,021 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
+2026-01-26 05:05:59,022 INFO [inference.py:333] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
+        170, 166, 164, 164, 147, 144, 143, 141, 140])
+2026-01-26 05:05:59,023 INFO [inference.py:334] Encoder out range: [-13.261, 11.090]
+2026-01-26 05:05:59,931 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:05:59,932 INFO [inference.py:346] First hypothesis: [20]
+2026-01-26 05:06:00,567 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
+2026-01-26 05:06:00,568 INFO [inference.py:310] Audio range: [-0.323, 0.414]
+2026-01-26 05:06:00,569 INFO [inference.py:311] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
+2026-01-26 05:06:09,302 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
+2026-01-26 05:06:09,303 INFO [inference.py:333] Encoder out lens: tensor([991, 931, 931, 920, 913])
+2026-01-26 05:06:09,304 INFO [inference.py:334] Encoder out range: [-14.241, 14.344]
+2026-01-26 05:06:10,112 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:06:10,113 INFO [inference.py:346] First hypothesis: [37, 23, 127, 274, 5, 147, 80, 73, 6, 16, 29, 119, 5, 20, 84, 171, 15, 6, 328, 5, 127, 20, 56]
+2026-01-26 05:06:10,117 INFO [inference.py:309] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
+2026-01-26 05:06:10,118 INFO [inference.py:310] Audio range: [-0.274, 0.362]
+2026-01-26 05:06:10,118 INFO [inference.py:311] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
+        33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
+        20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320,  7040,
+         6560,  6400,  5760,  5760,  5120,  4800,  4800,  4640,  4480,  3360],
+       dtype=torch.int32)
+2026-01-26 05:06:16,910 INFO [inference.py:332] Encoder out shape: torch.Size([40, 122, 1024])
+2026-01-26 05:06:16,911 INFO [inference.py:333] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103,  91,  91,
+         85,  77,  77,  73,  67,  67,  64,  52,  50,  50,  49,  48,  46,  41,
+         38,  21,  20,  19,  17,  17,  15,  14,  14,  14,  13,  10])
+2026-01-26 05:06:16,911 INFO [inference.py:334] Encoder out range: [-11.784, 11.570]
+2026-01-26 05:06:17,504 INFO [inference.py:344] Number of hypotheses: 40
+2026-01-26 05:06:17,504 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:06:17,513 INFO [inference.py:309] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
+2026-01-26 05:06:17,514 INFO [inference.py:310] Audio range: [-0.514, 0.393]
+2026-01-26 05:06:17,514 INFO [inference.py:311] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
+        51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
+        44640, 44320, 44160], dtype=torch.int32)
+2026-01-26 05:06:24,334 INFO [inference.py:332] Encoder out shape: torch.Size([23, 208, 1024])
+2026-01-26 05:06:24,335 INFO [inference.py:333] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
+        154, 150, 150, 149, 146, 143, 139, 138, 137])
+2026-01-26 05:06:24,335 INFO [inference.py:334] Encoder out range: [-12.152, 11.060]
+2026-01-26 05:06:25,044 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:06:25,045 INFO [inference.py:346] First hypothesis: [51, 6, 4, 27, 5, 26, 70, 22, 20, 265, 33, 46]
+2026-01-26 05:06:25,108 INFO [inference.py:309] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
+2026-01-26 05:06:25,109 INFO [inference.py:310] Audio range: [-0.416, 0.458]
+2026-01-26 05:06:25,109 INFO [inference.py:311] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
+        50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
+        45920, 44640, 43040, 42720], dtype=torch.int32)
+2026-01-26 05:06:32,219 INFO [inference.py:332] Encoder out shape: torch.Size([24, 204, 1024])
+2026-01-26 05:06:32,220 INFO [inference.py:333] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
+        151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
+2026-01-26 05:06:32,220 INFO [inference.py:334] Encoder out range: [-12.007, 11.624]
+2026-01-26 05:06:32,900 INFO [inference.py:344] Number of hypotheses: 24
+2026-01-26 05:06:32,900 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:06:32,908 INFO [inference.py:309] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
+2026-01-26 05:06:32,920 INFO [inference.py:310] Audio range: [-0.135, 0.191]
+2026-01-26 05:06:32,921 INFO [inference.py:311] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
+       dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-20-04-checkpoint ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:20:04,436 INFO [inference.py:613] ================================================================================
+2026-01-26 05:20:04,436 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:20:04,436 INFO [inference.py:615] ================================================================================
+2026-01-26 05:20:04,436 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:20:04,436 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:20:04,436 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:20:04,436 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:20:04,436 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:20:04,436 INFO [inference.py:622] Max states: 64
+2026-01-26 05:20:04,436 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:20:04,437 INFO [inference.py:629] Device: cpu
+2026-01-26 05:20:04,437 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:20:04,438 INFO [inference.py:640] Vocabulary size: 500
+2026-01-26 05:20:04,438 INFO [inference.py:641] Blank ID: 0
+2026-01-26 05:20:04,438 INFO [inference.py:644] Creating model
+2026-01-26 05:20:05,956 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:20:05,957 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:20:10,638 INFO [inference.py:680] Number of model parameters: 317,511,772
+2026-01-26 05:20:10,639 INFO [inference.py:683] Loading test data
+2026-01-26 05:20:10,639 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:20:11,677 INFO [inference.py:694] Number of test utterances: 6676
+2026-01-26 05:20:11,677 INFO [inference.py:697] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-29-29-checkpoint ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:29:29,151 INFO [inference.py:613] ================================================================================
+2026-01-26 05:29:29,151 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:29:29,151 INFO [inference.py:615] ================================================================================
+2026-01-26 05:29:29,151 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:29:29,151 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:29:29,151 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:29:29,151 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:29:29,151 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:29:29,151 INFO [inference.py:622] Max states: 64
+2026-01-26 05:29:29,151 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:29:29,151 INFO [inference.py:629] Device: cpu
+2026-01-26 05:29:29,151 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:29:29,153 INFO [inference.py:640] Vocabulary size: 500
+2026-01-26 05:29:29,153 INFO [inference.py:641] Blank ID: 0
+2026-01-26 05:29:29,153 INFO [inference.py:644] Creating model
+2026-01-26 05:29:30,733 INFO [inference.py:673] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:29:30,734 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:29:35,902 INFO [inference.py:680] Number of model parameters: 317,511,772
+2026-01-26 05:29:35,902 INFO [inference.py:683] Loading test data
+2026-01-26 05:29:35,902 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:29:37,022 INFO [inference.py:694] Number of test utterances: 6676
+2026-01-26 05:29:37,023 INFO [inference.py:697] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/hyp-ihm.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-25-15-47-40 ADDED Viewed

	@@ -0,0 +1,32 @@

+2026-01-25 15:47:40,637 INFO [inference.py:419] ================================================================================
+2026-01-25 15:47:40,637 INFO [inference.py:420] XLSR-Transducer Inference on AMI
+2026-01-25 15:47:40,637 INFO [inference.py:421] ================================================================================
+2026-01-25 15:47:40,637 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
+2026-01-25 15:47:40,637 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-25 15:47:40,637 INFO [inference.py:424] Test set: ihm
+2026-01-25 15:47:40,637 INFO [inference.py:425] Decoding method: greedy_search
+2026-01-25 15:47:40,637 INFO [inference.py:431] Device: cpu
+2026-01-25 15:47:40,637 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
+2026-01-25 15:47:40,639 INFO [inference.py:442] Vocabulary size: 500
+2026-01-25 15:47:40,639 INFO [inference.py:443] Blank ID: 0
+2026-01-25 15:47:40,639 INFO [inference.py:446] Creating model
+2026-01-25 15:47:41,928 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-25 15:47:41,929 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-25 15:47:46,671 INFO [inference.py:482] Number of model parameters: 317,511,772
+2026-01-25 15:47:46,671 INFO [inference.py:485] Loading test data
+2026-01-25 15:47:46,671 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-25 15:47:47,717 INFO [inference.py:496] Number of test utterances: 6676
+2026-01-25 15:47:47,717 INFO [inference.py:499] Starting inference...
+2026-01-25 15:47:48,838 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-25 15:47:48,842 INFO [inference.py:319] Audio range: [-0.090, 0.104]
+2026-01-25 15:47:48,845 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-25 15:47:58,037 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-25 15:47:58,038 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-25 15:47:58,039 INFO [inference.py:343] Encoder out range: [-11.805, 12.741]
+2026-01-25 15:48:09,204 INFO [inference.py:353] Number of hypotheses: 6
+2026-01-25 15:48:09,205 INFO [inference.py:355] First hypothesis: [37, 9, 49, 17, 9, 49, 17, 9, 49, 17, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 13, 59, 14, 164, 59, 21, 19, 40, 22, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 125, 13, 200, 130, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 7, 24, 154, 125, 13, 160, 202, 281, 116, 126, 281, 5, 8, 119, 55, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 4, 2, 11, 4, 2, 37, 4, 2, 37, 4, 2, 37, 4, 7, 197, 10, 7, 5, 13, 160, 157, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 2, 11]
+2026-01-25 15:48:09,222 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-25 15:48:09,223 INFO [inference.py:319] Audio range: [-0.401, 0.443]
+2026-01-25 15:48:09,224 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-43-42 ADDED Viewed

	@@ -0,0 +1,45 @@

+2026-01-26 04:43:42,361 INFO [inference.py:419] ================================================================================
+2026-01-26 04:43:42,361 INFO [inference.py:420] XLSR-Transducer Inference on AMI
+2026-01-26 04:43:42,361 INFO [inference.py:421] ================================================================================
+2026-01-26 04:43:42,361 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
+2026-01-26 04:43:42,361 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 04:43:42,361 INFO [inference.py:424] Test set: ihm
+2026-01-26 04:43:42,361 INFO [inference.py:425] Decoding method: greedy_search
+2026-01-26 04:43:42,361 INFO [inference.py:431] Device: cpu
+2026-01-26 04:43:42,361 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 04:43:42,363 INFO [inference.py:442] Vocabulary size: 500
+2026-01-26 04:43:42,363 INFO [inference.py:443] Blank ID: 0
+2026-01-26 04:43:42,363 INFO [inference.py:446] Creating model
+2026-01-26 04:43:43,908 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 04:43:43,908 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 04:43:48,495 INFO [inference.py:482] Number of model parameters: 317,511,772
+2026-01-26 04:43:48,495 INFO [inference.py:485] Loading test data
+2026-01-26 04:43:48,495 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 04:43:49,572 INFO [inference.py:496] Number of test utterances: 6676
+2026-01-26 04:43:49,572 INFO [inference.py:499] Starting inference...
+2026-01-26 04:43:50,628 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 04:43:50,631 INFO [inference.py:319] Audio range: [-0.090, 0.104]
+2026-01-26 04:43:50,633 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 04:43:59,926 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 04:43:59,926 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 04:43:59,927 INFO [inference.py:343] Encoder out range: [-4.703, 6.664]
+2026-01-26 04:44:00,350 INFO [inference.py:353] Number of hypotheses: 6
+2026-01-26 04:44:00,350 INFO [inference.py:355] First hypothesis: [11]
+2026-01-26 04:44:00,355 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 04:44:00,356 INFO [inference.py:319] Audio range: [-0.401, 0.443]
+2026-01-26 04:44:00,357 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 04:44:07,432 INFO [inference.py:341] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 04:44:07,434 INFO [inference.py:342] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 04:44:07,434 INFO [inference.py:343] Encoder out range: [-4.701, 6.665]
+2026-01-26 04:44:08,039 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:44:08,039 INFO [inference.py:355] First hypothesis: [11]
+2026-01-26 04:44:08,059 INFO [inference.py:318] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 04:44:08,060 INFO [inference.py:319] Audio range: [-0.439, 0.480]
+2026-01-26 04:44:08,061 INFO [inference.py:320] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-44-36 ADDED Viewed

	@@ -0,0 +1,45 @@

+2026-01-26 04:44:36,166 INFO [inference.py:419] ================================================================================
+2026-01-26 04:44:36,166 INFO [inference.py:420] XLSR-Transducer Inference on AMI
+2026-01-26 04:44:36,166 INFO [inference.py:421] ================================================================================
+2026-01-26 04:44:36,166 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
+2026-01-26 04:44:36,166 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 04:44:36,166 INFO [inference.py:424] Test set: ihm
+2026-01-26 04:44:36,166 INFO [inference.py:425] Decoding method: greedy_search
+2026-01-26 04:44:36,166 INFO [inference.py:431] Device: cpu
+2026-01-26 04:44:36,166 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 04:44:36,168 INFO [inference.py:442] Vocabulary size: 500
+2026-01-26 04:44:36,168 INFO [inference.py:443] Blank ID: 0
+2026-01-26 04:44:36,168 INFO [inference.py:446] Creating model
+2026-01-26 04:44:37,655 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/checkpoint-30000.pt
+2026-01-26 04:44:37,655 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/checkpoint-30000.pt
+2026-01-26 04:44:42,489 INFO [inference.py:482] Number of model parameters: 317,511,772
+2026-01-26 04:44:42,489 INFO [inference.py:485] Loading test data
+2026-01-26 04:44:42,489 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 04:44:43,623 INFO [inference.py:496] Number of test utterances: 6676
+2026-01-26 04:44:43,623 INFO [inference.py:499] Starting inference...
+2026-01-26 04:44:44,773 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 04:44:44,776 INFO [inference.py:319] Audio range: [-0.090, 0.104]
+2026-01-26 04:44:44,779 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 04:44:52,532 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 04:44:52,532 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 04:44:52,533 INFO [inference.py:343] Encoder out range: [-4.808, 7.175]
+2026-01-26 04:44:52,980 INFO [inference.py:353] Number of hypotheses: 6
+2026-01-26 04:44:52,980 INFO [inference.py:355] First hypothesis: [11, 4, 2, 11]
+2026-01-26 04:44:52,988 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 04:44:52,989 INFO [inference.py:319] Audio range: [-0.401, 0.443]
+2026-01-26 04:44:52,990 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 04:45:00,200 INFO [inference.py:341] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 04:45:00,201 INFO [inference.py:342] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 04:45:00,201 INFO [inference.py:343] Encoder out range: [-4.808, 7.173]
+2026-01-26 04:45:01,019 INFO [inference.py:353] Number of hypotheses: 23
+2026-01-26 04:45:01,019 INFO [inference.py:355] First hypothesis: [11, 4, 2, 11]
+2026-01-26 04:45:01,031 INFO [inference.py:318] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 04:45:01,032 INFO [inference.py:319] Audio range: [-0.439, 0.480]
+2026-01-26 04:45:01,033 INFO [inference.py:320] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-45-26 ADDED Viewed

The diff for this file is too large to render. See raw diff

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-57-24 ADDED Viewed

	@@ -0,0 +1,57 @@

+2026-01-26 04:57:24,557 INFO [inference.py:410] ================================================================================
+2026-01-26 04:57:24,557 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 04:57:24,557 INFO [inference.py:412] ================================================================================
+2026-01-26 04:57:24,557 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 04:57:24,557 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 04:57:24,557 INFO [inference.py:415] Test set: ihm
+2026-01-26 04:57:24,558 INFO [inference.py:416] Decoding method: greedy_search
+2026-01-26 04:57:24,558 INFO [inference.py:422] Device: cpu
+2026-01-26 04:57:24,558 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 04:57:24,559 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 04:57:24,559 INFO [inference.py:434] Blank ID: 0
+2026-01-26 04:57:24,559 INFO [inference.py:437] Creating model
+2026-01-26 04:57:26,107 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 04:57:26,108 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 04:57:30,697 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 04:57:30,697 INFO [inference.py:476] Loading test data
+2026-01-26 04:57:30,697 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 04:57:31,812 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 04:57:31,812 INFO [inference.py:490] Starting inference...
+2026-01-26 04:57:32,942 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 04:57:32,945 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 04:57:32,948 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 04:57:42,125 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 04:57:42,126 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 04:57:42,129 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
+2026-01-26 04:57:42,499 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 04:57:42,500 INFO [inference.py:346] First hypothesis: []
+2026-01-26 04:57:42,506 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 04:57:42,506 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 04:57:42,507 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 04:57:49,615 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 04:57:49,616 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 04:57:49,616 INFO [inference.py:334] Encoder out range: [-4.701, 6.665]
+2026-01-26 04:57:50,141 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 04:57:50,141 INFO [inference.py:346] First hypothesis: []
+2026-01-26 04:57:50,152 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 04:57:50,153 INFO [inference.py:310] Audio range: [-0.439, 0.480]
+2026-01-26 04:57:50,154 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)
+2026-01-26 04:57:56,416 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
+2026-01-26 04:57:56,417 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100,  94,  89,  88,  87,  73,
+         71,  71,  69,  68,  68,  65,  62,  62,  59,  59,  58,  56,  51,  45,
+         42,  40,  38,  36,  35,  33,  29,  28,  24,  18,  17])
+2026-01-26 04:57:56,417 INFO [inference.py:334] Encoder out range: [-4.699, 6.664]
+2026-01-26 04:57:56,902 INFO [inference.py:344] Number of hypotheses: 39
+2026-01-26 04:57:56,902 INFO [inference.py:346] First hypothesis: []
+2026-01-26 04:57:56,906 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
+2026-01-26 04:57:56,907 INFO [inference.py:310] Audio range: [-0.314, 0.332]
+2026-01-26 04:57:56,907 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
+        56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
+        46079, 45280, 44960], dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-58-20 ADDED Viewed

	@@ -0,0 +1,32 @@

+2026-01-26 04:58:20,350 INFO [inference.py:410] ================================================================================
+2026-01-26 04:58:20,350 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 04:58:20,350 INFO [inference.py:412] ================================================================================
+2026-01-26 04:58:20,350 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 04:58:20,350 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 04:58:20,350 INFO [inference.py:415] Test set: ihm
+2026-01-26 04:58:20,350 INFO [inference.py:416] Decoding method: modified_beam_search
+2026-01-26 04:58:20,350 INFO [inference.py:422] Device: cpu
+2026-01-26 04:58:20,350 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 04:58:20,352 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 04:58:20,352 INFO [inference.py:434] Blank ID: 0
+2026-01-26 04:58:20,352 INFO [inference.py:437] Creating model
+2026-01-26 04:58:21,896 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 04:58:21,897 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 04:58:26,596 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 04:58:26,597 INFO [inference.py:476] Loading test data
+2026-01-26 04:58:26,597 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 04:58:27,751 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 04:58:27,752 INFO [inference.py:490] Starting inference...
+2026-01-26 04:58:28,913 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 04:58:28,920 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 04:58:28,923 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 04:58:38,037 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 04:58:38,037 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 04:58:38,038 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
+2026-01-26 04:58:38,417 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 04:58:38,418 INFO [inference.py:346] First hypothesis: []
+2026-01-26 04:58:38,426 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 04:58:38,427 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 04:58:38,428 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-59-21 ADDED Viewed

	@@ -0,0 +1,111 @@

+2026-01-26 04:59:21,443 INFO [inference.py:410] ================================================================================
+2026-01-26 04:59:21,443 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 04:59:21,443 INFO [inference.py:412] ================================================================================
+2026-01-26 04:59:21,443 INFO [inference.py:413] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
+2026-01-26 04:59:21,443 INFO [inference.py:414] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 04:59:21,443 INFO [inference.py:415] Test set: ihm
+2026-01-26 04:59:21,443 INFO [inference.py:416] Decoding method: modified_beam_search
+2026-01-26 04:59:21,443 INFO [inference.py:422] Device: cpu
+2026-01-26 04:59:21,443 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 04:59:21,445 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 04:59:21,445 INFO [inference.py:434] Blank ID: 0
+2026-01-26 04:59:21,445 INFO [inference.py:437] Creating model
+2026-01-26 04:59:23,052 INFO [inference.py:444] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 04:59:23,052 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 04:59:27,784 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 04:59:27,784 INFO [inference.py:476] Loading test data
+2026-01-26 04:59:27,784 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 04:59:28,889 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 04:59:28,889 INFO [inference.py:490] Starting inference...
+2026-01-26 04:59:29,994 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 04:59:29,997 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 04:59:30,000 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 04:59:39,304 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 04:59:39,305 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 04:59:39,306 INFO [inference.py:334] Encoder out range: [-13.684, 12.764]
+2026-01-26 04:59:39,937 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 04:59:39,938 INFO [inference.py:346] First hypothesis: [171]
+2026-01-26 04:59:39,943 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 04:59:39,998 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 04:59:39,999 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 04:59:47,631 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 04:59:47,632 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 04:59:47,632 INFO [inference.py:334] Encoder out range: [-12.514, 12.004]
+2026-01-26 04:59:48,802 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 04:59:48,802 INFO [inference.py:346] First hypothesis: [23, 51, 156, 6, 205, 18, 116, 113, 363]
+2026-01-26 04:59:49,215 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 04:59:49,220 INFO [inference.py:310] Audio range: [-0.439, 0.480]
+2026-01-26 04:59:49,221 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)
+2026-01-26 04:59:56,731 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
+2026-01-26 04:59:56,732 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100,  94,  89,  88,  87,  73,
+         71,  71,  69,  68,  68,  65,  62,  62,  59,  59,  58,  56,  51,  45,
+         42,  40,  38,  36,  35,  33,  29,  28,  24,  18,  17])
+2026-01-26 04:59:56,733 INFO [inference.py:334] Encoder out range: [-11.444, 10.811]
+2026-01-26 04:59:57,403 INFO [inference.py:344] Number of hypotheses: 39
+2026-01-26 04:59:57,403 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 04:59:57,409 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
+2026-01-26 04:59:57,420 INFO [inference.py:310] Audio range: [-0.314, 0.332]
+2026-01-26 04:59:57,420 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
+        56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
+        46079, 45280, 44960], dtype=torch.int32)
+2026-01-26 05:00:05,318 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
+2026-01-26 05:00:05,319 INFO [inference.py:333] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
+        170, 166, 164, 164, 147, 144, 143, 141, 140])
+2026-01-26 05:00:05,319 INFO [inference.py:334] Encoder out range: [-13.261, 11.090]
+2026-01-26 05:00:06,035 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:00:06,035 INFO [inference.py:346] First hypothesis: [20]
+2026-01-26 05:00:06,104 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
+2026-01-26 05:00:06,105 INFO [inference.py:310] Audio range: [-0.323, 0.414]
+2026-01-26 05:00:06,105 INFO [inference.py:311] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
+2026-01-26 05:00:14,039 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
+2026-01-26 05:00:14,040 INFO [inference.py:333] Encoder out lens: tensor([991, 931, 931, 920, 913])
+2026-01-26 05:00:14,098 INFO [inference.py:334] Encoder out range: [-14.241, 14.344]
+2026-01-26 05:00:14,713 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:00:14,713 INFO [inference.py:346] First hypothesis: [37, 23, 127, 274, 5, 147, 80, 73, 6, 16, 29, 119, 5, 20, 84, 171, 15, 6, 328, 5, 127, 20, 56]
+2026-01-26 05:00:14,718 INFO [inference.py:309] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
+2026-01-26 05:00:14,719 INFO [inference.py:310] Audio range: [-0.274, 0.362]
+2026-01-26 05:00:14,719 INFO [inference.py:311] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
+        33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
+        20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320,  7040,
+         6560,  6400,  5760,  5760,  5120,  4800,  4800,  4640,  4480,  3360],
+       dtype=torch.int32)
+2026-01-26 05:00:21,633 INFO [inference.py:332] Encoder out shape: torch.Size([40, 122, 1024])
+2026-01-26 05:00:21,634 INFO [inference.py:333] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103,  91,  91,
+         85,  77,  77,  73,  67,  67,  64,  52,  50,  50,  49,  48,  46,  41,
+         38,  21,  20,  19,  17,  17,  15,  14,  14,  14,  13,  10])
+2026-01-26 05:00:21,635 INFO [inference.py:334] Encoder out range: [-11.784, 11.570]
+2026-01-26 05:00:22,302 INFO [inference.py:344] Number of hypotheses: 40
+2026-01-26 05:00:22,302 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:00:22,310 INFO [inference.py:309] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
+2026-01-26 05:00:22,311 INFO [inference.py:310] Audio range: [-0.514, 0.393]
+2026-01-26 05:00:22,311 INFO [inference.py:311] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
+        51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
+        44640, 44320, 44160], dtype=torch.int32)
+2026-01-26 05:00:29,229 INFO [inference.py:332] Encoder out shape: torch.Size([23, 208, 1024])
+2026-01-26 05:00:29,230 INFO [inference.py:333] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
+        154, 150, 150, 149, 146, 143, 139, 138, 137])
+2026-01-26 05:00:29,230 INFO [inference.py:334] Encoder out range: [-12.152, 11.060]
+2026-01-26 05:00:29,913 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:00:29,913 INFO [inference.py:346] First hypothesis: [51, 6, 4, 27, 5, 26, 70, 22, 20, 265, 33, 46]
+2026-01-26 05:00:29,920 INFO [inference.py:309] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
+2026-01-26 05:00:29,921 INFO [inference.py:310] Audio range: [-0.416, 0.458]
+2026-01-26 05:00:29,921 INFO [inference.py:311] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
+        50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
+        45920, 44640, 43040, 42720], dtype=torch.int32)
+2026-01-26 05:00:37,217 INFO [inference.py:332] Encoder out shape: torch.Size([24, 204, 1024])
+2026-01-26 05:00:37,217 INFO [inference.py:333] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
+        151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
+2026-01-26 05:00:37,218 INFO [inference.py:334] Encoder out range: [-12.007, 11.624]
+2026-01-26 05:00:37,807 INFO [inference.py:344] Number of hypotheses: 24
+2026-01-26 05:00:37,808 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:00:37,815 INFO [inference.py:309] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
+2026-01-26 05:00:37,816 INFO [inference.py:310] Audio range: [-0.135, 0.191]
+2026-01-26 05:00:37,816 INFO [inference.py:311] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
+       dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-02-37 ADDED Viewed

	@@ -0,0 +1,32 @@

+2026-01-26 05:02:37,849 INFO [inference.py:410] ================================================================================
+2026-01-26 05:02:37,849 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 05:02:37,850 INFO [inference.py:412] ================================================================================
+2026-01-26 05:02:37,850 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:02:37,850 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:02:37,850 INFO [inference.py:415] Test set: ihm
+2026-01-26 05:02:37,850 INFO [inference.py:416] Decoding method: modified_beam_search
+2026-01-26 05:02:37,850 INFO [inference.py:422] Device: cpu
+2026-01-26 05:02:37,850 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:02:37,851 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 05:02:37,851 INFO [inference.py:434] Blank ID: 0
+2026-01-26 05:02:37,851 INFO [inference.py:437] Creating model
+2026-01-26 05:02:39,443 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 05:02:39,444 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 05:02:44,138 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 05:02:44,139 INFO [inference.py:476] Loading test data
+2026-01-26 05:02:44,139 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:02:45,310 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 05:02:45,310 INFO [inference.py:490] Starting inference...
+2026-01-26 05:02:46,398 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 05:02:46,400 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 05:02:46,404 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 05:02:55,240 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 05:02:55,241 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 05:02:55,298 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
+2026-01-26 05:02:55,596 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:02:55,596 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:02:55,605 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 05:02:55,606 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 05:02:55,607 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-03-42 ADDED Viewed

	@@ -0,0 +1,32 @@

+2026-01-26 05:03:42,123 INFO [inference.py:410] ================================================================================
+2026-01-26 05:03:42,123 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 05:03:42,123 INFO [inference.py:412] ================================================================================
+2026-01-26 05:03:42,123 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:03:42,123 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:03:42,123 INFO [inference.py:415] Test set: ihm
+2026-01-26 05:03:42,123 INFO [inference.py:416] Decoding method: modified_beam_search
+2026-01-26 05:03:42,123 INFO [inference.py:422] Device: cpu
+2026-01-26 05:03:42,124 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:03:42,125 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 05:03:42,125 INFO [inference.py:434] Blank ID: 0
+2026-01-26 05:03:42,125 INFO [inference.py:437] Creating model
+2026-01-26 05:03:43,760 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 05:03:43,760 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
+2026-01-26 05:03:48,510 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 05:03:48,511 INFO [inference.py:476] Loading test data
+2026-01-26 05:03:48,511 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:03:49,613 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 05:03:49,613 INFO [inference.py:490] Starting inference...
+2026-01-26 05:03:50,741 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 05:03:50,756 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 05:03:50,759 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 05:03:59,827 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 05:03:59,828 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 05:03:59,828 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
+2026-01-26 05:04:00,148 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:04:00,148 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:04:00,158 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 05:04:00,162 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 05:04:00,163 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-05-16 ADDED Viewed

	@@ -0,0 +1,529 @@

+2026-01-26 05:05:16,888 INFO [inference.py:410] ================================================================================
+2026-01-26 05:05:16,888 INFO [inference.py:411] XLSR-Transducer Inference on AMI
+2026-01-26 05:05:16,888 INFO [inference.py:412] ================================================================================
+2026-01-26 05:05:16,888 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:05:16,888 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:05:16,888 INFO [inference.py:415] Test set: ihm
+2026-01-26 05:05:16,888 INFO [inference.py:416] Decoding method: modified_beam_search
+2026-01-26 05:05:16,888 INFO [inference.py:422] Device: cpu
+2026-01-26 05:05:16,888 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:05:16,890 INFO [inference.py:433] Vocabulary size: 500
+2026-01-26 05:05:16,890 INFO [inference.py:434] Blank ID: 0
+2026-01-26 05:05:16,890 INFO [inference.py:437] Creating model
+2026-01-26 05:05:18,544 INFO [inference.py:444] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:05:18,544 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:05:23,319 INFO [inference.py:473] Number of model parameters: 317,511,772
+2026-01-26 05:05:23,320 INFO [inference.py:476] Loading test data
+2026-01-26 05:05:23,320 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:05:24,403 INFO [inference.py:487] Number of test utterances: 6676
+2026-01-26 05:05:24,403 INFO [inference.py:490] Starting inference...
+2026-01-26 05:05:25,573 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
+2026-01-26 05:05:25,576 INFO [inference.py:310] Audio range: [-0.090, 0.104]
+2026-01-26 05:05:25,579 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
+2026-01-26 05:05:34,838 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
+2026-01-26 05:05:34,839 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
+2026-01-26 05:05:34,839 INFO [inference.py:334] Encoder out range: [-13.684, 12.764]
+2026-01-26 05:05:35,536 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:05:35,537 INFO [inference.py:346] First hypothesis: [171]
+2026-01-26 05:05:35,546 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 05:05:35,547 INFO [inference.py:310] Audio range: [-0.401, 0.443]
+2026-01-26 05:05:35,547 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
+        58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
+        47520, 44639, 44000], dtype=torch.int32)
+2026-01-26 05:05:43,001 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 05:05:43,003 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
+        162, 157, 157, 156, 151, 149, 148, 139, 137])
+2026-01-26 05:05:43,003 INFO [inference.py:334] Encoder out range: [-12.514, 12.004]
+2026-01-26 05:05:43,905 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:05:43,905 INFO [inference.py:346] First hypothesis: [23, 51, 156, 6, 205, 18, 116, 113, 363]
+2026-01-26 05:05:43,925 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
+2026-01-26 05:05:43,926 INFO [inference.py:310] Audio range: [-0.439, 0.480]
+2026-01-26 05:05:43,926 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
+        28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
+        20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
+        12320, 11680, 11520, 10880,  9440,  9120,  7840,  5920,  5760],
+       dtype=torch.int32)
+2026-01-26 05:05:51,027 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
+2026-01-26 05:05:51,028 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100,  94,  89,  88,  87,  73,
+         71,  71,  69,  68,  68,  65,  62,  62,  59,  59,  58,  56,  51,  45,
+         42,  40,  38,  36,  35,  33,  29,  28,  24,  18,  17])
+2026-01-26 05:05:51,028 INFO [inference.py:334] Encoder out range: [-11.444, 10.811]
+2026-01-26 05:05:51,620 INFO [inference.py:344] Number of hypotheses: 39
+2026-01-26 05:05:51,620 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:05:51,628 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
+2026-01-26 05:05:51,629 INFO [inference.py:310] Audio range: [-0.314, 0.332]
+2026-01-26 05:05:51,629 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
+        56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
+        46079, 45280, 44960], dtype=torch.int32)
+2026-01-26 05:05:59,021 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
+2026-01-26 05:05:59,022 INFO [inference.py:333] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
+        170, 166, 164, 164, 147, 144, 143, 141, 140])
+2026-01-26 05:05:59,023 INFO [inference.py:334] Encoder out range: [-13.261, 11.090]
+2026-01-26 05:05:59,931 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:05:59,932 INFO [inference.py:346] First hypothesis: [20]
+2026-01-26 05:06:00,567 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
+2026-01-26 05:06:00,568 INFO [inference.py:310] Audio range: [-0.323, 0.414]
+2026-01-26 05:06:00,569 INFO [inference.py:311] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
+2026-01-26 05:06:09,302 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
+2026-01-26 05:06:09,303 INFO [inference.py:333] Encoder out lens: tensor([991, 931, 931, 920, 913])
+2026-01-26 05:06:09,304 INFO [inference.py:334] Encoder out range: [-14.241, 14.344]
+2026-01-26 05:06:10,112 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:06:10,113 INFO [inference.py:346] First hypothesis: [37, 23, 127, 274, 5, 147, 80, 73, 6, 16, 29, 119, 5, 20, 84, 171, 15, 6, 328, 5, 127, 20, 56]
+2026-01-26 05:06:10,117 INFO [inference.py:309] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
+2026-01-26 05:06:10,118 INFO [inference.py:310] Audio range: [-0.274, 0.362]
+2026-01-26 05:06:10,118 INFO [inference.py:311] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
+        33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
+        20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320,  7040,
+         6560,  6400,  5760,  5760,  5120,  4800,  4800,  4640,  4480,  3360],
+       dtype=torch.int32)
+2026-01-26 05:06:16,910 INFO [inference.py:332] Encoder out shape: torch.Size([40, 122, 1024])
+2026-01-26 05:06:16,911 INFO [inference.py:333] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103,  91,  91,
+         85,  77,  77,  73,  67,  67,  64,  52,  50,  50,  49,  48,  46,  41,
+         38,  21,  20,  19,  17,  17,  15,  14,  14,  14,  13,  10])
+2026-01-26 05:06:16,911 INFO [inference.py:334] Encoder out range: [-11.784, 11.570]
+2026-01-26 05:06:17,504 INFO [inference.py:344] Number of hypotheses: 40
+2026-01-26 05:06:17,504 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:06:17,513 INFO [inference.py:309] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
+2026-01-26 05:06:17,514 INFO [inference.py:310] Audio range: [-0.514, 0.393]
+2026-01-26 05:06:17,514 INFO [inference.py:311] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
+        51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
+        44640, 44320, 44160], dtype=torch.int32)
+2026-01-26 05:06:24,334 INFO [inference.py:332] Encoder out shape: torch.Size([23, 208, 1024])
+2026-01-26 05:06:24,335 INFO [inference.py:333] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
+        154, 150, 150, 149, 146, 143, 139, 138, 137])
+2026-01-26 05:06:24,335 INFO [inference.py:334] Encoder out range: [-12.152, 11.060]
+2026-01-26 05:06:25,044 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:06:25,045 INFO [inference.py:346] First hypothesis: [51, 6, 4, 27, 5, 26, 70, 22, 20, 265, 33, 46]
+2026-01-26 05:06:25,108 INFO [inference.py:309] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
+2026-01-26 05:06:25,109 INFO [inference.py:310] Audio range: [-0.416, 0.458]
+2026-01-26 05:06:25,109 INFO [inference.py:311] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
+        50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
+        45920, 44640, 43040, 42720], dtype=torch.int32)
+2026-01-26 05:06:32,219 INFO [inference.py:332] Encoder out shape: torch.Size([24, 204, 1024])
+2026-01-26 05:06:32,220 INFO [inference.py:333] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
+        151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
+2026-01-26 05:06:32,220 INFO [inference.py:334] Encoder out range: [-12.007, 11.624]
+2026-01-26 05:06:32,900 INFO [inference.py:344] Number of hypotheses: 24
+2026-01-26 05:06:32,900 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:06:32,908 INFO [inference.py:309] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
+2026-01-26 05:06:32,920 INFO [inference.py:310] Audio range: [-0.135, 0.191]
+2026-01-26 05:06:32,921 INFO [inference.py:311] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
+       dtype=torch.int32)
+2026-01-26 05:06:40,613 INFO [inference.py:332] Encoder out shape: torch.Size([9, 552, 1024])
+2026-01-26 05:06:40,614 INFO [inference.py:333] Encoder out lens: tensor([552, 533, 513, 486, 482, 474, 474, 472, 459])
+2026-01-26 05:06:40,614 INFO [inference.py:334] Encoder out range: [-13.325, 12.083]
+2026-01-26 05:06:41,231 INFO [inference.py:344] Number of hypotheses: 9
+2026-01-26 05:06:41,231 INFO [inference.py:346] First hypothesis: [11, 87, 7, 5, 13, 379, 130, 101, 6, 16, 29, 119, 5, 6, 16, 29, 119, 18, 115, 93, 58, 36, 30, 201, 38, 58, 134, 50, 6, 16, 29, 119, 20, 81, 7, 85, 272, 73, 105, 6, 205, 17, 47]
+2026-01-26 05:06:41,240 INFO [inference.py:309] Audio shape: torch.Size([14, 112320]), dtype: torch.float32
+2026-01-26 05:06:41,241 INFO [inference.py:310] Audio range: [-0.469, 0.457]
+2026-01-26 05:06:41,242 INFO [inference.py:311] Audio lengths: tensor([112320, 105920, 105439, 104000, 103840, 101920,  98720,  98400,  96960,
+         96800,  96320,  95680,  93760,  93600], dtype=torch.int32)
+2026-01-26 05:06:49,007 INFO [inference.py:332] Encoder out shape: torch.Size([14, 350, 1024])
+2026-01-26 05:06:49,008 INFO [inference.py:333] Encoder out lens: tensor([350, 330, 329, 324, 324, 318, 308, 307, 302, 302, 300, 298, 292, 292])
+2026-01-26 05:06:49,009 INFO [inference.py:334] Encoder out range: [-14.286, 11.940]
+2026-01-26 05:06:49,714 INFO [inference.py:344] Number of hypotheses: 14
+2026-01-26 05:06:49,714 INFO [inference.py:346] First hypothesis: [39, 9, 83, 7, 8, 148, 122, 26, 48]
+2026-01-26 05:06:49,714 INFO [inference.py:535] Processed 206 utterances in 10 batches
+2026-01-26 05:06:49,723 INFO [inference.py:309] Audio shape: torch.Size([38, 41440]), dtype: torch.float32
+2026-01-26 05:06:49,724 INFO [inference.py:310] Audio range: [-0.272, 0.322]
+2026-01-26 05:06:49,726 INFO [inference.py:311] Audio lengths: tensor([41440, 41120, 40160, 35680, 33120, 32960, 32800, 31520, 31040, 30880,
+        30239, 29920, 29120, 27360, 25279, 24480, 23520, 22720, 22720, 21600,
+        20800, 20320, 19840, 19840, 17600, 15520, 13120, 12480, 12320, 11040,
+        10560,  9600,  8640,  7520,  5440,  5120,  5120,  4640],
+       dtype=torch.int32)
+2026-01-26 05:06:57,233 INFO [inference.py:332] Encoder out shape: torch.Size([38, 129, 1024])
+2026-01-26 05:06:57,234 INFO [inference.py:333] Encoder out lens: tensor([129, 128, 125, 111, 103, 102, 102,  98,  96,  96,  94,  93,  90,  85,
+         78,  76,  73,  70,  70,  67,  64,  63,  61,  61,  54,  48,  40,  38,
+         38,  34,  32,  29,  26,  23,  16,  15,  15,  14])
+2026-01-26 05:06:57,235 INFO [inference.py:334] Encoder out range: [-13.512, 11.822]
+2026-01-26 05:06:57,919 INFO [inference.py:344] Number of hypotheses: 38
+2026-01-26 05:06:57,920 INFO [inference.py:346] First hypothesis: [56, 199, 130]
+2026-01-26 05:06:57,928 INFO [inference.py:309] Audio shape: torch.Size([38, 41280]), dtype: torch.float32
+2026-01-26 05:06:57,929 INFO [inference.py:310] Audio range: [-0.080, 0.105]
+2026-01-26 05:06:57,930 INFO [inference.py:311] Audio lengths: tensor([41280, 40320, 36800, 35680, 34880, 34879, 34080, 34080, 32000, 30400,
+        29280, 29280, 28320, 24000, 23040, 20960, 20960, 20960, 20160, 16960,
+        14080, 13280, 12640, 12160, 10720,  9440,  8640,  6240,  6080,  5440,
+         5440,  5120,  4800,  4800,  4640,  4480,  4320,  4160],
+       dtype=torch.int32)
+2026-01-26 05:07:05,017 INFO [inference.py:332] Encoder out shape: torch.Size([38, 128, 1024])
+2026-01-26 05:07:05,019 INFO [inference.py:333] Encoder out lens: tensor([128, 125, 114, 111, 108, 108, 106, 106,  99,  94,  91,  91,  88,  74,
+         71,  65,  65,  65,  62,  52,  43,  41,  39,  37,  33,  29,  26,  19,
+         18,  16,  16,  15,  14,  14,  14,  13,  13,  12])
+2026-01-26 05:07:05,019 INFO [inference.py:334] Encoder out range: [-11.071, 11.522]
+2026-01-26 05:07:05,620 INFO [inference.py:344] Number of hypotheses: 38
+2026-01-26 05:07:05,620 INFO [inference.py:346] First hypothesis: [10, 7, 5, 6, 148]
+2026-01-26 05:07:05,630 INFO [inference.py:309] Audio shape: torch.Size([38, 41760]), dtype: torch.float32
+2026-01-26 05:07:05,631 INFO [inference.py:310] Audio range: [-0.246, 0.340]
+2026-01-26 05:07:05,631 INFO [inference.py:311] Audio lengths: tensor([41760, 39680, 38880, 36799, 36639, 36000, 34559, 34240, 33120, 31840,
+        30720, 30560, 29760, 29280, 24640, 24160, 22720, 21759, 21600, 20960,
+        16320, 14400, 13600, 11360, 10880, 10399, 10400,  9760,  9440,  9280,
+         8320,  8320,  7680,  7360,  6880,  6880,  6240,  6240],
+       dtype=torch.int32)
+2026-01-26 05:07:13,101 INFO [inference.py:332] Encoder out shape: torch.Size([38, 130, 1024])
+2026-01-26 05:07:13,102 INFO [inference.py:333] Encoder out lens: tensor([130, 123, 121, 114, 114, 112, 107, 106, 103,  99,  95,  95,  92,  91,
+         76,  75,  70,  67,  67,  65,  50,  44,  42,  35,  33,  32,  32,  30,
+         29,  28,  25,  25,  23,  22,  21,  21,  19,  19])
+2026-01-26 05:07:13,103 INFO [inference.py:334] Encoder out range: [-11.967, 11.229]
+2026-01-26 05:07:13,708 INFO [inference.py:344] Number of hypotheses: 38
+2026-01-26 05:07:13,708 INFO [inference.py:346] First hypothesis: [145, 9, 7, 24, 44, 205]
+2026-01-26 05:07:13,715 INFO [inference.py:309] Audio shape: torch.Size([9, 170400]), dtype: torch.float32
+2026-01-26 05:07:13,716 INFO [inference.py:310] Audio range: [-0.370, 0.393]
+2026-01-26 05:07:13,716 INFO [inference.py:311] Audio lengths: tensor([170400, 166559, 165919, 164800, 156800, 152480, 147520, 146559, 145759],
+       dtype=torch.int32)
+2026-01-26 05:07:21,734 INFO [inference.py:332] Encoder out shape: torch.Size([9, 532, 1024])
+2026-01-26 05:07:21,735 INFO [inference.py:333] Encoder out lens: tensor([532, 520, 518, 514, 489, 476, 460, 457, 455])
+2026-01-26 05:07:21,735 INFO [inference.py:334] Encoder out range: [-12.221, 14.348]
+2026-01-26 05:07:22,459 INFO [inference.py:344] Number of hypotheses: 9
+2026-01-26 05:07:22,459 INFO [inference.py:346] First hypothesis: [37, 4, 2, 11]
+2026-01-26 05:07:22,468 INFO [inference.py:309] Audio shape: torch.Size([5, 315520]), dtype: torch.float32
+2026-01-26 05:07:22,469 INFO [inference.py:310] Audio range: [-0.297, 0.334]
+2026-01-26 05:07:22,470 INFO [inference.py:311] Audio lengths: tensor([315520, 301440, 294399, 292480, 289919], dtype=torch.int32)
+2026-01-26 05:07:31,016 INFO [inference.py:332] Encoder out shape: torch.Size([5, 985, 1024])
+2026-01-26 05:07:31,017 INFO [inference.py:333] Encoder out lens: tensor([985, 941, 919, 913, 905])
+2026-01-26 05:07:31,017 INFO [inference.py:334] Encoder out range: [-12.260, 13.635]
+2026-01-26 05:07:31,753 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:07:31,753 INFO [inference.py:346] First hypothesis: [11, 52, 87, 7, 5, 272, 25, 313, 359, 5, 6, 24, 25, 297, 5, 114, 32, 7, 8, 38, 204, 51, 13, 58, 63, 5, 18, 47, 259, 101, 18, 34, 16, 29, 119, 5, 113, 64, 113, 64]
+2026-01-26 05:07:31,760 INFO [inference.py:309] Audio shape: torch.Size([6, 239520]), dtype: torch.float32
+2026-01-26 05:07:31,773 INFO [inference.py:310] Audio range: [-0.116, 0.111]
+2026-01-26 05:07:31,773 INFO [inference.py:311] Audio lengths: tensor([239519, 234240, 223840, 223360, 219679, 215680], dtype=torch.int32)
+2026-01-26 05:07:39,824 INFO [inference.py:332] Encoder out shape: torch.Size([6, 748, 1024])
+2026-01-26 05:07:39,824 INFO [inference.py:333] Encoder out lens: tensor([748, 731, 699, 697, 686, 673])
+2026-01-26 05:07:39,825 INFO [inference.py:334] Encoder out range: [-13.591, 10.919]
+2026-01-26 05:07:40,203 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:07:40,204 INFO [inference.py:346] First hypothesis: [23]
+2026-01-26 05:07:40,210 INFO [inference.py:309] Audio shape: torch.Size([5, 315200]), dtype: torch.float32
+2026-01-26 05:07:40,210 INFO [inference.py:310] Audio range: [-0.082, 0.158]
+2026-01-26 05:07:40,211 INFO [inference.py:311] Audio lengths: tensor([315200, 310560, 300000, 299680, 296959], dtype=torch.int32)
+2026-01-26 05:07:49,627 INFO [inference.py:332] Encoder out shape: torch.Size([5, 984, 1024])
+2026-01-26 05:07:49,628 INFO [inference.py:333] Encoder out lens: tensor([984, 970, 937, 936, 927])
+2026-01-26 05:07:49,628 INFO [inference.py:334] Encoder out range: [-14.589, 11.647]
+2026-01-26 05:07:50,241 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:07:50,241 INFO [inference.py:346] First hypothesis: [310, 20, 51, 46, 6, 155, 22, 51, 274, 73, 152, 25, 383, 73, 155, 22, 26, 93, 53, 183, 5, 73, 93, 269, 27, 5, 31]
+2026-01-26 05:07:50,247 INFO [inference.py:309] Audio shape: torch.Size([6, 237280]), dtype: torch.float32
+2026-01-26 05:07:50,248 INFO [inference.py:310] Audio range: [-0.130, 0.131]
+2026-01-26 05:07:50,248 INFO [inference.py:311] Audio lengths: tensor([237280, 228159, 220639, 220480, 219359, 213119], dtype=torch.int32)
+2026-01-26 05:07:58,004 INFO [inference.py:332] Encoder out shape: torch.Size([6, 741, 1024])
+2026-01-26 05:07:58,005 INFO [inference.py:333] Encoder out lens: tensor([741, 712, 689, 688, 685, 665])
+2026-01-26 05:07:58,005 INFO [inference.py:334] Encoder out range: [-13.120, 12.506]
+2026-01-26 05:07:58,629 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:07:58,630 INFO [inference.py:346] First hypothesis: [37, 349, 41, 18, 349, 41, 49, 101, 6]
+2026-01-26 05:07:58,637 INFO [inference.py:309] Audio shape: torch.Size([17, 91040]), dtype: torch.float32
+2026-01-26 05:07:58,637 INFO [inference.py:310] Audio range: [-0.574, 0.629]
+2026-01-26 05:07:58,638 INFO [inference.py:311] Audio lengths: tensor([91040, 90240, 89119, 88480, 87520, 86079, 83680, 82880, 81120, 79520,
+        79520, 78079, 76800, 76480, 73760, 73600, 73599], dtype=torch.int32)
+2026-01-26 05:08:06,806 INFO [inference.py:332] Encoder out shape: torch.Size([17, 284, 1024])
+2026-01-26 05:08:06,807 INFO [inference.py:333] Encoder out lens: tensor([284, 281, 278, 276, 273, 268, 261, 258, 253, 248, 248, 243, 239, 238,
+        230, 229, 229])
+2026-01-26 05:08:06,807 INFO [inference.py:334] Encoder out range: [-13.703, 11.821]
+2026-01-26 05:08:07,431 INFO [inference.py:344] Number of hypotheses: 17
+2026-01-26 05:08:07,432 INFO [inference.py:346] First hypothesis: [131, 214, 33, 259, 26, 101, 265, 6, 205]
+2026-01-26 05:08:07,438 INFO [inference.py:309] Audio shape: torch.Size([23, 68960]), dtype: torch.float32
+2026-01-26 05:08:07,439 INFO [inference.py:310] Audio range: [-0.269, 0.266]
+2026-01-26 05:08:07,439 INFO [inference.py:311] Audio lengths: tensor([68959, 66880, 64800, 64479, 61920, 59680, 54400, 53440, 52479, 52319,
+        51840, 46880, 46559, 45120, 44480, 43360, 43360, 43360, 43040, 43040,
+        43040, 42880, 42560], dtype=torch.int32)
+2026-01-26 05:08:16,530 INFO [inference.py:332] Encoder out shape: torch.Size([23, 215, 1024])
+2026-01-26 05:08:16,531 INFO [inference.py:333] Encoder out lens: tensor([215, 208, 202, 201, 193, 186, 169, 166, 163, 163, 161, 146, 145, 140,
+        138, 135, 135, 135, 134, 134, 134, 133, 132])
+2026-01-26 05:08:16,531 INFO [inference.py:334] Encoder out range: [-13.477, 12.445]
+2026-01-26 05:08:17,420 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:08:17,420 INFO [inference.py:346] First hypothesis: [225]
+2026-01-26 05:08:17,420 INFO [inference.py:535] Processed 391 utterances in 20 batches
+2026-01-26 05:08:17,426 INFO [inference.py:309] Audio shape: torch.Size([17, 92320]), dtype: torch.float32
+2026-01-26 05:08:17,427 INFO [inference.py:310] Audio range: [-0.234, 0.300]
+2026-01-26 05:08:17,427 INFO [inference.py:311] Audio lengths: tensor([92320, 91200, 91200, 90560, 89120, 84000, 83840, 83360, 82880, 82079,
+        79840, 79520, 76800, 73760, 73280, 70079, 69600], dtype=torch.int32)
+2026-01-26 05:08:25,743 INFO [inference.py:332] Encoder out shape: torch.Size([17, 288, 1024])
+2026-01-26 05:08:25,744 INFO [inference.py:333] Encoder out lens: tensor([288, 284, 284, 282, 278, 262, 261, 260, 258, 256, 249, 248, 239, 230,
+        228, 218, 217])
+2026-01-26 05:08:25,798 INFO [inference.py:334] Encoder out range: [-13.483, 12.297]
+2026-01-26 05:08:26,544 INFO [inference.py:344] Number of hypotheses: 17
+2026-01-26 05:08:26,545 INFO [inference.py:346] First hypothesis: [39, 52, 10, 7, 85, 58, 134, 5, 84, 189, 29, 14, 43, 8, 93, 130, 16, 34, 84]
+2026-01-26 05:08:26,602 INFO [inference.py:309] Audio shape: torch.Size([23, 68800]), dtype: torch.float32
+2026-01-26 05:08:26,604 INFO [inference.py:310] Audio range: [-0.321, 0.370]
+2026-01-26 05:08:26,604 INFO [inference.py:311] Audio lengths: tensor([68799, 66720, 62560, 62240, 61919, 60160, 59840, 58080, 57920, 57280,
+        53920, 52960, 51040, 50080, 49920, 49280, 48160, 48160, 47680, 47200,
+        44800, 44000, 42560], dtype=torch.int32)
+2026-01-26 05:08:34,725 INFO [inference.py:332] Encoder out shape: torch.Size([23, 214, 1024])
+2026-01-26 05:08:34,725 INFO [inference.py:333] Encoder out lens: tensor([214, 208, 195, 194, 193, 187, 186, 181, 180, 178, 168, 165, 159, 156,
+        155, 153, 150, 150, 148, 147, 139, 137, 132])
+2026-01-26 05:08:34,726 INFO [inference.py:334] Encoder out range: [-11.273, 12.003]
+2026-01-26 05:08:35,331 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:08:35,331 INFO [inference.py:346] First hypothesis: [218, 4, 2, 11]
+2026-01-26 05:08:35,338 INFO [inference.py:309] Audio shape: torch.Size([38, 42080]), dtype: torch.float32
+2026-01-26 05:08:35,339 INFO [inference.py:310] Audio range: [-0.400, 0.452]
+2026-01-26 05:08:35,340 INFO [inference.py:311] Audio lengths: tensor([42080, 39200, 37439, 36960, 35520, 34560, 34079, 33599, 33600, 33280,
+        31520, 31200, 29760, 28160, 28000, 27200, 26720, 25600, 25120, 23200,
+        22880, 21280, 20800, 20000, 19680, 19520, 19200, 18080, 17600, 17600,
+        16320, 13120, 12320, 11680,  8000,  6400,  5120,  3840],
+       dtype=torch.int32)
+2026-01-26 05:08:43,838 INFO [inference.py:332] Encoder out shape: torch.Size([38, 131, 1024])
+2026-01-26 05:08:43,839 INFO [inference.py:333] Encoder out lens: tensor([131, 122, 116, 115, 110, 107, 106, 104, 104, 103,  98,  97,  92,  87,
+         87,  84,  83,  79,  78,  72,  71,  66,  64,  62,  61,  60,  59,  56,
+         54,  54,  50,  40,  38,  36,  24,  19,  15,  11])
+2026-01-26 05:08:43,839 INFO [inference.py:334] Encoder out range: [-11.872, 11.798]
+2026-01-26 05:08:44,627 INFO [inference.py:344] Number of hypotheses: 38
+2026-01-26 05:08:44,627 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:08:44,634 INFO [inference.py:309] Audio shape: torch.Size([5, 280640]), dtype: torch.float32
+2026-01-26 05:08:44,635 INFO [inference.py:310] Audio range: [-0.100, 0.092]
+2026-01-26 05:08:44,635 INFO [inference.py:311] Audio lengths: tensor([280639, 280640, 272800, 270080, 256480], dtype=torch.int32)
+2026-01-26 05:08:53,033 INFO [inference.py:332] Encoder out shape: torch.Size([5, 876, 1024])
+2026-01-26 05:08:53,034 INFO [inference.py:333] Encoder out lens: tensor([876, 876, 852, 843, 801])
+2026-01-26 05:08:53,034 INFO [inference.py:334] Encoder out range: [-14.497, 12.570]
+2026-01-26 05:08:53,729 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:08:53,730 INFO [inference.py:346] First hypothesis: [68, 13, 211, 25, 294, 6, 344, 131, 214, 56, 18, 34, 146, 16, 56, 46, 136, 40, 26, 5, 156, 6, 189]
+2026-01-26 05:08:53,736 INFO [inference.py:309] Audio shape: torch.Size([6, 248640]), dtype: torch.float32
+2026-01-26 05:08:53,737 INFO [inference.py:310] Audio range: [-0.080, 0.094]
+2026-01-26 05:08:53,738 INFO [inference.py:311] Audio lengths: tensor([248639, 242720, 233119, 227199, 217440, 216479], dtype=torch.int32)
+2026-01-26 05:09:02,511 INFO [inference.py:332] Encoder out shape: torch.Size([6, 776, 1024])
+2026-01-26 05:09:02,511 INFO [inference.py:333] Encoder out lens: tensor([776, 758, 728, 709, 679, 676])
+2026-01-26 05:09:02,512 INFO [inference.py:334] Encoder out range: [-12.796, 12.210]
+2026-01-26 05:09:03,260 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:09:03,260 INFO [inference.py:346] First hypothesis: [11, 87, 7, 5, 58, 134, 5, 71, 67, 6, 195, 25, 98, 36, 67, 51, 121, 26, 117, 10, 208, 13, 265, 39, 9, 100]
+2026-01-26 05:09:03,270 INFO [inference.py:309] Audio shape: torch.Size([13, 117120]), dtype: torch.float32
+2026-01-26 05:09:03,270 INFO [inference.py:310] Audio range: [-0.283, 0.260]
+2026-01-26 05:09:03,271 INFO [inference.py:311] Audio lengths: tensor([117120, 111680, 107200, 106720, 106239, 104639, 104480, 101920, 100960,
+         98880,  96960,  93920,  93600], dtype=torch.int32)
+2026-01-26 05:09:11,401 INFO [inference.py:332] Encoder out shape: torch.Size([13, 365, 1024])
+2026-01-26 05:09:11,402 INFO [inference.py:333] Encoder out lens: tensor([365, 348, 334, 333, 331, 326, 326, 318, 315, 308, 302, 293, 292])
+2026-01-26 05:09:11,402 INFO [inference.py:334] Encoder out range: [-12.112, 13.452]
+2026-01-26 05:09:12,027 INFO [inference.py:344] Number of hypotheses: 13
+2026-01-26 05:09:12,027 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:09:12,035 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
+2026-01-26 05:09:12,036 INFO [inference.py:310] Audio range: [-0.233, 0.248]
+2026-01-26 05:09:12,037 INFO [inference.py:311] Audio lengths: tensor([67200, 67039, 66079, 62079, 61760, 60480, 59520, 58080, 57760, 54239,
+        54080, 54080, 52960, 50080, 49920, 49280, 49119, 47840, 47840, 46720,
+        45600, 44800, 44000], dtype=torch.int32)
+2026-01-26 05:09:19,942 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
+2026-01-26 05:09:19,943 INFO [inference.py:333] Encoder out lens: tensor([209, 209, 206, 193, 192, 188, 185, 181, 180, 169, 168, 168, 165, 156,
+        155, 153, 153, 149, 149, 145, 142, 139, 137])
+2026-01-26 05:09:19,943 INFO [inference.py:334] Encoder out range: [-14.993, 12.111]
+2026-01-26 05:09:20,821 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:09:20,821 INFO [inference.py:346] First hypothesis: [18, 34, 174, 15, 44, 84, 15, 34, 81, 7, 69, 57, 101]
+2026-01-26 05:09:20,829 INFO [inference.py:309] Audio shape: torch.Size([6, 269120]), dtype: torch.float32
+2026-01-26 05:09:20,830 INFO [inference.py:310] Audio range: [-0.516, 0.413]
+2026-01-26 05:09:20,830 INFO [inference.py:311] Audio lengths: tensor([269119, 263680, 262719, 262559, 258240, 249759], dtype=torch.int32)
+2026-01-26 05:09:30,645 INFO [inference.py:332] Encoder out shape: torch.Size([6, 840, 1024])
+2026-01-26 05:09:30,645 INFO [inference.py:333] Encoder out lens: tensor([840, 823, 820, 820, 806, 780])
+2026-01-26 05:09:30,646 INFO [inference.py:334] Encoder out range: [-11.696, 10.834]
+2026-01-26 05:09:31,125 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:09:31,125 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:09:31,132 INFO [inference.py:309] Audio shape: torch.Size([6, 241440]), dtype: torch.float32
+2026-01-26 05:09:31,133 INFO [inference.py:310] Audio range: [-0.067, 0.106]
+2026-01-26 05:09:31,134 INFO [inference.py:311] Audio lengths: tensor([241440, 240479, 238079, 236800, 224800, 224159], dtype=torch.int32)
+2026-01-26 05:09:39,233 INFO [inference.py:332] Encoder out shape: torch.Size([6, 754, 1024])
+2026-01-26 05:09:39,234 INFO [inference.py:333] Encoder out lens: tensor([754, 751, 743, 739, 702, 700])
+2026-01-26 05:09:39,234 INFO [inference.py:334] Encoder out range: [-13.524, 12.974]
+2026-01-26 05:09:39,812 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:09:39,812 INFO [inference.py:346] First hypothesis: [87, 7, 5, 51, 195, 25, 6, 362, 39, 114, 38, 65, 18, 354, 16, 38, 89, 174]
+2026-01-26 05:09:39,820 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
+2026-01-26 05:09:39,821 INFO [inference.py:310] Audio range: [-0.180, 0.177]
+2026-01-26 05:09:39,821 INFO [inference.py:311] Audio lengths: tensor([68000, 66080, 65120, 64319, 64000, 60960, 58880, 58400, 58240, 57600,
+        50239, 49760, 48480, 48480, 47520, 47200, 46560, 46080, 44960, 44480,
+        43200, 42719, 42240], dtype=torch.int32)
+2026-01-26 05:09:48,028 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
+2026-01-26 05:09:48,029 INFO [inference.py:333] Encoder out lens: tensor([212, 206, 203, 200, 199, 190, 183, 182, 181, 179, 156, 155, 151, 151,
+        148, 147, 145, 143, 140, 138, 134, 133, 131])
+2026-01-26 05:09:48,029 INFO [inference.py:334] Encoder out range: [-13.762, 11.575]
+2026-01-26 05:09:48,731 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:09:48,731 INFO [inference.py:346] First hypothesis: [61, 51, 184, 13, 4, 140, 5, 169, 93, 25, 6, 260]
+2026-01-26 05:09:48,731 INFO [inference.py:535] Processed 551 utterances in 30 batches
+2026-01-26 05:09:48,738 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
+2026-01-26 05:09:48,739 INFO [inference.py:310] Audio range: [-0.122, 0.148]
+2026-01-26 05:09:48,739 INFO [inference.py:311] Audio lengths: tensor([317280, 311840, 309600, 301120, 295680], dtype=torch.int32)
+2026-01-26 05:09:58,852 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
+2026-01-26 05:09:58,853 INFO [inference.py:333] Encoder out lens: tensor([991, 974, 967, 940, 923])
+2026-01-26 05:09:58,853 INFO [inference.py:334] Encoder out range: [-13.657, 13.923]
+2026-01-26 05:09:59,738 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:09:59,739 INFO [inference.py:346] First hypothesis: [39, 231, 32, 51, 49, 25, 93, 221, 18, 118, 159, 6, 4, 27, 5, 8, 93, 193, 39, 6, 130, 18, 7, 69, 176, 33, 152, 25, 284, 251, 205]
+2026-01-26 05:09:59,746 INFO [inference.py:309] Audio shape: torch.Size([24, 64160]), dtype: torch.float32
+2026-01-26 05:09:59,747 INFO [inference.py:310] Audio range: [-0.274, 0.264]
+2026-01-26 05:09:59,748 INFO [inference.py:311] Audio lengths: tensor([64160, 61760, 61759, 61760, 59520, 58720, 57280, 55840, 55520, 54720,
+        51520, 50880, 50880, 50720, 49600, 49440, 49280, 47839, 46719, 46399,
+        45279, 43999, 43520, 42240], dtype=torch.int32)
+2026-01-26 05:10:08,023 INFO [inference.py:332] Encoder out shape: torch.Size([24, 200, 1024])
+2026-01-26 05:10:08,024 INFO [inference.py:333] Encoder out lens: tensor([200, 192, 192, 192, 185, 183, 178, 174, 173, 170, 160, 158, 158, 158,
+        154, 154, 153, 149, 145, 144, 141, 137, 135, 131])
+2026-01-26 05:10:08,024 INFO [inference.py:334] Encoder out range: [-13.370, 11.318]
+2026-01-26 05:10:08,807 INFO [inference.py:344] Number of hypotheses: 24
+2026-01-26 05:10:08,807 INFO [inference.py:346] First hypothesis: [61, 49, 39, 17, 7, 5, 64, 9, 115, 16, 34, 57, 7, 5, 206, 221, 16]
+2026-01-26 05:10:08,814 INFO [inference.py:309] Audio shape: torch.Size([5, 287520]), dtype: torch.float32
+2026-01-26 05:10:08,814 INFO [inference.py:310] Audio range: [-0.099, 0.090]
+2026-01-26 05:10:08,815 INFO [inference.py:311] Audio lengths: tensor([287520, 283360, 264959, 261760, 259360], dtype=torch.int32)
+2026-01-26 05:10:16,810 INFO [inference.py:332] Encoder out shape: torch.Size([5, 898, 1024])
+2026-01-26 05:10:16,811 INFO [inference.py:333] Encoder out lens: tensor([898, 885, 827, 817, 810])
+2026-01-26 05:10:16,811 INFO [inference.py:334] Encoder out range: [-13.209, 12.373]
+2026-01-26 05:10:17,639 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:10:17,640 INFO [inference.py:346] First hypothesis: [231, 178, 51, 144, 193, 46, 13, 38, 86, 120, 194, 16, 95, 13, 104, 19, 36, 26, 87, 20, 193]
+2026-01-26 05:10:17,648 INFO [inference.py:309] Audio shape: torch.Size([5, 310720]), dtype: torch.float32
+2026-01-26 05:10:17,649 INFO [inference.py:310] Audio range: [-0.046, 0.111]
+2026-01-26 05:10:17,650 INFO [inference.py:311] Audio lengths: tensor([310719, 308639, 298560, 294880, 293759], dtype=torch.int32)
+2026-01-26 05:10:26,341 INFO [inference.py:332] Encoder out shape: torch.Size([5, 970, 1024])
+2026-01-26 05:10:26,342 INFO [inference.py:333] Encoder out lens: tensor([970, 964, 932, 921, 917])
+2026-01-26 05:10:26,342 INFO [inference.py:334] Encoder out range: [-13.787, 12.644]
+2026-01-26 05:10:26,950 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:10:26,951 INFO [inference.py:346] First hypothesis: [17, 7, 5, 46, 48, 66, 48, 84, 51, 66]
+2026-01-26 05:10:26,958 INFO [inference.py:309] Audio shape: torch.Size([38, 41600]), dtype: torch.float32
+2026-01-26 05:10:26,959 INFO [inference.py:310] Audio range: [-0.528, 0.544]
+2026-01-26 05:10:26,960 INFO [inference.py:311] Audio lengths: tensor([41599, 39200, 37119, 36799, 34400, 34079, 33439, 32960, 31200, 31200,
+        26400, 25600, 24000, 22560, 22080, 21919, 21920, 21280, 20799, 19360,
+        18880, 18880, 17600, 17440, 15200, 13760, 12640, 11360,  5760,  5280,
+         5120,  4640,  4320,  3840,  3680,  3360,  3360,  3200],
+       dtype=torch.int32)
+2026-01-26 05:10:34,801 INFO [inference.py:332] Encoder out shape: torch.Size([38, 129, 1024])
+2026-01-26 05:10:34,802 INFO [inference.py:333] Encoder out lens: tensor([129, 122, 115, 114, 107, 106, 104, 102,  97,  97,  82,  79,  74,  70,
+         68,  68,  68,  66,  64,  60,  58,  58,  54,  54,  47,  42,  39,  35,
+         17,  16,  15,  14,  13,  11,  11,  10,  10,   9])
+2026-01-26 05:10:34,802 INFO [inference.py:334] Encoder out range: [-12.505, 11.696]
+2026-01-26 05:10:35,305 INFO [inference.py:344] Number of hypotheses: 38
+2026-01-26 05:10:35,305 INFO [inference.py:346] First hypothesis: []
+2026-01-26 05:10:35,312 INFO [inference.py:309] Audio shape: torch.Size([11, 143680]), dtype: torch.float32
+2026-01-26 05:10:35,312 INFO [inference.py:310] Audio range: [-0.331, 0.228]
+2026-01-26 05:10:35,313 INFO [inference.py:311] Audio lengths: tensor([143680, 143360, 143200, 137439, 130559, 129279, 128960, 125280, 125280,
+        123040, 118079], dtype=torch.int32)
+2026-01-26 05:10:44,633 INFO [inference.py:332] Encoder out shape: torch.Size([11, 448, 1024])
+2026-01-26 05:10:44,633 INFO [inference.py:333] Encoder out lens: tensor([448, 447, 447, 429, 407, 403, 402, 391, 391, 384, 368])
+2026-01-26 05:10:44,634 INFO [inference.py:334] Encoder out range: [-12.574, 13.090]
+2026-01-26 05:10:45,323 INFO [inference.py:344] Number of hypotheses: 11
+2026-01-26 05:10:45,323 INFO [inference.py:346] First hypothesis: [11, 37, 9, 102, 18, 230, 95, 6, 24, 64, 16, 163, 73, 6, 165, 5, 17, 171, 15, 267, 153, 5]
+2026-01-26 05:10:45,330 INFO [inference.py:309] Audio shape: torch.Size([5, 283680]), dtype: torch.float32
+2026-01-26 05:10:45,331 INFO [inference.py:310] Audio range: [-0.096, 0.119]
+2026-01-26 05:10:45,332 INFO [inference.py:311] Audio lengths: tensor([283680, 281119, 271360, 262560, 252479], dtype=torch.int32)
+2026-01-26 05:10:53,624 INFO [inference.py:332] Encoder out shape: torch.Size([5, 886, 1024])
+2026-01-26 05:10:53,625 INFO [inference.py:333] Encoder out lens: tensor([886, 878, 847, 820, 788])
+2026-01-26 05:10:53,625 INFO [inference.py:334] Encoder out range: [-12.921, 13.557]
+2026-01-26 05:10:54,461 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:10:54,462 INFO [inference.py:346] First hypothesis: [112, 81, 230, 38, 105, 34, 16, 34, 200, 294, 171, 15, 58, 134, 29, 16, 122, 25, 6, 25, 6, 16, 29, 119, 5, 25, 6, 205, 5, 17, 220]
+2026-01-26 05:10:54,467 INFO [inference.py:309] Audio shape: torch.Size([23, 68800]), dtype: torch.float32
+2026-01-26 05:10:54,468 INFO [inference.py:310] Audio range: [-0.356, 0.274]
+2026-01-26 05:10:54,468 INFO [inference.py:311] Audio lengths: tensor([68800, 65600, 64800, 64480, 62400, 58079, 57119, 56159, 54560, 53920,
+        53920, 51840, 51520, 49280, 49280, 47519, 46240, 45280, 44960, 44960,
+        44480, 43680, 42560], dtype=torch.int32)
+2026-01-26 05:11:03,138 INFO [inference.py:332] Encoder out shape: torch.Size([23, 214, 1024])
+2026-01-26 05:11:03,139 INFO [inference.py:333] Encoder out lens: tensor([214, 204, 202, 201, 194, 181, 178, 175, 170, 168, 168, 161, 160, 153,
+        153, 148, 144, 141, 140, 140, 138, 136, 132])
+2026-01-26 05:11:03,139 INFO [inference.py:334] Encoder out range: [-13.047, 12.227]
+2026-01-26 05:11:04,047 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:11:04,048 INFO [inference.py:346] First hypothesis: [57, 33, 193, 16, 48, 66, 33, 13, 74, 19, 201]
+2026-01-26 05:11:04,053 INFO [inference.py:309] Audio shape: torch.Size([11, 139520]), dtype: torch.float32
+2026-01-26 05:11:04,054 INFO [inference.py:310] Audio range: [-0.117, 0.153]
+2026-01-26 05:11:04,054 INFO [inference.py:311] Audio lengths: tensor([139520, 139200, 138880, 138079, 137440, 134720, 128320, 124000, 121600,
+        120160, 118240], dtype=torch.int32)
+2026-01-26 05:11:12,432 INFO [inference.py:332] Encoder out shape: torch.Size([11, 435, 1024])
+2026-01-26 05:11:12,433 INFO [inference.py:333] Encoder out lens: tensor([435, 434, 433, 431, 429, 420, 400, 387, 379, 375, 369])
+2026-01-26 05:11:12,433 INFO [inference.py:334] Encoder out range: [-13.984, 12.798]
+2026-01-26 05:11:13,304 INFO [inference.py:344] Number of hypotheses: 11
+2026-01-26 05:11:13,304 INFO [inference.py:346] First hypothesis: [105, 206, 66, 5, 18, 47, 236, 49]
+2026-01-26 05:11:13,310 INFO [inference.py:309] Audio shape: torch.Size([6, 248640]), dtype: torch.float32
+2026-01-26 05:11:13,311 INFO [inference.py:310] Audio range: [-0.155, 0.171]
+2026-01-26 05:11:13,312 INFO [inference.py:311] Audio lengths: tensor([248639, 231359, 228480, 225440, 223360, 212800], dtype=torch.int32)
+2026-01-26 05:11:22,125 INFO [inference.py:332] Encoder out shape: torch.Size([6, 776, 1024])
+2026-01-26 05:11:22,126 INFO [inference.py:333] Encoder out lens: tensor([776, 722, 713, 704, 697, 664])
+2026-01-26 05:11:22,126 INFO [inference.py:334] Encoder out range: [-13.353, 11.759]
+2026-01-26 05:11:22,718 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:11:22,719 INFO [inference.py:346] First hypothesis: [39, 93, 25, 6, 24, 6, 130, 117, 66, 100, 87, 7, 5, 110, 17, 172, 51, 9, 51, 49, 26, 87, 23, 6, 66, 33, 6, 91, 18, 7, 27, 46, 333, 130, 33, 357, 136, 21, 24]
+2026-01-26 05:11:22,719 INFO [inference.py:535] Processed 684 utterances in 40 batches
+2026-01-26 05:11:22,724 INFO [inference.py:309] Audio shape: torch.Size([9, 173600]), dtype: torch.float32
+2026-01-26 05:11:22,725 INFO [inference.py:310] Audio range: [-0.501, 0.295]
+2026-01-26 05:11:22,725 INFO [inference.py:311] Audio lengths: tensor([173599, 168319, 161760, 161760, 159040, 158719, 156800, 148000, 147359],
+       dtype=torch.int32)
+2026-01-26 05:11:31,816 INFO [inference.py:332] Encoder out shape: torch.Size([9, 542, 1024])
+2026-01-26 05:11:31,816 INFO [inference.py:333] Encoder out lens: tensor([542, 525, 505, 505, 496, 495, 489, 462, 460])
+2026-01-26 05:11:31,817 INFO [inference.py:334] Encoder out range: [-11.960, 12.653]
+2026-01-26 05:11:32,422 INFO [inference.py:344] Number of hypotheses: 9
+2026-01-26 05:11:32,422 INFO [inference.py:346] First hypothesis: [11, 159, 110, 17, 50, 6, 51, 195, 25, 34, 13, 260, 6, 395, 195, 26, 17, 18, 118, 53, 86, 56]
+2026-01-26 05:11:32,428 INFO [inference.py:309] Audio shape: torch.Size([11, 144640]), dtype: torch.float32
+2026-01-26 05:11:32,429 INFO [inference.py:310] Audio range: [-0.332, 0.358]
+2026-01-26 05:11:32,429 INFO [inference.py:311] Audio lengths: tensor([144639, 143520, 140159, 139840, 133760, 128159, 128159, 124000, 119680,
+        119200, 119040], dtype=torch.int32)
+2026-01-26 05:11:41,731 INFO [inference.py:332] Encoder out shape: torch.Size([11, 451, 1024])
+2026-01-26 05:11:41,731 INFO [inference.py:333] Encoder out lens: tensor([451, 448, 437, 436, 417, 400, 400, 387, 373, 372, 371])
+2026-01-26 05:11:41,732 INFO [inference.py:334] Encoder out range: [-13.569, 12.367]
+2026-01-26 05:11:42,844 INFO [inference.py:344] Number of hypotheses: 11
+2026-01-26 05:11:42,844 INFO [inference.py:346] First hypothesis: [51, 419, 26, 15, 72, 113, 6, 91, 33]
+2026-01-26 05:11:42,850 INFO [inference.py:309] Audio shape: torch.Size([13, 116640]), dtype: torch.float32
+2026-01-26 05:11:42,850 INFO [inference.py:310] Audio range: [-0.268, 0.323]
+2026-01-26 05:11:42,851 INFO [inference.py:311] Audio lengths: tensor([116640, 116000, 112799, 110240, 104319, 101919, 100799, 100800,  98400,
+         96480,  95039,  93920,  93600], dtype=torch.int32)
+2026-01-26 05:11:50,812 INFO [inference.py:332] Encoder out shape: torch.Size([13, 364, 1024])
+2026-01-26 05:11:50,813 INFO [inference.py:333] Encoder out lens: tensor([364, 362, 352, 344, 325, 318, 314, 314, 307, 301, 296, 293, 292])
+2026-01-26 05:11:50,813 INFO [inference.py:334] Encoder out range: [-11.927, 13.414]
+2026-01-26 05:11:51,360 INFO [inference.py:344] Number of hypotheses: 13
+2026-01-26 05:11:51,360 INFO [inference.py:346] First hypothesis: [111, 114, 157, 57, 152, 25, 130, 101, 210, 96]
+2026-01-26 05:11:51,366 INFO [inference.py:309] Audio shape: torch.Size([13, 114560]), dtype: torch.float32
+2026-01-26 05:11:51,367 INFO [inference.py:310] Audio range: [-0.431, 0.430]
+2026-01-26 05:11:51,368 INFO [inference.py:311] Audio lengths: tensor([114559, 111359, 110240, 108639, 107840, 103519, 102240, 101759, 101120,
+        100639,  98560,  97760,  97759], dtype=torch.int32)
+2026-01-26 05:11:59,310 INFO [inference.py:332] Encoder out shape: torch.Size([13, 357, 1024])
+2026-01-26 05:11:59,311 INFO [inference.py:333] Encoder out lens: tensor([357, 347, 344, 339, 336, 323, 319, 317, 315, 314, 307, 305, 305])
+2026-01-26 05:11:59,311 INFO [inference.py:334] Encoder out range: [-11.355, 12.943]
+2026-01-26 05:11:59,902 INFO [inference.py:344] Number of hypotheses: 13
+2026-01-26 05:11:59,902 INFO [inference.py:346] First hypothesis: [264, 48]
+2026-01-26 05:11:59,908 INFO [inference.py:309] Audio shape: torch.Size([6, 243200]), dtype: torch.float32
+2026-01-26 05:11:59,909 INFO [inference.py:310] Audio range: [-0.339, 0.341]
+2026-01-26 05:11:59,909 INFO [inference.py:311] Audio lengths: tensor([243200, 242079, 241760, 237920, 231679, 212799], dtype=torch.int32)
+2026-01-26 05:12:08,215 INFO [inference.py:332] Encoder out shape: torch.Size([6, 759, 1024])
+2026-01-26 05:12:08,215 INFO [inference.py:333] Encoder out lens: tensor([759, 756, 755, 743, 723, 664])
+2026-01-26 05:12:08,216 INFO [inference.py:334] Encoder out range: [-13.935, 11.852]
+2026-01-26 05:12:08,701 INFO [inference.py:344] Number of hypotheses: 6
+2026-01-26 05:12:08,701 INFO [inference.py:346] First hypothesis: [66, 89, 174, 20]
+2026-01-26 05:12:08,707 INFO [inference.py:309] Audio shape: torch.Size([5, 275520]), dtype: torch.float32
+2026-01-26 05:12:08,708 INFO [inference.py:310] Audio range: [-0.126, 0.148]
+2026-01-26 05:12:08,709 INFO [inference.py:311] Audio lengths: tensor([275520, 274880, 274880, 263999, 254879], dtype=torch.int32)
+2026-01-26 05:12:16,831 INFO [inference.py:332] Encoder out shape: torch.Size([5, 860, 1024])
+2026-01-26 05:12:16,831 INFO [inference.py:333] Encoder out lens: tensor([860, 858, 858, 824, 796])
+2026-01-26 05:12:16,832 INFO [inference.py:334] Encoder out range: [-12.819, 13.634]
+2026-01-26 05:12:17,414 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:12:17,414 INFO [inference.py:346] First hypothesis: [11, 39, 51, 49, 46, 171, 81, 7, 69]
+2026-01-26 05:12:17,420 INFO [inference.py:309] Audio shape: torch.Size([38, 41920]), dtype: torch.float32
+2026-01-26 05:12:17,421 INFO [inference.py:310] Audio range: [-0.380, 0.393]
+2026-01-26 05:12:17,422 INFO [inference.py:311] Audio lengths: tensor([41919, 41760, 41599, 38560, 38080, 37440, 34400, 33600, 32159, 29120,
+        27200, 26560, 25600, 24800, 23680, 23520, 23360, 19680, 18880, 16160,
+        15360, 15200, 14880, 13600, 13440, 10080,  7840,  6720,  6400,  6080,
+         6080,  5600,  5440,  5120,  4640,  4000,  3840,  3520],
+       dtype=torch.int32)
+2026-01-26 05:12:25,625 INFO [inference.py:332] Encoder out shape: torch.Size([38, 130, 1024])
+2026-01-26 05:12:25,626 INFO [inference.py:333] Encoder out lens: tensor([130, 130, 129, 120, 118, 116, 107, 104, 100,  90,  84,  82,  79,  77,
+         73,  73,  72,  61,  58,  50,  47,  47,  46,  42,  41,  31,  24,  20,
+         19,  18,  18,  17,  16,  15,  14,  12,  11,  10])
+2026-01-26 05:12:25,626 INFO [inference.py:334] Encoder out range: [-12.608, 11.500]
+2026-01-26 05:12:26,111 INFO [inference.py:344] Number of hypotheses: 38
+2026-01-26 05:12:26,111 INFO [inference.py:346] First hypothesis: [11]
+2026-01-26 05:12:26,117 INFO [inference.py:309] Audio shape: torch.Size([5, 289760]), dtype: torch.float32
+2026-01-26 05:12:26,117 INFO [inference.py:310] Audio range: [-0.259, 0.249]
+2026-01-26 05:12:26,118 INFO [inference.py:311] Audio lengths: tensor([289760, 283039, 277760, 261599, 250080], dtype=torch.int32)
+2026-01-26 05:12:34,901 INFO [inference.py:332] Encoder out shape: torch.Size([5, 905, 1024])
+2026-01-26 05:12:34,902 INFO [inference.py:333] Encoder out lens: tensor([905, 884, 867, 817, 781])
+2026-01-26 05:12:34,903 INFO [inference.py:334] Encoder out range: [-12.988, 13.561]
+2026-01-26 05:12:35,738 INFO [inference.py:344] Number of hypotheses: 5
+2026-01-26 05:12:35,738 INFO [inference.py:346] First hypothesis: [6, 290, 20, 48, 33, 238, 205, 37, 48, 265, 274]
+2026-01-26 05:12:35,744 INFO [inference.py:309] Audio shape: torch.Size([23, 68640]), dtype: torch.float32
+2026-01-26 05:12:35,745 INFO [inference.py:310] Audio range: [-0.114, 0.158]
+2026-01-26 05:12:35,745 INFO [inference.py:311] Audio lengths: tensor([68640, 67680, 66719, 66080, 65759, 65600, 64159, 64159, 61119, 60000,
+        56800, 56639, 53760, 53440, 52640, 52479, 50720, 50400, 49760, 46880,
+        46080, 45280, 45120], dtype=torch.int32)
+2026-01-26 05:12:44,120 INFO [inference.py:332] Encoder out shape: torch.Size([23, 214, 1024])
+2026-01-26 05:12:44,120 INFO [inference.py:333] Encoder out lens: tensor([214, 211, 208, 206, 205, 204, 200, 200, 190, 187, 177, 176, 167, 166,
+        164, 163, 158, 157, 155, 146, 143, 141, 140])
+2026-01-26 05:12:44,121 INFO [inference.py:334] Encoder out range: [-13.289, 13.747]
+2026-01-26 05:12:44,824 INFO [inference.py:344] Number of hypotheses: 23
+2026-01-26 05:12:44,824 INFO [inference.py:346] First hypothesis: [89, 186, 32, 7, 8, 234, 13]
+2026-01-26 05:12:44,831 INFO [inference.py:309] Audio shape: torch.Size([40, 39520]), dtype: torch.float32
+2026-01-26 05:12:44,832 INFO [inference.py:310] Audio range: [-0.170, 0.217]
+2026-01-26 05:12:44,832 INFO [inference.py:311] Audio lengths: tensor([39520, 38720, 37760, 36800, 36320, 36159, 34720, 33919, 32640, 31200,
+        29760, 28479, 27840, 27840, 24320, 23040, 21120, 20639, 17920, 16800,
+        16160, 15840, 14720, 14560, 14560, 14400, 13760, 11520,  9920,  9919,
+         9760,  9120,  7840,  7360,  7040,  5440,  4960,  4960,  4800,  4320],
+       dtype=torch.int32)
+2026-01-26 05:12:52,729 INFO [inference.py:332] Encoder out shape: torch.Size([40, 123, 1024])
+2026-01-26 05:12:52,730 INFO [inference.py:333] Encoder out lens: tensor([123, 120, 117, 114, 113, 112, 108, 105, 101,  97,  92,  88,  86,  86,
+         75,  71,  65,  64,  55,  52,  50,  49,  45,  45,  45,  44,  42,  35,
+         30,  30,  30,  28,  24,  22,  21,  16,  15,  15,  14,  13])
+2026-01-26 05:12:52,730 INFO [inference.py:334] Encoder out range: [-11.403, 12.142]
+2026-01-26 05:12:53,318 INFO [inference.py:344] Number of hypotheses: 40
+2026-01-26 05:12:53,319 INFO [inference.py:346] First hypothesis: [89]
+2026-01-26 05:12:53,319 INFO [inference.py:535] Processed 847 utterances in 50 batches
+2026-01-26 05:12:53,326 INFO [inference.py:309] Audio shape: torch.Size([9, 176320]), dtype: torch.float32
+2026-01-26 05:12:53,326 INFO [inference.py:310] Audio range: [-0.145, 0.173]
+2026-01-26 05:12:53,327 INFO [inference.py:311] Audio lengths: tensor([176320, 174879, 170880, 161280, 161120, 158880, 155039, 153760, 146079],
+       dtype=torch.int32)

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-13-05 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:13:05,965 INFO [inference.py:617] ================================================================================
+2026-01-26 05:13:05,965 INFO [inference.py:618] XLSR-Transducer Inference on AMI
+2026-01-26 05:13:05,965 INFO [inference.py:619] ================================================================================
+2026-01-26 05:13:05,965 INFO [inference.py:620] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:13:05,965 INFO [inference.py:621] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:13:05,965 INFO [inference.py:622] Test set: ihm
+2026-01-26 05:13:05,965 INFO [inference.py:623] Decoding method: modified_beam_search
+2026-01-26 05:13:05,966 INFO [inference.py:625] Beam size: 4
+2026-01-26 05:13:05,966 INFO [inference.py:626] Max states: 64
+2026-01-26 05:13:05,966 INFO [inference.py:627] Max symbols per frame: 3
+2026-01-26 05:13:05,966 INFO [inference.py:633] Device: cpu
+2026-01-26 05:13:05,966 INFO [inference.py:636] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:13:05,967 INFO [inference.py:644] Vocabulary size: 500
+2026-01-26 05:13:05,967 INFO [inference.py:645] Blank ID: 0
+2026-01-26 05:13:05,967 INFO [inference.py:648] Creating model
+2026-01-26 05:13:07,626 INFO [inference.py:655] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:13:07,626 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:13:12,816 INFO [inference.py:684] Number of model parameters: 317,511,772
+2026-01-26 05:13:12,816 INFO [inference.py:687] Loading test data
+2026-01-26 05:13:12,816 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:13:13,985 INFO [inference.py:698] Number of test utterances: 6676
+2026-01-26 05:13:13,986 INFO [inference.py:701] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-14-59 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:14:59,640 INFO [inference.py:625] ================================================================================
+2026-01-26 05:14:59,640 INFO [inference.py:626] XLSR-Transducer Inference on AMI
+2026-01-26 05:14:59,640 INFO [inference.py:627] ================================================================================
+2026-01-26 05:14:59,640 INFO [inference.py:628] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:14:59,640 INFO [inference.py:629] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:14:59,640 INFO [inference.py:630] Test set: ihm
+2026-01-26 05:14:59,640 INFO [inference.py:631] Decoding method: modified_beam_search
+2026-01-26 05:14:59,640 INFO [inference.py:633] Beam size: 4
+2026-01-26 05:14:59,640 INFO [inference.py:634] Max states: 64
+2026-01-26 05:14:59,640 INFO [inference.py:635] Max symbols per frame: 3
+2026-01-26 05:14:59,640 INFO [inference.py:641] Device: cpu
+2026-01-26 05:14:59,640 INFO [inference.py:644] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:14:59,642 INFO [inference.py:652] Vocabulary size: 500
+2026-01-26 05:14:59,642 INFO [inference.py:653] Blank ID: 0
+2026-01-26 05:14:59,642 INFO [inference.py:656] Creating model
+2026-01-26 05:15:01,252 INFO [inference.py:663] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:15:01,252 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:15:06,040 INFO [inference.py:692] Number of model parameters: 317,511,772
+2026-01-26 05:15:06,040 INFO [inference.py:695] Loading test data
+2026-01-26 05:15:06,040 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:15:07,133 INFO [inference.py:706] Number of test utterances: 6676
+2026-01-26 05:15:07,133 INFO [inference.py:709] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-17-40 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:17:40,782 INFO [inference.py:622] ================================================================================
+2026-01-26 05:17:40,782 INFO [inference.py:623] XLSR-Transducer Inference on AMI
+2026-01-26 05:17:40,782 INFO [inference.py:624] ================================================================================
+2026-01-26 05:17:40,782 INFO [inference.py:625] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:17:40,782 INFO [inference.py:626] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:17:40,783 INFO [inference.py:627] Test set: ihm
+2026-01-26 05:17:40,783 INFO [inference.py:628] Decoding method: modified_beam_search
+2026-01-26 05:17:40,783 INFO [inference.py:630] Beam size: 4
+2026-01-26 05:17:40,783 INFO [inference.py:631] Max states: 64
+2026-01-26 05:17:40,783 INFO [inference.py:632] Max symbols per frame: 3
+2026-01-26 05:17:40,783 INFO [inference.py:638] Device: cpu
+2026-01-26 05:17:40,783 INFO [inference.py:641] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:17:40,784 INFO [inference.py:649] Vocabulary size: 500
+2026-01-26 05:17:40,784 INFO [inference.py:650] Blank ID: 0
+2026-01-26 05:17:40,785 INFO [inference.py:653] Creating model
+2026-01-26 05:17:42,399 INFO [inference.py:660] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:17:42,400 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:17:47,415 INFO [inference.py:689] Number of model parameters: 317,511,772
+2026-01-26 05:17:47,416 INFO [inference.py:692] Loading test data
+2026-01-26 05:17:47,416 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:17:48,537 INFO [inference.py:703] Number of test utterances: 6676
+2026-01-26 05:17:48,538 INFO [inference.py:706] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-20-04 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:20:04,436 INFO [inference.py:613] ================================================================================
+2026-01-26 05:20:04,436 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:20:04,436 INFO [inference.py:615] ================================================================================
+2026-01-26 05:20:04,436 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:20:04,436 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:20:04,436 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:20:04,436 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:20:04,436 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:20:04,436 INFO [inference.py:622] Max states: 64
+2026-01-26 05:20:04,436 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:20:04,437 INFO [inference.py:629] Device: cpu
+2026-01-26 05:20:04,437 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:20:04,438 INFO [inference.py:640] Vocabulary size: 500
+2026-01-26 05:20:04,438 INFO [inference.py:641] Blank ID: 0
+2026-01-26 05:20:04,438 INFO [inference.py:644] Creating model
+2026-01-26 05:20:05,956 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:20:05,957 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
+2026-01-26 05:20:10,638 INFO [inference.py:680] Number of model parameters: 317,511,772
+2026-01-26 05:20:10,639 INFO [inference.py:683] Loading test data
+2026-01-26 05:20:10,639 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:20:11,677 INFO [inference.py:694] Number of test utterances: 6676
+2026-01-26 05:20:11,677 INFO [inference.py:697] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-29-29 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:29:29,151 INFO [inference.py:613] ================================================================================
+2026-01-26 05:29:29,151 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:29:29,151 INFO [inference.py:615] ================================================================================
+2026-01-26 05:29:29,151 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:29:29,151 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:29:29,151 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:29:29,151 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:29:29,151 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:29:29,151 INFO [inference.py:622] Max states: 64
+2026-01-26 05:29:29,151 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:29:29,151 INFO [inference.py:629] Device: cpu
+2026-01-26 05:29:29,151 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:29:29,153 INFO [inference.py:640] Vocabulary size: 500
+2026-01-26 05:29:29,153 INFO [inference.py:641] Blank ID: 0
+2026-01-26 05:29:29,153 INFO [inference.py:644] Creating model
+2026-01-26 05:29:30,733 INFO [inference.py:673] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:29:30,734 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:29:35,902 INFO [inference.py:680] Number of model parameters: 317,511,772
+2026-01-26 05:29:35,902 INFO [inference.py:683] Loading test data
+2026-01-26 05:29:35,902 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:29:37,022 INFO [inference.py:694] Number of test utterances: 6676
+2026-01-26 05:29:37,023 INFO [inference.py:697] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-48-19 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:48:19,123 INFO [inference.py:613] ================================================================================
+2026-01-26 05:48:19,123 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:48:19,123 INFO [inference.py:615] ================================================================================
+2026-01-26 05:48:19,123 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:48:19,123 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:48:19,123 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:48:19,123 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:48:19,123 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:48:19,123 INFO [inference.py:622] Max states: 64
+2026-01-26 05:48:19,123 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:48:19,123 INFO [inference.py:629] Device: cpu
+2026-01-26 05:48:19,123 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:48:19,125 INFO [inference.py:640] Vocabulary size: 500
+2026-01-26 05:48:19,125 INFO [inference.py:641] Blank ID: 0
+2026-01-26 05:48:19,125 INFO [inference.py:644] Creating model
+2026-01-26 05:48:22,516 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:48:22,517 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:48:39,229 INFO [inference.py:680] Number of model parameters: 317,511,772
+2026-01-26 05:48:39,229 INFO [inference.py:683] Loading test data
+2026-01-26 05:48:39,229 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:48:41,915 INFO [inference.py:694] Number of test utterances: 6676
+2026-01-26 05:48:41,915 INFO [inference.py:697] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-50-10 ADDED Viewed

	@@ -0,0 +1,22 @@

+2026-01-26 05:50:10,649 INFO [inference.py:613] ================================================================================
+2026-01-26 05:50:10,649 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:50:10,649 INFO [inference.py:615] ================================================================================
+2026-01-26 05:50:10,649 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:50:10,649 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:50:10,649 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:50:10,649 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:50:10,649 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:50:10,649 INFO [inference.py:622] Max states: 64
+2026-01-26 05:50:10,649 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:50:10,649 INFO [inference.py:629] Device: cuda:0
+2026-01-26 05:50:10,649 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:50:10,651 INFO [inference.py:640] Vocabulary size: 500
+2026-01-26 05:50:10,651 INFO [inference.py:641] Blank ID: 0
+2026-01-26 05:50:10,651 INFO [inference.py:644] Creating model
+2026-01-26 05:50:12,218 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:50:12,219 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:50:18,117 INFO [inference.py:680] Number of model parameters: 317,511,772
+2026-01-26 05:50:18,118 INFO [inference.py:683] Loading test data
+2026-01-26 05:50:18,118 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:50:19,185 INFO [inference.py:694] Number of test utterances: 6676
+2026-01-26 05:50:19,186 INFO [inference.py:697] Starting inference...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-54-32 ADDED Viewed

	@@ -0,0 +1,28 @@

+2026-01-26 05:54:32,577 INFO [inference.py:613] ================================================================================
+2026-01-26 05:54:32,577 INFO [inference.py:614] XLSR-Transducer Inference on AMI
+2026-01-26 05:54:32,577 INFO [inference.py:615] ================================================================================
+2026-01-26 05:54:32,578 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 05:54:32,578 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 05:54:32,578 INFO [inference.py:618] Test set: ihm
+2026-01-26 05:54:32,578 INFO [inference.py:619] Decoding method: modified_beam_search
+2026-01-26 05:54:32,578 INFO [inference.py:621] Beam size: 4
+2026-01-26 05:54:32,578 INFO [inference.py:622] Max states: 64
+2026-01-26 05:54:32,578 INFO [inference.py:623] Max symbols per frame: 3
+2026-01-26 05:54:32,578 INFO [inference.py:627] Device: cuda:0
+2026-01-26 05:54:32,578 INFO [inference.py:630] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 05:54:32,579 INFO [inference.py:638] Vocabulary size: 500
+2026-01-26 05:54:32,580 INFO [inference.py:639] Blank ID: 0
+2026-01-26 05:54:32,580 INFO [inference.py:642] Creating model
+2026-01-26 05:54:34,158 INFO [inference.py:649] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:54:34,158 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 05:54:40,547 INFO [inference.py:678] Number of model parameters: 317,511,772
+2026-01-26 05:54:40,547 INFO [inference.py:681] Loading test data
+2026-01-26 05:54:40,548 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 05:54:41,686 INFO [inference.py:692] Number of test utterances: 6676
+2026-01-26 05:54:41,686 INFO [inference.py:695] Starting inference...
+2026-01-26 05:54:41,686 INFO [inference.py:696] Note: First batch may take longer due to GPU warmup
+2026-01-26 05:54:42,879 INFO [inference.py:711]
+============================================================
+2026-01-26 05:54:42,879 INFO [inference.py:712] Processing batch 1
+2026-01-26 05:54:42,879 INFO [inference.py:718] Batch size: 6
+2026-01-26 05:54:42,879 INFO [inference.py:736] Starting decoding for this batch...

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-02-34 ADDED Viewed

	@@ -0,0 +1,77 @@

+2026-01-26 06:02:34,679 INFO [inference.py:630] ================================================================================
+2026-01-26 06:02:34,679 INFO [inference.py:631] XLSR-Transducer Inference on AMI
+2026-01-26 06:02:34,679 INFO [inference.py:632] ================================================================================
+2026-01-26 06:02:34,679 INFO [inference.py:633] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 06:02:34,679 INFO [inference.py:634] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 06:02:34,679 INFO [inference.py:635] Test set: ihm
+2026-01-26 06:02:34,679 INFO [inference.py:636] Decoding method: modified_beam_search
+2026-01-26 06:02:34,679 INFO [inference.py:638] Beam size: 4
+2026-01-26 06:02:34,679 INFO [inference.py:639] Max states: 64
+2026-01-26 06:02:34,680 INFO [inference.py:640] Max symbols per frame: 3
+2026-01-26 06:02:34,680 INFO [inference.py:644] Device: cuda:0
+2026-01-26 06:02:34,680 INFO [inference.py:647] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 06:02:34,681 INFO [inference.py:655] Vocabulary size: 500
+2026-01-26 06:02:34,681 INFO [inference.py:656] Blank ID: 0
+2026-01-26 06:02:34,681 INFO [inference.py:659] Creating model
+2026-01-26 06:02:36,292 INFO [inference.py:666] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 06:02:36,293 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 06:02:42,168 INFO [inference.py:695] Number of model parameters: 317,511,772
+2026-01-26 06:02:42,168 INFO [inference.py:698] Loading test data
+2026-01-26 06:02:42,168 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 06:02:43,277 INFO [inference.py:709] Number of test utterances: 6676
+2026-01-26 06:02:43,278 INFO [inference.py:712] Starting inference...
+2026-01-26 06:02:43,278 INFO [inference.py:713] Note: First batch may take longer due to GPU warmup
+2026-01-26 06:02:44,374 INFO [inference.py:728]
+============================================================
+2026-01-26 06:02:44,375 INFO [inference.py:729] Processing batch 1
+2026-01-26 06:02:44,375 INFO [inference.py:735] Batch size: 6
+2026-01-26 06:02:44,375 INFO [inference.py:753] Starting decoding for this batch...
+2026-01-26 06:02:45,363 INFO [inference.py:299] Beam search: Processing 6 utterances
+2026-01-26 06:02:45,363 INFO [inference.py:305]   Utterance 1/6: 769 frames
+2026-01-26 06:02:45,363 INFO [inference.py:312]     Frame 0/769, |B|=1
+2026-01-26 06:02:45,503 INFO [inference.py:362]     After initial expansion: |A|=5
+2026-01-26 06:02:45,503 INFO [inference.py:371]     Emission iteration 0, |A|=5
+2026-01-26 06:02:45,506 INFO [inference.py:371]     Emission iteration 1, |A|=15
+2026-01-26 06:02:45,513 INFO [inference.py:371]     Emission iteration 2, |A|=45
+2026-01-26 06:02:56,541 INFO [inference.py:312]     Frame 100/769, |B|=64
+2026-01-26 06:02:56,567 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:02:56,567 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:02:56,594 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:02:56,620 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:03:07,191 INFO [inference.py:312]     Frame 200/769, |B|=64
+2026-01-26 06:03:07,216 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:03:07,217 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:03:07,243 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:03:07,270 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:03:17,826 INFO [inference.py:312]     Frame 300/769, |B|=64
+2026-01-26 06:03:17,851 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:03:17,851 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:03:17,878 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:03:17,904 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:03:28,408 INFO [inference.py:312]     Frame 400/769, |B|=64
+2026-01-26 06:03:28,434 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:03:28,434 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:03:28,460 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:03:28,487 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:03:39,030 INFO [inference.py:312]     Frame 500/769, |B|=64
+2026-01-26 06:03:39,060 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:03:39,060 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:03:39,094 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:03:39,125 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:03:49,620 INFO [inference.py:312]     Frame 600/769, |B|=64
+2026-01-26 06:03:49,646 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:03:49,646 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:03:49,673 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:03:49,699 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:04:00,283 INFO [inference.py:312]     Frame 700/769, |B|=64
+2026-01-26 06:04:00,309 INFO [inference.py:362]     After initial expansion: |A|=320
+2026-01-26 06:04:00,309 INFO [inference.py:371]     Emission iteration 0, |A|=320
+2026-01-26 06:04:00,335 INFO [inference.py:371]     Emission iteration 1, |A|=64
+2026-01-26 06:04:00,362 INFO [inference.py:371]     Emission iteration 2, |A|=64
+2026-01-26 06:04:07,525 INFO [inference.py:455]   Utterance 1 result: 2 tokens
+2026-01-26 06:04:07,525 INFO [inference.py:305]   Utterance 2/6: 764 frames
+2026-01-26 06:04:07,525 INFO [inference.py:312]     Frame 0/764, |B|=1
+2026-01-26 06:04:07,526 INFO [inference.py:362]     After initial expansion: |A|=5
+2026-01-26 06:04:07,526 INFO [inference.py:371]     Emission iteration 0, |A|=5
+2026-01-26 06:04:07,528 INFO [inference.py:371]     Emission iteration 1, |A|=15
+2026-01-26 06:04:07,534 INFO [inference.py:371]     Emission iteration 2, |A|=45

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-04-30 ADDED Viewed

	@@ -0,0 +1,72 @@

+2026-01-26 06:04:30,814 INFO [inference.py:578] ================================================================================
+2026-01-26 06:04:30,815 INFO [inference.py:579] XLSR-Transducer Inference on AMI
+2026-01-26 06:04:30,815 INFO [inference.py:580] ================================================================================
+2026-01-26 06:04:30,815 INFO [inference.py:581] Experiment dir: xlsr_transducer/exp_16gb_scd
+2026-01-26 06:04:30,815 INFO [inference.py:582] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
+2026-01-26 06:04:30,815 INFO [inference.py:583] Test set: ihm
+2026-01-26 06:04:30,815 INFO [inference.py:584] Decoding method: modified_beam_search
+2026-01-26 06:04:30,815 INFO [inference.py:586] Beam size: 4
+2026-01-26 06:04:30,815 INFO [inference.py:587] Max states: 64
+2026-01-26 06:04:30,815 INFO [inference.py:588] Max symbols per frame: 3
+2026-01-26 06:04:30,815 INFO [inference.py:592] Device: cuda:0
+2026-01-26 06:04:30,815 INFO [inference.py:595] Loading BPE model from data/lang_bpe_500_scd
+2026-01-26 06:04:30,817 INFO [inference.py:603] Vocabulary size: 500
+2026-01-26 06:04:30,817 INFO [inference.py:604] Blank ID: 0
+2026-01-26 06:04:30,817 INFO [inference.py:607] Creating model
+2026-01-26 06:04:32,424 INFO [inference.py:614] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 06:04:32,424 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
+2026-01-26 06:04:38,254 INFO [inference.py:643] Number of model parameters: 317,511,772
+2026-01-26 06:04:38,254 INFO [inference.py:646] Loading test data
+2026-01-26 06:04:38,254 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
+2026-01-26 06:04:39,360 INFO [inference.py:657] Number of test utterances: 6676
+2026-01-26 06:04:39,361 INFO [inference.py:660] Starting inference...
+2026-01-26 06:04:39,361 INFO [inference.py:661] Note: First batch may take longer due to GPU warmup
+2026-01-26 06:04:40,450 INFO [inference.py:676]
+============================================================
+2026-01-26 06:04:40,450 INFO [inference.py:677] Processing batch 1
+2026-01-26 06:04:40,450 INFO [inference.py:683] Batch size: 6
+2026-01-26 06:04:40,450 INFO [inference.py:701] Starting decoding for this batch...
+2026-01-26 06:04:41,439 INFO [inference.py:283] Beam search: Processing 6 utterances
+2026-01-26 06:04:41,440 INFO [inference.py:289]   Utterance 1/6: 769 frames
+2026-01-26 06:04:41,440 INFO [inference.py:296]     Frame 0/769, |B|=1
+2026-01-26 06:04:41,938 INFO [inference.py:296]     Frame 200/769, |B|=4
+2026-01-26 06:04:42,252 INFO [inference.py:296]     Frame 400/769, |B|=4
+2026-01-26 06:04:42,564 INFO [inference.py:296]     Frame 600/769, |B|=4
+2026-01-26 06:04:42,846 INFO [inference.py:403]   Utterance 1 result: 1 tokens
+2026-01-26 06:04:42,846 INFO [inference.py:289]   Utterance 2/6: 764 frames
+2026-01-26 06:04:42,846 INFO [inference.py:296]     Frame 0/764, |B|=1
+2026-01-26 06:04:43,158 INFO [inference.py:296]     Frame 200/764, |B|=4
+2026-01-26 06:04:43,477 INFO [inference.py:296]     Frame 400/764, |B|=4
+2026-01-26 06:04:43,804 INFO [inference.py:296]     Frame 600/764, |B|=4
+2026-01-26 06:04:44,077 INFO [inference.py:403]   Utterance 2 result: 31 tokens
+2026-01-26 06:04:44,078 INFO [inference.py:289]   Utterance 3/6: 743 frames
+2026-01-26 06:04:44,078 INFO [inference.py:296]     Frame 0/743, |B|=1
+2026-01-26 06:04:44,393 INFO [inference.py:296]     Frame 200/743, |B|=4
+2026-01-26 06:04:44,721 INFO [inference.py:296]     Frame 400/743, |B|=4
+2026-01-26 06:04:45,054 INFO [inference.py:296]     Frame 600/743, |B|=4
+2026-01-26 06:04:45,278 INFO [inference.py:403]   Utterance 3 result: 1 tokens
+2026-01-26 06:04:45,278 INFO [inference.py:289]   Utterance 4/6: 712 frames
+2026-01-26 06:04:45,278 INFO [inference.py:296]     Frame 0/712, |B|=1
+2026-01-26 06:04:45,592 INFO [inference.py:296]     Frame 200/712, |B|=4
+2026-01-26 06:04:45,907 INFO [inference.py:296]     Frame 400/712, |B|=4
+2026-01-26 06:04:46,221 INFO [inference.py:296]     Frame 600/712, |B|=4
+2026-01-26 06:04:46,396 INFO [inference.py:403]   Utterance 4 result: 13 tokens
+2026-01-26 06:04:46,397 INFO [inference.py:289]   Utterance 5/6: 699 frames
+2026-01-26 06:04:46,397 INFO [inference.py:296]     Frame 0/699, |B|=1
+2026-01-26 06:04:46,713 INFO [inference.py:296]     Frame 200/699, |B|=4
+2026-01-26 06:04:47,059 INFO [inference.py:296]     Frame 400/699, |B|=4
+2026-01-26 06:04:47,404 INFO [inference.py:296]     Frame 600/699, |B|=4
+2026-01-26 06:04:47,572 INFO [inference.py:403]   Utterance 5 result: 11 tokens
+2026-01-26 06:04:47,572 INFO [inference.py:289]   Utterance 6/6: 696 frames
+2026-01-26 06:04:47,572 INFO [inference.py:296]     Frame 0/696, |B|=1
+2026-01-26 06:04:47,895 INFO [inference.py:296]     Frame 200/696, |B|=4
+2026-01-26 06:04:48,221 INFO [inference.py:296]     Frame 400/696, |B|=4
+2026-01-26 06:04:48,558 INFO [inference.py:296]     Frame 600/696, |B|=4
+2026-01-26 06:04:48,713 INFO [inference.py:403]   Utterance 6 result: 13 tokens
+2026-01-26 06:04:48,713 INFO [inference.py:410] Beam search complete
+2026-01-26 06:04:48,713 INFO [inference.py:707] Decoding completed in 8.26s
+2026-01-26 06:04:48,713 INFO [inference.py:710] Converting tokens to text...
+2026-01-26 06:04:48,714 INFO [inference.py:715] First hypothesis: OKAY...
+2026-01-26 06:04:48,714 INFO [inference.py:723] Batch 1 completed in 8.26s
+2026-01-26 06:04:48,714 INFO [inference.py:724] Average time per utterance: 1.38s
+2026-01-26 06:04:48,714 INFO [inference.py:725] Total processed so far: 6 utterances in 1 batches

egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-07-36 ADDED Viewed

The diff for this file is too large to render. See raw diff

egs/ami/ASR/xlsr_transducer/inference_results/metrics-ihm.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+================================================================================
+XLSR-Transducer Inference Results
+================================================================================
+Experiment: xlsr_transducer/exp_16gb_scd
+Test set: ihm
+Decoding method: modified_beam_search
+Beam size: 4
+Max states: 64
+Max symbols per frame: 3
+Number of utterances: 6676
+Total words: 92205
+Total errors: 73964
+WER: 80.22%
+Total inference time: 46.9 minutes
+Average time per utterance: 0.42s
+================================================================================

egs/ami/ASR/xlsr_transducer/inference_results/ref-ihm.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

egs/ami/ASR/xlsr_transducer/log/log-train-2026-01-25-02-57-28 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27310bd90828a6f6d515d1181fa187228601dfe8247ecc89d39848c95e54ea20
+size 174840669

egs/ami/ASR/xlsr_transducer/tensorboard/events.out.tfevents.1769309848.3edaabdb707c.1028020.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb2715978701cb9358c38337c7bb5316cffc55440353a079aeb9c0bdc3867f2
+size 158109