Upload 30 files
Browse files- .gitattributes +1 -0
- egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-04-45-26-checkpoint +264 -0
- egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-05-16-checkpoint +111 -0
- egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-20-04-checkpoint +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-29-29-checkpoint +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/hyp-ihm.txt +0 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-25-15-47-40 +32 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-43-42 +45 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-44-36 +45 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-45-26 +0 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-57-24 +57 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-58-20 +32 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-59-21 +111 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-02-37 +32 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-03-42 +32 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-05-16 +529 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-13-05 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-14-59 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-17-40 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-20-04 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-29-29 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-48-19 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-50-10 +22 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-54-32 +28 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-02-34 +77 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-04-30 +72 -0
- egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-07-36 +0 -0
- egs/ami/ASR/xlsr_transducer/inference_results/metrics-ihm.txt +16 -0
- egs/ami/ASR/xlsr_transducer/inference_results/ref-ihm.txt +0 -0
- egs/ami/ASR/xlsr_transducer/log/log-train-2026-01-25-02-57-28 +3 -0
- egs/ami/ASR/xlsr_transducer/tensorboard/events.out.tfevents.1769309848.3edaabdb707c.1028020.0 +3 -0
.gitattributes
CHANGED
|
@@ -55,3 +55,4 @@ egs/libricss/SURT/heat.png filter=lfs diff=lfs merge=lfs -text
|
|
| 55 |
egs/libricss/SURT/surt.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
egs/librispeech/WSASR/figures/otc_training_graph.drawio.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
egs/speech_llm/ASR_LLM/assets/framework.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 55 |
egs/libricss/SURT/surt.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
egs/librispeech/WSASR/figures/otc_training_graph.drawio.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
egs/speech_llm/ASR_LLM/assets/framework.png filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
egs/ami/ASR/xlsr_transducer/log/log-train-2026-01-25-02-57-28 filter=lfs diff=lfs merge=lfs -text
|
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-04-45-26-checkpoint
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 04:45:26,876 INFO [inference.py:419] ================================================================================
|
| 2 |
+
2026-01-26 04:45:26,877 INFO [inference.py:420] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 04:45:26,877 INFO [inference.py:421] ================================================================================
|
| 4 |
+
2026-01-26 04:45:26,877 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 04:45:26,877 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 04:45:26,877 INFO [inference.py:424] Test set: ihm
|
| 7 |
+
2026-01-26 04:45:26,877 INFO [inference.py:425] Decoding method: greedy_search
|
| 8 |
+
2026-01-26 04:45:26,877 INFO [inference.py:431] Device: cpu
|
| 9 |
+
2026-01-26 04:45:26,877 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 04:45:26,878 INFO [inference.py:442] Vocabulary size: 500
|
| 11 |
+
2026-01-26 04:45:26,879 INFO [inference.py:443] Blank ID: 0
|
| 12 |
+
2026-01-26 04:45:26,879 INFO [inference.py:446] Creating model
|
| 13 |
+
2026-01-26 04:45:28,435 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 14 |
+
2026-01-26 04:45:28,436 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 15 |
+
2026-01-26 04:45:34,301 INFO [inference.py:482] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 04:45:34,302 INFO [inference.py:485] Loading test data
|
| 17 |
+
2026-01-26 04:45:34,302 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 04:45:35,453 INFO [inference.py:496] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 04:45:35,453 INFO [inference.py:499] Starting inference...
|
| 20 |
+
2026-01-26 04:45:36,710 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 04:45:36,713 INFO [inference.py:319] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 04:45:36,715 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 04:45:46,614 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 04:45:46,615 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 04:45:46,615 INFO [inference.py:343] Encoder out range: [-13.684, 12.764]
|
| 26 |
+
2026-01-26 04:45:55,306 INFO [inference.py:353] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 04:45:55,307 INFO [inference.py:355] First hypothesis: [290, 289, 20, 262, 20, 262, 20, 262, 20, 262, 20, 262, 20, 262, 240, 199, 35, 8, 14, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 13, 77, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 15, 83, 7, 8, 56, 12, 10, 74, 74, 19, 46, 74, 19, 46, 74, 19, 46, 74, 19, 190, 162, 5, 14, 43, 8, 119, 5, 14, 43, 8, 5, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 23, 4, 2, 48, 33, 48, 33, 48, 33, 48, 33, 48, 33, 10, 33, 10, 33, 10, 33, 10, 33, 10, 33, 10, 38, 14, 29, 5, 52, 15, 7, 27, 154, 38, 4, 185, 16, 95, 10, 10, 118, 231, 10, 13, 160, 202]
|
| 28 |
+
2026-01-26 04:45:55,323 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 04:45:55,324 INFO [inference.py:319] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 04:45:55,325 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 04:46:02,522 INFO [inference.py:341] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 04:46:02,524 INFO [inference.py:342] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 04:46:02,524 INFO [inference.py:343] Encoder out range: [-12.514, 12.004]
|
| 37 |
+
2026-01-26 04:46:14,119 INFO [inference.py:353] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 04:46:14,119 INFO [inference.py:355] First hypothesis: [10, 7, 5, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 4, 5, 21, 306, 210, 96, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 156, 6, 4, 24, 9, 102, 9, 102, 9, 102, 9, 102, 9, 102, 9, 102, 130, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 18, 18, 27, 60, 30, 14, 60, 60, 70, 60, 60, 28, 60, 60, 28, 60, 60, 28, 60, 60, 28, 24, 14, 36, 75, 41, 86, 97, 25, 75, 14, 43, 8, 4, 14, 4, 7, 197, 4, 7, 197, 4, 7, 197, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 17, 169, 17, 169, 17, 169, 17, 169, 17, 169, 17, 147, 169, 147, 169, 147, 169, 147, 169, 147, 169, 25, 147, 147, 147, 147, 147, 147, 147, 147, 147, 4, 7, 197, 112, 4, 7, 197, 112, 4, 7, 197, 112, 4, 7, 197, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 17, 169, 17, 169, 17, 169, 17, 169, 17, 169, 25, 75, 8, 54, 44, 24, 363, 328, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 111, 199, 205, 199, 199, 92, 14, 199, 199, 92, 14, 8, 67, 67, 32, 26, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 27, 8, 8, 119, 8, 19, 201, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 234, 16, 16, 234, 16, 16, 234, 16, 16, 234, 58, 55, 43, 30, 55, 43, 30, 55, 43, 30, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 55, 19, 36, 22, 41, 80, 219, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 15, 72]
|
| 39 |
+
2026-01-26 04:46:14,135 INFO [inference.py:318] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 04:46:14,136 INFO [inference.py:319] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 04:46:14,137 INFO [inference.py:320] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
| 46 |
+
2026-01-26 04:46:21,443 INFO [inference.py:341] Encoder out shape: torch.Size([39, 126, 1024])
|
| 47 |
+
2026-01-26 04:46:21,499 INFO [inference.py:342] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100, 94, 89, 88, 87, 73,
|
| 48 |
+
71, 71, 69, 68, 68, 65, 62, 62, 59, 59, 58, 56, 51, 45,
|
| 49 |
+
42, 40, 38, 36, 35, 33, 29, 28, 24, 18, 17])
|
| 50 |
+
2026-01-26 04:46:21,500 INFO [inference.py:343] Encoder out range: [-11.444, 10.811]
|
| 51 |
+
2026-01-26 04:46:29,134 INFO [inference.py:353] Number of hypotheses: 39
|
| 52 |
+
2026-01-26 04:46:29,134 INFO [inference.py:355] First hypothesis: [11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 64, 10, 7, 5, 51, 13, 227, 211, 120, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 17, 7, 5, 17, 113, 9, 113, 9, 113, 9, 113, 13, 4, 5, 8, 5, 136, 40, 5, 16, 136, 40, 30, 5, 8, 136, 30, 8, 14, 5, 310, 23, 177, 63, 54, 136, 40, 30, 8, 14, 5, 16, 272, 16, 272, 16, 272, 16, 272]
|
| 53 |
+
2026-01-26 04:46:29,199 INFO [inference.py:318] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
|
| 54 |
+
2026-01-26 04:46:29,200 INFO [inference.py:319] Audio range: [-0.314, 0.332]
|
| 55 |
+
2026-01-26 04:46:29,201 INFO [inference.py:320] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
|
| 56 |
+
56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
|
| 57 |
+
46079, 45280, 44960], dtype=torch.int32)
|
| 58 |
+
2026-01-26 04:46:36,615 INFO [inference.py:341] Encoder out shape: torch.Size([23, 212, 1024])
|
| 59 |
+
2026-01-26 04:46:36,617 INFO [inference.py:342] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
|
| 60 |
+
170, 166, 164, 164, 147, 144, 143, 141, 140])
|
| 61 |
+
2026-01-26 04:46:36,617 INFO [inference.py:343] Encoder out range: [-13.261, 11.090]
|
| 62 |
+
2026-01-26 04:46:45,499 INFO [inference.py:353] Number of hypotheses: 23
|
| 63 |
+
2026-01-26 04:46:45,500 INFO [inference.py:355] First hypothesis: [31, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 20, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 124, 32, 28, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 130, 4, 2, 11]
|
| 64 |
+
2026-01-26 04:46:45,516 INFO [inference.py:318] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
|
| 65 |
+
2026-01-26 04:46:45,516 INFO [inference.py:319] Audio range: [-0.323, 0.414]
|
| 66 |
+
2026-01-26 04:46:45,517 INFO [inference.py:320] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
|
| 67 |
+
2026-01-26 04:46:53,506 INFO [inference.py:341] Encoder out shape: torch.Size([5, 991, 1024])
|
| 68 |
+
2026-01-26 04:46:53,507 INFO [inference.py:342] Encoder out lens: tensor([991, 931, 931, 920, 913])
|
| 69 |
+
2026-01-26 04:46:53,507 INFO [inference.py:343] Encoder out range: [-14.241, 14.344]
|
| 70 |
+
2026-01-26 04:47:07,903 INFO [inference.py:353] Number of hypotheses: 5
|
| 71 |
+
2026-01-26 04:47:07,903 INFO [inference.py:355] First hypothesis: [37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 4, 2, 12, 23, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 7, 69, 7, 69, 7, 69, 7, 69, 7, 69, 7, 5, 116, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 82, 98, 27, 267, 63, 137, 27, 267, 63, 137, 27, 267, 63, 137, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 19, 5, 14, 5, 12, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 31, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 24, 28, 257, 24, 28, 257, 24, 28, 257, 24, 28, 24, 14, 24, 24, 5, 147, 17, 147, 61, 4, 28, 30, 8, 25, 4, 28, 30, 8, 25, 4, 28, 28, 30, 8, 25, 4, 28, 28, 30, 8, 25, 4, 28, 28, 159, 13, 30, 8, 28, 28, 30, 8, 28, 28, 30, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 24, 24, 21, 24, 24, 21, 24, 24, 21, 24, 24, 21, 24, 21, 24, 21, 24, 21, 24, 21, 24, 21, 8, 21, 5, 14, 21, 5, 14, 21, 5, 14, 5, 5, 71, 14, 43, 8, 5, 119, 55, 14, 43, 8, 119, 55, 80, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 96, 40, 275, 32, 5, 275, 32, 5, 275, 32, 5, 275, 32, 5, 24, 325, 5, 43, 40, 43, 40, 43, 40, 43, 40, 43, 40, 43, 40, 43, 40, 19, 190, 40, 137, 43, 40, 19, 40, 43, 40, 19, 40, 43, 30, 27, 14, 29, 14, 43, 19, 29, 14, 43, 19, 29, 8, 103, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 5, 103, 309, 20, 98, 30, 82, 82, 4, 223, 82, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 4, 223, 28, 4, 223, 28, 4, 223, 28, 4, 223, 190, 5, 20, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 119, 53, 21, 29, 29, 119, 53, 21, 29, 29, 119, 5, 14, 190, 8, 5, 20, 20, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 18, 7, 27, 220, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 24, 138, 67, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 328, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 47, 7, 8, 4, 5, 23, 36, 63, 46, 12, 6, 4, 5, 93, 4, 5, 93, 4, 5, 93, 4, 5, 93, 93, 210, 4, 5, 93, 93, 210, 4, 5, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 91, 5, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 162, 98, 98, 30, 82, 82, 82, 82, 82, 82, 82, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 98, 30, 82, 4, 223, 82, 4, 223, 82, 4, 223, 82, 82, 30, 60, 30, 36, 82, 30, 60, 30, 36, 82, 30, 36, 63, 79, 14, 190, 8, 30, 43, 8, 30, 43, 8, 30, 43, 8, 5, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 53, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 14, 43, 8, 5, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 13, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 4, 28, 28, 30, 36, 63, 4, 28, 28, 30, 36, 63, 22, 30, 30, 22, 19, 28, 22, 30, 30, 22, 19, 28, 30, 30, 22, 19, 28, 30, 30, 22, 19, 28, 30, 8, 28, 28, 30, 8, 28, 28, 30, 8, 28, 28, 22, 70, 25, 13, 30, 8, 28, 29, 14, 25, 29, 29, 21, 19, 21, 42, 41, 19, 21, 19, 40, 19, 41, 4, 2, 45, 78, 62]
|
| 72 |
+
2026-01-26 04:47:08,602 INFO [inference.py:318] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
|
| 73 |
+
2026-01-26 04:47:08,603 INFO [inference.py:319] Audio range: [-0.274, 0.362]
|
| 74 |
+
2026-01-26 04:47:08,604 INFO [inference.py:320] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
|
| 75 |
+
33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
|
| 76 |
+
20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320, 7040,
|
| 77 |
+
6560, 6400, 5760, 5760, 5120, 4800, 4800, 4640, 4480, 3360],
|
| 78 |
+
dtype=torch.int32)
|
| 79 |
+
2026-01-26 04:47:15,213 INFO [inference.py:341] Encoder out shape: torch.Size([40, 122, 1024])
|
| 80 |
+
2026-01-26 04:47:15,214 INFO [inference.py:342] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103, 91, 91,
|
| 81 |
+
85, 77, 77, 73, 67, 67, 64, 52, 50, 50, 49, 48, 46, 41,
|
| 82 |
+
38, 21, 20, 19, 17, 17, 15, 14, 14, 14, 13, 10])
|
| 83 |
+
2026-01-26 04:47:15,214 INFO [inference.py:343] Encoder out range: [-11.784, 11.570]
|
| 84 |
+
2026-01-26 04:47:22,413 INFO [inference.py:353] Number of hypotheses: 40
|
| 85 |
+
2026-01-26 04:47:22,414 INFO [inference.py:355] First hypothesis: [45, 78, 62, 4, 2, 45, 78, 62, 4, 2, 31, 4, 2, 11, 4, 2, 11]
|
| 86 |
+
2026-01-26 04:47:22,425 INFO [inference.py:318] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
|
| 87 |
+
2026-01-26 04:47:22,426 INFO [inference.py:319] Audio range: [-0.514, 0.393]
|
| 88 |
+
2026-01-26 04:47:22,427 INFO [inference.py:320] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
|
| 89 |
+
51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
|
| 90 |
+
44640, 44320, 44160], dtype=torch.int32)
|
| 91 |
+
2026-01-26 04:47:29,228 INFO [inference.py:341] Encoder out shape: torch.Size([23, 208, 1024])
|
| 92 |
+
2026-01-26 04:47:29,229 INFO [inference.py:342] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
|
| 93 |
+
154, 150, 150, 149, 146, 143, 139, 138, 137])
|
| 94 |
+
2026-01-26 04:47:29,229 INFO [inference.py:343] Encoder out range: [-12.152, 11.060]
|
| 95 |
+
2026-01-26 04:47:36,938 INFO [inference.py:353] Number of hypotheses: 23
|
| 96 |
+
2026-01-26 04:47:36,938 INFO [inference.py:355] First hypothesis: [4, 5, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 4, 2, 11, 15, 11, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 58, 40, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 21, 30, 36, 19, 5, 30, 14, 222, 5, 26, 5, 26, 5, 26, 5, 26, 5, 26, 5, 26, 58, 30, 36, 63, 22, 54, 240, 20, 20, 4, 5, 8, 136, 21, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 347, 265, 147, 265, 20, 10, 7, 5, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 22, 104, 108, 8, 183, 25, 10, 33, 10, 7, 5, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 113, 5, 14, 5, 5, 46, 4, 96, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 17, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 68, 71, 26, 5]
|
| 97 |
+
2026-01-26 04:47:37,011 INFO [inference.py:318] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
|
| 98 |
+
2026-01-26 04:47:37,011 INFO [inference.py:319] Audio range: [-0.416, 0.458]
|
| 99 |
+
2026-01-26 04:47:37,012 INFO [inference.py:320] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
|
| 100 |
+
50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
|
| 101 |
+
45920, 44640, 43040, 42720], dtype=torch.int32)
|
| 102 |
+
2026-01-26 04:47:44,400 INFO [inference.py:341] Encoder out shape: torch.Size([24, 204, 1024])
|
| 103 |
+
2026-01-26 04:47:44,401 INFO [inference.py:342] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
|
| 104 |
+
151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
|
| 105 |
+
2026-01-26 04:47:44,401 INFO [inference.py:343] Encoder out range: [-12.007, 11.624]
|
| 106 |
+
2026-01-26 04:47:51,732 INFO [inference.py:353] Number of hypotheses: 24
|
| 107 |
+
2026-01-26 04:47:51,732 INFO [inference.py:355] First hypothesis: [11, 4, 2, 11, 17, 7, 5, 59, 19, 75, 8, 164, 21, 27, 4, 2, 9, 49, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 15, 7, 27, 154, 34, 16, 125, 10, 13, 24, 19, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 5, 19, 24, 14, 5, 20, 84, 18, 7, 85, 34, 13, 227, 211, 120, 4, 2, 64, 18, 7, 85, 116, 73, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 13, 227, 211, 120, 46, 66, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 18, 34, 16, 34, 13, 227, 211, 120, 25, 6, 24, 9, 7, 24, 68, 218, 52, 15, 34, 16, 34, 13, 74, 19, 5, 8, 41, 19, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 25, 75, 41, 86, 97, 369]
|
| 108 |
+
2026-01-26 04:47:51,743 INFO [inference.py:318] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
|
| 109 |
+
2026-01-26 04:47:51,744 INFO [inference.py:319] Audio range: [-0.135, 0.191]
|
| 110 |
+
2026-01-26 04:47:51,745 INFO [inference.py:320] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
|
| 111 |
+
dtype=torch.int32)
|
| 112 |
+
2026-01-26 04:48:00,501 INFO [inference.py:341] Encoder out shape: torch.Size([9, 552, 1024])
|
| 113 |
+
2026-01-26 04:48:00,502 INFO [inference.py:342] Encoder out lens: tensor([552, 533, 513, 486, 482, 474, 474, 472, 459])
|
| 114 |
+
2026-01-26 04:48:00,502 INFO [inference.py:343] Encoder out range: [-13.325, 12.083]
|
| 115 |
+
2026-01-26 04:48:10,617 INFO [inference.py:353] Number of hypotheses: 9
|
| 116 |
+
2026-01-26 04:48:10,617 INFO [inference.py:355] First hypothesis: [89, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 21, 94, 221, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 17, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 33, 113, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 4, 5, 21, 5, 97, 5, 5, 19, 5, 14, 379, 5, 292, 379, 5, 292, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 379, 130, 101, 101, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 29, 119, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 21, 29, 29, 28, 29, 29, 28, 29, 29, 119, 149, 29, 29, 119, 149, 29, 29, 119, 149, 29, 14, 43, 8, 21, 29, 29, 119, 149, 29, 14, 43, 8, 119, 55, 18, 77, 77, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 18, 77, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 81, 7, 27, 12, 52, 10, 52, 10, 52, 10, 52, 10, 52, 10, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 324, 115, 54, 10, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 71, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 53, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 134, 36, 58, 140, 36, 58, 140, 36, 58, 140, 36, 119, 149, 22, 18, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 4, 63, 19, 41, 4, 63, 19, 41, 4, 63, 21, 82, 19, 70, 4, 185, 4, 185, 4, 185, 4, 185, 82, 185, 88, 100, 16, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 140, 36, 58, 140, 36, 58, 140, 36, 58, 134, 36, 121, 46, 74, 58, 140, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 19, 36, 79, 14, 43, 8, 119, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 29, 29, 53, 21, 29, 29, 119, 53, 21, 29, 29, 119, 53, 21, 29, 29, 14, 53, 21, 29, 29, 14, 53, 21, 29, 29, 21, 29, 29, 21, 29, 29, 21, 79, 14, 29, 14, 21, 29, 14, 21, 29, 14, 43, 8, 119, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 84, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 81, 7, 27, 10, 10, 53, 28, 29, 29, 90, 53, 28, 29, 29, 90, 53, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 28, 30, 29, 28, 30, 29, 28, 30, 29, 28, 28, 29, 29, 28, 28, 29, 29, 28, 28, 29, 29, 14, 43, 8, 30, 29, 29, 14, 43, 8, 5, 29, 14, 43, 8, 5, 29, 14, 43, 8, 5, 23, 17, 7, 5, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 205, 205, 5, 17, 13, 43, 43, 28, 133, 180, 43, 30, 24, 180, 8, 86, 180, 8, 142, 19, 21, 42, 26, 35, 87, 17, 174, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 12]
|
| 117 |
+
2026-01-26 04:48:10,629 INFO [inference.py:318] Audio shape: torch.Size([14, 112320]), dtype: torch.float32
|
| 118 |
+
2026-01-26 04:48:10,630 INFO [inference.py:319] Audio range: [-0.469, 0.457]
|
| 119 |
+
2026-01-26 04:48:10,630 INFO [inference.py:320] Audio lengths: tensor([112320, 105920, 105439, 104000, 103840, 101920, 98720, 98400, 96960,
|
| 120 |
+
96800, 96320, 95680, 93760, 93600], dtype=torch.int32)
|
| 121 |
+
2026-01-26 04:48:18,040 INFO [inference.py:341] Encoder out shape: torch.Size([14, 350, 1024])
|
| 122 |
+
2026-01-26 04:48:18,040 INFO [inference.py:342] Encoder out lens: tensor([350, 330, 329, 324, 324, 318, 308, 307, 302, 302, 300, 298, 292, 292])
|
| 123 |
+
2026-01-26 04:48:18,041 INFO [inference.py:343] Encoder out range: [-14.286, 11.940]
|
| 124 |
+
2026-01-26 04:48:29,123 INFO [inference.py:353] Number of hypotheses: 14
|
| 125 |
+
2026-01-26 04:48:29,123 INFO [inference.py:355] First hypothesis: [39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 11, 31, 4, 24, 4, 32, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 12, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 142, 19, 22, 22, 142, 19, 22, 22, 142, 19, 22, 262, 20, 35, 32, 28, 67, 8, 26, 35, 32, 28, 8, 86, 22, 142, 117, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 22, 21, 8, 8, 21, 8, 8, 21, 8, 8, 142, 86, 22, 142, 67, 22, 142, 67, 22, 142, 67, 22, 142, 67, 22, 108, 22, 142, 67, 22, 108, 22, 142, 67, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 86, 32, 67, 14, 32, 67, 14, 32, 67, 14, 32, 67, 14, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 85, 13, 13, 13, 13, 13, 13, 13, 13, 22, 22, 4, 224, 4, 224, 4, 224, 4, 224, 4, 224, 30, 60, 30, 183, 51, 4, 224, 30, 60, 30, 183, 56, 15, 51, 56, 15, 51, 56, 15, 51, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 56, 15, 56, 56, 15, 56, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 148, 4, 27, 148, 4, 27, 148, 4, 27, 148, 4, 27, 148, 4, 27, 21, 104, 19, 22, 14, 54, 19, 149, 22, 54, 54, 80, 10, 191, 32, 7, 8, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 4, 5, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 26, 52, 18, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 49, 26, 130, 15, 7, 27, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 82, 4, 28, 82, 4, 28, 82, 4, 28, 82, 82, 70, 25, 4, 28, 82, 82, 70, 25, 4, 223, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 82, 104, 19, 82, 104, 19, 82, 104, 19, 82, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 263, 80, 48, 33, 48, 33, 262, 446, 446, 446, 446, 446, 446, 446, 446, 446, 446, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 262, 20, 262, 20, 262, 20, 262, 20, 262, 20, 262, 31, 20, 31, 20, 31, 20, 31, 20, 31, 20, 31, 262, 31, 262, 31, 262, 31, 262, 31, 262, 31]
|
| 126 |
+
2026-01-26 04:48:29,127 INFO [inference.py:544] Processed 206 utterances in 10 batches
|
| 127 |
+
2026-01-26 04:48:29,135 INFO [inference.py:318] Audio shape: torch.Size([38, 41440]), dtype: torch.float32
|
| 128 |
+
2026-01-26 04:48:29,136 INFO [inference.py:319] Audio range: [-0.272, 0.322]
|
| 129 |
+
2026-01-26 04:48:29,137 INFO [inference.py:320] Audio lengths: tensor([41440, 41120, 40160, 35680, 33120, 32960, 32800, 31520, 31040, 30880,
|
| 130 |
+
30239, 29920, 29120, 27360, 25279, 24480, 23520, 22720, 22720, 21600,
|
| 131 |
+
20800, 20320, 19840, 19840, 17600, 15520, 13120, 12480, 12320, 11040,
|
| 132 |
+
10560, 9600, 8640, 7520, 5440, 5120, 5120, 4640],
|
| 133 |
+
dtype=torch.int32)
|
| 134 |
+
2026-01-26 04:48:35,710 INFO [inference.py:341] Encoder out shape: torch.Size([38, 129, 1024])
|
| 135 |
+
2026-01-26 04:48:35,710 INFO [inference.py:342] Encoder out lens: tensor([129, 128, 125, 111, 103, 102, 102, 98, 96, 96, 94, 93, 90, 85,
|
| 136 |
+
78, 76, 73, 70, 70, 67, 64, 63, 61, 61, 54, 48, 40, 38,
|
| 137 |
+
38, 34, 32, 29, 26, 23, 16, 15, 15, 14])
|
| 138 |
+
2026-01-26 04:48:35,711 INFO [inference.py:343] Encoder out range: [-13.512, 11.822]
|
| 139 |
+
2026-01-26 04:48:43,120 INFO [inference.py:353] Number of hypotheses: 38
|
| 140 |
+
2026-01-26 04:48:43,121 INFO [inference.py:355] First hypothesis: [9, 7, 85, 13, 4, 27, 5, 30, 27, 4, 27, 5, 30, 27, 4, 27, 5, 30, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 5, 14, 43, 19, 5, 14, 43, 19, 5, 43, 19, 5, 43, 19, 5, 43, 19, 5, 43, 21, 29, 14, 43, 19, 21, 43, 8, 19, 201, 14, 190, 19, 201, 14, 190, 19, 201, 14, 190, 8, 19, 201, 14, 8, 119, 55, 80, 118, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 10, 4, 14, 199, 199, 202, 199, 199, 202, 199, 199, 202, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 199, 71, 14, 14, 75, 75, 14, 14, 75, 75, 14, 43, 8, 26, 130, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11, 4, 2, 11]
|
| 141 |
+
2026-01-26 04:48:43,132 INFO [inference.py:318] Audio shape: torch.Size([38, 41280]), dtype: torch.float32
|
| 142 |
+
2026-01-26 04:48:43,133 INFO [inference.py:319] Audio range: [-0.080, 0.105]
|
| 143 |
+
2026-01-26 04:48:43,133 INFO [inference.py:320] Audio lengths: tensor([41280, 40320, 36800, 35680, 34880, 34879, 34080, 34080, 32000, 30400,
|
| 144 |
+
29280, 29280, 28320, 24000, 23040, 20960, 20960, 20960, 20160, 16960,
|
| 145 |
+
14080, 13280, 12640, 12160, 10720, 9440, 8640, 6240, 6080, 5440,
|
| 146 |
+
5440, 5120, 4800, 4800, 4640, 4480, 4320, 4160],
|
| 147 |
+
dtype=torch.int32)
|
| 148 |
+
2026-01-26 04:48:50,029 INFO [inference.py:341] Encoder out shape: torch.Size([38, 128, 1024])
|
| 149 |
+
2026-01-26 04:48:50,030 INFO [inference.py:342] Encoder out lens: tensor([128, 125, 114, 111, 108, 108, 106, 106, 99, 94, 91, 91, 88, 74,
|
| 150 |
+
71, 65, 65, 65, 62, 52, 43, 41, 39, 37, 33, 29, 26, 19,
|
| 151 |
+
18, 16, 16, 15, 14, 14, 14, 13, 13, 12])
|
| 152 |
+
2026-01-26 04:48:50,030 INFO [inference.py:343] Encoder out range: [-11.071, 11.522]
|
| 153 |
+
2026-01-26 04:48:56,638 INFO [inference.py:353] Number of hypotheses: 38
|
| 154 |
+
2026-01-26 04:48:56,638 INFO [inference.py:355] First hypothesis: [4, 62, 4, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 68, 4, 2, 20, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 32, 4, 2, 20, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 4, 2, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 5, 65, 19, 36, 63, 54, 17, 6, 124, 32, 28, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 77, 71, 19, 70, 77, 71, 19, 70, 77, 71, 19, 70, 22, 22, 70, 22, 22, 70, 22, 22, 70, 22, 22, 148, 148, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 24, 185, 25, 13, 82, 185, 64]
|
| 155 |
+
2026-01-26 04:48:56,707 INFO [inference.py:318] Audio shape: torch.Size([38, 41760]), dtype: torch.float32
|
| 156 |
+
2026-01-26 04:48:56,707 INFO [inference.py:319] Audio range: [-0.246, 0.340]
|
| 157 |
+
2026-01-26 04:48:56,708 INFO [inference.py:320] Audio lengths: tensor([41760, 39680, 38880, 36799, 36639, 36000, 34559, 34240, 33120, 31840,
|
| 158 |
+
30720, 30560, 29760, 29280, 24640, 24160, 22720, 21759, 21600, 20960,
|
| 159 |
+
16320, 14400, 13600, 11360, 10880, 10399, 10400, 9760, 9440, 9280,
|
| 160 |
+
8320, 8320, 7680, 7360, 6880, 6880, 6240, 6240],
|
| 161 |
+
dtype=torch.int32)
|
| 162 |
+
2026-01-26 04:49:03,412 INFO [inference.py:341] Encoder out shape: torch.Size([38, 130, 1024])
|
| 163 |
+
2026-01-26 04:49:03,412 INFO [inference.py:342] Encoder out lens: tensor([130, 123, 121, 114, 114, 112, 107, 106, 103, 99, 95, 95, 92, 91,
|
| 164 |
+
76, 75, 70, 67, 67, 65, 50, 44, 42, 35, 33, 32, 32, 30,
|
| 165 |
+
29, 28, 25, 25, 23, 22, 21, 21, 19, 19])
|
| 166 |
+
2026-01-26 04:49:03,413 INFO [inference.py:343] Encoder out range: [-11.967, 11.229]
|
| 167 |
+
2026-01-26 04:49:09,503 INFO [inference.py:353] Number of hypotheses: 38
|
| 168 |
+
2026-01-26 04:49:09,503 INFO [inference.py:355] First hypothesis: [145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 145, 4, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 154, 44, 13, 24, 28, 27, 170, 205, 5, 130, 4, 2, 11, 39]
|
| 169 |
+
2026-01-26 04:49:09,512 INFO [inference.py:318] Audio shape: torch.Size([9, 170400]), dtype: torch.float32
|
| 170 |
+
2026-01-26 04:49:09,513 INFO [inference.py:319] Audio range: [-0.370, 0.393]
|
| 171 |
+
2026-01-26 04:49:09,513 INFO [inference.py:320] Audio lengths: tensor([170400, 166559, 165919, 164800, 156800, 152480, 147520, 146559, 145759],
|
| 172 |
+
dtype=torch.int32)
|
| 173 |
+
2026-01-26 04:49:17,349 INFO [inference.py:341] Encoder out shape: torch.Size([9, 532, 1024])
|
| 174 |
+
2026-01-26 04:49:17,349 INFO [inference.py:342] Encoder out lens: tensor([532, 520, 518, 514, 489, 476, 460, 457, 455])
|
| 175 |
+
2026-01-26 04:49:17,350 INFO [inference.py:343] Encoder out range: [-12.221, 14.348]
|
| 176 |
+
2026-01-26 04:49:28,539 INFO [inference.py:353] Number of hypotheses: 9
|
| 177 |
+
2026-01-26 04:49:28,539 INFO [inference.py:355] First hypothesis: [37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 4, 2, 4, 7, 197, 4, 2, 4, 7, 197, 4, 2, 4, 7, 197, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 18, 7, 69, 4, 2, 23, 17, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 51, 13, 43, 8, 232, 80, 4, 2, 23, 57, 18, 7, 85, 116, 55, 5, 28, 36, 30, 55, 80, 17, 17, 7, 5, 13, 43, 8, 5, 17, 7, 5, 13, 43, 8, 5, 17, 7, 5, 13, 43, 8, 5, 25, 6, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 39, 9, 46, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 233, 6, 74, 67, 14, 233, 6, 74, 67, 14, 233, 6, 74, 67, 14, 9, 24, 29, 70, 173, 17, 81, 7, 27, 154, 38, 13, 227, 211, 25, 6, 4, 27, 43, 8, 232, 4, 2, 274, 5, 156, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 4, 5, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 23, 15, 47, 122, 6, 226, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 4, 2, 127, 33, 4, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 5, 19, 5, 8, 40, 19, 5, 8, 40, 19, 5, 8, 40, 19, 5, 8, 40, 19, 5, 8, 5, 19, 5, 19, 5, 19, 5, 19, 5, 19, 5, 8, 19, 24, 19, 5, 8, 19, 24, 19, 5, 14, 69, 40, 183, 25, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 31, 20, 31, 20, 31, 20, 31, 20, 31, 20, 31, 9, 83, 7, 8, 34, 13, 31, 13, 239, 25, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 4, 2, 64, 18, 7, 85, 116, 123, 64, 18, 123, 64, 18, 123, 64, 18, 123, 64, 9, 47, 7, 8, 72, 52, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 4, 2, 64, 4, 2, 64, 4, 2, 64, 4, 2, 64, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 57, 100, 57, 100, 57, 100, 57, 100, 57, 100, 57, 371, 208, 387, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 74, 67, 14, 12]
|
| 178 |
+
2026-01-26 04:49:28,610 INFO [inference.py:318] Audio shape: torch.Size([5, 315520]), dtype: torch.float32
|
| 179 |
+
2026-01-26 04:49:28,611 INFO [inference.py:319] Audio range: [-0.297, 0.334]
|
| 180 |
+
2026-01-26 04:49:28,612 INFO [inference.py:320] Audio lengths: tensor([315520, 301440, 294399, 292480, 289919], dtype=torch.int32)
|
| 181 |
+
2026-01-26 04:49:36,428 INFO [inference.py:341] Encoder out shape: torch.Size([5, 985, 1024])
|
| 182 |
+
2026-01-26 04:49:36,429 INFO [inference.py:342] Encoder out lens: tensor([985, 941, 919, 913, 905])
|
| 183 |
+
2026-01-26 04:49:36,429 INFO [inference.py:343] Encoder out range: [-12.260, 13.635]
|
| 184 |
+
2026-01-26 04:49:53,832 INFO [inference.py:353] Number of hypotheses: 5
|
| 185 |
+
2026-01-26 04:49:53,833 INFO [inference.py:355] First hypothesis: [31, 53, 27, 8, 119, 55, 80, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 113, 5, 113, 5, 14, 5, 5, 23, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 315, 15, 7, 69, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 25, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 7, 5, 87, 7, 5, 87, 7, 5, 87, 7, 5, 51, 272, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 6, 17, 17, 106, 21, 96, 204, 9, 204, 204, 204, 204, 204, 204, 204, 204, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 204, 9, 204, 9, 204, 9, 204, 9, 204, 9, 258, 10, 7, 5, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 7, 5, 154, 231, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 144, 8, 54, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 106, 40, 275, 67, 279, 359, 275, 67, 279, 359, 275, 67, 279, 359, 275, 67, 279, 359, 275, 67, 279, 359, 275, 67, 172, 359, 42, 26, 170, 24, 24, 170, 8, 19, 86, 22, 142, 19, 24, 19, 36, 108, 32, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 14, 24, 19, 24, 14, 24, 19, 24, 14, 24, 19, 24, 24, 19, 24, 24, 19, 24, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 24, 19, 5, 19, 5, 14, 36, 19, 5, 14, 36, 19, 5, 14, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 14, 15, 4, 42, 15, 4, 42, 15, 4, 27, 4, 14, 4, 14, 4, 14, 4, 14, 4, 24, 30, 24, 24, 92, 173, 115, 54, 16, 25, 4, 96, 4, 96, 4, 96, 4, 96, 4, 96, 4, 96, 108, 209, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 297, 5, 25, 6, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 125, 10, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 35, 6, 24, 20, 84, 9, 24, 24, 20, 9, 24, 24, 20, 9, 24, 24, 98, 27, 267, 153, 267, 40, 30, 27, 267, 153, 267, 153, 267, 153, 267, 153, 5, 41, 5, 92, 5, 41, 5, 92, 5, 41, 5, 90, 42, 41, 26, 20, 18, 7, 27, 18, 7, 27, 18, 7, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 18, 18, 27, 113, 10, 100, 10, 33, 10, 114, 32, 7, 8, 72, 15, 72, 15, 72, 15, 72, 32, 26, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 30, 96, 244, 20, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 38, 53, 21, 29, 35, 369, 38, 86, 38, 35, 32, 28, 104, 108, 8, 26, 189, 19, 5, 162, 33, 10, 33, 5, 30, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 52, 15, 7, 27, 177, 28, 30, 96, 71, 19, 29, 71, 21, 29, 14, 25, 75, 25, 10, 7, 5, 13, 29, 29, 14, 43, 8, 232, 4, 2, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 204, 10, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 16, 8, 16, 16, 16, 16, 16, 16, 16, 16, 136, 19, 22, 54, 16, 56, 136, 54, 16, 56, 249, 249, 249, 249, 249, 249, 249, 249, 249, 249, 10, 7, 5, 13, 16, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 19, 36, 58, 140, 21, 43, 36, 19, 36, 58, 63, 40, 19, 41, 69, 40, 69, 75, 75, 14, 131, 13, 9, 13, 9, 13, 9, 13, 9, 13, 9, 7, 85, 13, 4, 14, 70, 13, 13, 74, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 341, 18, 7, 27, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 47, 154, 154, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 101, 6, 24, 35, 6, 35, 82, 70, 182, 182, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 4, 5, 23, 195, 25, 13, 22, 104, 19, 22, 14, 10, 7, 5, 10, 7, 5, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 4, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 30, 5, 159, 31, 46, 46, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 18, 18, 34, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 29, 28, 28, 29, 28, 28, 29, 28, 28, 29, 29, 28, 29, 29, 28, 29, 29, 28, 29, 29, 119, 5, 14, 190, 8, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 159, 33, 4, 96, 40, 275, 32, 5, 24, 86, 22, 142, 5, 43, 40, 19, 60, 40, 79, 137, 43, 40, 19, 60, 137, 43, 40, 19, 60, 137, 43, 40, 19, 60, 137, 43, 40, 19, 60, 137, 19, 60, 137, 19, 60, 137, 19, 60, 137, 19, 82, 19, 40, 19, 82, 19, 40, 19, 82, 19, 29, 14, 21, 29, 14, 21, 29, 14, 21, 29, 29, 14, 43, 8, 5, 29, 14, 29, 29, 14, 29, 29, 14, 29, 5, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 159, 33, 5, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 16, 22, 16, 394, 333, 172, 20, 9, 7, 85, 34, 199, 25, 6, 35, 22, 19, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 25, 6, 24, 5, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 24, 24, 63, 42, 33, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5]
|
| 186 |
+
2026-01-26 04:49:53,846 INFO [inference.py:318] Audio shape: torch.Size([6, 239520]), dtype: torch.float32
|
| 187 |
+
2026-01-26 04:49:53,847 INFO [inference.py:319] Audio range: [-0.116, 0.111]
|
| 188 |
+
2026-01-26 04:49:53,848 INFO [inference.py:320] Audio lengths: tensor([239519, 234240, 223840, 223360, 219679, 215680], dtype=torch.int32)
|
| 189 |
+
2026-01-26 04:50:02,225 INFO [inference.py:341] Encoder out shape: torch.Size([6, 748, 1024])
|
| 190 |
+
2026-01-26 04:50:02,226 INFO [inference.py:342] Encoder out lens: tensor([748, 731, 699, 697, 686, 673])
|
| 191 |
+
2026-01-26 04:50:02,226 INFO [inference.py:343] Encoder out range: [-13.591, 10.919]
|
| 192 |
+
2026-01-26 04:50:10,299 INFO [inference.py:353] Number of hypotheses: 6
|
| 193 |
+
2026-01-26 04:50:10,299 INFO [inference.py:355] First hypothesis: [49, 4, 2, 20, 84, 18, 7, 27, 154, 38, 13, 227, 211, 120, 412, 23, 23, 4, 2, 31, 20, 84, 18, 7, 27, 154, 34, 16, 34, 13, 4, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 80, 4, 2, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 4, 2, 9, 49, 9, 7, 24, 68, 218, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 9, 7, 24, 68, 218, 52, 18, 7, 27, 154, 34, 16, 34, 13, 4, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 5, 5, 19, 82, 70, 25, 321, 18, 7, 85, 116, 334, 6, 24, 61, 110, 46, 17, 88, 38, 120, 289, 39, 18, 7, 69, 38, 86, 98, 30, 22, 233, 6, 221, 18, 18, 241, 13, 160, 202, 25, 6, 24, 5, 25, 6, 24, 5, 25, 6, 24, 5, 25, 6, 24, 33, 10, 13, 239, 25, 31]
|
| 194 |
+
2026-01-26 04:50:10,309 INFO [inference.py:318] Audio shape: torch.Size([5, 315200]), dtype: torch.float32
|
| 195 |
+
2026-01-26 04:50:10,310 INFO [inference.py:319] Audio range: [-0.082, 0.158]
|
| 196 |
+
2026-01-26 04:50:10,311 INFO [inference.py:320] Audio lengths: tensor([315200, 310560, 300000, 299680, 296959], dtype=torch.int32)
|
| 197 |
+
2026-01-26 04:50:18,933 INFO [inference.py:341] Encoder out shape: torch.Size([5, 984, 1024])
|
| 198 |
+
2026-01-26 04:50:18,933 INFO [inference.py:342] Encoder out lens: tensor([984, 970, 937, 936, 927])
|
| 199 |
+
2026-01-26 04:50:18,934 INFO [inference.py:343] Encoder out range: [-14.589, 11.647]
|
| 200 |
+
2026-01-26 04:50:32,710 INFO [inference.py:353] Number of hypotheses: 5
|
| 201 |
+
2026-01-26 04:50:32,710 INFO [inference.py:355] First hypothesis: [59, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 4, 7, 197, 23, 15, 7, 27, 249, 56, 15, 249, 56, 15, 249, 56, 15, 249, 56, 15, 7, 27, 310, 310, 310, 310, 310, 310, 310, 18, 7, 27, 38, 26, 35, 331, 4, 32, 67, 32, 67, 32, 67, 32, 67, 331, 331, 331, 331, 331, 331, 331, 331, 331, 331, 264, 205, 264, 264, 264, 264, 264, 264, 264, 264, 128, 23, 9, 34, 64, 64, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 106, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 56, 217, 217, 36, 16, 259, 16, 56, 217, 36, 19, 55, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 36, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 81, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 76, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 245, 36, 245, 134, 46, 46, 131, 34, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 14, 21, 4, 28, 4, 30, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 4, 28, 30, 8, 28, 28, 30, 8, 28, 28, 30, 8, 28, 29, 29, 8, 8, 137, 8, 40, 19, 60, 137, 8, 96, 63, 209, 8, 137, 8, 96, 63, 60, 41, 149, 60, 137, 8, 96, 63, 60, 119, 8, 30, 201, 14, 8, 14, 43, 8, 155, 30, 24, 443, 155, 22, 30, 36, 8, 19, 36, 22, 41, 55, 70, 22, 36, 30, 36, 63, 105, 50, 20, 50, 20, 20, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 152, 25, 274, 5, 20, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 74, 46, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, 36, 29, 29, 36, 53, 36, 29, 29, 36, 53, 36, 30, 36, 53, 36, 30, 36, 53, 36, 30, 36, 30, 36, 30, 36, 30, 36, 30, 36, 30, 60, 60, 70, 32, 26, 35, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 36, 53, 36, 29, 80, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 26, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 76, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 49, 26, 33, 35, 13, 30, 27, 20, 4, 27, 55, 19, 60, 30, 183, 20, 9, 100, 51, 51, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 106, 21, 96, 14, 14, 96, 455, 106, 14, 96, 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 455, 189, 43, 21, 8, 96, 14, 14, 43, 8, 204, 80, 95, 33, 13, 33, 13, 33, 13, 33, 13, 33, 13, 33, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 4, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 5, 5, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 228, 19, 5, 19, 5, 19, 5, 19, 5, 19, 5, 19, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 5, 8, 8, 28, 8, 8, 28, 8, 8, 28, 8, 8, 28, 24, 8, 28, 24, 8, 28, 24, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 8, 28, 24, 14, 24, 24, 29, 36, 8, 14, 29, 29, 54, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 76, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 22, 41, 54, 117, 6, 16, 6, 16, 6, 16, 6, 16, 6, 16, 6, 16, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 71, 132, 21, 132, 19, 131, 370, 132, 21, 370, 132, 21, 370, 132, 21, 370, 132, 19, 131, 370, 41, 13, 13, 4, 70, 4, 140, 82, 4, 70, 69, 36, 4, 140, 82, 4, 70, 69, 36, 69, 185, 267, 153, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 158, 30, 70, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 194, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 47, 152, 25, 124, 32, 28, 8, 86, 32, 28, 8, 86, 13, 32, 28, 8, 21, 24, 14, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 63, 4, 28, 4, 27, 21, 13, 4, 27, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 29, 29, 28, 29, 14, 13, 29, 29, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 36, 53, 36, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 383, 26, 4, 27, 89, 89, 89, 89, 89, 89, 89, 89, 89, 89, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 403, 50, 73, 59, 59, 19, 75, 19, 286, 25, 59, 28, 59, 28, 59, 28, 36, 25, 105, 25, 105, 25, 105, 25, 105, 25, 105, 25, 6, 74, 153, 54, 17, 6, 39, 39, 10, 7, 5, 13, 22, 22, 54, 16, 6, 4, 499, 30, 28, 43, 43, 90, 188, 22, 14, 36, 22, 22, 70, 22, 22, 70, 22, 22, 70, 22, 22, 86, 22, 86, 20, 6, 24, 4, 24, 4, 24, 4, 24, 4, 24, 4, 24, 30, 24, 155, 8, 30, 24, 63, 8, 173, 214, 101, 214, 101, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 215, 255, 26, 35, 6, 35, 32, 28, 42, 26, 15, 87, 81, 81, 76, 87, 76, 87, 76, 87, 76, 87, 76, 87, 76, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 43, 43, 90, 188, 22, 14, 70, 43, 36, 30, 22, 14, 70, 43, 8, 54, 38, 56, 18, 56, 18, 56, 18, 56, 18, 56, 18, 56, 10, 35, 5, 19, 5, 93, 170, 24, 24, 19, 24, 24, 19, 24, 24, 19, 24, 24, 19, 24, 24, 63, 19, 24, 14, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 29, 29, 14, 43, 8, 38, 53, 30, 82, 38, 38, 38, 38, 38, 38, 38, 38, 38, 35, 6, 165, 32, 30, 24, 67, 32, 153, 32, 226, 331, 33, 59, 19, 75, 75, 19, 5, 75, 75, 19, 5, 75, 75, 19, 104, 19, 75, 75, 19, 104, 19, 75, 75, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 141, 19, 104, 19, 75, 19, 104, 19, 75, 19, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 104, 19, 22, 30, 97, 15, 51, 95, 15, 51, 95, 15, 51, 95, 15, 51, 95, 10, 33, 51, 33, 51, 33, 51, 33, 51, 44, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 4, 5, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 122, 15, 122, 32, 135, 225, 225, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 6, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 7, 27, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 56, 15, 72, 35, 5, 8, 5, 93, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 24, 9, 7, 24, 9, 7, 24, 9, 7, 24, 125, 10, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 30, 29, 28, 28, 29, 14, 147, 147, 147, 147, 31, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 20, 279, 26, 16, 269, 30, 24, 29, 36, 108, 79, 243, 243, 391, 274, 156, 180, 4, 257, 4, 257, 4, 257, 4, 257, 4, 257, 24, 28, 24, 4, 24, 4, 24, 4, 24, 4, 24, 29, 36, 5, 29, 29, 5, 19, 97, 5, 5, 19, 97, 5, 5, 19, 97, 29, 19, 97, 29, 19, 97, 29, 19, 97, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 19, 19, 29, 71, 19, 29, 71, 19, 29, 71, 19, 29, 8, 54, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 7, 5, 31, 12, 31]
|
| 202 |
+
2026-01-26 04:50:32,722 INFO [inference.py:318] Audio shape: torch.Size([6, 237280]), dtype: torch.float32
|
| 203 |
+
2026-01-26 04:50:32,723 INFO [inference.py:319] Audio range: [-0.130, 0.131]
|
| 204 |
+
2026-01-26 04:50:32,723 INFO [inference.py:320] Audio lengths: tensor([237280, 228159, 220639, 220480, 219359, 213119], dtype=torch.int32)
|
| 205 |
+
2026-01-26 04:50:40,430 INFO [inference.py:341] Encoder out shape: torch.Size([6, 741, 1024])
|
| 206 |
+
2026-01-26 04:50:40,430 INFO [inference.py:342] Encoder out lens: tensor([741, 712, 689, 688, 685, 665])
|
| 207 |
+
2026-01-26 04:50:40,435 INFO [inference.py:343] Encoder out range: [-13.120, 12.506]
|
| 208 |
+
2026-01-26 04:50:52,401 INFO [inference.py:353] Number of hypotheses: 6
|
| 209 |
+
2026-01-26 04:50:52,402 INFO [inference.py:355] First hypothesis: [105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 105, 4, 257, 24, 92, 60, 92, 7, 8, 326, 116, 4, 257, 24, 92, 60, 4, 257, 24, 92, 60, 4, 257, 24, 92, 60, 28, 37, 4, 7, 295, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 171, 171, 171, 171, 171, 171, 171, 171, 171, 171, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 31, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 6, 91, 18, 7, 27, 154, 259, 16, 16, 16, 16, 16, 16, 16, 16, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 204, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 245, 14, 43, 8, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 349, 112, 283, 283, 283, 283, 283, 283, 283, 283, 283, 18, 7, 27, 13, 43, 43, 90, 58, 21, 58, 21, 58, 21, 58, 21, 58, 21, 43, 43, 90, 22, 41, 275, 32, 41, 275, 32, 41, 275, 32, 41, 275, 32, 41, 327, 4, 27, 5, 30, 27, 5, 19, 5, 162, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 18, 7, 85, 18, 18, 204, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 43, 8, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 113, 5, 14, 5, 5, 26, 4, 5, 26, 4, 5, 26, 4, 5, 26, 248, 130, 128, 26, 122, 32, 248, 130, 128, 26, 130, 4, 223, 75, 59, 28, 29, 59, 28, 29, 59, 28, 29, 59, 28, 36, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 329, 159, 33, 16, 136, 19, 22, 54, 16, 136, 19, 22, 54, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 323, 16, 323, 16, 323, 16, 323, 16, 323, 16, 6, 157, 33, 13, 196, 5, 94, 271, 16, 6, 4, 5, 29, 29, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 4, 5, 4, 2, 11, 17, 7, 5, 13, 4, 5, 4, 2, 64, 9, 7, 85, 34, 13, 239, 25, 105, 25, 6, 91, 109, 5, 61, 335, 15, 7, 27, 105, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 2, 31, 23, 17, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 52, 10, 52, 10, 52, 10, 52, 10, 52, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 49, 193, 81, 49, 49, 101, 220, 50, 31, 50, 6, 98, 21, 22, 332, 243, 243, 5, 25, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 98, 30, 82, 82, 40, 98, 30, 82, 40, 30, 82, 40, 30, 82, 40, 30, 82, 30, 36, 63, 113, 366, 113, 64, 113, 366, 113, 64, 113, 10, 113, 13, 113, 13, 113, 13, 113, 79, 14, 5, 5, 183, 5, 25, 6, 4, 5, 21, 97, 49, 193, 49, 193, 49, 193, 49, 193, 49, 193, 49, 193, 49, 10, 7, 5, 10, 7, 5, 154, 38, 35, 6, 35, 22, 14, 5, 35, 6, 35, 22, 19, 28, 168, 94, 14, 4, 2, 23, 98, 30, 42, 233, 98, 30, 42, 233, 98, 30, 42, 215, 165, 32, 22, 233, 98, 36, 67, 98, 36, 67, 98, 36, 67, 98, 36, 67, 98, 36, 67, 14, 9, 102, 9, 7, 24, 154, 34, 124, 202, 20, 101, 101, 31, 9, 7, 85, 34, 13, 169, 25, 147, 147, 147, 147, 147, 147, 147, 147, 147, 147, 73, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 98, 30, 82, 82, 70, 101, 150, 101, 17, 101, 17, 101, 17, 101, 17, 101, 17, 101, 17, 101, 6, 150, 17, 52, 15, 234, 16, 6, 106, 141, 19, 5, 14, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 13, 211, 25, 75, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 329, 185, 57, 18, 7, 85, 184, 105, 6, 329, 123, 38, 13, 227, 211, 25, 75, 90, 8, 325, 25, 6, 222, 298, 157, 41, 5, 92, 55, 490, 86, 97, 369, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 27, 60, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 27, 5, 30, 24, 29, 70, 173, 5, 15, 47, 56, 15, 49, 18, 114, 38, 35, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 296, 380, 5, 11]
|
| 210 |
+
2026-01-26 04:50:52,416 INFO [inference.py:318] Audio shape: torch.Size([17, 91040]), dtype: torch.float32
|
| 211 |
+
2026-01-26 04:50:52,417 INFO [inference.py:319] Audio range: [-0.574, 0.629]
|
| 212 |
+
2026-01-26 04:50:52,418 INFO [inference.py:320] Audio lengths: tensor([91040, 90240, 89119, 88480, 87520, 86079, 83680, 82880, 81120, 79520,
|
| 213 |
+
79520, 78079, 76800, 76480, 73760, 73600, 73599], dtype=torch.int32)
|
| 214 |
+
2026-01-26 04:50:59,721 INFO [inference.py:341] Encoder out shape: torch.Size([17, 284, 1024])
|
| 215 |
+
2026-01-26 04:50:59,722 INFO [inference.py:342] Encoder out lens: tensor([284, 281, 278, 276, 273, 268, 261, 258, 253, 248, 248, 243, 239, 238,
|
| 216 |
+
230, 229, 229])
|
| 217 |
+
2026-01-26 04:50:59,722 INFO [inference.py:343] Encoder out range: [-13.703, 11.821]
|
| 218 |
+
2026-01-26 04:51:09,014 INFO [inference.py:353] Number of hypotheses: 17
|
| 219 |
+
2026-01-26 04:51:09,014 INFO [inference.py:355] First hypothesis: [50, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 66, 16, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 131, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 214, 98, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 24, 67, 205, 205, 5, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 16, 259, 259, 16, 259, 259, 16, 259, 259, 16, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 259, 101, 15, 72, 15, 72, 15, 72, 15, 72, 15, 72, 101, 6, 4, 5, 8, 136, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 265, 147, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 205, 5, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 15, 34, 13, 4, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 14, 5, 5, 19, 5, 8, 41, 5, 92, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 24, 19, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 22, 14, 5, 5, 19, 22, 14, 5, 5, 19, 22, 14, 5, 5, 19, 24, 94, 5, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 9, 113, 79, 14, 5, 5, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 5, 71, 19, 29, 71, 19, 29, 71, 19, 29, 71, 21, 29, 5, 8, 40, 19, 29, 71, 21, 29, 5, 4, 2, 9, 49, 23, 9, 49, 9, 7, 85, 34, 13, 74, 19, 5, 14, 36, 75, 19, 80]
|
| 220 |
+
2026-01-26 04:51:09,027 INFO [inference.py:318] Audio shape: torch.Size([23, 68960]), dtype: torch.float32
|
| 221 |
+
2026-01-26 04:51:09,030 INFO [inference.py:319] Audio range: [-0.269, 0.266]
|
| 222 |
+
2026-01-26 04:51:09,031 INFO [inference.py:320] Audio lengths: tensor([68959, 66880, 64800, 64479, 61920, 59680, 54400, 53440, 52479, 52319,
|
| 223 |
+
51840, 46880, 46559, 45120, 44480, 43360, 43360, 43360, 43040, 43040,
|
| 224 |
+
43040, 42880, 42560], dtype=torch.int32)
|
| 225 |
+
2026-01-26 04:51:16,407 INFO [inference.py:341] Encoder out shape: torch.Size([23, 215, 1024])
|
| 226 |
+
2026-01-26 04:51:16,408 INFO [inference.py:342] Encoder out lens: tensor([215, 208, 202, 201, 193, 186, 169, 166, 163, 163, 161, 146, 145, 140,
|
| 227 |
+
138, 135, 135, 135, 134, 134, 134, 133, 132])
|
| 228 |
+
2026-01-26 04:51:16,408 INFO [inference.py:343] Encoder out range: [-13.477, 12.445]
|
| 229 |
+
2026-01-26 04:51:24,735 INFO [inference.py:353] Number of hypotheses: 23
|
| 230 |
+
2026-01-26 04:51:24,735 INFO [inference.py:355] First hypothesis: [4, 7, 295, 4, 2, 11, 4, 2, 9, 7, 85, 151, 4, 28, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 4, 2, 9, 7, 24, 51, 34, 13, 4, 5, 21, 69, 5, 8, 41, 5, 92, 55, 490, 86, 97, 4, 2, 31, 20, 4, 133, 133, 22, 5, 51, 4, 2, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11]
|
| 231 |
+
2026-01-26 04:51:24,800 INFO [inference.py:544] Processed 391 utterances in 20 batches
|
| 232 |
+
2026-01-26 04:51:24,806 INFO [inference.py:318] Audio shape: torch.Size([17, 92320]), dtype: torch.float32
|
| 233 |
+
2026-01-26 04:51:24,807 INFO [inference.py:319] Audio range: [-0.234, 0.300]
|
| 234 |
+
2026-01-26 04:51:24,808 INFO [inference.py:320] Audio lengths: tensor([92320, 91200, 91200, 90560, 89120, 84000, 83840, 83360, 82880, 82079,
|
| 235 |
+
79840, 79520, 76800, 73760, 73280, 70079, 69600], dtype=torch.int32)
|
| 236 |
+
2026-01-26 04:51:32,739 INFO [inference.py:341] Encoder out shape: torch.Size([17, 288, 1024])
|
| 237 |
+
2026-01-26 04:51:32,741 INFO [inference.py:342] Encoder out lens: tensor([288, 284, 284, 282, 278, 262, 261, 260, 258, 256, 249, 248, 239, 230,
|
| 238 |
+
228, 218, 217])
|
| 239 |
+
2026-01-26 04:51:32,741 INFO [inference.py:343] Encoder out range: [-13.483, 12.297]
|
| 240 |
+
2026-01-26 04:51:45,129 INFO [inference.py:353] Number of hypotheses: 17
|
| 241 |
+
2026-01-26 04:51:45,129 INFO [inference.py:355] First hypothesis: [39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 52, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 191, 56, 56, 196, 104, 19, 22, 30, 70, 22, 435, 55, 185, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 58, 36, 19, 36, 58, 36, 19, 36, 58, 36, 19, 36, 121, 54, 121, 5, 121, 54, 121, 5, 121, 54, 121, 5, 15, 7, 85, 274, 50, 180, 43, 92, 7, 8, 274, 50, 180, 43, 92, 50, 8, 28, 24, 5, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 83, 14, 84, 84, 84, 84, 84, 84, 84, 84, 84, 83, 14, 83, 84, 84, 83, 14, 83, 84, 84, 83, 7, 8, 83, 14, 84, 83, 7, 8, 83, 14, 83, 14, 83, 14, 83, 14, 83, 14, 83, 7, 8, 217, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 56, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 43, 30, 5, 14, 190, 189, 43, 30, 5, 14, 190, 4, 190, 189, 97, 5, 5, 29, 14, 245, 14, 43, 8, 245, 21, 43, 245, 14, 43, 8, 349, 245, 21, 43, 8, 349, 245, 21, 43, 8, 119, 21, 29, 14, 43, 19, 201, 14, 190, 8, 119, 8, 54, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 34, 150, 20, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84]
|
| 242 |
+
2026-01-26 04:51:45,142 INFO [inference.py:318] Audio shape: torch.Size([23, 68800]), dtype: torch.float32
|
| 243 |
+
2026-01-26 04:51:45,143 INFO [inference.py:319] Audio range: [-0.321, 0.370]
|
| 244 |
+
2026-01-26 04:51:45,144 INFO [inference.py:320] Audio lengths: tensor([68799, 66720, 62560, 62240, 61919, 60160, 59840, 58080, 57920, 57280,
|
| 245 |
+
53920, 52960, 51040, 50080, 49920, 49280, 48160, 48160, 47680, 47200,
|
| 246 |
+
44800, 44000, 42560], dtype=torch.int32)
|
| 247 |
+
2026-01-26 04:51:52,920 INFO [inference.py:341] Encoder out shape: torch.Size([23, 214, 1024])
|
| 248 |
+
2026-01-26 04:51:52,921 INFO [inference.py:342] Encoder out lens: tensor([214, 208, 195, 194, 193, 187, 186, 181, 180, 178, 168, 165, 159, 156,
|
| 249 |
+
155, 153, 150, 150, 148, 147, 139, 137, 132])
|
| 250 |
+
2026-01-26 04:51:52,921 INFO [inference.py:343] Encoder out range: [-11.273, 12.003]
|
| 251 |
+
2026-01-26 04:52:00,812 INFO [inference.py:353] Number of hypotheses: 23
|
| 252 |
+
2026-01-26 04:52:00,813 INFO [inference.py:355] First hypothesis: [218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 218, 4, 2, 11, 17, 7, 5, 13, 4, 5, 21, 69, 5, 8, 41, 5, 92, 55, 490, 86, 97, 5, 5, 19, 5, 8, 40, 19, 29, 8, 5, 17, 7, 5, 13, 211, 120, 412, 20, 265]
|
| 253 |
+
2026-01-26 04:52:00,823 INFO [inference.py:318] Audio shape: torch.Size([38, 42080]), dtype: torch.float32
|
| 254 |
+
2026-01-26 04:52:00,824 INFO [inference.py:319] Audio range: [-0.400, 0.452]
|
| 255 |
+
2026-01-26 04:52:00,825 INFO [inference.py:320] Audio lengths: tensor([42080, 39200, 37439, 36960, 35520, 34560, 34079, 33599, 33600, 33280,
|
| 256 |
+
31520, 31200, 29760, 28160, 28000, 27200, 26720, 25600, 25120, 23200,
|
| 257 |
+
22880, 21280, 20800, 20000, 19680, 19520, 19200, 18080, 17600, 17600,
|
| 258 |
+
16320, 13120, 12320, 11680, 8000, 6400, 5120, 3840],
|
| 259 |
+
dtype=torch.int32)
|
| 260 |
+
2026-01-26 04:52:07,931 INFO [inference.py:341] Encoder out shape: torch.Size([38, 131, 1024])
|
| 261 |
+
2026-01-26 04:52:07,932 INFO [inference.py:342] Encoder out lens: tensor([131, 122, 116, 115, 110, 107, 106, 104, 104, 103, 98, 97, 92, 87,
|
| 262 |
+
87, 84, 83, 79, 78, 72, 71, 66, 64, 62, 61, 60, 59, 56,
|
| 263 |
+
54, 54, 50, 40, 38, 36, 24, 19, 15, 11])
|
| 264 |
+
2026-01-26 04:52:07,932 INFO [inference.py:343] Encoder out range: [-11.872, 11.798]
|
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-05-16-checkpoint
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:05:16,888 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 05:05:16,888 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:05:16,888 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 05:05:16,888 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:05:16,888 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:05:16,888 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 05:05:16,888 INFO [inference.py:416] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:05:16,888 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 05:05:16,888 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 05:05:16,890 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 05:05:16,890 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 05:05:16,890 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 05:05:18,544 INFO [inference.py:444] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 14 |
+
2026-01-26 05:05:18,544 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 15 |
+
2026-01-26 05:05:23,319 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 05:05:23,320 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 05:05:23,320 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 05:05:24,403 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 05:05:24,403 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 05:05:25,573 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 05:05:25,576 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 05:05:25,579 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 05:05:34,838 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 05:05:34,839 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 05:05:34,839 INFO [inference.py:334] Encoder out range: [-13.684, 12.764]
|
| 26 |
+
2026-01-26 05:05:35,536 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 05:05:35,537 INFO [inference.py:346] First hypothesis: [171]
|
| 28 |
+
2026-01-26 05:05:35,546 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 05:05:35,547 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 05:05:35,547 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 05:05:43,001 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 05:05:43,003 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 05:05:43,003 INFO [inference.py:334] Encoder out range: [-12.514, 12.004]
|
| 37 |
+
2026-01-26 05:05:43,905 INFO [inference.py:344] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 05:05:43,905 INFO [inference.py:346] First hypothesis: [23, 51, 156, 6, 205, 18, 116, 113, 363]
|
| 39 |
+
2026-01-26 05:05:43,925 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 05:05:43,926 INFO [inference.py:310] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 05:05:43,926 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
| 46 |
+
2026-01-26 05:05:51,027 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
|
| 47 |
+
2026-01-26 05:05:51,028 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100, 94, 89, 88, 87, 73,
|
| 48 |
+
71, 71, 69, 68, 68, 65, 62, 62, 59, 59, 58, 56, 51, 45,
|
| 49 |
+
42, 40, 38, 36, 35, 33, 29, 28, 24, 18, 17])
|
| 50 |
+
2026-01-26 05:05:51,028 INFO [inference.py:334] Encoder out range: [-11.444, 10.811]
|
| 51 |
+
2026-01-26 05:05:51,620 INFO [inference.py:344] Number of hypotheses: 39
|
| 52 |
+
2026-01-26 05:05:51,620 INFO [inference.py:346] First hypothesis: [11]
|
| 53 |
+
2026-01-26 05:05:51,628 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
|
| 54 |
+
2026-01-26 05:05:51,629 INFO [inference.py:310] Audio range: [-0.314, 0.332]
|
| 55 |
+
2026-01-26 05:05:51,629 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
|
| 56 |
+
56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
|
| 57 |
+
46079, 45280, 44960], dtype=torch.int32)
|
| 58 |
+
2026-01-26 05:05:59,021 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
|
| 59 |
+
2026-01-26 05:05:59,022 INFO [inference.py:333] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
|
| 60 |
+
170, 166, 164, 164, 147, 144, 143, 141, 140])
|
| 61 |
+
2026-01-26 05:05:59,023 INFO [inference.py:334] Encoder out range: [-13.261, 11.090]
|
| 62 |
+
2026-01-26 05:05:59,931 INFO [inference.py:344] Number of hypotheses: 23
|
| 63 |
+
2026-01-26 05:05:59,932 INFO [inference.py:346] First hypothesis: [20]
|
| 64 |
+
2026-01-26 05:06:00,567 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
|
| 65 |
+
2026-01-26 05:06:00,568 INFO [inference.py:310] Audio range: [-0.323, 0.414]
|
| 66 |
+
2026-01-26 05:06:00,569 INFO [inference.py:311] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
|
| 67 |
+
2026-01-26 05:06:09,302 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
|
| 68 |
+
2026-01-26 05:06:09,303 INFO [inference.py:333] Encoder out lens: tensor([991, 931, 931, 920, 913])
|
| 69 |
+
2026-01-26 05:06:09,304 INFO [inference.py:334] Encoder out range: [-14.241, 14.344]
|
| 70 |
+
2026-01-26 05:06:10,112 INFO [inference.py:344] Number of hypotheses: 5
|
| 71 |
+
2026-01-26 05:06:10,113 INFO [inference.py:346] First hypothesis: [37, 23, 127, 274, 5, 147, 80, 73, 6, 16, 29, 119, 5, 20, 84, 171, 15, 6, 328, 5, 127, 20, 56]
|
| 72 |
+
2026-01-26 05:06:10,117 INFO [inference.py:309] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
|
| 73 |
+
2026-01-26 05:06:10,118 INFO [inference.py:310] Audio range: [-0.274, 0.362]
|
| 74 |
+
2026-01-26 05:06:10,118 INFO [inference.py:311] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
|
| 75 |
+
33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
|
| 76 |
+
20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320, 7040,
|
| 77 |
+
6560, 6400, 5760, 5760, 5120, 4800, 4800, 4640, 4480, 3360],
|
| 78 |
+
dtype=torch.int32)
|
| 79 |
+
2026-01-26 05:06:16,910 INFO [inference.py:332] Encoder out shape: torch.Size([40, 122, 1024])
|
| 80 |
+
2026-01-26 05:06:16,911 INFO [inference.py:333] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103, 91, 91,
|
| 81 |
+
85, 77, 77, 73, 67, 67, 64, 52, 50, 50, 49, 48, 46, 41,
|
| 82 |
+
38, 21, 20, 19, 17, 17, 15, 14, 14, 14, 13, 10])
|
| 83 |
+
2026-01-26 05:06:16,911 INFO [inference.py:334] Encoder out range: [-11.784, 11.570]
|
| 84 |
+
2026-01-26 05:06:17,504 INFO [inference.py:344] Number of hypotheses: 40
|
| 85 |
+
2026-01-26 05:06:17,504 INFO [inference.py:346] First hypothesis: []
|
| 86 |
+
2026-01-26 05:06:17,513 INFO [inference.py:309] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
|
| 87 |
+
2026-01-26 05:06:17,514 INFO [inference.py:310] Audio range: [-0.514, 0.393]
|
| 88 |
+
2026-01-26 05:06:17,514 INFO [inference.py:311] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
|
| 89 |
+
51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
|
| 90 |
+
44640, 44320, 44160], dtype=torch.int32)
|
| 91 |
+
2026-01-26 05:06:24,334 INFO [inference.py:332] Encoder out shape: torch.Size([23, 208, 1024])
|
| 92 |
+
2026-01-26 05:06:24,335 INFO [inference.py:333] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
|
| 93 |
+
154, 150, 150, 149, 146, 143, 139, 138, 137])
|
| 94 |
+
2026-01-26 05:06:24,335 INFO [inference.py:334] Encoder out range: [-12.152, 11.060]
|
| 95 |
+
2026-01-26 05:06:25,044 INFO [inference.py:344] Number of hypotheses: 23
|
| 96 |
+
2026-01-26 05:06:25,045 INFO [inference.py:346] First hypothesis: [51, 6, 4, 27, 5, 26, 70, 22, 20, 265, 33, 46]
|
| 97 |
+
2026-01-26 05:06:25,108 INFO [inference.py:309] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
|
| 98 |
+
2026-01-26 05:06:25,109 INFO [inference.py:310] Audio range: [-0.416, 0.458]
|
| 99 |
+
2026-01-26 05:06:25,109 INFO [inference.py:311] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
|
| 100 |
+
50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
|
| 101 |
+
45920, 44640, 43040, 42720], dtype=torch.int32)
|
| 102 |
+
2026-01-26 05:06:32,219 INFO [inference.py:332] Encoder out shape: torch.Size([24, 204, 1024])
|
| 103 |
+
2026-01-26 05:06:32,220 INFO [inference.py:333] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
|
| 104 |
+
151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
|
| 105 |
+
2026-01-26 05:06:32,220 INFO [inference.py:334] Encoder out range: [-12.007, 11.624]
|
| 106 |
+
2026-01-26 05:06:32,900 INFO [inference.py:344] Number of hypotheses: 24
|
| 107 |
+
2026-01-26 05:06:32,900 INFO [inference.py:346] First hypothesis: [11]
|
| 108 |
+
2026-01-26 05:06:32,908 INFO [inference.py:309] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
|
| 109 |
+
2026-01-26 05:06:32,920 INFO [inference.py:310] Audio range: [-0.135, 0.191]
|
| 110 |
+
2026-01-26 05:06:32,921 INFO [inference.py:311] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
|
| 111 |
+
dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-20-04-checkpoint
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:20:04,436 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:20:04,436 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:20:04,436 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:20:04,436 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:20:04,436 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:20:04,436 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:20:04,436 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:20:04,436 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:20:04,436 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:20:04,436 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:20:04,437 INFO [inference.py:629] Device: cpu
|
| 12 |
+
2026-01-26 05:20:04,437 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:20:04,438 INFO [inference.py:640] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:20:04,438 INFO [inference.py:641] Blank ID: 0
|
| 15 |
+
2026-01-26 05:20:04,438 INFO [inference.py:644] Creating model
|
| 16 |
+
2026-01-26 05:20:05,956 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 17 |
+
2026-01-26 05:20:05,957 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 18 |
+
2026-01-26 05:20:10,638 INFO [inference.py:680] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:20:10,639 INFO [inference.py:683] Loading test data
|
| 20 |
+
2026-01-26 05:20:10,639 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:20:11,677 INFO [inference.py:694] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:20:11,677 INFO [inference.py:697] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/.ipynb_checkpoints/log-inference-ihm-2026-01-26-05-29-29-checkpoint
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:29:29,151 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:29:29,151 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:29:29,151 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:29:29,151 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:29:29,151 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:29:29,151 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:29:29,151 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:29:29,151 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:29:29,151 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:29:29,151 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:29:29,151 INFO [inference.py:629] Device: cpu
|
| 12 |
+
2026-01-26 05:29:29,151 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:29:29,153 INFO [inference.py:640] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:29:29,153 INFO [inference.py:641] Blank ID: 0
|
| 15 |
+
2026-01-26 05:29:29,153 INFO [inference.py:644] Creating model
|
| 16 |
+
2026-01-26 05:29:30,733 INFO [inference.py:673] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 05:29:30,734 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 05:29:35,902 INFO [inference.py:680] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:29:35,902 INFO [inference.py:683] Loading test data
|
| 20 |
+
2026-01-26 05:29:35,902 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:29:37,022 INFO [inference.py:694] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:29:37,023 INFO [inference.py:697] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/hyp-ihm.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-25-15-47-40
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-25 15:47:40,637 INFO [inference.py:419] ================================================================================
|
| 2 |
+
2026-01-25 15:47:40,637 INFO [inference.py:420] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-25 15:47:40,637 INFO [inference.py:421] ================================================================================
|
| 4 |
+
2026-01-25 15:47:40,637 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-25 15:47:40,637 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-25 15:47:40,637 INFO [inference.py:424] Test set: ihm
|
| 7 |
+
2026-01-25 15:47:40,637 INFO [inference.py:425] Decoding method: greedy_search
|
| 8 |
+
2026-01-25 15:47:40,637 INFO [inference.py:431] Device: cpu
|
| 9 |
+
2026-01-25 15:47:40,637 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-25 15:47:40,639 INFO [inference.py:442] Vocabulary size: 500
|
| 11 |
+
2026-01-25 15:47:40,639 INFO [inference.py:443] Blank ID: 0
|
| 12 |
+
2026-01-25 15:47:40,639 INFO [inference.py:446] Creating model
|
| 13 |
+
2026-01-25 15:47:41,928 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 14 |
+
2026-01-25 15:47:41,929 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 15 |
+
2026-01-25 15:47:46,671 INFO [inference.py:482] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-25 15:47:46,671 INFO [inference.py:485] Loading test data
|
| 17 |
+
2026-01-25 15:47:46,671 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-25 15:47:47,717 INFO [inference.py:496] Number of test utterances: 6676
|
| 19 |
+
2026-01-25 15:47:47,717 INFO [inference.py:499] Starting inference...
|
| 20 |
+
2026-01-25 15:47:48,838 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-25 15:47:48,842 INFO [inference.py:319] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-25 15:47:48,845 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-25 15:47:58,037 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-25 15:47:58,038 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-25 15:47:58,039 INFO [inference.py:343] Encoder out range: [-11.805, 12.741]
|
| 26 |
+
2026-01-25 15:48:09,204 INFO [inference.py:353] Number of hypotheses: 6
|
| 27 |
+
2026-01-25 15:48:09,205 INFO [inference.py:355] First hypothesis: [37, 9, 49, 17, 9, 49, 17, 9, 49, 17, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 10, 7, 5, 13, 59, 14, 164, 59, 21, 19, 40, 22, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 125, 13, 200, 130, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 49, 9, 7, 24, 154, 125, 13, 160, 202, 281, 116, 126, 281, 5, 8, 119, 55, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 59, 19, 75, 8, 14, 80, 4, 2, 11, 4, 2, 37, 4, 2, 37, 4, 2, 37, 4, 7, 197, 10, 7, 5, 13, 160, 157, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 2, 11]
|
| 28 |
+
2026-01-25 15:48:09,222 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-25 15:48:09,223 INFO [inference.py:319] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-25 15:48:09,224 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-43-42
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 04:43:42,361 INFO [inference.py:419] ================================================================================
|
| 2 |
+
2026-01-26 04:43:42,361 INFO [inference.py:420] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 04:43:42,361 INFO [inference.py:421] ================================================================================
|
| 4 |
+
2026-01-26 04:43:42,361 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 04:43:42,361 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 04:43:42,361 INFO [inference.py:424] Test set: ihm
|
| 7 |
+
2026-01-26 04:43:42,361 INFO [inference.py:425] Decoding method: greedy_search
|
| 8 |
+
2026-01-26 04:43:42,361 INFO [inference.py:431] Device: cpu
|
| 9 |
+
2026-01-26 04:43:42,361 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 04:43:42,363 INFO [inference.py:442] Vocabulary size: 500
|
| 11 |
+
2026-01-26 04:43:42,363 INFO [inference.py:443] Blank ID: 0
|
| 12 |
+
2026-01-26 04:43:42,363 INFO [inference.py:446] Creating model
|
| 13 |
+
2026-01-26 04:43:43,908 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 14 |
+
2026-01-26 04:43:43,908 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 15 |
+
2026-01-26 04:43:48,495 INFO [inference.py:482] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 04:43:48,495 INFO [inference.py:485] Loading test data
|
| 17 |
+
2026-01-26 04:43:48,495 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 04:43:49,572 INFO [inference.py:496] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 04:43:49,572 INFO [inference.py:499] Starting inference...
|
| 20 |
+
2026-01-26 04:43:50,628 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 04:43:50,631 INFO [inference.py:319] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 04:43:50,633 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 04:43:59,926 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 04:43:59,926 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 04:43:59,927 INFO [inference.py:343] Encoder out range: [-4.703, 6.664]
|
| 26 |
+
2026-01-26 04:44:00,350 INFO [inference.py:353] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 04:44:00,350 INFO [inference.py:355] First hypothesis: [11]
|
| 28 |
+
2026-01-26 04:44:00,355 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 04:44:00,356 INFO [inference.py:319] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 04:44:00,357 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 04:44:07,432 INFO [inference.py:341] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 04:44:07,434 INFO [inference.py:342] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 04:44:07,434 INFO [inference.py:343] Encoder out range: [-4.701, 6.665]
|
| 37 |
+
2026-01-26 04:44:08,039 INFO [inference.py:353] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 04:44:08,039 INFO [inference.py:355] First hypothesis: [11]
|
| 39 |
+
2026-01-26 04:44:08,059 INFO [inference.py:318] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 04:44:08,060 INFO [inference.py:319] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 04:44:08,061 INFO [inference.py:320] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-44-36
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 04:44:36,166 INFO [inference.py:419] ================================================================================
|
| 2 |
+
2026-01-26 04:44:36,166 INFO [inference.py:420] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 04:44:36,166 INFO [inference.py:421] ================================================================================
|
| 4 |
+
2026-01-26 04:44:36,166 INFO [inference.py:422] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 04:44:36,166 INFO [inference.py:423] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 04:44:36,166 INFO [inference.py:424] Test set: ihm
|
| 7 |
+
2026-01-26 04:44:36,166 INFO [inference.py:425] Decoding method: greedy_search
|
| 8 |
+
2026-01-26 04:44:36,166 INFO [inference.py:431] Device: cpu
|
| 9 |
+
2026-01-26 04:44:36,166 INFO [inference.py:434] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 04:44:36,168 INFO [inference.py:442] Vocabulary size: 500
|
| 11 |
+
2026-01-26 04:44:36,168 INFO [inference.py:443] Blank ID: 0
|
| 12 |
+
2026-01-26 04:44:36,168 INFO [inference.py:446] Creating model
|
| 13 |
+
2026-01-26 04:44:37,655 INFO [inference.py:453] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/checkpoint-30000.pt
|
| 14 |
+
2026-01-26 04:44:37,655 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/checkpoint-30000.pt
|
| 15 |
+
2026-01-26 04:44:42,489 INFO [inference.py:482] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 04:44:42,489 INFO [inference.py:485] Loading test data
|
| 17 |
+
2026-01-26 04:44:42,489 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 04:44:43,623 INFO [inference.py:496] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 04:44:43,623 INFO [inference.py:499] Starting inference...
|
| 20 |
+
2026-01-26 04:44:44,773 INFO [inference.py:318] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 04:44:44,776 INFO [inference.py:319] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 04:44:44,779 INFO [inference.py:320] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 04:44:52,532 INFO [inference.py:341] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 04:44:52,532 INFO [inference.py:342] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 04:44:52,533 INFO [inference.py:343] Encoder out range: [-4.808, 7.175]
|
| 26 |
+
2026-01-26 04:44:52,980 INFO [inference.py:353] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 04:44:52,980 INFO [inference.py:355] First hypothesis: [11, 4, 2, 11]
|
| 28 |
+
2026-01-26 04:44:52,988 INFO [inference.py:318] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 04:44:52,989 INFO [inference.py:319] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 04:44:52,990 INFO [inference.py:320] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 04:45:00,200 INFO [inference.py:341] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 04:45:00,201 INFO [inference.py:342] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 04:45:00,201 INFO [inference.py:343] Encoder out range: [-4.808, 7.173]
|
| 37 |
+
2026-01-26 04:45:01,019 INFO [inference.py:353] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 04:45:01,019 INFO [inference.py:355] First hypothesis: [11, 4, 2, 11]
|
| 39 |
+
2026-01-26 04:45:01,031 INFO [inference.py:318] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 04:45:01,032 INFO [inference.py:319] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 04:45:01,033 INFO [inference.py:320] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-45-26
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-57-24
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 04:57:24,557 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 04:57:24,557 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 04:57:24,557 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 04:57:24,557 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 04:57:24,557 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 04:57:24,557 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 04:57:24,558 INFO [inference.py:416] Decoding method: greedy_search
|
| 8 |
+
2026-01-26 04:57:24,558 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 04:57:24,558 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 04:57:24,559 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 04:57:24,559 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 04:57:24,559 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 04:57:26,107 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 14 |
+
2026-01-26 04:57:26,108 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 15 |
+
2026-01-26 04:57:30,697 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 04:57:30,697 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 04:57:30,697 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 04:57:31,812 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 04:57:31,812 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 04:57:32,942 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 04:57:32,945 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 04:57:32,948 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 04:57:42,125 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 04:57:42,126 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 04:57:42,129 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
|
| 26 |
+
2026-01-26 04:57:42,499 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 04:57:42,500 INFO [inference.py:346] First hypothesis: []
|
| 28 |
+
2026-01-26 04:57:42,506 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 04:57:42,506 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 04:57:42,507 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 04:57:49,615 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 04:57:49,616 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 04:57:49,616 INFO [inference.py:334] Encoder out range: [-4.701, 6.665]
|
| 37 |
+
2026-01-26 04:57:50,141 INFO [inference.py:344] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 04:57:50,141 INFO [inference.py:346] First hypothesis: []
|
| 39 |
+
2026-01-26 04:57:50,152 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 04:57:50,153 INFO [inference.py:310] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 04:57:50,154 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
| 46 |
+
2026-01-26 04:57:56,416 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
|
| 47 |
+
2026-01-26 04:57:56,417 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100, 94, 89, 88, 87, 73,
|
| 48 |
+
71, 71, 69, 68, 68, 65, 62, 62, 59, 59, 58, 56, 51, 45,
|
| 49 |
+
42, 40, 38, 36, 35, 33, 29, 28, 24, 18, 17])
|
| 50 |
+
2026-01-26 04:57:56,417 INFO [inference.py:334] Encoder out range: [-4.699, 6.664]
|
| 51 |
+
2026-01-26 04:57:56,902 INFO [inference.py:344] Number of hypotheses: 39
|
| 52 |
+
2026-01-26 04:57:56,902 INFO [inference.py:346] First hypothesis: []
|
| 53 |
+
2026-01-26 04:57:56,906 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
|
| 54 |
+
2026-01-26 04:57:56,907 INFO [inference.py:310] Audio range: [-0.314, 0.332]
|
| 55 |
+
2026-01-26 04:57:56,907 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
|
| 56 |
+
56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
|
| 57 |
+
46079, 45280, 44960], dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-58-20
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 04:58:20,350 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 04:58:20,350 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 04:58:20,350 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 04:58:20,350 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 04:58:20,350 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 04:58:20,350 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 04:58:20,350 INFO [inference.py:416] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 04:58:20,350 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 04:58:20,350 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 04:58:20,352 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 04:58:20,352 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 04:58:20,352 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 04:58:21,896 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 14 |
+
2026-01-26 04:58:21,897 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 15 |
+
2026-01-26 04:58:26,596 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 04:58:26,597 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 04:58:26,597 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 04:58:27,751 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 04:58:27,752 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 04:58:28,913 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 04:58:28,920 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 04:58:28,923 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 04:58:38,037 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 04:58:38,037 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 04:58:38,038 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
|
| 26 |
+
2026-01-26 04:58:38,417 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 04:58:38,418 INFO [inference.py:346] First hypothesis: []
|
| 28 |
+
2026-01-26 04:58:38,426 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 04:58:38,427 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 04:58:38,428 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-04-59-21
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 04:59:21,443 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 04:59:21,443 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 04:59:21,443 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 04:59:21,443 INFO [inference.py:413] Experiment dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 04:59:21,443 INFO [inference.py:414] Output dir: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 04:59:21,443 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 04:59:21,443 INFO [inference.py:416] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 04:59:21,443 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 04:59:21,443 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 04:59:21,445 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 04:59:21,445 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 04:59:21,445 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 04:59:23,052 INFO [inference.py:444] Loading checkpoint: /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 14 |
+
2026-01-26 04:59:23,052 INFO [checkpoint.py:111] Loading checkpoint from /workspace/icefall/egs/ami/ASR/xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 15 |
+
2026-01-26 04:59:27,784 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 04:59:27,784 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 04:59:27,784 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 04:59:28,889 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 04:59:28,889 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 04:59:29,994 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 04:59:29,997 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 04:59:30,000 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 04:59:39,304 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 04:59:39,305 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 04:59:39,306 INFO [inference.py:334] Encoder out range: [-13.684, 12.764]
|
| 26 |
+
2026-01-26 04:59:39,937 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 04:59:39,938 INFO [inference.py:346] First hypothesis: [171]
|
| 28 |
+
2026-01-26 04:59:39,943 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 04:59:39,998 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 04:59:39,999 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 04:59:47,631 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 04:59:47,632 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 04:59:47,632 INFO [inference.py:334] Encoder out range: [-12.514, 12.004]
|
| 37 |
+
2026-01-26 04:59:48,802 INFO [inference.py:344] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 04:59:48,802 INFO [inference.py:346] First hypothesis: [23, 51, 156, 6, 205, 18, 116, 113, 363]
|
| 39 |
+
2026-01-26 04:59:49,215 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 04:59:49,220 INFO [inference.py:310] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 04:59:49,221 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
| 46 |
+
2026-01-26 04:59:56,731 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
|
| 47 |
+
2026-01-26 04:59:56,732 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100, 94, 89, 88, 87, 73,
|
| 48 |
+
71, 71, 69, 68, 68, 65, 62, 62, 59, 59, 58, 56, 51, 45,
|
| 49 |
+
42, 40, 38, 36, 35, 33, 29, 28, 24, 18, 17])
|
| 50 |
+
2026-01-26 04:59:56,733 INFO [inference.py:334] Encoder out range: [-11.444, 10.811]
|
| 51 |
+
2026-01-26 04:59:57,403 INFO [inference.py:344] Number of hypotheses: 39
|
| 52 |
+
2026-01-26 04:59:57,403 INFO [inference.py:346] First hypothesis: [11]
|
| 53 |
+
2026-01-26 04:59:57,409 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
|
| 54 |
+
2026-01-26 04:59:57,420 INFO [inference.py:310] Audio range: [-0.314, 0.332]
|
| 55 |
+
2026-01-26 04:59:57,420 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
|
| 56 |
+
56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
|
| 57 |
+
46079, 45280, 44960], dtype=torch.int32)
|
| 58 |
+
2026-01-26 05:00:05,318 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
|
| 59 |
+
2026-01-26 05:00:05,319 INFO [inference.py:333] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
|
| 60 |
+
170, 166, 164, 164, 147, 144, 143, 141, 140])
|
| 61 |
+
2026-01-26 05:00:05,319 INFO [inference.py:334] Encoder out range: [-13.261, 11.090]
|
| 62 |
+
2026-01-26 05:00:06,035 INFO [inference.py:344] Number of hypotheses: 23
|
| 63 |
+
2026-01-26 05:00:06,035 INFO [inference.py:346] First hypothesis: [20]
|
| 64 |
+
2026-01-26 05:00:06,104 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
|
| 65 |
+
2026-01-26 05:00:06,105 INFO [inference.py:310] Audio range: [-0.323, 0.414]
|
| 66 |
+
2026-01-26 05:00:06,105 INFO [inference.py:311] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
|
| 67 |
+
2026-01-26 05:00:14,039 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
|
| 68 |
+
2026-01-26 05:00:14,040 INFO [inference.py:333] Encoder out lens: tensor([991, 931, 931, 920, 913])
|
| 69 |
+
2026-01-26 05:00:14,098 INFO [inference.py:334] Encoder out range: [-14.241, 14.344]
|
| 70 |
+
2026-01-26 05:00:14,713 INFO [inference.py:344] Number of hypotheses: 5
|
| 71 |
+
2026-01-26 05:00:14,713 INFO [inference.py:346] First hypothesis: [37, 23, 127, 274, 5, 147, 80, 73, 6, 16, 29, 119, 5, 20, 84, 171, 15, 6, 328, 5, 127, 20, 56]
|
| 72 |
+
2026-01-26 05:00:14,718 INFO [inference.py:309] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
|
| 73 |
+
2026-01-26 05:00:14,719 INFO [inference.py:310] Audio range: [-0.274, 0.362]
|
| 74 |
+
2026-01-26 05:00:14,719 INFO [inference.py:311] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
|
| 75 |
+
33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
|
| 76 |
+
20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320, 7040,
|
| 77 |
+
6560, 6400, 5760, 5760, 5120, 4800, 4800, 4640, 4480, 3360],
|
| 78 |
+
dtype=torch.int32)
|
| 79 |
+
2026-01-26 05:00:21,633 INFO [inference.py:332] Encoder out shape: torch.Size([40, 122, 1024])
|
| 80 |
+
2026-01-26 05:00:21,634 INFO [inference.py:333] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103, 91, 91,
|
| 81 |
+
85, 77, 77, 73, 67, 67, 64, 52, 50, 50, 49, 48, 46, 41,
|
| 82 |
+
38, 21, 20, 19, 17, 17, 15, 14, 14, 14, 13, 10])
|
| 83 |
+
2026-01-26 05:00:21,635 INFO [inference.py:334] Encoder out range: [-11.784, 11.570]
|
| 84 |
+
2026-01-26 05:00:22,302 INFO [inference.py:344] Number of hypotheses: 40
|
| 85 |
+
2026-01-26 05:00:22,302 INFO [inference.py:346] First hypothesis: []
|
| 86 |
+
2026-01-26 05:00:22,310 INFO [inference.py:309] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
|
| 87 |
+
2026-01-26 05:00:22,311 INFO [inference.py:310] Audio range: [-0.514, 0.393]
|
| 88 |
+
2026-01-26 05:00:22,311 INFO [inference.py:311] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
|
| 89 |
+
51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
|
| 90 |
+
44640, 44320, 44160], dtype=torch.int32)
|
| 91 |
+
2026-01-26 05:00:29,229 INFO [inference.py:332] Encoder out shape: torch.Size([23, 208, 1024])
|
| 92 |
+
2026-01-26 05:00:29,230 INFO [inference.py:333] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
|
| 93 |
+
154, 150, 150, 149, 146, 143, 139, 138, 137])
|
| 94 |
+
2026-01-26 05:00:29,230 INFO [inference.py:334] Encoder out range: [-12.152, 11.060]
|
| 95 |
+
2026-01-26 05:00:29,913 INFO [inference.py:344] Number of hypotheses: 23
|
| 96 |
+
2026-01-26 05:00:29,913 INFO [inference.py:346] First hypothesis: [51, 6, 4, 27, 5, 26, 70, 22, 20, 265, 33, 46]
|
| 97 |
+
2026-01-26 05:00:29,920 INFO [inference.py:309] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
|
| 98 |
+
2026-01-26 05:00:29,921 INFO [inference.py:310] Audio range: [-0.416, 0.458]
|
| 99 |
+
2026-01-26 05:00:29,921 INFO [inference.py:311] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
|
| 100 |
+
50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
|
| 101 |
+
45920, 44640, 43040, 42720], dtype=torch.int32)
|
| 102 |
+
2026-01-26 05:00:37,217 INFO [inference.py:332] Encoder out shape: torch.Size([24, 204, 1024])
|
| 103 |
+
2026-01-26 05:00:37,217 INFO [inference.py:333] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
|
| 104 |
+
151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
|
| 105 |
+
2026-01-26 05:00:37,218 INFO [inference.py:334] Encoder out range: [-12.007, 11.624]
|
| 106 |
+
2026-01-26 05:00:37,807 INFO [inference.py:344] Number of hypotheses: 24
|
| 107 |
+
2026-01-26 05:00:37,808 INFO [inference.py:346] First hypothesis: [11]
|
| 108 |
+
2026-01-26 05:00:37,815 INFO [inference.py:309] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
|
| 109 |
+
2026-01-26 05:00:37,816 INFO [inference.py:310] Audio range: [-0.135, 0.191]
|
| 110 |
+
2026-01-26 05:00:37,816 INFO [inference.py:311] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
|
| 111 |
+
dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-02-37
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:02:37,849 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 05:02:37,849 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:02:37,850 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 05:02:37,850 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:02:37,850 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:02:37,850 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 05:02:37,850 INFO [inference.py:416] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:02:37,850 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 05:02:37,850 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 05:02:37,851 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 05:02:37,851 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 05:02:37,851 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 05:02:39,443 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 14 |
+
2026-01-26 05:02:39,444 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 15 |
+
2026-01-26 05:02:44,138 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 05:02:44,139 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 05:02:44,139 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 05:02:45,310 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 05:02:45,310 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 05:02:46,398 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 05:02:46,400 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 05:02:46,404 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 05:02:55,240 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 05:02:55,241 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 05:02:55,298 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
|
| 26 |
+
2026-01-26 05:02:55,596 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 05:02:55,596 INFO [inference.py:346] First hypothesis: []
|
| 28 |
+
2026-01-26 05:02:55,605 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 05:02:55,606 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 05:02:55,607 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-03-42
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:03:42,123 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 05:03:42,123 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:03:42,123 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 05:03:42,123 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:03:42,123 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:03:42,123 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 05:03:42,123 INFO [inference.py:416] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:03:42,123 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 05:03:42,124 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 05:03:42,125 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 05:03:42,125 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 05:03:42,125 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 05:03:43,760 INFO [inference.py:459] Loading checkpoint: xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 14 |
+
2026-01-26 05:03:43,760 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/epoch-15.pt
|
| 15 |
+
2026-01-26 05:03:48,510 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 05:03:48,511 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 05:03:48,511 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 05:03:49,613 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 05:03:49,613 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 05:03:50,741 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 05:03:50,756 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 05:03:50,759 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 05:03:59,827 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 05:03:59,828 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 05:03:59,828 INFO [inference.py:334] Encoder out range: [-4.703, 6.664]
|
| 26 |
+
2026-01-26 05:04:00,148 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 05:04:00,148 INFO [inference.py:346] First hypothesis: []
|
| 28 |
+
2026-01-26 05:04:00,158 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 05:04:00,162 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 05:04:00,163 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-05-16
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:05:16,888 INFO [inference.py:410] ================================================================================
|
| 2 |
+
2026-01-26 05:05:16,888 INFO [inference.py:411] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:05:16,888 INFO [inference.py:412] ================================================================================
|
| 4 |
+
2026-01-26 05:05:16,888 INFO [inference.py:413] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:05:16,888 INFO [inference.py:414] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:05:16,888 INFO [inference.py:415] Test set: ihm
|
| 7 |
+
2026-01-26 05:05:16,888 INFO [inference.py:416] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:05:16,888 INFO [inference.py:422] Device: cpu
|
| 9 |
+
2026-01-26 05:05:16,888 INFO [inference.py:425] Loading BPE model from data/lang_bpe_500_scd
|
| 10 |
+
2026-01-26 05:05:16,890 INFO [inference.py:433] Vocabulary size: 500
|
| 11 |
+
2026-01-26 05:05:16,890 INFO [inference.py:434] Blank ID: 0
|
| 12 |
+
2026-01-26 05:05:16,890 INFO [inference.py:437] Creating model
|
| 13 |
+
2026-01-26 05:05:18,544 INFO [inference.py:444] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 14 |
+
2026-01-26 05:05:18,544 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 15 |
+
2026-01-26 05:05:23,319 INFO [inference.py:473] Number of model parameters: 317,511,772
|
| 16 |
+
2026-01-26 05:05:23,320 INFO [inference.py:476] Loading test data
|
| 17 |
+
2026-01-26 05:05:23,320 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 18 |
+
2026-01-26 05:05:24,403 INFO [inference.py:487] Number of test utterances: 6676
|
| 19 |
+
2026-01-26 05:05:24,403 INFO [inference.py:490] Starting inference...
|
| 20 |
+
2026-01-26 05:05:25,573 INFO [inference.py:309] Audio shape: torch.Size([6, 246400]), dtype: torch.float32
|
| 21 |
+
2026-01-26 05:05:25,576 INFO [inference.py:310] Audio range: [-0.090, 0.104]
|
| 22 |
+
2026-01-26 05:05:25,579 INFO [inference.py:311] Audio lengths: tensor([246400, 244799, 238079, 228000, 224000, 222880], dtype=torch.int32)
|
| 23 |
+
2026-01-26 05:05:34,838 INFO [inference.py:332] Encoder out shape: torch.Size([6, 769, 1024])
|
| 24 |
+
2026-01-26 05:05:34,839 INFO [inference.py:333] Encoder out lens: tensor([769, 764, 743, 712, 699, 696])
|
| 25 |
+
2026-01-26 05:05:34,839 INFO [inference.py:334] Encoder out range: [-13.684, 12.764]
|
| 26 |
+
2026-01-26 05:05:35,536 INFO [inference.py:344] Number of hypotheses: 6
|
| 27 |
+
2026-01-26 05:05:35,537 INFO [inference.py:346] First hypothesis: [171]
|
| 28 |
+
2026-01-26 05:05:35,546 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 29 |
+
2026-01-26 05:05:35,547 INFO [inference.py:310] Audio range: [-0.401, 0.443]
|
| 30 |
+
2026-01-26 05:05:35,547 INFO [inference.py:311] Audio lengths: tensor([67200, 66559, 66400, 64159, 63680, 63040, 61120, 60960, 59519, 59040,
|
| 31 |
+
58560, 55840, 54559, 52480, 52160, 50559, 50400, 50240, 48480, 47840,
|
| 32 |
+
47520, 44639, 44000], dtype=torch.int32)
|
| 33 |
+
2026-01-26 05:05:43,001 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
|
| 34 |
+
2026-01-26 05:05:43,003 INFO [inference.py:333] Encoder out lens: tensor([209, 207, 207, 200, 198, 196, 190, 190, 185, 184, 182, 174, 170, 163,
|
| 35 |
+
162, 157, 157, 156, 151, 149, 148, 139, 137])
|
| 36 |
+
2026-01-26 05:05:43,003 INFO [inference.py:334] Encoder out range: [-12.514, 12.004]
|
| 37 |
+
2026-01-26 05:05:43,905 INFO [inference.py:344] Number of hypotheses: 23
|
| 38 |
+
2026-01-26 05:05:43,905 INFO [inference.py:346] First hypothesis: [23, 51, 156, 6, 205, 18, 116, 113, 363]
|
| 39 |
+
2026-01-26 05:05:43,925 INFO [inference.py:309] Audio shape: torch.Size([39, 40640]), dtype: torch.float32
|
| 40 |
+
2026-01-26 05:05:43,926 INFO [inference.py:310] Audio range: [-0.439, 0.480]
|
| 41 |
+
2026-01-26 05:05:43,926 INFO [inference.py:311] Audio lengths: tensor([40640, 37279, 36799, 36480, 36480, 33280, 33279, 32320, 32159, 30400,
|
| 42 |
+
28800, 28480, 28160, 23520, 23039, 22880, 22400, 21920, 21920, 20960,
|
| 43 |
+
20160, 20000, 19200, 19040, 18880, 18240, 16480, 14720, 13600, 12960,
|
| 44 |
+
12320, 11680, 11520, 10880, 9440, 9120, 7840, 5920, 5760],
|
| 45 |
+
dtype=torch.int32)
|
| 46 |
+
2026-01-26 05:05:51,027 INFO [inference.py:332] Encoder out shape: torch.Size([39, 126, 1024])
|
| 47 |
+
2026-01-26 05:05:51,028 INFO [inference.py:333] Encoder out lens: tensor([126, 116, 114, 113, 113, 103, 103, 100, 100, 94, 89, 88, 87, 73,
|
| 48 |
+
71, 71, 69, 68, 68, 65, 62, 62, 59, 59, 58, 56, 51, 45,
|
| 49 |
+
42, 40, 38, 36, 35, 33, 29, 28, 24, 18, 17])
|
| 50 |
+
2026-01-26 05:05:51,028 INFO [inference.py:334] Encoder out range: [-11.444, 10.811]
|
| 51 |
+
2026-01-26 05:05:51,620 INFO [inference.py:344] Number of hypotheses: 39
|
| 52 |
+
2026-01-26 05:05:51,620 INFO [inference.py:346] First hypothesis: [11]
|
| 53 |
+
2026-01-26 05:05:51,628 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
|
| 54 |
+
2026-01-26 05:05:51,629 INFO [inference.py:310] Audio range: [-0.314, 0.332]
|
| 55 |
+
2026-01-26 05:05:51,629 INFO [inference.py:311] Audio lengths: tensor([68000, 65920, 65599, 64799, 64160, 63520, 62400, 61600, 59040, 58239,
|
| 56 |
+
56480, 55840, 55520, 55359, 54719, 53440, 52800, 52640, 47200, 46239,
|
| 57 |
+
46079, 45280, 44960], dtype=torch.int32)
|
| 58 |
+
2026-01-26 05:05:59,021 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
|
| 59 |
+
2026-01-26 05:05:59,022 INFO [inference.py:333] Encoder out lens: tensor([212, 205, 204, 202, 200, 198, 194, 192, 184, 181, 176, 174, 173, 172,
|
| 60 |
+
170, 166, 164, 164, 147, 144, 143, 141, 140])
|
| 61 |
+
2026-01-26 05:05:59,023 INFO [inference.py:334] Encoder out range: [-13.261, 11.090]
|
| 62 |
+
2026-01-26 05:05:59,931 INFO [inference.py:344] Number of hypotheses: 23
|
| 63 |
+
2026-01-26 05:05:59,932 INFO [inference.py:346] First hypothesis: [20]
|
| 64 |
+
2026-01-26 05:06:00,567 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
|
| 65 |
+
2026-01-26 05:06:00,568 INFO [inference.py:310] Audio range: [-0.323, 0.414]
|
| 66 |
+
2026-01-26 05:06:00,569 INFO [inference.py:311] Audio lengths: tensor([317280, 298079, 298080, 294559, 292480], dtype=torch.int32)
|
| 67 |
+
2026-01-26 05:06:09,302 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
|
| 68 |
+
2026-01-26 05:06:09,303 INFO [inference.py:333] Encoder out lens: tensor([991, 931, 931, 920, 913])
|
| 69 |
+
2026-01-26 05:06:09,304 INFO [inference.py:334] Encoder out range: [-14.241, 14.344]
|
| 70 |
+
2026-01-26 05:06:10,112 INFO [inference.py:344] Number of hypotheses: 5
|
| 71 |
+
2026-01-26 05:06:10,113 INFO [inference.py:346] First hypothesis: [37, 23, 127, 274, 5, 147, 80, 73, 6, 16, 29, 119, 5, 20, 84, 171, 15, 6, 328, 5, 127, 20, 56]
|
| 72 |
+
2026-01-26 05:06:10,117 INFO [inference.py:309] Audio shape: torch.Size([40, 39360]), dtype: torch.float32
|
| 73 |
+
2026-01-26 05:06:10,118 INFO [inference.py:310] Audio range: [-0.274, 0.362]
|
| 74 |
+
2026-01-26 05:06:10,118 INFO [inference.py:311] Audio lengths: tensor([39359, 39199, 39039, 38080, 36000, 35200, 34880, 34880, 33760, 33760,
|
| 75 |
+
33600, 33120, 29440, 29280, 27360, 24960, 24960, 23680, 21760, 21600,
|
| 76 |
+
20800, 16800, 16320, 16160, 16000, 15679, 15040, 13440, 12320, 7040,
|
| 77 |
+
6560, 6400, 5760, 5760, 5120, 4800, 4800, 4640, 4480, 3360],
|
| 78 |
+
dtype=torch.int32)
|
| 79 |
+
2026-01-26 05:06:16,910 INFO [inference.py:332] Encoder out shape: torch.Size([40, 122, 1024])
|
| 80 |
+
2026-01-26 05:06:16,911 INFO [inference.py:333] Encoder out lens: tensor([122, 122, 121, 118, 112, 109, 108, 108, 105, 105, 104, 103, 91, 91,
|
| 81 |
+
85, 77, 77, 73, 67, 67, 64, 52, 50, 50, 49, 48, 46, 41,
|
| 82 |
+
38, 21, 20, 19, 17, 17, 15, 14, 14, 14, 13, 10])
|
| 83 |
+
2026-01-26 05:06:16,911 INFO [inference.py:334] Encoder out range: [-11.784, 11.570]
|
| 84 |
+
2026-01-26 05:06:17,504 INFO [inference.py:344] Number of hypotheses: 40
|
| 85 |
+
2026-01-26 05:06:17,504 INFO [inference.py:346] First hypothesis: []
|
| 86 |
+
2026-01-26 05:06:17,513 INFO [inference.py:309] Audio shape: torch.Size([23, 66880]), dtype: torch.float32
|
| 87 |
+
2026-01-26 05:06:17,514 INFO [inference.py:310] Audio range: [-0.514, 0.393]
|
| 88 |
+
2026-01-26 05:06:17,514 INFO [inference.py:311] Audio lengths: tensor([66880, 65439, 60799, 60320, 59520, 58240, 57280, 56320, 55520, 54080,
|
| 89 |
+
51840, 51520, 50720, 49920, 49600, 48319, 48320, 47999, 46880, 46079,
|
| 90 |
+
44640, 44320, 44160], dtype=torch.int32)
|
| 91 |
+
2026-01-26 05:06:24,334 INFO [inference.py:332] Encoder out shape: torch.Size([23, 208, 1024])
|
| 92 |
+
2026-01-26 05:06:24,335 INFO [inference.py:333] Encoder out lens: tensor([208, 204, 189, 188, 185, 181, 178, 175, 173, 168, 161, 160, 158, 155,
|
| 93 |
+
154, 150, 150, 149, 146, 143, 139, 138, 137])
|
| 94 |
+
2026-01-26 05:06:24,335 INFO [inference.py:334] Encoder out range: [-12.152, 11.060]
|
| 95 |
+
2026-01-26 05:06:25,044 INFO [inference.py:344] Number of hypotheses: 23
|
| 96 |
+
2026-01-26 05:06:25,045 INFO [inference.py:346] First hypothesis: [51, 6, 4, 27, 5, 26, 70, 22, 20, 265, 33, 46]
|
| 97 |
+
2026-01-26 05:06:25,108 INFO [inference.py:309] Audio shape: torch.Size([24, 65600]), dtype: torch.float32
|
| 98 |
+
2026-01-26 05:06:25,109 INFO [inference.py:310] Audio range: [-0.416, 0.458]
|
| 99 |
+
2026-01-26 05:06:25,109 INFO [inference.py:311] Audio lengths: tensor([65600, 64000, 63680, 61280, 60000, 58080, 55200, 52960, 51359, 51200,
|
| 100 |
+
50720, 50720, 50080, 49280, 48639, 47840, 47360, 46880, 46400, 46240,
|
| 101 |
+
45920, 44640, 43040, 42720], dtype=torch.int32)
|
| 102 |
+
2026-01-26 05:06:32,219 INFO [inference.py:332] Encoder out shape: torch.Size([24, 204, 1024])
|
| 103 |
+
2026-01-26 05:06:32,220 INFO [inference.py:333] Encoder out lens: tensor([204, 199, 198, 191, 187, 181, 172, 165, 160, 159, 158, 158, 156, 153,
|
| 104 |
+
151, 149, 147, 146, 144, 144, 143, 139, 134, 133])
|
| 105 |
+
2026-01-26 05:06:32,220 INFO [inference.py:334] Encoder out range: [-12.007, 11.624]
|
| 106 |
+
2026-01-26 05:06:32,900 INFO [inference.py:344] Number of hypotheses: 24
|
| 107 |
+
2026-01-26 05:06:32,900 INFO [inference.py:346] First hypothesis: [11]
|
| 108 |
+
2026-01-26 05:06:32,908 INFO [inference.py:309] Audio shape: torch.Size([9, 176960]), dtype: torch.float32
|
| 109 |
+
2026-01-26 05:06:32,920 INFO [inference.py:310] Audio range: [-0.135, 0.191]
|
| 110 |
+
2026-01-26 05:06:32,921 INFO [inference.py:311] Audio lengths: tensor([176960, 170720, 164480, 155840, 154559, 151839, 151840, 151360, 147040],
|
| 111 |
+
dtype=torch.int32)
|
| 112 |
+
2026-01-26 05:06:40,613 INFO [inference.py:332] Encoder out shape: torch.Size([9, 552, 1024])
|
| 113 |
+
2026-01-26 05:06:40,614 INFO [inference.py:333] Encoder out lens: tensor([552, 533, 513, 486, 482, 474, 474, 472, 459])
|
| 114 |
+
2026-01-26 05:06:40,614 INFO [inference.py:334] Encoder out range: [-13.325, 12.083]
|
| 115 |
+
2026-01-26 05:06:41,231 INFO [inference.py:344] Number of hypotheses: 9
|
| 116 |
+
2026-01-26 05:06:41,231 INFO [inference.py:346] First hypothesis: [11, 87, 7, 5, 13, 379, 130, 101, 6, 16, 29, 119, 5, 6, 16, 29, 119, 18, 115, 93, 58, 36, 30, 201, 38, 58, 134, 50, 6, 16, 29, 119, 20, 81, 7, 85, 272, 73, 105, 6, 205, 17, 47]
|
| 117 |
+
2026-01-26 05:06:41,240 INFO [inference.py:309] Audio shape: torch.Size([14, 112320]), dtype: torch.float32
|
| 118 |
+
2026-01-26 05:06:41,241 INFO [inference.py:310] Audio range: [-0.469, 0.457]
|
| 119 |
+
2026-01-26 05:06:41,242 INFO [inference.py:311] Audio lengths: tensor([112320, 105920, 105439, 104000, 103840, 101920, 98720, 98400, 96960,
|
| 120 |
+
96800, 96320, 95680, 93760, 93600], dtype=torch.int32)
|
| 121 |
+
2026-01-26 05:06:49,007 INFO [inference.py:332] Encoder out shape: torch.Size([14, 350, 1024])
|
| 122 |
+
2026-01-26 05:06:49,008 INFO [inference.py:333] Encoder out lens: tensor([350, 330, 329, 324, 324, 318, 308, 307, 302, 302, 300, 298, 292, 292])
|
| 123 |
+
2026-01-26 05:06:49,009 INFO [inference.py:334] Encoder out range: [-14.286, 11.940]
|
| 124 |
+
2026-01-26 05:06:49,714 INFO [inference.py:344] Number of hypotheses: 14
|
| 125 |
+
2026-01-26 05:06:49,714 INFO [inference.py:346] First hypothesis: [39, 9, 83, 7, 8, 148, 122, 26, 48]
|
| 126 |
+
2026-01-26 05:06:49,714 INFO [inference.py:535] Processed 206 utterances in 10 batches
|
| 127 |
+
2026-01-26 05:06:49,723 INFO [inference.py:309] Audio shape: torch.Size([38, 41440]), dtype: torch.float32
|
| 128 |
+
2026-01-26 05:06:49,724 INFO [inference.py:310] Audio range: [-0.272, 0.322]
|
| 129 |
+
2026-01-26 05:06:49,726 INFO [inference.py:311] Audio lengths: tensor([41440, 41120, 40160, 35680, 33120, 32960, 32800, 31520, 31040, 30880,
|
| 130 |
+
30239, 29920, 29120, 27360, 25279, 24480, 23520, 22720, 22720, 21600,
|
| 131 |
+
20800, 20320, 19840, 19840, 17600, 15520, 13120, 12480, 12320, 11040,
|
| 132 |
+
10560, 9600, 8640, 7520, 5440, 5120, 5120, 4640],
|
| 133 |
+
dtype=torch.int32)
|
| 134 |
+
2026-01-26 05:06:57,233 INFO [inference.py:332] Encoder out shape: torch.Size([38, 129, 1024])
|
| 135 |
+
2026-01-26 05:06:57,234 INFO [inference.py:333] Encoder out lens: tensor([129, 128, 125, 111, 103, 102, 102, 98, 96, 96, 94, 93, 90, 85,
|
| 136 |
+
78, 76, 73, 70, 70, 67, 64, 63, 61, 61, 54, 48, 40, 38,
|
| 137 |
+
38, 34, 32, 29, 26, 23, 16, 15, 15, 14])
|
| 138 |
+
2026-01-26 05:06:57,235 INFO [inference.py:334] Encoder out range: [-13.512, 11.822]
|
| 139 |
+
2026-01-26 05:06:57,919 INFO [inference.py:344] Number of hypotheses: 38
|
| 140 |
+
2026-01-26 05:06:57,920 INFO [inference.py:346] First hypothesis: [56, 199, 130]
|
| 141 |
+
2026-01-26 05:06:57,928 INFO [inference.py:309] Audio shape: torch.Size([38, 41280]), dtype: torch.float32
|
| 142 |
+
2026-01-26 05:06:57,929 INFO [inference.py:310] Audio range: [-0.080, 0.105]
|
| 143 |
+
2026-01-26 05:06:57,930 INFO [inference.py:311] Audio lengths: tensor([41280, 40320, 36800, 35680, 34880, 34879, 34080, 34080, 32000, 30400,
|
| 144 |
+
29280, 29280, 28320, 24000, 23040, 20960, 20960, 20960, 20160, 16960,
|
| 145 |
+
14080, 13280, 12640, 12160, 10720, 9440, 8640, 6240, 6080, 5440,
|
| 146 |
+
5440, 5120, 4800, 4800, 4640, 4480, 4320, 4160],
|
| 147 |
+
dtype=torch.int32)
|
| 148 |
+
2026-01-26 05:07:05,017 INFO [inference.py:332] Encoder out shape: torch.Size([38, 128, 1024])
|
| 149 |
+
2026-01-26 05:07:05,019 INFO [inference.py:333] Encoder out lens: tensor([128, 125, 114, 111, 108, 108, 106, 106, 99, 94, 91, 91, 88, 74,
|
| 150 |
+
71, 65, 65, 65, 62, 52, 43, 41, 39, 37, 33, 29, 26, 19,
|
| 151 |
+
18, 16, 16, 15, 14, 14, 14, 13, 13, 12])
|
| 152 |
+
2026-01-26 05:07:05,019 INFO [inference.py:334] Encoder out range: [-11.071, 11.522]
|
| 153 |
+
2026-01-26 05:07:05,620 INFO [inference.py:344] Number of hypotheses: 38
|
| 154 |
+
2026-01-26 05:07:05,620 INFO [inference.py:346] First hypothesis: [10, 7, 5, 6, 148]
|
| 155 |
+
2026-01-26 05:07:05,630 INFO [inference.py:309] Audio shape: torch.Size([38, 41760]), dtype: torch.float32
|
| 156 |
+
2026-01-26 05:07:05,631 INFO [inference.py:310] Audio range: [-0.246, 0.340]
|
| 157 |
+
2026-01-26 05:07:05,631 INFO [inference.py:311] Audio lengths: tensor([41760, 39680, 38880, 36799, 36639, 36000, 34559, 34240, 33120, 31840,
|
| 158 |
+
30720, 30560, 29760, 29280, 24640, 24160, 22720, 21759, 21600, 20960,
|
| 159 |
+
16320, 14400, 13600, 11360, 10880, 10399, 10400, 9760, 9440, 9280,
|
| 160 |
+
8320, 8320, 7680, 7360, 6880, 6880, 6240, 6240],
|
| 161 |
+
dtype=torch.int32)
|
| 162 |
+
2026-01-26 05:07:13,101 INFO [inference.py:332] Encoder out shape: torch.Size([38, 130, 1024])
|
| 163 |
+
2026-01-26 05:07:13,102 INFO [inference.py:333] Encoder out lens: tensor([130, 123, 121, 114, 114, 112, 107, 106, 103, 99, 95, 95, 92, 91,
|
| 164 |
+
76, 75, 70, 67, 67, 65, 50, 44, 42, 35, 33, 32, 32, 30,
|
| 165 |
+
29, 28, 25, 25, 23, 22, 21, 21, 19, 19])
|
| 166 |
+
2026-01-26 05:07:13,103 INFO [inference.py:334] Encoder out range: [-11.967, 11.229]
|
| 167 |
+
2026-01-26 05:07:13,708 INFO [inference.py:344] Number of hypotheses: 38
|
| 168 |
+
2026-01-26 05:07:13,708 INFO [inference.py:346] First hypothesis: [145, 9, 7, 24, 44, 205]
|
| 169 |
+
2026-01-26 05:07:13,715 INFO [inference.py:309] Audio shape: torch.Size([9, 170400]), dtype: torch.float32
|
| 170 |
+
2026-01-26 05:07:13,716 INFO [inference.py:310] Audio range: [-0.370, 0.393]
|
| 171 |
+
2026-01-26 05:07:13,716 INFO [inference.py:311] Audio lengths: tensor([170400, 166559, 165919, 164800, 156800, 152480, 147520, 146559, 145759],
|
| 172 |
+
dtype=torch.int32)
|
| 173 |
+
2026-01-26 05:07:21,734 INFO [inference.py:332] Encoder out shape: torch.Size([9, 532, 1024])
|
| 174 |
+
2026-01-26 05:07:21,735 INFO [inference.py:333] Encoder out lens: tensor([532, 520, 518, 514, 489, 476, 460, 457, 455])
|
| 175 |
+
2026-01-26 05:07:21,735 INFO [inference.py:334] Encoder out range: [-12.221, 14.348]
|
| 176 |
+
2026-01-26 05:07:22,459 INFO [inference.py:344] Number of hypotheses: 9
|
| 177 |
+
2026-01-26 05:07:22,459 INFO [inference.py:346] First hypothesis: [37, 4, 2, 11]
|
| 178 |
+
2026-01-26 05:07:22,468 INFO [inference.py:309] Audio shape: torch.Size([5, 315520]), dtype: torch.float32
|
| 179 |
+
2026-01-26 05:07:22,469 INFO [inference.py:310] Audio range: [-0.297, 0.334]
|
| 180 |
+
2026-01-26 05:07:22,470 INFO [inference.py:311] Audio lengths: tensor([315520, 301440, 294399, 292480, 289919], dtype=torch.int32)
|
| 181 |
+
2026-01-26 05:07:31,016 INFO [inference.py:332] Encoder out shape: torch.Size([5, 985, 1024])
|
| 182 |
+
2026-01-26 05:07:31,017 INFO [inference.py:333] Encoder out lens: tensor([985, 941, 919, 913, 905])
|
| 183 |
+
2026-01-26 05:07:31,017 INFO [inference.py:334] Encoder out range: [-12.260, 13.635]
|
| 184 |
+
2026-01-26 05:07:31,753 INFO [inference.py:344] Number of hypotheses: 5
|
| 185 |
+
2026-01-26 05:07:31,753 INFO [inference.py:346] First hypothesis: [11, 52, 87, 7, 5, 272, 25, 313, 359, 5, 6, 24, 25, 297, 5, 114, 32, 7, 8, 38, 204, 51, 13, 58, 63, 5, 18, 47, 259, 101, 18, 34, 16, 29, 119, 5, 113, 64, 113, 64]
|
| 186 |
+
2026-01-26 05:07:31,760 INFO [inference.py:309] Audio shape: torch.Size([6, 239520]), dtype: torch.float32
|
| 187 |
+
2026-01-26 05:07:31,773 INFO [inference.py:310] Audio range: [-0.116, 0.111]
|
| 188 |
+
2026-01-26 05:07:31,773 INFO [inference.py:311] Audio lengths: tensor([239519, 234240, 223840, 223360, 219679, 215680], dtype=torch.int32)
|
| 189 |
+
2026-01-26 05:07:39,824 INFO [inference.py:332] Encoder out shape: torch.Size([6, 748, 1024])
|
| 190 |
+
2026-01-26 05:07:39,824 INFO [inference.py:333] Encoder out lens: tensor([748, 731, 699, 697, 686, 673])
|
| 191 |
+
2026-01-26 05:07:39,825 INFO [inference.py:334] Encoder out range: [-13.591, 10.919]
|
| 192 |
+
2026-01-26 05:07:40,203 INFO [inference.py:344] Number of hypotheses: 6
|
| 193 |
+
2026-01-26 05:07:40,204 INFO [inference.py:346] First hypothesis: [23]
|
| 194 |
+
2026-01-26 05:07:40,210 INFO [inference.py:309] Audio shape: torch.Size([5, 315200]), dtype: torch.float32
|
| 195 |
+
2026-01-26 05:07:40,210 INFO [inference.py:310] Audio range: [-0.082, 0.158]
|
| 196 |
+
2026-01-26 05:07:40,211 INFO [inference.py:311] Audio lengths: tensor([315200, 310560, 300000, 299680, 296959], dtype=torch.int32)
|
| 197 |
+
2026-01-26 05:07:49,627 INFO [inference.py:332] Encoder out shape: torch.Size([5, 984, 1024])
|
| 198 |
+
2026-01-26 05:07:49,628 INFO [inference.py:333] Encoder out lens: tensor([984, 970, 937, 936, 927])
|
| 199 |
+
2026-01-26 05:07:49,628 INFO [inference.py:334] Encoder out range: [-14.589, 11.647]
|
| 200 |
+
2026-01-26 05:07:50,241 INFO [inference.py:344] Number of hypotheses: 5
|
| 201 |
+
2026-01-26 05:07:50,241 INFO [inference.py:346] First hypothesis: [310, 20, 51, 46, 6, 155, 22, 51, 274, 73, 152, 25, 383, 73, 155, 22, 26, 93, 53, 183, 5, 73, 93, 269, 27, 5, 31]
|
| 202 |
+
2026-01-26 05:07:50,247 INFO [inference.py:309] Audio shape: torch.Size([6, 237280]), dtype: torch.float32
|
| 203 |
+
2026-01-26 05:07:50,248 INFO [inference.py:310] Audio range: [-0.130, 0.131]
|
| 204 |
+
2026-01-26 05:07:50,248 INFO [inference.py:311] Audio lengths: tensor([237280, 228159, 220639, 220480, 219359, 213119], dtype=torch.int32)
|
| 205 |
+
2026-01-26 05:07:58,004 INFO [inference.py:332] Encoder out shape: torch.Size([6, 741, 1024])
|
| 206 |
+
2026-01-26 05:07:58,005 INFO [inference.py:333] Encoder out lens: tensor([741, 712, 689, 688, 685, 665])
|
| 207 |
+
2026-01-26 05:07:58,005 INFO [inference.py:334] Encoder out range: [-13.120, 12.506]
|
| 208 |
+
2026-01-26 05:07:58,629 INFO [inference.py:344] Number of hypotheses: 6
|
| 209 |
+
2026-01-26 05:07:58,630 INFO [inference.py:346] First hypothesis: [37, 349, 41, 18, 349, 41, 49, 101, 6]
|
| 210 |
+
2026-01-26 05:07:58,637 INFO [inference.py:309] Audio shape: torch.Size([17, 91040]), dtype: torch.float32
|
| 211 |
+
2026-01-26 05:07:58,637 INFO [inference.py:310] Audio range: [-0.574, 0.629]
|
| 212 |
+
2026-01-26 05:07:58,638 INFO [inference.py:311] Audio lengths: tensor([91040, 90240, 89119, 88480, 87520, 86079, 83680, 82880, 81120, 79520,
|
| 213 |
+
79520, 78079, 76800, 76480, 73760, 73600, 73599], dtype=torch.int32)
|
| 214 |
+
2026-01-26 05:08:06,806 INFO [inference.py:332] Encoder out shape: torch.Size([17, 284, 1024])
|
| 215 |
+
2026-01-26 05:08:06,807 INFO [inference.py:333] Encoder out lens: tensor([284, 281, 278, 276, 273, 268, 261, 258, 253, 248, 248, 243, 239, 238,
|
| 216 |
+
230, 229, 229])
|
| 217 |
+
2026-01-26 05:08:06,807 INFO [inference.py:334] Encoder out range: [-13.703, 11.821]
|
| 218 |
+
2026-01-26 05:08:07,431 INFO [inference.py:344] Number of hypotheses: 17
|
| 219 |
+
2026-01-26 05:08:07,432 INFO [inference.py:346] First hypothesis: [131, 214, 33, 259, 26, 101, 265, 6, 205]
|
| 220 |
+
2026-01-26 05:08:07,438 INFO [inference.py:309] Audio shape: torch.Size([23, 68960]), dtype: torch.float32
|
| 221 |
+
2026-01-26 05:08:07,439 INFO [inference.py:310] Audio range: [-0.269, 0.266]
|
| 222 |
+
2026-01-26 05:08:07,439 INFO [inference.py:311] Audio lengths: tensor([68959, 66880, 64800, 64479, 61920, 59680, 54400, 53440, 52479, 52319,
|
| 223 |
+
51840, 46880, 46559, 45120, 44480, 43360, 43360, 43360, 43040, 43040,
|
| 224 |
+
43040, 42880, 42560], dtype=torch.int32)
|
| 225 |
+
2026-01-26 05:08:16,530 INFO [inference.py:332] Encoder out shape: torch.Size([23, 215, 1024])
|
| 226 |
+
2026-01-26 05:08:16,531 INFO [inference.py:333] Encoder out lens: tensor([215, 208, 202, 201, 193, 186, 169, 166, 163, 163, 161, 146, 145, 140,
|
| 227 |
+
138, 135, 135, 135, 134, 134, 134, 133, 132])
|
| 228 |
+
2026-01-26 05:08:16,531 INFO [inference.py:334] Encoder out range: [-13.477, 12.445]
|
| 229 |
+
2026-01-26 05:08:17,420 INFO [inference.py:344] Number of hypotheses: 23
|
| 230 |
+
2026-01-26 05:08:17,420 INFO [inference.py:346] First hypothesis: [225]
|
| 231 |
+
2026-01-26 05:08:17,420 INFO [inference.py:535] Processed 391 utterances in 20 batches
|
| 232 |
+
2026-01-26 05:08:17,426 INFO [inference.py:309] Audio shape: torch.Size([17, 92320]), dtype: torch.float32
|
| 233 |
+
2026-01-26 05:08:17,427 INFO [inference.py:310] Audio range: [-0.234, 0.300]
|
| 234 |
+
2026-01-26 05:08:17,427 INFO [inference.py:311] Audio lengths: tensor([92320, 91200, 91200, 90560, 89120, 84000, 83840, 83360, 82880, 82079,
|
| 235 |
+
79840, 79520, 76800, 73760, 73280, 70079, 69600], dtype=torch.int32)
|
| 236 |
+
2026-01-26 05:08:25,743 INFO [inference.py:332] Encoder out shape: torch.Size([17, 288, 1024])
|
| 237 |
+
2026-01-26 05:08:25,744 INFO [inference.py:333] Encoder out lens: tensor([288, 284, 284, 282, 278, 262, 261, 260, 258, 256, 249, 248, 239, 230,
|
| 238 |
+
228, 218, 217])
|
| 239 |
+
2026-01-26 05:08:25,798 INFO [inference.py:334] Encoder out range: [-13.483, 12.297]
|
| 240 |
+
2026-01-26 05:08:26,544 INFO [inference.py:344] Number of hypotheses: 17
|
| 241 |
+
2026-01-26 05:08:26,545 INFO [inference.py:346] First hypothesis: [39, 52, 10, 7, 85, 58, 134, 5, 84, 189, 29, 14, 43, 8, 93, 130, 16, 34, 84]
|
| 242 |
+
2026-01-26 05:08:26,602 INFO [inference.py:309] Audio shape: torch.Size([23, 68800]), dtype: torch.float32
|
| 243 |
+
2026-01-26 05:08:26,604 INFO [inference.py:310] Audio range: [-0.321, 0.370]
|
| 244 |
+
2026-01-26 05:08:26,604 INFO [inference.py:311] Audio lengths: tensor([68799, 66720, 62560, 62240, 61919, 60160, 59840, 58080, 57920, 57280,
|
| 245 |
+
53920, 52960, 51040, 50080, 49920, 49280, 48160, 48160, 47680, 47200,
|
| 246 |
+
44800, 44000, 42560], dtype=torch.int32)
|
| 247 |
+
2026-01-26 05:08:34,725 INFO [inference.py:332] Encoder out shape: torch.Size([23, 214, 1024])
|
| 248 |
+
2026-01-26 05:08:34,725 INFO [inference.py:333] Encoder out lens: tensor([214, 208, 195, 194, 193, 187, 186, 181, 180, 178, 168, 165, 159, 156,
|
| 249 |
+
155, 153, 150, 150, 148, 147, 139, 137, 132])
|
| 250 |
+
2026-01-26 05:08:34,726 INFO [inference.py:334] Encoder out range: [-11.273, 12.003]
|
| 251 |
+
2026-01-26 05:08:35,331 INFO [inference.py:344] Number of hypotheses: 23
|
| 252 |
+
2026-01-26 05:08:35,331 INFO [inference.py:346] First hypothesis: [218, 4, 2, 11]
|
| 253 |
+
2026-01-26 05:08:35,338 INFO [inference.py:309] Audio shape: torch.Size([38, 42080]), dtype: torch.float32
|
| 254 |
+
2026-01-26 05:08:35,339 INFO [inference.py:310] Audio range: [-0.400, 0.452]
|
| 255 |
+
2026-01-26 05:08:35,340 INFO [inference.py:311] Audio lengths: tensor([42080, 39200, 37439, 36960, 35520, 34560, 34079, 33599, 33600, 33280,
|
| 256 |
+
31520, 31200, 29760, 28160, 28000, 27200, 26720, 25600, 25120, 23200,
|
| 257 |
+
22880, 21280, 20800, 20000, 19680, 19520, 19200, 18080, 17600, 17600,
|
| 258 |
+
16320, 13120, 12320, 11680, 8000, 6400, 5120, 3840],
|
| 259 |
+
dtype=torch.int32)
|
| 260 |
+
2026-01-26 05:08:43,838 INFO [inference.py:332] Encoder out shape: torch.Size([38, 131, 1024])
|
| 261 |
+
2026-01-26 05:08:43,839 INFO [inference.py:333] Encoder out lens: tensor([131, 122, 116, 115, 110, 107, 106, 104, 104, 103, 98, 97, 92, 87,
|
| 262 |
+
87, 84, 83, 79, 78, 72, 71, 66, 64, 62, 61, 60, 59, 56,
|
| 263 |
+
54, 54, 50, 40, 38, 36, 24, 19, 15, 11])
|
| 264 |
+
2026-01-26 05:08:43,839 INFO [inference.py:334] Encoder out range: [-11.872, 11.798]
|
| 265 |
+
2026-01-26 05:08:44,627 INFO [inference.py:344] Number of hypotheses: 38
|
| 266 |
+
2026-01-26 05:08:44,627 INFO [inference.py:346] First hypothesis: []
|
| 267 |
+
2026-01-26 05:08:44,634 INFO [inference.py:309] Audio shape: torch.Size([5, 280640]), dtype: torch.float32
|
| 268 |
+
2026-01-26 05:08:44,635 INFO [inference.py:310] Audio range: [-0.100, 0.092]
|
| 269 |
+
2026-01-26 05:08:44,635 INFO [inference.py:311] Audio lengths: tensor([280639, 280640, 272800, 270080, 256480], dtype=torch.int32)
|
| 270 |
+
2026-01-26 05:08:53,033 INFO [inference.py:332] Encoder out shape: torch.Size([5, 876, 1024])
|
| 271 |
+
2026-01-26 05:08:53,034 INFO [inference.py:333] Encoder out lens: tensor([876, 876, 852, 843, 801])
|
| 272 |
+
2026-01-26 05:08:53,034 INFO [inference.py:334] Encoder out range: [-14.497, 12.570]
|
| 273 |
+
2026-01-26 05:08:53,729 INFO [inference.py:344] Number of hypotheses: 5
|
| 274 |
+
2026-01-26 05:08:53,730 INFO [inference.py:346] First hypothesis: [68, 13, 211, 25, 294, 6, 344, 131, 214, 56, 18, 34, 146, 16, 56, 46, 136, 40, 26, 5, 156, 6, 189]
|
| 275 |
+
2026-01-26 05:08:53,736 INFO [inference.py:309] Audio shape: torch.Size([6, 248640]), dtype: torch.float32
|
| 276 |
+
2026-01-26 05:08:53,737 INFO [inference.py:310] Audio range: [-0.080, 0.094]
|
| 277 |
+
2026-01-26 05:08:53,738 INFO [inference.py:311] Audio lengths: tensor([248639, 242720, 233119, 227199, 217440, 216479], dtype=torch.int32)
|
| 278 |
+
2026-01-26 05:09:02,511 INFO [inference.py:332] Encoder out shape: torch.Size([6, 776, 1024])
|
| 279 |
+
2026-01-26 05:09:02,511 INFO [inference.py:333] Encoder out lens: tensor([776, 758, 728, 709, 679, 676])
|
| 280 |
+
2026-01-26 05:09:02,512 INFO [inference.py:334] Encoder out range: [-12.796, 12.210]
|
| 281 |
+
2026-01-26 05:09:03,260 INFO [inference.py:344] Number of hypotheses: 6
|
| 282 |
+
2026-01-26 05:09:03,260 INFO [inference.py:346] First hypothesis: [11, 87, 7, 5, 58, 134, 5, 71, 67, 6, 195, 25, 98, 36, 67, 51, 121, 26, 117, 10, 208, 13, 265, 39, 9, 100]
|
| 283 |
+
2026-01-26 05:09:03,270 INFO [inference.py:309] Audio shape: torch.Size([13, 117120]), dtype: torch.float32
|
| 284 |
+
2026-01-26 05:09:03,270 INFO [inference.py:310] Audio range: [-0.283, 0.260]
|
| 285 |
+
2026-01-26 05:09:03,271 INFO [inference.py:311] Audio lengths: tensor([117120, 111680, 107200, 106720, 106239, 104639, 104480, 101920, 100960,
|
| 286 |
+
98880, 96960, 93920, 93600], dtype=torch.int32)
|
| 287 |
+
2026-01-26 05:09:11,401 INFO [inference.py:332] Encoder out shape: torch.Size([13, 365, 1024])
|
| 288 |
+
2026-01-26 05:09:11,402 INFO [inference.py:333] Encoder out lens: tensor([365, 348, 334, 333, 331, 326, 326, 318, 315, 308, 302, 293, 292])
|
| 289 |
+
2026-01-26 05:09:11,402 INFO [inference.py:334] Encoder out range: [-12.112, 13.452]
|
| 290 |
+
2026-01-26 05:09:12,027 INFO [inference.py:344] Number of hypotheses: 13
|
| 291 |
+
2026-01-26 05:09:12,027 INFO [inference.py:346] First hypothesis: [11]
|
| 292 |
+
2026-01-26 05:09:12,035 INFO [inference.py:309] Audio shape: torch.Size([23, 67200]), dtype: torch.float32
|
| 293 |
+
2026-01-26 05:09:12,036 INFO [inference.py:310] Audio range: [-0.233, 0.248]
|
| 294 |
+
2026-01-26 05:09:12,037 INFO [inference.py:311] Audio lengths: tensor([67200, 67039, 66079, 62079, 61760, 60480, 59520, 58080, 57760, 54239,
|
| 295 |
+
54080, 54080, 52960, 50080, 49920, 49280, 49119, 47840, 47840, 46720,
|
| 296 |
+
45600, 44800, 44000], dtype=torch.int32)
|
| 297 |
+
2026-01-26 05:09:19,942 INFO [inference.py:332] Encoder out shape: torch.Size([23, 209, 1024])
|
| 298 |
+
2026-01-26 05:09:19,943 INFO [inference.py:333] Encoder out lens: tensor([209, 209, 206, 193, 192, 188, 185, 181, 180, 169, 168, 168, 165, 156,
|
| 299 |
+
155, 153, 153, 149, 149, 145, 142, 139, 137])
|
| 300 |
+
2026-01-26 05:09:19,943 INFO [inference.py:334] Encoder out range: [-14.993, 12.111]
|
| 301 |
+
2026-01-26 05:09:20,821 INFO [inference.py:344] Number of hypotheses: 23
|
| 302 |
+
2026-01-26 05:09:20,821 INFO [inference.py:346] First hypothesis: [18, 34, 174, 15, 44, 84, 15, 34, 81, 7, 69, 57, 101]
|
| 303 |
+
2026-01-26 05:09:20,829 INFO [inference.py:309] Audio shape: torch.Size([6, 269120]), dtype: torch.float32
|
| 304 |
+
2026-01-26 05:09:20,830 INFO [inference.py:310] Audio range: [-0.516, 0.413]
|
| 305 |
+
2026-01-26 05:09:20,830 INFO [inference.py:311] Audio lengths: tensor([269119, 263680, 262719, 262559, 258240, 249759], dtype=torch.int32)
|
| 306 |
+
2026-01-26 05:09:30,645 INFO [inference.py:332] Encoder out shape: torch.Size([6, 840, 1024])
|
| 307 |
+
2026-01-26 05:09:30,645 INFO [inference.py:333] Encoder out lens: tensor([840, 823, 820, 820, 806, 780])
|
| 308 |
+
2026-01-26 05:09:30,646 INFO [inference.py:334] Encoder out range: [-11.696, 10.834]
|
| 309 |
+
2026-01-26 05:09:31,125 INFO [inference.py:344] Number of hypotheses: 6
|
| 310 |
+
2026-01-26 05:09:31,125 INFO [inference.py:346] First hypothesis: []
|
| 311 |
+
2026-01-26 05:09:31,132 INFO [inference.py:309] Audio shape: torch.Size([6, 241440]), dtype: torch.float32
|
| 312 |
+
2026-01-26 05:09:31,133 INFO [inference.py:310] Audio range: [-0.067, 0.106]
|
| 313 |
+
2026-01-26 05:09:31,134 INFO [inference.py:311] Audio lengths: tensor([241440, 240479, 238079, 236800, 224800, 224159], dtype=torch.int32)
|
| 314 |
+
2026-01-26 05:09:39,233 INFO [inference.py:332] Encoder out shape: torch.Size([6, 754, 1024])
|
| 315 |
+
2026-01-26 05:09:39,234 INFO [inference.py:333] Encoder out lens: tensor([754, 751, 743, 739, 702, 700])
|
| 316 |
+
2026-01-26 05:09:39,234 INFO [inference.py:334] Encoder out range: [-13.524, 12.974]
|
| 317 |
+
2026-01-26 05:09:39,812 INFO [inference.py:344] Number of hypotheses: 6
|
| 318 |
+
2026-01-26 05:09:39,812 INFO [inference.py:346] First hypothesis: [87, 7, 5, 51, 195, 25, 6, 362, 39, 114, 38, 65, 18, 354, 16, 38, 89, 174]
|
| 319 |
+
2026-01-26 05:09:39,820 INFO [inference.py:309] Audio shape: torch.Size([23, 68000]), dtype: torch.float32
|
| 320 |
+
2026-01-26 05:09:39,821 INFO [inference.py:310] Audio range: [-0.180, 0.177]
|
| 321 |
+
2026-01-26 05:09:39,821 INFO [inference.py:311] Audio lengths: tensor([68000, 66080, 65120, 64319, 64000, 60960, 58880, 58400, 58240, 57600,
|
| 322 |
+
50239, 49760, 48480, 48480, 47520, 47200, 46560, 46080, 44960, 44480,
|
| 323 |
+
43200, 42719, 42240], dtype=torch.int32)
|
| 324 |
+
2026-01-26 05:09:48,028 INFO [inference.py:332] Encoder out shape: torch.Size([23, 212, 1024])
|
| 325 |
+
2026-01-26 05:09:48,029 INFO [inference.py:333] Encoder out lens: tensor([212, 206, 203, 200, 199, 190, 183, 182, 181, 179, 156, 155, 151, 151,
|
| 326 |
+
148, 147, 145, 143, 140, 138, 134, 133, 131])
|
| 327 |
+
2026-01-26 05:09:48,029 INFO [inference.py:334] Encoder out range: [-13.762, 11.575]
|
| 328 |
+
2026-01-26 05:09:48,731 INFO [inference.py:344] Number of hypotheses: 23
|
| 329 |
+
2026-01-26 05:09:48,731 INFO [inference.py:346] First hypothesis: [61, 51, 184, 13, 4, 140, 5, 169, 93, 25, 6, 260]
|
| 330 |
+
2026-01-26 05:09:48,731 INFO [inference.py:535] Processed 551 utterances in 30 batches
|
| 331 |
+
2026-01-26 05:09:48,738 INFO [inference.py:309] Audio shape: torch.Size([5, 317280]), dtype: torch.float32
|
| 332 |
+
2026-01-26 05:09:48,739 INFO [inference.py:310] Audio range: [-0.122, 0.148]
|
| 333 |
+
2026-01-26 05:09:48,739 INFO [inference.py:311] Audio lengths: tensor([317280, 311840, 309600, 301120, 295680], dtype=torch.int32)
|
| 334 |
+
2026-01-26 05:09:58,852 INFO [inference.py:332] Encoder out shape: torch.Size([5, 991, 1024])
|
| 335 |
+
2026-01-26 05:09:58,853 INFO [inference.py:333] Encoder out lens: tensor([991, 974, 967, 940, 923])
|
| 336 |
+
2026-01-26 05:09:58,853 INFO [inference.py:334] Encoder out range: [-13.657, 13.923]
|
| 337 |
+
2026-01-26 05:09:59,738 INFO [inference.py:344] Number of hypotheses: 5
|
| 338 |
+
2026-01-26 05:09:59,739 INFO [inference.py:346] First hypothesis: [39, 231, 32, 51, 49, 25, 93, 221, 18, 118, 159, 6, 4, 27, 5, 8, 93, 193, 39, 6, 130, 18, 7, 69, 176, 33, 152, 25, 284, 251, 205]
|
| 339 |
+
2026-01-26 05:09:59,746 INFO [inference.py:309] Audio shape: torch.Size([24, 64160]), dtype: torch.float32
|
| 340 |
+
2026-01-26 05:09:59,747 INFO [inference.py:310] Audio range: [-0.274, 0.264]
|
| 341 |
+
2026-01-26 05:09:59,748 INFO [inference.py:311] Audio lengths: tensor([64160, 61760, 61759, 61760, 59520, 58720, 57280, 55840, 55520, 54720,
|
| 342 |
+
51520, 50880, 50880, 50720, 49600, 49440, 49280, 47839, 46719, 46399,
|
| 343 |
+
45279, 43999, 43520, 42240], dtype=torch.int32)
|
| 344 |
+
2026-01-26 05:10:08,023 INFO [inference.py:332] Encoder out shape: torch.Size([24, 200, 1024])
|
| 345 |
+
2026-01-26 05:10:08,024 INFO [inference.py:333] Encoder out lens: tensor([200, 192, 192, 192, 185, 183, 178, 174, 173, 170, 160, 158, 158, 158,
|
| 346 |
+
154, 154, 153, 149, 145, 144, 141, 137, 135, 131])
|
| 347 |
+
2026-01-26 05:10:08,024 INFO [inference.py:334] Encoder out range: [-13.370, 11.318]
|
| 348 |
+
2026-01-26 05:10:08,807 INFO [inference.py:344] Number of hypotheses: 24
|
| 349 |
+
2026-01-26 05:10:08,807 INFO [inference.py:346] First hypothesis: [61, 49, 39, 17, 7, 5, 64, 9, 115, 16, 34, 57, 7, 5, 206, 221, 16]
|
| 350 |
+
2026-01-26 05:10:08,814 INFO [inference.py:309] Audio shape: torch.Size([5, 287520]), dtype: torch.float32
|
| 351 |
+
2026-01-26 05:10:08,814 INFO [inference.py:310] Audio range: [-0.099, 0.090]
|
| 352 |
+
2026-01-26 05:10:08,815 INFO [inference.py:311] Audio lengths: tensor([287520, 283360, 264959, 261760, 259360], dtype=torch.int32)
|
| 353 |
+
2026-01-26 05:10:16,810 INFO [inference.py:332] Encoder out shape: torch.Size([5, 898, 1024])
|
| 354 |
+
2026-01-26 05:10:16,811 INFO [inference.py:333] Encoder out lens: tensor([898, 885, 827, 817, 810])
|
| 355 |
+
2026-01-26 05:10:16,811 INFO [inference.py:334] Encoder out range: [-13.209, 12.373]
|
| 356 |
+
2026-01-26 05:10:17,639 INFO [inference.py:344] Number of hypotheses: 5
|
| 357 |
+
2026-01-26 05:10:17,640 INFO [inference.py:346] First hypothesis: [231, 178, 51, 144, 193, 46, 13, 38, 86, 120, 194, 16, 95, 13, 104, 19, 36, 26, 87, 20, 193]
|
| 358 |
+
2026-01-26 05:10:17,648 INFO [inference.py:309] Audio shape: torch.Size([5, 310720]), dtype: torch.float32
|
| 359 |
+
2026-01-26 05:10:17,649 INFO [inference.py:310] Audio range: [-0.046, 0.111]
|
| 360 |
+
2026-01-26 05:10:17,650 INFO [inference.py:311] Audio lengths: tensor([310719, 308639, 298560, 294880, 293759], dtype=torch.int32)
|
| 361 |
+
2026-01-26 05:10:26,341 INFO [inference.py:332] Encoder out shape: torch.Size([5, 970, 1024])
|
| 362 |
+
2026-01-26 05:10:26,342 INFO [inference.py:333] Encoder out lens: tensor([970, 964, 932, 921, 917])
|
| 363 |
+
2026-01-26 05:10:26,342 INFO [inference.py:334] Encoder out range: [-13.787, 12.644]
|
| 364 |
+
2026-01-26 05:10:26,950 INFO [inference.py:344] Number of hypotheses: 5
|
| 365 |
+
2026-01-26 05:10:26,951 INFO [inference.py:346] First hypothesis: [17, 7, 5, 46, 48, 66, 48, 84, 51, 66]
|
| 366 |
+
2026-01-26 05:10:26,958 INFO [inference.py:309] Audio shape: torch.Size([38, 41600]), dtype: torch.float32
|
| 367 |
+
2026-01-26 05:10:26,959 INFO [inference.py:310] Audio range: [-0.528, 0.544]
|
| 368 |
+
2026-01-26 05:10:26,960 INFO [inference.py:311] Audio lengths: tensor([41599, 39200, 37119, 36799, 34400, 34079, 33439, 32960, 31200, 31200,
|
| 369 |
+
26400, 25600, 24000, 22560, 22080, 21919, 21920, 21280, 20799, 19360,
|
| 370 |
+
18880, 18880, 17600, 17440, 15200, 13760, 12640, 11360, 5760, 5280,
|
| 371 |
+
5120, 4640, 4320, 3840, 3680, 3360, 3360, 3200],
|
| 372 |
+
dtype=torch.int32)
|
| 373 |
+
2026-01-26 05:10:34,801 INFO [inference.py:332] Encoder out shape: torch.Size([38, 129, 1024])
|
| 374 |
+
2026-01-26 05:10:34,802 INFO [inference.py:333] Encoder out lens: tensor([129, 122, 115, 114, 107, 106, 104, 102, 97, 97, 82, 79, 74, 70,
|
| 375 |
+
68, 68, 68, 66, 64, 60, 58, 58, 54, 54, 47, 42, 39, 35,
|
| 376 |
+
17, 16, 15, 14, 13, 11, 11, 10, 10, 9])
|
| 377 |
+
2026-01-26 05:10:34,802 INFO [inference.py:334] Encoder out range: [-12.505, 11.696]
|
| 378 |
+
2026-01-26 05:10:35,305 INFO [inference.py:344] Number of hypotheses: 38
|
| 379 |
+
2026-01-26 05:10:35,305 INFO [inference.py:346] First hypothesis: []
|
| 380 |
+
2026-01-26 05:10:35,312 INFO [inference.py:309] Audio shape: torch.Size([11, 143680]), dtype: torch.float32
|
| 381 |
+
2026-01-26 05:10:35,312 INFO [inference.py:310] Audio range: [-0.331, 0.228]
|
| 382 |
+
2026-01-26 05:10:35,313 INFO [inference.py:311] Audio lengths: tensor([143680, 143360, 143200, 137439, 130559, 129279, 128960, 125280, 125280,
|
| 383 |
+
123040, 118079], dtype=torch.int32)
|
| 384 |
+
2026-01-26 05:10:44,633 INFO [inference.py:332] Encoder out shape: torch.Size([11, 448, 1024])
|
| 385 |
+
2026-01-26 05:10:44,633 INFO [inference.py:333] Encoder out lens: tensor([448, 447, 447, 429, 407, 403, 402, 391, 391, 384, 368])
|
| 386 |
+
2026-01-26 05:10:44,634 INFO [inference.py:334] Encoder out range: [-12.574, 13.090]
|
| 387 |
+
2026-01-26 05:10:45,323 INFO [inference.py:344] Number of hypotheses: 11
|
| 388 |
+
2026-01-26 05:10:45,323 INFO [inference.py:346] First hypothesis: [11, 37, 9, 102, 18, 230, 95, 6, 24, 64, 16, 163, 73, 6, 165, 5, 17, 171, 15, 267, 153, 5]
|
| 389 |
+
2026-01-26 05:10:45,330 INFO [inference.py:309] Audio shape: torch.Size([5, 283680]), dtype: torch.float32
|
| 390 |
+
2026-01-26 05:10:45,331 INFO [inference.py:310] Audio range: [-0.096, 0.119]
|
| 391 |
+
2026-01-26 05:10:45,332 INFO [inference.py:311] Audio lengths: tensor([283680, 281119, 271360, 262560, 252479], dtype=torch.int32)
|
| 392 |
+
2026-01-26 05:10:53,624 INFO [inference.py:332] Encoder out shape: torch.Size([5, 886, 1024])
|
| 393 |
+
2026-01-26 05:10:53,625 INFO [inference.py:333] Encoder out lens: tensor([886, 878, 847, 820, 788])
|
| 394 |
+
2026-01-26 05:10:53,625 INFO [inference.py:334] Encoder out range: [-12.921, 13.557]
|
| 395 |
+
2026-01-26 05:10:54,461 INFO [inference.py:344] Number of hypotheses: 5
|
| 396 |
+
2026-01-26 05:10:54,462 INFO [inference.py:346] First hypothesis: [112, 81, 230, 38, 105, 34, 16, 34, 200, 294, 171, 15, 58, 134, 29, 16, 122, 25, 6, 25, 6, 16, 29, 119, 5, 25, 6, 205, 5, 17, 220]
|
| 397 |
+
2026-01-26 05:10:54,467 INFO [inference.py:309] Audio shape: torch.Size([23, 68800]), dtype: torch.float32
|
| 398 |
+
2026-01-26 05:10:54,468 INFO [inference.py:310] Audio range: [-0.356, 0.274]
|
| 399 |
+
2026-01-26 05:10:54,468 INFO [inference.py:311] Audio lengths: tensor([68800, 65600, 64800, 64480, 62400, 58079, 57119, 56159, 54560, 53920,
|
| 400 |
+
53920, 51840, 51520, 49280, 49280, 47519, 46240, 45280, 44960, 44960,
|
| 401 |
+
44480, 43680, 42560], dtype=torch.int32)
|
| 402 |
+
2026-01-26 05:11:03,138 INFO [inference.py:332] Encoder out shape: torch.Size([23, 214, 1024])
|
| 403 |
+
2026-01-26 05:11:03,139 INFO [inference.py:333] Encoder out lens: tensor([214, 204, 202, 201, 194, 181, 178, 175, 170, 168, 168, 161, 160, 153,
|
| 404 |
+
153, 148, 144, 141, 140, 140, 138, 136, 132])
|
| 405 |
+
2026-01-26 05:11:03,139 INFO [inference.py:334] Encoder out range: [-13.047, 12.227]
|
| 406 |
+
2026-01-26 05:11:04,047 INFO [inference.py:344] Number of hypotheses: 23
|
| 407 |
+
2026-01-26 05:11:04,048 INFO [inference.py:346] First hypothesis: [57, 33, 193, 16, 48, 66, 33, 13, 74, 19, 201]
|
| 408 |
+
2026-01-26 05:11:04,053 INFO [inference.py:309] Audio shape: torch.Size([11, 139520]), dtype: torch.float32
|
| 409 |
+
2026-01-26 05:11:04,054 INFO [inference.py:310] Audio range: [-0.117, 0.153]
|
| 410 |
+
2026-01-26 05:11:04,054 INFO [inference.py:311] Audio lengths: tensor([139520, 139200, 138880, 138079, 137440, 134720, 128320, 124000, 121600,
|
| 411 |
+
120160, 118240], dtype=torch.int32)
|
| 412 |
+
2026-01-26 05:11:12,432 INFO [inference.py:332] Encoder out shape: torch.Size([11, 435, 1024])
|
| 413 |
+
2026-01-26 05:11:12,433 INFO [inference.py:333] Encoder out lens: tensor([435, 434, 433, 431, 429, 420, 400, 387, 379, 375, 369])
|
| 414 |
+
2026-01-26 05:11:12,433 INFO [inference.py:334] Encoder out range: [-13.984, 12.798]
|
| 415 |
+
2026-01-26 05:11:13,304 INFO [inference.py:344] Number of hypotheses: 11
|
| 416 |
+
2026-01-26 05:11:13,304 INFO [inference.py:346] First hypothesis: [105, 206, 66, 5, 18, 47, 236, 49]
|
| 417 |
+
2026-01-26 05:11:13,310 INFO [inference.py:309] Audio shape: torch.Size([6, 248640]), dtype: torch.float32
|
| 418 |
+
2026-01-26 05:11:13,311 INFO [inference.py:310] Audio range: [-0.155, 0.171]
|
| 419 |
+
2026-01-26 05:11:13,312 INFO [inference.py:311] Audio lengths: tensor([248639, 231359, 228480, 225440, 223360, 212800], dtype=torch.int32)
|
| 420 |
+
2026-01-26 05:11:22,125 INFO [inference.py:332] Encoder out shape: torch.Size([6, 776, 1024])
|
| 421 |
+
2026-01-26 05:11:22,126 INFO [inference.py:333] Encoder out lens: tensor([776, 722, 713, 704, 697, 664])
|
| 422 |
+
2026-01-26 05:11:22,126 INFO [inference.py:334] Encoder out range: [-13.353, 11.759]
|
| 423 |
+
2026-01-26 05:11:22,718 INFO [inference.py:344] Number of hypotheses: 6
|
| 424 |
+
2026-01-26 05:11:22,719 INFO [inference.py:346] First hypothesis: [39, 93, 25, 6, 24, 6, 130, 117, 66, 100, 87, 7, 5, 110, 17, 172, 51, 9, 51, 49, 26, 87, 23, 6, 66, 33, 6, 91, 18, 7, 27, 46, 333, 130, 33, 357, 136, 21, 24]
|
| 425 |
+
2026-01-26 05:11:22,719 INFO [inference.py:535] Processed 684 utterances in 40 batches
|
| 426 |
+
2026-01-26 05:11:22,724 INFO [inference.py:309] Audio shape: torch.Size([9, 173600]), dtype: torch.float32
|
| 427 |
+
2026-01-26 05:11:22,725 INFO [inference.py:310] Audio range: [-0.501, 0.295]
|
| 428 |
+
2026-01-26 05:11:22,725 INFO [inference.py:311] Audio lengths: tensor([173599, 168319, 161760, 161760, 159040, 158719, 156800, 148000, 147359],
|
| 429 |
+
dtype=torch.int32)
|
| 430 |
+
2026-01-26 05:11:31,816 INFO [inference.py:332] Encoder out shape: torch.Size([9, 542, 1024])
|
| 431 |
+
2026-01-26 05:11:31,816 INFO [inference.py:333] Encoder out lens: tensor([542, 525, 505, 505, 496, 495, 489, 462, 460])
|
| 432 |
+
2026-01-26 05:11:31,817 INFO [inference.py:334] Encoder out range: [-11.960, 12.653]
|
| 433 |
+
2026-01-26 05:11:32,422 INFO [inference.py:344] Number of hypotheses: 9
|
| 434 |
+
2026-01-26 05:11:32,422 INFO [inference.py:346] First hypothesis: [11, 159, 110, 17, 50, 6, 51, 195, 25, 34, 13, 260, 6, 395, 195, 26, 17, 18, 118, 53, 86, 56]
|
| 435 |
+
2026-01-26 05:11:32,428 INFO [inference.py:309] Audio shape: torch.Size([11, 144640]), dtype: torch.float32
|
| 436 |
+
2026-01-26 05:11:32,429 INFO [inference.py:310] Audio range: [-0.332, 0.358]
|
| 437 |
+
2026-01-26 05:11:32,429 INFO [inference.py:311] Audio lengths: tensor([144639, 143520, 140159, 139840, 133760, 128159, 128159, 124000, 119680,
|
| 438 |
+
119200, 119040], dtype=torch.int32)
|
| 439 |
+
2026-01-26 05:11:41,731 INFO [inference.py:332] Encoder out shape: torch.Size([11, 451, 1024])
|
| 440 |
+
2026-01-26 05:11:41,731 INFO [inference.py:333] Encoder out lens: tensor([451, 448, 437, 436, 417, 400, 400, 387, 373, 372, 371])
|
| 441 |
+
2026-01-26 05:11:41,732 INFO [inference.py:334] Encoder out range: [-13.569, 12.367]
|
| 442 |
+
2026-01-26 05:11:42,844 INFO [inference.py:344] Number of hypotheses: 11
|
| 443 |
+
2026-01-26 05:11:42,844 INFO [inference.py:346] First hypothesis: [51, 419, 26, 15, 72, 113, 6, 91, 33]
|
| 444 |
+
2026-01-26 05:11:42,850 INFO [inference.py:309] Audio shape: torch.Size([13, 116640]), dtype: torch.float32
|
| 445 |
+
2026-01-26 05:11:42,850 INFO [inference.py:310] Audio range: [-0.268, 0.323]
|
| 446 |
+
2026-01-26 05:11:42,851 INFO [inference.py:311] Audio lengths: tensor([116640, 116000, 112799, 110240, 104319, 101919, 100799, 100800, 98400,
|
| 447 |
+
96480, 95039, 93920, 93600], dtype=torch.int32)
|
| 448 |
+
2026-01-26 05:11:50,812 INFO [inference.py:332] Encoder out shape: torch.Size([13, 364, 1024])
|
| 449 |
+
2026-01-26 05:11:50,813 INFO [inference.py:333] Encoder out lens: tensor([364, 362, 352, 344, 325, 318, 314, 314, 307, 301, 296, 293, 292])
|
| 450 |
+
2026-01-26 05:11:50,813 INFO [inference.py:334] Encoder out range: [-11.927, 13.414]
|
| 451 |
+
2026-01-26 05:11:51,360 INFO [inference.py:344] Number of hypotheses: 13
|
| 452 |
+
2026-01-26 05:11:51,360 INFO [inference.py:346] First hypothesis: [111, 114, 157, 57, 152, 25, 130, 101, 210, 96]
|
| 453 |
+
2026-01-26 05:11:51,366 INFO [inference.py:309] Audio shape: torch.Size([13, 114560]), dtype: torch.float32
|
| 454 |
+
2026-01-26 05:11:51,367 INFO [inference.py:310] Audio range: [-0.431, 0.430]
|
| 455 |
+
2026-01-26 05:11:51,368 INFO [inference.py:311] Audio lengths: tensor([114559, 111359, 110240, 108639, 107840, 103519, 102240, 101759, 101120,
|
| 456 |
+
100639, 98560, 97760, 97759], dtype=torch.int32)
|
| 457 |
+
2026-01-26 05:11:59,310 INFO [inference.py:332] Encoder out shape: torch.Size([13, 357, 1024])
|
| 458 |
+
2026-01-26 05:11:59,311 INFO [inference.py:333] Encoder out lens: tensor([357, 347, 344, 339, 336, 323, 319, 317, 315, 314, 307, 305, 305])
|
| 459 |
+
2026-01-26 05:11:59,311 INFO [inference.py:334] Encoder out range: [-11.355, 12.943]
|
| 460 |
+
2026-01-26 05:11:59,902 INFO [inference.py:344] Number of hypotheses: 13
|
| 461 |
+
2026-01-26 05:11:59,902 INFO [inference.py:346] First hypothesis: [264, 48]
|
| 462 |
+
2026-01-26 05:11:59,908 INFO [inference.py:309] Audio shape: torch.Size([6, 243200]), dtype: torch.float32
|
| 463 |
+
2026-01-26 05:11:59,909 INFO [inference.py:310] Audio range: [-0.339, 0.341]
|
| 464 |
+
2026-01-26 05:11:59,909 INFO [inference.py:311] Audio lengths: tensor([243200, 242079, 241760, 237920, 231679, 212799], dtype=torch.int32)
|
| 465 |
+
2026-01-26 05:12:08,215 INFO [inference.py:332] Encoder out shape: torch.Size([6, 759, 1024])
|
| 466 |
+
2026-01-26 05:12:08,215 INFO [inference.py:333] Encoder out lens: tensor([759, 756, 755, 743, 723, 664])
|
| 467 |
+
2026-01-26 05:12:08,216 INFO [inference.py:334] Encoder out range: [-13.935, 11.852]
|
| 468 |
+
2026-01-26 05:12:08,701 INFO [inference.py:344] Number of hypotheses: 6
|
| 469 |
+
2026-01-26 05:12:08,701 INFO [inference.py:346] First hypothesis: [66, 89, 174, 20]
|
| 470 |
+
2026-01-26 05:12:08,707 INFO [inference.py:309] Audio shape: torch.Size([5, 275520]), dtype: torch.float32
|
| 471 |
+
2026-01-26 05:12:08,708 INFO [inference.py:310] Audio range: [-0.126, 0.148]
|
| 472 |
+
2026-01-26 05:12:08,709 INFO [inference.py:311] Audio lengths: tensor([275520, 274880, 274880, 263999, 254879], dtype=torch.int32)
|
| 473 |
+
2026-01-26 05:12:16,831 INFO [inference.py:332] Encoder out shape: torch.Size([5, 860, 1024])
|
| 474 |
+
2026-01-26 05:12:16,831 INFO [inference.py:333] Encoder out lens: tensor([860, 858, 858, 824, 796])
|
| 475 |
+
2026-01-26 05:12:16,832 INFO [inference.py:334] Encoder out range: [-12.819, 13.634]
|
| 476 |
+
2026-01-26 05:12:17,414 INFO [inference.py:344] Number of hypotheses: 5
|
| 477 |
+
2026-01-26 05:12:17,414 INFO [inference.py:346] First hypothesis: [11, 39, 51, 49, 46, 171, 81, 7, 69]
|
| 478 |
+
2026-01-26 05:12:17,420 INFO [inference.py:309] Audio shape: torch.Size([38, 41920]), dtype: torch.float32
|
| 479 |
+
2026-01-26 05:12:17,421 INFO [inference.py:310] Audio range: [-0.380, 0.393]
|
| 480 |
+
2026-01-26 05:12:17,422 INFO [inference.py:311] Audio lengths: tensor([41919, 41760, 41599, 38560, 38080, 37440, 34400, 33600, 32159, 29120,
|
| 481 |
+
27200, 26560, 25600, 24800, 23680, 23520, 23360, 19680, 18880, 16160,
|
| 482 |
+
15360, 15200, 14880, 13600, 13440, 10080, 7840, 6720, 6400, 6080,
|
| 483 |
+
6080, 5600, 5440, 5120, 4640, 4000, 3840, 3520],
|
| 484 |
+
dtype=torch.int32)
|
| 485 |
+
2026-01-26 05:12:25,625 INFO [inference.py:332] Encoder out shape: torch.Size([38, 130, 1024])
|
| 486 |
+
2026-01-26 05:12:25,626 INFO [inference.py:333] Encoder out lens: tensor([130, 130, 129, 120, 118, 116, 107, 104, 100, 90, 84, 82, 79, 77,
|
| 487 |
+
73, 73, 72, 61, 58, 50, 47, 47, 46, 42, 41, 31, 24, 20,
|
| 488 |
+
19, 18, 18, 17, 16, 15, 14, 12, 11, 10])
|
| 489 |
+
2026-01-26 05:12:25,626 INFO [inference.py:334] Encoder out range: [-12.608, 11.500]
|
| 490 |
+
2026-01-26 05:12:26,111 INFO [inference.py:344] Number of hypotheses: 38
|
| 491 |
+
2026-01-26 05:12:26,111 INFO [inference.py:346] First hypothesis: [11]
|
| 492 |
+
2026-01-26 05:12:26,117 INFO [inference.py:309] Audio shape: torch.Size([5, 289760]), dtype: torch.float32
|
| 493 |
+
2026-01-26 05:12:26,117 INFO [inference.py:310] Audio range: [-0.259, 0.249]
|
| 494 |
+
2026-01-26 05:12:26,118 INFO [inference.py:311] Audio lengths: tensor([289760, 283039, 277760, 261599, 250080], dtype=torch.int32)
|
| 495 |
+
2026-01-26 05:12:34,901 INFO [inference.py:332] Encoder out shape: torch.Size([5, 905, 1024])
|
| 496 |
+
2026-01-26 05:12:34,902 INFO [inference.py:333] Encoder out lens: tensor([905, 884, 867, 817, 781])
|
| 497 |
+
2026-01-26 05:12:34,903 INFO [inference.py:334] Encoder out range: [-12.988, 13.561]
|
| 498 |
+
2026-01-26 05:12:35,738 INFO [inference.py:344] Number of hypotheses: 5
|
| 499 |
+
2026-01-26 05:12:35,738 INFO [inference.py:346] First hypothesis: [6, 290, 20, 48, 33, 238, 205, 37, 48, 265, 274]
|
| 500 |
+
2026-01-26 05:12:35,744 INFO [inference.py:309] Audio shape: torch.Size([23, 68640]), dtype: torch.float32
|
| 501 |
+
2026-01-26 05:12:35,745 INFO [inference.py:310] Audio range: [-0.114, 0.158]
|
| 502 |
+
2026-01-26 05:12:35,745 INFO [inference.py:311] Audio lengths: tensor([68640, 67680, 66719, 66080, 65759, 65600, 64159, 64159, 61119, 60000,
|
| 503 |
+
56800, 56639, 53760, 53440, 52640, 52479, 50720, 50400, 49760, 46880,
|
| 504 |
+
46080, 45280, 45120], dtype=torch.int32)
|
| 505 |
+
2026-01-26 05:12:44,120 INFO [inference.py:332] Encoder out shape: torch.Size([23, 214, 1024])
|
| 506 |
+
2026-01-26 05:12:44,120 INFO [inference.py:333] Encoder out lens: tensor([214, 211, 208, 206, 205, 204, 200, 200, 190, 187, 177, 176, 167, 166,
|
| 507 |
+
164, 163, 158, 157, 155, 146, 143, 141, 140])
|
| 508 |
+
2026-01-26 05:12:44,121 INFO [inference.py:334] Encoder out range: [-13.289, 13.747]
|
| 509 |
+
2026-01-26 05:12:44,824 INFO [inference.py:344] Number of hypotheses: 23
|
| 510 |
+
2026-01-26 05:12:44,824 INFO [inference.py:346] First hypothesis: [89, 186, 32, 7, 8, 234, 13]
|
| 511 |
+
2026-01-26 05:12:44,831 INFO [inference.py:309] Audio shape: torch.Size([40, 39520]), dtype: torch.float32
|
| 512 |
+
2026-01-26 05:12:44,832 INFO [inference.py:310] Audio range: [-0.170, 0.217]
|
| 513 |
+
2026-01-26 05:12:44,832 INFO [inference.py:311] Audio lengths: tensor([39520, 38720, 37760, 36800, 36320, 36159, 34720, 33919, 32640, 31200,
|
| 514 |
+
29760, 28479, 27840, 27840, 24320, 23040, 21120, 20639, 17920, 16800,
|
| 515 |
+
16160, 15840, 14720, 14560, 14560, 14400, 13760, 11520, 9920, 9919,
|
| 516 |
+
9760, 9120, 7840, 7360, 7040, 5440, 4960, 4960, 4800, 4320],
|
| 517 |
+
dtype=torch.int32)
|
| 518 |
+
2026-01-26 05:12:52,729 INFO [inference.py:332] Encoder out shape: torch.Size([40, 123, 1024])
|
| 519 |
+
2026-01-26 05:12:52,730 INFO [inference.py:333] Encoder out lens: tensor([123, 120, 117, 114, 113, 112, 108, 105, 101, 97, 92, 88, 86, 86,
|
| 520 |
+
75, 71, 65, 64, 55, 52, 50, 49, 45, 45, 45, 44, 42, 35,
|
| 521 |
+
30, 30, 30, 28, 24, 22, 21, 16, 15, 15, 14, 13])
|
| 522 |
+
2026-01-26 05:12:52,730 INFO [inference.py:334] Encoder out range: [-11.403, 12.142]
|
| 523 |
+
2026-01-26 05:12:53,318 INFO [inference.py:344] Number of hypotheses: 40
|
| 524 |
+
2026-01-26 05:12:53,319 INFO [inference.py:346] First hypothesis: [89]
|
| 525 |
+
2026-01-26 05:12:53,319 INFO [inference.py:535] Processed 847 utterances in 50 batches
|
| 526 |
+
2026-01-26 05:12:53,326 INFO [inference.py:309] Audio shape: torch.Size([9, 176320]), dtype: torch.float32
|
| 527 |
+
2026-01-26 05:12:53,326 INFO [inference.py:310] Audio range: [-0.145, 0.173]
|
| 528 |
+
2026-01-26 05:12:53,327 INFO [inference.py:311] Audio lengths: tensor([176320, 174879, 170880, 161280, 161120, 158880, 155039, 153760, 146079],
|
| 529 |
+
dtype=torch.int32)
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-13-05
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:13:05,965 INFO [inference.py:617] ================================================================================
|
| 2 |
+
2026-01-26 05:13:05,965 INFO [inference.py:618] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:13:05,965 INFO [inference.py:619] ================================================================================
|
| 4 |
+
2026-01-26 05:13:05,965 INFO [inference.py:620] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:13:05,965 INFO [inference.py:621] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:13:05,965 INFO [inference.py:622] Test set: ihm
|
| 7 |
+
2026-01-26 05:13:05,965 INFO [inference.py:623] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:13:05,966 INFO [inference.py:625] Beam size: 4
|
| 9 |
+
2026-01-26 05:13:05,966 INFO [inference.py:626] Max states: 64
|
| 10 |
+
2026-01-26 05:13:05,966 INFO [inference.py:627] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:13:05,966 INFO [inference.py:633] Device: cpu
|
| 12 |
+
2026-01-26 05:13:05,966 INFO [inference.py:636] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:13:05,967 INFO [inference.py:644] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:13:05,967 INFO [inference.py:645] Blank ID: 0
|
| 15 |
+
2026-01-26 05:13:05,967 INFO [inference.py:648] Creating model
|
| 16 |
+
2026-01-26 05:13:07,626 INFO [inference.py:655] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 17 |
+
2026-01-26 05:13:07,626 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 18 |
+
2026-01-26 05:13:12,816 INFO [inference.py:684] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:13:12,816 INFO [inference.py:687] Loading test data
|
| 20 |
+
2026-01-26 05:13:12,816 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:13:13,985 INFO [inference.py:698] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:13:13,986 INFO [inference.py:701] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-14-59
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:14:59,640 INFO [inference.py:625] ================================================================================
|
| 2 |
+
2026-01-26 05:14:59,640 INFO [inference.py:626] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:14:59,640 INFO [inference.py:627] ================================================================================
|
| 4 |
+
2026-01-26 05:14:59,640 INFO [inference.py:628] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:14:59,640 INFO [inference.py:629] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:14:59,640 INFO [inference.py:630] Test set: ihm
|
| 7 |
+
2026-01-26 05:14:59,640 INFO [inference.py:631] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:14:59,640 INFO [inference.py:633] Beam size: 4
|
| 9 |
+
2026-01-26 05:14:59,640 INFO [inference.py:634] Max states: 64
|
| 10 |
+
2026-01-26 05:14:59,640 INFO [inference.py:635] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:14:59,640 INFO [inference.py:641] Device: cpu
|
| 12 |
+
2026-01-26 05:14:59,640 INFO [inference.py:644] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:14:59,642 INFO [inference.py:652] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:14:59,642 INFO [inference.py:653] Blank ID: 0
|
| 15 |
+
2026-01-26 05:14:59,642 INFO [inference.py:656] Creating model
|
| 16 |
+
2026-01-26 05:15:01,252 INFO [inference.py:663] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 17 |
+
2026-01-26 05:15:01,252 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 18 |
+
2026-01-26 05:15:06,040 INFO [inference.py:692] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:15:06,040 INFO [inference.py:695] Loading test data
|
| 20 |
+
2026-01-26 05:15:06,040 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:15:07,133 INFO [inference.py:706] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:15:07,133 INFO [inference.py:709] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-17-40
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:17:40,782 INFO [inference.py:622] ================================================================================
|
| 2 |
+
2026-01-26 05:17:40,782 INFO [inference.py:623] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:17:40,782 INFO [inference.py:624] ================================================================================
|
| 4 |
+
2026-01-26 05:17:40,782 INFO [inference.py:625] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:17:40,782 INFO [inference.py:626] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:17:40,783 INFO [inference.py:627] Test set: ihm
|
| 7 |
+
2026-01-26 05:17:40,783 INFO [inference.py:628] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:17:40,783 INFO [inference.py:630] Beam size: 4
|
| 9 |
+
2026-01-26 05:17:40,783 INFO [inference.py:631] Max states: 64
|
| 10 |
+
2026-01-26 05:17:40,783 INFO [inference.py:632] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:17:40,783 INFO [inference.py:638] Device: cpu
|
| 12 |
+
2026-01-26 05:17:40,783 INFO [inference.py:641] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:17:40,784 INFO [inference.py:649] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:17:40,784 INFO [inference.py:650] Blank ID: 0
|
| 15 |
+
2026-01-26 05:17:40,785 INFO [inference.py:653] Creating model
|
| 16 |
+
2026-01-26 05:17:42,399 INFO [inference.py:660] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 17 |
+
2026-01-26 05:17:42,400 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 18 |
+
2026-01-26 05:17:47,415 INFO [inference.py:689] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:17:47,416 INFO [inference.py:692] Loading test data
|
| 20 |
+
2026-01-26 05:17:47,416 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:17:48,537 INFO [inference.py:703] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:17:48,538 INFO [inference.py:706] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-20-04
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:20:04,436 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:20:04,436 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:20:04,436 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:20:04,436 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:20:04,436 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:20:04,436 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:20:04,436 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:20:04,436 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:20:04,436 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:20:04,436 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:20:04,437 INFO [inference.py:629] Device: cpu
|
| 12 |
+
2026-01-26 05:20:04,437 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:20:04,438 INFO [inference.py:640] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:20:04,438 INFO [inference.py:641] Blank ID: 0
|
| 15 |
+
2026-01-26 05:20:04,438 INFO [inference.py:644] Creating model
|
| 16 |
+
2026-01-26 05:20:05,956 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 17 |
+
2026-01-26 05:20:05,957 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-train-loss.pt
|
| 18 |
+
2026-01-26 05:20:10,638 INFO [inference.py:680] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:20:10,639 INFO [inference.py:683] Loading test data
|
| 20 |
+
2026-01-26 05:20:10,639 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:20:11,677 INFO [inference.py:694] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:20:11,677 INFO [inference.py:697] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-29-29
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:29:29,151 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:29:29,151 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:29:29,151 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:29:29,151 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:29:29,151 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:29:29,151 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:29:29,151 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:29:29,151 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:29:29,151 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:29:29,151 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:29:29,151 INFO [inference.py:629] Device: cpu
|
| 12 |
+
2026-01-26 05:29:29,151 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:29:29,153 INFO [inference.py:640] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:29:29,153 INFO [inference.py:641] Blank ID: 0
|
| 15 |
+
2026-01-26 05:29:29,153 INFO [inference.py:644] Creating model
|
| 16 |
+
2026-01-26 05:29:30,733 INFO [inference.py:673] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 05:29:30,734 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 05:29:35,902 INFO [inference.py:680] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:29:35,902 INFO [inference.py:683] Loading test data
|
| 20 |
+
2026-01-26 05:29:35,902 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:29:37,022 INFO [inference.py:694] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:29:37,023 INFO [inference.py:697] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-48-19
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:48:19,123 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:48:19,123 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:48:19,123 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:48:19,123 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:48:19,123 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:48:19,123 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:48:19,123 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:48:19,123 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:48:19,123 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:48:19,123 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:48:19,123 INFO [inference.py:629] Device: cpu
|
| 12 |
+
2026-01-26 05:48:19,123 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:48:19,125 INFO [inference.py:640] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:48:19,125 INFO [inference.py:641] Blank ID: 0
|
| 15 |
+
2026-01-26 05:48:19,125 INFO [inference.py:644] Creating model
|
| 16 |
+
2026-01-26 05:48:22,516 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 05:48:22,517 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 05:48:39,229 INFO [inference.py:680] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:48:39,229 INFO [inference.py:683] Loading test data
|
| 20 |
+
2026-01-26 05:48:39,229 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:48:41,915 INFO [inference.py:694] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:48:41,915 INFO [inference.py:697] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-50-10
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:50:10,649 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:50:10,649 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:50:10,649 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:50:10,649 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:50:10,649 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:50:10,649 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:50:10,649 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:50:10,649 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:50:10,649 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:50:10,649 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:50:10,649 INFO [inference.py:629] Device: cuda:0
|
| 12 |
+
2026-01-26 05:50:10,649 INFO [inference.py:632] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:50:10,651 INFO [inference.py:640] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:50:10,651 INFO [inference.py:641] Blank ID: 0
|
| 15 |
+
2026-01-26 05:50:10,651 INFO [inference.py:644] Creating model
|
| 16 |
+
2026-01-26 05:50:12,218 INFO [inference.py:651] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 05:50:12,219 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 05:50:18,117 INFO [inference.py:680] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:50:18,118 INFO [inference.py:683] Loading test data
|
| 20 |
+
2026-01-26 05:50:18,118 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:50:19,185 INFO [inference.py:694] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:50:19,186 INFO [inference.py:697] Starting inference...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-05-54-32
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 05:54:32,577 INFO [inference.py:613] ================================================================================
|
| 2 |
+
2026-01-26 05:54:32,577 INFO [inference.py:614] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 05:54:32,577 INFO [inference.py:615] ================================================================================
|
| 4 |
+
2026-01-26 05:54:32,578 INFO [inference.py:616] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 05:54:32,578 INFO [inference.py:617] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 05:54:32,578 INFO [inference.py:618] Test set: ihm
|
| 7 |
+
2026-01-26 05:54:32,578 INFO [inference.py:619] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 05:54:32,578 INFO [inference.py:621] Beam size: 4
|
| 9 |
+
2026-01-26 05:54:32,578 INFO [inference.py:622] Max states: 64
|
| 10 |
+
2026-01-26 05:54:32,578 INFO [inference.py:623] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 05:54:32,578 INFO [inference.py:627] Device: cuda:0
|
| 12 |
+
2026-01-26 05:54:32,578 INFO [inference.py:630] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 05:54:32,579 INFO [inference.py:638] Vocabulary size: 500
|
| 14 |
+
2026-01-26 05:54:32,580 INFO [inference.py:639] Blank ID: 0
|
| 15 |
+
2026-01-26 05:54:32,580 INFO [inference.py:642] Creating model
|
| 16 |
+
2026-01-26 05:54:34,158 INFO [inference.py:649] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 05:54:34,158 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 05:54:40,547 INFO [inference.py:678] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 05:54:40,547 INFO [inference.py:681] Loading test data
|
| 20 |
+
2026-01-26 05:54:40,548 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 05:54:41,686 INFO [inference.py:692] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 05:54:41,686 INFO [inference.py:695] Starting inference...
|
| 23 |
+
2026-01-26 05:54:41,686 INFO [inference.py:696] Note: First batch may take longer due to GPU warmup
|
| 24 |
+
2026-01-26 05:54:42,879 INFO [inference.py:711]
|
| 25 |
+
============================================================
|
| 26 |
+
2026-01-26 05:54:42,879 INFO [inference.py:712] Processing batch 1
|
| 27 |
+
2026-01-26 05:54:42,879 INFO [inference.py:718] Batch size: 6
|
| 28 |
+
2026-01-26 05:54:42,879 INFO [inference.py:736] Starting decoding for this batch...
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-02-34
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 06:02:34,679 INFO [inference.py:630] ================================================================================
|
| 2 |
+
2026-01-26 06:02:34,679 INFO [inference.py:631] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 06:02:34,679 INFO [inference.py:632] ================================================================================
|
| 4 |
+
2026-01-26 06:02:34,679 INFO [inference.py:633] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 06:02:34,679 INFO [inference.py:634] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 06:02:34,679 INFO [inference.py:635] Test set: ihm
|
| 7 |
+
2026-01-26 06:02:34,679 INFO [inference.py:636] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 06:02:34,679 INFO [inference.py:638] Beam size: 4
|
| 9 |
+
2026-01-26 06:02:34,679 INFO [inference.py:639] Max states: 64
|
| 10 |
+
2026-01-26 06:02:34,680 INFO [inference.py:640] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 06:02:34,680 INFO [inference.py:644] Device: cuda:0
|
| 12 |
+
2026-01-26 06:02:34,680 INFO [inference.py:647] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 06:02:34,681 INFO [inference.py:655] Vocabulary size: 500
|
| 14 |
+
2026-01-26 06:02:34,681 INFO [inference.py:656] Blank ID: 0
|
| 15 |
+
2026-01-26 06:02:34,681 INFO [inference.py:659] Creating model
|
| 16 |
+
2026-01-26 06:02:36,292 INFO [inference.py:666] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 06:02:36,293 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 06:02:42,168 INFO [inference.py:695] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 06:02:42,168 INFO [inference.py:698] Loading test data
|
| 20 |
+
2026-01-26 06:02:42,168 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 06:02:43,277 INFO [inference.py:709] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 06:02:43,278 INFO [inference.py:712] Starting inference...
|
| 23 |
+
2026-01-26 06:02:43,278 INFO [inference.py:713] Note: First batch may take longer due to GPU warmup
|
| 24 |
+
2026-01-26 06:02:44,374 INFO [inference.py:728]
|
| 25 |
+
============================================================
|
| 26 |
+
2026-01-26 06:02:44,375 INFO [inference.py:729] Processing batch 1
|
| 27 |
+
2026-01-26 06:02:44,375 INFO [inference.py:735] Batch size: 6
|
| 28 |
+
2026-01-26 06:02:44,375 INFO [inference.py:753] Starting decoding for this batch...
|
| 29 |
+
2026-01-26 06:02:45,363 INFO [inference.py:299] Beam search: Processing 6 utterances
|
| 30 |
+
2026-01-26 06:02:45,363 INFO [inference.py:305] Utterance 1/6: 769 frames
|
| 31 |
+
2026-01-26 06:02:45,363 INFO [inference.py:312] Frame 0/769, |B|=1
|
| 32 |
+
2026-01-26 06:02:45,503 INFO [inference.py:362] After initial expansion: |A|=5
|
| 33 |
+
2026-01-26 06:02:45,503 INFO [inference.py:371] Emission iteration 0, |A|=5
|
| 34 |
+
2026-01-26 06:02:45,506 INFO [inference.py:371] Emission iteration 1, |A|=15
|
| 35 |
+
2026-01-26 06:02:45,513 INFO [inference.py:371] Emission iteration 2, |A|=45
|
| 36 |
+
2026-01-26 06:02:56,541 INFO [inference.py:312] Frame 100/769, |B|=64
|
| 37 |
+
2026-01-26 06:02:56,567 INFO [inference.py:362] After initial expansion: |A|=320
|
| 38 |
+
2026-01-26 06:02:56,567 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 39 |
+
2026-01-26 06:02:56,594 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 40 |
+
2026-01-26 06:02:56,620 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 41 |
+
2026-01-26 06:03:07,191 INFO [inference.py:312] Frame 200/769, |B|=64
|
| 42 |
+
2026-01-26 06:03:07,216 INFO [inference.py:362] After initial expansion: |A|=320
|
| 43 |
+
2026-01-26 06:03:07,217 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 44 |
+
2026-01-26 06:03:07,243 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 45 |
+
2026-01-26 06:03:07,270 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 46 |
+
2026-01-26 06:03:17,826 INFO [inference.py:312] Frame 300/769, |B|=64
|
| 47 |
+
2026-01-26 06:03:17,851 INFO [inference.py:362] After initial expansion: |A|=320
|
| 48 |
+
2026-01-26 06:03:17,851 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 49 |
+
2026-01-26 06:03:17,878 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 50 |
+
2026-01-26 06:03:17,904 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 51 |
+
2026-01-26 06:03:28,408 INFO [inference.py:312] Frame 400/769, |B|=64
|
| 52 |
+
2026-01-26 06:03:28,434 INFO [inference.py:362] After initial expansion: |A|=320
|
| 53 |
+
2026-01-26 06:03:28,434 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 54 |
+
2026-01-26 06:03:28,460 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 55 |
+
2026-01-26 06:03:28,487 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 56 |
+
2026-01-26 06:03:39,030 INFO [inference.py:312] Frame 500/769, |B|=64
|
| 57 |
+
2026-01-26 06:03:39,060 INFO [inference.py:362] After initial expansion: |A|=320
|
| 58 |
+
2026-01-26 06:03:39,060 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 59 |
+
2026-01-26 06:03:39,094 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 60 |
+
2026-01-26 06:03:39,125 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 61 |
+
2026-01-26 06:03:49,620 INFO [inference.py:312] Frame 600/769, |B|=64
|
| 62 |
+
2026-01-26 06:03:49,646 INFO [inference.py:362] After initial expansion: |A|=320
|
| 63 |
+
2026-01-26 06:03:49,646 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 64 |
+
2026-01-26 06:03:49,673 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 65 |
+
2026-01-26 06:03:49,699 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 66 |
+
2026-01-26 06:04:00,283 INFO [inference.py:312] Frame 700/769, |B|=64
|
| 67 |
+
2026-01-26 06:04:00,309 INFO [inference.py:362] After initial expansion: |A|=320
|
| 68 |
+
2026-01-26 06:04:00,309 INFO [inference.py:371] Emission iteration 0, |A|=320
|
| 69 |
+
2026-01-26 06:04:00,335 INFO [inference.py:371] Emission iteration 1, |A|=64
|
| 70 |
+
2026-01-26 06:04:00,362 INFO [inference.py:371] Emission iteration 2, |A|=64
|
| 71 |
+
2026-01-26 06:04:07,525 INFO [inference.py:455] Utterance 1 result: 2 tokens
|
| 72 |
+
2026-01-26 06:04:07,525 INFO [inference.py:305] Utterance 2/6: 764 frames
|
| 73 |
+
2026-01-26 06:04:07,525 INFO [inference.py:312] Frame 0/764, |B|=1
|
| 74 |
+
2026-01-26 06:04:07,526 INFO [inference.py:362] After initial expansion: |A|=5
|
| 75 |
+
2026-01-26 06:04:07,526 INFO [inference.py:371] Emission iteration 0, |A|=5
|
| 76 |
+
2026-01-26 06:04:07,528 INFO [inference.py:371] Emission iteration 1, |A|=15
|
| 77 |
+
2026-01-26 06:04:07,534 INFO [inference.py:371] Emission iteration 2, |A|=45
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-04-30
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-01-26 06:04:30,814 INFO [inference.py:578] ================================================================================
|
| 2 |
+
2026-01-26 06:04:30,815 INFO [inference.py:579] XLSR-Transducer Inference on AMI
|
| 3 |
+
2026-01-26 06:04:30,815 INFO [inference.py:580] ================================================================================
|
| 4 |
+
2026-01-26 06:04:30,815 INFO [inference.py:581] Experiment dir: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
2026-01-26 06:04:30,815 INFO [inference.py:582] Output dir: xlsr_transducer/exp_16gb_scd/inference_results
|
| 6 |
+
2026-01-26 06:04:30,815 INFO [inference.py:583] Test set: ihm
|
| 7 |
+
2026-01-26 06:04:30,815 INFO [inference.py:584] Decoding method: modified_beam_search
|
| 8 |
+
2026-01-26 06:04:30,815 INFO [inference.py:586] Beam size: 4
|
| 9 |
+
2026-01-26 06:04:30,815 INFO [inference.py:587] Max states: 64
|
| 10 |
+
2026-01-26 06:04:30,815 INFO [inference.py:588] Max symbols per frame: 3
|
| 11 |
+
2026-01-26 06:04:30,815 INFO [inference.py:592] Device: cuda:0
|
| 12 |
+
2026-01-26 06:04:30,815 INFO [inference.py:595] Loading BPE model from data/lang_bpe_500_scd
|
| 13 |
+
2026-01-26 06:04:30,817 INFO [inference.py:603] Vocabulary size: 500
|
| 14 |
+
2026-01-26 06:04:30,817 INFO [inference.py:604] Blank ID: 0
|
| 15 |
+
2026-01-26 06:04:30,817 INFO [inference.py:607] Creating model
|
| 16 |
+
2026-01-26 06:04:32,424 INFO [inference.py:614] Loading checkpoint: xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 17 |
+
2026-01-26 06:04:32,424 INFO [checkpoint.py:111] Loading checkpoint from xlsr_transducer/exp_16gb_scd/best-valid-loss.pt
|
| 18 |
+
2026-01-26 06:04:38,254 INFO [inference.py:643] Number of model parameters: 317,511,772
|
| 19 |
+
2026-01-26 06:04:38,254 INFO [inference.py:646] Loading test data
|
| 20 |
+
2026-01-26 06:04:38,254 INFO [asr_datamodule.py:448] About to get AMI IHM test cuts with [SCD] tokens
|
| 21 |
+
2026-01-26 06:04:39,360 INFO [inference.py:657] Number of test utterances: 6676
|
| 22 |
+
2026-01-26 06:04:39,361 INFO [inference.py:660] Starting inference...
|
| 23 |
+
2026-01-26 06:04:39,361 INFO [inference.py:661] Note: First batch may take longer due to GPU warmup
|
| 24 |
+
2026-01-26 06:04:40,450 INFO [inference.py:676]
|
| 25 |
+
============================================================
|
| 26 |
+
2026-01-26 06:04:40,450 INFO [inference.py:677] Processing batch 1
|
| 27 |
+
2026-01-26 06:04:40,450 INFO [inference.py:683] Batch size: 6
|
| 28 |
+
2026-01-26 06:04:40,450 INFO [inference.py:701] Starting decoding for this batch...
|
| 29 |
+
2026-01-26 06:04:41,439 INFO [inference.py:283] Beam search: Processing 6 utterances
|
| 30 |
+
2026-01-26 06:04:41,440 INFO [inference.py:289] Utterance 1/6: 769 frames
|
| 31 |
+
2026-01-26 06:04:41,440 INFO [inference.py:296] Frame 0/769, |B|=1
|
| 32 |
+
2026-01-26 06:04:41,938 INFO [inference.py:296] Frame 200/769, |B|=4
|
| 33 |
+
2026-01-26 06:04:42,252 INFO [inference.py:296] Frame 400/769, |B|=4
|
| 34 |
+
2026-01-26 06:04:42,564 INFO [inference.py:296] Frame 600/769, |B|=4
|
| 35 |
+
2026-01-26 06:04:42,846 INFO [inference.py:403] Utterance 1 result: 1 tokens
|
| 36 |
+
2026-01-26 06:04:42,846 INFO [inference.py:289] Utterance 2/6: 764 frames
|
| 37 |
+
2026-01-26 06:04:42,846 INFO [inference.py:296] Frame 0/764, |B|=1
|
| 38 |
+
2026-01-26 06:04:43,158 INFO [inference.py:296] Frame 200/764, |B|=4
|
| 39 |
+
2026-01-26 06:04:43,477 INFO [inference.py:296] Frame 400/764, |B|=4
|
| 40 |
+
2026-01-26 06:04:43,804 INFO [inference.py:296] Frame 600/764, |B|=4
|
| 41 |
+
2026-01-26 06:04:44,077 INFO [inference.py:403] Utterance 2 result: 31 tokens
|
| 42 |
+
2026-01-26 06:04:44,078 INFO [inference.py:289] Utterance 3/6: 743 frames
|
| 43 |
+
2026-01-26 06:04:44,078 INFO [inference.py:296] Frame 0/743, |B|=1
|
| 44 |
+
2026-01-26 06:04:44,393 INFO [inference.py:296] Frame 200/743, |B|=4
|
| 45 |
+
2026-01-26 06:04:44,721 INFO [inference.py:296] Frame 400/743, |B|=4
|
| 46 |
+
2026-01-26 06:04:45,054 INFO [inference.py:296] Frame 600/743, |B|=4
|
| 47 |
+
2026-01-26 06:04:45,278 INFO [inference.py:403] Utterance 3 result: 1 tokens
|
| 48 |
+
2026-01-26 06:04:45,278 INFO [inference.py:289] Utterance 4/6: 712 frames
|
| 49 |
+
2026-01-26 06:04:45,278 INFO [inference.py:296] Frame 0/712, |B|=1
|
| 50 |
+
2026-01-26 06:04:45,592 INFO [inference.py:296] Frame 200/712, |B|=4
|
| 51 |
+
2026-01-26 06:04:45,907 INFO [inference.py:296] Frame 400/712, |B|=4
|
| 52 |
+
2026-01-26 06:04:46,221 INFO [inference.py:296] Frame 600/712, |B|=4
|
| 53 |
+
2026-01-26 06:04:46,396 INFO [inference.py:403] Utterance 4 result: 13 tokens
|
| 54 |
+
2026-01-26 06:04:46,397 INFO [inference.py:289] Utterance 5/6: 699 frames
|
| 55 |
+
2026-01-26 06:04:46,397 INFO [inference.py:296] Frame 0/699, |B|=1
|
| 56 |
+
2026-01-26 06:04:46,713 INFO [inference.py:296] Frame 200/699, |B|=4
|
| 57 |
+
2026-01-26 06:04:47,059 INFO [inference.py:296] Frame 400/699, |B|=4
|
| 58 |
+
2026-01-26 06:04:47,404 INFO [inference.py:296] Frame 600/699, |B|=4
|
| 59 |
+
2026-01-26 06:04:47,572 INFO [inference.py:403] Utterance 5 result: 11 tokens
|
| 60 |
+
2026-01-26 06:04:47,572 INFO [inference.py:289] Utterance 6/6: 696 frames
|
| 61 |
+
2026-01-26 06:04:47,572 INFO [inference.py:296] Frame 0/696, |B|=1
|
| 62 |
+
2026-01-26 06:04:47,895 INFO [inference.py:296] Frame 200/696, |B|=4
|
| 63 |
+
2026-01-26 06:04:48,221 INFO [inference.py:296] Frame 400/696, |B|=4
|
| 64 |
+
2026-01-26 06:04:48,558 INFO [inference.py:296] Frame 600/696, |B|=4
|
| 65 |
+
2026-01-26 06:04:48,713 INFO [inference.py:403] Utterance 6 result: 13 tokens
|
| 66 |
+
2026-01-26 06:04:48,713 INFO [inference.py:410] Beam search complete
|
| 67 |
+
2026-01-26 06:04:48,713 INFO [inference.py:707] Decoding completed in 8.26s
|
| 68 |
+
2026-01-26 06:04:48,713 INFO [inference.py:710] Converting tokens to text...
|
| 69 |
+
2026-01-26 06:04:48,714 INFO [inference.py:715] First hypothesis: OKAY...
|
| 70 |
+
2026-01-26 06:04:48,714 INFO [inference.py:723] Batch 1 completed in 8.26s
|
| 71 |
+
2026-01-26 06:04:48,714 INFO [inference.py:724] Average time per utterance: 1.38s
|
| 72 |
+
2026-01-26 06:04:48,714 INFO [inference.py:725] Total processed so far: 6 utterances in 1 batches
|
egs/ami/ASR/xlsr_transducer/inference_results/log-inference-ihm-2026-01-26-06-07-36
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
egs/ami/ASR/xlsr_transducer/inference_results/metrics-ihm.txt
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
================================================================================
|
| 2 |
+
XLSR-Transducer Inference Results
|
| 3 |
+
================================================================================
|
| 4 |
+
Experiment: xlsr_transducer/exp_16gb_scd
|
| 5 |
+
Test set: ihm
|
| 6 |
+
Decoding method: modified_beam_search
|
| 7 |
+
Beam size: 4
|
| 8 |
+
Max states: 64
|
| 9 |
+
Max symbols per frame: 3
|
| 10 |
+
Number of utterances: 6676
|
| 11 |
+
Total words: 92205
|
| 12 |
+
Total errors: 73964
|
| 13 |
+
WER: 80.22%
|
| 14 |
+
Total inference time: 46.9 minutes
|
| 15 |
+
Average time per utterance: 0.42s
|
| 16 |
+
================================================================================
|
egs/ami/ASR/xlsr_transducer/inference_results/ref-ihm.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
egs/ami/ASR/xlsr_transducer/log/log-train-2026-01-25-02-57-28
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27310bd90828a6f6d515d1181fa187228601dfe8247ecc89d39848c95e54ea20
|
| 3 |
+
size 174840669
|
egs/ami/ASR/xlsr_transducer/tensorboard/events.out.tfevents.1769309848.3edaabdb707c.1028020.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bb2715978701cb9358c38337c7bb5316cffc55440353a079aeb9c0bdc3867f2
|
| 3 |
+
size 158109
|