NeMo
AmirHussein commited on
Commit
0142251
·
1 Parent(s): e9566c8

Add duplex model checkpoint

Browse files
dummy_nemo_duplex/qwen_1b/cmd-args.log ADDED
@@ -0,0 +1 @@
 
 
1
+ /export/fs06/ahussei6/nvidia/github/NeMo/examples/speechlm2/s2s_duplex_speech_decoder_train.py --config-path=conf/train --config-name=qwen_1b ++exp_manager.checkpoint_callback_params.save_top_k=3 exp_manager.name=qwen_1b ++model.pretrained_audio_codec=/export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo ++model.pretrained_tts_from_s2s=/export/fs06/ahussei6/nvidia/pretrained_models/magpie_tts/tts-pretraining_qwnen_2.5_81007_steps.ckpt ++model.pretrained_asr=/export/fs06/ahussei6/nvidia/pretrained_models/asr/stt_en_fastconformer_hybrid_large_streaming_multi_v1.20.0/stt_en_fastconformer_hybrid_large_streaming_multi.nemo ++model.mask_sequence_loss=True trainer.num_nodes=1 exp_manager.explicit_log_dir=/export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b data.train_ds.seed=42 ++model.audio_loss_weight=20 ++model.speech_decoder.cond_on_prev_audio_tokens=True ++model.speech_decoder.use_speaker_encoder=True ++model.speech_decoder.cond_on_char_embedding=True ++model.speech_decoder.cond_on_asr_emb=False ++model.speech_decoder.cond_on_llm_latent=False ++model.speech_decoder.cond_on_modality_adapter_emb=False ++model.speech_decoder.cond_on_text_tokens=False ++model.speech_decoder.cfg_scale=2.5 ++model.speech_decoder.kernel_size=3 ++model.speech_decoder.cfg_unconditional_prob=0.2 ++model.custom_codebook_size=2045 ++model.custom_speech_bos_id=2019 ++model.custom_speech_eos_id=2020 ++model.custom_speech_delay_id=2018 model.perception.encoder.att_context_size=[70,0] model.perception.modality_adapter.att_context_size=[70,0] ++model.pretrained_llm=/export/fs06/ahussei6/nvidia/cache/HFCACHE/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306 ++trainer.limit_val_batches=1 ++trainer.val_check_interval=1000 ++model.scale_loss_by=non_sil_t ++model.scale_loss_mask=10 ++model.val_acc_tolerance=480 data.validation_ds.seed=42
dummy_nemo_duplex/qwen_1b/exp_config.yaml ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ pretrained_llm: /export/fs06/ahussei6/nvidia/cache/HFCACHE/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306
3
+ pretrained_audio_codec: /export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo
4
+ pretrained_asr: /export/fs06/ahussei6/nvidia/pretrained_models/asr/stt_en_fastconformer_hybrid_large_streaming_multi_v1.20.0/stt_en_fastconformer_hybrid_large_streaming_multi.nemo
5
+ scoring_asr: stt_en_fastconformer_transducer_large
6
+ pretrained_weights: true
7
+ audio_loss_weight: 20
8
+ text_loss_weight: 3
9
+ freeze_params:
10
+ - ^audio_codec\..+$
11
+ - ^speech_generation\.speaker_encoder\..+$
12
+ prevent_freeze_params: []
13
+ duplex_user_channel_weight: 1.0
14
+ hidden_pooling_factor: 2
15
+ audio_save_path: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/pred_audios
16
+ perception:
17
+ target: nemo.collections.multimodal.speech_llm.modules.perception_modules.AudioPerceptionModule
18
+ use_multi_layer_feat: false
19
+ add_sep: true
20
+ is_canary: true
21
+ preprocessor:
22
+ normalize: NA
23
+ encoder:
24
+ self_attention_model: rel_pos
25
+ att_context_size:
26
+ - 70
27
+ - 0
28
+ conv_context_size: causal
29
+ conv_norm_type: layer_norm
30
+ modality_adapter:
31
+ _target_: nemo.collections.asr.modules.ConformerEncoder
32
+ feat_in: 512
33
+ feat_out: -1
34
+ n_layers: 2
35
+ d_model: 512
36
+ subsampling: dw_striding
37
+ subsampling_factor: 1
38
+ subsampling_conv_channels: 256
39
+ causal_downsampling: true
40
+ ff_expansion_factor: 4
41
+ self_attention_model: rel_pos
42
+ n_heads: 8
43
+ att_context_size:
44
+ - 70
45
+ - 0
46
+ att_context_style: chunked_limited
47
+ xscaling: true
48
+ untie_biases: true
49
+ pos_emb_max_len: 5000
50
+ conv_kernel_size: 9
51
+ conv_norm_type: layer_norm
52
+ conv_context_size: causal
53
+ dropout: 0
54
+ dropout_pre_encoder: 0
55
+ dropout_emb: 0.0
56
+ dropout_att: 0
57
+ spec_augment:
58
+ _target_: nemo.collections.asr.modules.SpectrogramAugmentation
59
+ freq_masks: 2
60
+ time_masks: 10
61
+ freq_width: 27
62
+ time_width: 0.05
63
+ speech_decoder:
64
+ n_layers: 12
65
+ d_model: 768
66
+ d_ffn: 3072
67
+ sa_n_heads: 12
68
+ kernel_size: 3
69
+ p_dropout: 0.1
70
+ p_dropout_out: 0.0
71
+ has_xattn: false
72
+ xa_d_memory: 768
73
+ xa_n_heads: 12
74
+ is_causal: true
75
+ apply_norm_to_cond: true
76
+ apply_norm_out: true
77
+ max_length_causal_mask: 5000
78
+ cond_on_prev_audio_tokens: true
79
+ detach_input: false
80
+ use_learnable_pos_emb: true
81
+ cond_on_modality_adapter_emb: false
82
+ cond_on_char_embedding: true
83
+ cond_on_llm_latent: false
84
+ use_speaker_encoder: true
85
+ cond_on_asr_emb: false
86
+ cond_on_text_tokens: false
87
+ cfg_scale: 2.5
88
+ cfg_unconditional_prob: 0.2
89
+ optimizer:
90
+ _target_: torch.optim.AdamW
91
+ lr: 0.0001
92
+ betas:
93
+ - 0.9
94
+ - 0.98
95
+ weight_decay: 0
96
+ foreach: true
97
+ lr_scheduler:
98
+ _target_: nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing
99
+ warmup_steps: 4000
100
+ min_lr: 1.0e-06
101
+ max_steps: 100000
102
+ pretrained_tts_from_s2s: /export/fs06/ahussei6/nvidia/pretrained_models/magpie_tts/tts-pretraining_qwnen_2.5_81007_steps.ckpt
103
+ mask_sequence_loss: true
104
+ custom_codebook_size: 2045
105
+ custom_speech_bos_id: 2019
106
+ custom_speech_eos_id: 2020
107
+ custom_speech_delay_id: 2018
108
+ scale_loss_by: non_sil_t
109
+ scale_loss_mask: 10
110
+ val_acc_tolerance: 480
111
+ trainer:
112
+ devices: -1
113
+ accelerator: gpu
114
+ num_nodes: 1
115
+ precision: bf16-true
116
+ logger: false
117
+ enable_checkpointing: false
118
+ use_distributed_sampler: false
119
+ max_steps: 100000
120
+ val_check_interval: 1000
121
+ limit_train_batches: 1000
122
+ limit_val_batches: 1
123
+ log_every_n_steps: 20
124
+ num_sanity_val_steps: 0
125
+ gradient_clip_val: 1.0
126
+ accumulate_grad_batches: 1
127
+ strategy:
128
+ _target_: lightning.pytorch.strategies.DDPStrategy
129
+ gradient_as_bucket_view: true
130
+ find_unused_parameters: true
131
+ data:
132
+ frame_length: 0.08
133
+ source_sample_rate: 16000
134
+ target_sample_rate: 22050
135
+ input_roles:
136
+ - user
137
+ - User
138
+ output_roles:
139
+ - agent
140
+ - Assistant
141
+ - assistant
142
+ - Agent
143
+ add_delay_token: false
144
+ train_ds:
145
+ sample_rate: 22050
146
+ multi_config: true
147
+ shuffle: true
148
+ sampler_fusion: randomized_round_robin
149
+ seed: 42
150
+ shard_seed: trng
151
+ sampler_weights:
152
+ cv_en: 1.0
153
+ cv_en:
154
+ input_cfg: /export/fs06/ahussei6/nvidia/github/NeMo/examples/speechlm2/conf/data/en.yaml
155
+ bucket_buffer_size: 10000
156
+ shuffle_buffer_size: 10000
157
+ num_workers: 8
158
+ pin_memory: true
159
+ max_duration: 60
160
+ min_duration: 1
161
+ num_buckets: 30
162
+ bucket_duration_bins:
163
+ - 5.33
164
+ - 6.27
165
+ - 6.96
166
+ - 7.54
167
+ - 8.07
168
+ - 8.55
169
+ - 8.99
170
+ - 9.39
171
+ - 9.76
172
+ - 10.14
173
+ - 10.54
174
+ - 10.99
175
+ - 11.49
176
+ - 12.04
177
+ - 12.64
178
+ - 13.28
179
+ - 13.96
180
+ - 14.69
181
+ - 15.48
182
+ - 16.32
183
+ - 17.25
184
+ - 18.28
185
+ - 19.39
186
+ - 20.65
187
+ - 22.14
188
+ - 24.01
189
+ - 26.34
190
+ - 29.21
191
+ - 32.79
192
+ batch_duration: 400
193
+ use_bucketing: true
194
+ seed: trng
195
+ validation_ds:
196
+ datasets:
197
+ en_dev:
198
+ shar_path: /export/fs06/ahussei6/nvidia/github/debug
199
+ sample_rate: 22050
200
+ batch_size: 64
201
+ seed: 42
202
+ shard_seed: randomized
203
+ exp_manager:
204
+ exp_dir: null
205
+ explicit_log_dir: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b
206
+ name: qwen_1b
207
+ create_tensorboard_logger: false
208
+ create_checkpoint_callback: true
209
+ use_datetime_version: true
210
+ max_time_per_run: 00:03:50:00
211
+ resume_from_checkpoint: null
212
+ resume_if_exists: true
213
+ resume_ignore_no_checkpoint: true
214
+ create_wandb_logger: false
215
+ wandb_logger_kwargs:
216
+ name: development-run
217
+ project: salm_s2s_speech_decoder_v2_char_tokens
218
+ resume: true
219
+ checkpoint_callback_params:
220
+ filename: '{step}'
221
+ monitor: val_asr_bleu
222
+ mode: max
223
+ every_n_train_steps: null
224
+ every_n_epochs: 1
225
+ save_top_k: 3
226
+ always_save_nemo: false
dummy_nemo_duplex/qwen_1b/lightning_logs.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2
+
3
+ | Name | Type | Params | Mode
4
+ --------------------------------------------------------------------------
5
+ 0 | audio_codec | AudioCodecModel | 62.2 M | eval
6
+ 1 | llm | Qwen2Model | 1.3 B | train
7
+ 2 | lm_head | Linear | 233 M | train
8
+ 3 | embed_tokens | Embedding | 233 M | train
9
+ 4 | perception | AudioPerceptionModule | 122 M | train
10
+ 5 | speech_generation | TransformerARSpeechDecoder | 288 M | train
11
+ 6 | embed_audio_tokens | ModuleList | 40.9 M | train
12
+ 7 | audio_head | Linear | 40.9 M | train
13
+ --------------------------------------------------------------------------
14
+ 2.0 B Trainable params
15
+ 87.5 M Non-trainable params
16
+ 2.1 B Total params
17
+ 8,397.045 Total estimated model params size (MB)
18
+ 1141 Modules in train mode
19
+ 2018 Modules in eval mode
dummy_nemo_duplex/qwen_1b/nemo_error_log.txt ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo W 2026-04-26 23:19:11 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
2
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
3
+
4
+ [NeMo W 2026-04-26 23:19:15 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
5
+
6
+ [NeMo W 2026-04-26 23:19:15 exp_manager:1079] Exp_manager is logging to /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b, but it already exists.
7
+ [NeMo W 2026-04-26 23:19:15 exp_manager:997] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :/export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/checkpoints. Training from scratch.
8
+ [NeMo W 2026-04-26 23:19:15 exp_manager:1388] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 100000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
9
+ [NeMo W 2026-04-26 23:19:16 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
10
+ Train config :
11
+ dataset:
12
+ dataset_type: tarred_vocoder
13
+ dataset_args:
14
+ dataset_meta:
15
+ lindy:
16
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/train_manifest.json
17
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/audio_{0..3}.tar
18
+ emma:
19
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/train_manifest.json
20
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_{0..3}.tar
21
+ libritts_r:
22
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/train_manifest.json
23
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/audio_{0..48}.tar
24
+ sample_rate: 22050
25
+ n_samples: 24696
26
+ min_duration: 0.4
27
+ max_duration: null
28
+ audio_augmentator_config:
29
+ impulse:
30
+ prob: 0.5
31
+ rng: 42
32
+ manifest_path: /lustre/fsw/portfolios/convai/projects/convai_convaird_nemo-speech/data/ROOM_Response_and_Noise_Database/processed/rir.json
33
+ normalize_impulse: false
34
+ shift_impulse: true
35
+ codec:
36
+ prob: 0.5
37
+ rng: 42
38
+ format_params:
39
+ - format: mp3
40
+ prob: 0.5
41
+ bit_rates:
42
+ - 16000
43
+ - 32000
44
+ - 64000
45
+ - 128000
46
+ - format: ogg
47
+ prob: 0.075
48
+ encoder: vorbis
49
+ bit_rates:
50
+ - 32000
51
+ - 48000
52
+ - 64000
53
+ - format: ogg
54
+ prob: 0.375
55
+ encoder: opus
56
+ bit_rates:
57
+ - 8000
58
+ - 16000
59
+ - 32000
60
+ - 64000
61
+ - 128000
62
+ - format: wav
63
+ prob: 0.025
64
+ encoder: pcm_alaw
65
+ bit_rates:
66
+ - 64000
67
+ - format: wav
68
+ prob: 0.025
69
+ encoder: pcm_mulaw
70
+ bit_rates:
71
+ - 64000
72
+ shard_strategy: replicate
73
+ sample_type: weighted_random
74
+ sample_args:
75
+ batch_size: 32
76
+ steps_per_epoch: 233
77
+ dataset_weights:
78
+ - 0.2
79
+ - 0.2
80
+ - 0.6
81
+ shuffle_n: 10000
82
+ dataloader_params:
83
+ batch_size: 32
84
+ drop_last: true
85
+ num_workers: 4
86
+
87
+ [NeMo W 2026-04-26 23:19:16 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
88
+ Validation config :
89
+ dataset:
90
+ dataset_type: vocoder
91
+ dataset_args:
92
+ sample_rate: 22050
93
+ n_samples: null
94
+ min_duration: null
95
+ max_duration: null
96
+ trunc_duration: 10.0
97
+ dataset_meta:
98
+ lindy:
99
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/val_manifest.json
100
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/audio_22khz
101
+ emma:
102
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/val_manifest.json
103
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_22khz
104
+ libritts_r:
105
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/val_manifest.json
106
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/audio_22khz
107
+ dataloader_params:
108
+ batch_size: 4
109
+ num_workers: 2
110
+
111
+ [NeMo W 2026-04-26 23:19:19 duplex_s2s_speech_decoder_model:148] Tokenizer does not have a `bos_token`. Setting it to '<|im_start|>'.
112
+ [NeMo W 2026-04-26 23:19:22 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
113
+ Train config :
114
+ manifest_filepath:
115
+ - - /raid/local//bucket1/tarred_audio_manifest.json
116
+ - - /raid/local//bucket2/tarred_audio_manifest.json
117
+ - - /raid/local//bucket3/tarred_audio_manifest.json
118
+ - - /raid/local//bucket4/tarred_audio_manifest.json
119
+ - - /raid/local//bucket5/tarred_audio_manifest.json
120
+ - - /raid/local//bucket6/tarred_audio_manifest.json
121
+ - - /raid/local//bucket7/tarred_audio_manifest.json
122
+ - - /raid/local//bucket8/tarred_audio_manifest.json
123
+ sample_rate: 16000
124
+ batch_size: 1
125
+ shuffle: true
126
+ num_workers: 4
127
+ pin_memory: true
128
+ use_start_end_token: false
129
+ trim_silence: false
130
+ max_duration: 25
131
+ min_duration: 0.1
132
+ is_tarred: true
133
+ tarred_audio_filepaths:
134
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket1/audio__OP_0..8191_CL_.tar
135
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket2/audio__OP_0..8191_CL_.tar
136
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket3/audio__OP_0..8191_CL_.tar
137
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket4/audio__OP_0..8191_CL_.tar
138
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket5/audio__OP_0..8191_CL_.tar
139
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket6/audio__OP_0..8191_CL_.tar
140
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket7/audio__OP_0..8191_CL_.tar
141
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket8/audio__OP_0..8191_CL_.tar
142
+ shuffle_n: 2048
143
+ bucketing_strategy: fully_randomized
144
+ bucketing_batch_size:
145
+ - 72
146
+ - 64
147
+ - 56
148
+ - 48
149
+ - 40
150
+ - 32
151
+ - 24
152
+ - 16
153
+
154
+ [NeMo W 2026-04-26 23:19:22 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
155
+ Validation config :
156
+ manifest_filepath:
157
+ - /manifests/librispeech/librivox-dev-other.json
158
+ - /manifests/librispeech/librivox-dev-clean.json
159
+ - /manifests/librispeech/librivox-test-other.json
160
+ - /manifests/librispeech/librivox-test-clean.json
161
+ sample_rate: 16000
162
+ batch_size: 16
163
+ shuffle: false
164
+ num_workers: 8
165
+ pin_memory: true
166
+ use_start_end_token: false
167
+
168
+ [NeMo W 2026-04-26 23:19:22 modelPT:202] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).
169
+ Test config :
170
+ manifest_filepath:
171
+ - /manifests/librispeech/librivox-dev-other.json
172
+ - /manifests/librispeech/librivox-dev-clean.json
173
+ - /manifests/librispeech/librivox-test-other.json
174
+ - /manifests/librispeech/librivox-test-clean.json
175
+ sample_rate: 16000
176
+ batch_size: 16
177
+ shuffle: false
178
+ num_workers: 8
179
+ pin_memory: true
180
+ use_start_end_token: false
181
+
182
+ [NeMo W 2026-04-26 23:19:24 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
183
+ Train config :
184
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
185
+ sample_rate: 16000
186
+ labels: null
187
+ batch_size: 64
188
+ shuffle: true
189
+ is_tarred: false
190
+ tarred_audio_filepaths: null
191
+ tarred_shard_strategy: scatter
192
+ augmentor:
193
+ noise:
194
+ manifest_path: /manifests/noise/rir_noise_manifest.json
195
+ prob: 0.5
196
+ min_snr_db: 0
197
+ max_snr_db: 15
198
+ speed:
199
+ prob: 0.5
200
+ sr: 16000
201
+ resample_type: kaiser_fast
202
+ min_speed_rate: 0.95
203
+ max_speed_rate: 1.05
204
+ num_workers: 15
205
+ pin_memory: true
206
+
207
+ [NeMo W 2026-04-26 23:19:24 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
208
+ Validation config :
209
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
210
+ sample_rate: 16000
211
+ labels: null
212
+ batch_size: 128
213
+ shuffle: false
214
+ num_workers: 15
215
+ pin_memory: true
216
+
217
+ [NeMo W 2026-04-26 23:19:34 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
218
+
219
+ [NeMo W 2026-04-26 23:19:35 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
220
+
221
+ [NeMo W 2026-04-26 23:19:35 dataloader:480] You are using a non-tarred dataset and requested tokenization during data sampling (pretokenize=True). This will cause the tokenization to happen in the main (GPU) process,possibly impacting the training speed if your tokenizer is very large.If the impact is noticable, set pretokenize=False in dataloader config.(note: that will disable token-per-second filtering and 2D bucketing features)
222
+ [NeMo W 2026-04-26 23:19:35 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
223
+
224
+ [NeMo W 2026-04-26 23:19:35 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
225
+ Train config :
226
+ dataset:
227
+ dataset_type: tarred_vocoder
228
+ dataset_args:
229
+ dataset_meta:
230
+ lindy:
231
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/train_manifest.json
232
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/audio_{0..3}.tar
233
+ emma:
234
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/train_manifest.json
235
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_{0..3}.tar
236
+ libritts_r:
237
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/train_manifest.json
238
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/audio_{0..48}.tar
239
+ sample_rate: 22050
240
+ n_samples: 24696
241
+ min_duration: 0.4
242
+ max_duration: null
243
+ audio_augmentator_config:
244
+ impulse:
245
+ prob: 0.5
246
+ rng: 42
247
+ manifest_path: /lustre/fsw/portfolios/convai/projects/convai_convaird_nemo-speech/data/ROOM_Response_and_Noise_Database/processed/rir.json
248
+ normalize_impulse: false
249
+ shift_impulse: true
250
+ codec:
251
+ prob: 0.5
252
+ rng: 42
253
+ format_params:
254
+ - format: mp3
255
+ prob: 0.5
256
+ bit_rates:
257
+ - 16000
258
+ - 32000
259
+ - 64000
260
+ - 128000
261
+ - format: ogg
262
+ prob: 0.075
263
+ encoder: vorbis
264
+ bit_rates:
265
+ - 32000
266
+ - 48000
267
+ - 64000
268
+ - format: ogg
269
+ prob: 0.375
270
+ encoder: opus
271
+ bit_rates:
272
+ - 8000
273
+ - 16000
274
+ - 32000
275
+ - 64000
276
+ - 128000
277
+ - format: wav
278
+ prob: 0.025
279
+ encoder: pcm_alaw
280
+ bit_rates:
281
+ - 64000
282
+ - format: wav
283
+ prob: 0.025
284
+ encoder: pcm_mulaw
285
+ bit_rates:
286
+ - 64000
287
+ shard_strategy: replicate
288
+ sample_type: weighted_random
289
+ sample_args:
290
+ batch_size: 32
291
+ steps_per_epoch: 233
292
+ dataset_weights:
293
+ - 0.2
294
+ - 0.2
295
+ - 0.6
296
+ shuffle_n: 10000
297
+ dataloader_params:
298
+ batch_size: 32
299
+ drop_last: true
300
+ num_workers: 4
301
+
302
+ [NeMo W 2026-04-26 23:19:35 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
303
+ Validation config :
304
+ dataset:
305
+ dataset_type: vocoder
306
+ dataset_args:
307
+ sample_rate: 22050
308
+ n_samples: null
309
+ min_duration: null
310
+ max_duration: null
311
+ trunc_duration: 10.0
312
+ dataset_meta:
313
+ lindy:
314
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/val_manifest.json
315
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/audio_22khz
316
+ emma:
317
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/val_manifest.json
318
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_22khz
319
+ libritts_r:
320
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/val_manifest.json
321
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/audio_22khz
322
+ dataloader_params:
323
+ batch_size: 4
324
+ num_workers: 2
325
+
326
+ [NeMo W 2026-04-26 23:19:37 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
327
+ Train config :
328
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
329
+ sample_rate: 16000
330
+ labels: null
331
+ batch_size: 64
332
+ shuffle: true
333
+ is_tarred: false
334
+ tarred_audio_filepaths: null
335
+ tarred_shard_strategy: scatter
336
+ augmentor:
337
+ noise:
338
+ manifest_path: /manifests/noise/rir_noise_manifest.json
339
+ prob: 0.5
340
+ min_snr_db: 0
341
+ max_snr_db: 15
342
+ speed:
343
+ prob: 0.5
344
+ sr: 16000
345
+ resample_type: kaiser_fast
346
+ min_speed_rate: 0.95
347
+ max_speed_rate: 1.05
348
+ num_workers: 15
349
+ pin_memory: true
350
+
351
+ [NeMo W 2026-04-26 23:19:37 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
352
+ Validation config :
353
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
354
+ sample_rate: 16000
355
+ labels: null
356
+ batch_size: 128
357
+ shuffle: false
358
+ num_workers: 15
359
+ pin_memory: true
360
+
361
+ [NeMo W 2026-04-26 23:47:29 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
362
+ Train config :
363
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
364
+ sample_rate: 16000
365
+ labels: null
366
+ batch_size: 64
367
+ shuffle: true
368
+ is_tarred: false
369
+ tarred_audio_filepaths: null
370
+ tarred_shard_strategy: scatter
371
+ augmentor:
372
+ noise:
373
+ manifest_path: /manifests/noise/rir_noise_manifest.json
374
+ prob: 0.5
375
+ min_snr_db: 0
376
+ max_snr_db: 15
377
+ speed:
378
+ prob: 0.5
379
+ sr: 16000
380
+ resample_type: kaiser_fast
381
+ min_speed_rate: 0.95
382
+ max_speed_rate: 1.05
383
+ num_workers: 15
384
+ pin_memory: true
385
+
386
+ [NeMo W 2026-04-26 23:47:29 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
387
+ Validation config :
388
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
389
+ sample_rate: 16000
390
+ labels: null
391
+ batch_size: 128
392
+ shuffle: false
393
+ num_workers: 15
394
+ pin_memory: true
395
+
396
+ [NeMo W 2026-04-26 23:47:32 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
397
+ Train config :
398
+ manifest_filepath: null
399
+ sample_rate: 16000
400
+ batch_size: 1
401
+ shuffle: true
402
+ num_workers: 8
403
+ pin_memory: true
404
+ use_start_end_token: false
405
+ trim_silence: false
406
+ max_duration: 20
407
+ min_duration: 0.1
408
+ is_tarred: false
409
+ tarred_audio_filepaths: null
410
+ shuffle_n: 2048
411
+ bucketing_strategy: fully_randomized
412
+ bucketing_batch_size: null
413
+
414
+ [NeMo W 2026-04-26 23:47:32 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
415
+ Validation config :
416
+ manifest_filepath: null
417
+ sample_rate: 16000
418
+ batch_size: 32
419
+ shuffle: false
420
+ num_workers: 8
421
+ pin_memory: true
422
+ use_start_end_token: false
423
+ max_duration: 20
424
+
425
+ [NeMo W 2026-04-26 23:47:32 modelPT:202] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).
426
+ Test config :
427
+ manifest_filepath: null
428
+ sample_rate: 16000
429
+ batch_size: 16
430
+ shuffle: false
431
+ num_workers: 8
432
+ pin_memory: true
433
+ use_start_end_token: false
434
+
435
+ [NeMo W 2026-04-26 23:51:44 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
436
+ Train config :
437
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
438
+ sample_rate: 16000
439
+ labels: null
440
+ batch_size: 64
441
+ shuffle: true
442
+ is_tarred: false
443
+ tarred_audio_filepaths: null
444
+ tarred_shard_strategy: scatter
445
+ augmentor:
446
+ noise:
447
+ manifest_path: /manifests/noise/rir_noise_manifest.json
448
+ prob: 0.5
449
+ min_snr_db: 0
450
+ max_snr_db: 15
451
+ speed:
452
+ prob: 0.5
453
+ sr: 16000
454
+ resample_type: kaiser_fast
455
+ min_speed_rate: 0.95
456
+ max_speed_rate: 1.05
457
+ num_workers: 15
458
+ pin_memory: true
459
+
460
+ [NeMo W 2026-04-26 23:51:44 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
461
+ Validation config :
462
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
463
+ sample_rate: 16000
464
+ labels: null
465
+ batch_size: 128
466
+ shuffle: false
467
+ num_workers: 15
468
+ pin_memory: true
469
+
dummy_nemo_duplex/qwen_1b/nemo_log_globalrank-0_localrank-0.txt ADDED
@@ -0,0 +1,861 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo W 2026-04-26 23:19:11 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
2
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
3
+
4
+ [NeMo W 2026-04-26 23:19:15 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
5
+
6
+ [NeMo I 2026-04-26 23:19:15 exp_manager:574] ExpManager schema
7
+ [NeMo I 2026-04-26 23:19:15 exp_manager:575] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1, 'multistorageclient_enabled': False}, 'create_early_stopping_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
8
+ [NeMo W 2026-04-26 23:19:15 exp_manager:1079] Exp_manager is logging to /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b, but it already exists.
9
+ [NeMo W 2026-04-26 23:19:15 exp_manager:997] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :/export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/checkpoints. Training from scratch.
10
+ [NeMo I 2026-04-26 23:19:15 exp_manager:635] Experiments will be logged at /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b
11
+ [NeMo W 2026-04-26 23:19:15 exp_manager:1388] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 100000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
12
+ [NeMo I 2026-04-26 23:19:15 exp_manager:780] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
13
+ [NeMo W 2026-04-26 23:19:16 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
14
+ Train config :
15
+ dataset:
16
+ dataset_type: tarred_vocoder
17
+ dataset_args:
18
+ dataset_meta:
19
+ lindy:
20
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/train_manifest.json
21
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/audio_{0..3}.tar
22
+ emma:
23
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/train_manifest.json
24
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_{0..3}.tar
25
+ libritts_r:
26
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/train_manifest.json
27
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/audio_{0..48}.tar
28
+ sample_rate: 22050
29
+ n_samples: 24696
30
+ min_duration: 0.4
31
+ max_duration: null
32
+ audio_augmentator_config:
33
+ impulse:
34
+ prob: 0.5
35
+ rng: 42
36
+ manifest_path: /lustre/fsw/portfolios/convai/projects/convai_convaird_nemo-speech/data/ROOM_Response_and_Noise_Database/processed/rir.json
37
+ normalize_impulse: false
38
+ shift_impulse: true
39
+ codec:
40
+ prob: 0.5
41
+ rng: 42
42
+ format_params:
43
+ - format: mp3
44
+ prob: 0.5
45
+ bit_rates:
46
+ - 16000
47
+ - 32000
48
+ - 64000
49
+ - 128000
50
+ - format: ogg
51
+ prob: 0.075
52
+ encoder: vorbis
53
+ bit_rates:
54
+ - 32000
55
+ - 48000
56
+ - 64000
57
+ - format: ogg
58
+ prob: 0.375
59
+ encoder: opus
60
+ bit_rates:
61
+ - 8000
62
+ - 16000
63
+ - 32000
64
+ - 64000
65
+ - 128000
66
+ - format: wav
67
+ prob: 0.025
68
+ encoder: pcm_alaw
69
+ bit_rates:
70
+ - 64000
71
+ - format: wav
72
+ prob: 0.025
73
+ encoder: pcm_mulaw
74
+ bit_rates:
75
+ - 64000
76
+ shard_strategy: replicate
77
+ sample_type: weighted_random
78
+ sample_args:
79
+ batch_size: 32
80
+ steps_per_epoch: 233
81
+ dataset_weights:
82
+ - 0.2
83
+ - 0.2
84
+ - 0.6
85
+ shuffle_n: 10000
86
+ dataloader_params:
87
+ batch_size: 32
88
+ drop_last: true
89
+ num_workers: 4
90
+
91
+ [NeMo W 2026-04-26 23:19:16 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
92
+ Validation config :
93
+ dataset:
94
+ dataset_type: vocoder
95
+ dataset_args:
96
+ sample_rate: 22050
97
+ n_samples: null
98
+ min_duration: null
99
+ max_duration: null
100
+ trunc_duration: 10.0
101
+ dataset_meta:
102
+ lindy:
103
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/val_manifest.json
104
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/audio_22khz
105
+ emma:
106
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/val_manifest.json
107
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_22khz
108
+ libritts_r:
109
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/val_manifest.json
110
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/audio_22khz
111
+ dataloader_params:
112
+ batch_size: 4
113
+ num_workers: 2
114
+
115
+ [NeMo I 2026-04-26 23:19:16 audio_codec:101] Vector quantizer does not support commit loss.
116
+ [NeMo I 2026-04-26 23:19:18 features:305] PADDING: 1
117
+ [NeMo I 2026-04-26 23:19:19 features:305] PADDING: 1
118
+ [NeMo I 2026-04-26 23:19:19 features:305] PADDING: 1
119
+ [NeMo I 2026-04-26 23:19:19 features:305] PADDING: 1
120
+ [NeMo I 2026-04-26 23:19:19 features:305] PADDING: 1
121
+ [NeMo I 2026-04-26 23:19:19 features:305] PADDING: 1
122
+ [NeMo I 2026-04-26 23:19:19 save_restore_connector:283] Model AudioCodecModel was successfully restored from /export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo.
123
+ [NeMo W 2026-04-26 23:19:19 duplex_s2s_speech_decoder_model:148] Tokenizer does not have a `bos_token`. Setting it to '<|im_start|>'.
124
+ [NeMo I 2026-04-26 23:19:21 mixins:181] Tokenizer SentencePieceTokenizer initialized with 1024 tokens
125
+ [NeMo W 2026-04-26 23:19:22 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
126
+ Train config :
127
+ manifest_filepath:
128
+ - - /raid/local//bucket1/tarred_audio_manifest.json
129
+ - - /raid/local//bucket2/tarred_audio_manifest.json
130
+ - - /raid/local//bucket3/tarred_audio_manifest.json
131
+ - - /raid/local//bucket4/tarred_audio_manifest.json
132
+ - - /raid/local//bucket5/tarred_audio_manifest.json
133
+ - - /raid/local//bucket6/tarred_audio_manifest.json
134
+ - - /raid/local//bucket7/tarred_audio_manifest.json
135
+ - - /raid/local//bucket8/tarred_audio_manifest.json
136
+ sample_rate: 16000
137
+ batch_size: 1
138
+ shuffle: true
139
+ num_workers: 4
140
+ pin_memory: true
141
+ use_start_end_token: false
142
+ trim_silence: false
143
+ max_duration: 25
144
+ min_duration: 0.1
145
+ is_tarred: true
146
+ tarred_audio_filepaths:
147
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket1/audio__OP_0..8191_CL_.tar
148
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket2/audio__OP_0..8191_CL_.tar
149
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket3/audio__OP_0..8191_CL_.tar
150
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket4/audio__OP_0..8191_CL_.tar
151
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket5/audio__OP_0..8191_CL_.tar
152
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket6/audio__OP_0..8191_CL_.tar
153
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket7/audio__OP_0..8191_CL_.tar
154
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket8/audio__OP_0..8191_CL_.tar
155
+ shuffle_n: 2048
156
+ bucketing_strategy: fully_randomized
157
+ bucketing_batch_size:
158
+ - 72
159
+ - 64
160
+ - 56
161
+ - 48
162
+ - 40
163
+ - 32
164
+ - 24
165
+ - 16
166
+
167
+ [NeMo W 2026-04-26 23:19:22 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
168
+ Validation config :
169
+ manifest_filepath:
170
+ - /manifests/librispeech/librivox-dev-other.json
171
+ - /manifests/librispeech/librivox-dev-clean.json
172
+ - /manifests/librispeech/librivox-test-other.json
173
+ - /manifests/librispeech/librivox-test-clean.json
174
+ sample_rate: 16000
175
+ batch_size: 16
176
+ shuffle: false
177
+ num_workers: 8
178
+ pin_memory: true
179
+ use_start_end_token: false
180
+
181
+ [NeMo W 2026-04-26 23:19:22 modelPT:202] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).
182
+ Test config :
183
+ manifest_filepath:
184
+ - /manifests/librispeech/librivox-dev-other.json
185
+ - /manifests/librispeech/librivox-dev-clean.json
186
+ - /manifests/librispeech/librivox-test-other.json
187
+ - /manifests/librispeech/librivox-test-clean.json
188
+ sample_rate: 16000
189
+ batch_size: 16
190
+ shuffle: false
191
+ num_workers: 8
192
+ pin_memory: true
193
+ use_start_end_token: false
194
+
195
+ [NeMo I 2026-04-26 23:19:22 features:305] PADDING: 0
196
+ [NeMo I 2026-04-26 23:19:23 rnnt_models:226] Using RNNT Loss : warprnnt_numba
197
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.005, 'clamp': -1.0}
198
+ [NeMo I 2026-04-26 23:19:23 rnnt_models:226] Using RNNT Loss : warprnnt_numba
199
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.005, 'clamp': -1.0}
200
+ [NeMo I 2026-04-26 23:19:23 rnnt_models:226] Using RNNT Loss : warprnnt_numba
201
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.005, 'clamp': -1.0}
202
+ [NeMo I 2026-04-26 23:19:24 save_restore_connector:283] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /export/fs06/ahussei6/nvidia/pretrained_models/asr/stt_en_fastconformer_hybrid_large_streaming_multi_v1.20.0/stt_en_fastconformer_hybrid_large_streaming_multi.nemo.
203
+ [NeMo I 2026-04-26 23:19:24 features:305] PADDING: 0
204
+ [NeMo I 2026-04-26 23:19:24 cloud:58] Found existing object /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
205
+ [NeMo I 2026-04-26 23:19:24 cloud:64] Re-using file from: /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo
206
+ [NeMo I 2026-04-26 23:19:24 common:827] Instantiating model from pre-trained checkpoint
207
+ [NeMo W 2026-04-26 23:19:24 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
208
+ Train config :
209
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
210
+ sample_rate: 16000
211
+ labels: null
212
+ batch_size: 64
213
+ shuffle: true
214
+ is_tarred: false
215
+ tarred_audio_filepaths: null
216
+ tarred_shard_strategy: scatter
217
+ augmentor:
218
+ noise:
219
+ manifest_path: /manifests/noise/rir_noise_manifest.json
220
+ prob: 0.5
221
+ min_snr_db: 0
222
+ max_snr_db: 15
223
+ speed:
224
+ prob: 0.5
225
+ sr: 16000
226
+ resample_type: kaiser_fast
227
+ min_speed_rate: 0.95
228
+ max_speed_rate: 1.05
229
+ num_workers: 15
230
+ pin_memory: true
231
+
232
+ [NeMo W 2026-04-26 23:19:24 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
233
+ Validation config :
234
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
235
+ sample_rate: 16000
236
+ labels: null
237
+ batch_size: 128
238
+ shuffle: false
239
+ num_workers: 15
240
+ pin_memory: true
241
+
242
+ [NeMo I 2026-04-26 23:19:24 features:305] PADDING: 16
243
+ [NeMo I 2026-04-26 23:19:24 save_restore_connector:283] Model EncDecSpeakerLabelModel was successfully restored from /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
244
+ [NeMo I 2026-04-26 23:19:34 pretrained:110] | > 237 / 237 layers are restored.
245
+ [NeMo W 2026-04-26 23:19:34 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
246
+
247
+ [NeMo I 2026-04-26 23:19:35 optim_setup:136] Parameters | trainable=2011762688 (95.83%) | total=2099261314
248
+ [NeMo I 2026-04-26 23:19:35 dataloader:559] Creating a Lhotse DynamicBucketingSampler (max_batch_duration=400.0 max_batch_size=None)
249
+ [NeMo W 2026-04-26 23:19:35 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
250
+
251
+ [NeMo I 2026-04-26 23:19:35 dataloader:290] We will be using a Lhotse DataLoader.
252
+ [NeMo W 2026-04-26 23:19:35 dataloader:480] You are using a non-tarred dataset and requested tokenization during data sampling (pretokenize=True). This will cause the tokenization to happen in the main (GPU) process,possibly impacting the training speed if your tokenizer is very large.If the impact is noticable, set pretokenize=False in dataloader config.(note: that will disable token-per-second filtering and 2D bucketing features)
253
+ [NeMo I 2026-04-26 23:19:35 dataloader:583] Creating a Lhotse DynamicCutSampler (bucketing is disabled, (max_batch_duration=None max_batch_size=64)
254
+ [NeMo W 2026-04-26 23:19:35 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.
255
+
256
+ [NeMo W 2026-04-26 23:19:35 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
257
+ Train config :
258
+ dataset:
259
+ dataset_type: tarred_vocoder
260
+ dataset_args:
261
+ dataset_meta:
262
+ lindy:
263
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/train_manifest.json
264
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/audio_{0..3}.tar
265
+ emma:
266
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/train_manifest.json
267
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_{0..3}.tar
268
+ libritts_r:
269
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/train_manifest.json
270
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/audio_{0..48}.tar
271
+ sample_rate: 22050
272
+ n_samples: 24696
273
+ min_duration: 0.4
274
+ max_duration: null
275
+ audio_augmentator_config:
276
+ impulse:
277
+ prob: 0.5
278
+ rng: 42
279
+ manifest_path: /lustre/fsw/portfolios/convai/projects/convai_convaird_nemo-speech/data/ROOM_Response_and_Noise_Database/processed/rir.json
280
+ normalize_impulse: false
281
+ shift_impulse: true
282
+ codec:
283
+ prob: 0.5
284
+ rng: 42
285
+ format_params:
286
+ - format: mp3
287
+ prob: 0.5
288
+ bit_rates:
289
+ - 16000
290
+ - 32000
291
+ - 64000
292
+ - 128000
293
+ - format: ogg
294
+ prob: 0.075
295
+ encoder: vorbis
296
+ bit_rates:
297
+ - 32000
298
+ - 48000
299
+ - 64000
300
+ - format: ogg
301
+ prob: 0.375
302
+ encoder: opus
303
+ bit_rates:
304
+ - 8000
305
+ - 16000
306
+ - 32000
307
+ - 64000
308
+ - 128000
309
+ - format: wav
310
+ prob: 0.025
311
+ encoder: pcm_alaw
312
+ bit_rates:
313
+ - 64000
314
+ - format: wav
315
+ prob: 0.025
316
+ encoder: pcm_mulaw
317
+ bit_rates:
318
+ - 64000
319
+ shard_strategy: replicate
320
+ sample_type: weighted_random
321
+ sample_args:
322
+ batch_size: 32
323
+ steps_per_epoch: 233
324
+ dataset_weights:
325
+ - 0.2
326
+ - 0.2
327
+ - 0.6
328
+ shuffle_n: 10000
329
+ dataloader_params:
330
+ batch_size: 32
331
+ drop_last: true
332
+ num_workers: 4
333
+
334
+ [NeMo W 2026-04-26 23:19:35 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
335
+ Validation config :
336
+ dataset:
337
+ dataset_type: vocoder
338
+ dataset_args:
339
+ sample_rate: 22050
340
+ n_samples: null
341
+ min_duration: null
342
+ max_duration: null
343
+ trunc_duration: 10.0
344
+ dataset_meta:
345
+ lindy:
346
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/val_manifest.json
347
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/audio_22khz
348
+ emma:
349
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/val_manifest.json
350
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_22khz
351
+ libritts_r:
352
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/val_manifest.json
353
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/audio_22khz
354
+ dataloader_params:
355
+ batch_size: 4
356
+ num_workers: 2
357
+
358
+ [NeMo I 2026-04-26 23:19:36 audio_codec:101] Vector quantizer does not support commit loss.
359
+ [NeMo I 2026-04-26 23:19:36 features:305] PADDING: 1
360
+ [NeMo I 2026-04-26 23:19:36 features:305] PADDING: 1
361
+ [NeMo I 2026-04-26 23:19:36 features:305] PADDING: 1
362
+ [NeMo I 2026-04-26 23:19:36 features:305] PADDING: 1
363
+ [NeMo I 2026-04-26 23:19:36 features:305] PADDING: 1
364
+ [NeMo I 2026-04-26 23:19:36 features:305] PADDING: 1
365
+ [NeMo I 2026-04-26 23:19:37 save_restore_connector:283] Model AudioCodecModel was successfully restored from /export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo.
366
+ [NeMo I 2026-04-26 23:19:37 cloud:58] Found existing object /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
367
+ [NeMo I 2026-04-26 23:19:37 cloud:64] Re-using file from: /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo
368
+ [NeMo I 2026-04-26 23:19:37 common:827] Instantiating model from pre-trained checkpoint
369
+ [NeMo W 2026-04-26 23:19:37 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
370
+ Train config :
371
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
372
+ sample_rate: 16000
373
+ labels: null
374
+ batch_size: 64
375
+ shuffle: true
376
+ is_tarred: false
377
+ tarred_audio_filepaths: null
378
+ tarred_shard_strategy: scatter
379
+ augmentor:
380
+ noise:
381
+ manifest_path: /manifests/noise/rir_noise_manifest.json
382
+ prob: 0.5
383
+ min_snr_db: 0
384
+ max_snr_db: 15
385
+ speed:
386
+ prob: 0.5
387
+ sr: 16000
388
+ resample_type: kaiser_fast
389
+ min_speed_rate: 0.95
390
+ max_speed_rate: 1.05
391
+ num_workers: 15
392
+ pin_memory: true
393
+
394
+ [NeMo W 2026-04-26 23:19:37 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
395
+ Validation config :
396
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
397
+ sample_rate: 16000
398
+ labels: null
399
+ batch_size: 128
400
+ shuffle: false
401
+ num_workers: 15
402
+ pin_memory: true
403
+
404
+ [NeMo I 2026-04-26 23:19:37 features:305] PADDING: 16
405
+ [NeMo I 2026-04-26 23:19:37 save_restore_connector:283] Model EncDecSpeakerLabelModel was successfully restored from /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
406
+ [NeMo I 2026-04-26 23:47:28 cloud:58] Found existing object /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
407
+ [NeMo I 2026-04-26 23:47:28 cloud:64] Re-using file from: /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo
408
+ [NeMo I 2026-04-26 23:47:28 common:827] Instantiating model from pre-trained checkpoint
409
+ [NeMo W 2026-04-26 23:47:29 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
410
+ Train config :
411
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
412
+ sample_rate: 16000
413
+ labels: null
414
+ batch_size: 64
415
+ shuffle: true
416
+ is_tarred: false
417
+ tarred_audio_filepaths: null
418
+ tarred_shard_strategy: scatter
419
+ augmentor:
420
+ noise:
421
+ manifest_path: /manifests/noise/rir_noise_manifest.json
422
+ prob: 0.5
423
+ min_snr_db: 0
424
+ max_snr_db: 15
425
+ speed:
426
+ prob: 0.5
427
+ sr: 16000
428
+ resample_type: kaiser_fast
429
+ min_speed_rate: 0.95
430
+ max_speed_rate: 1.05
431
+ num_workers: 15
432
+ pin_memory: true
433
+
434
+ [NeMo W 2026-04-26 23:47:29 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
435
+ Validation config :
436
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
437
+ sample_rate: 16000
438
+ labels: null
439
+ batch_size: 128
440
+ shuffle: false
441
+ num_workers: 15
442
+ pin_memory: true
443
+
444
+ [NeMo I 2026-04-26 23:47:29 features:305] PADDING: 16
445
+ [NeMo I 2026-04-26 23:47:29 save_restore_connector:283] Model EncDecSpeakerLabelModel was successfully restored from /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
446
+ [NeMo I 2026-04-26 23:47:29 cloud:58] Found existing object /export/fs06/ahussei6/nvidia/cache/HFCACHE/stt_en_fastconformer_transducer_large/74b4e49a2b22465da737dd5345767e61/stt_en_fastconformer_transducer_large.nemo.
447
+ [NeMo I 2026-04-26 23:47:29 cloud:64] Re-using file from: /export/fs06/ahussei6/nvidia/cache/HFCACHE/stt_en_fastconformer_transducer_large/74b4e49a2b22465da737dd5345767e61/stt_en_fastconformer_transducer_large.nemo
448
+ [NeMo I 2026-04-26 23:47:29 common:827] Instantiating model from pre-trained checkpoint
449
+ [NeMo I 2026-04-26 23:47:32 mixins:181] Tokenizer SentencePieceTokenizer initialized with 1024 tokens
450
+ [NeMo W 2026-04-26 23:47:32 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
451
+ Train config :
452
+ manifest_filepath: null
453
+ sample_rate: 16000
454
+ batch_size: 1
455
+ shuffle: true
456
+ num_workers: 8
457
+ pin_memory: true
458
+ use_start_end_token: false
459
+ trim_silence: false
460
+ max_duration: 20
461
+ min_duration: 0.1
462
+ is_tarred: false
463
+ tarred_audio_filepaths: null
464
+ shuffle_n: 2048
465
+ bucketing_strategy: fully_randomized
466
+ bucketing_batch_size: null
467
+
468
+ [NeMo W 2026-04-26 23:47:32 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
469
+ Validation config :
470
+ manifest_filepath: null
471
+ sample_rate: 16000
472
+ batch_size: 32
473
+ shuffle: false
474
+ num_workers: 8
475
+ pin_memory: true
476
+ use_start_end_token: false
477
+ max_duration: 20
478
+
479
+ [NeMo W 2026-04-26 23:47:32 modelPT:202] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).
480
+ Test config :
481
+ manifest_filepath: null
482
+ sample_rate: 16000
483
+ batch_size: 16
484
+ shuffle: false
485
+ num_workers: 8
486
+ pin_memory: true
487
+ use_start_end_token: false
488
+
489
+ [NeMo I 2026-04-26 23:47:32 features:305] PADDING: 0
490
+ [NeMo I 2026-04-26 23:47:33 rnnt_models:226] Using RNNT Loss : warprnnt_numba
491
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
492
+ [NeMo I 2026-04-26 23:47:33 rnnt_models:226] Using RNNT Loss : warprnnt_numba
493
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
494
+ [NeMo I 2026-04-26 23:47:33 rnnt_models:226] Using RNNT Loss : warprnnt_numba
495
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.0, 'clamp': -1.0}
496
+ [NeMo I 2026-04-26 23:47:34 save_restore_connector:283] Model EncDecRNNTBPEModel was successfully restored from /export/fs06/ahussei6/nvidia/cache/HFCACHE/stt_en_fastconformer_transducer_large/74b4e49a2b22465da737dd5345767e61/stt_en_fastconformer_transducer_large.nemo.
497
+ [NeMo I 2026-04-26 23:47:34 optional_cuda_graphs:53] Disabled CUDA graphs for module <class 'nemo.collections.asr.models.rnnt_bpe_models.EncDecRNNTBPEModel'>.decoding.decoding
498
+ [NeMo I 2026-04-26 23:47:34 optional_cuda_graphs:53] Disabled CUDA graphs for module <class 'nemo.collections.asr.metrics.wer.WER'>joint._wer.decoding.decoding
499
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] i responded in a bad way straightening myself in the chair
500
+ [ASR] the [0.00]
501
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] previously she was friends of the cruella from school
502
+ [ASR] one [0.00]
503
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] at present democrat jim mcdermott is the district representative
504
+ [ASR] you d rather the dividend of the leader [4.87]
505
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it is found in the afro tropical and southeast asia eco zone
506
+ [ASR] the love of a [2.16]
507
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] poor things they love you so much yes they do love me wow don't tell them
508
+ [ASR] [0.00]
509
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] this is developed at different times of the year in the processions and stations of the cross
510
+ [ASR] the tongue but the figure didn't [1.54]
511
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] they have high abrasion resistance
512
+ [ASR] [0.00]
513
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the award included three categories science art and literature
514
+ [ASR] [0.00]
515
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] his children are paloma yerma and natalio
516
+ [ASR] [0.00]
517
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] my friend sara just got her backpack stolen
518
+ [ASR] [0.00]
519
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it included salafist groups inspired by the muslim brothers
520
+ [ASR] yes [0.00]
521
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it's related to the cows
522
+ [ASR] he's the pee [14.13]
523
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] this vowel system is the same as in vulgar latin
524
+ [ASR] was the [0.92]
525
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the fruit comprises a couple of drupes
526
+ [ASR] they didn't need to [0.00]
527
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] its meaning is about the path
528
+ [ASR] d is the debate that to be d [6.57]
529
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it does not grow naturally in the city of jericho
530
+ [ASR] [0.00]
531
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the capital of the region is mena
532
+ [ASR] [0.00]
533
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] this is a mess
534
+ [ASR] [0.00]
535
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] again do we have the right to know the private lives of celebrities
536
+ [ASR] you learned that the [1.68]
537
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] for this reason in august of the same year she temporarily withdrew from television
538
+ [ASR] [0.00]
539
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it s native to the hawaiian islands
540
+ [ASR] did they [0.00]
541
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] she led us to question everything we had learned about design
542
+ [ASR] blue the beer [0.00]
543
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] there's no advice for the past
544
+ [ASR] [0.00]
545
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the ships of the second one were destined for hong kong
546
+ [ASR] [0.00]
547
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it lives in afghanistan iran pakistan and turkey
548
+ [ASR] [0.00]
549
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] sams' creativity and the king's promotional campaigns worked
550
+ [ASR] [0.00]
551
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] in the united states it was released as the volkswagen quantum
552
+ [ASR] di [0.00]
553
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] that mutation is more prominent in male voices than in female ones
554
+ [ASR] di [0.00]
555
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the grand master elected the defender to join his band
556
+ [ASR] detected a war [0.00]
557
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] declared protected areas
558
+ [ASR] in the end and [0.00]
559
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] degree in social communication
560
+ [ASR] the [0.00]
561
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] at eighteen he could produce and sell many pieces
562
+ [ASR] [0.00]
563
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] they had several guest bands as in previous tours
564
+ [ASR] [0.00]
565
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] like all the rapsids the location of the head was tilted back
566
+ [ASR] the above the above [2.57]
567
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it had as a goal scorer the colombian victor renteria
568
+ [ASR] [0.00]
569
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] finally he defeats the attackers and kills miller in mortal combat
570
+ [ASR] let me [0.00]
571
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] listen whoever he is he doesn't want us to find him
572
+ [ASR] [0.00]
573
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] he lives in paris
574
+ [ASR] [0.00]
575
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] his patron saint's name was saint justus
576
+ [ASR] is the [0.00]
577
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] its habitat extends between the guianas and venezuela
578
+ [ASR] did you [0.00]
579
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] montenegro and serbia divided the sandzak and albania was created
580
+ [ASR] d [0.00]
581
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] come on friend come on
582
+ [ASR] and [0.00]
583
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] there are older references
584
+ [ASR] r [0.00]
585
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] he witnesses the doctor and astrid teleporting back to the intergalactic cruise ship titanic
586
+ [ASR] [0.00]
587
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the oblivion relegated the emotions and even the color
588
+ [ASR] [0.00]
589
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] look i cut the cookies in little pieces the cookies
590
+ [ASR] [0.00]
591
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] their playing usually lasts six minutes
592
+ [ASR] [0.00]
593
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] two of these pterosaurs have not been named yet
594
+ [ASR] the deep [0.00]
595
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] with her he premiered at the teatro infanta isabel plays such as oh doctor
596
+ [ASR] no [0.00]
597
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it is found in the northern region of the amazon basin
598
+ [ASR] the [0.00]
599
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] thus the number of active participants will be reduced to only twenty five
600
+ [ASR] nay they did [0.00]
601
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the organization was authorized to build gates and dikes
602
+ [ASR] worry year [0.00]
603
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the beak is almost black with the base of the jaw light grey
604
+ [ASR] [0.00]
605
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] he has a younger brother benjamin ben draiman who makes folk rock and ambient music
606
+ [ASR] [0.00]
607
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] overcoming this problem he has returned to the team for the current season
608
+ [ASR] okay [0.00]
609
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] he created a radio network in new york
610
+ [ASR] did the thing he did the d [5.69]
611
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it landed vertically backward
612
+ [ASR] [0.00]
613
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the inside is mainly built in wood
614
+ [ASR] oh [0.00]
615
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] he scored three goals and assisted on a fourth
616
+ [ASR] two [0.00]
617
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] each of these ayllus has its own neighborhood and square
618
+ [ASR] the d n n [0.00]
619
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] it lasted less than candy in a child's mouth
620
+ [ASR] is the [0.00]
621
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] this symbolic act represents the end of the crisis
622
+ [ASR] [0.00]
623
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] they think margo has left clues so that quentin can find her
624
+ [ASR] [0.00]
625
+ [NeMo I 2026-04-26 23:47:53 asr_bleu:80] [REF] the painter is being interviewed on the radio
626
+ [ASR] [0.00]
627
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/864c60f6-3cfe-46d4-8137-dda948fedadf.wav
628
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/47414fd7-2788-4671-a1f7-7deb1e7a8043.wav
629
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/a51c5f30-cb3b-4c52-bf5a-fea3f3b308ae.wav
630
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/93ce123f-4d93-48fd-ba3a-fa6cb1912502.wav
631
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/a7623f82-9030-41f7-a2a8-f6070fdc00ae.wav
632
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/182f4c63-9602-4a71-98b8-d8d50845e25c.wav
633
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/6baffb94-af59-42b6-bbe1-cb0776e68d0f.wav
634
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/e587f7d8-0a8e-4286-9d77-f5d8edae3ace.wav
635
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/09aecae5-b953-4f02-a22b-296fdc73cc06.wav
636
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/8ec1d43f-7b47-4cda-b453-d8cb77a2e1e1.wav
637
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/2eb2c5b8-1e52-4b23-b180-ff766626b1a8.wav
638
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/9f19424b-c820-487f-99b5-6e7516e9b934.wav
639
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/5c6a8b18-8918-43f6-b04d-605e108ba994.wav
640
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/9d8dbddc-7316-407c-a65d-7e688a99e045.wav
641
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/e9310fa9-0dcd-4a92-ae97-d7e886b3066b.wav
642
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/1b540a01-969f-4fe5-a449-f7a8e18f7ad1.wav
643
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/b70e4973-ae52-46f6-8b57-094b77d72f97.wav
644
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/f96c7b27-afda-460f-9899-db11a143a389.wav
645
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/b7298c70-dc12-447c-8ff4-b90a886cd38a.wav
646
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/f7a2d6d2-6d69-439b-a8a2-76ed71fbabdc.wav
647
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/e352f1c7-8e9f-4984-8d49-70819616617c.wav
648
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/bd12b66a-8b65-475c-9fa6-0f7b2b44bba1.wav
649
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/a4608886-ec28-45a2-b5e7-db98328d95f9.wav
650
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/905bea97-4b68-456c-bb1a-fd38c278c8de.wav
651
+ [NeMo I 2026-04-26 23:47:53 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/3a42d3b8-5c35-4a13-bce1-5e559273aed7.wav
652
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/7e96c80f-b460-40b5-bbbd-53f938c84523.wav
653
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/414f9639-0caa-4db3-981d-a0e7f93f1030.wav
654
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/233b572b-a3e7-472e-922d-c2ce9e1fe6db.wav
655
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/c5d41939-29b9-4333-ab95-4d9bb5cba837.wav
656
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/35209995-5fc9-498f-a1a1-9fe5ae7e0501.wav
657
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/5364a9dc-4b88-4656-91a4-e08b928df2f9.wav
658
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/8988b56b-9a8e-40c9-9191-ad4273846c09.wav
659
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/4ff65a04-79d5-47bb-bac4-1573dcf8fe93.wav
660
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/628e28c0-aa88-45aa-9cd4-0371f5cfe9a5.wav
661
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/603c944c-f666-4fe0-9ec1-752698168608.wav
662
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/5f3624b4-12f6-4bdd-b510-daebc84350b8.wav
663
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/bede7c1d-e6c9-4345-8794-dbaf69766190.wav
664
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/08d0c818-3b13-41f4-9082-2394c0f332de.wav
665
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/416572c3-7838-49ac-aaf0-244dffed08c0.wav
666
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/d7fc60c2-77d6-4aaf-8b69-a2a998d504e2.wav
667
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/78b555eb-3470-4279-848c-af3c72208dda.wav
668
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/64429b85-ab36-4b9f-a3ea-66ae17ffc47f.wav
669
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/da47d7af-6da4-4d2e-8dde-2b469d95c79d.wav
670
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/82835111-4af5-4dba-889c-e8c5d6c2cb6e.wav
671
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/a0f90b27-57ac-4de1-8b8f-2943a2539f6e.wav
672
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/64d99fe4-c70a-4084-b21e-7591cfdfc688.wav
673
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/100db0bc-b3fe-4514-bb70-4a7bff839029.wav
674
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/3c868384-211c-4845-9fdd-c0c8d8275cec.wav
675
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/e70a297d-0607-4c3a-973c-fc6926eb25bb.wav
676
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/08b61e60-fdbe-4386-8ba7-2e10ec3bf948.wav
677
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/3a89894f-19ea-4613-9e7b-639a07a23d24.wav
678
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/2cab0072-a929-40bc-aaaf-d94eddb02380.wav
679
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/315156bf-45dc-4bb3-b6b6-8571b53097a1.wav
680
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/4da5c489-586e-4758-9757-144a298871c2.wav
681
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/002a68c1-69c3-4439-98f2-89cd2b650d16.wav
682
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/bf07db1a-4b32-42b5-ab18-fe50b8431c03.wav
683
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/fc3b1cd0-86d6-42fd-babf-af5a86d3507f.wav
684
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/4eb6a1f3-c866-4aae-9cea-af7e410a5298.wav
685
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/27a5f6ac-975d-4185-9880-7cd902973579.wav
686
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/4c211e5a-5299-4025-81e9-e41c9de20712.wav
687
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/81a195f5-70d1-49b3-bd55-51a3156ac0d7.wav
688
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/5125c20c-23c0-4291-86de-1157e5ece331.wav
689
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/4c96af03-b294-400c-becc-cf5028945b1a.wav
690
+ [NeMo I 2026-04-26 23:47:54 results_logger:72] Audio saved at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/pred_wavs/en_dev/9d4edfb3-7021-4083-b7f9-dc7346452bb5.wav
691
+ [NeMo I 2026-04-26 23:47:54 results_logger:129] Metadata file for en_dev dataset updated at: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/validation_logs/metadatas/en_dev.json
692
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] i responded in a bad way straightening myself in the chair
693
+ [HYP] the is the the the the the the the the the the [3.39]
694
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] previously she was friends of the cruella from school
695
+ [HYP] the is the the the the the the the the the [3.75]
696
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] at present democrat jim mcdermott is the district representative
697
+ [HYP] the the the the the the the the the the the the the the the [2.63]
698
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it is found in the afro tropical and southeast asia eco zone
699
+ [HYP] the is the the the the the the the the the the the [3.67]
700
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] poor things they love you so much yes they do love me wow don't tell them
701
+ [HYP] the the the the thethe the the the the the the thethe the the the the the the the the the the and and [0.00]
702
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] this is developed at different times of the year in the processions and stations of the cross
703
+ [HYP] the the the the the the the the the the the the the the [3.02]
704
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] they have high abrasion resistance
705
+ [HYP] the the the the the the the the the the the [0.00]
706
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the award included three categories science art and literature
707
+ [HYP] the the the the the the the the the the the the the the [2.84]
708
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] his children are paloma yerma and natalio
709
+ [HYP] is is the the the the the the the the the the the the the the the the the the the [0.00]
710
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] my friend sara just got her backpack stolen
711
+ [HYP] the the the the the the the the the the the [0.00]
712
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it included salafist groups inspired by the muslim brothers
713
+ [HYP] the the the the the the the the the the the the [3.39]
714
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it's related to the cows
715
+ [HYP] the is the the the the the the [5.52]
716
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] this vowel system is the same as in vulgar latin
717
+ [HYP] the the the the the the the the the the the the the [3.09]
718
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the fruit comprises a couple of drupes
719
+ [HYP] the the the the the the the the the the [4.20]
720
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] its meaning is about the path
721
+ [HYP] the is the the the the the the the the [4.99]
722
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it does not grow naturally in the city of jericho
723
+ [HYP] the the the the the the the the the the the the [3.39]
724
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the capital of the region is mena
725
+ [HYP] the the the the the the the [7.81]
726
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] this is a mess
727
+ [HYP] the the the the the the the [0.00]
728
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] again do we have the right to know the private lives of celebrities
729
+ [HYP] the the the the the the the the the the the the the the the [3.13]
730
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] for this reason in august of the same year she temporarily withdrew from television
731
+ [HYP] the the the the the the the the the the the the the the the the the the the the [1.91]
732
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it s native to the hawaiian islands
733
+ [HYP] the the the the the the the the the the the [3.75]
734
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] she led us to question everything we had learned about design
735
+ [HYP] the the the the the the the the the the the the the the the [0.00]
736
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] there's no advice for the past
737
+ [HYP] the the the the the the the the the the [4.20]
738
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the ships of the second one were destined for hong kong
739
+ [HYP] thethe the the the the the the the the the the the the the the [3.13]
740
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it lives in afghanistan iran pakistan and turkey
741
+ [HYP] are and and and and and and and and and and and and and and and and [2.29]
742
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] sams' creativity and the king's promotional campaigns worked
743
+ [HYP] the the the the the the the the the the the the the the the the the [2.29]
744
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] in the united states it was released as the volkswagen quantum
745
+ [HYP] the the the the the the the the the the the the the the the the [2.91]
746
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] that mutation is more prominent in male voices than in female ones
747
+ [HYP] the are the the the the the the the the the the the the the the the the and and and [0.00]
748
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the grand master elected the defender to join his band
749
+ [HYP] the the the the the the the the the the the the the the the [3.13]
750
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] declared protected areas
751
+ [HYP] the the the the the the the the the the the and and and and [0.00]
752
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] degree in social communication
753
+ [HYP] the the the the the the the the the the [0.00]
754
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] at eighteen he could produce and sell many pieces
755
+ [HYP] the the the the the the the the the the the the the the the [0.00]
756
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] they had several guest bands as in previous tours
757
+ [HYP] the the the the the the the the the the the [0.00]
758
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] like all the rapsids the location of the head was tilted back
759
+ [HYP] the the the the the the the the of of the of of [7.35]
760
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it had as a goal scorer the colombian victor renteria
761
+ [HYP] i a a a a the the the the the the the the the the the [2.91]
762
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] finally he defeats the attackers and kills miller in mortal combat
763
+ [HYP] thethe the the the the the the the the the the the the the the the the [2.29]
764
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] listen whoever he is he doesn't want us to find him
765
+ [HYP] the is is the the the the the the the the the the the the the the the [2.15]
766
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] he lives in paris
767
+ [HYP] is the the the the the the the the the the [0.00]
768
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] his patron saint's name was saint justus
769
+ [HYP] the is is the the the the the the the the [0.00]
770
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] its habitat extends between the guianas and venezuela
771
+ [HYP] the the the the the the the the the [4.77]
772
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] montenegro and serbia divided the sandzak and albania was created
773
+ [HYP] the the the the the the the the the the the and and and and and [3.22]
774
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] come on friend come on
775
+ [HYP] the are are are the and and and and [0.00]
776
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] there are older references
777
+ [HYP] are are are the the the the the the the the [3.75]
778
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] he witnesses the doctor and astrid teleporting back to the intergalactic cruise ship titanic
779
+ [HYP] the the the the the the the the the the the the the the the the the the the [2.41]
780
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the oblivion relegated the emotions and even the color
781
+ [HYP] the the the the the the the the the the the the the the the the the the the the [2.52]
782
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] look i cut the cookies in little pieces the cookies
783
+ [HYP] the the the the the the the the the the the the the the and and [2.91]
784
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] their playing usually lasts six minutes
785
+ [HYP] the the the the the the the the the the the the the the the the the and and [0.00]
786
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] two of these pterosaurs have not been named yet
787
+ [HYP] the the the the the the the the the the the the the and and and and and [0.00]
788
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] with her he premiered at the teatro infanta isabel plays such as oh doctor
789
+ [HYP] the the the the the the the the the the the the the the the [2.63]
790
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it is found in the northern region of the amazon basin
791
+ [HYP] the the the the the the the the the the the the [4.03]
792
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] thus the number of active participants will be reduced to only twenty five
793
+ [HYP] the the the the the the the the the the the the the the the [2.63]
794
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the organization was authorized to build gates and dikes
795
+ [HYP] the the the the the the the the the the the the the the the [2.63]
796
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the beak is almost black with the base of the jaw light grey
797
+ [HYP] the the the the the the the the the the the the the the [3.74]
798
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] he has a younger brother benjamin ben draiman who makes folk rock and ambient music
799
+ [HYP] the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the the [0.00]
800
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] overcoming this problem he has returned to the team for the current season
801
+ [HYP] the the the the the the the the the the the the the the the the the the the [2.41]
802
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] he created a radio network in new york
803
+ [HYP] the the the the the the the the the the the the [0.00]
804
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it landed vertically backward
805
+ [HYP] the are are the the the the the the the the and [0.00]
806
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the inside is mainly built in wood
807
+ [HYP] the is the the the the the the the the the the [4.03]
808
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] he scored three goals and assisted on a fourth
809
+ [HYP] the the the the the the the the the the the [0.00]
810
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] each of these ayllus has its own neighborhood and square
811
+ [HYP] the the the the the the and and and and and and and and [2.84]
812
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] it lasted less than candy in a child's mouth
813
+ [HYP] is the the the the the the the the the the the the the [0.00]
814
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] this symbolic act represents the end of the crisis
815
+ [HYP] the is the the the the the the the the the the [4.03]
816
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] they think margo has left clues so that quentin can find her
817
+ [HYP] the the the the the the the the the the the the the the [0.00]
818
+ [NeMo I 2026-04-26 23:47:54 bleu:51] [REF] the painter is being interviewed on the radio
819
+ [HYP] the't the the the the the the the the the [4.99]
820
+ [NeMo I 2026-04-26 23:47:55 nemo_model_checkpoint:569] Checkpoint save for step 1000 started at 1777261675.269628.
821
+ [NeMo I 2026-04-26 23:49:52 nemo_model_checkpoint:569] Checkpoint save for step 1000 started at 1777261792.7167933.
822
+ [NeMo I 2026-04-26 23:51:44 cloud:58] Found existing object /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
823
+ [NeMo I 2026-04-26 23:51:44 cloud:64] Re-using file from: /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo
824
+ [NeMo I 2026-04-26 23:51:44 common:827] Instantiating model from pre-trained checkpoint
825
+ [NeMo W 2026-04-26 23:51:44 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
826
+ Train config :
827
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
828
+ sample_rate: 16000
829
+ labels: null
830
+ batch_size: 64
831
+ shuffle: true
832
+ is_tarred: false
833
+ tarred_audio_filepaths: null
834
+ tarred_shard_strategy: scatter
835
+ augmentor:
836
+ noise:
837
+ manifest_path: /manifests/noise/rir_noise_manifest.json
838
+ prob: 0.5
839
+ min_snr_db: 0
840
+ max_snr_db: 15
841
+ speed:
842
+ prob: 0.5
843
+ sr: 16000
844
+ resample_type: kaiser_fast
845
+ min_speed_rate: 0.95
846
+ max_speed_rate: 1.05
847
+ num_workers: 15
848
+ pin_memory: true
849
+
850
+ [NeMo W 2026-04-26 23:51:44 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
851
+ Validation config :
852
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
853
+ sample_rate: 16000
854
+ labels: null
855
+ batch_size: 128
856
+ shuffle: false
857
+ num_workers: 15
858
+ pin_memory: true
859
+
860
+ [NeMo I 2026-04-26 23:51:44 features:305] PADDING: 16
861
+ [NeMo I 2026-04-26 23:51:45 save_restore_connector:283] Model EncDecSpeakerLabelModel was successfully restored from /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
dummy_nemo_duplex/qwen_1b/run_0/cmd-args.log ADDED
@@ -0,0 +1 @@
 
 
1
+ /export/fs06/ahussei6/nvidia/github/NeMo/examples/speechlm2/s2s_duplex_speech_decoder_train.py --config-path=conf/train --config-name=qwen_1b ++exp_manager.checkpoint_callback_params.save_top_k=3 exp_manager.name=qwen_1b ++model.pretrained_audio_codec=/export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo ++model.pretrained_tts_from_s2s=/export/fs06/ahussei6/nvidia/pretrained_models/magpie_tts/tts-pretraining_qwnen_2.5_81007_steps.ckpt ++model.pretrained_asr=/export/fs06/ahussei6/nvidia/pretrained_models/asr/stt_en_fastconformer_hybrid_large_streaming_multi_v1.20.0/stt_en_fastconformer_hybrid_large_streaming_multi.nemo ++model.mask_sequence_loss=True trainer.num_nodes=1 exp_manager.explicit_log_dir=/export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b data.train_ds.seed= ++model.audio_loss_weight=20 ++model.speech_decoder.cond_on_prev_audio_tokens=True ++model.speech_decoder.use_speaker_encoder=True ++model.speech_decoder.cond_on_char_embedding=True ++model.speech_decoder.cond_on_asr_emb=False ++model.speech_decoder.cond_on_llm_latent=False ++model.speech_decoder.cond_on_modality_adapter_emb=False ++model.speech_decoder.cond_on_text_tokens=False ++model.speech_decoder.cfg_scale=2.5 ++model.speech_decoder.kernel_size=3 ++model.speech_decoder.cfg_unconditional_prob=0.2 ++model.custom_codebook_size=2045 ++model.custom_speech_bos_id=2019 ++model.custom_speech_eos_id=2020 ++model.custom_speech_delay_id=2018 model.perception.encoder.att_context_size=[70,0] model.perception.modality_adapter.att_context_size=[70,0] ++model.pretrained_llm=/export/fs06/ahussei6/nvidia/cache/HFCACHE/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306 ++trainer.limit_val_batches=1 ++trainer.val_check_interval=1000 ++model.scale_loss_by=non_sil_t ++model.scale_loss_mask=10 ++model.val_acc_tolerance=480 data.validation_ds.seed=
dummy_nemo_duplex/qwen_1b/run_0/exp_config.yaml ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ pretrained_llm: /export/fs06/ahussei6/nvidia/cache/HFCACHE/hub/models--Qwen--Qwen2.5-1.5B-Instruct/snapshots/989aa7980e4cf806f80c7fef2b1adb7bc71aa306
3
+ pretrained_audio_codec: /export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo
4
+ pretrained_asr: /export/fs06/ahussei6/nvidia/pretrained_models/asr/stt_en_fastconformer_hybrid_large_streaming_multi_v1.20.0/stt_en_fastconformer_hybrid_large_streaming_multi.nemo
5
+ scoring_asr: stt_en_fastconformer_transducer_large
6
+ pretrained_weights: true
7
+ audio_loss_weight: 20
8
+ text_loss_weight: 3
9
+ freeze_params:
10
+ - ^audio_codec\..+$
11
+ - ^speech_generation\.speaker_encoder\..+$
12
+ prevent_freeze_params: []
13
+ duplex_user_channel_weight: 1.0
14
+ hidden_pooling_factor: 2
15
+ audio_save_path: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/pred_audios
16
+ perception:
17
+ target: nemo.collections.multimodal.speech_llm.modules.perception_modules.AudioPerceptionModule
18
+ use_multi_layer_feat: false
19
+ add_sep: true
20
+ is_canary: true
21
+ preprocessor:
22
+ normalize: NA
23
+ encoder:
24
+ self_attention_model: rel_pos
25
+ att_context_size:
26
+ - 70
27
+ - 0
28
+ conv_context_size: causal
29
+ conv_norm_type: layer_norm
30
+ modality_adapter:
31
+ _target_: nemo.collections.asr.modules.ConformerEncoder
32
+ feat_in: 512
33
+ feat_out: -1
34
+ n_layers: 2
35
+ d_model: 512
36
+ subsampling: dw_striding
37
+ subsampling_factor: 1
38
+ subsampling_conv_channels: 256
39
+ causal_downsampling: true
40
+ ff_expansion_factor: 4
41
+ self_attention_model: rel_pos
42
+ n_heads: 8
43
+ att_context_size:
44
+ - 70
45
+ - 0
46
+ att_context_style: chunked_limited
47
+ xscaling: true
48
+ untie_biases: true
49
+ pos_emb_max_len: 5000
50
+ conv_kernel_size: 9
51
+ conv_norm_type: layer_norm
52
+ conv_context_size: causal
53
+ dropout: 0
54
+ dropout_pre_encoder: 0
55
+ dropout_emb: 0.0
56
+ dropout_att: 0
57
+ spec_augment:
58
+ _target_: nemo.collections.asr.modules.SpectrogramAugmentation
59
+ freq_masks: 2
60
+ time_masks: 10
61
+ freq_width: 27
62
+ time_width: 0.05
63
+ speech_decoder:
64
+ n_layers: 12
65
+ d_model: 768
66
+ d_ffn: 3072
67
+ sa_n_heads: 12
68
+ kernel_size: 3
69
+ p_dropout: 0.1
70
+ p_dropout_out: 0.0
71
+ has_xattn: false
72
+ xa_d_memory: 768
73
+ xa_n_heads: 12
74
+ is_causal: true
75
+ apply_norm_to_cond: true
76
+ apply_norm_out: true
77
+ max_length_causal_mask: 5000
78
+ cond_on_prev_audio_tokens: true
79
+ detach_input: false
80
+ use_learnable_pos_emb: true
81
+ cond_on_modality_adapter_emb: false
82
+ cond_on_char_embedding: true
83
+ cond_on_llm_latent: false
84
+ use_speaker_encoder: true
85
+ cond_on_asr_emb: false
86
+ cond_on_text_tokens: false
87
+ cfg_scale: 2.5
88
+ cfg_unconditional_prob: 0.2
89
+ optimizer:
90
+ _target_: torch.optim.AdamW
91
+ lr: 0.0001
92
+ betas:
93
+ - 0.9
94
+ - 0.98
95
+ weight_decay: 0
96
+ foreach: true
97
+ lr_scheduler:
98
+ _target_: nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing
99
+ warmup_steps: 4000
100
+ min_lr: 1.0e-06
101
+ max_steps: 100000
102
+ pretrained_tts_from_s2s: /export/fs06/ahussei6/nvidia/pretrained_models/magpie_tts/tts-pretraining_qwnen_2.5_81007_steps.ckpt
103
+ mask_sequence_loss: true
104
+ custom_codebook_size: 2045
105
+ custom_speech_bos_id: 2019
106
+ custom_speech_eos_id: 2020
107
+ custom_speech_delay_id: 2018
108
+ scale_loss_by: non_sil_t
109
+ scale_loss_mask: 10
110
+ val_acc_tolerance: 480
111
+ trainer:
112
+ devices: -1
113
+ accelerator: gpu
114
+ num_nodes: 1
115
+ precision: bf16-true
116
+ logger: false
117
+ enable_checkpointing: false
118
+ use_distributed_sampler: false
119
+ max_steps: 100000
120
+ val_check_interval: 1000
121
+ limit_train_batches: 1000
122
+ limit_val_batches: 1
123
+ log_every_n_steps: 20
124
+ num_sanity_val_steps: 0
125
+ gradient_clip_val: 1.0
126
+ accumulate_grad_batches: 1
127
+ strategy:
128
+ _target_: lightning.pytorch.strategies.DDPStrategy
129
+ gradient_as_bucket_view: true
130
+ find_unused_parameters: true
131
+ data:
132
+ frame_length: 0.08
133
+ source_sample_rate: 16000
134
+ target_sample_rate: 22050
135
+ input_roles:
136
+ - user
137
+ - User
138
+ output_roles:
139
+ - agent
140
+ - Assistant
141
+ - assistant
142
+ - Agent
143
+ add_delay_token: false
144
+ train_ds:
145
+ sample_rate: 22050
146
+ multi_config: true
147
+ shuffle: true
148
+ sampler_fusion: randomized_round_robin
149
+ seed: ''
150
+ shard_seed: trng
151
+ sampler_weights:
152
+ cv_en: 1.0
153
+ cv_en:
154
+ input_cfg: /export/fs06/ahussei6/nvidia/github/NeMo/examples/speechlm2/conf/data/en.yaml
155
+ bucket_buffer_size: 10000
156
+ shuffle_buffer_size: 10000
157
+ num_workers: 8
158
+ pin_memory: true
159
+ max_duration: 60
160
+ min_duration: 1
161
+ num_buckets: 30
162
+ bucket_duration_bins:
163
+ - 5.33
164
+ - 6.27
165
+ - 6.96
166
+ - 7.54
167
+ - 8.07
168
+ - 8.55
169
+ - 8.99
170
+ - 9.39
171
+ - 9.76
172
+ - 10.14
173
+ - 10.54
174
+ - 10.99
175
+ - 11.49
176
+ - 12.04
177
+ - 12.64
178
+ - 13.28
179
+ - 13.96
180
+ - 14.69
181
+ - 15.48
182
+ - 16.32
183
+ - 17.25
184
+ - 18.28
185
+ - 19.39
186
+ - 20.65
187
+ - 22.14
188
+ - 24.01
189
+ - 26.34
190
+ - 29.21
191
+ - 32.79
192
+ batch_duration: 400
193
+ use_bucketing: true
194
+ seed: trng
195
+ validation_ds:
196
+ datasets:
197
+ en_dev:
198
+ shar_path: /export/fs06/ahussei6/nvidia/github/debug
199
+ sample_rate: 22050
200
+ batch_size: 64
201
+ seed: ''
202
+ shard_seed: randomized
203
+ exp_manager:
204
+ exp_dir: null
205
+ explicit_log_dir: /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b
206
+ name: qwen_1b
207
+ create_tensorboard_logger: false
208
+ create_checkpoint_callback: true
209
+ use_datetime_version: true
210
+ max_time_per_run: 00:03:50:00
211
+ resume_from_checkpoint: null
212
+ resume_if_exists: true
213
+ resume_ignore_no_checkpoint: true
214
+ create_wandb_logger: false
215
+ wandb_logger_kwargs:
216
+ name: development-run
217
+ project: salm_s2s_speech_decoder_v2_char_tokens
218
+ resume: true
219
+ checkpoint_callback_params:
220
+ filename: '{step}'
221
+ monitor: val_asr_bleu
222
+ mode: max
223
+ every_n_train_steps: null
224
+ every_n_epochs: 1
225
+ save_top_k: 3
226
+ always_save_nemo: false
dummy_nemo_duplex/qwen_1b/run_0/lightning_logs.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
2
+
3
+ | Name | Type | Params | Mode
4
+ --------------------------------------------------------------------------
5
+ 0 | audio_codec | AudioCodecModel | 62.2 M | eval
6
+ 1 | llm | Qwen2Model | 1.3 B | train
7
+ 2 | lm_head | Linear | 233 M | train
8
+ 3 | embed_tokens | Embedding | 233 M | train
9
+ 4 | perception | AudioPerceptionModule | 122 M | train
10
+ 5 | speech_generation | TransformerARSpeechDecoder | 288 M | train
11
+ 6 | embed_audio_tokens | ModuleList | 40.9 M | train
12
+ 7 | audio_head | Linear | 40.9 M | train
13
+ --------------------------------------------------------------------------
14
+ 2.0 B Trainable params
15
+ 87.5 M Non-trainable params
16
+ 2.1 B Total params
17
+ 8,397.045 Total estimated model params size (MB)
18
+ 1141 Modules in train mode
19
+ 2018 Modules in eval mode
dummy_nemo_duplex/qwen_1b/run_0/nemo_error_log.txt ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo W 2026-04-26 23:14:55 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
2
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
3
+
4
+ [NeMo W 2026-04-26 23:14:59 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
5
+
6
+ [NeMo W 2026-04-26 23:14:59 exp_manager:1079] Exp_manager is logging to /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b, but it already exists.
7
+ [NeMo W 2026-04-26 23:14:59 exp_manager:997] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :/export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/checkpoints. Training from scratch.
8
+ [NeMo W 2026-04-26 23:14:59 exp_manager:1388] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 100000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
9
+ [NeMo W 2026-04-26 23:15:02 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
10
+ Train config :
11
+ dataset:
12
+ dataset_type: tarred_vocoder
13
+ dataset_args:
14
+ dataset_meta:
15
+ lindy:
16
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/train_manifest.json
17
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/audio_{0..3}.tar
18
+ emma:
19
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/train_manifest.json
20
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_{0..3}.tar
21
+ libritts_r:
22
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/train_manifest.json
23
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/audio_{0..48}.tar
24
+ sample_rate: 22050
25
+ n_samples: 24696
26
+ min_duration: 0.4
27
+ max_duration: null
28
+ audio_augmentator_config:
29
+ impulse:
30
+ prob: 0.5
31
+ rng: 42
32
+ manifest_path: /lustre/fsw/portfolios/convai/projects/convai_convaird_nemo-speech/data/ROOM_Response_and_Noise_Database/processed/rir.json
33
+ normalize_impulse: false
34
+ shift_impulse: true
35
+ codec:
36
+ prob: 0.5
37
+ rng: 42
38
+ format_params:
39
+ - format: mp3
40
+ prob: 0.5
41
+ bit_rates:
42
+ - 16000
43
+ - 32000
44
+ - 64000
45
+ - 128000
46
+ - format: ogg
47
+ prob: 0.075
48
+ encoder: vorbis
49
+ bit_rates:
50
+ - 32000
51
+ - 48000
52
+ - 64000
53
+ - format: ogg
54
+ prob: 0.375
55
+ encoder: opus
56
+ bit_rates:
57
+ - 8000
58
+ - 16000
59
+ - 32000
60
+ - 64000
61
+ - 128000
62
+ - format: wav
63
+ prob: 0.025
64
+ encoder: pcm_alaw
65
+ bit_rates:
66
+ - 64000
67
+ - format: wav
68
+ prob: 0.025
69
+ encoder: pcm_mulaw
70
+ bit_rates:
71
+ - 64000
72
+ shard_strategy: replicate
73
+ sample_type: weighted_random
74
+ sample_args:
75
+ batch_size: 32
76
+ steps_per_epoch: 233
77
+ dataset_weights:
78
+ - 0.2
79
+ - 0.2
80
+ - 0.6
81
+ shuffle_n: 10000
82
+ dataloader_params:
83
+ batch_size: 32
84
+ drop_last: true
85
+ num_workers: 4
86
+
87
+ [NeMo W 2026-04-26 23:15:02 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
88
+ Validation config :
89
+ dataset:
90
+ dataset_type: vocoder
91
+ dataset_args:
92
+ sample_rate: 22050
93
+ n_samples: null
94
+ min_duration: null
95
+ max_duration: null
96
+ trunc_duration: 10.0
97
+ dataset_meta:
98
+ lindy:
99
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/val_manifest.json
100
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/audio_22khz
101
+ emma:
102
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/val_manifest.json
103
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_22khz
104
+ libritts_r:
105
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/val_manifest.json
106
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/audio_22khz
107
+ dataloader_params:
108
+ batch_size: 4
109
+ num_workers: 2
110
+
111
+ [NeMo W 2026-04-26 23:15:19 duplex_s2s_speech_decoder_model:148] Tokenizer does not have a `bos_token`. Setting it to '<|im_start|>'.
112
+ [NeMo W 2026-04-26 23:15:36 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
113
+ Train config :
114
+ manifest_filepath:
115
+ - - /raid/local//bucket1/tarred_audio_manifest.json
116
+ - - /raid/local//bucket2/tarred_audio_manifest.json
117
+ - - /raid/local//bucket3/tarred_audio_manifest.json
118
+ - - /raid/local//bucket4/tarred_audio_manifest.json
119
+ - - /raid/local//bucket5/tarred_audio_manifest.json
120
+ - - /raid/local//bucket6/tarred_audio_manifest.json
121
+ - - /raid/local//bucket7/tarred_audio_manifest.json
122
+ - - /raid/local//bucket8/tarred_audio_manifest.json
123
+ sample_rate: 16000
124
+ batch_size: 1
125
+ shuffle: true
126
+ num_workers: 4
127
+ pin_memory: true
128
+ use_start_end_token: false
129
+ trim_silence: false
130
+ max_duration: 25
131
+ min_duration: 0.1
132
+ is_tarred: true
133
+ tarred_audio_filepaths:
134
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket1/audio__OP_0..8191_CL_.tar
135
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket2/audio__OP_0..8191_CL_.tar
136
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket3/audio__OP_0..8191_CL_.tar
137
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket4/audio__OP_0..8191_CL_.tar
138
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket5/audio__OP_0..8191_CL_.tar
139
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket6/audio__OP_0..8191_CL_.tar
140
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket7/audio__OP_0..8191_CL_.tar
141
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket8/audio__OP_0..8191_CL_.tar
142
+ shuffle_n: 2048
143
+ bucketing_strategy: fully_randomized
144
+ bucketing_batch_size:
145
+ - 72
146
+ - 64
147
+ - 56
148
+ - 48
149
+ - 40
150
+ - 32
151
+ - 24
152
+ - 16
153
+
154
+ [NeMo W 2026-04-26 23:15:36 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
155
+ Validation config :
156
+ manifest_filepath:
157
+ - /manifests/librispeech/librivox-dev-other.json
158
+ - /manifests/librispeech/librivox-dev-clean.json
159
+ - /manifests/librispeech/librivox-test-other.json
160
+ - /manifests/librispeech/librivox-test-clean.json
161
+ sample_rate: 16000
162
+ batch_size: 16
163
+ shuffle: false
164
+ num_workers: 8
165
+ pin_memory: true
166
+ use_start_end_token: false
167
+
168
+ [NeMo W 2026-04-26 23:15:36 modelPT:202] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).
169
+ Test config :
170
+ manifest_filepath:
171
+ - /manifests/librispeech/librivox-dev-other.json
172
+ - /manifests/librispeech/librivox-dev-clean.json
173
+ - /manifests/librispeech/librivox-test-other.json
174
+ - /manifests/librispeech/librivox-test-clean.json
175
+ sample_rate: 16000
176
+ batch_size: 16
177
+ shuffle: false
178
+ num_workers: 8
179
+ pin_memory: true
180
+ use_start_end_token: false
181
+
182
+ [NeMo W 2026-04-26 23:15:39 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
183
+ Train config :
184
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
185
+ sample_rate: 16000
186
+ labels: null
187
+ batch_size: 64
188
+ shuffle: true
189
+ is_tarred: false
190
+ tarred_audio_filepaths: null
191
+ tarred_shard_strategy: scatter
192
+ augmentor:
193
+ noise:
194
+ manifest_path: /manifests/noise/rir_noise_manifest.json
195
+ prob: 0.5
196
+ min_snr_db: 0
197
+ max_snr_db: 15
198
+ speed:
199
+ prob: 0.5
200
+ sr: 16000
201
+ resample_type: kaiser_fast
202
+ min_speed_rate: 0.95
203
+ max_speed_rate: 1.05
204
+ num_workers: 15
205
+ pin_memory: true
206
+
207
+ [NeMo W 2026-04-26 23:15:39 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
208
+ Validation config :
209
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
210
+ sample_rate: 16000
211
+ labels: null
212
+ batch_size: 128
213
+ shuffle: false
214
+ num_workers: 15
215
+ pin_memory: true
216
+
217
+ [NeMo W 2026-04-26 23:17:10 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
218
+
dummy_nemo_duplex/qwen_1b/run_0/nemo_log_globalrank-0_localrank-0.txt ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo W 2026-04-26 23:14:55 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
2
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
3
+
4
+ [NeMo W 2026-04-26 23:14:59 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
5
+
6
+ [NeMo I 2026-04-26 23:14:59 exp_manager:574] ExpManager schema
7
+ [NeMo I 2026-04-26 23:14:59 exp_manager:575] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1, 'multistorageclient_enabled': False}, 'create_early_stopping_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
8
+ [NeMo W 2026-04-26 23:14:59 exp_manager:1079] Exp_manager is logging to /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b, but it already exists.
9
+ [NeMo W 2026-04-26 23:14:59 exp_manager:997] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :/export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b/checkpoints. Training from scratch.
10
+ [NeMo I 2026-04-26 23:14:59 exp_manager:635] Experiments will be logged at /export/fs06/ahussei6/nvidia/ara_duplex_test/qwen_1b
11
+ [NeMo W 2026-04-26 23:14:59 exp_manager:1388] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 100000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
12
+ [NeMo I 2026-04-26 23:14:59 exp_manager:780] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
13
+ [NeMo W 2026-04-26 23:15:02 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
14
+ Train config :
15
+ dataset:
16
+ dataset_type: tarred_vocoder
17
+ dataset_args:
18
+ dataset_meta:
19
+ lindy:
20
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/train_manifest.json
21
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/tarred_audio/audio_{0..3}.tar
22
+ emma:
23
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/train_manifest.json
24
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_{0..3}.tar
25
+ libritts_r:
26
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/train_manifest.json
27
+ tar_filepath: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/tarred_audio/audio_{0..48}.tar
28
+ sample_rate: 22050
29
+ n_samples: 24696
30
+ min_duration: 0.4
31
+ max_duration: null
32
+ audio_augmentator_config:
33
+ impulse:
34
+ prob: 0.5
35
+ rng: 42
36
+ manifest_path: /lustre/fsw/portfolios/convai/projects/convai_convaird_nemo-speech/data/ROOM_Response_and_Noise_Database/processed/rir.json
37
+ normalize_impulse: false
38
+ shift_impulse: true
39
+ codec:
40
+ prob: 0.5
41
+ rng: 42
42
+ format_params:
43
+ - format: mp3
44
+ prob: 0.5
45
+ bit_rates:
46
+ - 16000
47
+ - 32000
48
+ - 64000
49
+ - 128000
50
+ - format: ogg
51
+ prob: 0.075
52
+ encoder: vorbis
53
+ bit_rates:
54
+ - 32000
55
+ - 48000
56
+ - 64000
57
+ - format: ogg
58
+ prob: 0.375
59
+ encoder: opus
60
+ bit_rates:
61
+ - 8000
62
+ - 16000
63
+ - 32000
64
+ - 64000
65
+ - 128000
66
+ - format: wav
67
+ prob: 0.025
68
+ encoder: pcm_alaw
69
+ bit_rates:
70
+ - 64000
71
+ - format: wav
72
+ prob: 0.025
73
+ encoder: pcm_mulaw
74
+ bit_rates:
75
+ - 64000
76
+ shard_strategy: replicate
77
+ sample_type: weighted_random
78
+ sample_args:
79
+ batch_size: 32
80
+ steps_per_epoch: 233
81
+ dataset_weights:
82
+ - 0.2
83
+ - 0.2
84
+ - 0.6
85
+ shuffle_n: 10000
86
+ dataloader_params:
87
+ batch_size: 32
88
+ drop_last: true
89
+ num_workers: 4
90
+
91
+ [NeMo W 2026-04-26 23:15:02 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
92
+ Validation config :
93
+ dataset:
94
+ dataset_type: vocoder
95
+ dataset_args:
96
+ sample_rate: 22050
97
+ n_samples: null
98
+ min_duration: null
99
+ max_duration: null
100
+ trunc_duration: 10.0
101
+ dataset_meta:
102
+ lindy:
103
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/val_manifest.json
104
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/lindy/22khz/audio_22khz
105
+ emma:
106
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/val_manifest.json
107
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/Blackwell-Demo/emma_audio_tar/tarred_audio_22khz/audio_22khz
108
+ libritts_r:
109
+ manifest_path: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/val_manifest.json
110
+ audio_dir: /lustre/fsw/portfolios/llmservice/projects/llmservice_nemo_speechlm/data/TTS/libritts_r/22khz/audio_22khz
111
+ dataloader_params:
112
+ batch_size: 4
113
+ num_workers: 2
114
+
115
+ [NeMo I 2026-04-26 23:15:02 audio_codec:101] Vector quantizer does not support commit loss.
116
+ [NeMo I 2026-04-26 23:15:14 features:305] PADDING: 1
117
+ [NeMo I 2026-04-26 23:15:18 features:305] PADDING: 1
118
+ [NeMo I 2026-04-26 23:15:18 features:305] PADDING: 1
119
+ [NeMo I 2026-04-26 23:15:18 features:305] PADDING: 1
120
+ [NeMo I 2026-04-26 23:15:18 features:305] PADDING: 1
121
+ [NeMo I 2026-04-26 23:15:18 features:305] PADDING: 1
122
+ [NeMo I 2026-04-26 23:15:18 save_restore_connector:283] Model AudioCodecModel was successfully restored from /export/fs06/ahussei6/nvidia/pretrained_models/nano_codec/Low_Frame-rate_Speech_Codec++.nemo.
123
+ [NeMo W 2026-04-26 23:15:19 duplex_s2s_speech_decoder_model:148] Tokenizer does not have a `bos_token`. Setting it to '<|im_start|>'.
124
+ [NeMo I 2026-04-26 23:15:35 mixins:181] Tokenizer SentencePieceTokenizer initialized with 1024 tokens
125
+ [NeMo W 2026-04-26 23:15:36 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
126
+ Train config :
127
+ manifest_filepath:
128
+ - - /raid/local//bucket1/tarred_audio_manifest.json
129
+ - - /raid/local//bucket2/tarred_audio_manifest.json
130
+ - - /raid/local//bucket3/tarred_audio_manifest.json
131
+ - - /raid/local//bucket4/tarred_audio_manifest.json
132
+ - - /raid/local//bucket5/tarred_audio_manifest.json
133
+ - - /raid/local//bucket6/tarred_audio_manifest.json
134
+ - - /raid/local//bucket7/tarred_audio_manifest.json
135
+ - - /raid/local//bucket8/tarred_audio_manifest.json
136
+ sample_rate: 16000
137
+ batch_size: 1
138
+ shuffle: true
139
+ num_workers: 4
140
+ pin_memory: true
141
+ use_start_end_token: false
142
+ trim_silence: false
143
+ max_duration: 25
144
+ min_duration: 0.1
145
+ is_tarred: true
146
+ tarred_audio_filepaths:
147
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket1/audio__OP_0..8191_CL_.tar
148
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket2/audio__OP_0..8191_CL_.tar
149
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket3/audio__OP_0..8191_CL_.tar
150
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket4/audio__OP_0..8191_CL_.tar
151
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket5/audio__OP_0..8191_CL_.tar
152
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket6/audio__OP_0..8191_CL_.tar
153
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket7/audio__OP_0..8191_CL_.tar
154
+ - - /data2/nemo_asr/nemo_asr_set_3.0//bucket8/audio__OP_0..8191_CL_.tar
155
+ shuffle_n: 2048
156
+ bucketing_strategy: fully_randomized
157
+ bucketing_batch_size:
158
+ - 72
159
+ - 64
160
+ - 56
161
+ - 48
162
+ - 40
163
+ - 32
164
+ - 24
165
+ - 16
166
+
167
+ [NeMo W 2026-04-26 23:15:36 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
168
+ Validation config :
169
+ manifest_filepath:
170
+ - /manifests/librispeech/librivox-dev-other.json
171
+ - /manifests/librispeech/librivox-dev-clean.json
172
+ - /manifests/librispeech/librivox-test-other.json
173
+ - /manifests/librispeech/librivox-test-clean.json
174
+ sample_rate: 16000
175
+ batch_size: 16
176
+ shuffle: false
177
+ num_workers: 8
178
+ pin_memory: true
179
+ use_start_end_token: false
180
+
181
+ [NeMo W 2026-04-26 23:15:36 modelPT:202] Please call the ModelPT.setup_test_data() or ModelPT.setup_multiple_test_data() method and provide a valid configuration file to setup the test data loader(s).
182
+ Test config :
183
+ manifest_filepath:
184
+ - /manifests/librispeech/librivox-dev-other.json
185
+ - /manifests/librispeech/librivox-dev-clean.json
186
+ - /manifests/librispeech/librivox-test-other.json
187
+ - /manifests/librispeech/librivox-test-clean.json
188
+ sample_rate: 16000
189
+ batch_size: 16
190
+ shuffle: false
191
+ num_workers: 8
192
+ pin_memory: true
193
+ use_start_end_token: false
194
+
195
+ [NeMo I 2026-04-26 23:15:36 features:305] PADDING: 0
196
+ [NeMo I 2026-04-26 23:15:36 rnnt_models:226] Using RNNT Loss : warprnnt_numba
197
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.005, 'clamp': -1.0}
198
+ [NeMo I 2026-04-26 23:15:36 rnnt_models:226] Using RNNT Loss : warprnnt_numba
199
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.005, 'clamp': -1.0}
200
+ [NeMo I 2026-04-26 23:15:37 rnnt_models:226] Using RNNT Loss : warprnnt_numba
201
+ Loss warprnnt_numba_kwargs: {'fastemit_lambda': 0.005, 'clamp': -1.0}
202
+ [NeMo I 2026-04-26 23:15:38 save_restore_connector:283] Model EncDecHybridRNNTCTCBPEModel was successfully restored from /export/fs06/ahussei6/nvidia/pretrained_models/asr/stt_en_fastconformer_hybrid_large_streaming_multi_v1.20.0/stt_en_fastconformer_hybrid_large_streaming_multi.nemo.
203
+ [NeMo I 2026-04-26 23:15:38 features:305] PADDING: 0
204
+ [NeMo I 2026-04-26 23:15:38 cloud:58] Found existing object /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
205
+ [NeMo I 2026-04-26 23:15:38 cloud:64] Re-using file from: /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo
206
+ [NeMo I 2026-04-26 23:15:38 common:827] Instantiating model from pre-trained checkpoint
207
+ [NeMo W 2026-04-26 23:15:39 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
208
+ Train config :
209
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
210
+ sample_rate: 16000
211
+ labels: null
212
+ batch_size: 64
213
+ shuffle: true
214
+ is_tarred: false
215
+ tarred_audio_filepaths: null
216
+ tarred_shard_strategy: scatter
217
+ augmentor:
218
+ noise:
219
+ manifest_path: /manifests/noise/rir_noise_manifest.json
220
+ prob: 0.5
221
+ min_snr_db: 0
222
+ max_snr_db: 15
223
+ speed:
224
+ prob: 0.5
225
+ sr: 16000
226
+ resample_type: kaiser_fast
227
+ min_speed_rate: 0.95
228
+ max_speed_rate: 1.05
229
+ num_workers: 15
230
+ pin_memory: true
231
+
232
+ [NeMo W 2026-04-26 23:15:39 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
233
+ Validation config :
234
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
235
+ sample_rate: 16000
236
+ labels: null
237
+ batch_size: 128
238
+ shuffle: false
239
+ num_workers: 15
240
+ pin_memory: true
241
+
242
+ [NeMo I 2026-04-26 23:15:39 features:305] PADDING: 16
243
+ [NeMo I 2026-04-26 23:15:39 save_restore_connector:283] Model EncDecSpeakerLabelModel was successfully restored from /export/fs06/ahussei6/nvidia/cache/HFCACHE/titanet-l/11ba0924fdf87c049e339adbf6899d48/titanet-l.nemo.
244
+ [NeMo I 2026-04-26 23:17:09 pretrained:110] | > 237 / 237 layers are restored.
245
+ [NeMo W 2026-04-26 23:17:10 nemo_logging:361] /home/ahussein/miniconda3/envs/nemo/lib/python3.10/site-packages/lightning/fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /export/fs06/ahussei6/nvidia/github/NeMo/examples/sp ...
246
+
247
+ [NeMo I 2026-04-26 23:17:10 optim_setup:136] Parameters | trainable=2011762688 (95.83%) | total=2099261314