Upload folder using huggingface_hub
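The commit title matches the default commit message huggingface_hub attaches to folder uploads. As a minimal sketch, a commit like this one can be produced roughly as follows; the repo id and local paths here are hypothetical placeholders, not taken from this page:

# Sketch only: repo_id is a placeholder; authentication (token/login) is assumed.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="nemo_experiments",             # local directory with the experiment logs
    path_in_repo="nemo_experiments",            # mirror the same layout in the repo
    repo_id="Tyl3rDrden/parakeet-hebrew-asr",   # hypothetical target repository
)
# With no commit_message argument, upload_folder uses the default
# "Upload folder using huggingface_hub" -- the title of this commit.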
This view is limited to 50 files because it contains too many changes. See raw diff.
- .gitattributes +54 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_error_log.txt +45 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_error_log.txt +45 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_error_log.txt +45 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_error_log.txt +45 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_error_log.txt +45 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/events.out.tfevents.1775305282.d12a7902a35c.540.0 +3 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/hparams.yaml +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/lightning_logs.txt +19 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_error_log.txt +48 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/events.out.tfevents.1775305542.d12a7902a35c.1101.0 +3 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/hparams.yaml +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/lightning_logs.txt +19 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_error_log.txt +49 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_error_log.txt +4 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_log_globalrank-0_localrank-0.txt +106 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/events.out.tfevents.1775305842.d12a7902a35c.1768.0 +3 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/hparams.yaml +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/lightning_logs.txt +19 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_error_log.txt +48 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_log_globalrank-0_localrank-0.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/lightning_logs.txt +0 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_error_log.txt +40 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_log_globalrank-0_localrank-0.txt +143 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/cmd-args.log +1 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/events.out.tfevents.1775306597.371eaa8bcdbe.372.0 +3 -0
- nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/hparams.yaml +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,57 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/2026-04-09_09-06-28/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/checkpoints/Speech_To_Text_Finetuning.nemo filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_0/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_1/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_10/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_2/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_3/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_4/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_5/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_6/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_7/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_8/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_9/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Finetuning/run_9/nemo_log_globalrank-0_localrank-0.txt filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-13-43/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-21-07/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-32-14/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-33-32/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-34-13/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-52-34/checkpoints/Speech_To_Text_Phase2.nemo filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-52-34/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_13-04-20/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_13-28-11/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/2026-04-09_13-29-07/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/run_0/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/run_1/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/Speech_To_Text_Phase2/run_2/git-info.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260404_122123-2026-04-04_12-20-06/run-2026-04-04_12-20-06.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260404_122542-2026-04-04_12-24-26/run-2026-04-04_12-24-26.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260404_123043-2026-04-04_12-29-27/run-2026-04-04_12-29-27.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260404_124317-2026-04-04_12-42-01/run-2026-04-04_12-42-01.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260404_125341-2026-04-04_12-49-00/run-2026-04-04_12-49-00.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_114122-0jtbmf55/run-0jtbmf55.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_120315-fl4jp2jy/run-fl4jp2jy.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_142513-1q7swtxr/run-1q7swtxr.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_152645-22lidhir/run-22lidhir.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_171021-jszyom5l/run-jszyom5l.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_171313-xy9wfyvz/run-xy9wfyvz.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_181741-0xkhtecb/run-0xkhtecb.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_185350-vp5mr58h/run-vp5mr58h.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_202234-pyv7fhv7/files/output.log filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260408_202234-pyv7fhv7/run-pyv7fhv7.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_083758-ick94apk/run-ick94apk.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_090414-6q8ded8v/run-6q8ded8v.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_090749-2026-04-09_09-06-28/run-2026-04-09_09-06-28.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_092141-2026-04-09_09-21-07/run-2026-04-09_09-21-07.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_093449-2026-04-09_09-34-13/run-2026-04-09_09-34-13.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_095308-2026-04-09_09-52-34/run-2026-04-09_09-52-34.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_130455-2026-04-09_13-04-20/run-2026-04-09_13-04-20.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_130627-dyzduaki/run-dyzduaki.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_131150-50lfjpsn/run-50lfjpsn.wandb filter=lfs diff=lfs merge=lfs -text
+nemo_experiments/wandb/run-20260409_133028-2026-04-09_13-29-07/run-2026-04-09_13-29-07.wandb filter=lfs diff=lfs merge=lfs -text
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_error_log.txt
ADDED
@@ -0,0 +1,45 @@
+[NeMo W 2026-04-04 11:18:02 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 11:18:03 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 11:23:01 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 11:23:01 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 11:23:05 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:23:05 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:23:05 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:23:09 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 11:23:10 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_error_log.txt
ADDED
@@ -0,0 +1,45 @@
+[NeMo W 2026-04-04 11:42:26 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 11:42:27 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 11:46:53 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 11:46:53 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 11:46:57 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:46:57 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:46:57 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:47:01 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 11:47:02 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_error_log.txt
ADDED
@@ -0,0 +1,45 @@
+[NeMo W 2026-04-04 11:48:34 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 11:48:34 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 11:49:38 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 11:49:38 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 11:49:42 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:49:42 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:49:42 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:49:46 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 11:49:47 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_error_log.txt
ADDED
@@ -0,0 +1,45 @@
+[NeMo W 2026-04-04 11:54:04 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 11:54:05 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 11:55:09 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 11:55:09 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 11:55:12 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:55:12 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:55:12 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 11:55:16 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 11:55:17 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_error_log.txt
ADDED
@@ -0,0 +1,45 @@
+[NeMo W 2026-04-04 12:12:11 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 12:12:11 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 12:16:38 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 12:16:38 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 12:16:41 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:16:41 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:16:41 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:16:46 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 12:16:47 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/events.out.tfevents.1775305282.d12a7902a35c.540.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:274e9dfb7ac1204275503319612ed5da784fcf5001dd5ff78a1bbeaa47750342
+size 626485
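The three added lines above are not the TensorBoard event file itself but a Git LFS pointer: per the *tfevents* rule visible in the .gitattributes context earlier, the 626,485-byte binary goes to LFS storage and only this small stanza is committed. A minimal sketch of reading such a pointer (the parsing helper is illustrative, not part of any repo tooling):

# Sketch: parse the three-line LFS pointer stanza shown above.
pointer_text = """version https://git-lfs.github.com/spec/v1
oid sha256:274e9dfb7ac1204275503319612ed5da784fcf5001dd5ff78a1bbeaa47750342
size 626485"""

fields = dict(line.split(" ", 1) for line in pointer_text.splitlines())
print(fields["oid"])        # content address used to fetch the real blob from LFS
print(int(fields["size"]))  # 626485 bytes, the true size of the event file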
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/hparams.yaml
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/lightning_logs.txt
ADDED
@@ -0,0 +1,19 @@
+LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
+
+  | Name              | Type                              | Params | Mode
+--------------------------------------------------------------------------------
+0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0      | train
+1 | encoder           | ConformerEncoder                  | 608 M  | train
+2 | spec_augmentation | SpectrogramAugmentation           | 0      | train
+3 | wer               | WER                               | 0      | train
+4 | joint             | RNNTJoint                         | 22.1 M | train
+5 | decoder           | RNNTDecoder                       | 27.5 M | train
+6 | loss              | RNNTLoss                          | 0      | train
+7 | spec_augment      | SpectrogramAugmentation           | 0      | train
+--------------------------------------------------------------------------------
+658 M     Trainable params
+0         Non-trainable params
+658 M     Total params
+2,633.960 Total estimated model params size (MB)
+708       Modules in train mode
+0         Modules in eval mode
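A quick cross-check of the summary above: Lightning's size estimate assumes 4 bytes per parameter (fp32), even though this run trains in bf16 precision. The exact parameter count below is back-computed from the MB figure, so treat it as approximate:

# 2,633.960 MB / 4 bytes per param ~= 658.49M params, i.e. the "658 M" shown above.
params = 658_490_000                  # inferred from the summary, not an exact count
print(f"{params * 4 / 1e6:,.3f} MB")  # -> 2,633.960 MB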
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_error_log.txt
ADDED
@@ -0,0 +1,48 @@
+[NeMo W 2026-04-04 12:20:06 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 12:20:06 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 12:21:10 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 12:21:10 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 12:21:14 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:21:14 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:21:14 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:21:18 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 12:21:19 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
+[NeMo W 2026-04-04 12:21:21 audio_to_text_dataset:833] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}
+[NeMo W 2026-04-04 12:21:27 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
+
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/events.out.tfevents.1775305542.d12a7902a35c.1101.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e78d7b83a7c4161e082ac67c8fc8d0d3081eaef37e5cd333c5017ed9926aac6
+size 626485
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/hparams.yaml
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/lightning_logs.txt
ADDED
@@ -0,0 +1,19 @@
+LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
+
+  | Name              | Type                              | Params | Mode
+--------------------------------------------------------------------------------
+0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0      | train
+1 | encoder           | ConformerEncoder                  | 608 M  | train
+2 | spec_augmentation | SpectrogramAugmentation           | 0      | train
+3 | wer               | WER                               | 0      | train
+4 | joint             | RNNTJoint                         | 22.1 M | train
+5 | decoder           | RNNTDecoder                       | 27.5 M | train
+6 | loss              | RNNTLoss                          | 0      | train
+7 | spec_augment      | SpectrogramAugmentation           | 0      | train
+--------------------------------------------------------------------------------
+658 M     Trainable params
+0         Non-trainable params
+658 M     Total params
+2,633.960 Total estimated model params size (MB)
+708       Modules in train mode
+0         Modules in eval mode
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_error_log.txt
ADDED
@@ -0,0 +1,49 @@
+[NeMo W 2026-04-04 12:24:26 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 12:24:26 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 12:25:30 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 12:25:30 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 12:25:33 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:25:33 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:25:33 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:25:38 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 12:25:38 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
+[NeMo W 2026-04-04 12:25:40 audio_to_text_dataset:833] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}
+[NeMo W 2026-04-04 12:25:46 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
+
+[NeMo W 2026-04-04 12:25:51 audio_preprocessing:85] AudioPreprocessor received an input signal of dtype torch.bfloat16, rather than torch.float32. In sweeps across multiple datasets, we have found that the preprocessor is not robust to low precision mathematics. As such, it runs in float32. Your input will be cast to float32, but this is not necessarily enough to recovery full accuracy. For example, simply casting input_signal from torch.float32 to torch.bfloat16, then back to torch.float32 before running AudioPreprocessor causes drops in absolute WER of up to 0.1%. torch.bfloat16 simply does not have enough mantissa bits to represent enough values in the range [-1.0,+1.0] correctly.
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_error_log.txt
ADDED
@@ -0,0 +1,4 @@
+[NeMo W 2026-04-04 12:27:59 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 12:28:00 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_log_globalrank-0_localrank-0.txt
ADDED
@@ -0,0 +1,106 @@
+[NeMo W 2026-04-04 12:27:59 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo I 2026-04-04 12:27:59 speech_to_text_finetune:198] Hydra config: name: Speech_To_Text_Finetuning
+    init_from_pretrained_model: nvidia/parakeet-tdt-0.6b-v3
+    model:
+      sample_rate: 16000
+      activation_checkpointing: true
+      train_ds:
+        shar_path: hf://buckets/Tyl3rDrden/IvritKnessetRecordingsData
+        use_lhotse: true
+        is_tarred: true
+        batch_size: 1
+        allow_missing_data: true
+        shuffle: false
+        max_duration: 30.1
+        num_workers: 8
+        pin_memory: true
+      validation_ds:
+        manifest_filepath: null
+        sample_rate: ${model.sample_rate}
+        batch_size: 16
+        shuffle: false
+        use_start_end_token: false
+        num_workers: 8
+        pin_memory: true
+      test_ds:
+        manifest_filepath: null
+        sample_rate: ${model.sample_rate}
+        batch_size: 16
+        shuffle: false
+        use_start_end_token: false
+        num_workers: 8
+        pin_memory: true
+      char_labels:
+        update_labels: false
+        labels: null
+      tokenizer:
+        update_tokenizer: true
+        dir: ./tokenizer_spe_bpe_v32768_pad_bos_eos
+        type: bpe
+      spec_augment:
+        _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+        freq_masks: 2
+        time_masks: 10
+        freq_width: 27
+        time_width: 0.05
+      optim:
+        name: adamw
+        lr: 0.0001
+        betas:
+        - 0.9
+        - 0.98
+        weight_decay: 0.001
+        sched:
+          name: CosineAnnealing
+          warmup_steps: 5000
+          warmup_ratio: null
+          min_lr: 5.0e-06
+    trainer:
+      devices: 1
+      num_nodes: 1
+      max_epochs: -1
+      max_steps: 1000000
+      val_check_interval: 2000
+      accelerator: gpu
+      strategy:
+        _target_: lightning.pytorch.strategies.DDPStrategy
+        gradient_as_bucket_view: true
+      accumulate_grad_batches: 16
+      gradient_clip_val: 0.0
+      precision: bf16
+      log_every_n_steps: 10
+      enable_progress_bar: true
+      num_sanity_val_steps: 0
+      check_val_every_n_epoch: null
+      sync_batchnorm: true
+      enable_checkpointing: false
+      logger: false
+      benchmark: false
+      limit_train_batches: null
+    exp_manager:
+      exp_dir: null
+      name: ${name}
+      create_tensorboard_logger: true
+      create_checkpoint_callback: true
+      checkpoint_callback_params:
+        monitor: val_wer
+        mode: min
+        save_top_k: 5
+        always_save_nemo: true
+      resume_if_exists: false
+      resume_ignore_no_checkpoint: false
+      create_wandb_logger: true
+      wandb_logger_kwargs:
+        name: parakeet_v3_finetune_fixed
+        project: parakeet-hebrew-asr
+
+[NeMo I 2026-04-04 12:27:59 exp_manager:594] ExpManager schema
+[NeMo I 2026-04-04 12:27:59 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
+[NeMo I 2026-04-04 12:27:59 exp_manager:655] Experiments will be logged at /workspace/nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59
+[NeMo I 2026-04-04 12:28:00 exp_manager:1262] TensorboardLogger has been set up
+[NeMo I 2026-04-04 12:28:00 exp_manager:1277] WandBLogger has been set up
+[NeMo W 2026-04-04 12:28:00 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo I 2026-04-04 12:28:00 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+[NeMo I 2026-04-04 12:28:00 speech_to_text_finetune:106] Sleeping for at least 60 seconds to wait for model download to finish.
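The dumped config above uses OmegaConf interpolations (${model.sample_rate}, ${name}), which Hydra resolves at access time. A minimal sketch, assuming the YAML body above has been saved locally as speech_to_text_finetune.yaml (a hypothetical file, not part of this commit):

from omegaconf import OmegaConf

# Hypothetical local copy of the config dumped in the log above.
cfg = OmegaConf.load("speech_to_text_finetune.yaml")

# ${model.sample_rate} resolves against the config root on access:
assert cfg.model.validation_ds.sample_rate == cfg.model.sample_rate == 16000

# Command-line overrides from cmd-args.log map onto the same tree, e.g.:
cfg.trainer.accumulate_grad_batches = 16
print(OmegaConf.to_yaml(cfg.trainer))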
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/events.out.tfevents.1775305842.d12a7902a35c.1768.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:139ab44e948c263b99dc14d04c326745db4f67c834e9a84e841f1e97593e3923
+size 626485
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/hparams.yaml
ADDED
The diff for this file is too large to render. See raw diff.
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/lightning_logs.txt
ADDED
@@ -0,0 +1,19 @@
+LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
+
+  | Name              | Type                              | Params | Mode
+--------------------------------------------------------------------------------
+0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0      | train
+1 | encoder           | ConformerEncoder                  | 608 M  | train
+2 | spec_augmentation | SpectrogramAugmentation           | 0      | train
+3 | wer               | WER                               | 0      | train
+4 | joint             | RNNTJoint                         | 22.1 M | train
+5 | decoder           | RNNTDecoder                       | 27.5 M | train
+6 | loss              | RNNTLoss                          | 0      | train
+7 | spec_augment      | SpectrogramAugmentation           | 0      | train
+--------------------------------------------------------------------------------
+658 M     Trainable params
+0         Non-trainable params
+658 M     Total params
+2,633.960 Total estimated model params size (MB)
+708       Modules in train mode
+0         Modules in eval mode
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_error_log.txt
ADDED
@@ -0,0 +1,48 @@
+[NeMo W 2026-04-04 12:29:26 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 12:29:27 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 12:30:31 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 12:30:31 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
+[NeMo W 2026-04-04 12:30:34 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:30:34 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:30:34 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+[NeMo W 2026-04-04 12:30:39 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+[NeMo W 2026-04-04 12:30:39 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
+[NeMo W 2026-04-04 12:30:41 audio_to_text_dataset:833] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}
+[NeMo W 2026-04-04 12:30:47 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
+
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_log_globalrank-0_localrank-0.txt
ADDED
The diff for this file is too large to render. See raw diff.
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/lightning_logs.txt
ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_error_log.txt
ADDED
@@ -0,0 +1,40 @@
+[NeMo W 2026-04-04 12:37:16 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo W 2026-04-04 12:37:16 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo W 2026-04-04 12:41:44 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 12:41:44 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_log_globalrank-0_localrank-0.txt
ADDED
@@ -0,0 +1,143 @@
+[NeMo W 2026-04-04 12:37:16 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+    warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+[NeMo I 2026-04-04 12:37:16 speech_to_text_finetune:198] Hydra config: name: Speech_To_Text_Finetuning
+    init_from_pretrained_model: nvidia/parakeet-tdt-0.6b-v3
+    model:
+      sample_rate: 16000
+      activation_checkpointing: true
+      train_ds:
+        shar_path: hf://buckets/Tyl3rDrden/IvritKnessetRecordingsData
+        use_lhotse: true
+        is_tarred: true
+        batch_size: 1
+        allow_missing_data: true
+        shuffle: false
+        max_duration: 30.1
+        num_workers: 8
+        pin_memory: true
+      validation_ds:
+        manifest_filepath: null
+        sample_rate: ${model.sample_rate}
+        batch_size: 16
+        shuffle: false
+        use_start_end_token: false
+        num_workers: 8
+        pin_memory: true
+      test_ds:
+        manifest_filepath: null
+        sample_rate: ${model.sample_rate}
+        batch_size: 16
+        shuffle: false
+        use_start_end_token: false
+        num_workers: 8
+        pin_memory: true
+      char_labels:
+        update_labels: false
+        labels: null
+      tokenizer:
+        update_tokenizer: true
+        dir: ./tokenizer_spe_bpe_v32768_pad_bos_eos
+        type: bpe
+      spec_augment:
+        _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+        freq_masks: 2
+        time_masks: 10
+        freq_width: 27
+        time_width: 0.05
+      optim:
+        name: adamw
+        lr: 0.0001
+        betas:
+        - 0.9
+        - 0.98
+        weight_decay: 0.001
+        sched:
+          name: CosineAnnealing
+          warmup_steps: 5000
+          warmup_ratio: null
+          min_lr: 5.0e-06
+    trainer:
+      devices: 1
+      num_nodes: 1
+      max_epochs: -1
+      max_steps: 1000000
+      val_check_interval: 2000
+      accelerator: gpu
+      strategy:
+        _target_: lightning.pytorch.strategies.DDPStrategy
+        gradient_as_bucket_view: true
+      accumulate_grad_batches: 16
+      gradient_clip_val: 0.0
+      precision: bf16
+      log_every_n_steps: 10
+      enable_progress_bar: true
+      num_sanity_val_steps: 0
+      check_val_every_n_epoch: null
+      sync_batchnorm: true
+      enable_checkpointing: false
+      logger: false
+      benchmark: false
+      limit_train_batches: null
+    exp_manager:
+      exp_dir: null
+      name: ${name}
+      create_tensorboard_logger: true
+      create_checkpoint_callback: true
+      checkpoint_callback_params:
+        monitor: val_wer
+        mode: min
+        save_top_k: 5
+        always_save_nemo: true
+      resume_if_exists: false
+      resume_ignore_no_checkpoint: false
+      create_wandb_logger: true
+      wandb_logger_kwargs:
+        name: parakeet_v3_finetune_fixed
+        project: parakeet-hebrew-asr
+
+[NeMo I 2026-04-04 12:37:16 exp_manager:594] ExpManager schema
+[NeMo I 2026-04-04 12:37:16 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
+[NeMo I 2026-04-04 12:37:16 exp_manager:655] Experiments will be logged at /workspace/nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16
+[NeMo I 2026-04-04 12:37:16 exp_manager:1262] TensorboardLogger has been set up
+[NeMo I 2026-04-04 12:37:16 exp_manager:1277] WandBLogger has been set up
+[NeMo W 2026-04-04 12:37:16 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+[NeMo I 2026-04-04 12:37:16 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+[NeMo I 2026-04-04 12:37:17 speech_to_text_finetune:106] Sleeping for at least 60 seconds to wait for model download to finish.
+[NeMo I 2026-04-04 12:41:43 mixins:184] Tokenizer SentencePieceTokenizer initialized with 8192 tokens
+[NeMo W 2026-04-04 12:41:44 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+    Train config :
+    use_lhotse: true
+    skip_missing_manifest_entries: true
+    input_cfg: null
+    tarred_audio_filepaths: null
+    manifest_filepath: null
+    sample_rate: 16000
+    shuffle: true
+    num_workers: 2
+    pin_memory: true
+    max_duration: 10.0
+    min_duration: 1.0
+    text_field: answer
+    batch_duration: null
+    max_tps: null
+    use_bucketing: true
+    bucket_duration_bins: null
+    bucket_batch_size: null
+    num_buckets: 30
+    bucket_buffer_size: 20000
+    shuffle_buffer_size: 10000
+
+[NeMo W 2026-04-04 12:41:44 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+    Validation config :
+    use_lhotse: true
+    manifest_filepath: null
+    sample_rate: 16000
+    batch_size: 16
+    shuffle: false
+    max_duration: 40.0
+    min_duration: 0.1
+    num_workers: 2
+    pin_memory: true
+    text_field: answer
+
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/cmd-args.log
ADDED
@@ -0,0 +1 @@
+./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/events.out.tfevents.1775306597.371eaa8bcdbe.372.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59663ec21cc56d873f79139f8096a37a029259323d95a283c610907f70a06dff
+size 626485
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/hparams.yaml
ADDED
The diff for this file is too large to render. See raw diff.