Tyl3rDrden committed on
Commit a67cb12 · verified
1 Parent(s): 7d456c1

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +54 -0
  2. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/cmd-args.log +1 -0
  3. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/lightning_logs.txt +0 -0
  4. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_error_log.txt +45 -0
  5. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_log_globalrank-0_localrank-0.txt +0 -0
  6. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/cmd-args.log +1 -0
  7. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/lightning_logs.txt +0 -0
  8. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_error_log.txt +45 -0
  9. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_log_globalrank-0_localrank-0.txt +0 -0
  10. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/cmd-args.log +1 -0
  11. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/lightning_logs.txt +0 -0
  12. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_error_log.txt +45 -0
  13. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_log_globalrank-0_localrank-0.txt +0 -0
  14. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/cmd-args.log +1 -0
  15. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/lightning_logs.txt +0 -0
  16. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_error_log.txt +45 -0
  17. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_log_globalrank-0_localrank-0.txt +0 -0
  18. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/cmd-args.log +1 -0
  19. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/lightning_logs.txt +0 -0
  20. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_error_log.txt +45 -0
  21. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_log_globalrank-0_localrank-0.txt +0 -0
  22. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/cmd-args.log +1 -0
  23. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/events.out.tfevents.1775305282.d12a7902a35c.540.0 +3 -0
  24. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/hparams.yaml +0 -0
  25. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/lightning_logs.txt +19 -0
  26. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_error_log.txt +48 -0
  27. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_log_globalrank-0_localrank-0.txt +0 -0
  28. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/cmd-args.log +1 -0
  29. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/events.out.tfevents.1775305542.d12a7902a35c.1101.0 +3 -0
  30. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/hparams.yaml +0 -0
  31. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/lightning_logs.txt +19 -0
  32. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_error_log.txt +49 -0
  33. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_log_globalrank-0_localrank-0.txt +0 -0
  34. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/cmd-args.log +1 -0
  35. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/lightning_logs.txt +0 -0
  36. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_error_log.txt +4 -0
  37. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_log_globalrank-0_localrank-0.txt +106 -0
  38. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/cmd-args.log +1 -0
  39. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/events.out.tfevents.1775305842.d12a7902a35c.1768.0 +3 -0
  40. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/hparams.yaml +0 -0
  41. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/lightning_logs.txt +19 -0
  42. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_error_log.txt +48 -0
  43. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_log_globalrank-0_localrank-0.txt +0 -0
  44. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/cmd-args.log +1 -0
  45. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/lightning_logs.txt +0 -0
  46. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_error_log.txt +40 -0
  47. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_log_globalrank-0_localrank-0.txt +143 -0
  48. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/cmd-args.log +1 -0
  49. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/events.out.tfevents.1775306597.371eaa8bcdbe.372.0 +3 -0
  50. nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/hparams.yaml +0 -0
.gitattributes CHANGED
@@ -33,3 +33,57 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/2026-04-09_09-06-28/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/checkpoints/Speech_To_Text_Finetuning.nemo filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_0/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_1/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_10/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_2/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_3/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_4/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_5/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_6/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_7/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_8/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_9/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Finetuning/run_9/nemo_log_globalrank-0_localrank-0.txt filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-13-43/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-21-07/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-32-14/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-33-32/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-34-13/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-52-34/checkpoints/Speech_To_Text_Phase2.nemo filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_09-52-34/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_13-04-20/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_13-28-11/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/2026-04-09_13-29-07/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/run_0/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/run_1/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/Speech_To_Text_Phase2/run_2/git-info.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260404_122123-2026-04-04_12-20-06/run-2026-04-04_12-20-06.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260404_122542-2026-04-04_12-24-26/run-2026-04-04_12-24-26.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260404_123043-2026-04-04_12-29-27/run-2026-04-04_12-29-27.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260404_124317-2026-04-04_12-42-01/run-2026-04-04_12-42-01.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260404_125341-2026-04-04_12-49-00/run-2026-04-04_12-49-00.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_114122-0jtbmf55/run-0jtbmf55.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_120315-fl4jp2jy/run-fl4jp2jy.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_142513-1q7swtxr/run-1q7swtxr.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_152645-22lidhir/run-22lidhir.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_171021-jszyom5l/run-jszyom5l.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_171313-xy9wfyvz/run-xy9wfyvz.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_181741-0xkhtecb/run-0xkhtecb.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_185350-vp5mr58h/run-vp5mr58h.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_202234-pyv7fhv7/files/output.log filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260408_202234-pyv7fhv7/run-pyv7fhv7.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_083758-ick94apk/run-ick94apk.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_090414-6q8ded8v/run-6q8ded8v.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_090749-2026-04-09_09-06-28/run-2026-04-09_09-06-28.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_092141-2026-04-09_09-21-07/run-2026-04-09_09-21-07.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_093449-2026-04-09_09-34-13/run-2026-04-09_09-34-13.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_095308-2026-04-09_09-52-34/run-2026-04-09_09-52-34.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_130455-2026-04-09_13-04-20/run-2026-04-09_13-04-20.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_130627-dyzduaki/run-dyzduaki.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_131150-50lfjpsn/run-50lfjpsn.wandb filter=lfs diff=lfs merge=lfs -text
+ nemo_experiments/wandb/run-20260409_133028-2026-04-09_13-29-07/run-2026-04-09_13-29-07.wandb filter=lfs diff=lfs merge=lfs -text
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
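The command above uses Hydra's dotted-key override syntax: bare overrides such as trainer.devices=1 replace keys that already exist in the base config, while a leading "+" (as in +trainer.limit_train_batches=null) appends a key the base config does not have. A minimal sketch of that semantics using OmegaConf, the config library Hydra builds on; the base values below are placeholders, not the repository's actual defaults:

    from omegaconf import OmegaConf

    # Placeholder base config standing in for speech_to_text_finetune.yaml.
    cfg = OmegaConf.create({"trainer": {"devices": 2, "accelerator": "cpu"}})

    # Bare overrides like trainer.devices=1 replace existing keys.
    OmegaConf.update(cfg, "trainer.devices", 1)
    OmegaConf.update(cfg, "trainer.accelerator", "gpu")

    # "+trainer.limit_train_batches=null" adds a key absent from the base config.
    OmegaConf.update(cfg, "trainer.limit_train_batches", None, force_add=True)

    print(OmegaConf.to_yaml(cfg))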
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_error_log.txt ADDED
@@ -0,0 +1,45 @@
+ [NeMo W 2026-04-04 11:18:02 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 11:18:03 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 11:23:01 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 11:23:01 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 11:23:05 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:23:05 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:23:05 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:23:09 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 11:23:10 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
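The modelPT:188/195 warnings above are expected in this flow, since the finetuning script wires up its own Lhotse loaders later. For reference, a hedged sketch of how one would satisfy the warning directly through the NeMo API it names; the manifest path and batch size here are placeholders, not values from this run:

    import nemo.collections.asr as nemo_asr
    from omegaconf import OmegaConf

    # Load the same pretrained checkpoint the run starts from.
    model = nemo_asr.models.ASRModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v3")

    # Placeholder train_ds config; a real run points manifest_filepath at data.
    train_cfg = OmegaConf.create({
        "manifest_filepath": "train_manifest.json",
        "sample_rate": 16000,
        "batch_size": 8,
        "shuffle": True,
        "num_workers": 2,
        "pin_memory": True,
    })
    model.setup_training_data(train_data_config=train_cfg)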
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-18-03/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_error_log.txt ADDED
@@ -0,0 +1,45 @@
+ [NeMo W 2026-04-04 11:42:26 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 11:42:27 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 11:46:53 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 11:46:53 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 11:46:57 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:46:57 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:46:57 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:47:01 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 11:47:02 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-42-27/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_error_log.txt ADDED
@@ -0,0 +1,45 @@
+ [NeMo W 2026-04-04 11:48:34 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 11:48:34 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 11:49:38 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 11:49:38 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 11:49:42 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:49:42 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:49:42 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:49:46 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 11:49:47 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-48-34/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_error_log.txt ADDED
@@ -0,0 +1,45 @@
+ [NeMo W 2026-04-04 11:54:04 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 11:54:05 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 11:55:09 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 11:55:09 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 11:55:12 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:55:12 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:55:12 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 11:55:16 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 11:55:17 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_11-54-05/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_error_log.txt ADDED
@@ -0,0 +1,45 @@
+ [NeMo W 2026-04-04 12:12:11 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 12:12:11 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 12:16:38 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 12:16:38 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 12:16:41 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:16:41 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:16:41 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:16:46 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 12:16:47 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-12-11/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/events.out.tfevents.1775305282.d12a7902a35c.540.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:274e9dfb7ac1204275503319612ed5da784fcf5001dd5ff78a1bbeaa47750342
+ size 626485
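The three lines above are a Git LFS pointer, not the TensorBoard event data itself; the actual bytes live in LFS storage per the .gitattributes rules added in this commit. A hedged sketch of parsing that pointer format, written for illustration only:

    def parse_lfs_pointer(text: str) -> dict:
        # Each pointer line is "<key> <value>", e.g. "oid sha256:274e9dfb...".
        fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
        return {
            "version": fields["version"],
            "oid": fields["oid"].split(":", 1)[1],  # strip the "sha256:" prefix
            "size_bytes": int(fields["size"]),      # 626485 for the file above
        }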
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/hparams.yaml ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/lightning_logs.txt ADDED
@@ -0,0 +1,19 @@
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
+
+ | Name | Type | Params | Mode
+ --------------------------------------------------------------------------------
+ 0 | preprocessor | AudioToMelSpectrogramPreprocessor | 0 | train
+ 1 | encoder | ConformerEncoder | 608 M | train
+ 2 | spec_augmentation | SpectrogramAugmentation | 0 | train
+ 3 | wer | WER | 0 | train
+ 4 | joint | RNNTJoint | 22.1 M | train
+ 5 | decoder | RNNTDecoder | 27.5 M | train
+ 6 | loss | RNNTLoss | 0 | train
+ 7 | spec_augment | SpectrogramAugmentation | 0 | train
+ --------------------------------------------------------------------------------
+ 658 M Trainable params
+ 0 Non-trainable params
+ 658 M Total params
+ 2,633.960 Total estimated model params size (MB)
+ 708 Modules in train mode
+ 0 Modules in eval mode
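The summary's totals are internally consistent: 608 M (encoder) + 22.1 M (joint) + 27.5 M (decoder) ≈ 658 M parameters, and at the 4 bytes per float32 parameter that Lightning assumes for this estimate, that matches the quoted 2,633.960 MB. A quick check under that 4-byte assumption:

    # 2,633.960 MB at 4 bytes/param implies ~658.49M parameters, matching "658 M".
    total_mb = 2633.960
    params = total_mb * 1e6 / 4
    print(f"{params / 1e6:.2f} M params")  # -> 658.49 M params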
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_error_log.txt ADDED
@@ -0,0 +1,48 @@
+ [NeMo W 2026-04-04 12:20:06 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 12:20:06 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 12:21:10 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 12:21:10 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 12:21:14 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:21:14 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:21:14 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:21:18 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 12:21:19 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: allow_missing_data,is_tarred
+ [NeMo W 2026-04-04 12:21:21 audio_to_text_dataset:833] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}
+ [NeMo W 2026-04-04 12:21:27 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
+
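The fit_loop warning above fires because this run ended up with a single training batch per epoch while trainer.log_every_n_steps stayed at its configured value of 10. A hedged sketch of the remedy in Lightning terms; the equivalent Hydra override for this script would presumably be trainer.log_every_n_steps=1:

    import lightning.pytorch as pl

    # With only 1 batch per epoch, a 10-step logging interval never triggers;
    # lowering it makes per-epoch training metrics visible again.
    trainer = pl.Trainer(log_every_n_steps=1)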
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-20-06/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/events.out.tfevents.1775305542.d12a7902a35c.1101.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5e78d7b83a7c4161e082ac67c8fc8d0d3081eaef37e5cd333c5017ed9926aac6
+ size 626485
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/hparams.yaml ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/lightning_logs.txt ADDED
@@ -0,0 +1,19 @@
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
+
+ | Name | Type | Params | Mode
+ --------------------------------------------------------------------------------
+ 0 | preprocessor | AudioToMelSpectrogramPreprocessor | 0 | train
+ 1 | encoder | ConformerEncoder | 608 M | train
+ 2 | spec_augmentation | SpectrogramAugmentation | 0 | train
+ 3 | wer | WER | 0 | train
+ 4 | joint | RNNTJoint | 22.1 M | train
+ 5 | decoder | RNNTDecoder | 27.5 M | train
+ 6 | loss | RNNTLoss | 0 | train
+ 7 | spec_augment | SpectrogramAugmentation | 0 | train
+ --------------------------------------------------------------------------------
+ 658 M Trainable params
+ 0 Non-trainable params
+ 658 M Total params
+ 2,633.960 Total estimated model params size (MB)
+ 708 Modules in train mode
+ 0 Modules in eval mode
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_error_log.txt ADDED
@@ -0,0 +1,49 @@
+ [NeMo W 2026-04-04 12:24:26 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 12:24:26 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 12:25:30 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 12:25:30 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 12:25:33 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:25:33 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:25:33 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:25:38 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 12:25:38 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
+ [NeMo W 2026-04-04 12:25:40 audio_to_text_dataset:833] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}
+ [NeMo W 2026-04-04 12:25:46 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
+
+ [NeMo W 2026-04-04 12:25:51 audio_preprocessing:85] AudioPreprocessor received an input signal of dtype torch.bfloat16, rather than torch.float32. In sweeps across multiple datasets, we have found that the preprocessor is not robust to low precision mathematics. As such, it runs in float32. Your input will be cast to float32, but this is not necessarily enough to recovery full accuracy. For example, simply casting input_signal from torch.float32 to torch.bfloat16, then back to torch.float32 before running AudioPreprocessor causes drops in absolute WER of up to 0.1%. torch.bfloat16 simply does not have enough mantissa bits to represent enough values in the range [-1.0,+1.0] correctly.
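The audio_preprocessing:85 warning above appears because this run trains with precision: bf16 while the mel-spectrogram front end needs float32 waveforms: bf16 lacks the mantissa bits to represent [-1.0, +1.0] samples finely, so NeMo casts back to float32 internally. A hedged sketch of the guard it describes, for illustration rather than NeMo's exact code:

    import torch

    def ensure_float32_waveform(input_signal: torch.Tensor) -> torch.Tensor:
        # A bf16 -> float32 round trip already loses waveform precision, so the
        # cast is applied as early as possible; per the warning, even one round
        # trip can cost up to 0.1% absolute WER.
        if input_signal.dtype == torch.bfloat16:
            return input_signal.to(torch.float32)
        return input_signal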
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-24-26/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_error_log.txt ADDED
@@ -0,0 +1,4 @@
+ [NeMo W 2026-04-04 12:27:59 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 12:28:00 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59/nemo_log_globalrank-0_localrank-0.txt ADDED
@@ -0,0 +1,106 @@
+ [NeMo W 2026-04-04 12:27:59 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo I 2026-04-04 12:27:59 speech_to_text_finetune:198] Hydra config: name: Speech_To_Text_Finetuning
+ init_from_pretrained_model: nvidia/parakeet-tdt-0.6b-v3
+ model:
+   sample_rate: 16000
+   activation_checkpointing: true
+   train_ds:
+     shar_path: hf://buckets/Tyl3rDrden/IvritKnessetRecordingsData
+     use_lhotse: true
+     is_tarred: true
+     batch_size: 1
+     allow_missing_data: true
+     shuffle: false
+     max_duration: 30.1
+     num_workers: 8
+     pin_memory: true
+   validation_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     batch_size: 16
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+   test_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     batch_size: 16
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+   char_labels:
+     update_labels: false
+     labels: null
+   tokenizer:
+     update_tokenizer: true
+     dir: ./tokenizer_spe_bpe_v32768_pad_bos_eos
+     type: bpe
+   spec_augment:
+     _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+     freq_masks: 2
+     time_masks: 10
+     freq_width: 27
+     time_width: 0.05
+   optim:
+     name: adamw
+     lr: 0.0001
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: CosineAnnealing
+       warmup_steps: 5000
+       warmup_ratio: null
+       min_lr: 5.0e-06
+ trainer:
+   devices: 1
+   num_nodes: 1
+   max_epochs: -1
+   max_steps: 1000000
+   val_check_interval: 2000
+   accelerator: gpu
+   strategy:
+     _target_: lightning.pytorch.strategies.DDPStrategy
+     gradient_as_bucket_view: true
+   accumulate_grad_batches: 16
+   gradient_clip_val: 0.0
+   precision: bf16
+   log_every_n_steps: 10
+   enable_progress_bar: true
+   num_sanity_val_steps: 0
+   check_val_every_n_epoch: null
+   sync_batchnorm: true
+   enable_checkpointing: false
+   logger: false
+   benchmark: false
+   limit_train_batches: null
+ exp_manager:
+   exp_dir: null
+   name: ${name}
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   checkpoint_callback_params:
+     monitor: val_wer
+     mode: min
+     save_top_k: 5
+     always_save_nemo: true
+   resume_if_exists: false
+   resume_ignore_no_checkpoint: false
+   create_wandb_logger: true
+   wandb_logger_kwargs:
+     name: parakeet_v3_finetune_fixed
+     project: parakeet-hebrew-asr
+
+ [NeMo I 2026-04-04 12:27:59 exp_manager:594] ExpManager schema
+ [NeMo I 2026-04-04 12:27:59 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
+ [NeMo I 2026-04-04 12:27:59 exp_manager:655] Experiments will be logged at /workspace/nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-27-59
+ [NeMo I 2026-04-04 12:28:00 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo I 2026-04-04 12:28:00 exp_manager:1277] WandBLogger has been set up
+ [NeMo W 2026-04-04 12:28:00 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-04 12:28:00 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-04 12:28:00 speech_to_text_finetune:106] Sleeping for at least 60 seconds to wait for model download to finish.
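The optim.sched block in the config above (CosineAnnealing with warmup_steps: 5000, lr: 0.0001, min_lr: 5.0e-06, against trainer.max_steps: 1000000) implies the usual warmup-then-cosine learning-rate curve. A hedged sketch of that schedule; NeMo's CosineAnnealing implementation may differ in edge cases such as the warmup endpoint:

    import math

    def lr_at(step: int, lr: float = 1e-4, min_lr: float = 5e-6,
              warmup_steps: int = 5000, max_steps: int = 1_000_000) -> float:
        if step < warmup_steps:
            return lr * step / warmup_steps  # linear warmup from 0 to lr
        # Cosine decay from lr down to min_lr over the remaining steps.
        progress = (step - warmup_steps) / (max_steps - warmup_steps)
        return min_lr + 0.5 * (lr - min_lr) * (1.0 + math.cos(math.pi * progress))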
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/events.out.tfevents.1775305842.d12a7902a35c.1768.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:139ab44e948c263b99dc14d04c326745db4f67c834e9a84e841f1e97593e3923
+ size 626485
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/hparams.yaml ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/lightning_logs.txt ADDED
@@ -0,0 +1,19 @@
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
+
+ | Name | Type | Params | Mode
+ --------------------------------------------------------------------------------
+ 0 | preprocessor | AudioToMelSpectrogramPreprocessor | 0 | train
+ 1 | encoder | ConformerEncoder | 608 M | train
+ 2 | spec_augmentation | SpectrogramAugmentation | 0 | train
+ 3 | wer | WER | 0 | train
+ 4 | joint | RNNTJoint | 22.1 M | train
+ 5 | decoder | RNNTDecoder | 27.5 M | train
+ 6 | loss | RNNTLoss | 0 | train
+ 7 | spec_augment | SpectrogramAugmentation | 0 | train
+ --------------------------------------------------------------------------------
+ 658 M Trainable params
+ 0 Non-trainable params
+ 658 M Total params
+ 2,633.960 Total estimated model params size (MB)
+ 708 Modules in train mode
+ 0 Modules in eval mode
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_error_log.txt ADDED
@@ -0,0 +1,48 @@
+ [NeMo W 2026-04-04 12:29:26 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 12:29:27 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 12:30:31 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 12:30:31 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
+ [NeMo W 2026-04-04 12:30:34 modelPT:300] You tried to register an artifact under config key=tokenizer.model_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:30:34 modelPT:300] You tried to register an artifact under config key=tokenizer.vocab_path but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:30:34 modelPT:300] You tried to register an artifact under config key=tokenizer.spe_tokenizer_vocab but an artifact for it has already been registered.
+ [NeMo W 2026-04-04 12:30:39 speech_to_text_finetune:167] The vocabulary size of the new tokenizer differs from that of the loaded model. As a result, finetuning will proceed with the new vocabulary, and the decoder will be reinitialized.
+ [NeMo W 2026-04-04 12:30:39 dataloader:826] The following configuration keys are ignored by Lhotse dataloader: is_tarred,allow_missing_data
+ [NeMo W 2026-04-04 12:30:41 audio_to_text_dataset:833] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'sample_rate': 16000, 'batch_size': 16, 'shuffle': False, 'use_start_end_token': False, 'num_workers': 8, 'pin_memory': True}
+ [NeMo W 2026-04-04 12:30:47 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
+
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-29-27/nemo_log_globalrank-0_localrank-0.txt ADDED
The diff for this file is too large to render. See raw diff
 
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/lightning_logs.txt ADDED
File without changes
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_error_log.txt ADDED
@@ -0,0 +1,40 @@
+ [NeMo W 2026-04-04 12:37:16 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo W 2026-04-04 12:37:16 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-04 12:41:44 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 12:41:44 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
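Both error logs open with pydub failing to locate ffmpeg or avconv. A small, standard-library-only check one could run in the same container to confirm whether the warning matters; presumably these runs proceed because 16 kHz WAV/shar audio needs no external decoder:

```python
# Quick environment check for the pydub warning above (illustrative, stdlib only).
import shutil

for tool in ("ffmpeg", "avconv"):
    path = shutil.which(tool)
    print(f"{tool}: {path or 'not found on PATH'}")
# pydub only needs one of these when decoding non-WAV inputs, so the warning is
# harmless for pipelines that feed plain 16 kHz WAV (or pre-decoded shar) audio.
```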
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16/nemo_log_globalrank-0_localrank-0.txt ADDED
@@ -0,0 +1,143 @@
+ [NeMo W 2026-04-04 12:37:16 nemo_logging:364] /opt/venv/lib/python3.12/site-packages/pydub/utils.py:170: RuntimeWarning: Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work
+ warn("Couldn't find ffmpeg or avconv - defaulting to ffmpeg, but may not work", RuntimeWarning)
+
+ [NeMo I 2026-04-04 12:37:16 speech_to_text_finetune:198] Hydra config: name: Speech_To_Text_Finetuning
+ init_from_pretrained_model: nvidia/parakeet-tdt-0.6b-v3
+ model:
+   sample_rate: 16000
+   activation_checkpointing: true
+   train_ds:
+     shar_path: hf://buckets/Tyl3rDrden/IvritKnessetRecordingsData
+     use_lhotse: true
+     is_tarred: true
+     batch_size: 1
+     allow_missing_data: true
+     shuffle: false
+     max_duration: 30.1
+     num_workers: 8
+     pin_memory: true
+   validation_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     batch_size: 16
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+   test_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     batch_size: 16
+     shuffle: false
+     use_start_end_token: false
+     num_workers: 8
+     pin_memory: true
+   char_labels:
+     update_labels: false
+     labels: null
+   tokenizer:
+     update_tokenizer: true
+     dir: ./tokenizer_spe_bpe_v32768_pad_bos_eos
+     type: bpe
+   spec_augment:
+     _target_: nemo.collections.asr.modules.SpectrogramAugmentation
+     freq_masks: 2
+     time_masks: 10
+     freq_width: 27
+     time_width: 0.05
+   optim:
+     name: adamw
+     lr: 0.0001
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: CosineAnnealing
+       warmup_steps: 5000
+       warmup_ratio: null
+       min_lr: 5.0e-06
+ trainer:
+   devices: 1
+   num_nodes: 1
+   max_epochs: -1
+   max_steps: 1000000
+   val_check_interval: 2000
+   accelerator: gpu
+   strategy:
+     _target_: lightning.pytorch.strategies.DDPStrategy
+     gradient_as_bucket_view: true
+   accumulate_grad_batches: 16
+   gradient_clip_val: 0.0
+   precision: bf16
+   log_every_n_steps: 10
+   enable_progress_bar: true
+   num_sanity_val_steps: 0
+   check_val_every_n_epoch: null
+   sync_batchnorm: true
+   enable_checkpointing: false
+   logger: false
+   benchmark: false
+   limit_train_batches: null
+ exp_manager:
+   exp_dir: null
+   name: ${name}
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   checkpoint_callback_params:
+     monitor: val_wer
+     mode: min
+     save_top_k: 5
+     always_save_nemo: true
+   resume_if_exists: false
+   resume_ignore_no_checkpoint: false
+   create_wandb_logger: true
+   wandb_logger_kwargs:
+     name: parakeet_v3_finetune_fixed
+     project: parakeet-hebrew-asr
+
+ [NeMo I 2026-04-04 12:37:16 exp_manager:594] ExpManager schema
+ [NeMo I 2026-04-04 12:37:16 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
+ [NeMo I 2026-04-04 12:37:16 exp_manager:655] Experiments will be logged at /workspace/nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-37-16
+ [NeMo I 2026-04-04 12:37:16 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo I 2026-04-04 12:37:16 exp_manager:1277] WandBLogger has been set up
+ [NeMo W 2026-04-04 12:37:16 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 1000000. Please ensure that max_steps will run for at least None epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-04 12:37:16 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-04 12:37:17 speech_to_text_finetune:106] Sleeping for at least 60 seconds to wait for model download to finish.
+ [NeMo I 2026-04-04 12:41:43 mixins:184] Tokenizer SentencePieceTokenizer initialized with 8192 tokens
+ [NeMo W 2026-04-04 12:41:44 modelPT:188] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
+ Train config :
+ use_lhotse: true
+ skip_missing_manifest_entries: true
+ input_cfg: null
+ tarred_audio_filepaths: null
+ manifest_filepath: null
+ sample_rate: 16000
+ shuffle: true
+ num_workers: 2
+ pin_memory: true
+ max_duration: 10.0
+ min_duration: 1.0
+ text_field: answer
+ batch_duration: null
+ max_tps: null
+ use_bucketing: true
+ bucket_duration_bins: null
+ bucket_batch_size: null
+ num_buckets: 30
+ bucket_buffer_size: 20000
+ shuffle_buffer_size: 10000
+
+ [NeMo W 2026-04-04 12:41:44 modelPT:195] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s).
+ Validation config :
+ use_lhotse: true
+ manifest_filepath: null
+ sample_rate: 16000
+ batch_size: 16
+ shuffle: false
+ max_duration: 40.0
+ min_duration: 0.1
+ num_workers: 2
+ pin_memory: true
+ text_field: answer
+
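The dump above pairs `update_tokenizer: true` and a `v32768` tokenizer directory with a checkpoint whose own tokenizer reports 8192 tokens, which is what triggers the decoder-reinitialization warning seen in the 12-29-27 log. A hedged sketch of roughly what that setup corresponds to in the NeMo API, with paths and values copied from the dump; the actual speech_to_text_finetune.py may sequence these steps differently:

```python
# Hedged sketch of the setup the Hydra dump above describes; not the script's
# exact code. Paths and values are taken from the dump.
from nemo.collections.asr.models import ASRModel

model = ASRModel.from_pretrained(model_name="nvidia/parakeet-tdt-0.6b-v3")

# update_tokenizer: true swaps in the new BPE tokenizer; a vocab-size mismatch
# reinitializes the decoder, as the speech_to_text_finetune:167 warning states.
model.change_vocabulary(
    new_tokenizer_dir="./tokenizer_spe_bpe_v32768_pad_bos_eos",
    new_tokenizer_type="bpe",
)

# train_ds from the dump: Lhotse shar input streamed straight from the Hub.
model.setup_training_data(train_data_config={
    "use_lhotse": True,
    "shar_path": "hf://buckets/Tyl3rDrden/IvritKnessetRecordingsData",
    "batch_size": 1,
    "max_duration": 30.1,
    "num_workers": 8,
    "pin_memory": True,
})
```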
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/cmd-args.log ADDED
@@ -0,0 +1 @@
+ ./speech_to_text_finetune.py --config-path=. --config-name=speech_to_text_finetune trainer.accumulate_grad_batches=16 trainer.devices=1 trainer.accelerator=gpu +trainer.limit_train_batches=null exp_manager.create_wandb_logger=True exp_manager.wandb_logger_kwargs.name=parakeet_v3_finetune_fixed exp_manager.wandb_logger_kwargs.project=parakeet-hebrew-asr
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/events.out.tfevents.1775306597.371eaa8bcdbe.372.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:59663ec21cc56d873f79139f8096a37a029259323d95a283c610907f70a06dff
+ size 626485
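The tfevents file is stored through Git LFS, so the three lines above are the entire pointer file: the spec version, the SHA-256 of the real blob, and its size in bytes. A minimal sketch of parsing that format (the helper name is made up for illustration):

```python
# Minimal sketch: parse a Git LFS pointer file like the three lines above.
# Format per https://git-lfs.github.com/spec/v1: "version", "oid", "size" lines.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:59663ec21cc56d873f79139f8096a37a029259323d95a283c610907f70a06dff
size 626485"""
print(parse_lfs_pointer(pointer))  # {'version': ..., 'sha256': '5966...', 'size_bytes': 626485}
```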
nemo_experiments/Speech_To_Text_Finetuning/2026-04-04_12-42-01/hparams.yaml ADDED
The diff for this file is too large to render. See raw diff