projecti7 commited on 28 days ago

Commit

fc58ffd

verified ·

1 Parent(s): 3c3c0db

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

log/log-train-2026-01-13-16-52-16-0 +96 -0
log/log-train-2026-01-13-16-52-16-1 +96 -0
log/log-train-2026-01-13-16-53-36-0 +107 -0
log/log-train-2026-01-13-16-53-36-1 +107 -0
log/log-train-2026-01-13-16-54-14-0 +107 -0
log/log-train-2026-01-13-16-54-14-1 +107 -0
log/log-train-2026-01-13-17-00-38-0 +107 -0
log/log-train-2026-01-13-17-00-38-1 +107 -0
log/log-train-2026-01-13-17-01-14-0 +107 -0
log/log-train-2026-01-13-17-01-14-1 +107 -0
log/log-train-2026-01-13-17-06-37-0 +169 -0
log/log-train-2026-01-13-17-06-37-1 +171 -0
tensorboard/events.out.tfevents.1768323136.6ec37ec2ba95.217.0 +3 -0
tensorboard/events.out.tfevents.1768323216.6ec37ec2ba95.324.0 +3 -0
tensorboard/events.out.tfevents.1768323254.6ec37ec2ba95.501.0 +3 -0
tensorboard/events.out.tfevents.1768323638.6ec37ec2ba95.678.0 +3 -0
tensorboard/events.out.tfevents.1768323674.6ec37ec2ba95.851.0 +3 -0
tensorboard/events.out.tfevents.1768323997.6ec37ec2ba95.1021.0 +3 -0

log/log-train-2026-01-13-16-52-16-0 ADDED Viewed

	@@ -0,0 +1,96 @@

+2026-01-13 16:52:16,624 INFO [train.py:967] (0/2) Training started
+2026-01-13 16:52:16,625 INFO [train.py:977] (0/2) Device: cuda:0
+2026-01-13 16:52:16,632 INFO [train.py:986] (0/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 16:52:16,633 INFO [train.py:988] (0/2) About to create model
+2026-01-13 16:52:17,282 INFO [zipformer.py:405] (0/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 16:52:17,300 INFO [train.py:992] (0/2) Number of model parameters: 71330891
+2026-01-13 16:52:18,071 INFO [train.py:1007] (0/2) Using DDP
+2026-01-13 16:52:22,679 INFO [asr_datamodule.py:422] (0/2) About to get train-clean-100 cuts

log/log-train-2026-01-13-16-52-16-1 ADDED Viewed

	@@ -0,0 +1,96 @@

+2026-01-13 16:52:16,766 INFO [train.py:967] (1/2) Training started
+2026-01-13 16:52:16,766 INFO [train.py:977] (1/2) Device: cuda:1
+2026-01-13 16:52:16,768 INFO [train.py:986] (1/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 16:52:16,769 INFO [train.py:988] (1/2) About to create model
+2026-01-13 16:52:17,360 INFO [zipformer.py:405] (1/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 16:52:17,377 INFO [train.py:992] (1/2) Number of model parameters: 71330891
+2026-01-13 16:52:17,486 INFO [train.py:1007] (1/2) Using DDP
+2026-01-13 16:52:22,679 INFO [asr_datamodule.py:422] (1/2) About to get train-clean-100 cuts

log/log-train-2026-01-13-16-53-36-0 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 16:53:36,680 INFO [train.py:967] (0/2) Training started
+2026-01-13 16:53:36,681 INFO [train.py:977] (0/2) Device: cuda:0
+2026-01-13 16:53:36,684 INFO [train.py:986] (0/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 16:53:36,684 INFO [train.py:988] (0/2) About to create model
+2026-01-13 16:53:37,260 INFO [zipformer.py:405] (0/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 16:53:37,277 INFO [train.py:992] (0/2) Number of model parameters: 71330891
+2026-01-13 16:53:38,042 INFO [train.py:1007] (0/2) Using DDP
+2026-01-13 16:53:39,351 INFO [asr_datamodule.py:422] (0/2) About to get train-clean-100 cuts
+2026-01-13 16:53:39,352 INFO [asr_datamodule.py:239] (0/2) Disable MUSAN
+2026-01-13 16:53:39,352 INFO [asr_datamodule.py:257] (0/2) Enable SpecAugment
+2026-01-13 16:53:39,352 INFO [asr_datamodule.py:258] (0/2) Time warp factor: 80
+2026-01-13 16:53:39,352 INFO [asr_datamodule.py:268] (0/2) Num frame mask: 10
+2026-01-13 16:53:39,352 INFO [asr_datamodule.py:281] (0/2) About to create train dataset
+2026-01-13 16:53:39,352 INFO [asr_datamodule.py:308] (0/2) Using DynamicBucketingSampler.
+2026-01-13 16:53:39,695 INFO [asr_datamodule.py:324] (0/2) About to create train dataloader
+2026-01-13 16:53:39,696 INFO [asr_datamodule.py:460] (0/2) About to get dev-clean cuts
+2026-01-13 16:53:39,696 INFO [asr_datamodule.py:467] (0/2) About to get dev-other cuts
+2026-01-13 16:53:39,697 INFO [asr_datamodule.py:355] (0/2) About to create dev dataset
+2026-01-13 16:53:39,923 INFO [asr_datamodule.py:372] (0/2) About to create dev dataloader

log/log-train-2026-01-13-16-53-36-1 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 16:53:36,791 INFO [train.py:967] (1/2) Training started
+2026-01-13 16:53:36,791 INFO [train.py:977] (1/2) Device: cuda:1
+2026-01-13 16:53:36,794 INFO [train.py:986] (1/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 16:53:36,794 INFO [train.py:988] (1/2) About to create model
+2026-01-13 16:53:37,379 INFO [zipformer.py:405] (1/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 16:53:37,398 INFO [train.py:992] (1/2) Number of model parameters: 71330891
+2026-01-13 16:53:37,505 INFO [train.py:1007] (1/2) Using DDP
+2026-01-13 16:53:39,347 INFO [asr_datamodule.py:422] (1/2) About to get train-clean-100 cuts
+2026-01-13 16:53:39,348 INFO [asr_datamodule.py:239] (1/2) Disable MUSAN
+2026-01-13 16:53:39,348 INFO [asr_datamodule.py:257] (1/2) Enable SpecAugment
+2026-01-13 16:53:39,348 INFO [asr_datamodule.py:258] (1/2) Time warp factor: 80
+2026-01-13 16:53:39,349 INFO [asr_datamodule.py:268] (1/2) Num frame mask: 10
+2026-01-13 16:53:39,349 INFO [asr_datamodule.py:281] (1/2) About to create train dataset
+2026-01-13 16:53:39,349 INFO [asr_datamodule.py:308] (1/2) Using DynamicBucketingSampler.
+2026-01-13 16:53:39,691 INFO [asr_datamodule.py:324] (1/2) About to create train dataloader
+2026-01-13 16:53:39,692 INFO [asr_datamodule.py:460] (1/2) About to get dev-clean cuts
+2026-01-13 16:53:39,692 INFO [asr_datamodule.py:467] (1/2) About to get dev-other cuts
+2026-01-13 16:53:39,693 INFO [asr_datamodule.py:355] (1/2) About to create dev dataset
+2026-01-13 16:53:39,908 INFO [asr_datamodule.py:372] (1/2) About to create dev dataloader

log/log-train-2026-01-13-16-54-14-0 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 16:54:14,567 INFO [train.py:967] (0/2) Training started
+2026-01-13 16:54:14,568 INFO [train.py:977] (0/2) Device: cuda:0
+2026-01-13 16:54:14,571 INFO [train.py:986] (0/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 16:54:14,572 INFO [train.py:988] (0/2) About to create model
+2026-01-13 16:54:15,171 INFO [zipformer.py:405] (0/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 16:54:15,189 INFO [train.py:992] (0/2) Number of model parameters: 71330891
+2026-01-13 16:54:15,951 INFO [train.py:1007] (0/2) Using DDP
+2026-01-13 16:54:17,256 INFO [asr_datamodule.py:422] (0/2) About to get train-clean-100 cuts
+2026-01-13 16:54:17,257 INFO [asr_datamodule.py:239] (0/2) Disable MUSAN
+2026-01-13 16:54:17,257 INFO [asr_datamodule.py:257] (0/2) Enable SpecAugment
+2026-01-13 16:54:17,258 INFO [asr_datamodule.py:258] (0/2) Time warp factor: 80
+2026-01-13 16:54:17,258 INFO [asr_datamodule.py:268] (0/2) Num frame mask: 10
+2026-01-13 16:54:17,258 INFO [asr_datamodule.py:281] (0/2) About to create train dataset
+2026-01-13 16:54:17,258 INFO [asr_datamodule.py:308] (0/2) Using DynamicBucketingSampler.
+2026-01-13 16:54:17,617 INFO [asr_datamodule.py:324] (0/2) About to create train dataloader
+2026-01-13 16:54:17,618 INFO [asr_datamodule.py:460] (0/2) About to get dev-clean cuts
+2026-01-13 16:54:17,618 INFO [asr_datamodule.py:467] (0/2) About to get dev-other cuts
+2026-01-13 16:54:17,619 INFO [asr_datamodule.py:355] (0/2) About to create dev dataset
+2026-01-13 16:54:17,834 INFO [asr_datamodule.py:372] (0/2) About to create dev dataloader

log/log-train-2026-01-13-16-54-14-1 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 16:54:14,677 INFO [train.py:967] (1/2) Training started
+2026-01-13 16:54:14,677 INFO [train.py:977] (1/2) Device: cuda:1
+2026-01-13 16:54:14,680 INFO [train.py:986] (1/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 16:54:14,680 INFO [train.py:988] (1/2) About to create model
+2026-01-13 16:54:15,275 INFO [zipformer.py:405] (1/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 16:54:15,293 INFO [train.py:992] (1/2) Number of model parameters: 71330891
+2026-01-13 16:54:15,400 INFO [train.py:1007] (1/2) Using DDP
+2026-01-13 16:54:17,262 INFO [asr_datamodule.py:422] (1/2) About to get train-clean-100 cuts
+2026-01-13 16:54:17,263 INFO [asr_datamodule.py:239] (1/2) Disable MUSAN
+2026-01-13 16:54:17,264 INFO [asr_datamodule.py:257] (1/2) Enable SpecAugment
+2026-01-13 16:54:17,264 INFO [asr_datamodule.py:258] (1/2) Time warp factor: 80
+2026-01-13 16:54:17,264 INFO [asr_datamodule.py:268] (1/2) Num frame mask: 10
+2026-01-13 16:54:17,264 INFO [asr_datamodule.py:281] (1/2) About to create train dataset
+2026-01-13 16:54:17,264 INFO [asr_datamodule.py:308] (1/2) Using DynamicBucketingSampler.
+2026-01-13 16:54:17,631 INFO [asr_datamodule.py:324] (1/2) About to create train dataloader
+2026-01-13 16:54:17,632 INFO [asr_datamodule.py:460] (1/2) About to get dev-clean cuts
+2026-01-13 16:54:17,632 INFO [asr_datamodule.py:467] (1/2) About to get dev-other cuts
+2026-01-13 16:54:17,633 INFO [asr_datamodule.py:355] (1/2) About to create dev dataset
+2026-01-13 16:54:17,869 INFO [asr_datamodule.py:372] (1/2) About to create dev dataloader

log/log-train-2026-01-13-17-00-38-0 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 17:00:38,644 INFO [train.py:967] (0/2) Training started
+2026-01-13 17:00:38,646 INFO [train.py:977] (0/2) Device: cuda:0
+2026-01-13 17:00:38,650 INFO [train.py:986] (0/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 17:00:38,651 INFO [train.py:988] (0/2) About to create model
+2026-01-13 17:00:39,258 INFO [zipformer.py:405] (0/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 17:00:39,275 INFO [train.py:992] (0/2) Number of model parameters: 71330891
+2026-01-13 17:00:40,039 INFO [train.py:1007] (0/2) Using DDP
+2026-01-13 17:00:41,419 INFO [asr_datamodule.py:422] (0/2) About to get train-clean-100 cuts
+2026-01-13 17:00:41,420 INFO [asr_datamodule.py:239] (0/2) Disable MUSAN
+2026-01-13 17:00:41,420 INFO [asr_datamodule.py:257] (0/2) Enable SpecAugment
+2026-01-13 17:00:41,420 INFO [asr_datamodule.py:258] (0/2) Time warp factor: 80
+2026-01-13 17:00:41,420 INFO [asr_datamodule.py:268] (0/2) Num frame mask: 10
+2026-01-13 17:00:41,420 INFO [asr_datamodule.py:281] (0/2) About to create train dataset
+2026-01-13 17:00:41,420 INFO [asr_datamodule.py:308] (0/2) Using DynamicBucketingSampler.
+2026-01-13 17:00:41,760 INFO [asr_datamodule.py:324] (0/2) About to create train dataloader
+2026-01-13 17:00:41,761 INFO [asr_datamodule.py:460] (0/2) About to get dev-clean cuts
+2026-01-13 17:00:41,761 INFO [asr_datamodule.py:467] (0/2) About to get dev-other cuts
+2026-01-13 17:00:41,762 INFO [asr_datamodule.py:355] (0/2) About to create dev dataset
+2026-01-13 17:00:41,982 INFO [asr_datamodule.py:372] (0/2) About to create dev dataloader

log/log-train-2026-01-13-17-00-38-1 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 17:00:38,739 INFO [train.py:967] (1/2) Training started
+2026-01-13 17:00:38,739 INFO [train.py:977] (1/2) Device: cuda:1
+2026-01-13 17:00:38,742 INFO [train.py:986] (1/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 17:00:38,742 INFO [train.py:988] (1/2) About to create model
+2026-01-13 17:00:39,358 INFO [zipformer.py:405] (1/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 17:00:39,375 INFO [train.py:992] (1/2) Number of model parameters: 71330891
+2026-01-13 17:00:39,482 INFO [train.py:1007] (1/2) Using DDP
+2026-01-13 17:00:41,419 INFO [asr_datamodule.py:422] (1/2) About to get train-clean-100 cuts
+2026-01-13 17:00:41,421 INFO [asr_datamodule.py:239] (1/2) Disable MUSAN
+2026-01-13 17:00:41,421 INFO [asr_datamodule.py:257] (1/2) Enable SpecAugment
+2026-01-13 17:00:41,421 INFO [asr_datamodule.py:258] (1/2) Time warp factor: 80
+2026-01-13 17:00:41,421 INFO [asr_datamodule.py:268] (1/2) Num frame mask: 10
+2026-01-13 17:00:41,421 INFO [asr_datamodule.py:281] (1/2) About to create train dataset
+2026-01-13 17:00:41,421 INFO [asr_datamodule.py:308] (1/2) Using DynamicBucketingSampler.
+2026-01-13 17:00:41,757 INFO [asr_datamodule.py:324] (1/2) About to create train dataloader
+2026-01-13 17:00:41,757 INFO [asr_datamodule.py:460] (1/2) About to get dev-clean cuts
+2026-01-13 17:00:41,758 INFO [asr_datamodule.py:467] (1/2) About to get dev-other cuts
+2026-01-13 17:00:41,759 INFO [asr_datamodule.py:355] (1/2) About to create dev dataset
+2026-01-13 17:00:41,980 INFO [asr_datamodule.py:372] (1/2) About to create dev dataloader

log/log-train-2026-01-13-17-01-14-0 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 17:01:14,363 INFO [train.py:967] (0/2) Training started
+2026-01-13 17:01:14,364 INFO [train.py:977] (0/2) Device: cuda:0
+2026-01-13 17:01:14,367 INFO [train.py:986] (0/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 17:01:14,367 INFO [train.py:988] (0/2) About to create model
+2026-01-13 17:01:14,952 INFO [zipformer.py:405] (0/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 17:01:14,971 INFO [train.py:992] (0/2) Number of model parameters: 71330891
+2026-01-13 17:01:15,734 INFO [train.py:1007] (0/2) Using DDP
+2026-01-13 17:01:17,024 INFO [asr_datamodule.py:422] (0/2) About to get train-clean-100 cuts
+2026-01-13 17:01:17,025 INFO [asr_datamodule.py:239] (0/2) Disable MUSAN
+2026-01-13 17:01:17,025 INFO [asr_datamodule.py:257] (0/2) Enable SpecAugment
+2026-01-13 17:01:17,025 INFO [asr_datamodule.py:258] (0/2) Time warp factor: 80
+2026-01-13 17:01:17,025 INFO [asr_datamodule.py:268] (0/2) Num frame mask: 10
+2026-01-13 17:01:17,025 INFO [asr_datamodule.py:281] (0/2) About to create train dataset
+2026-01-13 17:01:17,026 INFO [asr_datamodule.py:308] (0/2) Using DynamicBucketingSampler.
+2026-01-13 17:01:17,366 INFO [asr_datamodule.py:324] (0/2) About to create train dataloader
+2026-01-13 17:01:17,366 INFO [asr_datamodule.py:460] (0/2) About to get dev-clean cuts
+2026-01-13 17:01:17,367 INFO [asr_datamodule.py:467] (0/2) About to get dev-other cuts
+2026-01-13 17:01:17,367 INFO [asr_datamodule.py:355] (0/2) About to create dev dataset
+2026-01-13 17:01:17,579 INFO [asr_datamodule.py:372] (0/2) About to create dev dataloader

log/log-train-2026-01-13-17-01-14-1 ADDED Viewed

	@@ -0,0 +1,107 @@

+2026-01-13 17:01:14,470 INFO [train.py:967] (1/2) Training started
+2026-01-13 17:01:14,471 INFO [train.py:977] (1/2) Device: cuda:1
+2026-01-13 17:01:14,473 INFO [train.py:986] (1/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 17:01:14,473 INFO [train.py:988] (1/2) About to create model
+2026-01-13 17:01:15,050 INFO [zipformer.py:405] (1/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 17:01:15,068 INFO [train.py:992] (1/2) Number of model parameters: 71330891
+2026-01-13 17:01:15,174 INFO [train.py:1007] (1/2) Using DDP
+2026-01-13 17:01:17,037 INFO [asr_datamodule.py:422] (1/2) About to get train-clean-100 cuts
+2026-01-13 17:01:17,038 INFO [asr_datamodule.py:239] (1/2) Disable MUSAN
+2026-01-13 17:01:17,038 INFO [asr_datamodule.py:257] (1/2) Enable SpecAugment
+2026-01-13 17:01:17,038 INFO [asr_datamodule.py:258] (1/2) Time warp factor: 80
+2026-01-13 17:01:17,038 INFO [asr_datamodule.py:268] (1/2) Num frame mask: 10
+2026-01-13 17:01:17,038 INFO [asr_datamodule.py:281] (1/2) About to create train dataset
+2026-01-13 17:01:17,038 INFO [asr_datamodule.py:308] (1/2) Using DynamicBucketingSampler.
+2026-01-13 17:01:17,377 INFO [asr_datamodule.py:324] (1/2) About to create train dataloader
+2026-01-13 17:01:17,378 INFO [asr_datamodule.py:460] (1/2) About to get dev-clean cuts
+2026-01-13 17:01:17,378 INFO [asr_datamodule.py:467] (1/2) About to get dev-other cuts
+2026-01-13 17:01:17,379 INFO [asr_datamodule.py:355] (1/2) About to create dev dataset
+2026-01-13 17:01:17,591 INFO [asr_datamodule.py:372] (1/2) About to create dev dataloader

log/log-train-2026-01-13-17-06-37-0 ADDED Viewed

	@@ -0,0 +1,169 @@

+2026-01-13 17:06:37,678 INFO [train.py:967] (0/2) Training started
+2026-01-13 17:06:37,679 INFO [train.py:977] (0/2) Device: cuda:0
+2026-01-13 17:06:37,681 INFO [train.py:986] (0/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 17:06:37,682 INFO [train.py:988] (0/2) About to create model
+2026-01-13 17:06:38,266 INFO [zipformer.py:405] (0/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 17:06:38,285 INFO [train.py:992] (0/2) Number of model parameters: 71330891
+2026-01-13 17:06:39,101 INFO [train.py:1007] (0/2) Using DDP
+2026-01-13 17:06:40,454 INFO [asr_datamodule.py:422] (0/2) About to get train-clean-100 cuts
+2026-01-13 17:06:40,456 INFO [asr_datamodule.py:239] (0/2) Disable MUSAN
+2026-01-13 17:06:40,456 INFO [asr_datamodule.py:257] (0/2) Enable SpecAugment
+2026-01-13 17:06:40,456 INFO [asr_datamodule.py:258] (0/2) Time warp factor: 80
+2026-01-13 17:06:40,456 INFO [asr_datamodule.py:268] (0/2) Num frame mask: 10
+2026-01-13 17:06:40,456 INFO [asr_datamodule.py:281] (0/2) About to create train dataset
+2026-01-13 17:06:40,456 INFO [asr_datamodule.py:308] (0/2) Using DynamicBucketingSampler.
+2026-01-13 17:06:40,855 INFO [asr_datamodule.py:324] (0/2) About to create train dataloader
+2026-01-13 17:06:40,855 INFO [asr_datamodule.py:460] (0/2) About to get dev-clean cuts
+2026-01-13 17:06:40,856 INFO [asr_datamodule.py:467] (0/2) About to get dev-other cuts
+2026-01-13 17:06:40,856 INFO [asr_datamodule.py:355] (0/2) About to create dev dataset
+2026-01-13 17:06:41,074 INFO [asr_datamodule.py:372] (0/2) About to create dev dataloader
+2026-01-13 17:06:56,066 INFO [train.py:895] (0/2) Epoch 1, batch 0, loss[loss=8.165, simple_loss=7.427, pruned_loss=7.363, over 2638.00 frames. ], tot_loss[loss=8.165, simple_loss=7.427, pruned_loss=7.363, over 2638.00 frames. ], batch size: 7, lr: 2.50e-02, grad_scale: 2.0
+2026-01-13 17:06:56,066 INFO [train.py:920] (0/2) Computing validation loss
+2026-01-13 17:08:00,243 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([2.9147, 2.9149, 2.9154, 2.9121, 2.9146, 2.9150, 2.9150, 2.9151],
+       device='cuda:0'), covar=tensor([0.0029, 0.0041, 0.0048, 0.0025, 0.0032, 0.0035, 0.0052, 0.0034],
+       device='cuda:0'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009],
+       device='cuda:0'), out_proj_covar=tensor([8.5573e-06, 8.6460e-06, 8.6547e-06, 8.5689e-06, 8.8456e-06, 8.6908e-06,
+        8.7531e-06, 8.7239e-06], device='cuda:0')
+2026-01-13 17:08:21,491 INFO [train.py:929] (0/2) Epoch 1, validation: loss=8.291, simple_loss=7.534, pruned_loss=7.553, over 1639044.00 frames.
+2026-01-13 17:08:21,492 INFO [train.py:930] (0/2) Maximum memory allocated so far is 2796MB
+2026-01-13 17:08:23,199 INFO [zipformer.py:1188] (0/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=5.0, num_to_drop=2, layers_to_drop={1, 3}
+2026-01-13 17:08:29,961 INFO [zipformer.py:1188] (0/2) warmup_begin=666.7, warmup_end=1333.3, batch_count=23.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:08:32,511 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=6.07 vs. limit=2.0
+2026-01-13 17:08:33,581 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=5.73 vs. limit=2.0
+2026-01-13 17:08:37,070 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=14.43 vs. limit=2.0
+2026-01-13 17:08:40,738 INFO [train.py:895] (0/2) Epoch 1, batch 50, loss[loss=1.1, simple_loss=0.9771, pruned_loss=1.103, over 2768.00 frames. ], tot_loss[loss=2.142, simple_loss=1.948, pruned_loss=1.876, over 122563.73 frames. ], batch size: 7, lr: 2.75e-02, grad_scale: 2.0
+2026-01-13 17:08:52,490 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([5.1111, 5.1111, 5.1051, 5.1033, 5.1106, 5.1110, 5.1105, 5.1110],
+       device='cuda:0'), covar=tensor([0.0017, 0.0046, 0.0033, 0.0029, 0.0021, 0.0033, 0.0023, 0.0020],
+       device='cuda:0'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009],
+       device='cuda:0'), out_proj_covar=tensor([8.7029e-06, 8.8104e-06, 8.8122e-06, 8.6125e-06, 9.0121e-06, 8.7908e-06,
+        8.8471e-06, 8.8093e-06], device='cuda:0')
+2026-01-13 17:08:53,265 INFO [zipformer.py:1188] (0/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=83.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:08:57,962 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=3.07 vs. limit=2.0
+2026-01-13 17:09:00,026 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.726e+01 2.698e+01 5.079e+01 1.890e+02 2.214e+03, threshold=1.016e+02, percent-clipped=0.0
+2026-01-13 17:09:00,065 INFO [train.py:895] (0/2) Epoch 1, batch 100, loss[loss=0.9872, simple_loss=0.8589, pruned_loss=1.031, over 2891.00 frames. ], tot_loss[loss=1.553, simple_loss=1.394, pruned_loss=1.445, over 216716.41 frames. ], batch size: 8, lr: 3.00e-02, grad_scale: 2.0
+2026-01-13 17:09:13,046 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=5.61 vs. limit=2.0
+2026-01-13 17:09:17,396 INFO [zipformer.py:1188] (0/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=144.0, num_to_drop=2, layers_to_drop={1, 3}
+2026-01-13 17:09:20,086 INFO [train.py:895] (0/2) Epoch 1, batch 150, loss[loss=0.8253, simple_loss=0.7036, pruned_loss=0.8828, over 2774.00 frames. ], tot_loss[loss=1.313, simple_loss=1.163, pruned_loss=1.277, over 290051.50 frames. ], batch size: 7, lr: 3.25e-02, grad_scale: 2.0
+2026-01-13 17:09:40,653 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 1.991e+01 2.552e+01 2.903e+01 3.376e+01 6.929e+01, threshold=5.806e+01, percent-clipped=0.0
+2026-01-13 17:09:40,692 INFO [train.py:895] (0/2) Epoch 1, batch 200, loss[loss=1.141, simple_loss=0.9689, pruned_loss=1.16, over 2637.00 frames. ], tot_loss[loss=1.177, simple_loss=1.03, pruned_loss=1.172, over 347964.82 frames. ], batch size: 16, lr: 3.50e-02, grad_scale: 2.0
+2026-01-13 17:09:43,454 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=4.43 vs. limit=2.0
+2026-01-13 17:09:46,242 INFO [scaling.py:681] (0/2) Whitening: num_groups=1, num_channels=384, metric=37.43 vs. limit=5.0
+2026-01-13 17:09:56,985 INFO [scaling.py:681] (0/2) Whitening: num_groups=1, num_channels=384, metric=42.78 vs. limit=5.0
+2026-01-13 17:09:59,109 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=14.33 vs. limit=2.0
+2026-01-13 17:10:00,956 INFO [train.py:895] (0/2) Epoch 1, batch 250, loss[loss=0.8344, simple_loss=0.6973, pruned_loss=0.8492, over 2755.00 frames. ], tot_loss[loss=1.088, simple_loss=0.942, pruned_loss=1.093, over 392480.77 frames. ], batch size: 11, lr: 3.75e-02, grad_scale: 2.0
+2026-01-13 17:10:19,125 INFO [zipformer.py:1188] (0/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=296.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:10:20,733 INFO [zipformer.py:1188] (0/2) warmup_begin=1333.3, warmup_end=2000.0, batch_count=300.0, num_to_drop=2, layers_to_drop={1, 3}
+2026-01-13 17:10:21,012 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 2.628e+01 3.406e+01 3.889e+01 4.977e+01 1.495e+02, threshold=7.778e+01, percent-clipped=13.0
+2026-01-13 17:10:21,048 INFO [train.py:895] (0/2) Epoch 1, batch 300, loss[loss=0.8336, simple_loss=0.6967, pruned_loss=0.8042, over 2892.00 frames. ], tot_loss[loss=1.026, simple_loss=0.8803, pruned_loss=1.03, over 428733.71 frames. ], batch size: 10, lr: 4.00e-02, grad_scale: 2.0
+2026-01-13 17:10:26,809 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=2.54 vs. limit=2.0
+2026-01-13 17:10:39,996 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=1.52 vs. limit=2.0
+2026-01-13 17:10:40,639 INFO [train.py:895] (0/2) Epoch 1, batch 350, loss[loss=0.8286, simple_loss=0.6797, pruned_loss=0.8085, over 2696.00 frames. ], tot_loss[loss=0.9781, simple_loss=0.832, pruned_loss=0.9753, over 455853.48 frames. ], batch size: 7, lr: 4.25e-02, grad_scale: 2.0
+2026-01-13 17:10:43,170 INFO [zipformer.py:1188] (0/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=357.0, num_to_drop=2, layers_to_drop={1, 3}
+2026-01-13 17:10:48,892 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=96, metric=2.92 vs. limit=2.0
+2026-01-13 17:10:53,144 INFO [scaling.py:681] (0/2) Whitening: num_groups=1, num_channels=384, metric=14.35 vs. limit=5.0
+2026-01-13 17:10:54,931 INFO [zipformer.py:1188] (0/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=387.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:10:55,070 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=12.67 vs. limit=2.0
+2026-01-13 17:10:56,683 INFO [scaling.py:681] (0/2) Whitening: num_groups=1, num_channels=384, metric=11.14 vs. limit=5.0
+2026-01-13 17:11:00,683 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 2.792e+01 3.460e+01 4.419e+01 5.164e+01 2.002e+02, threshold=8.837e+01, percent-clipped=7.0
+2026-01-13 17:11:00,720 INFO [train.py:895] (0/2) Epoch 1, batch 400, loss[loss=0.8411, simple_loss=0.6884, pruned_loss=0.7904, over 2705.00 frames. ], tot_loss[loss=0.9489, simple_loss=0.8001, pruned_loss=0.9381, over 475942.16 frames. ], batch size: 8, lr: 4.50e-02, grad_scale: 4.0
+2026-01-13 17:11:07,553 INFO [scaling.py:681] (0/2) Whitening: num_groups=8, num_channels=192, metric=10.28 vs. limit=2.0
+2026-01-13 17:11:11,067 INFO [zipformer.py:2441] (0/2) attn_weights_entropy = tensor([4.6921, 5.1287, 4.3867, 4.6206, 3.9530, 5.0709, 4.4906, 4.6190],
+       device='cuda:0'), covar=tensor([0.0421, 0.0065, 0.0737, 0.0372, 0.1756, 0.0083, 0.0905, 0.0393],
+       device='cuda:0'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0010, 0.0009, 0.0009, 0.0009],
+       device='cuda:0'), out_proj_covar=tensor([9.0232e-06, 8.7080e-06, 9.0746e-06, 8.3539e-06, 1.0025e-05, 8.5254e-06,
+        8.9379e-06, 8.6380e-06], device='cuda:0')
+2026-01-13 17:11:15,769 INFO [zipformer.py:1188] (0/2) warmup_begin=1333.3, warmup_end=2000.0, batch_count=439.0, num_to_drop=2, layers_to_drop={1, 2}
+2026-01-13 17:11:19,201 INFO [scaling.py:681] (0/2) Whitening: num_groups=1, num_channels=384, metric=10.70 vs. limit=5.0
+2026-01-13 17:11:19,449 INFO [zipformer.py:1188] (0/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=448.0, num_to_drop=2, layers_to_drop={0, 3}
+2026-01-13 17:11:20,503 INFO [train.py:895] (0/2) Epoch 1, batch 450, loss[loss=0.7667, simple_loss=0.6296, pruned_loss=0.6868, over 2660.00 frames. ], tot_loss[loss=0.9287, simple_loss=0.7768, pruned_loss=0.9064, over 492502.80 frames. ], batch size: 8, lr: 4.75e-02, grad_scale: 4.0
+2026-01-13 17:11:26,921 INFO [scaling.py:681] (0/2) Whitening: num_groups=1, num_channels=384, metric=10.20 vs. limit=5.0
+2026-01-13 17:11:40,428 INFO [optim.py:365] (0/2) Clipping_scale=2.0, grad-norm quartiles 2.871e+01 3.474e+01 4.145e+01 5.434e+01 1.454e+02, threshold=8.291e+01, percent-clipped=4.0
+2026-01-13 17:11:40,465 INFO [train.py:895] (0/2) Epoch 1, batch 500, loss[loss=0.8643, simple_loss=0.6974, pruned_loss=0.7811, over 2804.00 frames. ], tot_loss[loss=0.9117, simple_loss=0.7566, pruned_loss=0.8773, over 506680.85 frames. ], batch size: 10, lr: 4.99e-02, grad_scale: 4.0

log/log-train-2026-01-13-17-06-37-1 ADDED Viewed

	@@ -0,0 +1,171 @@

+2026-01-13 17:06:37,791 INFO [train.py:967] (1/2) Training started
+2026-01-13 17:06:37,791 INFO [train.py:977] (1/2) Device: cuda:1
+2026-01-13 17:06:37,793 INFO [train.py:986] (1/2) {
+  "am_scale": 0.0,
+  "attention_dims": "192,192,192,192,192",
+  "average_period": 200,
+  "base_lr": 0.05,
+  "batch_idx_train": 0,
+  "best_train_epoch": -1,
+  "best_train_loss": Infinity,
+  "best_valid_epoch": -1,
+  "best_valid_loss": Infinity,
+  "blank_id": 0,
+  "bpe_model": "/kaggle/working/amharic_training/bpe/bpe.model",
+  "bucketing_sampler": true,
+  "cnn_module_kernels": "31,31,31,31,31",
+  "concatenate_cuts": false,
+  "context_size": 2,
+  "decode_chunk_len": 32,
+  "decoder_dim": 512,
+  "drop_last": true,
+  "duration_factor": 1.0,
+  "enable_musan": false,
+  "enable_spec_aug": true,
+  "encoder_dims": "384,384,384,384,384",
+  "encoder_unmasked_dims": "256,256,256,256,256",
+  "env_info": {
+    "IP address": "172.19.2.2",
+    "hostname": "6ec37ec2ba95",
+    "icefall-git-branch": "master",
+    "icefall-git-date": "Fri Nov 28 03:42:20 2025",
+    "icefall-git-sha1": "0904e490-clean",
+    "icefall-path": "/kaggle/working/icefall",
+    "k2-build-type": "Release",
+    "k2-git-date": "Thu Jul 25 03:34:26 2024",
+    "k2-git-sha1": "40e8d1676f6062e46458dc32ad21229c93cc9c50",
+    "k2-path": "/usr/local/lib/python3.12/dist-packages/k2/__init__.py",
+    "k2-version": "1.24.4",
+    "k2-with-cuda": true,
+    "lhotse-path": "/usr/local/lib/python3.12/dist-packages/lhotse/__init__.py",
+    "lhotse-version": "1.32.1",
+    "python-version": "3.12",
+    "torch-cuda-available": true,
+    "torch-cuda-version": "12.1",
+    "torch-version": "2.4.0+cu121"
+  },
+  "exp_dir": "/kaggle/working/amharic_training/exp_amharic_streaming",
+  "feature_dim": 80,
+  "feedforward_dims": "1024,1024,2048,2048,1024",
+  "full_libri": false,
+  "gap": 1.0,
+  "inf_check": false,
+  "input_strategy": "PrecomputedFeatures",
+  "joiner_dim": 512,
+  "keep_last_k": 5,
+  "lm_scale": 0.25,
+  "log_interval": 50,
+  "lr_batches": 5000,
+  "lr_epochs": 3.5,
+  "manifest_dir": "/kaggle/working/amharic_training/manifests",
+  "master_port": 12354,
+  "max_duration": 120,
+  "mini_libri": false,
+  "nhead": "8,8,8,8,8",
+  "num_buckets": 30,
+  "num_encoder_layers": "2,4,3,2,4",
+  "num_epochs": 50,
+  "num_left_chunks": 4,
+  "num_workers": 2,
+  "on_the_fly_feats": false,
+  "print_diagnostics": false,
+  "prune_range": 5,
+  "reset_interval": 200,
+  "return_cuts": true,
+  "save_every_n": 1000,
+  "seed": 42,
+  "short_chunk_size": 50,
+  "shuffle": true,
+  "simple_loss_scale": 0.5,
+  "spec_aug_time_warp_factor": 80,
+  "start_batch": 0,
+  "start_epoch": 1,
+  "subsampling_factor": 4,
+  "tensorboard": true,
+  "use_fp16": true,
+  "valid_interval": 1600,
+  "vocab_size": 1000,
+  "warm_step": 2000,
+  "world_size": 2,
+  "zipformer_downsampling_factors": "1,2,4,8,2"
+}
+2026-01-13 17:06:37,794 INFO [train.py:988] (1/2) About to create model
+2026-01-13 17:06:38,385 INFO [zipformer.py:405] (1/2) At encoder stack 4, which has downsampling_factor=2, we will combine the outputs of layers 1 and 3, with downsampling_factors=2 and 8.
+2026-01-13 17:06:38,403 INFO [train.py:992] (1/2) Number of model parameters: 71330891
+2026-01-13 17:06:38,512 INFO [train.py:1007] (1/2) Using DDP
+2026-01-13 17:06:40,469 INFO [asr_datamodule.py:422] (1/2) About to get train-clean-100 cuts
+2026-01-13 17:06:40,470 INFO [asr_datamodule.py:239] (1/2) Disable MUSAN
+2026-01-13 17:06:40,470 INFO [asr_datamodule.py:257] (1/2) Enable SpecAugment
+2026-01-13 17:06:40,471 INFO [asr_datamodule.py:258] (1/2) Time warp factor: 80
+2026-01-13 17:06:40,471 INFO [asr_datamodule.py:268] (1/2) Num frame mask: 10
+2026-01-13 17:06:40,471 INFO [asr_datamodule.py:281] (1/2) About to create train dataset
+2026-01-13 17:06:40,471 INFO [asr_datamodule.py:308] (1/2) Using DynamicBucketingSampler.
+2026-01-13 17:06:40,872 INFO [asr_datamodule.py:324] (1/2) About to create train dataloader
+2026-01-13 17:06:40,872 INFO [asr_datamodule.py:460] (1/2) About to get dev-clean cuts
+2026-01-13 17:06:40,873 INFO [asr_datamodule.py:467] (1/2) About to get dev-other cuts
+2026-01-13 17:06:40,873 INFO [asr_datamodule.py:355] (1/2) About to create dev dataset
+2026-01-13 17:06:41,102 INFO [asr_datamodule.py:372] (1/2) About to create dev dataloader
+2026-01-13 17:06:56,061 INFO [train.py:895] (1/2) Epoch 1, batch 0, loss[loss=8.191, simple_loss=7.455, pruned_loss=7.342, over 2645.00 frames. ], tot_loss[loss=8.191, simple_loss=7.455, pruned_loss=7.342, over 2645.00 frames. ], batch size: 7, lr: 2.50e-02, grad_scale: 2.0
+2026-01-13 17:06:56,062 INFO [train.py:920] (1/2) Computing validation loss
+2026-01-13 17:08:00,436 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([2.9155, 2.9157, 2.9161, 2.9129, 2.9154, 2.9159, 2.9159, 2.9159],
+       device='cuda:1'), covar=tensor([0.0037, 0.0062, 0.0063, 0.0034, 0.0040, 0.0046, 0.0067, 0.0041],
+       device='cuda:1'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009],
+       device='cuda:1'), out_proj_covar=tensor([8.5573e-06, 8.6460e-06, 8.6547e-06, 8.5689e-06, 8.8456e-06, 8.6908e-06,
+        8.7531e-06, 8.7239e-06], device='cuda:1')
+2026-01-13 17:08:21,491 INFO [train.py:929] (1/2) Epoch 1, validation: loss=8.291, simple_loss=7.534, pruned_loss=7.553, over 1639044.00 frames.
+2026-01-13 17:08:21,492 INFO [train.py:930] (1/2) Maximum memory allocated so far is 2801MB
+2026-01-13 17:08:23,182 INFO [zipformer.py:1188] (1/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=5.0, num_to_drop=2, layers_to_drop={0, 1}
+2026-01-13 17:08:29,961 INFO [zipformer.py:1188] (1/2) warmup_begin=666.7, warmup_end=1333.3, batch_count=23.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:08:32,502 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=4.95 vs. limit=2.0
+2026-01-13 17:08:33,581 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=5.26 vs. limit=2.0
+2026-01-13 17:08:37,075 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=192, metric=13.56 vs. limit=2.0
+2026-01-13 17:08:40,737 INFO [train.py:895] (1/2) Epoch 1, batch 50, loss[loss=1.051, simple_loss=0.9344, pruned_loss=1.049, over 2766.00 frames. ], tot_loss[loss=2.147, simple_loss=1.951, pruned_loss=1.882, over 122589.82 frames. ], batch size: 7, lr: 2.75e-02, grad_scale: 2.0
+2026-01-13 17:08:52,476 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([5.1140, 5.1141, 5.1095, 5.1045, 5.1129, 5.1137, 5.1126, 5.1135],
+       device='cuda:1'), covar=tensor([0.0009, 0.0023, 0.0019, 0.0016, 0.0006, 0.0018, 0.0013, 0.0012],
+       device='cuda:1'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009],
+       device='cuda:1'), out_proj_covar=tensor([8.5574e-06, 8.6733e-06, 8.6879e-06, 8.5465e-06, 8.8501e-06, 8.6969e-06,
+        8.7308e-06, 8.6867e-06], device='cuda:1')
+2026-01-13 17:08:53,257 INFO [zipformer.py:1188] (1/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=83.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:08:57,912 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=3.48 vs. limit=2.0
+2026-01-13 17:09:00,027 INFO [optim.py:365] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.726e+01 2.698e+01 5.079e+01 1.890e+02 2.214e+03, threshold=1.016e+02, percent-clipped=0.0
+2026-01-13 17:09:00,066 INFO [train.py:895] (1/2) Epoch 1, batch 100, loss[loss=0.9553, simple_loss=0.8285, pruned_loss=1.015, over 2893.00 frames. ], tot_loss[loss=1.55, simple_loss=1.391, pruned_loss=1.443, over 216705.88 frames. ], batch size: 8, lr: 3.00e-02, grad_scale: 2.0
+2026-01-13 17:09:17,393 INFO [zipformer.py:1188] (1/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=144.0, num_to_drop=2, layers_to_drop={1, 2}
+2026-01-13 17:09:20,085 INFO [train.py:895] (1/2) Epoch 1, batch 150, loss[loss=0.8435, simple_loss=0.7125, pruned_loss=0.9396, over 2786.00 frames. ], tot_loss[loss=1.311, simple_loss=1.161, pruned_loss=1.277, over 290274.69 frames. ], batch size: 7, lr: 3.25e-02, grad_scale: 2.0
+2026-01-13 17:09:39,620 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([4.2447, 4.2507, 4.2467, 4.2488, 4.2510, 4.2508, 4.2486, 4.2512],
+       device='cuda:1'), covar=tensor([0.0020, 0.0018, 0.0026, 0.0025, 0.0023, 0.0022, 0.0015, 0.0020],
+       device='cuda:1'), in_proj_covar=tensor([0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009],
+       device='cuda:1'), out_proj_covar=tensor([8.8484e-06, 8.8488e-06, 8.6189e-06, 8.8502e-06, 8.6395e-06, 8.7008e-06,
+        8.7248e-06, 8.7738e-06], device='cuda:1')
+2026-01-13 17:09:40,650 INFO [optim.py:365] (1/2) Clipping_scale=2.0, grad-norm quartiles 1.991e+01 2.552e+01 2.903e+01 3.376e+01 6.929e+01, threshold=5.806e+01, percent-clipped=0.0
+2026-01-13 17:09:40,689 INFO [train.py:895] (1/2) Epoch 1, batch 200, loss[loss=1.119, simple_loss=0.947, pruned_loss=1.155, over 2665.00 frames. ], tot_loss[loss=1.179, simple_loss=1.032, pruned_loss=1.176, over 347390.37 frames. ], batch size: 16, lr: 3.50e-02, grad_scale: 2.0
+2026-01-13 17:09:53,935 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=5.48 vs. limit=2.0
+2026-01-13 17:10:00,956 INFO [train.py:895] (1/2) Epoch 1, batch 250, loss[loss=1.031, simple_loss=0.8593, pruned_loss=1.061, over 2763.00 frames. ], tot_loss[loss=1.091, simple_loss=0.9446, pruned_loss=1.099, over 391985.25 frames. ], batch size: 11, lr: 3.75e-02, grad_scale: 2.0
+2026-01-13 17:10:01,151 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=3.75 vs. limit=2.0
+2026-01-13 17:10:01,977 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=2.53 vs. limit=2.0
+2026-01-13 17:10:12,105 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=2.03 vs. limit=2.0
+2026-01-13 17:10:19,140 INFO [zipformer.py:1188] (1/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=296.0, num_to_drop=1, layers_to_drop={1}
+2026-01-13 17:10:20,730 INFO [zipformer.py:1188] (1/2) warmup_begin=1333.3, warmup_end=2000.0, batch_count=300.0, num_to_drop=2, layers_to_drop={0, 1}
+2026-01-13 17:10:21,011 INFO [optim.py:365] (1/2) Clipping_scale=2.0, grad-norm quartiles 2.628e+01 3.406e+01 3.889e+01 4.977e+01 1.495e+02, threshold=7.778e+01, percent-clipped=13.0
+2026-01-13 17:10:21,048 INFO [train.py:895] (1/2) Epoch 1, batch 300, loss[loss=0.8037, simple_loss=0.6637, pruned_loss=0.807, over 2885.00 frames. ], tot_loss[loss=1.029, simple_loss=0.8823, pruned_loss=1.034, over 428005.97 frames. ], batch size: 10, lr: 4.00e-02, grad_scale: 2.0
+2026-01-13 17:10:26,867 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=2.03 vs. limit=2.0
+2026-01-13 17:10:31,380 INFO [zipformer.py:2441] (1/2) attn_weights_entropy = tensor([3.6023, 3.6043, 3.6010, 3.6077, 3.6021, 3.6072, 3.6062, 3.6057],
+       device='cuda:1'), covar=tensor([0.0033, 0.0036, 0.0040, 0.0028, 0.0029, 0.0039, 0.0052, 0.0038],
+       device='cuda:1'), in_proj_covar=tensor([0.0009, 0.0008, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009],
+       device='cuda:1'), out_proj_covar=tensor([9.0297e-06, 8.5946e-06, 8.6648e-06, 8.7116e-06, 8.8798e-06, 8.5990e-06,
+        8.8286e-06, 8.7837e-06], device='cuda:1')
+2026-01-13 17:10:33,433 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=192, metric=19.84 vs. limit=2.0
+2026-01-13 17:10:33,517 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=96, metric=2.32 vs. limit=2.0
+2026-01-13 17:10:40,638 INFO [train.py:895] (1/2) Epoch 1, batch 350, loss[loss=0.9178, simple_loss=0.7621, pruned_loss=0.863, over 2690.00 frames. ], tot_loss[loss=0.9843, simple_loss=0.8369, pruned_loss=0.9828, over 455041.83 frames. ], batch size: 7, lr: 4.25e-02, grad_scale: 2.0
+2026-01-13 17:10:43,192 INFO [zipformer.py:1188] (1/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=357.0, num_to_drop=2, layers_to_drop={2, 3}
+2026-01-13 17:10:51,167 INFO [scaling.py:681] (1/2) Whitening: num_groups=1, num_channels=384, metric=15.06 vs. limit=5.0
+2026-01-13 17:10:54,931 INFO [zipformer.py:1188] (1/2) warmup_begin=2666.7, warmup_end=3333.3, batch_count=387.0, num_to_drop=1, layers_to_drop={0}
+2026-01-13 17:11:00,683 INFO [optim.py:365] (1/2) Clipping_scale=2.0, grad-norm quartiles 2.792e+01 3.460e+01 4.419e+01 5.164e+01 2.002e+02, threshold=8.837e+01, percent-clipped=7.0
+2026-01-13 17:11:00,720 INFO [train.py:895] (1/2) Epoch 1, batch 400, loss[loss=0.7745, simple_loss=0.6329, pruned_loss=0.7313, over 2716.00 frames. ], tot_loss[loss=0.9508, simple_loss=0.8014, pruned_loss=0.9415, over 474747.91 frames. ], batch size: 8, lr: 4.50e-02, grad_scale: 4.0
+2026-01-13 17:11:14,445 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=192, metric=15.28 vs. limit=2.0
+2026-01-13 17:11:15,775 INFO [zipformer.py:1188] (1/2) warmup_begin=1333.3, warmup_end=2000.0, batch_count=439.0, num_to_drop=2, layers_to_drop={0, 2}
+2026-01-13 17:11:19,454 INFO [zipformer.py:1188] (1/2) warmup_begin=3333.3, warmup_end=4000.0, batch_count=448.0, num_to_drop=2, layers_to_drop={0, 1}
+2026-01-13 17:11:20,503 INFO [train.py:895] (1/2) Epoch 1, batch 450, loss[loss=0.8531, simple_loss=0.6946, pruned_loss=0.7819, over 2672.00 frames. ], tot_loss[loss=0.9284, simple_loss=0.7761, pruned_loss=0.9079, over 491498.11 frames. ], batch size: 8, lr: 4.75e-02, grad_scale: 4.0
+2026-01-13 17:11:23,205 INFO [scaling.py:681] (1/2) Whitening: num_groups=8, num_channels=192, metric=14.11 vs. limit=2.0
+2026-01-13 17:11:30,442 INFO [scaling.py:681] (1/2) Whitening: num_groups=1, num_channels=384, metric=9.21 vs. limit=5.0
+2026-01-13 17:11:40,428 INFO [optim.py:365] (1/2) Clipping_scale=2.0, grad-norm quartiles 2.871e+01 3.474e+01 4.145e+01 5.434e+01 1.454e+02, threshold=8.291e+01, percent-clipped=4.0
+2026-01-13 17:11:40,465 INFO [train.py:895] (1/2) Epoch 1, batch 500, loss[loss=0.8659, simple_loss=0.6927, pruned_loss=0.7988, over 2806.00 frames. ], tot_loss[loss=0.9103, simple_loss=0.7551, pruned_loss=0.8777, over 504851.47 frames. ], batch size: 10, lr: 4.99e-02, grad_scale: 4.0

tensorboard/events.out.tfevents.1768323136.6ec37ec2ba95.217.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da79842ce2109414f3d22d8d057a74fa6834d0564f65cefc7609348b0bbb6050
+size 88

tensorboard/events.out.tfevents.1768323216.6ec37ec2ba95.324.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d6eab36efde4b857cc7e5d1e931baac6568e9d7b3d2b83a2b0663cd07690cb99
+size 135

tensorboard/events.out.tfevents.1768323254.6ec37ec2ba95.501.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8c51558b37d7235f244e95101572a614ddd83dcc0da82cc68be39db0f2974c45
+size 135

tensorboard/events.out.tfevents.1768323638.6ec37ec2ba95.678.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9722bfba8665b11089af7438d0d28f3e412a917f90090109dd167e2c7825351
+size 135

tensorboard/events.out.tfevents.1768323674.6ec37ec2ba95.851.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ccef400d06e7a278057a3931d6e1289fc8bab3f66e2544c741334ce1519f65c
+size 135

tensorboard/events.out.tfevents.1768323997.6ec37ec2ba95.1021.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56ed9a581c980c0d759e87e48e3006fdccbf4840ae4dc9ae582adb49b0c5ace8
+size 3584