efrainmain committed on
Commit
90b12e8
·
verified ·
1 Parent(s): b15143a

Upload ChunkFormer resume checkpoint latest_epoch=2 best_epoch=2 best_val_loss=155.457254

Browse files
FINETUNE_RESUME_README.md CHANGED
@@ -2,16 +2,21 @@
2
 
3
  This repo contains:
4
 
5
- - `pytorch_model.bin`: model weights for inference / loading model.
6
- - `resume_checkpoint/training_checkpoint.pt`: full checkpoint for resuming training.
7
- - `resume_checkpoint/model_state_dict.pt`: model state dict only.
8
- - `resume_checkpoint/optimizer.pt`: optimizer state, if available.
9
- - `resume_checkpoint/scheduler.pt`: scheduler state, if available.
10
- - `resume_checkpoint/scaler.pt`: AMP GradScaler state, if available.
11
  - `best_checkpoints.json`: validation loss metadata.
 
12
 
13
- Best epoch: 1
14
- Best val_loss: 192.81560051995353
15
- Train loss: inf
 
 
 
 
16
 
17
  Base model: khanhld/chunkformer-ctc-large-vie
 
2
 
3
  This repo contains:
4
 
5
+ - `pytorch_model.bin`: best model weights for inference / `ChunkFormerModel.from_pretrained`.
6
+ - `resume_checkpoint/training_checkpoint.pt`: latest full checkpoint for resuming training.
7
+ - `resume_checkpoint/model_state_dict.pt`: latest model state dict only.
8
+ - `resume_checkpoint/optimizer.pt`: latest optimizer state, if available.
9
+ - `resume_checkpoint/scheduler.pt`: latest scheduler state, if available.
10
+ - `resume_checkpoint/scaler.pt`: latest AMP GradScaler state, if available.
11
  - `best_checkpoints.json`: validation loss metadata.
12
+ - `history.json`: training history.
13
 
14
+ Latest trained epoch: 2
15
+ Latest train_loss: 178.68146068524644
16
+ Latest val_loss: 155.45725427576014
17
+
18
+ Best epoch: 2
19
+ Best train_loss: 178.68146068524644
20
+ Best val_loss: 155.45725427576014
21
 
22
  Base model: khanhld/chunkformer-ctc-large-vie
best_checkpoints.json CHANGED
@@ -1,8 +1,15 @@
1
  [
2
  {
 
 
 
 
 
 
 
3
  "epoch": 1,
4
- "val_loss": 192.81560051995353,
5
  "train_loss": 326.93824258185526,
6
- "path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt"
 
7
  }
8
  ]
 
1
  [
2
  {
3
+ "path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
4
+ "val_loss": 155.45725427576014,
5
+ "train_loss": 178.68146068524644,
6
+ "epoch": 2
7
+ },
8
+ {
9
+ "path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/loaded_epoch_1_val_loss_192.8156.pt",
10
  "epoch": 1,
 
11
  "train_loss": 326.93824258185526,
12
+ "val_loss": 192.81560051995353,
13
+ "source": "/root/.cache/huggingface/hub/models--efrainmain--chunkformer-ctc-vie-medical/snapshots/b15143a6a23e8174fb296dccfcfce9807e7456b1/resume_checkpoint/training_checkpoint.pt"
14
  }
15
  ]
history.json CHANGED
@@ -5,5 +5,12 @@
5
  "val_loss": 192.81560051995353,
6
  "lr": 1.0515468292711804e-07,
7
  "encoder_trainable": false
 
 
 
 
 
 
 
8
  }
9
  ]
 
5
  "val_loss": 192.81560051995353,
6
  "lr": 1.0515468292711804e-07,
7
  "encoder_trainable": false
8
+ },
9
+ {
10
+ "epoch": 2,
11
+ "train_loss": 178.68146068524644,
12
+ "val_loss": 155.45725427576014,
13
+ "lr": 9.865673153324975e-06,
14
+ "encoder_trainable": false
15
  }
16
  ]
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a059c4c310eefc7b0a3617969f66065cd77714ccaa124ff6919abb559221deab
3
  size 595480839
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7afaab521a8f916866676d34770baf1969fabac652cd1ae34c141c738a8e2329
3
  size 595480839
resume_checkpoint/best_checkpoints.json CHANGED
@@ -1,8 +1,15 @@
1
  [
2
  {
 
 
 
 
 
 
 
3
  "epoch": 1,
4
- "val_loss": 192.81560051995353,
5
  "train_loss": 326.93824258185526,
6
- "path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt"
 
7
  }
8
  ]
 
1
  [
2
  {
3
+ "path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
4
+ "val_loss": 155.45725427576014,
5
+ "train_loss": 178.68146068524644,
6
+ "epoch": 2
7
+ },
8
+ {
9
+ "path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/loaded_epoch_1_val_loss_192.8156.pt",
10
  "epoch": 1,
 
11
  "train_loss": 326.93824258185526,
12
+ "val_loss": 192.81560051995353,
13
+ "source": "/root/.cache/huggingface/hub/models--efrainmain--chunkformer-ctc-vie-medical/snapshots/b15143a6a23e8174fb296dccfcfce9807e7456b1/resume_checkpoint/training_checkpoint.pt"
14
  }
15
  ]
resume_checkpoint/history.json CHANGED
@@ -5,5 +5,12 @@
5
  "val_loss": 192.81560051995353,
6
  "lr": 1.0515468292711804e-07,
7
  "encoder_trainable": false
 
 
 
 
 
 
 
8
  }
9
  ]
 
5
  "val_loss": 192.81560051995353,
6
  "lr": 1.0515468292711804e-07,
7
  "encoder_trainable": false
8
+ },
9
+ {
10
+ "epoch": 2,
11
+ "train_loss": 178.68146068524644,
12
+ "val_loss": 155.45725427576014,
13
+ "lr": 9.865673153324975e-06,
14
+ "encoder_trainable": false
15
  }
16
  ]
resume_checkpoint/model_state_dict.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2280877d48f58d07ad9576af0be61499fdeb807678d9473b50cad58e55d4c43
3
- size 595483284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a67dc69aaaf40495ef6263463b6c9988a08f06963f72f26eebef17284144e9b6
3
+ size 595534292
resume_checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:145fa8b792a413b1c3fb509b27ac429040314b6cd4f25e7bae1da77bba91fc75
3
  size 287970059
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06656745b9befaf9f5060e966e9d02d239c1787212aadadc2ab7c1728abc94cd
3
  size 287970059
resume_checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:294e0d71ff4bf6c9a330f45f3200b5eb1db565d454c054091a0d21922053a55e
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43919c586204734cd7bf1a30e3768b07e6bcd0740d94d45727b84a0415abc6fb
3
  size 1383
resume_checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03775b05773d0e09256e22a3ef3692425bdf8073bd7a3fb9e193db74e54a1260
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88b5f1832222e7af4ace6e567564ed23150980548b1849d2cde15fec3222429
3
  size 1401
resume_checkpoint/trainer_state.json CHANGED
@@ -2,18 +2,25 @@
2
  "resume_type": "chunkformer_finetune_resume",
3
  "base_repo": "khanhld/chunkformer-ctc-large-vie",
4
  "repo_id": "efrainmain/chunkformer-ctc-vie-medical",
5
- "best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
6
- "checkpoint_file_used": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
7
- "epoch": 1,
8
- "best_epoch": 1,
9
- "best_val_loss": 192.81560051995353,
10
- "val_loss": 192.81560051995353,
11
- "train_loss": 326.93824258185526,
 
 
 
12
  "has_optimizer_state": true,
13
  "has_scheduler_state": true,
14
  "has_scaler_state": true,
15
  "config": {
16
- "model_name": "khanhld/chunkformer-ctc-large-vie",
 
 
 
 
17
  "dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
18
  "train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
19
  "validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
@@ -21,12 +28,13 @@
21
  "preprocess_root": "/kaggle/working/chunkformer_preprocessed",
22
  "output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
23
  "batch_size": 8,
24
- "num_epochs": 1,
25
  "learning_rate": 1e-05,
26
  "weight_decay": 0.01,
27
  "scheduler_type": "warmup_cosine",
28
  "warmup_steps": 500,
29
  "min_learning_rate": 1e-07,
 
30
  "freeze_encoder_epochs": 2,
31
  "patience": 3,
32
  "keep_best": 2,
@@ -36,7 +44,11 @@
36
  "use_amp": true,
37
  "skip_bad_samples": true,
38
  "max_train_batches": null,
39
- "max_eval_batches": null
40
- },
41
- "best_train_loss": 326.93824258185526
 
 
 
 
42
  }
 
2
  "resume_type": "chunkformer_finetune_resume",
3
  "base_repo": "khanhld/chunkformer-ctc-large-vie",
4
  "repo_id": "efrainmain/chunkformer-ctc-vie-medical",
5
+ "epoch": 2,
6
+ "trained_epochs": 2,
7
+ "next_epoch": 3,
8
+ "train_loss": 178.68146068524644,
9
+ "val_loss": 155.45725427576014,
10
+ "best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
11
+ "checkpoint_file_used_for_best_model": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
12
+ "best_epoch": 2,
13
+ "best_val_loss": 155.45725427576014,
14
+ "best_train_loss": 178.68146068524644,
15
  "has_optimizer_state": true,
16
  "has_scheduler_state": true,
17
  "has_scaler_state": true,
18
  "config": {
19
+ "base_model_name": "khanhld/chunkformer-ctc-large-vie",
20
+ "resume_from_checkpoint": true,
21
+ "resume_repo_id": "efrainmain/chunkformer-ctc-vie-medical",
22
+ "resume_checkpoint_file": "resume_checkpoint/training_checkpoint.pt",
23
+ "model_name": "efrainmain/chunkformer-ctc-vie-medical",
24
  "dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
25
  "train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
26
  "validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
 
28
  "preprocess_root": "/kaggle/working/chunkformer_preprocessed",
29
  "output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
30
  "batch_size": 8,
31
+ "num_epochs": 3,
32
  "learning_rate": 1e-05,
33
  "weight_decay": 0.01,
34
  "scheduler_type": "warmup_cosine",
35
  "warmup_steps": 500,
36
  "min_learning_rate": 1e-07,
37
+ "freeze_encoder": true,
38
  "freeze_encoder_epochs": 2,
39
  "patience": 3,
40
  "keep_best": 2,
 
44
  "use_amp": true,
45
  "skip_bad_samples": true,
46
  "max_train_batches": null,
47
+ "max_eval_batches": null,
48
+ "resume_load_optimizer": true,
49
+ "resume_load_scheduler": true,
50
+ "resume_load_scaler": true,
51
+ "strict_resume_model_load": false,
52
+ "save_loaded_checkpoint_locally": true
53
+ }
54
  }
resume_checkpoint/training_checkpoint.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2d252e5d611ddeffabfb89f1f618b2f651429e40209f85264c01d8eeb93be0d
3
- size 883464805
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbd83dfb6087d576c758f88b49a5bdcf042ae538c0ee4c61f4e354ee82a926a1
3
+ size 883517285
trainer_state.json CHANGED
@@ -2,18 +2,25 @@
2
  "resume_type": "chunkformer_finetune_resume",
3
  "base_repo": "khanhld/chunkformer-ctc-large-vie",
4
  "repo_id": "efrainmain/chunkformer-ctc-vie-medical",
5
- "best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
6
- "checkpoint_file_used": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
7
- "epoch": 1,
8
- "best_epoch": 1,
9
- "best_val_loss": 192.81560051995353,
10
- "val_loss": 192.81560051995353,
11
- "train_loss": 326.93824258185526,
 
 
 
12
  "has_optimizer_state": true,
13
  "has_scheduler_state": true,
14
  "has_scaler_state": true,
15
  "config": {
16
- "model_name": "khanhld/chunkformer-ctc-large-vie",
 
 
 
 
17
  "dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
18
  "train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
19
  "validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
@@ -21,12 +28,13 @@
21
  "preprocess_root": "/kaggle/working/chunkformer_preprocessed",
22
  "output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
23
  "batch_size": 8,
24
- "num_epochs": 1,
25
  "learning_rate": 1e-05,
26
  "weight_decay": 0.01,
27
  "scheduler_type": "warmup_cosine",
28
  "warmup_steps": 500,
29
  "min_learning_rate": 1e-07,
 
30
  "freeze_encoder_epochs": 2,
31
  "patience": 3,
32
  "keep_best": 2,
@@ -36,7 +44,11 @@
36
  "use_amp": true,
37
  "skip_bad_samples": true,
38
  "max_train_batches": null,
39
- "max_eval_batches": null
40
- },
41
- "best_train_loss": 326.93824258185526
 
 
 
 
42
  }
 
2
  "resume_type": "chunkformer_finetune_resume",
3
  "base_repo": "khanhld/chunkformer-ctc-large-vie",
4
  "repo_id": "efrainmain/chunkformer-ctc-vie-medical",
5
+ "epoch": 2,
6
+ "trained_epochs": 2,
7
+ "next_epoch": 3,
8
+ "train_loss": 178.68146068524644,
9
+ "val_loss": 155.45725427576014,
10
+ "best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
11
+ "checkpoint_file_used_for_best_model": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
12
+ "best_epoch": 2,
13
+ "best_val_loss": 155.45725427576014,
14
+ "best_train_loss": 178.68146068524644,
15
  "has_optimizer_state": true,
16
  "has_scheduler_state": true,
17
  "has_scaler_state": true,
18
  "config": {
19
+ "base_model_name": "khanhld/chunkformer-ctc-large-vie",
20
+ "resume_from_checkpoint": true,
21
+ "resume_repo_id": "efrainmain/chunkformer-ctc-vie-medical",
22
+ "resume_checkpoint_file": "resume_checkpoint/training_checkpoint.pt",
23
+ "model_name": "efrainmain/chunkformer-ctc-vie-medical",
24
  "dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
25
  "train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
26
  "validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
 
28
  "preprocess_root": "/kaggle/working/chunkformer_preprocessed",
29
  "output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
30
  "batch_size": 8,
31
+ "num_epochs": 3,
32
  "learning_rate": 1e-05,
33
  "weight_decay": 0.01,
34
  "scheduler_type": "warmup_cosine",
35
  "warmup_steps": 500,
36
  "min_learning_rate": 1e-07,
37
+ "freeze_encoder": true,
38
  "freeze_encoder_epochs": 2,
39
  "patience": 3,
40
  "keep_best": 2,
 
44
  "use_amp": true,
45
  "skip_bad_samples": true,
46
  "max_train_batches": null,
47
+ "max_eval_batches": null,
48
+ "resume_load_optimizer": true,
49
+ "resume_load_scheduler": true,
50
+ "resume_load_scaler": true,
51
+ "strict_resume_model_load": false,
52
+ "save_loaded_checkpoint_locally": true
53
+ }
54
  }