Upload ChunkFormer resume checkpoint latest_epoch=2 best_epoch=2 best_val_loss=155.457254
Browse files- FINETUNE_RESUME_README.md +14 -9
- best_checkpoints.json +9 -2
- history.json +7 -0
- pytorch_model.bin +1 -1
- resume_checkpoint/best_checkpoints.json +9 -2
- resume_checkpoint/history.json +7 -0
- resume_checkpoint/model_state_dict.pt +2 -2
- resume_checkpoint/optimizer.pt +1 -1
- resume_checkpoint/scaler.pt +1 -1
- resume_checkpoint/scheduler.pt +1 -1
- resume_checkpoint/trainer_state.json +24 -12
- resume_checkpoint/training_checkpoint.pt +2 -2
- trainer_state.json +24 -12
FINETUNE_RESUME_README.md
CHANGED
|
@@ -2,16 +2,21 @@
|
|
| 2 |
|
| 3 |
This repo contains:
|
| 4 |
|
| 5 |
-
- `pytorch_model.bin`: model weights for inference /
|
| 6 |
-
- `resume_checkpoint/training_checkpoint.pt`: full checkpoint for resuming training.
|
| 7 |
-
- `resume_checkpoint/model_state_dict.pt`: model state dict only.
|
| 8 |
-
- `resume_checkpoint/optimizer.pt`: optimizer state, if available.
|
| 9 |
-
- `resume_checkpoint/scheduler.pt`: scheduler state, if available.
|
| 10 |
-
- `resume_checkpoint/scaler.pt`: AMP GradScaler state, if available.
|
| 11 |
- `best_checkpoints.json`: validation loss metadata.
|
|
|
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
Base model: khanhld/chunkformer-ctc-large-vie
|
|
|
|
| 2 |
|
| 3 |
This repo contains:
|
| 4 |
|
| 5 |
+
- `pytorch_model.bin`: best model weights for inference / `ChunkFormerModel.from_pretrained`.
|
| 6 |
+
- `resume_checkpoint/training_checkpoint.pt`: latest full checkpoint for resuming training.
|
| 7 |
+
- `resume_checkpoint/model_state_dict.pt`: latest model state dict only.
|
| 8 |
+
- `resume_checkpoint/optimizer.pt`: latest optimizer state, if available.
|
| 9 |
+
- `resume_checkpoint/scheduler.pt`: latest scheduler state, if available.
|
| 10 |
+
- `resume_checkpoint/scaler.pt`: latest AMP GradScaler state, if available.
|
| 11 |
- `best_checkpoints.json`: validation loss metadata.
|
| 12 |
+
- `history.json`: training history.
|
| 13 |
|
| 14 |
+
Latest trained epoch: 2
|
| 15 |
+
Latest train_loss: 178.68146068524644
|
| 16 |
+
Latest val_loss: 155.45725427576014
|
| 17 |
+
|
| 18 |
+
Best epoch: 2
|
| 19 |
+
Best train_loss: 178.68146068524644
|
| 20 |
+
Best val_loss: 155.45725427576014
|
| 21 |
|
| 22 |
Base model: khanhld/chunkformer-ctc-large-vie
|
best_checkpoints.json
CHANGED
|
@@ -1,8 +1,15 @@
|
|
| 1 |
[
|
| 2 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"epoch": 1,
|
| 4 |
-
"val_loss": 192.81560051995353,
|
| 5 |
"train_loss": 326.93824258185526,
|
| 6 |
-
"
|
|
|
|
| 7 |
}
|
| 8 |
]
|
|
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
+
"path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
|
| 4 |
+
"val_loss": 155.45725427576014,
|
| 5 |
+
"train_loss": 178.68146068524644,
|
| 6 |
+
"epoch": 2
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/loaded_epoch_1_val_loss_192.8156.pt",
|
| 10 |
"epoch": 1,
|
|
|
|
| 11 |
"train_loss": 326.93824258185526,
|
| 12 |
+
"val_loss": 192.81560051995353,
|
| 13 |
+
"source": "/root/.cache/huggingface/hub/models--efrainmain--chunkformer-ctc-vie-medical/snapshots/b15143a6a23e8174fb296dccfcfce9807e7456b1/resume_checkpoint/training_checkpoint.pt"
|
| 14 |
}
|
| 15 |
]
|
history.json
CHANGED
|
@@ -5,5 +5,12 @@
|
|
| 5 |
"val_loss": 192.81560051995353,
|
| 6 |
"lr": 1.0515468292711804e-07,
|
| 7 |
"encoder_trainable": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
}
|
| 9 |
]
|
|
|
|
| 5 |
"val_loss": 192.81560051995353,
|
| 6 |
"lr": 1.0515468292711804e-07,
|
| 7 |
"encoder_trainable": false
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"epoch": 2,
|
| 11 |
+
"train_loss": 178.68146068524644,
|
| 12 |
+
"val_loss": 155.45725427576014,
|
| 13 |
+
"lr": 9.865673153324975e-06,
|
| 14 |
+
"encoder_trainable": false
|
| 15 |
}
|
| 16 |
]
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 595480839
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7afaab521a8f916866676d34770baf1969fabac652cd1ae34c141c738a8e2329
|
| 3 |
size 595480839
|
resume_checkpoint/best_checkpoints.json
CHANGED
|
@@ -1,8 +1,15 @@
|
|
| 1 |
[
|
| 2 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"epoch": 1,
|
| 4 |
-
"val_loss": 192.81560051995353,
|
| 5 |
"train_loss": 326.93824258185526,
|
| 6 |
-
"
|
|
|
|
| 7 |
}
|
| 8 |
]
|
|
|
|
| 1 |
[
|
| 2 |
{
|
| 3 |
+
"path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
|
| 4 |
+
"val_loss": 155.45725427576014,
|
| 5 |
+
"train_loss": 178.68146068524644,
|
| 6 |
+
"epoch": 2
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/loaded_epoch_1_val_loss_192.8156.pt",
|
| 10 |
"epoch": 1,
|
|
|
|
| 11 |
"train_loss": 326.93824258185526,
|
| 12 |
+
"val_loss": 192.81560051995353,
|
| 13 |
+
"source": "/root/.cache/huggingface/hub/models--efrainmain--chunkformer-ctc-vie-medical/snapshots/b15143a6a23e8174fb296dccfcfce9807e7456b1/resume_checkpoint/training_checkpoint.pt"
|
| 14 |
}
|
| 15 |
]
|
resume_checkpoint/history.json
CHANGED
|
@@ -5,5 +5,12 @@
|
|
| 5 |
"val_loss": 192.81560051995353,
|
| 6 |
"lr": 1.0515468292711804e-07,
|
| 7 |
"encoder_trainable": false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
}
|
| 9 |
]
|
|
|
|
| 5 |
"val_loss": 192.81560051995353,
|
| 6 |
"lr": 1.0515468292711804e-07,
|
| 7 |
"encoder_trainable": false
|
| 8 |
+
},
|
| 9 |
+
{
|
| 10 |
+
"epoch": 2,
|
| 11 |
+
"train_loss": 178.68146068524644,
|
| 12 |
+
"val_loss": 155.45725427576014,
|
| 13 |
+
"lr": 9.865673153324975e-06,
|
| 14 |
+
"encoder_trainable": false
|
| 15 |
}
|
| 16 |
]
|
resume_checkpoint/model_state_dict.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a67dc69aaaf40495ef6263463b6c9988a08f06963f72f26eebef17284144e9b6
|
| 3 |
+
size 595534292
|
resume_checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 287970059
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06656745b9befaf9f5060e966e9d02d239c1787212aadadc2ab7c1728abc94cd
|
| 3 |
size 287970059
|
resume_checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43919c586204734cd7bf1a30e3768b07e6bcd0740d94d45727b84a0415abc6fb
|
| 3 |
size 1383
|
resume_checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1401
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a88b5f1832222e7af4ace6e567564ed23150980548b1849d2cde15fec3222429
|
| 3 |
size 1401
|
resume_checkpoint/trainer_state.json
CHANGED
|
@@ -2,18 +2,25 @@
|
|
| 2 |
"resume_type": "chunkformer_finetune_resume",
|
| 3 |
"base_repo": "khanhld/chunkformer-ctc-large-vie",
|
| 4 |
"repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
|
|
|
|
|
|
|
|
|
| 12 |
"has_optimizer_state": true,
|
| 13 |
"has_scheduler_state": true,
|
| 14 |
"has_scaler_state": true,
|
| 15 |
"config": {
|
| 16 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
|
| 18 |
"train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
|
| 19 |
"validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
|
|
@@ -21,12 +28,13 @@
|
|
| 21 |
"preprocess_root": "/kaggle/working/chunkformer_preprocessed",
|
| 22 |
"output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
|
| 23 |
"batch_size": 8,
|
| 24 |
-
"num_epochs":
|
| 25 |
"learning_rate": 1e-05,
|
| 26 |
"weight_decay": 0.01,
|
| 27 |
"scheduler_type": "warmup_cosine",
|
| 28 |
"warmup_steps": 500,
|
| 29 |
"min_learning_rate": 1e-07,
|
|
|
|
| 30 |
"freeze_encoder_epochs": 2,
|
| 31 |
"patience": 3,
|
| 32 |
"keep_best": 2,
|
|
@@ -36,7 +44,11 @@
|
|
| 36 |
"use_amp": true,
|
| 37 |
"skip_bad_samples": true,
|
| 38 |
"max_train_batches": null,
|
| 39 |
-
"max_eval_batches": null
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
|
|
|
| 2 |
"resume_type": "chunkformer_finetune_resume",
|
| 3 |
"base_repo": "khanhld/chunkformer-ctc-large-vie",
|
| 4 |
"repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 5 |
+
"epoch": 2,
|
| 6 |
+
"trained_epochs": 2,
|
| 7 |
+
"next_epoch": 3,
|
| 8 |
+
"train_loss": 178.68146068524644,
|
| 9 |
+
"val_loss": 155.45725427576014,
|
| 10 |
+
"best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
|
| 11 |
+
"checkpoint_file_used_for_best_model": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
|
| 12 |
+
"best_epoch": 2,
|
| 13 |
+
"best_val_loss": 155.45725427576014,
|
| 14 |
+
"best_train_loss": 178.68146068524644,
|
| 15 |
"has_optimizer_state": true,
|
| 16 |
"has_scheduler_state": true,
|
| 17 |
"has_scaler_state": true,
|
| 18 |
"config": {
|
| 19 |
+
"base_model_name": "khanhld/chunkformer-ctc-large-vie",
|
| 20 |
+
"resume_from_checkpoint": true,
|
| 21 |
+
"resume_repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 22 |
+
"resume_checkpoint_file": "resume_checkpoint/training_checkpoint.pt",
|
| 23 |
+
"model_name": "efrainmain/chunkformer-ctc-vie-medical",
|
| 24 |
"dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
|
| 25 |
"train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
|
| 26 |
"validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
|
|
|
|
| 28 |
"preprocess_root": "/kaggle/working/chunkformer_preprocessed",
|
| 29 |
"output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
|
| 30 |
"batch_size": 8,
|
| 31 |
+
"num_epochs": 3,
|
| 32 |
"learning_rate": 1e-05,
|
| 33 |
"weight_decay": 0.01,
|
| 34 |
"scheduler_type": "warmup_cosine",
|
| 35 |
"warmup_steps": 500,
|
| 36 |
"min_learning_rate": 1e-07,
|
| 37 |
+
"freeze_encoder": true,
|
| 38 |
"freeze_encoder_epochs": 2,
|
| 39 |
"patience": 3,
|
| 40 |
"keep_best": 2,
|
|
|
|
| 44 |
"use_amp": true,
|
| 45 |
"skip_bad_samples": true,
|
| 46 |
"max_train_batches": null,
|
| 47 |
+
"max_eval_batches": null,
|
| 48 |
+
"resume_load_optimizer": true,
|
| 49 |
+
"resume_load_scheduler": true,
|
| 50 |
+
"resume_load_scaler": true,
|
| 51 |
+
"strict_resume_model_load": false,
|
| 52 |
+
"save_loaded_checkpoint_locally": true
|
| 53 |
+
}
|
| 54 |
}
|
resume_checkpoint/training_checkpoint.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dbd83dfb6087d576c758f88b49a5bdcf042ae538c0ee4c61f4e354ee82a926a1
|
| 3 |
+
size 883517285
|
trainer_state.json
CHANGED
|
@@ -2,18 +2,25 @@
|
|
| 2 |
"resume_type": "chunkformer_finetune_resume",
|
| 3 |
"base_repo": "khanhld/chunkformer-ctc-large-vie",
|
| 4 |
"repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 5 |
-
"
|
| 6 |
-
"
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"
|
| 10 |
-
"
|
| 11 |
-
"
|
|
|
|
|
|
|
|
|
|
| 12 |
"has_optimizer_state": true,
|
| 13 |
"has_scheduler_state": true,
|
| 14 |
"has_scaler_state": true,
|
| 15 |
"config": {
|
| 16 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
|
| 18 |
"train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
|
| 19 |
"validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
|
|
@@ -21,12 +28,13 @@
|
|
| 21 |
"preprocess_root": "/kaggle/working/chunkformer_preprocessed",
|
| 22 |
"output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
|
| 23 |
"batch_size": 8,
|
| 24 |
-
"num_epochs":
|
| 25 |
"learning_rate": 1e-05,
|
| 26 |
"weight_decay": 0.01,
|
| 27 |
"scheduler_type": "warmup_cosine",
|
| 28 |
"warmup_steps": 500,
|
| 29 |
"min_learning_rate": 1e-07,
|
|
|
|
| 30 |
"freeze_encoder_epochs": 2,
|
| 31 |
"patience": 3,
|
| 32 |
"keep_best": 2,
|
|
@@ -36,7 +44,11 @@
|
|
| 36 |
"use_amp": true,
|
| 37 |
"skip_bad_samples": true,
|
| 38 |
"max_train_batches": null,
|
| 39 |
-
"max_eval_batches": null
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
}
|
|
|
|
| 2 |
"resume_type": "chunkformer_finetune_resume",
|
| 3 |
"base_repo": "khanhld/chunkformer-ctc-large-vie",
|
| 4 |
"repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 5 |
+
"epoch": 2,
|
| 6 |
+
"trained_epochs": 2,
|
| 7 |
+
"next_epoch": 3,
|
| 8 |
+
"train_loss": 178.68146068524644,
|
| 9 |
+
"val_loss": 155.45725427576014,
|
| 10 |
+
"best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
|
| 11 |
+
"checkpoint_file_used_for_best_model": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_2_val_loss_155.4573.pt",
|
| 12 |
+
"best_epoch": 2,
|
| 13 |
+
"best_val_loss": 155.45725427576014,
|
| 14 |
+
"best_train_loss": 178.68146068524644,
|
| 15 |
"has_optimizer_state": true,
|
| 16 |
"has_scheduler_state": true,
|
| 17 |
"has_scaler_state": true,
|
| 18 |
"config": {
|
| 19 |
+
"base_model_name": "khanhld/chunkformer-ctc-large-vie",
|
| 20 |
+
"resume_from_checkpoint": true,
|
| 21 |
+
"resume_repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 22 |
+
"resume_checkpoint_file": "resume_checkpoint/training_checkpoint.pt",
|
| 23 |
+
"model_name": "efrainmain/chunkformer-ctc-vie-medical",
|
| 24 |
"dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
|
| 25 |
"train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
|
| 26 |
"validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
|
|
|
|
| 28 |
"preprocess_root": "/kaggle/working/chunkformer_preprocessed",
|
| 29 |
"output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
|
| 30 |
"batch_size": 8,
|
| 31 |
+
"num_epochs": 3,
|
| 32 |
"learning_rate": 1e-05,
|
| 33 |
"weight_decay": 0.01,
|
| 34 |
"scheduler_type": "warmup_cosine",
|
| 35 |
"warmup_steps": 500,
|
| 36 |
"min_learning_rate": 1e-07,
|
| 37 |
+
"freeze_encoder": true,
|
| 38 |
"freeze_encoder_epochs": 2,
|
| 39 |
"patience": 3,
|
| 40 |
"keep_best": 2,
|
|
|
|
| 44 |
"use_amp": true,
|
| 45 |
"skip_bad_samples": true,
|
| 46 |
"max_train_batches": null,
|
| 47 |
+
"max_eval_batches": null,
|
| 48 |
+
"resume_load_optimizer": true,
|
| 49 |
+
"resume_load_scheduler": true,
|
| 50 |
+
"resume_load_scaler": true,
|
| 51 |
+
"strict_resume_model_load": false,
|
| 52 |
+
"save_loaded_checkpoint_locally": true
|
| 53 |
+
}
|
| 54 |
}
|