Upload finetuned ChunkFormer with resume checkpoint epoch=1 val_loss=192.815601
Browse files- FINETUNE_RESUME_README.md +17 -0
- best_checkpoints.json +8 -0
- history.json +9 -0
- resume_checkpoint/best_checkpoints.json +8 -0
- resume_checkpoint/history.json +9 -0
- resume_checkpoint/model_state_dict.pt +3 -0
- resume_checkpoint/optimizer.pt +3 -0
- resume_checkpoint/scaler.pt +3 -0
- resume_checkpoint/scheduler.pt +3 -0
- resume_checkpoint/trainer_state.json +41 -0
- resume_checkpoint/training_checkpoint.pt +3 -0
- trainer_state.json +41 -0
FINETUNE_RESUME_README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ChunkFormer fine-tuned checkpoint
|
| 2 |
+
|
| 3 |
+
This repo contains:
|
| 4 |
+
|
| 5 |
+
- `pytorch_model.bin`: model weights for loading the model for inference.
|
| 6 |
+
- `resume_checkpoint/training_checkpoint.pt`: full checkpoint for resuming training.
|
| 7 |
+
- `resume_checkpoint/model_state_dict.pt`: model state dict only.
|
| 8 |
+
- `resume_checkpoint/optimizer.pt`: optimizer state, if available.
|
| 9 |
+
- `resume_checkpoint/scheduler.pt`: scheduler state, if available.
|
| 10 |
+
- `resume_checkpoint/scaler.pt`: AMP GradScaler state, if available.
|
| 11 |
+
- `best_checkpoints.json`: validation loss metadata.
|
| 12 |
+
|
| 13 |
+
Best epoch: 1
|
| 14 |
+
Best val_loss: 192.81560051995353
|
| 15 |
+
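Train loss: 326.93824258185526 (epoch 1, from `history.json`; `best_checkpoints.json` records a non-finite value for this field)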
|
| 16 |
+
|
| 17 |
+
Base model: khanhld/chunkformer-ctc-large-vie
|
best_checkpoints.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 1,
|
| 4 |
+
"val_loss": 192.81560051995353,
|
| 5 |
+
"train_loss": null,
|
| 6 |
+
"path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt"
|
| 7 |
+
}
|
| 8 |
+
]
|
history.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 1,
|
| 4 |
+
"train_loss": 326.93824258185526,
|
| 5 |
+
"val_loss": 192.81560051995353,
|
| 6 |
+
"lr": 1.0515468292711804e-07,
|
| 7 |
+
"encoder_trainable": false
|
| 8 |
+
}
|
| 9 |
+
]
|
resume_checkpoint/best_checkpoints.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 1,
|
| 4 |
+
"val_loss": 192.81560051995353,
|
| 5 |
+
"train_loss": null,
|
| 6 |
+
"path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt"
|
| 7 |
+
}
|
| 8 |
+
]
|
resume_checkpoint/history.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 1,
|
| 4 |
+
"train_loss": 326.93824258185526,
|
| 5 |
+
"val_loss": 192.81560051995353,
|
| 6 |
+
"lr": 1.0515468292711804e-07,
|
| 7 |
+
"encoder_trainable": false
|
| 8 |
+
}
|
| 9 |
+
]
|
resume_checkpoint/model_state_dict.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2280877d48f58d07ad9576af0be61499fdeb807678d9473b50cad58e55d4c43
|
| 3 |
+
size 595483284
|
resume_checkpoint/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:145fa8b792a413b1c3fb509b27ac429040314b6cd4f25e7bae1da77bba91fc75
|
| 3 |
+
size 287970059
|
resume_checkpoint/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:294e0d71ff4bf6c9a330f45f3200b5eb1db565d454c054091a0d21922053a55e
|
| 3 |
+
size 1383
|
resume_checkpoint/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03775b05773d0e09256e22a3ef3692425bdf8073bd7a3fb9e193db74e54a1260
|
| 3 |
+
size 1401
|
resume_checkpoint/trainer_state.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resume_type": "chunkformer_finetune_resume",
|
| 3 |
+
"base_repo": "khanhld/chunkformer-ctc-large-vie",
|
| 4 |
+
"repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 5 |
+
"best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
|
| 6 |
+
"checkpoint_file_used": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
|
| 7 |
+
"epoch": 1,
|
| 8 |
+
"best_epoch": 1,
|
| 9 |
+
"best_val_loss": 192.81560051995353,
|
| 10 |
+
"val_loss": 192.81560051995353,
|
| 11 |
+
"train_loss": null,
|
| 12 |
+
"has_optimizer_state": true,
|
| 13 |
+
"has_scheduler_state": true,
|
| 14 |
+
"has_scaler_state": true,
|
| 15 |
+
"config": {
|
| 16 |
+
"model_name": "khanhld/chunkformer-ctc-large-vie",
|
| 17 |
+
"dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
|
| 18 |
+
"train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
|
| 19 |
+
"validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
|
| 20 |
+
"test_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/test.parquet",
|
| 21 |
+
"preprocess_root": "/kaggle/working/chunkformer_preprocessed",
|
| 22 |
+
"output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
|
| 23 |
+
"batch_size": 8,
|
| 24 |
+
"num_epochs": 1,
|
| 25 |
+
"learning_rate": 1e-05,
|
| 26 |
+
"weight_decay": 0.01,
|
| 27 |
+
"scheduler_type": "warmup_cosine",
|
| 28 |
+
"warmup_steps": 500,
|
| 29 |
+
"min_learning_rate": 1e-07,
|
| 30 |
+
"freeze_encoder_epochs": 2,
|
| 31 |
+
"patience": 3,
|
| 32 |
+
"keep_best": 2,
|
| 33 |
+
"max_grad_norm": 5.0,
|
| 34 |
+
"num_workers": 0,
|
| 35 |
+
"device": "cuda",
|
| 36 |
+
"use_amp": true,
|
| 37 |
+
"skip_bad_samples": true,
|
| 38 |
+
"max_train_batches": null,
|
| 39 |
+
"max_eval_batches": null
|
| 40 |
+
}
|
| 41 |
+
}
|
resume_checkpoint/training_checkpoint.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2d252e5d611ddeffabfb89f1f618b2f651429e40209f85264c01d8eeb93be0d
|
| 3 |
+
size 883464805
|
trainer_state.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"resume_type": "chunkformer_finetune_resume",
|
| 3 |
+
"base_repo": "khanhld/chunkformer-ctc-large-vie",
|
| 4 |
+
"repo_id": "efrainmain/chunkformer-ctc-vie-medical",
|
| 5 |
+
"best_checkpoint_path": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
|
| 6 |
+
"checkpoint_file_used": "/kaggle/working/chunkformer-ctc-large-vie-finetune/epoch_1_val_loss_192.8156.pt",
|
| 7 |
+
"epoch": 1,
|
| 8 |
+
"best_epoch": 1,
|
| 9 |
+
"best_val_loss": 192.81560051995353,
|
| 10 |
+
"val_loss": 192.81560051995353,
|
| 11 |
+
"train_loss": null,
|
| 12 |
+
"has_optimizer_state": true,
|
| 13 |
+
"has_scheduler_state": true,
|
| 14 |
+
"has_scaler_state": true,
|
| 15 |
+
"config": {
|
| 16 |
+
"model_name": "khanhld/chunkformer-ctc-large-vie",
|
| 17 |
+
"dataset_root": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset",
|
| 18 |
+
"train_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/train.parquet",
|
| 19 |
+
"validation_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/validation.parquet",
|
| 20 |
+
"test_parquet": "/kaggle/input/datasets/lqucng/vietmed-vimedcss-dataset/test.parquet",
|
| 21 |
+
"preprocess_root": "/kaggle/working/chunkformer_preprocessed",
|
| 22 |
+
"output_dir": "/kaggle/working/chunkformer-ctc-large-vie-finetune",
|
| 23 |
+
"batch_size": 8,
|
| 24 |
+
"num_epochs": 1,
|
| 25 |
+
"learning_rate": 1e-05,
|
| 26 |
+
"weight_decay": 0.01,
|
| 27 |
+
"scheduler_type": "warmup_cosine",
|
| 28 |
+
"warmup_steps": 500,
|
| 29 |
+
"min_learning_rate": 1e-07,
|
| 30 |
+
"freeze_encoder_epochs": 2,
|
| 31 |
+
"patience": 3,
|
| 32 |
+
"keep_best": 2,
|
| 33 |
+
"max_grad_norm": 5.0,
|
| 34 |
+
"num_workers": 0,
|
| 35 |
+
"device": "cuda",
|
| 36 |
+
"use_amp": true,
|
| 37 |
+
"skip_bad_samples": true,
|
| 38 |
+
"max_train_batches": null,
|
| 39 |
+
"max_eval_batches": null
|
| 40 |
+
}
|
| 41 |
+
}
|