Training in progress, epoch 1, checkpoint
Browse files- checkpoint-3599/adapter_model.safetensors +1 -1
- checkpoint-3599/optimizer.pt +1 -1
- checkpoint-3599/rng_state_0.pth +1 -1
- checkpoint-3599/rng_state_1.pth +1 -1
- checkpoint-3599/rng_state_2.pth +1 -1
- checkpoint-3599/rng_state_3.pth +1 -1
- checkpoint-3599/scheduler.pt +1 -1
- checkpoint-3599/trainer_state.json +33 -33
- checkpoint-3599/training_args.bin +1 -1
checkpoint-3599/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 541459256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98ea57b88e77d0ad7464517a3a05f2d59c6197c3a9f78a086e88b01e15b346f4
|
| 3 |
size 541459256
|
checkpoint-3599/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 33662074
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa303a5c6351d717792656330fd7ca8a3d6f7d5c2dea6d16c3cf1c104ae52c6e
|
| 3 |
size 33662074
|
checkpoint-3599/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8409cfd85ce846fced25a9d710751277f8eb105ecd9b11986a85acca247b7f5d
|
| 3 |
size 15024
|
checkpoint-3599/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:864dbefa7c8251ca68d1d3b4700787ea5586f8e149337d67d4a16769235cdfc0
|
| 3 |
size 15024
|
checkpoint-3599/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ab4464c17dd70ab055e558bb33f713fce0403d324bcb68cbf0b9ade488e1e4f
|
| 3 |
size 15024
|
checkpoint-3599/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d158a9696b46d55b7fe4fd85e5e44cc5b5c857659799d699bf3d2b08ee524153
|
| 3 |
size 15024
|
checkpoint-3599/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:389dc0455bfde9e5bd7f663cc1299e3355b7c51181720b0bb246350e7f42f84d
|
| 3 |
size 1064
|
checkpoint-3599/trainer_state.json
CHANGED
|
@@ -10,83 +10,83 @@
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13892747985551543,
|
| 13 |
-
"grad_norm": 0.
|
| 14 |
-
"learning_rate": 0.
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 500
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.2000555709919422,
|
| 20 |
-
"eval_loss": 0.
|
| 21 |
-
"eval_runtime":
|
| 22 |
-
"eval_samples_per_second":
|
| 23 |
-
"eval_steps_per_second":
|
| 24 |
"step": 720
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.27785495971103086,
|
| 28 |
-
"grad_norm": 0.
|
| 29 |
"learning_rate": 0.00039075669167361305,
|
| 30 |
-
"loss": 0.
|
| 31 |
"step": 1000
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.4001111419838844,
|
| 35 |
-
"eval_loss": 0.
|
| 36 |
-
"eval_runtime":
|
| 37 |
-
"eval_samples_per_second":
|
| 38 |
-
"eval_steps_per_second":
|
| 39 |
"step": 1440
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.41678243956654626,
|
| 43 |
-
"grad_norm": 0.
|
| 44 |
"learning_rate": 0.0003861257756784292,
|
| 45 |
-
"loss": 0.
|
| 46 |
"step": 1500
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.5557099194220617,
|
| 50 |
-
"grad_norm": 0.
|
| 51 |
"learning_rate": 0.00038149485968324534,
|
| 52 |
-
"loss": 0.
|
| 53 |
"step": 2000
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 0.6001667129758266,
|
| 57 |
-
"eval_loss": 0.
|
| 58 |
-
"eval_runtime":
|
| 59 |
-
"eval_samples_per_second":
|
| 60 |
-
"eval_steps_per_second":
|
| 61 |
"step": 2160
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"epoch": 0.6946373992775771,
|
| 65 |
-
"grad_norm": 0.
|
| 66 |
"learning_rate": 0.0003768639436880615,
|
| 67 |
-
"loss": 0.
|
| 68 |
"step": 2500
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"epoch": 0.8002222839677688,
|
| 72 |
-
"eval_loss": 0.
|
| 73 |
-
"eval_runtime":
|
| 74 |
-
"eval_samples_per_second":
|
| 75 |
-
"eval_steps_per_second":
|
| 76 |
"step": 2880
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"epoch": 0.8335648791330925,
|
| 80 |
-
"grad_norm": 0.
|
| 81 |
-
"learning_rate": 0.
|
| 82 |
-
"loss": 0.
|
| 83 |
"step": 3000
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"epoch": 0.972492358988608,
|
| 87 |
-
"grad_norm": 0.
|
| 88 |
-
"learning_rate": 0.
|
| 89 |
-
"loss": 0.
|
| 90 |
"step": 3500
|
| 91 |
}
|
| 92 |
],
|
|
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13892747985551543,
|
| 13 |
+
"grad_norm": 0.6994414925575256,
|
| 14 |
+
"learning_rate": 0.00039537834583680656,
|
| 15 |
+
"loss": 0.2943,
|
| 16 |
"step": 500
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.2000555709919422,
|
| 20 |
+
"eval_loss": 0.21401312947273254,
|
| 21 |
+
"eval_runtime": 16.5317,
|
| 22 |
+
"eval_samples_per_second": 30.245,
|
| 23 |
+
"eval_steps_per_second": 3.811,
|
| 24 |
"step": 720
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.27785495971103086,
|
| 28 |
+
"grad_norm": 0.5945971608161926,
|
| 29 |
"learning_rate": 0.00039075669167361305,
|
| 30 |
+
"loss": 0.1986,
|
| 31 |
"step": 1000
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.4001111419838844,
|
| 35 |
+
"eval_loss": 0.17625106871128082,
|
| 36 |
+
"eval_runtime": 16.4823,
|
| 37 |
+
"eval_samples_per_second": 30.336,
|
| 38 |
+
"eval_steps_per_second": 3.822,
|
| 39 |
"step": 1440
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.41678243956654626,
|
| 43 |
+
"grad_norm": 0.3619824945926666,
|
| 44 |
"learning_rate": 0.0003861257756784292,
|
| 45 |
+
"loss": 0.1802,
|
| 46 |
"step": 1500
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.5557099194220617,
|
| 50 |
+
"grad_norm": 0.40362638235092163,
|
| 51 |
"learning_rate": 0.00038149485968324534,
|
| 52 |
+
"loss": 0.1686,
|
| 53 |
"step": 2000
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 0.6001667129758266,
|
| 57 |
+
"eval_loss": 0.16156445443630219,
|
| 58 |
+
"eval_runtime": 16.3535,
|
| 59 |
+
"eval_samples_per_second": 30.575,
|
| 60 |
+
"eval_steps_per_second": 3.852,
|
| 61 |
"step": 2160
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"epoch": 0.6946373992775771,
|
| 65 |
+
"grad_norm": 0.4962793290615082,
|
| 66 |
"learning_rate": 0.0003768639436880615,
|
| 67 |
+
"loss": 0.1577,
|
| 68 |
"step": 2500
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"epoch": 0.8002222839677688,
|
| 72 |
+
"eval_loss": 0.15043224394321442,
|
| 73 |
+
"eval_runtime": 16.3325,
|
| 74 |
+
"eval_samples_per_second": 30.614,
|
| 75 |
+
"eval_steps_per_second": 3.857,
|
| 76 |
"step": 2880
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"epoch": 0.8335648791330925,
|
| 80 |
+
"grad_norm": 0.41315382719039917,
|
| 81 |
+
"learning_rate": 0.0003722330276928777,
|
| 82 |
+
"loss": 0.1493,
|
| 83 |
"step": 3000
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"epoch": 0.972492358988608,
|
| 87 |
+
"grad_norm": 0.37953054904937744,
|
| 88 |
+
"learning_rate": 0.0003676113735296842,
|
| 89 |
+
"loss": 0.1477,
|
| 90 |
"step": 3500
|
| 91 |
}
|
| 92 |
],
|
checkpoint-3599/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58052fbf3a2b07f2a6024b5cc28db88f1f0e48109a11483aa716d00657e9906e
|
| 3 |
size 5496
|