Training in progress, step 690000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03c116c94691fe83930a5993880bd8c0998a3e0ec38012be8fad0b40e1c29568
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2b586adc3a504918e31f8a9a7e0f4be94f03956b7f0114b4da9476acb22a6ef
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33a77fe6605383afb833ca53fc48599ad56cfe3eec3e43ed5ccdec337bfa0ca8
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:815de40461412919a5889eec61609965c9d2866910047894a4dc0f6b20abb0b4
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4ae17687fd76755d54219d89fc11c8946b10678e74c0c01048fa01e50274084
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c17905fc96ebc0f02fa95bfe12b431ac787bae4299fa05067fe9a564d5bb62cc
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4ee1c07d4ce16af70500d9164050fdd6814fe5b1c70d5ddc9dc0d403bb72893
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -5038,11 +5038,85 @@
|
|
| 5038 |
"eval_samples_per_second": 1352.617,
|
| 5039 |
"eval_steps_per_second": 21.642,
|
| 5040 |
"step": 680000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5041 |
}
|
| 5042 |
],
|
| 5043 |
"max_steps": 1000000,
|
| 5044 |
"num_train_epochs": 16,
|
| 5045 |
-
"total_flos": 4.
|
| 5046 |
"trial_name": null,
|
| 5047 |
"trial_params": null
|
| 5048 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.536442347336113,
|
| 5 |
+
"global_step": 690000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 5038 |
"eval_samples_per_second": 1352.617,
|
| 5039 |
"eval_steps_per_second": 21.642,
|
| 5040 |
"step": 680000
|
| 5041 |
+
},
|
| 5042 |
+
{
|
| 5043 |
+
"epoch": 10.4,
|
| 5044 |
+
"learning_rate": 4.546880660622845e-05,
|
| 5045 |
+
"loss": 0.2463,
|
| 5046 |
+
"step": 681000
|
| 5047 |
+
},
|
| 5048 |
+
{
|
| 5049 |
+
"epoch": 10.41,
|
| 5050 |
+
"learning_rate": 4.5267636108191036e-05,
|
| 5051 |
+
"loss": 0.2466,
|
| 5052 |
+
"step": 682000
|
| 5053 |
+
},
|
| 5054 |
+
{
|
| 5055 |
+
"epoch": 10.43,
|
| 5056 |
+
"learning_rate": 4.5066845437720555e-05,
|
| 5057 |
+
"loss": 0.2462,
|
| 5058 |
+
"step": 683000
|
| 5059 |
+
},
|
| 5060 |
+
{
|
| 5061 |
+
"epoch": 10.44,
|
| 5062 |
+
"learning_rate": 4.4866436790631564e-05,
|
| 5063 |
+
"loss": 0.2463,
|
| 5064 |
+
"step": 684000
|
| 5065 |
+
},
|
| 5066 |
+
{
|
| 5067 |
+
"epoch": 10.46,
|
| 5068 |
+
"learning_rate": 4.4666412358560955e-05,
|
| 5069 |
+
"loss": 0.2461,
|
| 5070 |
+
"step": 685000
|
| 5071 |
+
},
|
| 5072 |
+
{
|
| 5073 |
+
"epoch": 10.46,
|
| 5074 |
+
"eval_runtime": 0.7931,
|
| 5075 |
+
"eval_samples_per_second": 1260.883,
|
| 5076 |
+
"eval_steps_per_second": 20.174,
|
| 5077 |
+
"step": 685000
|
| 5078 |
+
},
|
| 5079 |
+
{
|
| 5080 |
+
"epoch": 10.48,
|
| 5081 |
+
"learning_rate": 4.4466774328943796e-05,
|
| 5082 |
+
"loss": 0.2462,
|
| 5083 |
+
"step": 686000
|
| 5084 |
+
},
|
| 5085 |
+
{
|
| 5086 |
+
"epoch": 10.49,
|
| 5087 |
+
"learning_rate": 4.426752488498972e-05,
|
| 5088 |
+
"loss": 0.2462,
|
| 5089 |
+
"step": 687000
|
| 5090 |
+
},
|
| 5091 |
+
{
|
| 5092 |
+
"epoch": 10.51,
|
| 5093 |
+
"learning_rate": 4.406866620565862e-05,
|
| 5094 |
+
"loss": 0.2459,
|
| 5095 |
+
"step": 688000
|
| 5096 |
+
},
|
| 5097 |
+
{
|
| 5098 |
+
"epoch": 10.52,
|
| 5099 |
+
"learning_rate": 4.3870200465637164e-05,
|
| 5100 |
+
"loss": 0.2471,
|
| 5101 |
+
"step": 689000
|
| 5102 |
+
},
|
| 5103 |
+
{
|
| 5104 |
+
"epoch": 10.54,
|
| 5105 |
+
"learning_rate": 4.3672129835314955e-05,
|
| 5106 |
+
"loss": 0.2481,
|
| 5107 |
+
"step": 690000
|
| 5108 |
+
},
|
| 5109 |
+
{
|
| 5110 |
+
"epoch": 10.54,
|
| 5111 |
+
"eval_runtime": 0.8338,
|
| 5112 |
+
"eval_samples_per_second": 1199.26,
|
| 5113 |
+
"eval_steps_per_second": 19.188,
|
| 5114 |
+
"step": 690000
|
| 5115 |
}
|
| 5116 |
],
|
| 5117 |
"max_steps": 1000000,
|
| 5118 |
"num_train_epochs": 16,
|
| 5119 |
+
"total_flos": 4.8369114404124654e+22,
|
| 5120 |
"trial_name": null,
|
| 5121 |
"trial_params": null
|
| 5122 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2b586adc3a504918e31f8a9a7e0f4be94f03956b7f0114b4da9476acb22a6ef
|
| 3 |
size 449471589
|