Training in progress, step 930000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46ff1d8d7d94b8fc5390751564ccb419f91a9858db9bdfd9f11bebe5113b944a
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d2bac07d166f73980c3c9cce6825ce7a1c1f5f22b97d0264ce5e7ba42eeb3df
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:523ad7de15419f9770d9e7768264983ffe3ed3b6b2e41d3411de9307e3568dda
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f02b76320d64a6c1fa36467fd16fb842d38967c6960f680698d2b6c46fa9ddb
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eb4ba7042a42dc2dddfda89081c338c759af082271bf3d6b2249d374dfb5549f
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d492de85ba9ca1e7e895ebf249111dbc1e669c10034dc3697642d6b066377bc6
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87bf88b63314cf7e6bc76f627a1120e94df3871d06e2e6a10b31a22c360f0003
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b861dd3cac874664255fa5c36d008a64ba119b36b6461f68fec95ff34294b32
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b64737ac04d5942009955008d940bd1d1005db5a2c22b7982f258d011e0ad8cb
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bb15e8b762074a6cf94106e13146d7507cbb10c383c4eadc6b62f549780de27
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8bf7fa4ff5edd25c6aca18d0779a09bdac81be773b8a02595b2a01362d92297
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -11046,11 +11046,131 @@
|
|
| 11046 |
"learning_rate": 1.24353762805856e-05,
|
| 11047 |
"loss": 0.2847,
|
| 11048 |
"step": 920000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11049 |
}
|
| 11050 |
],
|
| 11051 |
"max_steps": 1000000,
|
| 11052 |
"num_train_epochs": 2,
|
| 11053 |
-
"total_flos": 6.
|
| 11054 |
"trial_name": null,
|
| 11055 |
"trial_params": null
|
| 11056 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.3985937612104495,
|
| 5 |
+
"global_step": 930000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 11046 |
"learning_rate": 1.24353762805856e-05,
|
| 11047 |
"loss": 0.2847,
|
| 11048 |
"step": 920000
|
| 11049 |
+
},
|
| 11050 |
+
{
|
| 11051 |
+
"epoch": 1.38,
|
| 11052 |
+
"learning_rate": 1.240520421060586e-05,
|
| 11053 |
+
"loss": 0.2843,
|
| 11054 |
+
"step": 920500
|
| 11055 |
+
},
|
| 11056 |
+
{
|
| 11057 |
+
"epoch": 1.38,
|
| 11058 |
+
"learning_rate": 1.2375216942216713e-05,
|
| 11059 |
+
"loss": 0.2848,
|
| 11060 |
+
"step": 921000
|
| 11061 |
+
},
|
| 11062 |
+
{
|
| 11063 |
+
"epoch": 1.38,
|
| 11064 |
+
"learning_rate": 1.2345414557402198e-05,
|
| 11065 |
+
"loss": 0.2848,
|
| 11066 |
+
"step": 921500
|
| 11067 |
+
},
|
| 11068 |
+
{
|
| 11069 |
+
"epoch": 1.38,
|
| 11070 |
+
"learning_rate": 1.2315797137640906e-05,
|
| 11071 |
+
"loss": 0.2843,
|
| 11072 |
+
"step": 922000
|
| 11073 |
+
},
|
| 11074 |
+
{
|
| 11075 |
+
"epoch": 1.38,
|
| 11076 |
+
"learning_rate": 1.2286364763905723e-05,
|
| 11077 |
+
"loss": 0.2847,
|
| 11078 |
+
"step": 922500
|
| 11079 |
+
},
|
| 11080 |
+
{
|
| 11081 |
+
"epoch": 1.38,
|
| 11082 |
+
"learning_rate": 1.225711751666363e-05,
|
| 11083 |
+
"loss": 0.2841,
|
| 11084 |
+
"step": 923000
|
| 11085 |
+
},
|
| 11086 |
+
{
|
| 11087 |
+
"epoch": 1.39,
|
| 11088 |
+
"learning_rate": 1.2228055475875488e-05,
|
| 11089 |
+
"loss": 0.285,
|
| 11090 |
+
"step": 923500
|
| 11091 |
+
},
|
| 11092 |
+
{
|
| 11093 |
+
"epoch": 1.39,
|
| 11094 |
+
"learning_rate": 1.2199178720995825e-05,
|
| 11095 |
+
"loss": 0.2845,
|
| 11096 |
+
"step": 924000
|
| 11097 |
+
},
|
| 11098 |
+
{
|
| 11099 |
+
"epoch": 1.39,
|
| 11100 |
+
"learning_rate": 1.217048733097256e-05,
|
| 11101 |
+
"loss": 0.2843,
|
| 11102 |
+
"step": 924500
|
| 11103 |
+
},
|
| 11104 |
+
{
|
| 11105 |
+
"epoch": 1.39,
|
| 11106 |
+
"learning_rate": 1.2141981384246874e-05,
|
| 11107 |
+
"loss": 0.2845,
|
| 11108 |
+
"step": 925000
|
| 11109 |
+
},
|
| 11110 |
+
{
|
| 11111 |
+
"epoch": 1.39,
|
| 11112 |
+
"learning_rate": 1.211366095875293e-05,
|
| 11113 |
+
"loss": 0.2849,
|
| 11114 |
+
"step": 925500
|
| 11115 |
+
},
|
| 11116 |
+
{
|
| 11117 |
+
"epoch": 1.39,
|
| 11118 |
+
"learning_rate": 1.2085526131917685e-05,
|
| 11119 |
+
"loss": 0.2847,
|
| 11120 |
+
"step": 926000
|
| 11121 |
+
},
|
| 11122 |
+
{
|
| 11123 |
+
"epoch": 1.39,
|
| 11124 |
+
"learning_rate": 1.2057576980660691e-05,
|
| 11125 |
+
"loss": 0.2841,
|
| 11126 |
+
"step": 926500
|
| 11127 |
+
},
|
| 11128 |
+
{
|
| 11129 |
+
"epoch": 1.39,
|
| 11130 |
+
"learning_rate": 1.2029813581393866e-05,
|
| 11131 |
+
"loss": 0.2847,
|
| 11132 |
+
"step": 927000
|
| 11133 |
+
},
|
| 11134 |
+
{
|
| 11135 |
+
"epoch": 1.39,
|
| 11136 |
+
"learning_rate": 1.2002236010021269e-05,
|
| 11137 |
+
"loss": 0.2849,
|
| 11138 |
+
"step": 927500
|
| 11139 |
+
},
|
| 11140 |
+
{
|
| 11141 |
+
"epoch": 1.39,
|
| 11142 |
+
"learning_rate": 1.197484434193893e-05,
|
| 11143 |
+
"loss": 0.2842,
|
| 11144 |
+
"step": 928000
|
| 11145 |
+
},
|
| 11146 |
+
{
|
| 11147 |
+
"epoch": 1.4,
|
| 11148 |
+
"learning_rate": 1.1947638652034617e-05,
|
| 11149 |
+
"loss": 0.2843,
|
| 11150 |
+
"step": 928500
|
| 11151 |
+
},
|
| 11152 |
+
{
|
| 11153 |
+
"epoch": 1.4,
|
| 11154 |
+
"learning_rate": 1.192061901468768e-05,
|
| 11155 |
+
"loss": 0.2844,
|
| 11156 |
+
"step": 929000
|
| 11157 |
+
},
|
| 11158 |
+
{
|
| 11159 |
+
"epoch": 1.4,
|
| 11160 |
+
"learning_rate": 1.1893785503768736e-05,
|
| 11161 |
+
"loss": 0.2838,
|
| 11162 |
+
"step": 929500
|
| 11163 |
+
},
|
| 11164 |
+
{
|
| 11165 |
+
"epoch": 1.4,
|
| 11166 |
+
"learning_rate": 1.1867138192639601e-05,
|
| 11167 |
+
"loss": 0.2842,
|
| 11168 |
+
"step": 930000
|
| 11169 |
}
|
| 11170 |
],
|
| 11171 |
"max_steps": 1000000,
|
| 11172 |
"num_train_epochs": 2,
|
| 11173 |
+
"total_flos": 6.287461350629951e+22,
|
| 11174 |
"trial_name": null,
|
| 11175 |
"trial_params": null
|
| 11176 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d2bac07d166f73980c3c9cce6825ce7a1c1f5f22b97d0264ce5e7ba42eeb3df
|
| 3 |
size 449450757
|