Training in progress, step 860000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15a77d7445b898e81ba1dbf302ad4aa88b3930b40801531ded9875b083edd127
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9da54e69aa4d985aac499cdb17fab8ee1c2bb36a1855c776c5bda5c5106d784
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44925a24f970524178a5e9134f37953712fa04674397ceaa739828974d03640e
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1af16532ed7776301ec2b0d23baf8c67ba74ec07e3f7e0782860705643ea3c80
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 9.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -17006,11 +17006,211 @@
|
|
| 17006 |
"eval_samples_per_second": 857.76,
|
| 17007 |
"eval_steps_per_second": 13.443,
|
| 17008 |
"step": 850000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17009 |
}
|
| 17010 |
],
|
| 17011 |
"max_steps": 1000000,
|
| 17012 |
"num_train_epochs": 12,
|
| 17013 |
-
"total_flos":
|
| 17014 |
"trial_name": null,
|
| 17015 |
"trial_params": null
|
| 17016 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 9.234333002464785,
|
| 5 |
+
"global_step": 860000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 17006 |
"eval_samples_per_second": 857.76,
|
| 17007 |
"eval_steps_per_second": 13.443,
|
| 17008 |
"step": 850000
|
| 17009 |
+
},
|
| 17010 |
+
{
|
| 17011 |
+
"epoch": 9.13,
|
| 17012 |
+
"learning_rate": 1.838183407087156e-05,
|
| 17013 |
+
"loss": 0.1843,
|
| 17014 |
+
"step": 850500
|
| 17015 |
+
},
|
| 17016 |
+
{
|
| 17017 |
+
"epoch": 9.13,
|
| 17018 |
+
"learning_rate": 1.8326999187910095e-05,
|
| 17019 |
+
"loss": 0.1843,
|
| 17020 |
+
"step": 851000
|
| 17021 |
+
},
|
| 17022 |
+
{
|
| 17023 |
+
"epoch": 9.13,
|
| 17024 |
+
"eval_loss": 0.17367926239967346,
|
| 17025 |
+
"eval_runtime": 2.6854,
|
| 17026 |
+
"eval_samples_per_second": 855.351,
|
| 17027 |
+
"eval_steps_per_second": 13.406,
|
| 17028 |
+
"step": 851000
|
| 17029 |
+
},
|
| 17030 |
+
{
|
| 17031 |
+
"epoch": 9.14,
|
| 17032 |
+
"learning_rate": 1.8272332916577875e-05,
|
| 17033 |
+
"loss": 0.1846,
|
| 17034 |
+
"step": 851500
|
| 17035 |
+
},
|
| 17036 |
+
{
|
| 17037 |
+
"epoch": 9.15,
|
| 17038 |
+
"learning_rate": 1.8217835406330415e-05,
|
| 17039 |
+
"loss": 0.1844,
|
| 17040 |
+
"step": 852000
|
| 17041 |
+
},
|
| 17042 |
+
{
|
| 17043 |
+
"epoch": 9.15,
|
| 17044 |
+
"eval_loss": 0.17384441196918488,
|
| 17045 |
+
"eval_runtime": 2.6384,
|
| 17046 |
+
"eval_samples_per_second": 870.617,
|
| 17047 |
+
"eval_steps_per_second": 13.645,
|
| 17048 |
+
"step": 852000
|
| 17049 |
+
},
|
| 17050 |
+
{
|
| 17051 |
+
"epoch": 9.15,
|
| 17052 |
+
"learning_rate": 1.81635068061618e-05,
|
| 17053 |
+
"loss": 0.1844,
|
| 17054 |
+
"step": 852500
|
| 17055 |
+
},
|
| 17056 |
+
{
|
| 17057 |
+
"epoch": 9.16,
|
| 17058 |
+
"learning_rate": 1.810934726460436e-05,
|
| 17059 |
+
"loss": 0.1845,
|
| 17060 |
+
"step": 853000
|
| 17061 |
+
},
|
| 17062 |
+
{
|
| 17063 |
+
"epoch": 9.16,
|
| 17064 |
+
"eval_loss": 0.17389260232448578,
|
| 17065 |
+
"eval_runtime": 2.6711,
|
| 17066 |
+
"eval_samples_per_second": 859.942,
|
| 17067 |
+
"eval_steps_per_second": 13.478,
|
| 17068 |
+
"step": 853000
|
| 17069 |
+
},
|
| 17070 |
+
{
|
| 17071 |
+
"epoch": 9.16,
|
| 17072 |
+
"learning_rate": 1.80553569297282e-05,
|
| 17073 |
+
"loss": 0.1843,
|
| 17074 |
+
"step": 853500
|
| 17075 |
+
},
|
| 17076 |
+
{
|
| 17077 |
+
"epoch": 9.17,
|
| 17078 |
+
"learning_rate": 1.800153594914084e-05,
|
| 17079 |
+
"loss": 0.1843,
|
| 17080 |
+
"step": 854000
|
| 17081 |
+
},
|
| 17082 |
+
{
|
| 17083 |
+
"epoch": 9.17,
|
| 17084 |
+
"eval_loss": 0.17477978765964508,
|
| 17085 |
+
"eval_runtime": 2.681,
|
| 17086 |
+
"eval_samples_per_second": 856.769,
|
| 17087 |
+
"eval_steps_per_second": 13.428,
|
| 17088 |
+
"step": 854000
|
| 17089 |
+
},
|
| 17090 |
+
{
|
| 17091 |
+
"epoch": 9.17,
|
| 17092 |
+
"learning_rate": 1.7947884469986816e-05,
|
| 17093 |
+
"loss": 0.1841,
|
| 17094 |
+
"step": 854500
|
| 17095 |
+
},
|
| 17096 |
+
{
|
| 17097 |
+
"epoch": 9.18,
|
| 17098 |
+
"learning_rate": 1.7894402638947176e-05,
|
| 17099 |
+
"loss": 0.1841,
|
| 17100 |
+
"step": 855000
|
| 17101 |
+
},
|
| 17102 |
+
{
|
| 17103 |
+
"epoch": 9.18,
|
| 17104 |
+
"eval_loss": 0.1744370311498642,
|
| 17105 |
+
"eval_runtime": 2.6199,
|
| 17106 |
+
"eval_samples_per_second": 876.754,
|
| 17107 |
+
"eval_steps_per_second": 13.741,
|
| 17108 |
+
"step": 855000
|
| 17109 |
+
},
|
| 17110 |
+
{
|
| 17111 |
+
"epoch": 9.18,
|
| 17112 |
+
"learning_rate": 1.7841090602239237e-05,
|
| 17113 |
+
"loss": 0.1841,
|
| 17114 |
+
"step": 855500
|
| 17115 |
+
},
|
| 17116 |
+
{
|
| 17117 |
+
"epoch": 9.19,
|
| 17118 |
+
"learning_rate": 1.778794850561604e-05,
|
| 17119 |
+
"loss": 0.1844,
|
| 17120 |
+
"step": 856000
|
| 17121 |
+
},
|
| 17122 |
+
{
|
| 17123 |
+
"epoch": 9.19,
|
| 17124 |
+
"eval_loss": 0.17599613964557648,
|
| 17125 |
+
"eval_runtime": 2.6232,
|
| 17126 |
+
"eval_samples_per_second": 875.655,
|
| 17127 |
+
"eval_steps_per_second": 13.724,
|
| 17128 |
+
"step": 856000
|
| 17129 |
+
},
|
| 17130 |
+
{
|
| 17131 |
+
"epoch": 9.2,
|
| 17132 |
+
"learning_rate": 1.7734976494366073e-05,
|
| 17133 |
+
"loss": 0.1837,
|
| 17134 |
+
"step": 856500
|
| 17135 |
+
},
|
| 17136 |
+
{
|
| 17137 |
+
"epoch": 9.2,
|
| 17138 |
+
"learning_rate": 1.7682174713312805e-05,
|
| 17139 |
+
"loss": 0.1843,
|
| 17140 |
+
"step": 857000
|
| 17141 |
+
},
|
| 17142 |
+
{
|
| 17143 |
+
"epoch": 9.2,
|
| 17144 |
+
"eval_loss": 0.17385347187519073,
|
| 17145 |
+
"eval_runtime": 2.7089,
|
| 17146 |
+
"eval_samples_per_second": 847.948,
|
| 17147 |
+
"eval_steps_per_second": 13.29,
|
| 17148 |
+
"step": 857000
|
| 17149 |
+
},
|
| 17150 |
+
{
|
| 17151 |
+
"epoch": 9.21,
|
| 17152 |
+
"learning_rate": 1.7629543306814255e-05,
|
| 17153 |
+
"loss": 0.1838,
|
| 17154 |
+
"step": 857500
|
| 17155 |
+
},
|
| 17156 |
+
{
|
| 17157 |
+
"epoch": 9.21,
|
| 17158 |
+
"learning_rate": 1.75770824187627e-05,
|
| 17159 |
+
"loss": 0.1839,
|
| 17160 |
+
"step": 858000
|
| 17161 |
+
},
|
| 17162 |
+
{
|
| 17163 |
+
"epoch": 9.21,
|
| 17164 |
+
"eval_loss": 0.17458127439022064,
|
| 17165 |
+
"eval_runtime": 2.75,
|
| 17166 |
+
"eval_samples_per_second": 835.274,
|
| 17167 |
+
"eval_steps_per_second": 13.091,
|
| 17168 |
+
"step": 858000
|
| 17169 |
+
},
|
| 17170 |
+
{
|
| 17171 |
+
"epoch": 9.22,
|
| 17172 |
+
"learning_rate": 1.7524792192584186e-05,
|
| 17173 |
+
"loss": 0.1843,
|
| 17174 |
+
"step": 858500
|
| 17175 |
+
},
|
| 17176 |
+
{
|
| 17177 |
+
"epoch": 9.22,
|
| 17178 |
+
"learning_rate": 1.747267277123821e-05,
|
| 17179 |
+
"loss": 0.1839,
|
| 17180 |
+
"step": 859000
|
| 17181 |
+
},
|
| 17182 |
+
{
|
| 17183 |
+
"epoch": 9.22,
|
| 17184 |
+
"eval_loss": 0.1746589094400406,
|
| 17185 |
+
"eval_runtime": 2.6359,
|
| 17186 |
+
"eval_samples_per_second": 871.432,
|
| 17187 |
+
"eval_steps_per_second": 13.658,
|
| 17188 |
+
"step": 859000
|
| 17189 |
+
},
|
| 17190 |
+
{
|
| 17191 |
+
"epoch": 9.23,
|
| 17192 |
+
"learning_rate": 1.74207242972173e-05,
|
| 17193 |
+
"loss": 0.1837,
|
| 17194 |
+
"step": 859500
|
| 17195 |
+
},
|
| 17196 |
+
{
|
| 17197 |
+
"epoch": 9.23,
|
| 17198 |
+
"learning_rate": 1.7368946912546556e-05,
|
| 17199 |
+
"loss": 0.1836,
|
| 17200 |
+
"step": 860000
|
| 17201 |
+
},
|
| 17202 |
+
{
|
| 17203 |
+
"epoch": 9.23,
|
| 17204 |
+
"eval_loss": 0.1775263249874115,
|
| 17205 |
+
"eval_runtime": 2.6639,
|
| 17206 |
+
"eval_samples_per_second": 862.261,
|
| 17207 |
+
"eval_steps_per_second": 13.514,
|
| 17208 |
+
"step": 860000
|
| 17209 |
}
|
| 17210 |
],
|
| 17211 |
"max_steps": 1000000,
|
| 17212 |
"num_train_epochs": 12,
|
| 17213 |
+
"total_flos": 6.028573160582214e+22,
|
| 17214 |
"trial_name": null,
|
| 17215 |
"trial_params": null
|
| 17216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d9da54e69aa4d985aac499cdb17fab8ee1c2bb36a1855c776c5bda5c5106d784
|
| 3 |
size 449471589
|