Training in progress, step 600000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3011d318834eca8ab53dbb6676157bffa205e38d5b1da950aa905371e910e8ab
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd1db57f69735f18b491ceac5dcc53e66fc033f8278840d48a2d1d7789a922a3
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d5ef830797817d960f06c7d56a345ac3affb87d9629b56b5bc8c9c3338bb01c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53e8adf2ea40b3fc922a7d11d4e61b79f1bf2e372d1d097107cce4c6dc566b51
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:198286929a9142846ad67730b33946ccbc3ba475f115c3a0aef90b3b51ce6035
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03eebf4a4ea0cd9321666974d21bc38214f697b5e4b4b439941abd6a346b886d
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b03692cd1ad26b8eef58c8c71c41233d4ef014e517e327f8e4798a79a3642d38
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4840589ea9729ad2c28a92a929b64277e6de332cdd873f9187cab513a624b326
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1d4e4a138e230b3265d7fc7f258a4846b3b6af6bce4e94e22abcfe742c12213
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7cc50ebf91016fa727fad392dd121c0404733da11c9da06aed7a47a81b65900
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1317449e5c457cd18ac6087cac07774393562aa747fda3bc1ae1eb6a47f1311
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -7086,11 +7086,131 @@
|
|
| 7086 |
"learning_rate": 6.506902584793773e-05,
|
| 7087 |
"loss": 0.3043,
|
| 7088 |
"step": 590000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7089 |
}
|
| 7090 |
],
|
| 7091 |
"max_steps": 1000000,
|
| 7092 |
"num_train_epochs": 2,
|
| 7093 |
-
"total_flos":
|
| 7094 |
"trial_name": null,
|
| 7095 |
"trial_params": null
|
| 7096 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.1994290717618414,
|
| 5 |
+
"global_step": 600000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 7086 |
"learning_rate": 6.506902584793773e-05,
|
| 7087 |
"loss": 0.3043,
|
| 7088 |
"step": 590000
|
| 7089 |
+
},
|
| 7090 |
+
{
|
| 7091 |
+
"epoch": 1.18,
|
| 7092 |
+
"learning_rate": 6.495596703323214e-05,
|
| 7093 |
+
"loss": 0.3039,
|
| 7094 |
+
"step": 590500
|
| 7095 |
+
},
|
| 7096 |
+
{
|
| 7097 |
+
"epoch": 1.18,
|
| 7098 |
+
"learning_rate": 6.484294934833822e-05,
|
| 7099 |
+
"loss": 0.3042,
|
| 7100 |
+
"step": 591000
|
| 7101 |
+
},
|
| 7102 |
+
{
|
| 7103 |
+
"epoch": 1.18,
|
| 7104 |
+
"learning_rate": 6.472997310224204e-05,
|
| 7105 |
+
"loss": 0.304,
|
| 7106 |
+
"step": 591500
|
| 7107 |
+
},
|
| 7108 |
+
{
|
| 7109 |
+
"epoch": 1.18,
|
| 7110 |
+
"learning_rate": 6.461703860381628e-05,
|
| 7111 |
+
"loss": 0.3039,
|
| 7112 |
+
"step": 592000
|
| 7113 |
+
},
|
| 7114 |
+
{
|
| 7115 |
+
"epoch": 1.18,
|
| 7116 |
+
"learning_rate": 6.450414616181959e-05,
|
| 7117 |
+
"loss": 0.3041,
|
| 7118 |
+
"step": 592500
|
| 7119 |
+
},
|
| 7120 |
+
{
|
| 7121 |
+
"epoch": 1.19,
|
| 7122 |
+
"learning_rate": 6.439129608489559e-05,
|
| 7123 |
+
"loss": 0.3043,
|
| 7124 |
+
"step": 593000
|
| 7125 |
+
},
|
| 7126 |
+
{
|
| 7127 |
+
"epoch": 1.19,
|
| 7128 |
+
"learning_rate": 6.427848868157208e-05,
|
| 7129 |
+
"loss": 0.3041,
|
| 7130 |
+
"step": 593500
|
| 7131 |
+
},
|
| 7132 |
+
{
|
| 7133 |
+
"epoch": 1.19,
|
| 7134 |
+
"learning_rate": 6.41657242602602e-05,
|
| 7135 |
+
"loss": 0.304,
|
| 7136 |
+
"step": 594000
|
| 7137 |
+
},
|
| 7138 |
+
{
|
| 7139 |
+
"epoch": 1.19,
|
| 7140 |
+
"learning_rate": 6.405300312925353e-05,
|
| 7141 |
+
"loss": 0.3044,
|
| 7142 |
+
"step": 594500
|
| 7143 |
+
},
|
| 7144 |
+
{
|
| 7145 |
+
"epoch": 1.19,
|
| 7146 |
+
"learning_rate": 6.39403255967274e-05,
|
| 7147 |
+
"loss": 0.304,
|
| 7148 |
+
"step": 595000
|
| 7149 |
+
},
|
| 7150 |
+
{
|
| 7151 |
+
"epoch": 1.19,
|
| 7152 |
+
"learning_rate": 6.382769197073783e-05,
|
| 7153 |
+
"loss": 0.3042,
|
| 7154 |
+
"step": 595500
|
| 7155 |
+
},
|
| 7156 |
+
{
|
| 7157 |
+
"epoch": 1.19,
|
| 7158 |
+
"learning_rate": 6.371510255922088e-05,
|
| 7159 |
+
"loss": 0.3041,
|
| 7160 |
+
"step": 596000
|
| 7161 |
+
},
|
| 7162 |
+
{
|
| 7163 |
+
"epoch": 1.19,
|
| 7164 |
+
"learning_rate": 6.360255766999172e-05,
|
| 7165 |
+
"loss": 0.3036,
|
| 7166 |
+
"step": 596500
|
| 7167 |
+
},
|
| 7168 |
+
{
|
| 7169 |
+
"epoch": 1.19,
|
| 7170 |
+
"learning_rate": 6.349005761074372e-05,
|
| 7171 |
+
"loss": 0.3044,
|
| 7172 |
+
"step": 597000
|
| 7173 |
+
},
|
| 7174 |
+
{
|
| 7175 |
+
"epoch": 1.19,
|
| 7176 |
+
"learning_rate": 6.33776026890478e-05,
|
| 7177 |
+
"loss": 0.3038,
|
| 7178 |
+
"step": 597500
|
| 7179 |
+
},
|
| 7180 |
+
{
|
| 7181 |
+
"epoch": 1.2,
|
| 7182 |
+
"learning_rate": 6.326519321235139e-05,
|
| 7183 |
+
"loss": 0.3036,
|
| 7184 |
+
"step": 598000
|
| 7185 |
+
},
|
| 7186 |
+
{
|
| 7187 |
+
"epoch": 1.2,
|
| 7188 |
+
"learning_rate": 6.315282948797776e-05,
|
| 7189 |
+
"loss": 0.3033,
|
| 7190 |
+
"step": 598500
|
| 7191 |
+
},
|
| 7192 |
+
{
|
| 7193 |
+
"epoch": 1.2,
|
| 7194 |
+
"learning_rate": 6.304051182312496e-05,
|
| 7195 |
+
"loss": 0.3034,
|
| 7196 |
+
"step": 599000
|
| 7197 |
+
},
|
| 7198 |
+
{
|
| 7199 |
+
"epoch": 1.2,
|
| 7200 |
+
"learning_rate": 6.292824052486525e-05,
|
| 7201 |
+
"loss": 0.3036,
|
| 7202 |
+
"step": 599500
|
| 7203 |
+
},
|
| 7204 |
+
{
|
| 7205 |
+
"epoch": 1.2,
|
| 7206 |
+
"learning_rate": 6.281601590014407e-05,
|
| 7207 |
+
"loss": 0.3039,
|
| 7208 |
+
"step": 600000
|
| 7209 |
}
|
| 7210 |
],
|
| 7211 |
"max_steps": 1000000,
|
| 7212 |
"num_train_epochs": 2,
|
| 7213 |
+
"total_flos": 4.056420774973463e+22,
|
| 7214 |
"trial_name": null,
|
| 7215 |
"trial_params": null
|
| 7216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd1db57f69735f18b491ceac5dcc53e66fc033f8278840d48a2d1d7789a922a3
|
| 3 |
size 449450757
|