Training in progress, step 850000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +123 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893441093
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e84ef55f585409a7eb44e502edc32a6f98749bb849f76b8b16d08a3dc37d8ae
|
| 3 |
size 893441093
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a461e16ba87e1e69720d854eafd25f8ca62064d88d546c07f46eddd6113b782
|
| 3 |
size 449450757
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ac44739d68ab8d92ea75cbaa95e05b08c8692ab1a77edef6cb58ac613b56151
|
| 3 |
size 14583
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1df68ed20ea8916573dab6835f47431337c9ff47e9296153749cafb56a0237a7
|
| 3 |
size 14583
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77f291c8b37b0aa8ca2df1a663842fb970bdeff54c0b2afea5e2500f1ec3c566
|
| 3 |
size 14583
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21eb46d9347a0d809f89248620dfe17276a72e359d7ca7fa7d35b0bd105aa105
|
| 3 |
size 14583
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:351c206e5b3b4ed149381ec4e0dbf48f45d70d99336ae5535af82fc3c087c11c
|
| 3 |
size 14583
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5aa383878cd306edcfd23540112b984f51a1f84d02ae2848e2f7b0c8c85af935
|
| 3 |
size 14583
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63948dedd46aa19ac12a8709dee2f12fa168b63bee773e75612f1919ff39b2f0
|
| 3 |
size 14583
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14583
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ec45b14f87f8f42299fc33d4fabbcc588674c41ff3399499af6ac36481288d5
|
| 3 |
size 14583
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:208c2830fa39459dcf60f5b94c27151cbc48e3dc8fdf884186b8d5be230917b9
|
| 3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 1.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -10086,11 +10086,131 @@
|
|
| 10086 |
"learning_rate": 1.9572046332969825e-05,
|
| 10087 |
"loss": 0.2881,
|
| 10088 |
"step": 840000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10089 |
}
|
| 10090 |
],
|
| 10091 |
"max_steps": 1000000,
|
| 10092 |
"num_train_epochs": 2,
|
| 10093 |
-
"total_flos": 5.
|
| 10094 |
"trial_name": null,
|
| 10095 |
"trial_params": null
|
| 10096 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 1.2391562567262697,
|
| 5 |
+
"global_step": 850000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 10086 |
"learning_rate": 1.9572046332969825e-05,
|
| 10087 |
"loss": 0.2881,
|
| 10088 |
"step": 840000
|
| 10089 |
+
},
|
| 10090 |
+
{
|
| 10091 |
+
"epoch": 1.22,
|
| 10092 |
+
"learning_rate": 1.95137059427344e-05,
|
| 10093 |
+
"loss": 0.2884,
|
| 10094 |
+
"step": 840500
|
| 10095 |
+
},
|
| 10096 |
+
{
|
| 10097 |
+
"epoch": 1.22,
|
| 10098 |
+
"learning_rate": 1.945553091971727e-05,
|
| 10099 |
+
"loss": 0.2883,
|
| 10100 |
+
"step": 841000
|
| 10101 |
+
},
|
| 10102 |
+
{
|
| 10103 |
+
"epoch": 1.22,
|
| 10104 |
+
"learning_rate": 1.93975214229667e-05,
|
| 10105 |
+
"loss": 0.2885,
|
| 10106 |
+
"step": 841500
|
| 10107 |
+
},
|
| 10108 |
+
{
|
| 10109 |
+
"epoch": 1.22,
|
| 10110 |
+
"learning_rate": 1.933967761107847e-05,
|
| 10111 |
+
"loss": 0.2877,
|
| 10112 |
+
"step": 842000
|
| 10113 |
+
},
|
| 10114 |
+
{
|
| 10115 |
+
"epoch": 1.22,
|
| 10116 |
+
"learning_rate": 1.928199964219533e-05,
|
| 10117 |
+
"loss": 0.2876,
|
| 10118 |
+
"step": 842500
|
| 10119 |
+
},
|
| 10120 |
+
{
|
| 10121 |
+
"epoch": 1.23,
|
| 10122 |
+
"learning_rate": 1.9224487674006694e-05,
|
| 10123 |
+
"loss": 0.2873,
|
| 10124 |
+
"step": 843000
|
| 10125 |
+
},
|
| 10126 |
+
{
|
| 10127 |
+
"epoch": 1.23,
|
| 10128 |
+
"learning_rate": 1.9167141863748015e-05,
|
| 10129 |
+
"loss": 0.288,
|
| 10130 |
+
"step": 843500
|
| 10131 |
+
},
|
| 10132 |
+
{
|
| 10133 |
+
"epoch": 1.23,
|
| 10134 |
+
"learning_rate": 1.9109962368200602e-05,
|
| 10135 |
+
"loss": 0.2874,
|
| 10136 |
+
"step": 844000
|
| 10137 |
+
},
|
| 10138 |
+
{
|
| 10139 |
+
"epoch": 1.23,
|
| 10140 |
+
"learning_rate": 1.9052949343690977e-05,
|
| 10141 |
+
"loss": 0.2884,
|
| 10142 |
+
"step": 844500
|
| 10143 |
+
},
|
| 10144 |
+
{
|
| 10145 |
+
"epoch": 1.23,
|
| 10146 |
+
"learning_rate": 1.8996102946090586e-05,
|
| 10147 |
+
"loss": 0.2874,
|
| 10148 |
+
"step": 845000
|
| 10149 |
+
},
|
| 10150 |
+
{
|
| 10151 |
+
"epoch": 1.23,
|
| 10152 |
+
"learning_rate": 1.8939423330815345e-05,
|
| 10153 |
+
"loss": 0.2879,
|
| 10154 |
+
"step": 845500
|
| 10155 |
+
},
|
| 10156 |
+
{
|
| 10157 |
+
"epoch": 1.23,
|
| 10158 |
+
"learning_rate": 1.888291065282509e-05,
|
| 10159 |
+
"loss": 0.2872,
|
| 10160 |
+
"step": 846000
|
| 10161 |
+
},
|
| 10162 |
+
{
|
| 10163 |
+
"epoch": 1.23,
|
| 10164 |
+
"learning_rate": 1.882656506662338e-05,
|
| 10165 |
+
"loss": 0.2882,
|
| 10166 |
+
"step": 846500
|
| 10167 |
+
},
|
| 10168 |
+
{
|
| 10169 |
+
"epoch": 1.23,
|
| 10170 |
+
"learning_rate": 1.8770386726256865e-05,
|
| 10171 |
+
"loss": 0.2875,
|
| 10172 |
+
"step": 847000
|
| 10173 |
+
},
|
| 10174 |
+
{
|
| 10175 |
+
"epoch": 1.23,
|
| 10176 |
+
"learning_rate": 1.8714375785315006e-05,
|
| 10177 |
+
"loss": 0.2871,
|
| 10178 |
+
"step": 847500
|
| 10179 |
+
},
|
| 10180 |
+
{
|
| 10181 |
+
"epoch": 1.24,
|
| 10182 |
+
"learning_rate": 1.8658532396929565e-05,
|
| 10183 |
+
"loss": 0.2872,
|
| 10184 |
+
"step": 848000
|
| 10185 |
+
},
|
| 10186 |
+
{
|
| 10187 |
+
"epoch": 1.24,
|
| 10188 |
+
"learning_rate": 1.8602856713774208e-05,
|
| 10189 |
+
"loss": 0.2874,
|
| 10190 |
+
"step": 848500
|
| 10191 |
+
},
|
| 10192 |
+
{
|
| 10193 |
+
"epoch": 1.24,
|
| 10194 |
+
"learning_rate": 1.8547348888064178e-05,
|
| 10195 |
+
"loss": 0.2869,
|
| 10196 |
+
"step": 849000
|
| 10197 |
+
},
|
| 10198 |
+
{
|
| 10199 |
+
"epoch": 1.24,
|
| 10200 |
+
"learning_rate": 1.8492009071555703e-05,
|
| 10201 |
+
"loss": 0.2873,
|
| 10202 |
+
"step": 849500
|
| 10203 |
+
},
|
| 10204 |
+
{
|
| 10205 |
+
"epoch": 1.24,
|
| 10206 |
+
"learning_rate": 1.8436837415545772e-05,
|
| 10207 |
+
"loss": 0.2874,
|
| 10208 |
+
"step": 850000
|
| 10209 |
}
|
| 10210 |
],
|
| 10211 |
"max_steps": 1000000,
|
| 10212 |
"num_train_epochs": 2,
|
| 10213 |
+
"total_flos": 5.746589779150674e+22,
|
| 10214 |
"trial_name": null,
|
| 10215 |
"trial_params": null
|
| 10216 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449450757
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a461e16ba87e1e69720d854eafd25f8ca62064d88d546c07f46eddd6113b782
|
| 3 |
size 449450757
|