Training in progress, step 200, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +39 -3
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 136062744
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c79b3d2dfa1e2ff08e34bee0a50e5d26df97d53fd00ae51087689ebaf5027fe9
|
| 3 |
size 136062744
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 272133748
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f5a1aea2e7e54386cf3ce389cec6fd8823514c6ea8045ebccf97c746d743cca
|
| 3 |
size 272133748
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2976a2f475d8edc9e9a00ed903ec6fa861e056646565524847948c22626d681
|
| 3 |
size 15984
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4a98cf07637306947ea7d3f67892a3b98b5c22007d4395c1de2047ef45cd95c
|
| 3 |
size 15984
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e06aedc7584f87414cddb2adf9cb46d6573a485741ffcbccef2e7d45ace8f8f8
|
| 3 |
size 15984
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af7c8c884b8c371ae21c399e557a74788ef7204c82d4d61283bc7a25749fb4a5
|
| 3 |
size 15984
|
last-checkpoint/rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf3bb25966486ea21e5c0eda07f93bcccad75e0a9e396cccc2d2b31e52284d21
|
| 3 |
size 15984
|
last-checkpoint/rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec6fb749e5390815130c1196c45f456b136c4b36339acd1006b814b606a29cec
|
| 3 |
size 15984
|
last-checkpoint/rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6724cc3b6fa30b54ecf6969ff17f0fa6b8805e0ec29471ae97fdc0007eca256
|
| 3 |
size 15984
|
last-checkpoint/rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15984
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5db64dfbdb44b1f0e091abfb9b970cb0dc413d3122f9ce2b84c891c3041b677
|
| 3 |
size 15984
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e961aafc4bb3a24acc13a66c3eb856682eb2bc992742878e58df3da341f94ce
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 3.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -127,6 +127,42 @@
|
|
| 127 |
"learning_rate": 0.0001993943172293368,
|
| 128 |
"loss": 2.5101,
|
| 129 |
"step": 160
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
}
|
| 131 |
],
|
| 132 |
"logging_steps": 10,
|
|
@@ -146,7 +182,7 @@
|
|
| 146 |
"attributes": {}
|
| 147 |
}
|
| 148 |
},
|
| 149 |
-
"total_flos": 2.
|
| 150 |
"train_batch_size": 1,
|
| 151 |
"trial_name": null,
|
| 152 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 3.7772511848341233,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 127 |
"learning_rate": 0.0001993943172293368,
|
| 128 |
"loss": 2.5101,
|
| 129 |
"step": 160
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 3.2085308056872037,
|
| 133 |
+
"grad_norm": 2.453125,
|
| 134 |
+
"learning_rate": 0.00019924361097105623,
|
| 135 |
+
"loss": 2.1293,
|
| 136 |
+
"step": 170
|
| 137 |
+
},
|
| 138 |
+
{
|
| 139 |
+
"epoch": 3.3981042654028437,
|
| 140 |
+
"grad_norm": 1.765625,
|
| 141 |
+
"learning_rate": 0.0001990762486348855,
|
| 142 |
+
"loss": 2.2484,
|
| 143 |
+
"step": 180
|
| 144 |
+
},
|
| 145 |
+
{
|
| 146 |
+
"epoch": 3.5876777251184833,
|
| 147 |
+
"grad_norm": 4.5,
|
| 148 |
+
"learning_rate": 0.00019889225830928365,
|
| 149 |
+
"loss": 2.0503,
|
| 150 |
+
"step": 190
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 3.7772511848341233,
|
| 154 |
+
"grad_norm": 1.7734375,
|
| 155 |
+
"learning_rate": 0.00019869167087338907,
|
| 156 |
+
"loss": 2.2932,
|
| 157 |
+
"step": 200
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 3.7772511848341233,
|
| 161 |
+
"eval_loss": 2.8660690784454346,
|
| 162 |
+
"eval_runtime": 1.0321,
|
| 163 |
+
"eval_samples_per_second": 1454.261,
|
| 164 |
+
"eval_steps_per_second": 182.146,
|
| 165 |
+
"step": 200
|
| 166 |
}
|
| 167 |
],
|
| 168 |
"logging_steps": 10,
|
|
|
|
| 182 |
"attributes": {}
|
| 183 |
}
|
| 184 |
},
|
| 185 |
+
"total_flos": 2.7236502626893824e+16,
|
| 186 |
"train_batch_size": 1,
|
| 187 |
"trial_name": null,
|
| 188 |
"trial_params": null
|