Training in progress, step 1700, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3826461296
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3919cab12afb691f8e2bde9aed0bdad3628d6f1a5ecae97beb9b67f52859024e
|
| 3 |
size 3826461296
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2479955235
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65cec01b065a22732babc2be6945a5935ab48f5f41fd2fba8b539e6256b0dfa7
|
| 3 |
size 2479955235
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1383
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da57c8097b451ef1168f1b0191d0689aff1a3bd0997413b1e9eeee0934b0b53c
|
| 3 |
size 1383
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d5ccf396d48a7891c1332094feb71b5d5d1edce123ef8038fc290770c5e3a02
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1128,6 +1128,76 @@
|
|
| 1128 |
"learning_rate": 1.8167202572347267e-05,
|
| 1129 |
"loss": 0.3986,
|
| 1130 |
"step": 1600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1131 |
}
|
| 1132 |
],
|
| 1133 |
"logging_steps": 10,
|
|
@@ -1147,7 +1217,7 @@
|
|
| 1147 |
"attributes": {}
|
| 1148 |
}
|
| 1149 |
},
|
| 1150 |
-
"total_flos":
|
| 1151 |
"train_batch_size": 2,
|
| 1152 |
"trial_name": null,
|
| 1153 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.68,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1700,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1128 |
"learning_rate": 1.8167202572347267e-05,
|
| 1129 |
"loss": 0.3986,
|
| 1130 |
"step": 1600
|
| 1131 |
+
},
|
| 1132 |
+
{
|
| 1133 |
+
"epoch": 0.644,
|
| 1134 |
+
"grad_norm": 13.649085998535156,
|
| 1135 |
+
"learning_rate": 1.796623794212219e-05,
|
| 1136 |
+
"loss": 0.4337,
|
| 1137 |
+
"step": 1610
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 0.648,
|
| 1141 |
+
"grad_norm": 7.645134449005127,
|
| 1142 |
+
"learning_rate": 1.7765273311897108e-05,
|
| 1143 |
+
"loss": 0.3901,
|
| 1144 |
+
"step": 1620
|
| 1145 |
+
},
|
| 1146 |
+
{
|
| 1147 |
+
"epoch": 0.652,
|
| 1148 |
+
"grad_norm": 11.727263450622559,
|
| 1149 |
+
"learning_rate": 1.7564308681672027e-05,
|
| 1150 |
+
"loss": 0.3545,
|
| 1151 |
+
"step": 1630
|
| 1152 |
+
},
|
| 1153 |
+
{
|
| 1154 |
+
"epoch": 0.656,
|
| 1155 |
+
"grad_norm": 6.705881595611572,
|
| 1156 |
+
"learning_rate": 1.736334405144695e-05,
|
| 1157 |
+
"loss": 0.3471,
|
| 1158 |
+
"step": 1640
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 0.66,
|
| 1162 |
+
"grad_norm": 12.363304138183594,
|
| 1163 |
+
"learning_rate": 1.7162379421221868e-05,
|
| 1164 |
+
"loss": 0.4351,
|
| 1165 |
+
"step": 1650
|
| 1166 |
+
},
|
| 1167 |
+
{
|
| 1168 |
+
"epoch": 0.664,
|
| 1169 |
+
"grad_norm": 20.208723068237305,
|
| 1170 |
+
"learning_rate": 1.6961414790996786e-05,
|
| 1171 |
+
"loss": 0.4284,
|
| 1172 |
+
"step": 1660
|
| 1173 |
+
},
|
| 1174 |
+
{
|
| 1175 |
+
"epoch": 0.668,
|
| 1176 |
+
"grad_norm": 10.82363224029541,
|
| 1177 |
+
"learning_rate": 1.6760450160771705e-05,
|
| 1178 |
+
"loss": 0.3369,
|
| 1179 |
+
"step": 1670
|
| 1180 |
+
},
|
| 1181 |
+
{
|
| 1182 |
+
"epoch": 0.672,
|
| 1183 |
+
"grad_norm": 9.544486045837402,
|
| 1184 |
+
"learning_rate": 1.6559485530546627e-05,
|
| 1185 |
+
"loss": 0.4059,
|
| 1186 |
+
"step": 1680
|
| 1187 |
+
},
|
| 1188 |
+
{
|
| 1189 |
+
"epoch": 0.676,
|
| 1190 |
+
"grad_norm": 8.426627159118652,
|
| 1191 |
+
"learning_rate": 1.6358520900321546e-05,
|
| 1192 |
+
"loss": 0.4494,
|
| 1193 |
+
"step": 1690
|
| 1194 |
+
},
|
| 1195 |
+
{
|
| 1196 |
+
"epoch": 0.68,
|
| 1197 |
+
"grad_norm": 8.424084663391113,
|
| 1198 |
+
"learning_rate": 1.6157556270096464e-05,
|
| 1199 |
+
"loss": 0.4807,
|
| 1200 |
+
"step": 1700
|
| 1201 |
}
|
| 1202 |
],
|
| 1203 |
"logging_steps": 10,
|
|
|
|
| 1217 |
"attributes": {}
|
| 1218 |
}
|
| 1219 |
},
|
| 1220 |
+
"total_flos": 3.0652319992449024e+16,
|
| 1221 |
"train_batch_size": 2,
|
| 1222 |
"trial_name": null,
|
| 1223 |
"trial_params": null
|