Training in progress, epoch 12, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1227009528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d087532161fc3d3113f958d4327ca8ab76fb93d1b9005d7b72d8341648a7f95e
|
| 3 |
size 1227009528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2454133690
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d359385b3376fb641197873abbd6f199bc67d84ad37382d398095c1f51b664a9
|
| 3 |
size 2454133690
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c4d71a933e8a99a1b5e03ca178837d4af39c5cb9255b1959f57ce6925e566d0
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f18daae1b94bcadba9e921cdd5d160fa2fe3e4c34c14e032eed270d5a8a3cca
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 34.700294494628906,
|
| 3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1096,6 +1096,105 @@
|
|
| 1096 |
"eval_samples_per_second": 26.474,
|
| 1097 |
"eval_steps_per_second": 3.327,
|
| 1098 |
"step": 14355
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1099 |
}
|
| 1100 |
],
|
| 1101 |
"logging_steps": 100,
|
|
@@ -1110,7 +1209,7 @@
|
|
| 1110 |
"early_stopping_threshold": 0.0
|
| 1111 |
},
|
| 1112 |
"attributes": {
|
| 1113 |
-
"early_stopping_patience_counter":
|
| 1114 |
}
|
| 1115 |
},
|
| 1116 |
"TrainerControl": {
|
|
@@ -1124,7 +1223,7 @@
|
|
| 1124 |
"attributes": {}
|
| 1125 |
}
|
| 1126 |
},
|
| 1127 |
-
"total_flos": 1.
|
| 1128 |
"train_batch_size": 8,
|
| 1129 |
"trial_name": null,
|
| 1130 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 34.700294494628906,
|
| 3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
|
| 4 |
+
"epoch": 12.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 15660,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1096 |
"eval_samples_per_second": 26.474,
|
| 1097 |
"eval_steps_per_second": 3.327,
|
| 1098 |
"step": 14355
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"epoch": 11.03448275862069,
|
| 1102 |
+
"grad_norm": 2.7724273204803467,
|
| 1103 |
+
"learning_rate": 4.310632183908046e-05,
|
| 1104 |
+
"loss": 33.7759,
|
| 1105 |
+
"step": 14400
|
| 1106 |
+
},
|
| 1107 |
+
{
|
| 1108 |
+
"epoch": 11.11111111111111,
|
| 1109 |
+
"grad_norm": 3.9663071632385254,
|
| 1110 |
+
"learning_rate": 4.305842911877395e-05,
|
| 1111 |
+
"loss": 33.6063,
|
| 1112 |
+
"step": 14500
|
| 1113 |
+
},
|
| 1114 |
+
{
|
| 1115 |
+
"epoch": 11.187739463601533,
|
| 1116 |
+
"grad_norm": 2.53495717048645,
|
| 1117 |
+
"learning_rate": 4.3010536398467435e-05,
|
| 1118 |
+
"loss": 32.9251,
|
| 1119 |
+
"step": 14600
|
| 1120 |
+
},
|
| 1121 |
+
{
|
| 1122 |
+
"epoch": 11.264367816091955,
|
| 1123 |
+
"grad_norm": 3.928633689880371,
|
| 1124 |
+
"learning_rate": 4.296264367816092e-05,
|
| 1125 |
+
"loss": 33.41,
|
| 1126 |
+
"step": 14700
|
| 1127 |
+
},
|
| 1128 |
+
{
|
| 1129 |
+
"epoch": 11.340996168582375,
|
| 1130 |
+
"grad_norm": 1.888804316520691,
|
| 1131 |
+
"learning_rate": 4.291475095785441e-05,
|
| 1132 |
+
"loss": 33.147,
|
| 1133 |
+
"step": 14800
|
| 1134 |
+
},
|
| 1135 |
+
{
|
| 1136 |
+
"epoch": 11.417624521072797,
|
| 1137 |
+
"grad_norm": 3.151488780975342,
|
| 1138 |
+
"learning_rate": 4.2866858237547896e-05,
|
| 1139 |
+
"loss": 34.011,
|
| 1140 |
+
"step": 14900
|
| 1141 |
+
},
|
| 1142 |
+
{
|
| 1143 |
+
"epoch": 11.494252873563218,
|
| 1144 |
+
"grad_norm": 2.659867286682129,
|
| 1145 |
+
"learning_rate": 4.281896551724138e-05,
|
| 1146 |
+
"loss": 33.3559,
|
| 1147 |
+
"step": 15000
|
| 1148 |
+
},
|
| 1149 |
+
{
|
| 1150 |
+
"epoch": 11.57088122605364,
|
| 1151 |
+
"grad_norm": 4.092405319213867,
|
| 1152 |
+
"learning_rate": 4.277107279693487e-05,
|
| 1153 |
+
"loss": 33.2301,
|
| 1154 |
+
"step": 15100
|
| 1155 |
+
},
|
| 1156 |
+
{
|
| 1157 |
+
"epoch": 11.647509578544062,
|
| 1158 |
+
"grad_norm": 4.295740127563477,
|
| 1159 |
+
"learning_rate": 4.2723659003831415e-05,
|
| 1160 |
+
"loss": 33.1047,
|
| 1161 |
+
"step": 15200
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"epoch": 11.724137931034482,
|
| 1165 |
+
"grad_norm": 2.4472806453704834,
|
| 1166 |
+
"learning_rate": 4.26757662835249e-05,
|
| 1167 |
+
"loss": 33.8206,
|
| 1168 |
+
"step": 15300
|
| 1169 |
+
},
|
| 1170 |
+
{
|
| 1171 |
+
"epoch": 11.800766283524904,
|
| 1172 |
+
"grad_norm": 2.716550350189209,
|
| 1173 |
+
"learning_rate": 4.262787356321839e-05,
|
| 1174 |
+
"loss": 33.7173,
|
| 1175 |
+
"step": 15400
|
| 1176 |
+
},
|
| 1177 |
+
{
|
| 1178 |
+
"epoch": 11.877394636015326,
|
| 1179 |
+
"grad_norm": 3.1278491020202637,
|
| 1180 |
+
"learning_rate": 4.257998084291188e-05,
|
| 1181 |
+
"loss": 34.0344,
|
| 1182 |
+
"step": 15500
|
| 1183 |
+
},
|
| 1184 |
+
{
|
| 1185 |
+
"epoch": 11.954022988505747,
|
| 1186 |
+
"grad_norm": 2.4835212230682373,
|
| 1187 |
+
"learning_rate": 4.253208812260537e-05,
|
| 1188 |
+
"loss": 33.8397,
|
| 1189 |
+
"step": 15600
|
| 1190 |
+
},
|
| 1191 |
+
{
|
| 1192 |
+
"epoch": 12.0,
|
| 1193 |
+
"eval_loss": 34.70100402832031,
|
| 1194 |
+
"eval_runtime": 49.2554,
|
| 1195 |
+
"eval_samples_per_second": 26.495,
|
| 1196 |
+
"eval_steps_per_second": 3.33,
|
| 1197 |
+
"step": 15660
|
| 1198 |
}
|
| 1199 |
],
|
| 1200 |
"logging_steps": 100,
|
|
|
|
| 1209 |
"early_stopping_threshold": 0.0
|
| 1210 |
},
|
| 1211 |
"attributes": {
|
| 1212 |
+
"early_stopping_patience_counter": 2
|
| 1213 |
}
|
| 1214 |
},
|
| 1215 |
"TrainerControl": {
|
|
|
|
| 1223 |
"attributes": {}
|
| 1224 |
}
|
| 1225 |
},
|
| 1226 |
+
"total_flos": 1.6887730836566016e+16,
|
| 1227 |
"train_batch_size": 8,
|
| 1228 |
"trial_name": null,
|
| 1229 |
"trial_params": null
|