Training in progress, epoch 11, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1227009528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db4b5d9091a6dbab9d2b4be7cf992134ba4a3e0d729e96284bc4512ac0932620
|
| 3 |
size 1227009528
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2454133690
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d9dbc9961f1b825d07e327826af5885ae6801dfe3867c659b03e9c90764c433
|
| 3 |
size 2454133690
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1671761f2a32f97e49b389d83fe64fe54fae391ec682766d59ea01e911801f0d
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a8f171c30ec70e8b7de39e28734b3eb14c402c92c5675eccaa14ecf588e6cff
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 34.700294494628906,
|
| 3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -997,6 +997,105 @@
|
|
| 997 |
"eval_samples_per_second": 26.475,
|
| 998 |
"eval_steps_per_second": 3.327,
|
| 999 |
"step": 13050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1000 |
}
|
| 1001 |
],
|
| 1002 |
"logging_steps": 100,
|
|
@@ -1011,7 +1110,7 @@
|
|
| 1011 |
"early_stopping_threshold": 0.0
|
| 1012 |
},
|
| 1013 |
"attributes": {
|
| 1014 |
-
"early_stopping_patience_counter":
|
| 1015 |
}
|
| 1016 |
},
|
| 1017 |
"TrainerControl": {
|
|
@@ -1025,7 +1124,7 @@
|
|
| 1025 |
"attributes": {}
|
| 1026 |
}
|
| 1027 |
},
|
| 1028 |
-
"total_flos": 1.
|
| 1029 |
"train_batch_size": 8,
|
| 1030 |
"trial_name": null,
|
| 1031 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 34.700294494628906,
|
| 3 |
"best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
|
| 4 |
+
"epoch": 11.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 14355,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 997 |
"eval_samples_per_second": 26.475,
|
| 998 |
"eval_steps_per_second": 3.327,
|
| 999 |
"step": 13050
|
| 1000 |
+
},
|
| 1001 |
+
{
|
| 1002 |
+
"epoch": 10.03831417624521,
|
| 1003 |
+
"grad_norm": 2.5322816371917725,
|
| 1004 |
+
"learning_rate": 4.3728448275862074e-05,
|
| 1005 |
+
"loss": 33.8873,
|
| 1006 |
+
"step": 13100
|
| 1007 |
+
},
|
| 1008 |
+
{
|
| 1009 |
+
"epoch": 10.114942528735632,
|
| 1010 |
+
"grad_norm": 2.1063241958618164,
|
| 1011 |
+
"learning_rate": 4.368103448275862e-05,
|
| 1012 |
+
"loss": 33.871,
|
| 1013 |
+
"step": 13200
|
| 1014 |
+
},
|
| 1015 |
+
{
|
| 1016 |
+
"epoch": 10.191570881226054,
|
| 1017 |
+
"grad_norm": 3.7001326084136963,
|
| 1018 |
+
"learning_rate": 4.3633141762452106e-05,
|
| 1019 |
+
"loss": 34.5129,
|
| 1020 |
+
"step": 13300
|
| 1021 |
+
},
|
| 1022 |
+
{
|
| 1023 |
+
"epoch": 10.268199233716475,
|
| 1024 |
+
"grad_norm": 1.8534705638885498,
|
| 1025 |
+
"learning_rate": 4.35852490421456e-05,
|
| 1026 |
+
"loss": 33.7739,
|
| 1027 |
+
"step": 13400
|
| 1028 |
+
},
|
| 1029 |
+
{
|
| 1030 |
+
"epoch": 10.344827586206897,
|
| 1031 |
+
"grad_norm": 1.9871069192886353,
|
| 1032 |
+
"learning_rate": 4.3537356321839086e-05,
|
| 1033 |
+
"loss": 33.4124,
|
| 1034 |
+
"step": 13500
|
| 1035 |
+
},
|
| 1036 |
+
{
|
| 1037 |
+
"epoch": 10.421455938697317,
|
| 1038 |
+
"grad_norm": 2.264529228210449,
|
| 1039 |
+
"learning_rate": 4.348946360153257e-05,
|
| 1040 |
+
"loss": 33.24,
|
| 1041 |
+
"step": 13600
|
| 1042 |
+
},
|
| 1043 |
+
{
|
| 1044 |
+
"epoch": 10.49808429118774,
|
| 1045 |
+
"grad_norm": 3.0297787189483643,
|
| 1046 |
+
"learning_rate": 4.344157088122606e-05,
|
| 1047 |
+
"loss": 33.2922,
|
| 1048 |
+
"step": 13700
|
| 1049 |
+
},
|
| 1050 |
+
{
|
| 1051 |
+
"epoch": 10.574712643678161,
|
| 1052 |
+
"grad_norm": 2.7185864448547363,
|
| 1053 |
+
"learning_rate": 4.339367816091954e-05,
|
| 1054 |
+
"loss": 33.4859,
|
| 1055 |
+
"step": 13800
|
| 1056 |
+
},
|
| 1057 |
+
{
|
| 1058 |
+
"epoch": 10.651340996168582,
|
| 1059 |
+
"grad_norm": 3.8887524604797363,
|
| 1060 |
+
"learning_rate": 4.334578544061303e-05,
|
| 1061 |
+
"loss": 33.4322,
|
| 1062 |
+
"step": 13900
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"epoch": 10.727969348659004,
|
| 1066 |
+
"grad_norm": 2.5119857788085938,
|
| 1067 |
+
"learning_rate": 4.3297892720306514e-05,
|
| 1068 |
+
"loss": 33.6234,
|
| 1069 |
+
"step": 14000
|
| 1070 |
+
},
|
| 1071 |
+
{
|
| 1072 |
+
"epoch": 10.804597701149426,
|
| 1073 |
+
"grad_norm": 3.2969565391540527,
|
| 1074 |
+
"learning_rate": 4.325e-05,
|
| 1075 |
+
"loss": 33.4341,
|
| 1076 |
+
"step": 14100
|
| 1077 |
+
},
|
| 1078 |
+
{
|
| 1079 |
+
"epoch": 10.881226053639846,
|
| 1080 |
+
"grad_norm": 3.3629229068756104,
|
| 1081 |
+
"learning_rate": 4.320210727969349e-05,
|
| 1082 |
+
"loss": 32.7636,
|
| 1083 |
+
"step": 14200
|
| 1084 |
+
},
|
| 1085 |
+
{
|
| 1086 |
+
"epoch": 10.957854406130268,
|
| 1087 |
+
"grad_norm": 3.0765013694763184,
|
| 1088 |
+
"learning_rate": 4.3154214559386975e-05,
|
| 1089 |
+
"loss": 33.7066,
|
| 1090 |
+
"step": 14300
|
| 1091 |
+
},
|
| 1092 |
+
{
|
| 1093 |
+
"epoch": 11.0,
|
| 1094 |
+
"eval_loss": 34.70278549194336,
|
| 1095 |
+
"eval_runtime": 49.2928,
|
| 1096 |
+
"eval_samples_per_second": 26.474,
|
| 1097 |
+
"eval_steps_per_second": 3.327,
|
| 1098 |
+
"step": 14355
|
| 1099 |
}
|
| 1100 |
],
|
| 1101 |
"logging_steps": 100,
|
|
|
|
| 1110 |
"early_stopping_threshold": 0.0
|
| 1111 |
},
|
| 1112 |
"attributes": {
|
| 1113 |
+
"early_stopping_patience_counter": 1
|
| 1114 |
}
|
| 1115 |
},
|
| 1116 |
"TrainerControl": {
|
|
|
|
| 1124 |
"attributes": {}
|
| 1125 |
}
|
| 1126 |
},
|
| 1127 |
+
"total_flos": 1.5480419933518848e+16,
|
| 1128 |
"train_batch_size": 8,
|
| 1129 |
"trial_name": null,
|
| 1130 |
"trial_params": null
|