Training in progress, step 1200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84029e8f70d12b2c7137d5b303188195f30521bd1c82299d6d37b24c0269a65a
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d1059ec2c30fa5b1aacfd6d9895b6233f1694495648c48fedab4d2b8e820425
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7622d316fe354db40f60bc22ab635af3869b60bf5a6c816cb74ee6598c94be27
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:351fd5edffc48c8b46106c61b298184039dcb3c5ee48faa68a22154873155edd
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7168284058570862,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1022,6 +1022,151 @@
|
|
| 1022 |
"eval_samples_per_second": 25.244,
|
| 1023 |
"eval_steps_per_second": 25.244,
|
| 1024 |
"step": 1050
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1025 |
}
|
| 1026 |
],
|
| 1027 |
"logging_steps": 10,
|
|
@@ -1041,7 +1186,7 @@
|
|
| 1041 |
"attributes": {}
|
| 1042 |
}
|
| 1043 |
},
|
| 1044 |
-
"total_flos":
|
| 1045 |
"train_batch_size": 4,
|
| 1046 |
"trial_name": null,
|
| 1047 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7168284058570862,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 52.17391304347826,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 1200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1022 |
"eval_samples_per_second": 25.244,
|
| 1023 |
"eval_steps_per_second": 25.244,
|
| 1024 |
"step": 1050
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
"epoch": 46.08695652173913,
|
| 1028 |
+
"grad_norm": 1.668047308921814,
|
| 1029 |
+
"learning_rate": 1.130734941385923e-05,
|
| 1030 |
+
"loss": 0.4905,
|
| 1031 |
+
"step": 1060
|
| 1032 |
+
},
|
| 1033 |
+
{
|
| 1034 |
+
"epoch": 46.52173913043478,
|
| 1035 |
+
"grad_norm": 1.8213101625442505,
|
| 1036 |
+
"learning_rate": 1.1307334847763571e-05,
|
| 1037 |
+
"loss": 0.4654,
|
| 1038 |
+
"step": 1070
|
| 1039 |
+
},
|
| 1040 |
+
{
|
| 1041 |
+
"epoch": 46.95652173913044,
|
| 1042 |
+
"grad_norm": 1.7041969299316406,
|
| 1043 |
+
"learning_rate": 1.1307318040745661e-05,
|
| 1044 |
+
"loss": 0.4089,
|
| 1045 |
+
"step": 1080
|
| 1046 |
+
},
|
| 1047 |
+
{
|
| 1048 |
+
"epoch": 47.391304347826086,
|
| 1049 |
+
"grad_norm": 1.9170663356781006,
|
| 1050 |
+
"learning_rate": 1.1307298992812163e-05,
|
| 1051 |
+
"loss": 0.4169,
|
| 1052 |
+
"step": 1090
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 47.82608695652174,
|
| 1056 |
+
"grad_norm": 1.3400579690933228,
|
| 1057 |
+
"learning_rate": 1.1307277703970627e-05,
|
| 1058 |
+
"loss": 0.4451,
|
| 1059 |
+
"step": 1100
|
| 1060 |
+
},
|
| 1061 |
+
{
|
| 1062 |
+
"epoch": 48.26086956521739,
|
| 1063 |
+
"grad_norm": 1.8620245456695557,
|
| 1064 |
+
"learning_rate": 1.1307254174229492e-05,
|
| 1065 |
+
"loss": 0.4606,
|
| 1066 |
+
"step": 1110
|
| 1067 |
+
},
|
| 1068 |
+
{
|
| 1069 |
+
"epoch": 48.69565217391305,
|
| 1070 |
+
"grad_norm": 1.4493643045425415,
|
| 1071 |
+
"learning_rate": 1.1307228403598083e-05,
|
| 1072 |
+
"loss": 0.3842,
|
| 1073 |
+
"step": 1120
|
| 1074 |
+
},
|
| 1075 |
+
{
|
| 1076 |
+
"epoch": 49.130434782608695,
|
| 1077 |
+
"grad_norm": 1.5963612794876099,
|
| 1078 |
+
"learning_rate": 1.1307200392086617e-05,
|
| 1079 |
+
"loss": 0.5088,
|
| 1080 |
+
"step": 1130
|
| 1081 |
+
},
|
| 1082 |
+
{
|
| 1083 |
+
"epoch": 49.56521739130435,
|
| 1084 |
+
"grad_norm": 1.0860666036605835,
|
| 1085 |
+
"learning_rate": 1.1307170139706193e-05,
|
| 1086 |
+
"loss": 0.4661,
|
| 1087 |
+
"step": 1140
|
| 1088 |
+
},
|
| 1089 |
+
{
|
| 1090 |
+
"epoch": 50.0,
|
| 1091 |
+
"grad_norm": 2.400817632675171,
|
| 1092 |
+
"learning_rate": 1.1307137646468805e-05,
|
| 1093 |
+
"loss": 0.3732,
|
| 1094 |
+
"step": 1150
|
| 1095 |
+
},
|
| 1096 |
+
{
|
| 1097 |
+
"epoch": 50.43478260869565,
|
| 1098 |
+
"grad_norm": 1.2243698835372925,
|
| 1099 |
+
"learning_rate": 1.130710291238733e-05,
|
| 1100 |
+
"loss": 0.3852,
|
| 1101 |
+
"step": 1160
|
| 1102 |
+
},
|
| 1103 |
+
{
|
| 1104 |
+
"epoch": 50.869565217391305,
|
| 1105 |
+
"grad_norm": 1.5243916511535645,
|
| 1106 |
+
"learning_rate": 1.130706593747554e-05,
|
| 1107 |
+
"loss": 0.4324,
|
| 1108 |
+
"step": 1170
|
| 1109 |
+
},
|
| 1110 |
+
{
|
| 1111 |
+
"epoch": 51.30434782608695,
|
| 1112 |
+
"grad_norm": 1.778385877609253,
|
| 1113 |
+
"learning_rate": 1.1307026721748087e-05,
|
| 1114 |
+
"loss": 0.45,
|
| 1115 |
+
"step": 1180
|
| 1116 |
+
},
|
| 1117 |
+
{
|
| 1118 |
+
"epoch": 51.73913043478261,
|
| 1119 |
+
"grad_norm": 1.0479800701141357,
|
| 1120 |
+
"learning_rate": 1.1306985265220515e-05,
|
| 1121 |
+
"loss": 0.3661,
|
| 1122 |
+
"step": 1190
|
| 1123 |
+
},
|
| 1124 |
+
{
|
| 1125 |
+
"epoch": 52.17391304347826,
|
| 1126 |
+
"grad_norm": 2.22280216217041,
|
| 1127 |
+
"learning_rate": 1.1306941567909254e-05,
|
| 1128 |
+
"loss": 0.518,
|
| 1129 |
+
"step": 1200
|
| 1130 |
+
},
|
| 1131 |
+
{
|
| 1132 |
+
"epoch": 52.17391304347826,
|
| 1133 |
+
"eval_loss": 0.7372099161148071,
|
| 1134 |
+
"eval_runtime": 0.5807,
|
| 1135 |
+
"eval_samples_per_second": 17.222,
|
| 1136 |
+
"eval_steps_per_second": 17.222,
|
| 1137 |
+
"step": 1200
|
| 1138 |
+
},
|
| 1139 |
+
{
|
| 1140 |
+
"epoch": 52.17391304347826,
|
| 1141 |
+
"eval_loss": 0.8616224527359009,
|
| 1142 |
+
"eval_runtime": 0.4577,
|
| 1143 |
+
"eval_samples_per_second": 21.848,
|
| 1144 |
+
"eval_steps_per_second": 21.848,
|
| 1145 |
+
"step": 1200
|
| 1146 |
+
},
|
| 1147 |
+
{
|
| 1148 |
+
"epoch": 52.17391304347826,
|
| 1149 |
+
"eval_loss": 0.7372099161148071,
|
| 1150 |
+
"eval_runtime": 0.4574,
|
| 1151 |
+
"eval_samples_per_second": 21.862,
|
| 1152 |
+
"eval_steps_per_second": 21.862,
|
| 1153 |
+
"step": 1200
|
| 1154 |
+
},
|
| 1155 |
+
{
|
| 1156 |
+
"epoch": 52.17391304347826,
|
| 1157 |
+
"eval_loss": 0.7205449938774109,
|
| 1158 |
+
"eval_runtime": 0.4458,
|
| 1159 |
+
"eval_samples_per_second": 22.433,
|
| 1160 |
+
"eval_steps_per_second": 22.433,
|
| 1161 |
+
"step": 1200
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"epoch": 52.17391304347826,
|
| 1165 |
+
"eval_loss": 0.8604005575180054,
|
| 1166 |
+
"eval_runtime": 0.4457,
|
| 1167 |
+
"eval_samples_per_second": 22.437,
|
| 1168 |
+
"eval_steps_per_second": 22.437,
|
| 1169 |
+
"step": 1200
|
| 1170 |
}
|
| 1171 |
],
|
| 1172 |
"logging_steps": 10,
|
|
|
|
| 1186 |
"attributes": {}
|
| 1187 |
}
|
| 1188 |
},
|
| 1189 |
+
"total_flos": 3.076671992345395e+16,
|
| 1190 |
"train_batch_size": 4,
|
| 1191 |
"trial_name": null,
|
| 1192 |
"trial_params": null
|