Training in progress, step 150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 45118424
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fa8d826125079ec9c5474ab65c06145915471c18bc66f1805c7d717cadd029e
|
| 3 |
size 45118424
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 23159290
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:130d8ef83feeaaf7c5ecb1613710a52b6dfa277de7c2d71c5a4215216ab69e20
|
| 3 |
size 23159290
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ddb3a14e37d35591e2f920bed97a2ab497a50f4ae5ff4f56d4a7abfd88cf5f7
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:194456d3c9e165255d5406a0f3f62973b0bede79d91784f72431350783e27ae7
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.07654841244220734,
|
| 3 |
"best_model_checkpoint": "miner_id_besimray/checkpoint-120",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 20,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1051,6 +1051,76 @@
|
|
| 1051 |
"eval_samples_per_second": 23.712,
|
| 1052 |
"eval_steps_per_second": 2.458,
|
| 1053 |
"step": 140
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1054 |
}
|
| 1055 |
],
|
| 1056 |
"logging_steps": 1,
|
|
@@ -1074,12 +1144,12 @@
|
|
| 1074 |
"should_evaluate": false,
|
| 1075 |
"should_log": false,
|
| 1076 |
"should_save": true,
|
| 1077 |
-
"should_training_stop":
|
| 1078 |
},
|
| 1079 |
"attributes": {}
|
| 1080 |
}
|
| 1081 |
},
|
| 1082 |
-
"total_flos": 3.
|
| 1083 |
"train_batch_size": 10,
|
| 1084 |
"trial_name": null,
|
| 1085 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.07654841244220734,
|
| 3 |
"best_model_checkpoint": "miner_id_besimray/checkpoint-120",
|
| 4 |
+
"epoch": 1.9292604501607717,
|
| 5 |
"eval_steps": 20,
|
| 6 |
+
"global_step": 150,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1051 |
"eval_samples_per_second": 23.712,
|
| 1052 |
"eval_steps_per_second": 2.458,
|
| 1053 |
"step": 140
|
| 1054 |
+
},
|
| 1055 |
+
{
|
| 1056 |
+
"epoch": 1.8135048231511255,
|
| 1057 |
+
"grad_norm": 0.7514206767082214,
|
| 1058 |
+
"learning_rate": 2.032460157676452e-06,
|
| 1059 |
+
"loss": 0.0471,
|
| 1060 |
+
"step": 141
|
| 1061 |
+
},
|
| 1062 |
+
{
|
| 1063 |
+
"epoch": 1.8263665594855305,
|
| 1064 |
+
"grad_norm": 1.090172529220581,
|
| 1065 |
+
"learning_rate": 1.6070411401370334e-06,
|
| 1066 |
+
"loss": 0.0541,
|
| 1067 |
+
"step": 142
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 1.8392282958199357,
|
| 1071 |
+
"grad_norm": 1.1606374979019165,
|
| 1072 |
+
"learning_rate": 1.231165940486234e-06,
|
| 1073 |
+
"loss": 0.0473,
|
| 1074 |
+
"step": 143
|
| 1075 |
+
},
|
| 1076 |
+
{
|
| 1077 |
+
"epoch": 1.852090032154341,
|
| 1078 |
+
"grad_norm": 0.8554772734642029,
|
| 1079 |
+
"learning_rate": 9.0502382320653e-07,
|
| 1080 |
+
"loss": 0.0556,
|
| 1081 |
+
"step": 144
|
| 1082 |
+
},
|
| 1083 |
+
{
|
| 1084 |
+
"epoch": 1.864951768488746,
|
| 1085 |
+
"grad_norm": 1.0313165187835693,
|
| 1086 |
+
"learning_rate": 6.287790106757396e-07,
|
| 1087 |
+
"loss": 0.0651,
|
| 1088 |
+
"step": 145
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"epoch": 1.877813504823151,
|
| 1092 |
+
"grad_norm": 1.2343289852142334,
|
| 1093 |
+
"learning_rate": 4.025706004760932e-07,
|
| 1094 |
+
"loss": 0.0919,
|
| 1095 |
+
"step": 146
|
| 1096 |
+
},
|
| 1097 |
+
{
|
| 1098 |
+
"epoch": 1.8906752411575563,
|
| 1099 |
+
"grad_norm": 0.6673130393028259,
|
| 1100 |
+
"learning_rate": 2.265124953543918e-07,
|
| 1101 |
+
"loss": 0.0284,
|
| 1102 |
+
"step": 147
|
| 1103 |
+
},
|
| 1104 |
+
{
|
| 1105 |
+
"epoch": 1.9035369774919615,
|
| 1106 |
+
"grad_norm": 0.460891991853714,
|
| 1107 |
+
"learning_rate": 1.0069334586854107e-07,
|
| 1108 |
+
"loss": 0.0221,
|
| 1109 |
+
"step": 148
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
"epoch": 1.9163987138263665,
|
| 1113 |
+
"grad_norm": 1.798021674156189,
|
| 1114 |
+
"learning_rate": 2.5176505749346936e-08,
|
| 1115 |
+
"loss": 0.1315,
|
| 1116 |
+
"step": 149
|
| 1117 |
+
},
|
| 1118 |
+
{
|
| 1119 |
+
"epoch": 1.9292604501607717,
|
| 1120 |
+
"grad_norm": 0.5896714925765991,
|
| 1121 |
+
"learning_rate": 0.0,
|
| 1122 |
+
"loss": 0.0268,
|
| 1123 |
+
"step": 150
|
| 1124 |
}
|
| 1125 |
],
|
| 1126 |
"logging_steps": 1,
|
|
|
|
| 1144 |
"should_evaluate": false,
|
| 1145 |
"should_log": false,
|
| 1146 |
"should_save": true,
|
| 1147 |
+
"should_training_stop": true
|
| 1148 |
},
|
| 1149 |
"attributes": {}
|
| 1150 |
}
|
| 1151 |
},
|
| 1152 |
+
"total_flos": 3.58265259884544e+16,
|
| 1153 |
"train_batch_size": 10,
|
| 1154 |
"trial_name": null,
|
| 1155 |
"trial_params": null
|