Upload folder using huggingface_hub
Browse files- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- trainer_state.json +113 -113
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -34,12 +34,12 @@
|
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
| 36 |
"o_proj",
|
| 37 |
-
"k_proj",
|
| 38 |
-
"q_proj",
|
| 39 |
-
"gate_proj",
|
| 40 |
"v_proj",
|
| 41 |
"down_proj",
|
| 42 |
-
"
|
|
|
|
|
|
|
|
|
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
| 45 |
"task_type": "SEQ_CLS",
|
|
|
|
| 34 |
"revision": null,
|
| 35 |
"target_modules": [
|
| 36 |
"o_proj",
|
|
|
|
|
|
|
|
|
|
| 37 |
"v_proj",
|
| 38 |
"down_proj",
|
| 39 |
+
"q_proj",
|
| 40 |
+
"k_proj",
|
| 41 |
+
"up_proj",
|
| 42 |
+
"gate_proj"
|
| 43 |
],
|
| 44 |
"target_parameters": null,
|
| 45 |
"task_type": "SEQ_CLS",
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 664635272
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2416d6550deb818d0f05c387f8565c7b47944e0b1d169a75775dbd8f88bd149f
|
| 3 |
size 664635272
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1329479786
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc364ea3c42c9c346258ba687f8dc4e63455c63fbf573ffe6f682811a922e8f2
|
| 3 |
size 1329479786
|
trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_global_step": 320,
|
| 3 |
-
"best_metric": 0.
|
| 4 |
"best_model_checkpoint": "/content/gemma_lora_imb/checkpoint-320",
|
| 5 |
"epoch": 1.6494845360824741,
|
| 6 |
"eval_steps": 20,
|
|
@@ -11,258 +11,258 @@
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.10309278350515463,
|
| 14 |
-
"grad_norm":
|
| 15 |
"learning_rate": 9.510309278350516e-06,
|
| 16 |
-
"loss":
|
| 17 |
"step": 20
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.10309278350515463,
|
| 21 |
-
"eval_f1_macro": 0.
|
| 22 |
-
"eval_loss": 1.
|
| 23 |
-
"eval_runtime":
|
| 24 |
-
"eval_samples_per_second":
|
| 25 |
-
"eval_steps_per_second": 6.
|
| 26 |
"step": 20
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"epoch": 0.20618556701030927,
|
| 30 |
-
"grad_norm":
|
| 31 |
"learning_rate": 8.994845360824743e-06,
|
| 32 |
-
"loss": 1.
|
| 33 |
"step": 40
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"epoch": 0.20618556701030927,
|
| 37 |
-
"eval_f1_macro": 0.
|
| 38 |
-
"eval_loss": 1.
|
| 39 |
-
"eval_runtime":
|
| 40 |
-
"eval_samples_per_second":
|
| 41 |
-
"eval_steps_per_second":
|
| 42 |
"step": 40
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"epoch": 0.30927835051546393,
|
| 46 |
-
"grad_norm":
|
| 47 |
"learning_rate": 8.479381443298969e-06,
|
| 48 |
-
"loss": 1.
|
| 49 |
"step": 60
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"epoch": 0.30927835051546393,
|
| 53 |
-
"eval_f1_macro": 0.
|
| 54 |
-
"eval_loss": 1.
|
| 55 |
-
"eval_runtime":
|
| 56 |
-
"eval_samples_per_second":
|
| 57 |
-
"eval_steps_per_second": 6.
|
| 58 |
"step": 60
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 0.41237113402061853,
|
| 62 |
-
"grad_norm":
|
| 63 |
"learning_rate": 7.963917525773196e-06,
|
| 64 |
-
"loss": 0.
|
| 65 |
"step": 80
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"epoch": 0.41237113402061853,
|
| 69 |
-
"eval_f1_macro": 0.
|
| 70 |
-
"eval_loss": 0.
|
| 71 |
-
"eval_runtime": 3.
|
| 72 |
-
"eval_samples_per_second":
|
| 73 |
-
"eval_steps_per_second": 6.
|
| 74 |
"step": 80
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"epoch": 0.5154639175257731,
|
| 78 |
-
"grad_norm":
|
| 79 |
"learning_rate": 7.448453608247424e-06,
|
| 80 |
-
"loss": 0.
|
| 81 |
"step": 100
|
| 82 |
},
|
| 83 |
{
|
| 84 |
"epoch": 0.5154639175257731,
|
| 85 |
-
"eval_f1_macro": 0.
|
| 86 |
-
"eval_loss": 0.
|
| 87 |
-
"eval_runtime": 3.
|
| 88 |
-
"eval_samples_per_second":
|
| 89 |
-
"eval_steps_per_second": 6.
|
| 90 |
"step": 100
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"epoch": 0.6185567010309279,
|
| 94 |
-
"grad_norm":
|
| 95 |
"learning_rate": 6.93298969072165e-06,
|
| 96 |
-
"loss": 0.
|
| 97 |
"step": 120
|
| 98 |
},
|
| 99 |
{
|
| 100 |
"epoch": 0.6185567010309279,
|
| 101 |
-
"eval_f1_macro": 0.
|
| 102 |
-
"eval_loss": 0.
|
| 103 |
-
"eval_runtime":
|
| 104 |
-
"eval_samples_per_second":
|
| 105 |
-
"eval_steps_per_second": 6.
|
| 106 |
"step": 120
|
| 107 |
},
|
| 108 |
{
|
| 109 |
"epoch": 0.7216494845360825,
|
| 110 |
-
"grad_norm": 9.
|
| 111 |
"learning_rate": 6.417525773195877e-06,
|
| 112 |
-
"loss": 0.
|
| 113 |
"step": 140
|
| 114 |
},
|
| 115 |
{
|
| 116 |
"epoch": 0.7216494845360825,
|
| 117 |
-
"eval_f1_macro": 0.
|
| 118 |
-
"eval_loss": 0.
|
| 119 |
-
"eval_runtime": 3.
|
| 120 |
-
"eval_samples_per_second":
|
| 121 |
-
"eval_steps_per_second": 6.
|
| 122 |
"step": 140
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"epoch": 0.8247422680412371,
|
| 126 |
-
"grad_norm":
|
| 127 |
"learning_rate": 5.902061855670104e-06,
|
| 128 |
-
"loss": 0.
|
| 129 |
"step": 160
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"epoch": 0.8247422680412371,
|
| 133 |
-
"eval_f1_macro": 0.
|
| 134 |
-
"eval_loss": 0.
|
| 135 |
-
"eval_runtime": 3.
|
| 136 |
-
"eval_samples_per_second":
|
| 137 |
-
"eval_steps_per_second": 6.
|
| 138 |
"step": 160
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"epoch": 0.9278350515463918,
|
| 142 |
-
"grad_norm": 12.
|
| 143 |
"learning_rate": 5.38659793814433e-06,
|
| 144 |
-
"loss": 0.
|
| 145 |
"step": 180
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 0.9278350515463918,
|
| 149 |
-
"eval_f1_macro": 0.
|
| 150 |
-
"eval_loss": 0.
|
| 151 |
-
"eval_runtime": 3.
|
| 152 |
-
"eval_samples_per_second":
|
| 153 |
-
"eval_steps_per_second": 6.
|
| 154 |
"step": 180
|
| 155 |
},
|
| 156 |
{
|
| 157 |
"epoch": 1.0309278350515463,
|
| 158 |
-
"grad_norm":
|
| 159 |
"learning_rate": 4.871134020618557e-06,
|
| 160 |
-
"loss": 0.
|
| 161 |
"step": 200
|
| 162 |
},
|
| 163 |
{
|
| 164 |
"epoch": 1.0309278350515463,
|
| 165 |
-
"eval_f1_macro": 0.
|
| 166 |
-
"eval_loss": 0.
|
| 167 |
-
"eval_runtime": 3.
|
| 168 |
-
"eval_samples_per_second":
|
| 169 |
-
"eval_steps_per_second": 6.
|
| 170 |
"step": 200
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"epoch": 1.134020618556701,
|
| 174 |
-
"grad_norm":
|
| 175 |
"learning_rate": 4.355670103092784e-06,
|
| 176 |
-
"loss": 0.
|
| 177 |
"step": 220
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"epoch": 1.134020618556701,
|
| 181 |
-
"eval_f1_macro": 0.
|
| 182 |
-
"eval_loss": 0.
|
| 183 |
-
"eval_runtime":
|
| 184 |
-
"eval_samples_per_second":
|
| 185 |
-
"eval_steps_per_second": 6.
|
| 186 |
"step": 220
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"epoch": 1.2371134020618557,
|
| 190 |
-
"grad_norm": 9.
|
| 191 |
"learning_rate": 3.840206185567011e-06,
|
| 192 |
-
"loss": 0.
|
| 193 |
"step": 240
|
| 194 |
},
|
| 195 |
{
|
| 196 |
"epoch": 1.2371134020618557,
|
| 197 |
-
"eval_f1_macro": 0.
|
| 198 |
-
"eval_loss": 0.
|
| 199 |
-
"eval_runtime":
|
| 200 |
-
"eval_samples_per_second":
|
| 201 |
-
"eval_steps_per_second": 6.
|
| 202 |
"step": 240
|
| 203 |
},
|
| 204 |
{
|
| 205 |
"epoch": 1.3402061855670104,
|
| 206 |
-
"grad_norm": 7.
|
| 207 |
"learning_rate": 3.324742268041237e-06,
|
| 208 |
-
"loss": 0.
|
| 209 |
"step": 260
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"epoch": 1.3402061855670104,
|
| 213 |
-
"eval_f1_macro": 0.
|
| 214 |
-
"eval_loss": 0.
|
| 215 |
-
"eval_runtime": 3.
|
| 216 |
-
"eval_samples_per_second":
|
| 217 |
-
"eval_steps_per_second": 6.
|
| 218 |
"step": 260
|
| 219 |
},
|
| 220 |
{
|
| 221 |
"epoch": 1.443298969072165,
|
| 222 |
-
"grad_norm":
|
| 223 |
"learning_rate": 2.809278350515464e-06,
|
| 224 |
-
"loss": 0.
|
| 225 |
"step": 280
|
| 226 |
},
|
| 227 |
{
|
| 228 |
"epoch": 1.443298969072165,
|
| 229 |
-
"eval_f1_macro": 0.
|
| 230 |
-
"eval_loss": 0.
|
| 231 |
-
"eval_runtime": 3.
|
| 232 |
-
"eval_samples_per_second":
|
| 233 |
-
"eval_steps_per_second": 6.
|
| 234 |
"step": 280
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 1.5463917525773194,
|
| 238 |
-
"grad_norm":
|
| 239 |
"learning_rate": 2.293814432989691e-06,
|
| 240 |
-
"loss": 0.
|
| 241 |
"step": 300
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 1.5463917525773194,
|
| 245 |
-
"eval_f1_macro": 0.
|
| 246 |
-
"eval_loss": 0.
|
| 247 |
-
"eval_runtime": 4.
|
| 248 |
-
"eval_samples_per_second":
|
| 249 |
-
"eval_steps_per_second":
|
| 250 |
"step": 300
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"epoch": 1.6494845360824741,
|
| 254 |
-
"grad_norm": 7.
|
| 255 |
"learning_rate": 1.7783505154639178e-06,
|
| 256 |
-
"loss": 0.
|
| 257 |
"step": 320
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"epoch": 1.6494845360824741,
|
| 261 |
-
"eval_f1_macro": 0.
|
| 262 |
-
"eval_loss": 0.
|
| 263 |
-
"eval_runtime":
|
| 264 |
-
"eval_samples_per_second":
|
| 265 |
-
"eval_steps_per_second": 6.
|
| 266 |
"step": 320
|
| 267 |
}
|
| 268 |
],
|
|
|
|
| 1 |
{
|
| 2 |
"best_global_step": 320,
|
| 3 |
+
"best_metric": 0.5938381065356336,
|
| 4 |
"best_model_checkpoint": "/content/gemma_lora_imb/checkpoint-320",
|
| 5 |
"epoch": 1.6494845360824741,
|
| 6 |
"eval_steps": 20,
|
|
|
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.10309278350515463,
|
| 14 |
+
"grad_norm": 28.475317001342773,
|
| 15 |
"learning_rate": 9.510309278350516e-06,
|
| 16 |
+
"loss": 1.9688,
|
| 17 |
"step": 20
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"epoch": 0.10309278350515463,
|
| 21 |
+
"eval_f1_macro": 0.3268938093426131,
|
| 22 |
+
"eval_loss": 1.6321439743041992,
|
| 23 |
+
"eval_runtime": 3.9651,
|
| 24 |
+
"eval_samples_per_second": 194.698,
|
| 25 |
+
"eval_steps_per_second": 6.305,
|
| 26 |
"step": 20
|
| 27 |
},
|
| 28 |
{
|
| 29 |
"epoch": 0.20618556701030927,
|
| 30 |
+
"grad_norm": 20.50006866455078,
|
| 31 |
"learning_rate": 8.994845360824743e-06,
|
| 32 |
+
"loss": 1.3439,
|
| 33 |
"step": 40
|
| 34 |
},
|
| 35 |
{
|
| 36 |
"epoch": 0.20618556701030927,
|
| 37 |
+
"eval_f1_macro": 0.3697218634049285,
|
| 38 |
+
"eval_loss": 1.2616751194000244,
|
| 39 |
+
"eval_runtime": 4.2784,
|
| 40 |
+
"eval_samples_per_second": 180.441,
|
| 41 |
+
"eval_steps_per_second": 5.843,
|
| 42 |
"step": 40
|
| 43 |
},
|
| 44 |
{
|
| 45 |
"epoch": 0.30927835051546393,
|
| 46 |
+
"grad_norm": 41.688751220703125,
|
| 47 |
"learning_rate": 8.479381443298969e-06,
|
| 48 |
+
"loss": 1.0698,
|
| 49 |
"step": 60
|
| 50 |
},
|
| 51 |
{
|
| 52 |
"epoch": 0.30927835051546393,
|
| 53 |
+
"eval_f1_macro": 0.4542143805018043,
|
| 54 |
+
"eval_loss": 1.038082480430603,
|
| 55 |
+
"eval_runtime": 3.9347,
|
| 56 |
+
"eval_samples_per_second": 196.201,
|
| 57 |
+
"eval_steps_per_second": 6.354,
|
| 58 |
"step": 60
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 0.41237113402061853,
|
| 62 |
+
"grad_norm": 18.773658752441406,
|
| 63 |
"learning_rate": 7.963917525773196e-06,
|
| 64 |
+
"loss": 0.8925,
|
| 65 |
"step": 80
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"epoch": 0.41237113402061853,
|
| 69 |
+
"eval_f1_macro": 0.49878452511464727,
|
| 70 |
+
"eval_loss": 0.9446011185646057,
|
| 71 |
+
"eval_runtime": 3.9284,
|
| 72 |
+
"eval_samples_per_second": 196.516,
|
| 73 |
+
"eval_steps_per_second": 6.364,
|
| 74 |
"step": 80
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"epoch": 0.5154639175257731,
|
| 78 |
+
"grad_norm": 15.985794067382812,
|
| 79 |
"learning_rate": 7.448453608247424e-06,
|
| 80 |
+
"loss": 0.876,
|
| 81 |
"step": 100
|
| 82 |
},
|
| 83 |
{
|
| 84 |
"epoch": 0.5154639175257731,
|
| 85 |
+
"eval_f1_macro": 0.5263883153406523,
|
| 86 |
+
"eval_loss": 0.8541163206100464,
|
| 87 |
+
"eval_runtime": 3.9231,
|
| 88 |
+
"eval_samples_per_second": 196.785,
|
| 89 |
+
"eval_steps_per_second": 6.373,
|
| 90 |
"step": 100
|
| 91 |
},
|
| 92 |
{
|
| 93 |
"epoch": 0.6185567010309279,
|
| 94 |
+
"grad_norm": 17.182302474975586,
|
| 95 |
"learning_rate": 6.93298969072165e-06,
|
| 96 |
+
"loss": 0.8147,
|
| 97 |
"step": 120
|
| 98 |
},
|
| 99 |
{
|
| 100 |
"epoch": 0.6185567010309279,
|
| 101 |
+
"eval_f1_macro": 0.5381800325973681,
|
| 102 |
+
"eval_loss": 0.8054100275039673,
|
| 103 |
+
"eval_runtime": 3.9483,
|
| 104 |
+
"eval_samples_per_second": 195.53,
|
| 105 |
+
"eval_steps_per_second": 6.332,
|
| 106 |
"step": 120
|
| 107 |
},
|
| 108 |
{
|
| 109 |
"epoch": 0.7216494845360825,
|
| 110 |
+
"grad_norm": 9.127381324768066,
|
| 111 |
"learning_rate": 6.417525773195877e-06,
|
| 112 |
+
"loss": 0.8025,
|
| 113 |
"step": 140
|
| 114 |
},
|
| 115 |
{
|
| 116 |
"epoch": 0.7216494845360825,
|
| 117 |
+
"eval_f1_macro": 0.5465635692018774,
|
| 118 |
+
"eval_loss": 0.7696816921234131,
|
| 119 |
+
"eval_runtime": 3.9227,
|
| 120 |
+
"eval_samples_per_second": 196.804,
|
| 121 |
+
"eval_steps_per_second": 6.373,
|
| 122 |
"step": 140
|
| 123 |
},
|
| 124 |
{
|
| 125 |
"epoch": 0.8247422680412371,
|
| 126 |
+
"grad_norm": 13.120288848876953,
|
| 127 |
"learning_rate": 5.902061855670104e-06,
|
| 128 |
+
"loss": 0.7859,
|
| 129 |
"step": 160
|
| 130 |
},
|
| 131 |
{
|
| 132 |
"epoch": 0.8247422680412371,
|
| 133 |
+
"eval_f1_macro": 0.5512889360726072,
|
| 134 |
+
"eval_loss": 0.7549648880958557,
|
| 135 |
+
"eval_runtime": 3.9339,
|
| 136 |
+
"eval_samples_per_second": 196.24,
|
| 137 |
+
"eval_steps_per_second": 6.355,
|
| 138 |
"step": 160
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"epoch": 0.9278350515463918,
|
| 142 |
+
"grad_norm": 12.05843448638916,
|
| 143 |
"learning_rate": 5.38659793814433e-06,
|
| 144 |
+
"loss": 0.7499,
|
| 145 |
"step": 180
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 0.9278350515463918,
|
| 149 |
+
"eval_f1_macro": 0.5526664350368882,
|
| 150 |
+
"eval_loss": 0.7347894906997681,
|
| 151 |
+
"eval_runtime": 3.9585,
|
| 152 |
+
"eval_samples_per_second": 195.022,
|
| 153 |
+
"eval_steps_per_second": 6.315,
|
| 154 |
"step": 180
|
| 155 |
},
|
| 156 |
{
|
| 157 |
"epoch": 1.0309278350515463,
|
| 158 |
+
"grad_norm": 10.693439483642578,
|
| 159 |
"learning_rate": 4.871134020618557e-06,
|
| 160 |
+
"loss": 0.7156,
|
| 161 |
"step": 200
|
| 162 |
},
|
| 163 |
{
|
| 164 |
"epoch": 1.0309278350515463,
|
| 165 |
+
"eval_f1_macro": 0.5764236536326763,
|
| 166 |
+
"eval_loss": 0.7346844673156738,
|
| 167 |
+
"eval_runtime": 3.9136,
|
| 168 |
+
"eval_samples_per_second": 197.261,
|
| 169 |
+
"eval_steps_per_second": 6.388,
|
| 170 |
"step": 200
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"epoch": 1.134020618556701,
|
| 174 |
+
"grad_norm": 10.037001609802246,
|
| 175 |
"learning_rate": 4.355670103092784e-06,
|
| 176 |
+
"loss": 0.628,
|
| 177 |
"step": 220
|
| 178 |
},
|
| 179 |
{
|
| 180 |
"epoch": 1.134020618556701,
|
| 181 |
+
"eval_f1_macro": 0.5775336754770706,
|
| 182 |
+
"eval_loss": 0.810948371887207,
|
| 183 |
+
"eval_runtime": 4.0413,
|
| 184 |
+
"eval_samples_per_second": 191.029,
|
| 185 |
+
"eval_steps_per_second": 6.186,
|
| 186 |
"step": 220
|
| 187 |
},
|
| 188 |
{
|
| 189 |
"epoch": 1.2371134020618557,
|
| 190 |
+
"grad_norm": 9.240849494934082,
|
| 191 |
"learning_rate": 3.840206185567011e-06,
|
| 192 |
+
"loss": 0.6601,
|
| 193 |
"step": 240
|
| 194 |
},
|
| 195 |
{
|
| 196 |
"epoch": 1.2371134020618557,
|
| 197 |
+
"eval_f1_macro": 0.5821846549080626,
|
| 198 |
+
"eval_loss": 0.7480236291885376,
|
| 199 |
+
"eval_runtime": 3.9625,
|
| 200 |
+
"eval_samples_per_second": 194.826,
|
| 201 |
+
"eval_steps_per_second": 6.309,
|
| 202 |
"step": 240
|
| 203 |
},
|
| 204 |
{
|
| 205 |
"epoch": 1.3402061855670104,
|
| 206 |
+
"grad_norm": 7.726218223571777,
|
| 207 |
"learning_rate": 3.324742268041237e-06,
|
| 208 |
+
"loss": 0.6286,
|
| 209 |
"step": 260
|
| 210 |
},
|
| 211 |
{
|
| 212 |
"epoch": 1.3402061855670104,
|
| 213 |
+
"eval_f1_macro": 0.5793924416627766,
|
| 214 |
+
"eval_loss": 0.7301892638206482,
|
| 215 |
+
"eval_runtime": 3.9159,
|
| 216 |
+
"eval_samples_per_second": 197.144,
|
| 217 |
+
"eval_steps_per_second": 6.384,
|
| 218 |
"step": 260
|
| 219 |
},
|
| 220 |
{
|
| 221 |
"epoch": 1.443298969072165,
|
| 222 |
+
"grad_norm": 8.408136367797852,
|
| 223 |
"learning_rate": 2.809278350515464e-06,
|
| 224 |
+
"loss": 0.6048,
|
| 225 |
"step": 280
|
| 226 |
},
|
| 227 |
{
|
| 228 |
"epoch": 1.443298969072165,
|
| 229 |
+
"eval_f1_macro": 0.5846632256257394,
|
| 230 |
+
"eval_loss": 0.7338100075721741,
|
| 231 |
+
"eval_runtime": 3.9641,
|
| 232 |
+
"eval_samples_per_second": 194.749,
|
| 233 |
+
"eval_steps_per_second": 6.307,
|
| 234 |
"step": 280
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 1.5463917525773194,
|
| 238 |
+
"grad_norm": 8.853802680969238,
|
| 239 |
"learning_rate": 2.293814432989691e-06,
|
| 240 |
+
"loss": 0.6399,
|
| 241 |
"step": 300
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 1.5463917525773194,
|
| 245 |
+
"eval_f1_macro": 0.5798440723934543,
|
| 246 |
+
"eval_loss": 0.7210143208503723,
|
| 247 |
+
"eval_runtime": 4.4541,
|
| 248 |
+
"eval_samples_per_second": 173.322,
|
| 249 |
+
"eval_steps_per_second": 5.613,
|
| 250 |
"step": 300
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"epoch": 1.6494845360824741,
|
| 254 |
+
"grad_norm": 7.352003574371338,
|
| 255 |
"learning_rate": 1.7783505154639178e-06,
|
| 256 |
+
"loss": 0.5882,
|
| 257 |
"step": 320
|
| 258 |
},
|
| 259 |
{
|
| 260 |
"epoch": 1.6494845360824741,
|
| 261 |
+
"eval_f1_macro": 0.5938381065356336,
|
| 262 |
+
"eval_loss": 0.7336726188659668,
|
| 263 |
+
"eval_runtime": 3.9872,
|
| 264 |
+
"eval_samples_per_second": 193.619,
|
| 265 |
+
"eval_steps_per_second": 6.27,
|
| 266 |
"step": 320
|
| 267 |
}
|
| 268 |
],
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5905
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:697c137f0af9ca57111ce6bb1fdc70aa1d95192c2c6fdff6a83fa4dc1f74828b
|
| 3 |
size 5905
|