Instructions to use Muhammed164/SDPO with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Muhammed164/SDPO with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Muhammed164/SDPO", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- Unsloth Studio
How to use Muhammed164/SDPO with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Muhammed164/SDPO to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for Muhammed164/SDPO to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for Muhammed164/SDPO to start chatting
Load model with FastModel
pip install unsloth

from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="Muhammed164/SDPO",
    max_seq_length=2048,
)
Training in progress, step 200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 204500912
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:065322e97e075055ae2c6bcbf10fdfffbac7dd29ef45906fca7a9bacc7abec43
|
| 3 |
size 204500912
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0e97a95edb102b68426558e6f6306ffd55d9a0ba3fc011ab3b76edaea3a99e3
|
| 3 |
+
size 104062731
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c800b778fa7e115e4c34de8529902de8b61c9a1b4bab3eb8295d06dafff030e
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb9a36b5dff54d4dc680b7c932dff5afaac16543707cbf68dd86d83d274f369f
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,1518 +2,318 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.026586905948820207,
|
| 14 |
-
"grad_norm":
|
| 15 |
-
"learning_rate":
|
| 16 |
-
"logits/chosen": 1.
|
| 17 |
-
"logits/rejected": 2.
|
| 18 |
-
"logps/chosen": -180.
|
| 19 |
-
"logps/rejected": -
|
| 20 |
-
"loss":
|
| 21 |
"rewards/accuracies": 0.643750011920929,
|
| 22 |
-
"rewards/chosen":
|
| 23 |
-
"rewards/margins":
|
| 24 |
-
"rewards/rejected":
|
| 25 |
"step": 10
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"epoch": 0.053173811897640415,
|
| 29 |
-
"grad_norm":
|
| 30 |
-
"learning_rate":
|
| 31 |
-
"logits/chosen": 2.
|
| 32 |
-
"logits/rejected": 2.
|
| 33 |
-
"logps/chosen": -
|
| 34 |
-
"logps/rejected": -
|
| 35 |
-
"loss":
|
| 36 |
-
"rewards/accuracies": 0.
|
| 37 |
-
"rewards/chosen":
|
| 38 |
-
"rewards/margins":
|
| 39 |
-
"rewards/rejected":
|
| 40 |
"step": 20
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"epoch": 0.07976071784646062,
|
| 44 |
-
"grad_norm":
|
| 45 |
-
"learning_rate":
|
| 46 |
-
"logits/chosen": 2.
|
| 47 |
-
"logits/rejected": 2.
|
| 48 |
-
"logps/chosen": -201.
|
| 49 |
-
"logps/rejected": -
|
| 50 |
-
"loss":
|
| 51 |
-
"rewards/accuracies": 0.
|
| 52 |
-
"rewards/chosen":
|
| 53 |
-
"rewards/margins":
|
| 54 |
-
"rewards/rejected":
|
| 55 |
"step": 30
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"epoch": 0.10634762379528083,
|
| 59 |
-
"grad_norm":
|
| 60 |
-
"learning_rate":
|
| 61 |
-
"logits/chosen":
|
| 62 |
-
"logits/rejected": 2.
|
| 63 |
-
"logps/chosen": -
|
| 64 |
-
"logps/rejected": -
|
| 65 |
-
"loss":
|
| 66 |
-
"rewards/accuracies": 0.
|
| 67 |
-
"rewards/chosen":
|
| 68 |
-
"rewards/margins":
|
| 69 |
-
"rewards/rejected":
|
| 70 |
"step": 40
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 0.13293452974410103,
|
| 74 |
-
"grad_norm":
|
| 75 |
-
"learning_rate":
|
| 76 |
-
"logits/chosen":
|
| 77 |
-
"logits/rejected":
|
| 78 |
-
"logps/chosen": -
|
| 79 |
-
"logps/rejected": -
|
| 80 |
-
"loss":
|
| 81 |
-
"rewards/accuracies": 0.
|
| 82 |
-
"rewards/chosen":
|
| 83 |
-
"rewards/margins":
|
| 84 |
-
"rewards/rejected":
|
| 85 |
"step": 50
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"epoch": 0.15952143569292124,
|
| 89 |
-
"grad_norm":
|
| 90 |
-
"learning_rate":
|
| 91 |
-
"logits/chosen":
|
| 92 |
-
"logits/rejected":
|
| 93 |
-
"logps/chosen": -
|
| 94 |
-
"logps/rejected": -
|
| 95 |
-
"loss":
|
| 96 |
-
"rewards/accuracies": 0.
|
| 97 |
-
"rewards/chosen":
|
| 98 |
-
"rewards/margins":
|
| 99 |
-
"rewards/rejected":
|
| 100 |
"step": 60
|
| 101 |
},
|
| 102 |
{
|
| 103 |
"epoch": 0.18610834164174145,
|
| 104 |
-
"grad_norm":
|
| 105 |
-
"learning_rate":
|
| 106 |
-
"logits/chosen":
|
| 107 |
-
"logits/rejected":
|
| 108 |
-
"logps/chosen": -
|
| 109 |
-
"logps/rejected": -
|
| 110 |
-
"loss":
|
| 111 |
-
"rewards/accuracies": 0.
|
| 112 |
-
"rewards/chosen":
|
| 113 |
-
"rewards/margins":
|
| 114 |
-
"rewards/rejected":
|
| 115 |
"step": 70
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"epoch": 0.21269524759056166,
|
| 119 |
-
"grad_norm":
|
| 120 |
-
"learning_rate":
|
| 121 |
-
"logits/chosen":
|
| 122 |
-
"logits/rejected":
|
| 123 |
-
"logps/chosen": -
|
| 124 |
-
"logps/rejected": -
|
| 125 |
-
"loss":
|
| 126 |
-
"rewards/accuracies": 0.
|
| 127 |
-
"rewards/chosen":
|
| 128 |
-
"rewards/margins":
|
| 129 |
-
"rewards/rejected":
|
| 130 |
"step": 80
|
| 131 |
},
|
| 132 |
{
|
| 133 |
"epoch": 0.23928215353938184,
|
| 134 |
-
"grad_norm":
|
| 135 |
-
"learning_rate": 8.
|
| 136 |
-
"logits/chosen":
|
| 137 |
-
"logits/rejected":
|
| 138 |
-
"logps/chosen": -
|
| 139 |
-
"logps/rejected": -
|
| 140 |
-
"loss":
|
| 141 |
-
"rewards/accuracies": 0.
|
| 142 |
-
"rewards/chosen":
|
| 143 |
-
"rewards/margins":
|
| 144 |
-
"rewards/rejected":
|
| 145 |
"step": 90
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 0.26586905948820205,
|
| 149 |
-
"grad_norm":
|
| 150 |
-
"learning_rate":
|
| 151 |
-
"logits/chosen":
|
| 152 |
-
"logits/rejected":
|
| 153 |
-
"logps/chosen": -
|
| 154 |
-
"logps/rejected": -
|
| 155 |
-
"loss":
|
| 156 |
-
"rewards/accuracies": 0.
|
| 157 |
-
"rewards/chosen":
|
| 158 |
-
"rewards/margins":
|
| 159 |
-
"rewards/rejected":
|
| 160 |
"step": 100
|
| 161 |
},
|
| 162 |
{
|
| 163 |
"epoch": 0.2924559654370223,
|
| 164 |
-
"grad_norm":
|
| 165 |
-
"learning_rate":
|
| 166 |
-
"logits/chosen":
|
| 167 |
-
"logits/rejected":
|
| 168 |
-
"logps/chosen": -
|
| 169 |
-
"logps/rejected": -
|
| 170 |
-
"loss":
|
| 171 |
-
"rewards/accuracies": 0.
|
| 172 |
-
"rewards/chosen":
|
| 173 |
-
"rewards/margins":
|
| 174 |
-
"rewards/rejected":
|
| 175 |
"step": 110
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"epoch": 0.3190428713858425,
|
| 179 |
-
"grad_norm":
|
| 180 |
-
"learning_rate":
|
| 181 |
-
"logits/chosen":
|
| 182 |
-
"logits/rejected":
|
| 183 |
-
"logps/chosen": -
|
| 184 |
-
"logps/rejected": -
|
| 185 |
-
"loss":
|
| 186 |
-
"rewards/accuracies": 0.
|
| 187 |
-
"rewards/chosen":
|
| 188 |
-
"rewards/margins":
|
| 189 |
-
"rewards/rejected":
|
| 190 |
"step": 120
|
| 191 |
},
|
| 192 |
{
|
| 193 |
"epoch": 0.34562977733466266,
|
| 194 |
-
"grad_norm":
|
| 195 |
-
"learning_rate":
|
| 196 |
-
"logits/chosen":
|
| 197 |
-
"logits/rejected":
|
| 198 |
-
"logps/chosen": -
|
| 199 |
-
"logps/rejected": -
|
| 200 |
-
"loss":
|
| 201 |
-
"rewards/accuracies": 0.
|
| 202 |
-
"rewards/chosen":
|
| 203 |
-
"rewards/margins":
|
| 204 |
-
"rewards/rejected":
|
| 205 |
"step": 130
|
| 206 |
},
|
| 207 |
{
|
| 208 |
"epoch": 0.3722166832834829,
|
| 209 |
-
"grad_norm":
|
| 210 |
-
"learning_rate":
|
| 211 |
-
"logits/chosen":
|
| 212 |
-
"logits/rejected":
|
| 213 |
-
"logps/chosen": -
|
| 214 |
-
"logps/rejected": -
|
| 215 |
-
"loss":
|
| 216 |
-
"rewards/accuracies": 0.
|
| 217 |
-
"rewards/chosen":
|
| 218 |
-
"rewards/margins":
|
| 219 |
-
"rewards/rejected":
|
| 220 |
"step": 140
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.3988035892323031,
|
| 224 |
-
"grad_norm":
|
| 225 |
-
"learning_rate":
|
| 226 |
-
"logits/chosen":
|
| 227 |
-
"logits/rejected":
|
| 228 |
-
"logps/chosen": -
|
| 229 |
-
"logps/rejected": -
|
| 230 |
-
"loss":
|
| 231 |
-
"rewards/accuracies": 0.
|
| 232 |
-
"rewards/chosen":
|
| 233 |
-
"rewards/margins":
|
| 234 |
-
"rewards/rejected":
|
| 235 |
"step": 150
|
| 236 |
},
|
| 237 |
{
|
| 238 |
"epoch": 0.4253904951811233,
|
| 239 |
-
"grad_norm":
|
| 240 |
-
"learning_rate":
|
| 241 |
-
"logits/chosen":
|
| 242 |
-
"logits/rejected":
|
| 243 |
-
"logps/chosen": -
|
| 244 |
-
"logps/rejected": -
|
| 245 |
-
"loss":
|
| 246 |
-
"rewards/accuracies": 0.
|
| 247 |
-
"rewards/chosen":
|
| 248 |
-
"rewards/margins":
|
| 249 |
-
"rewards/rejected":
|
| 250 |
"step": 160
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"epoch": 0.4519774011299435,
|
| 254 |
-
"grad_norm":
|
| 255 |
-
"learning_rate":
|
| 256 |
-
"logits/chosen":
|
| 257 |
-
"logits/rejected":
|
| 258 |
-
"logps/chosen": -
|
| 259 |
-
"logps/rejected": -
|
| 260 |
-
"loss":
|
| 261 |
-
"rewards/accuracies": 0.
|
| 262 |
-
"rewards/chosen":
|
| 263 |
-
"rewards/margins":
|
| 264 |
-
"rewards/rejected":
|
| 265 |
"step": 170
|
| 266 |
},
|
| 267 |
{
|
| 268 |
"epoch": 0.4785643070787637,
|
| 269 |
-
"grad_norm":
|
| 270 |
-
"learning_rate":
|
| 271 |
-
"logits/chosen":
|
| 272 |
-
"logits/rejected":
|
| 273 |
-
"logps/chosen": -
|
| 274 |
-
"logps/rejected": -
|
| 275 |
-
"loss":
|
| 276 |
-
"rewards/accuracies": 0.
|
| 277 |
-
"rewards/chosen":
|
| 278 |
-
"rewards/margins":
|
| 279 |
-
"rewards/rejected": -
|
| 280 |
"step": 180
|
| 281 |
},
|
| 282 |
{
|
| 283 |
"epoch": 0.5051512130275839,
|
| 284 |
-
"grad_norm":
|
| 285 |
-
"learning_rate":
|
| 286 |
-
"logits/chosen":
|
| 287 |
-
"logits/rejected":
|
| 288 |
-
"logps/chosen": -
|
| 289 |
-
"logps/rejected": -
|
| 290 |
-
"loss":
|
| 291 |
-
"rewards/accuracies": 0.
|
| 292 |
-
"rewards/chosen":
|
| 293 |
-
"rewards/margins":
|
| 294 |
-
"rewards/rejected": -
|
| 295 |
"step": 190
|
| 296 |
},
|
| 297 |
{
|
| 298 |
"epoch": 0.5317381189764041,
|
| 299 |
-
"grad_norm":
|
| 300 |
-
"learning_rate":
|
| 301 |
-
"logits/chosen":
|
| 302 |
-
"logits/rejected":
|
| 303 |
-
"logps/chosen": -
|
| 304 |
-
"logps/rejected": -
|
| 305 |
-
"loss":
|
| 306 |
-
"rewards/accuracies": 0.7124999761581421,
|
| 307 |
-
"rewards/chosen": 49.085548400878906,
|
| 308 |
-
"rewards/margins": 58.6345329284668,
|
| 309 |
-
"rewards/rejected": -9.54898452758789,
|
| 310 |
-
"step": 200
|
| 311 |
-
},
|
| 312 |
-
{
|
| 313 |
-
"epoch": 0.5583250249252243,
|
| 314 |
-
"grad_norm": 67.41759490966797,
|
| 315 |
-
"learning_rate": 8.788888888888889e-07,
|
| 316 |
-
"logits/chosen": 3.120459794998169,
|
| 317 |
-
"logits/rejected": 3.3150908946990967,
|
| 318 |
-
"logps/chosen": -201.68368530273438,
|
| 319 |
-
"logps/rejected": -479.9122619628906,
|
| 320 |
-
"loss": 9.525629425048828,
|
| 321 |
-
"rewards/accuracies": 0.75,
|
| 322 |
-
"rewards/chosen": 49.86951446533203,
|
| 323 |
-
"rewards/margins": 59.1157112121582,
|
| 324 |
-
"rewards/rejected": -9.246195793151855,
|
| 325 |
-
"step": 210
|
| 326 |
-
},
|
| 327 |
-
{
|
| 328 |
-
"epoch": 0.5849119308740446,
|
| 329 |
-
"grad_norm": 89.28022003173828,
|
| 330 |
-
"learning_rate": 8.677777777777777e-07,
|
| 331 |
-
"logits/chosen": 3.2067043781280518,
|
| 332 |
-
"logits/rejected": 3.2518234252929688,
|
| 333 |
-
"logps/chosen": -198.66351318359375,
|
| 334 |
-
"logps/rejected": -459.51287841796875,
|
| 335 |
-
"loss": 11.018878936767578,
|
| 336 |
-
"rewards/accuracies": 0.6937500238418579,
|
| 337 |
-
"rewards/chosen": 52.5185432434082,
|
| 338 |
-
"rewards/margins": 51.389251708984375,
|
| 339 |
-
"rewards/rejected": 1.129294991493225,
|
| 340 |
-
"step": 220
|
| 341 |
-
},
|
| 342 |
-
{
|
| 343 |
-
"epoch": 0.6114988368228648,
|
| 344 |
-
"grad_norm": 57.3789176940918,
|
| 345 |
-
"learning_rate": 8.566666666666667e-07,
|
| 346 |
-
"logits/chosen": 3.428664445877075,
|
| 347 |
-
"logits/rejected": 3.6689727306365967,
|
| 348 |
-
"logps/chosen": -188.14273071289062,
|
| 349 |
-
"logps/rejected": -484.1524963378906,
|
| 350 |
-
"loss": 8.581022644042969,
|
| 351 |
-
"rewards/accuracies": 0.7124999761581421,
|
| 352 |
-
"rewards/chosen": 49.485206604003906,
|
| 353 |
-
"rewards/margins": 54.331947326660156,
|
| 354 |
-
"rewards/rejected": -4.846745491027832,
|
| 355 |
-
"step": 230
|
| 356 |
-
},
|
| 357 |
-
{
|
| 358 |
-
"epoch": 0.638085742771685,
|
| 359 |
-
"grad_norm": 78.1611099243164,
|
| 360 |
-
"learning_rate": 8.455555555555555e-07,
|
| 361 |
-
"logits/chosen": 3.273719072341919,
|
| 362 |
-
"logits/rejected": 3.5895423889160156,
|
| 363 |
-
"logps/chosen": -198.40890502929688,
|
| 364 |
-
"logps/rejected": -568.0107421875,
|
| 365 |
-
"loss": 8.444003295898437,
|
| 366 |
-
"rewards/accuracies": 0.793749988079071,
|
| 367 |
-
"rewards/chosen": 55.26072311401367,
|
| 368 |
-
"rewards/margins": 84.45598602294922,
|
| 369 |
-
"rewards/rejected": -29.195270538330078,
|
| 370 |
-
"step": 240
|
| 371 |
-
},
|
| 372 |
-
{
|
| 373 |
-
"epoch": 0.6646726487205051,
|
| 374 |
-
"grad_norm": 87.57330322265625,
|
| 375 |
-
"learning_rate": 8.344444444444444e-07,
|
| 376 |
-
"logits/chosen": 3.921356201171875,
|
| 377 |
-
"logits/rejected": 4.107032775878906,
|
| 378 |
-
"logps/chosen": -219.56887817382812,
|
| 379 |
-
"logps/rejected": -529.9613647460938,
|
| 380 |
-
"loss": 11.188172912597656,
|
| 381 |
-
"rewards/accuracies": 0.7437499761581421,
|
| 382 |
-
"rewards/chosen": 63.129974365234375,
|
| 383 |
-
"rewards/margins": 69.01350402832031,
|
| 384 |
-
"rewards/rejected": -5.883524417877197,
|
| 385 |
-
"step": 250
|
| 386 |
-
},
|
| 387 |
-
{
|
| 388 |
-
"epoch": 0.6912595546693253,
|
| 389 |
-
"grad_norm": 78.89329528808594,
|
| 390 |
-
"learning_rate": 8.233333333333333e-07,
|
| 391 |
-
"logits/chosen": 4.225001335144043,
|
| 392 |
-
"logits/rejected": 4.386289119720459,
|
| 393 |
-
"logps/chosen": -241.30734252929688,
|
| 394 |
-
"logps/rejected": -510.9679260253906,
|
| 395 |
-
"loss": 10.638973236083984,
|
| 396 |
-
"rewards/accuracies": 0.7437499761581421,
|
| 397 |
-
"rewards/chosen": 55.27728271484375,
|
| 398 |
-
"rewards/margins": 60.547119140625,
|
| 399 |
-
"rewards/rejected": -5.269834041595459,
|
| 400 |
-
"step": 260
|
| 401 |
-
},
|
| 402 |
-
{
|
| 403 |
-
"epoch": 0.7178464606181456,
|
| 404 |
-
"grad_norm": 88.03643798828125,
|
| 405 |
-
"learning_rate": 8.122222222222221e-07,
|
| 406 |
-
"logits/chosen": 3.850262403488159,
|
| 407 |
-
"logits/rejected": 4.041484832763672,
|
| 408 |
-
"logps/chosen": -241.4730987548828,
|
| 409 |
-
"logps/rejected": -527.6182250976562,
|
| 410 |
-
"loss": 11.718121337890626,
|
| 411 |
-
"rewards/accuracies": 0.699999988079071,
|
| 412 |
-
"rewards/chosen": 48.15225601196289,
|
| 413 |
-
"rewards/margins": 54.50339889526367,
|
| 414 |
-
"rewards/rejected": -6.351143836975098,
|
| 415 |
-
"step": 270
|
| 416 |
-
},
|
| 417 |
-
{
|
| 418 |
-
"epoch": 0.7444333665669658,
|
| 419 |
-
"grad_norm": 161.95907592773438,
|
| 420 |
-
"learning_rate": 8.01111111111111e-07,
|
| 421 |
-
"logits/chosen": 3.8429579734802246,
|
| 422 |
-
"logits/rejected": 4.090743064880371,
|
| 423 |
-
"logps/chosen": -197.0509033203125,
|
| 424 |
-
"logps/rejected": -592.664794921875,
|
| 425 |
-
"loss": 8.627317810058594,
|
| 426 |
-
"rewards/accuracies": 0.762499988079071,
|
| 427 |
-
"rewards/chosen": 44.076698303222656,
|
| 428 |
-
"rewards/margins": 71.58090209960938,
|
| 429 |
-
"rewards/rejected": -27.504215240478516,
|
| 430 |
-
"step": 280
|
| 431 |
-
},
|
| 432 |
-
{
|
| 433 |
-
"epoch": 0.771020272515786,
|
| 434 |
-
"grad_norm": 59.20638656616211,
|
| 435 |
-
"learning_rate": 7.9e-07,
|
| 436 |
-
"logits/chosen": 3.9963154792785645,
|
| 437 |
-
"logits/rejected": 4.247437477111816,
|
| 438 |
-
"logps/chosen": -198.98069763183594,
|
| 439 |
-
"logps/rejected": -636.8574829101562,
|
| 440 |
-
"loss": 7.916163635253906,
|
| 441 |
-
"rewards/accuracies": 0.8062499761581421,
|
| 442 |
-
"rewards/chosen": 45.44010543823242,
|
| 443 |
-
"rewards/margins": 82.26910400390625,
|
| 444 |
-
"rewards/rejected": -36.82899475097656,
|
| 445 |
-
"step": 290
|
| 446 |
-
},
|
| 447 |
-
{
|
| 448 |
-
"epoch": 0.7976071784646062,
|
| 449 |
-
"grad_norm": 3.873155947076157e-05,
|
| 450 |
-
"learning_rate": 7.788888888888889e-07,
|
| 451 |
-
"logits/chosen": 3.5579922199249268,
|
| 452 |
-
"logits/rejected": 3.9878501892089844,
|
| 453 |
-
"logps/chosen": -208.97335815429688,
|
| 454 |
-
"logps/rejected": -655.0528564453125,
|
| 455 |
-
"loss": 6.305292129516602,
|
| 456 |
-
"rewards/accuracies": 0.8187500238418579,
|
| 457 |
-
"rewards/chosen": 36.67095184326172,
|
| 458 |
-
"rewards/margins": 88.25028991699219,
|
| 459 |
-
"rewards/rejected": -51.57932662963867,
|
| 460 |
-
"step": 300
|
| 461 |
-
},
|
| 462 |
-
{
|
| 463 |
-
"epoch": 0.8241940844134263,
|
| 464 |
-
"grad_norm": 101.79195404052734,
|
| 465 |
-
"learning_rate": 7.677777777777778e-07,
|
| 466 |
-
"logits/chosen": 4.391497611999512,
|
| 467 |
-
"logits/rejected": 4.607339382171631,
|
| 468 |
-
"logps/chosen": -249.1584930419922,
|
| 469 |
-
"logps/rejected": -639.23974609375,
|
| 470 |
-
"loss": 8.43834228515625,
|
| 471 |
-
"rewards/accuracies": 0.78125,
|
| 472 |
-
"rewards/chosen": 43.91454315185547,
|
| 473 |
-
"rewards/margins": 78.60186004638672,
|
| 474 |
-
"rewards/rejected": -34.68731689453125,
|
| 475 |
-
"step": 310
|
| 476 |
-
},
|
| 477 |
-
{
|
| 478 |
-
"epoch": 0.8507809903622466,
|
| 479 |
-
"grad_norm": 91.69438171386719,
|
| 480 |
-
"learning_rate": 7.566666666666667e-07,
|
| 481 |
-
"logits/chosen": 4.2728271484375,
|
| 482 |
-
"logits/rejected": 4.45902156829834,
|
| 483 |
-
"logps/chosen": -242.8008575439453,
|
| 484 |
-
"logps/rejected": -614.6475830078125,
|
| 485 |
-
"loss": 8.622640228271484,
|
| 486 |
-
"rewards/accuracies": 0.793749988079071,
|
| 487 |
-
"rewards/chosen": 51.20795440673828,
|
| 488 |
-
"rewards/margins": 79.56620788574219,
|
| 489 |
-
"rewards/rejected": -28.358264923095703,
|
| 490 |
-
"step": 320
|
| 491 |
-
},
|
| 492 |
-
{
|
| 493 |
-
"epoch": 0.8773678963110668,
|
| 494 |
-
"grad_norm": 64.15619659423828,
|
| 495 |
-
"learning_rate": 7.455555555555555e-07,
|
| 496 |
-
"logits/chosen": 4.106622219085693,
|
| 497 |
-
"logits/rejected": 4.50801944732666,
|
| 498 |
-
"logps/chosen": -230.63919067382812,
|
| 499 |
-
"logps/rejected": -676.0430908203125,
|
| 500 |
-
"loss": 5.273190307617187,
|
| 501 |
-
"rewards/accuracies": 0.793749988079071,
|
| 502 |
-
"rewards/chosen": 40.392784118652344,
|
| 503 |
-
"rewards/margins": 91.14765930175781,
|
| 504 |
-
"rewards/rejected": -50.75487518310547,
|
| 505 |
-
"step": 330
|
| 506 |
-
},
|
| 507 |
-
{
|
| 508 |
-
"epoch": 0.903954802259887,
|
| 509 |
-
"grad_norm": 97.1626205444336,
|
| 510 |
-
"learning_rate": 7.344444444444444e-07,
|
| 511 |
-
"logits/chosen": 4.293347358703613,
|
| 512 |
-
"logits/rejected": 4.595992088317871,
|
| 513 |
-
"logps/chosen": -215.70938110351562,
|
| 514 |
-
"logps/rejected": -666.0777587890625,
|
| 515 |
-
"loss": 5.9850719451904295,
|
| 516 |
-
"rewards/accuracies": 0.762499988079071,
|
| 517 |
-
"rewards/chosen": 44.4474983215332,
|
| 518 |
-
"rewards/margins": 83.35242462158203,
|
| 519 |
-
"rewards/rejected": -38.90492630004883,
|
| 520 |
-
"step": 340
|
| 521 |
-
},
|
| 522 |
-
{
|
| 523 |
-
"epoch": 0.9305417082087072,
|
| 524 |
-
"grad_norm": 85.94694519042969,
|
| 525 |
-
"learning_rate": 7.233333333333333e-07,
|
| 526 |
-
"logits/chosen": 4.838589191436768,
|
| 527 |
-
"logits/rejected": 5.159350872039795,
|
| 528 |
-
"logps/chosen": -264.10711669921875,
|
| 529 |
-
"logps/rejected": -796.7295532226562,
|
| 530 |
-
"loss": 3.4507820129394533,
|
| 531 |
-
"rewards/accuracies": 0.856249988079071,
|
| 532 |
-
"rewards/chosen": 42.16107940673828,
|
| 533 |
-
"rewards/margins": 113.5744857788086,
|
| 534 |
-
"rewards/rejected": -71.41340637207031,
|
| 535 |
-
"step": 350
|
| 536 |
-
},
|
| 537 |
-
{
|
| 538 |
-
"epoch": 0.9571286141575274,
|
| 539 |
-
"grad_norm": 236.06187438964844,
|
| 540 |
-
"learning_rate": 7.122222222222221e-07,
|
| 541 |
-
"logits/chosen": 4.589522361755371,
|
| 542 |
-
"logits/rejected": 4.999955177307129,
|
| 543 |
-
"logps/chosen": -258.4150695800781,
|
| 544 |
-
"logps/rejected": -727.7996826171875,
|
| 545 |
-
"loss": 6.599867248535157,
|
| 546 |
-
"rewards/accuracies": 0.8062499761581421,
|
| 547 |
-
"rewards/chosen": 35.76854705810547,
|
| 548 |
-
"rewards/margins": 92.19223022460938,
|
| 549 |
-
"rewards/rejected": -56.42368698120117,
|
| 550 |
-
"step": 360
|
| 551 |
-
},
|
| 552 |
-
{
|
| 553 |
-
"epoch": 0.9837155201063477,
|
| 554 |
-
"grad_norm": 100.6878662109375,
|
| 555 |
-
"learning_rate": 7.01111111111111e-07,
|
| 556 |
-
"logits/chosen": 5.191050052642822,
|
| 557 |
-
"logits/rejected": 5.309014320373535,
|
| 558 |
-
"logps/chosen": -296.0009460449219,
|
| 559 |
-
"logps/rejected": -759.0372924804688,
|
| 560 |
-
"loss": 3.534566116333008,
|
| 561 |
-
"rewards/accuracies": 0.856249988079071,
|
| 562 |
-
"rewards/chosen": 38.79043960571289,
|
| 563 |
-
"rewards/margins": 100.0891342163086,
|
| 564 |
-
"rewards/rejected": -61.29869842529297,
|
| 565 |
-
"step": 370
|
| 566 |
-
},
|
| 567 |
-
{
|
| 568 |
-
"epoch": 1.007976071784646,
|
| 569 |
-
"grad_norm": 71.57064819335938,
|
| 570 |
-
"learning_rate": 6.9e-07,
|
| 571 |
-
"logits/chosen": 5.157181262969971,
|
| 572 |
-
"logits/rejected": 5.5708699226379395,
|
| 573 |
-
"logps/chosen": -288.7073059082031,
|
| 574 |
-
"logps/rejected": -775.53173828125,
|
| 575 |
-
"loss": 2.5931621551513673,
|
| 576 |
-
"rewards/accuracies": 0.8698630332946777,
|
| 577 |
-
"rewards/chosen": 35.64141845703125,
|
| 578 |
-
"rewards/margins": 96.54639434814453,
|
| 579 |
-
"rewards/rejected": -60.904972076416016,
|
| 580 |
-
"step": 380
|
| 581 |
-
},
|
| 582 |
-
{
|
| 583 |
-
"epoch": 1.0345629777334662,
|
| 584 |
-
"grad_norm": 80.57012939453125,
|
| 585 |
-
"learning_rate": 6.788888888888889e-07,
|
| 586 |
-
"logits/chosen": 4.908313274383545,
|
| 587 |
-
"logits/rejected": 5.281552791595459,
|
| 588 |
-
"logps/chosen": -255.53994750976562,
|
| 589 |
-
"logps/rejected": -876.7796630859375,
|
| 590 |
-
"loss": 3.100166130065918,
|
| 591 |
-
"rewards/accuracies": 0.8999999761581421,
|
| 592 |
-
"rewards/chosen": 29.122013092041016,
|
| 593 |
-
"rewards/margins": 121.50981140136719,
|
| 594 |
-
"rewards/rejected": -92.38780212402344,
|
| 595 |
-
"step": 390
|
| 596 |
-
},
|
| 597 |
-
{
|
| 598 |
-
"epoch": 1.0611498836822866,
|
| 599 |
-
"grad_norm": 62.662662506103516,
|
| 600 |
-
"learning_rate": 6.677777777777778e-07,
|
| 601 |
-
"logits/chosen": 5.439974308013916,
|
| 602 |
-
"logits/rejected": 5.852138519287109,
|
| 603 |
-
"logps/chosen": -291.69049072265625,
|
| 604 |
-
"logps/rejected": -910.0514526367188,
|
| 605 |
-
"loss": 3.5051338195800783,
|
| 606 |
-
"rewards/accuracies": 0.887499988079071,
|
| 607 |
-
"rewards/chosen": 25.592533111572266,
|
| 608 |
-
"rewards/margins": 119.5553207397461,
|
| 609 |
-
"rewards/rejected": -93.96280670166016,
|
| 610 |
-
"step": 400
|
| 611 |
-
},
|
| 612 |
-
{
|
| 613 |
-
"epoch": 1.0877367896311068,
|
| 614 |
-
"grad_norm": 178.87042236328125,
|
| 615 |
-
"learning_rate": 6.566666666666666e-07,
|
| 616 |
-
"logits/chosen": 5.411637783050537,
|
| 617 |
-
"logits/rejected": 5.973018646240234,
|
| 618 |
-
"logps/chosen": -308.68487548828125,
|
| 619 |
-
"logps/rejected": -894.93359375,
|
| 620 |
-
"loss": 1.9858436584472656,
|
| 621 |
-
"rewards/accuracies": 0.8687499761581421,
|
| 622 |
-
"rewards/chosen": 25.565839767456055,
|
| 623 |
-
"rewards/margins": 120.69632720947266,
|
| 624 |
-
"rewards/rejected": -95.13047790527344,
|
| 625 |
-
"step": 410
|
| 626 |
-
},
|
| 627 |
-
{
|
| 628 |
-
"epoch": 1.114323695579927,
|
| 629 |
-
"grad_norm": 5.069334747531684e-07,
|
| 630 |
-
"learning_rate": 6.455555555555555e-07,
|
| 631 |
-
"logits/chosen": 5.404343605041504,
|
| 632 |
-
"logits/rejected": 5.771250247955322,
|
| 633 |
-
"logps/chosen": -301.6144104003906,
|
| 634 |
-
"logps/rejected": -933.3482666015625,
|
| 635 |
-
"loss": 2.1804153442382814,
|
| 636 |
-
"rewards/accuracies": 0.893750011920929,
|
| 637 |
-
"rewards/chosen": 25.12398910522461,
|
| 638 |
-
"rewards/margins": 123.10489654541016,
|
| 639 |
-
"rewards/rejected": -97.98091125488281,
|
| 640 |
-
"step": 420
|
| 641 |
-
},
|
| 642 |
-
{
|
| 643 |
-
"epoch": 1.1409106015287471,
|
| 644 |
-
"grad_norm": 42.221588134765625,
|
| 645 |
-
"learning_rate": 6.344444444444444e-07,
|
| 646 |
-
"logits/chosen": 5.612006187438965,
|
| 647 |
-
"logits/rejected": 6.014307022094727,
|
| 648 |
-
"logps/chosen": -324.22314453125,
|
| 649 |
-
"logps/rejected": -896.99658203125,
|
| 650 |
-
"loss": 2.2177616119384767,
|
| 651 |
-
"rewards/accuracies": 0.875,
|
| 652 |
-
"rewards/chosen": 22.134746551513672,
|
| 653 |
-
"rewards/margins": 109.2309341430664,
|
| 654 |
-
"rewards/rejected": -87.09618377685547,
|
| 655 |
-
"step": 430
|
| 656 |
-
},
|
| 657 |
-
{
|
| 658 |
-
"epoch": 1.1674975074775673,
|
| 659 |
-
"grad_norm": 26.945816040039062,
|
| 660 |
-
"learning_rate": 6.233333333333332e-07,
|
| 661 |
-
"logits/chosen": 5.816843509674072,
|
| 662 |
-
"logits/rejected": 6.254372596740723,
|
| 663 |
-
"logps/chosen": -350.730712890625,
|
| 664 |
-
"logps/rejected": -915.1998291015625,
|
| 665 |
-
"loss": 1.6761651992797852,
|
| 666 |
-
"rewards/accuracies": 0.887499988079071,
|
| 667 |
-
"rewards/chosen": 27.9744873046875,
|
| 668 |
-
"rewards/margins": 122.52757263183594,
|
| 669 |
-
"rewards/rejected": -94.55308532714844,
|
| 670 |
-
"step": 440
|
| 671 |
-
},
|
| 672 |
-
{
|
| 673 |
-
"epoch": 1.1940844134263875,
|
| 674 |
-
"grad_norm": 201.8599853515625,
|
| 675 |
-
"learning_rate": 6.122222222222222e-07,
|
| 676 |
-
"logits/chosen": 5.693143844604492,
|
| 677 |
-
"logits/rejected": 6.162411689758301,
|
| 678 |
-
"logps/chosen": -299.4298095703125,
|
| 679 |
-
"logps/rejected": -959.1011962890625,
|
| 680 |
-
"loss": 2.2222429275512696,
|
| 681 |
-
"rewards/accuracies": 0.90625,
|
| 682 |
-
"rewards/chosen": 22.612167358398438,
|
| 683 |
-
"rewards/margins": 126.55992126464844,
|
| 684 |
-
"rewards/rejected": -103.94776916503906,
|
| 685 |
-
"step": 450
|
| 686 |
-
},
|
| 687 |
-
{
|
| 688 |
-
"epoch": 1.2206713193752077,
|
| 689 |
-
"grad_norm": 139.24583435058594,
|
| 690 |
-
"learning_rate": 6.011111111111112e-07,
|
| 691 |
-
"logits/chosen": 5.940896034240723,
|
| 692 |
-
"logits/rejected": 6.417025566101074,
|
| 693 |
-
"logps/chosen": -349.1092834472656,
|
| 694 |
-
"logps/rejected": -947.8054809570312,
|
| 695 |
-
"loss": 1.6525358200073241,
|
| 696 |
-
"rewards/accuracies": 0.9125000238418579,
|
| 697 |
-
"rewards/chosen": 20.343765258789062,
|
| 698 |
-
"rewards/margins": 119.97611999511719,
|
| 699 |
-
"rewards/rejected": -99.63237762451172,
|
| 700 |
-
"step": 460
|
| 701 |
-
},
|
| 702 |
-
{
|
| 703 |
-
"epoch": 1.2472582253240279,
|
| 704 |
-
"grad_norm": 207.5171661376953,
|
| 705 |
-
"learning_rate": 5.9e-07,
|
| 706 |
-
"logits/chosen": 6.329422950744629,
|
| 707 |
-
"logits/rejected": 6.6328911781311035,
|
| 708 |
-
"logps/chosen": -353.35308837890625,
|
| 709 |
-
"logps/rejected": -962.7057495117188,
|
| 710 |
-
"loss": 2.509499740600586,
|
| 711 |
-
"rewards/accuracies": 0.8687499761581421,
|
| 712 |
-
"rewards/chosen": 18.188751220703125,
|
| 713 |
-
"rewards/margins": 119.3648910522461,
|
| 714 |
-
"rewards/rejected": -101.1761474609375,
|
| 715 |
-
"step": 470
|
| 716 |
-
},
|
| 717 |
-
{
|
| 718 |
-
"epoch": 1.273845131272848,
|
| 719 |
-
"grad_norm": 88.39582824707031,
|
| 720 |
-
"learning_rate": 5.788888888888889e-07,
|
| 721 |
-
"logits/chosen": 6.743104457855225,
|
| 722 |
-
"logits/rejected": 7.0454840660095215,
|
| 723 |
-
"logps/chosen": -417.47314453125,
|
| 724 |
-
"logps/rejected": -1049.904052734375,
|
| 725 |
-
"loss": 1.0497099876403808,
|
| 726 |
-
"rewards/accuracies": 0.9375,
|
| 727 |
-
"rewards/chosen": 14.974113464355469,
|
| 728 |
-
"rewards/margins": 132.9074249267578,
|
| 729 |
-
"rewards/rejected": -117.93331146240234,
|
| 730 |
-
"step": 480
|
| 731 |
-
},
|
| 732 |
-
{
|
| 733 |
-
"epoch": 1.3004320372216682,
|
| 734 |
-
"grad_norm": 22.960351943969727,
|
| 735 |
-
"learning_rate": 5.677777777777778e-07,
|
| 736 |
-
"logits/chosen": 6.46866512298584,
|
| 737 |
-
"logits/rejected": 6.957917213439941,
|
| 738 |
-
"logps/chosen": -405.8673400878906,
|
| 739 |
-
"logps/rejected": -1068.978759765625,
|
| 740 |
-
"loss": 1.990153694152832,
|
| 741 |
-
"rewards/accuracies": 0.925000011920929,
|
| 742 |
-
"rewards/chosen": 11.08216381072998,
|
| 743 |
-
"rewards/margins": 135.7135772705078,
|
| 744 |
-
"rewards/rejected": -124.63139343261719,
|
| 745 |
-
"step": 490
|
| 746 |
-
},
|
| 747 |
-
{
|
| 748 |
-
"epoch": 1.3270189431704886,
|
| 749 |
-
"grad_norm": 2.325967418670416e-09,
|
| 750 |
-
"learning_rate": 5.566666666666666e-07,
|
| 751 |
-
"logits/chosen": 7.048731327056885,
|
| 752 |
-
"logits/rejected": 7.304018497467041,
|
| 753 |
-
"logps/chosen": -447.47161865234375,
|
| 754 |
-
"logps/rejected": -1018.6476440429688,
|
| 755 |
-
"loss": 1.1221290588378907,
|
| 756 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 757 |
-
"rewards/chosen": 12.060667037963867,
|
| 758 |
-
"rewards/margins": 117.6533203125,
|
| 759 |
-
"rewards/rejected": -105.5926513671875,
|
| 760 |
-
"step": 500
|
| 761 |
-
},
|
| 762 |
-
{
|
| 763 |
-
"epoch": 1.3536058491193088,
|
| 764 |
-
"grad_norm": 0.007949860766530037,
|
| 765 |
-
"learning_rate": 5.455555555555555e-07,
|
| 766 |
-
"logits/chosen": 6.571198463439941,
|
| 767 |
-
"logits/rejected": 6.9949541091918945,
|
| 768 |
-
"logps/chosen": -425.16229248046875,
|
| 769 |
-
"logps/rejected": -1075.010009765625,
|
| 770 |
-
"loss": 1.1256651878356934,
|
| 771 |
-
"rewards/accuracies": 0.918749988079071,
|
| 772 |
-
"rewards/chosen": 6.658470153808594,
|
| 773 |
-
"rewards/margins": 131.23922729492188,
|
| 774 |
-
"rewards/rejected": -124.58077239990234,
|
| 775 |
-
"step": 510
|
| 776 |
-
},
|
| 777 |
-
{
|
| 778 |
-
"epoch": 1.380192755068129,
|
| 779 |
-
"grad_norm": 162.58592224121094,
|
| 780 |
-
"learning_rate": 5.344444444444445e-07,
|
| 781 |
-
"logits/chosen": 6.9003005027771,
|
| 782 |
-
"logits/rejected": 7.386146545410156,
|
| 783 |
-
"logps/chosen": -386.6710510253906,
|
| 784 |
-
"logps/rejected": -1033.316650390625,
|
| 785 |
-
"loss": 2.134552001953125,
|
| 786 |
-
"rewards/accuracies": 0.925000011920929,
|
| 787 |
-
"rewards/chosen": 8.298912048339844,
|
| 788 |
-
"rewards/margins": 122.3122329711914,
|
| 789 |
-
"rewards/rejected": -114.0133056640625,
|
| 790 |
-
"step": 520
|
| 791 |
-
},
|
| 792 |
-
{
|
| 793 |
-
"epoch": 1.4067796610169492,
|
| 794 |
-
"grad_norm": 2.28546106484373e-08,
|
| 795 |
-
"learning_rate": 5.233333333333333e-07,
|
| 796 |
-
"logits/chosen": 6.5765581130981445,
|
| 797 |
-
"logits/rejected": 7.117588996887207,
|
| 798 |
-
"logps/chosen": -364.48895263671875,
|
| 799 |
-
"logps/rejected": -1099.998291015625,
|
| 800 |
-
"loss": 1.2829697608947754,
|
| 801 |
"rewards/accuracies": 0.956250011920929,
|
| 802 |
-
"rewards/chosen":
|
| 803 |
-
"rewards/margins":
|
| 804 |
-
"rewards/rejected": -
|
| 805 |
-
"step":
|
| 806 |
-
},
|
| 807 |
-
{
|
| 808 |
-
"epoch": 1.4333665669657694,
|
| 809 |
-
"grad_norm": 0.8024188280105591,
|
| 810 |
-
"learning_rate": 5.122222222222222e-07,
|
| 811 |
-
"logits/chosen": 6.9486517906188965,
|
| 812 |
-
"logits/rejected": 7.226126194000244,
|
| 813 |
-
"logps/chosen": -452.52410888671875,
|
| 814 |
-
"logps/rejected": -1101.6812744140625,
|
| 815 |
-
"loss": 0.855518913269043,
|
| 816 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 817 |
-
"rewards/chosen": 3.314232587814331,
|
| 818 |
-
"rewards/margins": 140.82302856445312,
|
| 819 |
-
"rewards/rejected": -137.50875854492188,
|
| 820 |
-
"step": 540
|
| 821 |
-
},
|
| 822 |
-
{
|
| 823 |
-
"epoch": 1.4599534729145895,
|
| 824 |
-
"grad_norm": 283.451416015625,
|
| 825 |
-
"learning_rate": 5.011111111111111e-07,
|
| 826 |
-
"logits/chosen": 6.747658729553223,
|
| 827 |
-
"logits/rejected": 7.270951271057129,
|
| 828 |
-
"logps/chosen": -445.3887634277344,
|
| 829 |
-
"logps/rejected": -1164.9342041015625,
|
| 830 |
-
"loss": 0.7842754364013672,
|
| 831 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 832 |
-
"rewards/chosen": 1.7988097667694092,
|
| 833 |
-
"rewards/margins": 148.59938049316406,
|
| 834 |
-
"rewards/rejected": -146.80056762695312,
|
| 835 |
-
"step": 550
|
| 836 |
-
},
|
| 837 |
-
{
|
| 838 |
-
"epoch": 1.4865403788634097,
|
| 839 |
-
"grad_norm": 6.495264507836457e-20,
|
| 840 |
-
"learning_rate": 4.9e-07,
|
| 841 |
-
"logits/chosen": 7.210297584533691,
|
| 842 |
-
"logits/rejected": 7.638421535491943,
|
| 843 |
-
"logps/chosen": -495.406494140625,
|
| 844 |
-
"logps/rejected": -1289.328125,
|
| 845 |
-
"loss": 1.998776626586914,
|
| 846 |
-
"rewards/accuracies": 0.956250011920929,
|
| 847 |
-
"rewards/chosen": 1.3115170001983643,
|
| 848 |
-
"rewards/margins": 165.7671661376953,
|
| 849 |
-
"rewards/rejected": -164.4556427001953,
|
| 850 |
-
"step": 560
|
| 851 |
-
},
|
| 852 |
-
{
|
| 853 |
-
"epoch": 1.5131272848122301,
|
| 854 |
-
"grad_norm": 162.8050994873047,
|
| 855 |
-
"learning_rate": 4.788888888888889e-07,
|
| 856 |
-
"logits/chosen": 7.229719638824463,
|
| 857 |
-
"logits/rejected": 7.55483341217041,
|
| 858 |
-
"logps/chosen": -467.35791015625,
|
| 859 |
-
"logps/rejected": -1125.8382568359375,
|
| 860 |
-
"loss": 0.433735990524292,
|
| 861 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 862 |
-
"rewards/chosen": 1.6890428066253662,
|
| 863 |
-
"rewards/margins": 135.47544860839844,
|
| 864 |
-
"rewards/rejected": -133.78640747070312,
|
| 865 |
-
"step": 570
|
| 866 |
-
},
|
| 867 |
-
{
|
| 868 |
-
"epoch": 1.53971419076105,
|
| 869 |
-
"grad_norm": 198.3883514404297,
|
| 870 |
-
"learning_rate": 4.677777777777778e-07,
|
| 871 |
-
"logits/chosen": 6.802654266357422,
|
| 872 |
-
"logits/rejected": 7.2844390869140625,
|
| 873 |
-
"logps/chosen": -412.0586853027344,
|
| 874 |
-
"logps/rejected": -1095.768798828125,
|
| 875 |
-
"loss": 0.20694947242736816,
|
| 876 |
-
"rewards/accuracies": 0.949999988079071,
|
| 877 |
-
"rewards/chosen": 3.9473280906677246,
|
| 878 |
-
"rewards/margins": 135.24732971191406,
|
| 879 |
-
"rewards/rejected": -131.3000030517578,
|
| 880 |
-
"step": 580
|
| 881 |
-
},
|
| 882 |
-
{
|
| 883 |
-
"epoch": 1.5663010967098705,
|
| 884 |
-
"grad_norm": 2.5432399297642405e-08,
|
| 885 |
-
"learning_rate": 4.5666666666666665e-07,
|
| 886 |
-
"logits/chosen": 7.081494331359863,
|
| 887 |
-
"logits/rejected": 7.492499351501465,
|
| 888 |
-
"logps/chosen": -423.6611328125,
|
| 889 |
-
"logps/rejected": -1167.4932861328125,
|
| 890 |
-
"loss": 1.276815414428711,
|
| 891 |
-
"rewards/accuracies": 0.949999988079071,
|
| 892 |
-
"rewards/chosen": -0.2825419008731842,
|
| 893 |
-
"rewards/margins": 142.3505096435547,
|
| 894 |
-
"rewards/rejected": -142.63302612304688,
|
| 895 |
-
"step": 590
|
| 896 |
-
},
|
| 897 |
-
{
|
| 898 |
-
"epoch": 1.5928880026586905,
|
| 899 |
-
"grad_norm": 0.6176006197929382,
|
| 900 |
-
"learning_rate": 4.455555555555555e-07,
|
| 901 |
-
"logits/chosen": 7.086031913757324,
|
| 902 |
-
"logits/rejected": 7.627284049987793,
|
| 903 |
-
"logps/chosen": -448.5660705566406,
|
| 904 |
-
"logps/rejected": -1207.191650390625,
|
| 905 |
-
"loss": 0.29495222568511964,
|
| 906 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 907 |
-
"rewards/chosen": 0.9667795300483704,
|
| 908 |
-
"rewards/margins": 149.69699096679688,
|
| 909 |
-
"rewards/rejected": -148.73023986816406,
|
| 910 |
-
"step": 600
|
| 911 |
-
},
|
| 912 |
-
{
|
| 913 |
-
"epoch": 1.6194749086075109,
|
| 914 |
-
"grad_norm": 66.45056915283203,
|
| 915 |
-
"learning_rate": 4.344444444444444e-07,
|
| 916 |
-
"logits/chosen": 7.270118713378906,
|
| 917 |
-
"logits/rejected": 7.5960588455200195,
|
| 918 |
-
"logps/chosen": -503.7193298339844,
|
| 919 |
-
"logps/rejected": -1094.6021728515625,
|
| 920 |
-
"loss": 1.5946972846984864,
|
| 921 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 922 |
-
"rewards/chosen": -3.655120372772217,
|
| 923 |
-
"rewards/margins": 125.68217468261719,
|
| 924 |
-
"rewards/rejected": -129.33731079101562,
|
| 925 |
-
"step": 610
|
| 926 |
-
},
|
| 927 |
-
{
|
| 928 |
-
"epoch": 1.646061814556331,
|
| 929 |
-
"grad_norm": 280.2427978515625,
|
| 930 |
-
"learning_rate": 4.2333333333333334e-07,
|
| 931 |
-
"logits/chosen": 7.251768589019775,
|
| 932 |
-
"logits/rejected": 7.520864009857178,
|
| 933 |
-
"logps/chosen": -517.1514892578125,
|
| 934 |
-
"logps/rejected": -1172.587158203125,
|
| 935 |
-
"loss": 2.4477691650390625,
|
| 936 |
-
"rewards/accuracies": 0.925000011920929,
|
| 937 |
-
"rewards/chosen": 0.9268826246261597,
|
| 938 |
-
"rewards/margins": 142.18368530273438,
|
| 939 |
-
"rewards/rejected": -141.2567901611328,
|
| 940 |
-
"step": 620
|
| 941 |
-
},
|
| 942 |
-
{
|
| 943 |
-
"epoch": 1.6726487205051512,
|
| 944 |
-
"grad_norm": 1.0393255949020386,
|
| 945 |
-
"learning_rate": 4.122222222222222e-07,
|
| 946 |
-
"logits/chosen": 7.011075019836426,
|
| 947 |
-
"logits/rejected": 7.46621561050415,
|
| 948 |
-
"logps/chosen": -447.34124755859375,
|
| 949 |
-
"logps/rejected": -1143.3458251953125,
|
| 950 |
-
"loss": 1.0738434791564941,
|
| 951 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 952 |
-
"rewards/chosen": 1.6229969263076782,
|
| 953 |
-
"rewards/margins": 142.9796600341797,
|
| 954 |
-
"rewards/rejected": -141.35665893554688,
|
| 955 |
-
"step": 630
|
| 956 |
-
},
|
| 957 |
-
{
|
| 958 |
-
"epoch": 1.6992356264539714,
|
| 959 |
-
"grad_norm": 119.75847625732422,
|
| 960 |
-
"learning_rate": 4.0111111111111106e-07,
|
| 961 |
-
"logits/chosen": 6.9999237060546875,
|
| 962 |
-
"logits/rejected": 7.578449249267578,
|
| 963 |
-
"logps/chosen": -469.19012451171875,
|
| 964 |
-
"logps/rejected": -1200.680419921875,
|
| 965 |
-
"loss": 0.9937694549560547,
|
| 966 |
-
"rewards/accuracies": 0.949999988079071,
|
| 967 |
-
"rewards/chosen": 1.2244775295257568,
|
| 968 |
-
"rewards/margins": 147.14993286132812,
|
| 969 |
-
"rewards/rejected": -145.92544555664062,
|
| 970 |
-
"step": 640
|
| 971 |
-
},
|
| 972 |
-
{
|
| 973 |
-
"epoch": 1.7258225324027916,
|
| 974 |
-
"grad_norm": 27.75540542602539,
|
| 975 |
-
"learning_rate": 3.8999999999999997e-07,
|
| 976 |
-
"logits/chosen": 6.937554359436035,
|
| 977 |
-
"logits/rejected": 7.374237060546875,
|
| 978 |
-
"logps/chosen": -463.05438232421875,
|
| 979 |
-
"logps/rejected": -1168.0521240234375,
|
| 980 |
-
"loss": 0.39649856090545654,
|
| 981 |
-
"rewards/accuracies": 0.981249988079071,
|
| 982 |
-
"rewards/chosen": 3.329749345779419,
|
| 983 |
-
"rewards/margins": 145.75735473632812,
|
| 984 |
-
"rewards/rejected": -142.4276123046875,
|
| 985 |
-
"step": 650
|
| 986 |
-
},
|
| 987 |
-
{
|
| 988 |
-
"epoch": 1.7524094383516118,
|
| 989 |
-
"grad_norm": 31.218721389770508,
|
| 990 |
-
"learning_rate": 3.788888888888889e-07,
|
| 991 |
-
"logits/chosen": 7.072316646575928,
|
| 992 |
-
"logits/rejected": 7.550895690917969,
|
| 993 |
-
"logps/chosen": -483.40234375,
|
| 994 |
-
"logps/rejected": -1184.9073486328125,
|
| 995 |
-
"loss": 0.25033409595489503,
|
| 996 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 997 |
-
"rewards/chosen": -2.29609751701355,
|
| 998 |
-
"rewards/margins": 147.64785766601562,
|
| 999 |
-
"rewards/rejected": -149.94395446777344,
|
| 1000 |
-
"step": 660
|
| 1001 |
-
},
|
| 1002 |
-
{
|
| 1003 |
-
"epoch": 1.778996344300432,
|
| 1004 |
-
"grad_norm": 6.414053359549143e-07,
|
| 1005 |
-
"learning_rate": 3.6777777777777774e-07,
|
| 1006 |
-
"logits/chosen": 7.303959846496582,
|
| 1007 |
-
"logits/rejected": 7.623525142669678,
|
| 1008 |
-
"logps/chosen": -508.6453552246094,
|
| 1009 |
-
"logps/rejected": -1223.940673828125,
|
| 1010 |
-
"loss": 0.31205618381500244,
|
| 1011 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 1012 |
-
"rewards/chosen": -2.5321922302246094,
|
| 1013 |
-
"rewards/margins": 150.60842895507812,
|
| 1014 |
-
"rewards/rejected": -153.14060974121094,
|
| 1015 |
-
"step": 670
|
| 1016 |
-
},
|
| 1017 |
-
{
|
| 1018 |
-
"epoch": 1.8055832502492524,
|
| 1019 |
-
"grad_norm": 1.0985974499902462e-12,
|
| 1020 |
-
"learning_rate": 3.5666666666666666e-07,
|
| 1021 |
-
"logits/chosen": 7.344334602355957,
|
| 1022 |
-
"logits/rejected": 7.8254547119140625,
|
| 1023 |
-
"logps/chosen": -532.2833251953125,
|
| 1024 |
-
"logps/rejected": -1228.1844482421875,
|
| 1025 |
-
"loss": 1.0204992294311523,
|
| 1026 |
-
"rewards/accuracies": 0.956250011920929,
|
| 1027 |
-
"rewards/chosen": 3.5795791149139404,
|
| 1028 |
-
"rewards/margins": 159.29647827148438,
|
| 1029 |
-
"rewards/rejected": -155.71688842773438,
|
| 1030 |
-
"step": 680
|
| 1031 |
-
},
|
| 1032 |
-
{
|
| 1033 |
-
"epoch": 1.8321701561980723,
|
| 1034 |
-
"grad_norm": 66.1689453125,
|
| 1035 |
-
"learning_rate": 3.4555555555555557e-07,
|
| 1036 |
-
"logits/chosen": 7.0121636390686035,
|
| 1037 |
-
"logits/rejected": 7.367627143859863,
|
| 1038 |
-
"logps/chosen": -453.792236328125,
|
| 1039 |
-
"logps/rejected": -1141.1865234375,
|
| 1040 |
-
"loss": 0.38547022342681886,
|
| 1041 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 1042 |
-
"rewards/chosen": -0.3728172183036804,
|
| 1043 |
-
"rewards/margins": 139.9238739013672,
|
| 1044 |
-
"rewards/rejected": -140.29669189453125,
|
| 1045 |
-
"step": 690
|
| 1046 |
-
},
|
| 1047 |
-
{
|
| 1048 |
-
"epoch": 1.8587570621468927,
|
| 1049 |
-
"grad_norm": 1.7826409438004044e-20,
|
| 1050 |
-
"learning_rate": 3.3444444444444443e-07,
|
| 1051 |
-
"logits/chosen": 6.914497375488281,
|
| 1052 |
-
"logits/rejected": 7.344313144683838,
|
| 1053 |
-
"logps/chosen": -456.8873596191406,
|
| 1054 |
-
"logps/rejected": -1159.482666015625,
|
| 1055 |
-
"loss": 0.2864746332168579,
|
| 1056 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1057 |
-
"rewards/chosen": -0.41872739791870117,
|
| 1058 |
-
"rewards/margins": 139.0840606689453,
|
| 1059 |
-
"rewards/rejected": -139.50277709960938,
|
| 1060 |
-
"step": 700
|
| 1061 |
-
},
|
| 1062 |
-
{
|
| 1063 |
-
"epoch": 1.8853439680957127,
|
| 1064 |
-
"grad_norm": 0.6577161550521851,
|
| 1065 |
-
"learning_rate": 3.233333333333333e-07,
|
| 1066 |
-
"logits/chosen": 7.24100399017334,
|
| 1067 |
-
"logits/rejected": 7.729971408843994,
|
| 1068 |
-
"logps/chosen": -453.352783203125,
|
| 1069 |
-
"logps/rejected": -1139.1920166015625,
|
| 1070 |
-
"loss": 0.40453357696533204,
|
| 1071 |
-
"rewards/accuracies": 0.949999988079071,
|
| 1072 |
-
"rewards/chosen": -0.5374351739883423,
|
| 1073 |
-
"rewards/margins": 134.17910766601562,
|
| 1074 |
-
"rewards/rejected": -134.71653747558594,
|
| 1075 |
-
"step": 710
|
| 1076 |
-
},
|
| 1077 |
-
{
|
| 1078 |
-
"epoch": 1.911930874044533,
|
| 1079 |
-
"grad_norm": 58.065155029296875,
|
| 1080 |
-
"learning_rate": 3.122222222222222e-07,
|
| 1081 |
-
"logits/chosen": 7.2779541015625,
|
| 1082 |
-
"logits/rejected": 7.5862250328063965,
|
| 1083 |
-
"logps/chosen": -502.33489990234375,
|
| 1084 |
-
"logps/rejected": -1174.3145751953125,
|
| 1085 |
-
"loss": 0.2982128143310547,
|
| 1086 |
-
"rewards/accuracies": 0.949999988079071,
|
| 1087 |
-
"rewards/chosen": 0.528018593788147,
|
| 1088 |
-
"rewards/margins": 134.43031311035156,
|
| 1089 |
-
"rewards/rejected": -133.9022979736328,
|
| 1090 |
-
"step": 720
|
| 1091 |
-
},
|
| 1092 |
-
{
|
| 1093 |
-
"epoch": 1.9385177799933533,
|
| 1094 |
-
"grad_norm": 3.057793140411377,
|
| 1095 |
-
"learning_rate": 3.011111111111111e-07,
|
| 1096 |
-
"logits/chosen": 7.26782751083374,
|
| 1097 |
-
"logits/rejected": 7.741539001464844,
|
| 1098 |
-
"logps/chosen": -496.8504943847656,
|
| 1099 |
-
"logps/rejected": -1235.9169921875,
|
| 1100 |
-
"loss": 0.8299455642700195,
|
| 1101 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1102 |
-
"rewards/chosen": -0.2519731819629669,
|
| 1103 |
-
"rewards/margins": 152.95582580566406,
|
| 1104 |
-
"rewards/rejected": -153.20779418945312,
|
| 1105 |
-
"step": 730
|
| 1106 |
-
},
|
| 1107 |
-
{
|
| 1108 |
-
"epoch": 1.9651046859421735,
|
| 1109 |
-
"grad_norm": 185.18174743652344,
|
| 1110 |
-
"learning_rate": 2.9e-07,
|
| 1111 |
-
"logits/chosen": 7.201784610748291,
|
| 1112 |
-
"logits/rejected": 7.585198402404785,
|
| 1113 |
-
"logps/chosen": -474.599853515625,
|
| 1114 |
-
"logps/rejected": -1184.7464599609375,
|
| 1115 |
-
"loss": 1.5328912734985352,
|
| 1116 |
-
"rewards/accuracies": 0.9312499761581421,
|
| 1117 |
-
"rewards/chosen": -3.7899742126464844,
|
| 1118 |
-
"rewards/margins": 142.3949432373047,
|
| 1119 |
-
"rewards/rejected": -146.1849365234375,
|
| 1120 |
-
"step": 740
|
| 1121 |
-
},
|
| 1122 |
-
{
|
| 1123 |
-
"epoch": 1.9916915918909937,
|
| 1124 |
-
"grad_norm": 173.19436645507812,
|
| 1125 |
-
"learning_rate": 2.788888888888889e-07,
|
| 1126 |
-
"logits/chosen": 7.347403526306152,
|
| 1127 |
-
"logits/rejected": 7.8731865882873535,
|
| 1128 |
-
"logps/chosen": -484.5741271972656,
|
| 1129 |
-
"logps/rejected": -1241.9910888671875,
|
| 1130 |
-
"loss": 0.9414227485656739,
|
| 1131 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 1132 |
-
"rewards/chosen": -0.26702070236206055,
|
| 1133 |
-
"rewards/margins": 149.2693328857422,
|
| 1134 |
-
"rewards/rejected": -149.53634643554688,
|
| 1135 |
-
"step": 750
|
| 1136 |
-
},
|
| 1137 |
-
{
|
| 1138 |
-
"epoch": 2.015952143569292,
|
| 1139 |
-
"grad_norm": 277.53521728515625,
|
| 1140 |
-
"learning_rate": 2.6777777777777775e-07,
|
| 1141 |
-
"logits/chosen": 7.591332912445068,
|
| 1142 |
-
"logits/rejected": 7.759430408477783,
|
| 1143 |
-
"logps/chosen": -578.1312866210938,
|
| 1144 |
-
"logps/rejected": -1104.3414306640625,
|
| 1145 |
-
"loss": 0.3479891538619995,
|
| 1146 |
-
"rewards/accuracies": 0.9726027250289917,
|
| 1147 |
-
"rewards/chosen": -3.462564468383789,
|
| 1148 |
-
"rewards/margins": 117.48858642578125,
|
| 1149 |
-
"rewards/rejected": -120.95115661621094,
|
| 1150 |
-
"step": 760
|
| 1151 |
-
},
|
| 1152 |
-
{
|
| 1153 |
-
"epoch": 2.0425390495181124,
|
| 1154 |
-
"grad_norm": 81.4224624633789,
|
| 1155 |
-
"learning_rate": 2.5666666666666666e-07,
|
| 1156 |
-
"logits/chosen": 6.820937156677246,
|
| 1157 |
-
"logits/rejected": 7.438076972961426,
|
| 1158 |
-
"logps/chosen": -445.38592529296875,
|
| 1159 |
-
"logps/rejected": -1255.2547607421875,
|
| 1160 |
-
"loss": 0.7632743835449218,
|
| 1161 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 1162 |
-
"rewards/chosen": -1.3056232929229736,
|
| 1163 |
-
"rewards/margins": 162.7215576171875,
|
| 1164 |
-
"rewards/rejected": -164.02719116210938,
|
| 1165 |
-
"step": 770
|
| 1166 |
-
},
|
| 1167 |
-
{
|
| 1168 |
-
"epoch": 2.0691259554669323,
|
| 1169 |
-
"grad_norm": 80.839111328125,
|
| 1170 |
-
"learning_rate": 2.455555555555555e-07,
|
| 1171 |
-
"logits/chosen": 7.089077949523926,
|
| 1172 |
-
"logits/rejected": 7.562623500823975,
|
| 1173 |
-
"logps/chosen": -479.9771423339844,
|
| 1174 |
-
"logps/rejected": -1212.7470703125,
|
| 1175 |
-
"loss": 0.37755522727966306,
|
| 1176 |
-
"rewards/accuracies": 0.96875,
|
| 1177 |
-
"rewards/chosen": -0.4610620439052582,
|
| 1178 |
-
"rewards/margins": 147.49462890625,
|
| 1179 |
-
"rewards/rejected": -147.9556884765625,
|
| 1180 |
-
"step": 780
|
| 1181 |
-
},
|
| 1182 |
-
{
|
| 1183 |
-
"epoch": 2.0957128614157527,
|
| 1184 |
-
"grad_norm": 102.21258544921875,
|
| 1185 |
-
"learning_rate": 2.3444444444444444e-07,
|
| 1186 |
-
"logits/chosen": 7.348860740661621,
|
| 1187 |
-
"logits/rejected": 7.812272548675537,
|
| 1188 |
-
"logps/chosen": -545.9133911132812,
|
| 1189 |
-
"logps/rejected": -1209.3829345703125,
|
| 1190 |
-
"loss": 0.8432134628295899,
|
| 1191 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 1192 |
-
"rewards/chosen": -4.405800819396973,
|
| 1193 |
-
"rewards/margins": 136.8076629638672,
|
| 1194 |
-
"rewards/rejected": -141.2134552001953,
|
| 1195 |
-
"step": 790
|
| 1196 |
-
},
|
| 1197 |
-
{
|
| 1198 |
-
"epoch": 2.122299767364573,
|
| 1199 |
-
"grad_norm": 7.490438461303711,
|
| 1200 |
-
"learning_rate": 2.2333333333333332e-07,
|
| 1201 |
-
"logits/chosen": 6.928166389465332,
|
| 1202 |
-
"logits/rejected": 7.589695930480957,
|
| 1203 |
-
"logps/chosen": -411.7215881347656,
|
| 1204 |
-
"logps/rejected": -1234.5572509765625,
|
| 1205 |
-
"loss": 0.08373026251792907,
|
| 1206 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1207 |
-
"rewards/chosen": -1.1743253469467163,
|
| 1208 |
-
"rewards/margins": 159.31773376464844,
|
| 1209 |
-
"rewards/rejected": -160.49208068847656,
|
| 1210 |
-
"step": 800
|
| 1211 |
-
},
|
| 1212 |
-
{
|
| 1213 |
-
"epoch": 2.148886673313393,
|
| 1214 |
-
"grad_norm": 0.0003583618381526321,
|
| 1215 |
-
"learning_rate": 2.122222222222222e-07,
|
| 1216 |
-
"logits/chosen": 7.2483720779418945,
|
| 1217 |
-
"logits/rejected": 7.788289546966553,
|
| 1218 |
-
"logps/chosen": -497.52069091796875,
|
| 1219 |
-
"logps/rejected": -1158.642822265625,
|
| 1220 |
-
"loss": 0.1160581350326538,
|
| 1221 |
-
"rewards/accuracies": 0.96875,
|
| 1222 |
-
"rewards/chosen": 2.365230083465576,
|
| 1223 |
-
"rewards/margins": 146.15255737304688,
|
| 1224 |
-
"rewards/rejected": -143.78732299804688,
|
| 1225 |
-
"step": 810
|
| 1226 |
-
},
|
| 1227 |
-
{
|
| 1228 |
-
"epoch": 2.1754735792622135,
|
| 1229 |
-
"grad_norm": 6.3310980796813965,
|
| 1230 |
-
"learning_rate": 2.011111111111111e-07,
|
| 1231 |
-
"logits/chosen": 6.909984588623047,
|
| 1232 |
-
"logits/rejected": 7.344359397888184,
|
| 1233 |
-
"logps/chosen": -432.39764404296875,
|
| 1234 |
-
"logps/rejected": -1212.4569091796875,
|
| 1235 |
-
"loss": 0.9685474395751953,
|
| 1236 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 1237 |
-
"rewards/chosen": -2.124849319458008,
|
| 1238 |
-
"rewards/margins": 155.05654907226562,
|
| 1239 |
-
"rewards/rejected": -157.18141174316406,
|
| 1240 |
-
"step": 820
|
| 1241 |
-
},
|
| 1242 |
-
{
|
| 1243 |
-
"epoch": 2.2020604852110335,
|
| 1244 |
-
"grad_norm": 5.151050697094206e-09,
|
| 1245 |
-
"learning_rate": 1.8999999999999998e-07,
|
| 1246 |
-
"logits/chosen": 7.006634712219238,
|
| 1247 |
-
"logits/rejected": 7.5766754150390625,
|
| 1248 |
-
"logps/chosen": -431.0802307128906,
|
| 1249 |
-
"logps/rejected": -1220.452392578125,
|
| 1250 |
-
"loss": 1.1500192642211915,
|
| 1251 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1252 |
-
"rewards/chosen": -2.019794464111328,
|
| 1253 |
-
"rewards/margins": 152.16506958007812,
|
| 1254 |
-
"rewards/rejected": -154.18484497070312,
|
| 1255 |
-
"step": 830
|
| 1256 |
-
},
|
| 1257 |
-
{
|
| 1258 |
-
"epoch": 2.228647391159854,
|
| 1259 |
-
"grad_norm": 1.951496702049138e-18,
|
| 1260 |
-
"learning_rate": 1.7888888888888887e-07,
|
| 1261 |
-
"logits/chosen": 6.816000938415527,
|
| 1262 |
-
"logits/rejected": 7.375506401062012,
|
| 1263 |
-
"logps/chosen": -439.57891845703125,
|
| 1264 |
-
"logps/rejected": -1222.27001953125,
|
| 1265 |
-
"loss": 0.3972776889801025,
|
| 1266 |
-
"rewards/accuracies": 0.987500011920929,
|
| 1267 |
-
"rewards/chosen": 1.9101593494415283,
|
| 1268 |
-
"rewards/margins": 158.9185333251953,
|
| 1269 |
-
"rewards/rejected": -157.0083770751953,
|
| 1270 |
-
"step": 840
|
| 1271 |
-
},
|
| 1272 |
-
{
|
| 1273 |
-
"epoch": 2.255234297108674,
|
| 1274 |
-
"grad_norm": 2.449645117964328e-15,
|
| 1275 |
-
"learning_rate": 1.6777777777777778e-07,
|
| 1276 |
-
"logits/chosen": 7.166296482086182,
|
| 1277 |
-
"logits/rejected": 7.5857744216918945,
|
| 1278 |
-
"logps/chosen": -484.2479553222656,
|
| 1279 |
-
"logps/rejected": -1235.645263671875,
|
| 1280 |
-
"loss": 0.15833470821380616,
|
| 1281 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1282 |
-
"rewards/chosen": -0.6545869708061218,
|
| 1283 |
-
"rewards/margins": 155.0919952392578,
|
| 1284 |
-
"rewards/rejected": -155.74655151367188,
|
| 1285 |
-
"step": 850
|
| 1286 |
-
},
|
| 1287 |
-
{
|
| 1288 |
-
"epoch": 2.2818212030574943,
|
| 1289 |
-
"grad_norm": 67.49964141845703,
|
| 1290 |
-
"learning_rate": 1.5666666666666667e-07,
|
| 1291 |
-
"logits/chosen": 6.9471001625061035,
|
| 1292 |
-
"logits/rejected": 7.408398628234863,
|
| 1293 |
-
"logps/chosen": -406.9446105957031,
|
| 1294 |
-
"logps/rejected": -1206.536376953125,
|
| 1295 |
-
"loss": 0.3223508358001709,
|
| 1296 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 1297 |
-
"rewards/chosen": 4.314828395843506,
|
| 1298 |
-
"rewards/margins": 160.91775512695312,
|
| 1299 |
-
"rewards/rejected": -156.60293579101562,
|
| 1300 |
-
"step": 860
|
| 1301 |
-
},
|
| 1302 |
-
{
|
| 1303 |
-
"epoch": 2.308408109006314,
|
| 1304 |
-
"grad_norm": 3.4588420021464117e-06,
|
| 1305 |
-
"learning_rate": 1.4555555555555555e-07,
|
| 1306 |
-
"logits/chosen": 6.990222930908203,
|
| 1307 |
-
"logits/rejected": 7.685202598571777,
|
| 1308 |
-
"logps/chosen": -426.66973876953125,
|
| 1309 |
-
"logps/rejected": -1176.889404296875,
|
| 1310 |
-
"loss": 0.8611475944519043,
|
| 1311 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 1312 |
-
"rewards/chosen": 1.3860576152801514,
|
| 1313 |
-
"rewards/margins": 144.15267944335938,
|
| 1314 |
-
"rewards/rejected": -142.76663208007812,
|
| 1315 |
-
"step": 870
|
| 1316 |
-
},
|
| 1317 |
-
{
|
| 1318 |
-
"epoch": 2.3349950149551346,
|
| 1319 |
-
"grad_norm": 4.1328581182331625e-12,
|
| 1320 |
-
"learning_rate": 1.3444444444444444e-07,
|
| 1321 |
-
"logits/chosen": 7.256162166595459,
|
| 1322 |
-
"logits/rejected": 7.685450553894043,
|
| 1323 |
-
"logps/chosen": -462.0904846191406,
|
| 1324 |
-
"logps/rejected": -1166.178466796875,
|
| 1325 |
-
"loss": 0.024902737140655516,
|
| 1326 |
-
"rewards/accuracies": 0.987500011920929,
|
| 1327 |
-
"rewards/chosen": 1.212837815284729,
|
| 1328 |
-
"rewards/margins": 140.08041381835938,
|
| 1329 |
-
"rewards/rejected": -138.8675537109375,
|
| 1330 |
-
"step": 880
|
| 1331 |
-
},
|
| 1332 |
-
{
|
| 1333 |
-
"epoch": 2.361581920903955,
|
| 1334 |
-
"grad_norm": 22.725154876708984,
|
| 1335 |
-
"learning_rate": 1.2333333333333333e-07,
|
| 1336 |
-
"logits/chosen": 7.431256294250488,
|
| 1337 |
-
"logits/rejected": 7.865132808685303,
|
| 1338 |
-
"logps/chosen": -456.8827209472656,
|
| 1339 |
-
"logps/rejected": -1153.871337890625,
|
| 1340 |
-
"loss": 0.13207526206970216,
|
| 1341 |
-
"rewards/accuracies": 0.981249988079071,
|
| 1342 |
-
"rewards/chosen": 1.6622031927108765,
|
| 1343 |
-
"rewards/margins": 136.24082946777344,
|
| 1344 |
-
"rewards/rejected": -134.57862854003906,
|
| 1345 |
-
"step": 890
|
| 1346 |
-
},
|
| 1347 |
-
{
|
| 1348 |
-
"epoch": 2.388168826852775,
|
| 1349 |
-
"grad_norm": 132.83956909179688,
|
| 1350 |
-
"learning_rate": 1.1222222222222221e-07,
|
| 1351 |
-
"logits/chosen": 7.010849952697754,
|
| 1352 |
-
"logits/rejected": 7.441749572753906,
|
| 1353 |
-
"logps/chosen": -502.49371337890625,
|
| 1354 |
-
"logps/rejected": -1215.2733154296875,
|
| 1355 |
-
"loss": 0.5922121524810791,
|
| 1356 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1357 |
-
"rewards/chosen": -2.6824889183044434,
|
| 1358 |
-
"rewards/margins": 148.62466430664062,
|
| 1359 |
-
"rewards/rejected": -151.30715942382812,
|
| 1360 |
-
"step": 900
|
| 1361 |
-
},
|
| 1362 |
-
{
|
| 1363 |
-
"epoch": 2.4147557328015954,
|
| 1364 |
-
"grad_norm": 0.005245895590633154,
|
| 1365 |
-
"learning_rate": 1.011111111111111e-07,
|
| 1366 |
-
"logits/chosen": 6.980523109436035,
|
| 1367 |
-
"logits/rejected": 7.430232048034668,
|
| 1368 |
-
"logps/chosen": -470.41253662109375,
|
| 1369 |
-
"logps/rejected": -1160.4951171875,
|
| 1370 |
-
"loss": 1.195225143432617,
|
| 1371 |
-
"rewards/accuracies": 0.949999988079071,
|
| 1372 |
-
"rewards/chosen": -3.2118802070617676,
|
| 1373 |
-
"rewards/margins": 144.29278564453125,
|
| 1374 |
-
"rewards/rejected": -147.50466918945312,
|
| 1375 |
-
"step": 910
|
| 1376 |
-
},
|
| 1377 |
-
{
|
| 1378 |
-
"epoch": 2.4413426387504154,
|
| 1379 |
-
"grad_norm": 194.52578735351562,
|
| 1380 |
-
"learning_rate": 9e-08,
|
| 1381 |
-
"logits/chosen": 6.884810447692871,
|
| 1382 |
-
"logits/rejected": 7.503731727600098,
|
| 1383 |
-
"logps/chosen": -440.31976318359375,
|
| 1384 |
-
"logps/rejected": -1206.906494140625,
|
| 1385 |
-
"loss": 0.44526066780090334,
|
| 1386 |
-
"rewards/accuracies": 0.956250011920929,
|
| 1387 |
-
"rewards/chosen": 1.156048059463501,
|
| 1388 |
-
"rewards/margins": 149.27732849121094,
|
| 1389 |
-
"rewards/rejected": -148.1212921142578,
|
| 1390 |
-
"step": 920
|
| 1391 |
-
},
|
| 1392 |
-
{
|
| 1393 |
-
"epoch": 2.4679295446992358,
|
| 1394 |
-
"grad_norm": 1.5737574004387467e-14,
|
| 1395 |
-
"learning_rate": 7.888888888888889e-08,
|
| 1396 |
-
"logits/chosen": 7.322862148284912,
|
| 1397 |
-
"logits/rejected": 7.748003959655762,
|
| 1398 |
-
"logps/chosen": -508.33245849609375,
|
| 1399 |
-
"logps/rejected": -1189.603759765625,
|
| 1400 |
-
"loss": 0.18692436218261718,
|
| 1401 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1402 |
-
"rewards/chosen": 1.148808479309082,
|
| 1403 |
-
"rewards/margins": 146.20956420898438,
|
| 1404 |
-
"rewards/rejected": -145.06076049804688,
|
| 1405 |
-
"step": 930
|
| 1406 |
-
},
|
| 1407 |
-
{
|
| 1408 |
-
"epoch": 2.4945164506480557,
|
| 1409 |
-
"grad_norm": 0.22959347069263458,
|
| 1410 |
-
"learning_rate": 6.777777777777778e-08,
|
| 1411 |
-
"logits/chosen": 7.375940799713135,
|
| 1412 |
-
"logits/rejected": 7.710402011871338,
|
| 1413 |
-
"logps/chosen": -490.12384033203125,
|
| 1414 |
-
"logps/rejected": -1171.1483154296875,
|
| 1415 |
-
"loss": 0.27915282249450685,
|
| 1416 |
-
"rewards/accuracies": 0.9624999761581421,
|
| 1417 |
-
"rewards/chosen": 1.145845651626587,
|
| 1418 |
-
"rewards/margins": 140.29800415039062,
|
| 1419 |
-
"rewards/rejected": -139.1521453857422,
|
| 1420 |
-
"step": 940
|
| 1421 |
-
},
|
| 1422 |
-
{
|
| 1423 |
-
"epoch": 2.521103356596876,
|
| 1424 |
-
"grad_norm": 22.964818954467773,
|
| 1425 |
-
"learning_rate": 5.666666666666666e-08,
|
| 1426 |
-
"logits/chosen": 7.258917331695557,
|
| 1427 |
-
"logits/rejected": 7.766401767730713,
|
| 1428 |
-
"logps/chosen": -467.205322265625,
|
| 1429 |
-
"logps/rejected": -1157.4315185546875,
|
| 1430 |
-
"loss": 1.6006925582885743,
|
| 1431 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 1432 |
-
"rewards/chosen": 0.41268739104270935,
|
| 1433 |
-
"rewards/margins": 135.27273559570312,
|
| 1434 |
-
"rewards/rejected": -134.86004638671875,
|
| 1435 |
-
"step": 950
|
| 1436 |
-
},
|
| 1437 |
-
{
|
| 1438 |
-
"epoch": 2.547690262545696,
|
| 1439 |
-
"grad_norm": 3.8648969441501535e-11,
|
| 1440 |
-
"learning_rate": 4.555555555555556e-08,
|
| 1441 |
-
"logits/chosen": 7.018073081970215,
|
| 1442 |
-
"logits/rejected": 7.558196067810059,
|
| 1443 |
-
"logps/chosen": -449.532958984375,
|
| 1444 |
-
"logps/rejected": -1138.4356689453125,
|
| 1445 |
-
"loss": 0.28522279262542727,
|
| 1446 |
-
"rewards/accuracies": 0.981249988079071,
|
| 1447 |
-
"rewards/chosen": -0.8609614372253418,
|
| 1448 |
-
"rewards/margins": 139.2249298095703,
|
| 1449 |
-
"rewards/rejected": -140.0858917236328,
|
| 1450 |
-
"step": 960
|
| 1451 |
-
},
|
| 1452 |
-
{
|
| 1453 |
-
"epoch": 2.5742771684945165,
|
| 1454 |
-
"grad_norm": 84.71375274658203,
|
| 1455 |
-
"learning_rate": 3.4444444444444444e-08,
|
| 1456 |
-
"logits/chosen": 7.130776405334473,
|
| 1457 |
-
"logits/rejected": 7.609295845031738,
|
| 1458 |
-
"logps/chosen": -438.42694091796875,
|
| 1459 |
-
"logps/rejected": -1249.6336669921875,
|
| 1460 |
-
"loss": 0.4750792980194092,
|
| 1461 |
-
"rewards/accuracies": 0.96875,
|
| 1462 |
-
"rewards/chosen": -1.273829460144043,
|
| 1463 |
-
"rewards/margins": 160.90731811523438,
|
| 1464 |
-
"rewards/rejected": -162.18113708496094,
|
| 1465 |
-
"step": 970
|
| 1466 |
-
},
|
| 1467 |
-
{
|
| 1468 |
-
"epoch": 2.6008640744433364,
|
| 1469 |
-
"grad_norm": 85.9113540649414,
|
| 1470 |
-
"learning_rate": 2.3333333333333334e-08,
|
| 1471 |
-
"logits/chosen": 7.113263130187988,
|
| 1472 |
-
"logits/rejected": 7.686596870422363,
|
| 1473 |
-
"logps/chosen": -434.325439453125,
|
| 1474 |
-
"logps/rejected": -1194.6849365234375,
|
| 1475 |
-
"loss": 0.33106160163879395,
|
| 1476 |
-
"rewards/accuracies": 0.9437500238418579,
|
| 1477 |
-
"rewards/chosen": -1.2038366794586182,
|
| 1478 |
-
"rewards/margins": 146.78440856933594,
|
| 1479 |
-
"rewards/rejected": -147.98825073242188,
|
| 1480 |
-
"step": 980
|
| 1481 |
-
},
|
| 1482 |
-
{
|
| 1483 |
-
"epoch": 2.627450980392157,
|
| 1484 |
-
"grad_norm": 2.4605165866986043e-20,
|
| 1485 |
-
"learning_rate": 1.2222222222222222e-08,
|
| 1486 |
-
"logits/chosen": 7.00030517578125,
|
| 1487 |
-
"logits/rejected": 7.477368354797363,
|
| 1488 |
-
"logps/chosen": -450.455078125,
|
| 1489 |
-
"logps/rejected": -1269.2520751953125,
|
| 1490 |
-
"loss": 0.2776132583618164,
|
| 1491 |
-
"rewards/accuracies": 0.981249988079071,
|
| 1492 |
-
"rewards/chosen": -3.280397891998291,
|
| 1493 |
-
"rewards/margins": 163.57626342773438,
|
| 1494 |
-
"rewards/rejected": -166.85665893554688,
|
| 1495 |
-
"step": 990
|
| 1496 |
-
},
|
| 1497 |
-
{
|
| 1498 |
-
"epoch": 2.6540378863409773,
|
| 1499 |
-
"grad_norm": 80.78559112548828,
|
| 1500 |
-
"learning_rate": 1.111111111111111e-09,
|
| 1501 |
-
"logits/chosen": 7.087013244628906,
|
| 1502 |
-
"logits/rejected": 7.507058620452881,
|
| 1503 |
-
"logps/chosen": -490.28857421875,
|
| 1504 |
-
"logps/rejected": -1223.248046875,
|
| 1505 |
-
"loss": 0.2815593719482422,
|
| 1506 |
-
"rewards/accuracies": 0.9750000238418579,
|
| 1507 |
-
"rewards/chosen": -2.167093276977539,
|
| 1508 |
-
"rewards/margins": 151.61813354492188,
|
| 1509 |
-
"rewards/rejected": -153.78524780273438,
|
| 1510 |
-
"step": 1000
|
| 1511 |
}
|
| 1512 |
],
|
| 1513 |
"logging_steps": 10,
|
| 1514 |
-
"max_steps":
|
| 1515 |
"num_input_tokens_seen": 0,
|
| 1516 |
-
"num_train_epochs":
|
| 1517 |
"save_steps": 200,
|
| 1518 |
"stateful_callbacks": {
|
| 1519 |
"TrainerControl": {
|
|
@@ -1522,7 +322,7 @@
|
|
| 1522 |
"should_evaluate": false,
|
| 1523 |
"should_log": false,
|
| 1524 |
"should_save": true,
|
| 1525 |
-
"should_training_stop":
|
| 1526 |
},
|
| 1527 |
"attributes": {}
|
| 1528 |
}
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5317381189764041,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 200,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
"epoch": 0.026586905948820207,
|
| 14 |
+
"grad_norm": 229.74172973632812,
|
| 15 |
+
"learning_rate": 1.8000000000000001e-06,
|
| 16 |
+
"logits/chosen": 1.892960548400879,
|
| 17 |
+
"logits/rejected": 2.2739109992980957,
|
| 18 |
+
"logps/chosen": -180.7786102294922,
|
| 19 |
+
"logps/rejected": -296.7843017578125,
|
| 20 |
+
"loss": 41.754217529296874,
|
| 21 |
"rewards/accuracies": 0.643750011920929,
|
| 22 |
+
"rewards/chosen": 127.22708892822266,
|
| 23 |
+
"rewards/margins": 51.65830612182617,
|
| 24 |
+
"rewards/rejected": 75.56879425048828,
|
| 25 |
"step": 10
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"epoch": 0.053173811897640415,
|
| 29 |
+
"grad_norm": 127.54531860351562,
|
| 30 |
+
"learning_rate": 3.8000000000000005e-06,
|
| 31 |
+
"logits/chosen": 2.360779285430908,
|
| 32 |
+
"logits/rejected": 2.5129952430725098,
|
| 33 |
+
"logps/chosen": -198.55577087402344,
|
| 34 |
+
"logps/rejected": -311.8116149902344,
|
| 35 |
+
"loss": 34.334066772460936,
|
| 36 |
+
"rewards/accuracies": 0.637499988079071,
|
| 37 |
+
"rewards/chosen": 133.30413818359375,
|
| 38 |
+
"rewards/margins": 69.63650512695312,
|
| 39 |
+
"rewards/rejected": 63.667640686035156,
|
| 40 |
"step": 20
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"epoch": 0.07976071784646062,
|
| 44 |
+
"grad_norm": 195.1651153564453,
|
| 45 |
+
"learning_rate": 5.8e-06,
|
| 46 |
+
"logits/chosen": 2.7846579551696777,
|
| 47 |
+
"logits/rejected": 2.912212610244751,
|
| 48 |
+
"logps/chosen": -201.8841094970703,
|
| 49 |
+
"logps/rejected": -343.28619384765625,
|
| 50 |
+
"loss": 31.000405883789064,
|
| 51 |
+
"rewards/accuracies": 0.706250011920929,
|
| 52 |
+
"rewards/chosen": 149.70481872558594,
|
| 53 |
+
"rewards/margins": 90.86196899414062,
|
| 54 |
+
"rewards/rejected": 58.842857360839844,
|
| 55 |
"step": 30
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"epoch": 0.10634762379528083,
|
| 59 |
+
"grad_norm": 217.8875274658203,
|
| 60 |
+
"learning_rate": 7.800000000000002e-06,
|
| 61 |
+
"logits/chosen": 2.687530040740967,
|
| 62 |
+
"logits/rejected": 2.89595365524292,
|
| 63 |
+
"logps/chosen": -180.60574340820312,
|
| 64 |
+
"logps/rejected": -395.2937927246094,
|
| 65 |
+
"loss": 33.74924926757812,
|
| 66 |
+
"rewards/accuracies": 0.675000011920929,
|
| 67 |
+
"rewards/chosen": 124.906494140625,
|
| 68 |
+
"rewards/margins": 96.38619995117188,
|
| 69 |
+
"rewards/rejected": 28.52029037475586,
|
| 70 |
"step": 40
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 0.13293452974410103,
|
| 74 |
+
"grad_norm": 123.57787322998047,
|
| 75 |
+
"learning_rate": 9.800000000000001e-06,
|
| 76 |
+
"logits/chosen": 3.6891541481018066,
|
| 77 |
+
"logits/rejected": 3.7287964820861816,
|
| 78 |
+
"logps/chosen": -233.40304565429688,
|
| 79 |
+
"logps/rejected": -501.949951171875,
|
| 80 |
+
"loss": 33.165060424804686,
|
| 81 |
+
"rewards/accuracies": 0.706250011920929,
|
| 82 |
+
"rewards/chosen": 132.43130493164062,
|
| 83 |
+
"rewards/margins": 153.64065551757812,
|
| 84 |
+
"rewards/rejected": -21.209354400634766,
|
| 85 |
"step": 50
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"epoch": 0.15952143569292124,
|
| 89 |
+
"grad_norm": 240.26422119140625,
|
| 90 |
+
"learning_rate": 9.742857142857143e-06,
|
| 91 |
+
"logits/chosen": 4.325669288635254,
|
| 92 |
+
"logits/rejected": 4.669508934020996,
|
| 93 |
+
"logps/chosen": -227.3274688720703,
|
| 94 |
+
"logps/rejected": -761.2488403320312,
|
| 95 |
+
"loss": 9.919349670410156,
|
| 96 |
+
"rewards/accuracies": 0.8374999761581421,
|
| 97 |
+
"rewards/chosen": 78.26126861572266,
|
| 98 |
+
"rewards/margins": 259.2169494628906,
|
| 99 |
+
"rewards/rejected": -180.95565795898438,
|
| 100 |
"step": 60
|
| 101 |
},
|
| 102 |
{
|
| 103 |
"epoch": 0.18610834164174145,
|
| 104 |
+
"grad_norm": 18.67397117614746,
|
| 105 |
+
"learning_rate": 9.457142857142858e-06,
|
| 106 |
+
"logits/chosen": 6.029001235961914,
|
| 107 |
+
"logits/rejected": 6.398881912231445,
|
| 108 |
+
"logps/chosen": -361.8104248046875,
|
| 109 |
+
"logps/rejected": -936.2852783203125,
|
| 110 |
+
"loss": 8.505984497070312,
|
| 111 |
+
"rewards/accuracies": 0.8999999761581421,
|
| 112 |
+
"rewards/chosen": 62.65727615356445,
|
| 113 |
+
"rewards/margins": 295.85296630859375,
|
| 114 |
+
"rewards/rejected": -233.1957550048828,
|
| 115 |
"step": 70
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"epoch": 0.21269524759056166,
|
| 119 |
+
"grad_norm": 424.631591796875,
|
| 120 |
+
"learning_rate": 9.171428571428572e-06,
|
| 121 |
+
"logits/chosen": 7.3346147537231445,
|
| 122 |
+
"logits/rejected": 7.6371870040893555,
|
| 123 |
+
"logps/chosen": -487.5819396972656,
|
| 124 |
+
"logps/rejected": -1231.750244140625,
|
| 125 |
+
"loss": 5.0393016815185545,
|
| 126 |
+
"rewards/accuracies": 0.925000011920929,
|
| 127 |
+
"rewards/chosen": 3.2465362548828125,
|
| 128 |
+
"rewards/margins": 386.61541748046875,
|
| 129 |
+
"rewards/rejected": -383.368896484375,
|
| 130 |
"step": 80
|
| 131 |
},
|
| 132 |
{
|
| 133 |
"epoch": 0.23928215353938184,
|
| 134 |
+
"grad_norm": 58.2872200012207,
|
| 135 |
+
"learning_rate": 8.885714285714286e-06,
|
| 136 |
+
"logits/chosen": 7.502760410308838,
|
| 137 |
+
"logits/rejected": 7.7781524658203125,
|
| 138 |
+
"logps/chosen": -515.661376953125,
|
| 139 |
+
"logps/rejected": -1255.3602294921875,
|
| 140 |
+
"loss": 2.7182350158691406,
|
| 141 |
+
"rewards/accuracies": 0.956250011920929,
|
| 142 |
+
"rewards/chosen": -3.7650718688964844,
|
| 143 |
+
"rewards/margins": 391.1650390625,
|
| 144 |
+
"rewards/rejected": -394.9300842285156,
|
| 145 |
"step": 90
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 0.26586905948820205,
|
| 149 |
+
"grad_norm": 0.0009401601273566484,
|
| 150 |
+
"learning_rate": 8.6e-06,
|
| 151 |
+
"logits/chosen": 6.9696502685546875,
|
| 152 |
+
"logits/rejected": 7.490464687347412,
|
| 153 |
+
"logps/chosen": -407.62591552734375,
|
| 154 |
+
"logps/rejected": -1201.7757568359375,
|
| 155 |
+
"loss": 1.852958869934082,
|
| 156 |
+
"rewards/accuracies": 0.9437500238418579,
|
| 157 |
+
"rewards/chosen": 16.64017105102539,
|
| 158 |
+
"rewards/margins": 388.97650146484375,
|
| 159 |
+
"rewards/rejected": -372.3362731933594,
|
| 160 |
"step": 100
|
| 161 |
},
|
| 162 |
{
|
| 163 |
"epoch": 0.2924559654370223,
|
| 164 |
+
"grad_norm": 776.8017578125,
|
| 165 |
+
"learning_rate": 8.314285714285715e-06,
|
| 166 |
+
"logits/chosen": 7.605508327484131,
|
| 167 |
+
"logits/rejected": 8.056883811950684,
|
| 168 |
+
"logps/chosen": -500.75714111328125,
|
| 169 |
+
"logps/rejected": -1358.50732421875,
|
| 170 |
+
"loss": 0.4602807998657227,
|
| 171 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 172 |
+
"rewards/chosen": -11.148886680603027,
|
| 173 |
+
"rewards/margins": 444.7156677246094,
|
| 174 |
+
"rewards/rejected": -455.8646545410156,
|
| 175 |
"step": 110
|
| 176 |
},
|
| 177 |
{
|
| 178 |
"epoch": 0.3190428713858425,
|
| 179 |
+
"grad_norm": 9.229455307652179e-14,
|
| 180 |
+
"learning_rate": 8.02857142857143e-06,
|
| 181 |
+
"logits/chosen": 7.707437992095947,
|
| 182 |
+
"logits/rejected": 8.208626747131348,
|
| 183 |
+
"logps/chosen": -493.72503662109375,
|
| 184 |
+
"logps/rejected": -1418.4669189453125,
|
| 185 |
+
"loss": 0.4660654544830322,
|
| 186 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 187 |
+
"rewards/chosen": -27.65629005432129,
|
| 188 |
+
"rewards/margins": 457.8779296875,
|
| 189 |
+
"rewards/rejected": -485.53424072265625,
|
| 190 |
"step": 120
|
| 191 |
},
|
| 192 |
{
|
| 193 |
"epoch": 0.34562977733466266,
|
| 194 |
+
"grad_norm": 3.0277444440507395e-10,
|
| 195 |
+
"learning_rate": 7.742857142857144e-06,
|
| 196 |
+
"logits/chosen": 7.820192813873291,
|
| 197 |
+
"logits/rejected": 8.277512550354004,
|
| 198 |
+
"logps/chosen": -490.735595703125,
|
| 199 |
+
"logps/rejected": -1337.3505859375,
|
| 200 |
+
"loss": 1.2780420303344726,
|
| 201 |
+
"rewards/accuracies": 0.981249988079071,
|
| 202 |
+
"rewards/chosen": 0.13531294465065002,
|
| 203 |
+
"rewards/margins": 430.638671875,
|
| 204 |
+
"rewards/rejected": -430.50335693359375,
|
| 205 |
"step": 130
|
| 206 |
},
|
| 207 |
{
|
| 208 |
"epoch": 0.3722166832834829,
|
| 209 |
+
"grad_norm": 1.2154150397236663e-07,
|
| 210 |
+
"learning_rate": 7.457142857142857e-06,
|
| 211 |
+
"logits/chosen": 7.428654670715332,
|
| 212 |
+
"logits/rejected": 7.884097099304199,
|
| 213 |
+
"logps/chosen": -490.9019470214844,
|
| 214 |
+
"logps/rejected": -1213.6722412109375,
|
| 215 |
+
"loss": 0.03439792990684509,
|
| 216 |
+
"rewards/accuracies": 0.9937499761581421,
|
| 217 |
+
"rewards/chosen": 3.2693276405334473,
|
| 218 |
+
"rewards/margins": 378.11016845703125,
|
| 219 |
+
"rewards/rejected": -374.8408203125,
|
| 220 |
"step": 140
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 0.3988035892323031,
|
| 224 |
+
"grad_norm": 0.0008078943355940282,
|
| 225 |
+
"learning_rate": 7.1714285714285725e-06,
|
| 226 |
+
"logits/chosen": 7.338967800140381,
|
| 227 |
+
"logits/rejected": 7.722776889801025,
|
| 228 |
+
"logps/chosen": -444.8753356933594,
|
| 229 |
+
"logps/rejected": -1266.625244140625,
|
| 230 |
+
"loss": 1.42781343460083,
|
| 231 |
+
"rewards/accuracies": 0.96875,
|
| 232 |
+
"rewards/chosen": -2.331421375274658,
|
| 233 |
+
"rewards/margins": 393.41400146484375,
|
| 234 |
+
"rewards/rejected": -395.74542236328125,
|
| 235 |
"step": 150
|
| 236 |
},
|
| 237 |
{
|
| 238 |
"epoch": 0.4253904951811233,
|
| 239 |
+
"grad_norm": 0.0,
|
| 240 |
+
"learning_rate": 6.885714285714287e-06,
|
| 241 |
+
"logits/chosen": 7.96518087387085,
|
| 242 |
+
"logits/rejected": 8.39413833618164,
|
| 243 |
+
"logps/chosen": -586.011962890625,
|
| 244 |
+
"logps/rejected": -1365.374267578125,
|
| 245 |
+
"loss": 1.9711128234863282,
|
| 246 |
+
"rewards/accuracies": 0.96875,
|
| 247 |
+
"rewards/chosen": -28.12943458557129,
|
| 248 |
+
"rewards/margins": 417.09210205078125,
|
| 249 |
+
"rewards/rejected": -445.22149658203125,
|
| 250 |
"step": 160
|
| 251 |
},
|
| 252 |
{
|
| 253 |
"epoch": 0.4519774011299435,
|
| 254 |
+
"grad_norm": 5.4764127260797935e-12,
|
| 255 |
+
"learning_rate": 6.600000000000001e-06,
|
| 256 |
+
"logits/chosen": 7.3422675132751465,
|
| 257 |
+
"logits/rejected": 7.857165336608887,
|
| 258 |
+
"logps/chosen": -465.5393981933594,
|
| 259 |
+
"logps/rejected": -1300.1103515625,
|
| 260 |
+
"loss": 0.004332171380519867,
|
| 261 |
+
"rewards/accuracies": 0.9937499761581421,
|
| 262 |
+
"rewards/chosen": 1.256854772567749,
|
| 263 |
+
"rewards/margins": 424.2041931152344,
|
| 264 |
+
"rewards/rejected": -422.9473571777344,
|
| 265 |
"step": 170
|
| 266 |
},
|
| 267 |
{
|
| 268 |
"epoch": 0.4785643070787637,
|
| 269 |
+
"grad_norm": 422.58892822265625,
|
| 270 |
+
"learning_rate": 6.314285714285715e-06,
|
| 271 |
+
"logits/chosen": 7.199074745178223,
|
| 272 |
+
"logits/rejected": 7.617570400238037,
|
| 273 |
+
"logps/chosen": -446.2982482910156,
|
| 274 |
+
"logps/rejected": -1317.12890625,
|
| 275 |
+
"loss": 0.9593421936035156,
|
| 276 |
+
"rewards/accuracies": 0.9750000238418579,
|
| 277 |
+
"rewards/chosen": 2.7993197441101074,
|
| 278 |
+
"rewards/margins": 444.40753173828125,
|
| 279 |
+
"rewards/rejected": -441.60821533203125,
|
| 280 |
"step": 180
|
| 281 |
},
|
| 282 |
{
|
| 283 |
"epoch": 0.5051512130275839,
|
| 284 |
+
"grad_norm": 0.0,
|
| 285 |
+
"learning_rate": 6.028571428571429e-06,
|
| 286 |
+
"logits/chosen": 7.070580959320068,
|
| 287 |
+
"logits/rejected": 7.494720458984375,
|
| 288 |
+
"logps/chosen": -430.97760009765625,
|
| 289 |
+
"logps/rejected": -1291.35205078125,
|
| 290 |
+
"loss": 0.09595458507537842,
|
| 291 |
+
"rewards/accuracies": 0.9937499761581421,
|
| 292 |
+
"rewards/chosen": 7.344033241271973,
|
| 293 |
+
"rewards/margins": 433.2731018066406,
|
| 294 |
+
"rewards/rejected": -425.9291076660156,
|
| 295 |
"step": 190
|
| 296 |
},
|
| 297 |
{
|
| 298 |
"epoch": 0.5317381189764041,
|
| 299 |
+
"grad_norm": 1.3654603958129883,
|
| 300 |
+
"learning_rate": 5.742857142857143e-06,
|
| 301 |
+
"logits/chosen": 7.252472877502441,
|
| 302 |
+
"logits/rejected": 7.820960998535156,
|
| 303 |
+
"logps/chosen": -435.6075134277344,
|
| 304 |
+
"logps/rejected": -1333.8228759765625,
|
| 305 |
+
"loss": 0.7474074840545655,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
"rewards/accuracies": 0.956250011920929,
|
| 307 |
+
"rewards/chosen": 1.129046082496643,
|
| 308 |
+
"rewards/margins": 456.1576232910156,
|
| 309 |
+
"rewards/rejected": -455.028564453125,
|
| 310 |
+
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
}
|
| 312 |
],
|
| 313 |
"logging_steps": 10,
|
| 314 |
+
"max_steps": 400,
|
| 315 |
"num_input_tokens_seen": 0,
|
| 316 |
+
"num_train_epochs": 2,
|
| 317 |
"save_steps": 200,
|
| 318 |
"stateful_callbacks": {
|
| 319 |
"TrainerControl": {
|
|
|
|
| 322 |
"should_evaluate": false,
|
| 323 |
"should_log": false,
|
| 324 |
"should_save": true,
|
| 325 |
+
"should_training_stop": false
|
| 326 |
},
|
| 327 |
"attributes": {}
|
| 328 |
}
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6289
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa5979d784b3be5f03398730b0db9a0aaad24ae1fdea10accf8ecc4f7c831b44
|
| 3 |
size 6289
|