irishprancer committed on
Commit
7c19f8e
·
verified ·
1 Parent(s): 34e9d78

Training in progress, step 3600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d85d797097afce863d924a902fd673ba283872940210e3c933a64dde7a4be42
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a7db4ae93951b0eb394bb0a363f73cd5df34f9278223503ea607797313cdef9
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32f59ddc5d1d02f125383197f9e87f354af36a0ab9f9c34a9101568ff6028781
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:550ddc0253077b9ade8068188ab7383f87735a416196347e817b58cdd6eecfa7
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8298cd2038dcf4bc8c0ba6dfa0d230a23246f758d069f7c77f9c04f77b6d8f8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ae35034e3077f87418b20f4a24e69590c4f56a313fa0284d685c7f3a1b03d8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c1ec9215b291496d6c7959ac8b8fcef926d7c6ecd1d84b37dbf2c985bef91a3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a0ba1807c4ff64f4d8fc6d84a7a517689523073c7ea31a60948b80a14d9e61
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.717534065246582,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 150.0,
5
  "eval_steps": 150,
6
- "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3302,6 +3302,151 @@
3302
  "EMA_steps_per_second": 25.002,
3303
  "epoch": 150.0,
3304
  "step": 3450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3305
  }
3306
  ],
3307
  "logging_steps": 10,
@@ -3321,7 +3466,7 @@
3321
  "attributes": {}
3322
  }
3323
  },
3324
- "total_flos": 8.89909420608553e+16,
3325
  "train_batch_size": 4,
3326
  "trial_name": null,
3327
  "trial_params": null
 
1
  {
2
  "best_metric": 0.717534065246582,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 156.52173913043478,
5
  "eval_steps": 150,
6
+ "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3302
  "EMA_steps_per_second": 25.002,
3303
  "epoch": 150.0,
3304
  "step": 3450
3305
+ },
3306
+ {
3307
+ "epoch": 150.43478260869566,
3308
+ "grad_norm": 1.7541120052337646,
3309
+ "learning_rate": 2.4672082280509036e-07,
3310
+ "loss": 0.214,
3311
+ "step": 3460
3312
+ },
3313
+ {
3314
+ "epoch": 150.8695652173913,
3315
+ "grad_norm": 2.0008656978607178,
3316
+ "learning_rate": 4.934416456101807e-07,
3317
+ "loss": 0.2627,
3318
+ "step": 3470
3319
+ },
3320
+ {
3321
+ "epoch": 151.30434782608697,
3322
+ "grad_norm": 1.6539170742034912,
3323
+ "learning_rate": 7.40162468415271e-07,
3324
+ "loss": 0.2,
3325
+ "step": 3480
3326
+ },
3327
+ {
3328
+ "epoch": 151.7391304347826,
3329
+ "grad_norm": 2.369926691055298,
3330
+ "learning_rate": 9.868832912203614e-07,
3331
+ "loss": 0.2478,
3332
+ "step": 3490
3333
+ },
3334
+ {
3335
+ "epoch": 152.17391304347825,
3336
+ "grad_norm": 2.07112979888916,
3337
+ "learning_rate": 1.2336041140254517e-06,
3338
+ "loss": 0.2427,
3339
+ "step": 3500
3340
+ },
3341
+ {
3342
+ "epoch": 152.6086956521739,
3343
+ "grad_norm": 1.6030749082565308,
3344
+ "learning_rate": 1.480324936830542e-06,
3345
+ "loss": 0.2402,
3346
+ "step": 3510
3347
+ },
3348
+ {
3349
+ "epoch": 153.04347826086956,
3350
+ "grad_norm": 1.5949645042419434,
3351
+ "learning_rate": 1.7270457596356322e-06,
3352
+ "loss": 0.2072,
3353
+ "step": 3520
3354
+ },
3355
+ {
3356
+ "epoch": 153.47826086956522,
3357
+ "grad_norm": 2.338641881942749,
3358
+ "learning_rate": 1.973766582440723e-06,
3359
+ "loss": 0.2506,
3360
+ "step": 3530
3361
+ },
3362
+ {
3363
+ "epoch": 153.91304347826087,
3364
+ "grad_norm": 2.719093084335327,
3365
+ "learning_rate": 2.220487405245813e-06,
3366
+ "loss": 0.2321,
3367
+ "step": 3540
3368
+ },
3369
+ {
3370
+ "epoch": 154.34782608695653,
3371
+ "grad_norm": 2.292358636856079,
3372
+ "learning_rate": 2.4672082280509034e-06,
3373
+ "loss": 0.2404,
3374
+ "step": 3550
3375
+ },
3376
+ {
3377
+ "epoch": 154.7826086956522,
3378
+ "grad_norm": 2.0019381046295166,
3379
+ "learning_rate": 2.4672079835702752e-06,
3380
+ "loss": 0.2343,
3381
+ "step": 3560
3382
+ },
3383
+ {
3384
+ "epoch": 155.2173913043478,
3385
+ "grad_norm": 1.6779125928878784,
3386
+ "learning_rate": 2.4672072501284865e-06,
3387
+ "loss": 0.1963,
3388
+ "step": 3570
3389
+ },
3390
+ {
3391
+ "epoch": 155.65217391304347,
3392
+ "grad_norm": 2.0632243156433105,
3393
+ "learning_rate": 2.467206027725829e-06,
3394
+ "loss": 0.267,
3395
+ "step": 3580
3396
+ },
3397
+ {
3398
+ "epoch": 156.08695652173913,
3399
+ "grad_norm": 1.6089539527893066,
3400
+ "learning_rate": 2.467204316362787e-06,
3401
+ "loss": 0.2034,
3402
+ "step": 3590
3403
+ },
3404
+ {
3405
+ "epoch": 156.52173913043478,
3406
+ "grad_norm": 2.475633382797241,
3407
+ "learning_rate": 2.4672021160400387e-06,
3408
+ "loss": 0.2685,
3409
+ "step": 3600
3410
+ },
3411
+ {
3412
+ "epoch": 156.52173913043478,
3413
+ "eval_loss": 0.9592596292495728,
3414
+ "eval_runtime": 0.4813,
3415
+ "eval_samples_per_second": 20.778,
3416
+ "eval_steps_per_second": 20.778,
3417
+ "step": 3600
3418
+ },
3419
+ {
3420
+ "Start_State_loss": 0.7309322357177734,
3421
+ "Start_State_runtime": 0.4223,
3422
+ "Start_State_samples_per_second": 23.679,
3423
+ "Start_State_steps_per_second": 23.679,
3424
+ "epoch": 156.52173913043478,
3425
+ "step": 3600
3426
+ },
3427
+ {
3428
+ "Raw_Model_loss": 0.9592596292495728,
3429
+ "Raw_Model_runtime": 0.3944,
3430
+ "Raw_Model_samples_per_second": 25.356,
3431
+ "Raw_Model_steps_per_second": 25.356,
3432
+ "epoch": 156.52173913043478,
3433
+ "step": 3600
3434
+ },
3435
+ {
3436
+ "SWA_loss": 0.8119293451309204,
3437
+ "SWA_runtime": 0.3904,
3438
+ "SWA_samples_per_second": 25.615,
3439
+ "SWA_steps_per_second": 25.615,
3440
+ "epoch": 156.52173913043478,
3441
+ "step": 3600
3442
+ },
3443
+ {
3444
+ "EMA_loss": 0.7311049103736877,
3445
+ "EMA_runtime": 0.4017,
3446
+ "EMA_samples_per_second": 24.896,
3447
+ "EMA_steps_per_second": 24.896,
3448
+ "epoch": 156.52173913043478,
3449
+ "step": 3600
3450
  }
3451
  ],
3452
  "logging_steps": 10,
 
3466
  "attributes": {}
3467
  }
3468
  },
3469
+ "total_flos": 9.28760054861906e+16,
3470
  "train_batch_size": 4,
3471
  "trial_name": null,
3472
  "trial_params": null