bitsoko commited on
Commit
6552d7a
·
verified ·
1 Parent(s): 6bb229f

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43ae9755626e2950d8c06fdf1d60414796248dfc072fc65db072fe182e1cf597
3
  size 1912664024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b35aacbfdc92cae95f4722e8ccfc67a4f68647ffa65ea431590ce58096dec808
3
  size 1912664024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f97b6951acc478b8072c3a2f936c84025fc1e144d1addcc1796e7e406ff11a9
3
  size 958697812
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49e207697dc3ec720bb6feb8f8eae1a74a911ee9fb54850c4480006eaa332e55
3
  size 958697812
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d9b92fa45e44153a07435b55d5d3d71bf302bfd9a3b3f027d29977fda525192
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01d248dd4a61bc85f685999c311d599535fb345e57d98e4ccefd0735e695f9bd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:520ffe1792334414c9404c774e2e9174a1306a63c3f66ae705923c2d8b0a934b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc36f0303920d64d6d5947100d6f9f34e124ede65e136b78d6101e85e913cf9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03374440149702436,
5
  "eval_steps": 20,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -412,6 +412,51 @@
412
  "eval_samples_per_second": 4.246,
413
  "eval_steps_per_second": 0.552,
414
  "step": 540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  }
416
  ],
417
  "logging_steps": 20,
@@ -419,7 +464,7 @@
419
  "num_input_tokens_seen": 0,
420
  "num_train_epochs": 1,
421
  "save_steps": 50,
422
- "total_flos": 3.97156971856896e+16,
423
  "train_batch_size": 2,
424
  "trial_name": null,
425
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03681207436039021,
5
  "eval_steps": 20,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
412
  "eval_samples_per_second": 4.246,
413
  "eval_steps_per_second": 0.552,
414
  "step": 540
415
+ },
416
+ {
417
+ "epoch": 0.034357936069697524,
418
+ "grad_norm": 0.1211417093873024,
419
+ "learning_rate": 0.0001931876764453173,
420
+ "loss": 1.5879,
421
+ "step": 560
422
+ },
423
+ {
424
+ "epoch": 0.034357936069697524,
425
+ "eval_loss": 1.4466437101364136,
426
+ "eval_runtime": 23.8508,
427
+ "eval_samples_per_second": 4.193,
428
+ "eval_steps_per_second": 0.545,
429
+ "step": 560
430
+ },
431
+ {
432
+ "epoch": 0.03558500521504387,
433
+ "grad_norm": 0.14397528767585754,
434
+ "learning_rate": 0.00019294218730821162,
435
+ "loss": 1.5352,
436
+ "step": 580
437
+ },
438
+ {
439
+ "epoch": 0.03558500521504387,
440
+ "eval_loss": 1.4339115619659424,
441
+ "eval_runtime": 23.649,
442
+ "eval_samples_per_second": 4.229,
443
+ "eval_steps_per_second": 0.55,
444
+ "step": 580
445
+ },
446
+ {
447
+ "epoch": 0.03681207436039021,
448
+ "grad_norm": 0.12468410283327103,
449
+ "learning_rate": 0.00019269669817110593,
450
+ "loss": 1.5045,
451
+ "step": 600
452
+ },
453
+ {
454
+ "epoch": 0.03681207436039021,
455
+ "eval_loss": 1.4277862310409546,
456
+ "eval_runtime": 23.647,
457
+ "eval_samples_per_second": 4.229,
458
+ "eval_steps_per_second": 0.55,
459
+ "step": 600
460
  }
461
  ],
462
  "logging_steps": 20,
 
464
  "num_input_tokens_seen": 0,
465
  "num_train_epochs": 1,
466
  "save_steps": 50,
467
+ "total_flos": 4.3772694621696e+16,
468
  "train_batch_size": 2,
469
  "trial_name": null,
470
  "trial_params": null