bitsoko committed on
Commit
52adb31
·
verified ·
1 Parent(s): dedcf9b

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b35aacbfdc92cae95f4722e8ccfc67a4f68647ffa65ea431590ce58096dec808
3
  size 1912664024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77bec1f4ac425531a05582ea8046293677e9aef7e25a09da01544099efb93f8d
3
  size 1912664024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49e207697dc3ec720bb6feb8f8eae1a74a911ee9fb54850c4480006eaa332e55
3
  size 958697812
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c62765d738accf85fc7c3f65162136ac7c75de679fcbaa389480de246257616a
3
  size 958697812
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01d248dd4a61bc85f685999c311d599535fb345e57d98e4ccefd0735e695f9bd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0ab450b6ed3afea5706163f591f78b56c95466e0ef81af351a47a99261b91e2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cc36f0303920d64d6d5947100d6f9f34e124ede65e136b78d6101e85e913cf9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b4f26951b84f5727f8b34d9245ef840230c09420a237b4b37e336f28fe1f3c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.03681207436039021,
5
  "eval_steps": 20,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -457,6 +457,36 @@
457
  "eval_samples_per_second": 4.229,
458
  "eval_steps_per_second": 0.55,
459
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
  "logging_steps": 20,
@@ -464,7 +494,7 @@
464
  "num_input_tokens_seen": 0,
465
  "num_train_epochs": 1,
466
  "save_steps": 50,
467
- "total_flos": 4.3772694621696e+16,
468
  "train_batch_size": 2,
469
  "trial_name": null,
470
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.039879747223756055,
5
  "eval_steps": 20,
6
+ "global_step": 650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
457
  "eval_samples_per_second": 4.229,
458
  "eval_steps_per_second": 0.55,
459
  "step": 600
460
+ },
461
+ {
462
+ "epoch": 0.03803914350573655,
463
+ "grad_norm": 0.1577584445476532,
464
+ "learning_rate": 0.00019245120903400024,
465
+ "loss": 1.5497,
466
+ "step": 620
467
+ },
468
+ {
469
+ "epoch": 0.03803914350573655,
470
+ "eval_loss": 1.4203659296035767,
471
+ "eval_runtime": 23.8622,
472
+ "eval_samples_per_second": 4.191,
473
+ "eval_steps_per_second": 0.545,
474
+ "step": 620
475
+ },
476
+ {
477
+ "epoch": 0.03926621265108289,
478
+ "grad_norm": 0.12410438805818558,
479
+ "learning_rate": 0.00019220571989689457,
480
+ "loss": 1.503,
481
+ "step": 640
482
+ },
483
+ {
484
+ "epoch": 0.03926621265108289,
485
+ "eval_loss": 1.4154139757156372,
486
+ "eval_runtime": 23.4706,
487
+ "eval_samples_per_second": 4.261,
488
+ "eval_steps_per_second": 0.554,
489
+ "step": 640
490
  }
491
  ],
492
  "logging_steps": 20,
 
494
  "num_input_tokens_seen": 0,
495
  "num_train_epochs": 1,
496
  "save_steps": 50,
497
+ "total_flos": 4.730105544192e+16,
498
  "train_batch_size": 2,
499
  "trial_name": null,
500
  "trial_params": null