moos124 commited on
Commit
4176d6c
·
verified ·
1 Parent(s): cf072f3

Training in progress, step 650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51dd734530cde416a6bc5d2b6cd555828693264cc1cc46aeeecee9f81d0c52bd
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba2c9e34e30db5c998ce0a4de749a0bdd1a6b5f90b27534ad36835dbf6d582d
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea0c3af9a9f1a63074486c3c8e171866d98c3c53377d97e296b715e48782fba6
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:641d66196316619b3710a969d55a4d82f4fb3b572c3c294b8b6409d90761e312
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02d488df1edbef1d3d4d686f2ade3e4d4cafbb2ad64ace71441881733f52db9b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4114553082ca1845f3fc7e69c4e6e7243ca221ebcfd740c1a75560d0bd716ac
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99a26b47fd98de9029ba62a9e893f466571afe281f2deba5f59a50e0ac0b1f13
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:026b5c74cc8e086c9916a04299b290480732d771d50ea5509c5a30a72dbfe481
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.13653333333333334,
6
  "eval_steps": 500,
7
- "global_step": 640,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -658,6 +658,16 @@
658
  "mean_token_accuracy": 0.7443821474909782,
659
  "num_tokens": 2962337.0,
660
  "step": 640
 
 
 
 
 
 
 
 
 
 
661
  }
662
  ],
663
  "logging_steps": 10,
@@ -677,7 +687,7 @@
677
  "attributes": {}
678
  }
679
  },
680
- "total_flos": 1.404896254752768e+16,
681
  "train_batch_size": 4,
682
  "trial_name": null,
683
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.13866666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 650,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
658
  "mean_token_accuracy": 0.7443821474909782,
659
  "num_tokens": 2962337.0,
660
  "step": 640
661
+ },
662
+ {
663
+ "entropy": 1.0139970764517785,
664
+ "epoch": 0.13866666666666666,
665
+ "grad_norm": 0.2730376124382019,
666
+ "learning_rate": 9.963560416004623e-05,
667
+ "loss": 1.112107276916504,
668
+ "mean_token_accuracy": 0.7496687114238739,
669
+ "num_tokens": 3006113.0,
670
+ "step": 650
671
  }
672
  ],
673
  "logging_steps": 10,
 
687
  "attributes": {}
688
  }
689
  },
690
+ "total_flos": 1.4248061762098176e+16,
691
  "train_batch_size": 4,
692
  "trial_name": null,
693
  "trial_params": null