moos124 commited on
Commit
fa15b7a
·
verified ·
1 Parent(s): c349061

Training in progress, step 3630, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e91fd52db8bb9783452c0d5dd7c0af8a657867e66c06b6e3c2761fd5dc918673
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44aede43cb1a41b83e1247683ea557b147107a67f306ff62d1398bbdad92c1b
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9208e903089f9a7f3069e3625af4cafbe4549e30ac6b829aa123976463344dde
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b31aaba1fda764e711b455094d3ead4cbaea7041b9472d183caf81a23b9747a
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeb52f222e24a9a9e42fa6918b67db8ffa8ef2f4f4ec3281838b0693b65f0a25
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28be791632a2ce933cf99b51e2bfeaac4f925036c002da006e45360cf39931f0
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8422d6bece112fd75d3c138f96db21f37100a7fc66a031ae0f6c8cffc187aaa0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3875b65aa45ddb86163d814c0192e0142bb08111de8324f09ea0668c6f3cca4
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7722666666666667,
6
  "eval_steps": 500,
7
- "global_step": 3620,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3638,6 +3638,16 @@
3638
  "mean_token_accuracy": 0.7547581911087036,
3639
  "num_tokens": 16850703.0,
3640
  "step": 3620
 
 
 
 
 
 
 
 
 
 
3641
  }
3642
  ],
3643
  "logging_steps": 10,
@@ -3657,7 +3667,7 @@
3657
  "attributes": {}
3658
  }
3659
  },
3660
- "total_flos": 7.983529744959283e+16,
3661
  "train_batch_size": 4,
3662
  "trial_name": null,
3663
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7744,
6
  "eval_steps": 500,
7
+ "global_step": 3630,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3638
  "mean_token_accuracy": 0.7547581911087036,
3639
  "num_tokens": 16850703.0,
3640
  "step": 3620
3641
+ },
3642
+ {
3643
+ "entropy": 0.8650934003293514,
3644
+ "epoch": 0.7744,
3645
+ "grad_norm": 0.23581688106060028,
3646
+ "learning_rate": 7.031891161226608e-05,
3647
+ "loss": 0.9123600959777832,
3648
+ "mean_token_accuracy": 0.7830170378088951,
3649
+ "num_tokens": 16894959.0,
3650
+ "step": 3630
3651
  }
3652
  ],
3653
  "logging_steps": 10,
 
3667
  "attributes": {}
3668
  }
3669
  },
3670
+ "total_flos": 8.003783018612736e+16,
3671
  "train_batch_size": 4,
3672
  "trial_name": null,
3673
  "trial_params": null