moos124 committed on
Commit
346cbcc
·
verified ·
1 Parent(s): 193a944

Training in progress, step 3620, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5946e092817c33b1c880596ed9707c940052bb080175730284e9ae1f1522c828
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e91fd52db8bb9783452c0d5dd7c0af8a657867e66c06b6e3c2761fd5dc918673
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12630e53a657ea86e2e8aa72c9a98409e0c66da704c7e3463849f04dc66bb1ca
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9208e903089f9a7f3069e3625af4cafbe4549e30ac6b829aa123976463344dde
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e1bb00994a4c4e87d5404278a1c2142958faad299de0c9dc68af0079fd3170e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eeb52f222e24a9a9e42fa6918b67db8ffa8ef2f4f4ec3281838b0693b65f0a25
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b10098c4ee79ca867481d2aa5aab5913b79f18cc7282e5a3d2699aeeabee88fe
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8422d6bece112fd75d3c138f96db21f37100a7fc66a031ae0f6c8cffc187aaa0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7701333333333333,
6
  "eval_steps": 500,
7
- "global_step": 3610,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3628,6 +3628,16 @@
3628
  "mean_token_accuracy": 0.7834656447172165,
3629
  "num_tokens": 16802813.0,
3630
  "step": 3610
 
 
 
 
 
 
 
 
 
 
3631
  }
3632
  ],
3633
  "logging_steps": 10,
@@ -3647,7 +3657,7 @@
3647
  "attributes": {}
3648
  }
3649
  },
3650
- "total_flos": 7.962290572873114e+16,
3651
  "train_batch_size": 4,
3652
  "trial_name": null,
3653
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7722666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 3620,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3628
  "mean_token_accuracy": 0.7834656447172165,
3629
  "num_tokens": 16802813.0,
3630
  "step": 3610
3631
+ },
3632
+ {
3633
+ "entropy": 1.0183790929615497,
3634
+ "epoch": 0.7722666666666667,
3635
+ "grad_norm": 0.24549080431461334,
3636
+ "learning_rate": 7.047692579601424e-05,
3637
+ "loss": 1.1990603446960448,
3638
+ "mean_token_accuracy": 0.7547581911087036,
3639
+ "num_tokens": 16850703.0,
3640
+ "step": 3620
3641
  }
3642
  ],
3643
  "logging_steps": 10,
 
3657
  "attributes": {}
3658
  }
3659
  },
3660
+ "total_flos": 7.983529744959283e+16,
3661
  "train_batch_size": 4,
3662
  "trial_name": null,
3663
  "trial_params": null