moos124 commited on
Commit
c7b914f
·
verified ·
1 Parent(s): 39e7a28

Training in progress, step 690, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62fa0dae9180b8e9cf91b61b9b0567019ae5925eeaa05eadbc254e528f7a2648
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:264edc690d05f8e065574c0018129efdf89a9ff31cb3a9f4a2e16da5cf2c6245
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:929396c762c2458f69fce4eee4ff4f8bdbe90788d9ee70d2fce722708de74c60
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef03d259b3f24d4ab2229225454a30ed7f99d83ee087f0cbbdb2527f63c5f6f
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d875597fcb96668619d5dfe31a66ddf6368794a8fc9781b7cc0c3d1a9465e866
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c7c65247b0d734de29c98800e2eda78993d4889b56195126de4a58afd6cd2b7
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ead9e97463c05478511a6c80978c58c03ec8da4f5aacb879ebc6eb4dc33296d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e72bc4edd90a9ccce0a41af921593fef7756a0a79934b8018125ec1fca3204fd
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.14506666666666668,
6
  "eval_steps": 500,
7
- "global_step": 680,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -698,6 +698,16 @@
698
  "mean_token_accuracy": 0.7894617035984993,
699
  "num_tokens": 3143357.0,
700
  "step": 680
 
 
 
 
 
 
 
 
 
 
701
  }
702
  ],
703
  "logging_steps": 10,
@@ -717,7 +727,7 @@
717
  "attributes": {}
718
  }
719
  },
720
- "total_flos": 1.4888787600940032e+16,
721
  "train_batch_size": 4,
722
  "trial_name": null,
723
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.1472,
6
  "eval_steps": 500,
7
+ "global_step": 690,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
698
  "mean_token_accuracy": 0.7894617035984993,
699
  "num_tokens": 3143357.0,
700
  "step": 680
701
+ },
702
+ {
703
+ "entropy": 0.9675152562558651,
704
+ "epoch": 0.1472,
705
+ "grad_norm": 0.27567023038864136,
706
+ "learning_rate": 9.954742170899172e-05,
707
+ "loss": 1.0641048431396485,
708
+ "mean_token_accuracy": 0.7591987878084183,
709
+ "num_tokens": 3193515.0,
710
+ "step": 690
711
  }
712
  ],
713
  "logging_steps": 10,
 
727
  "attributes": {}
728
  }
729
  },
730
+ "total_flos": 1.5136415728309248e+16,
731
  "train_batch_size": 4,
732
  "trial_name": null,
733
  "trial_params": null