moos124 commited on
Commit
d4d8290
·
verified ·
1 Parent(s): 75333eb

Training in progress, step 1750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f4c98f6aabf49be4f26c5bcb1cf24907ecf0e87df4e6636ad3341d315bfeb6c
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de0554991b14b7f68ec4557853cba19505adcd1ccfeb166bc22a2875dd24fd80
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f10928bb50ddabe7412701f4397c876255be29494039fcf4ea3f9093e23997cf
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00960b1186e24c4e09c7902032a383f5860085f00e9ba295ccf8972c8012bd4f
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:deae801c158964c40f1573c507c036ee10487b29620e828b0bcf022410dd837b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8631da7f3d81e34988bef387e6d9466592c4a772e476986c832977b41f1b4a16
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb4b5860837a63ee23b2a132dfbe0277b390aeac6122771e4989f0edff682dc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a64b7e7ff55c7126c1169c0c41bd866d8a3f18d85cdaf2c1db2a45052a9484a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.3712,
6
  "eval_steps": 500,
7
- "global_step": 1740,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1758,6 +1758,16 @@
1758
  "mean_token_accuracy": 0.7833232149481774,
1759
  "num_tokens": 8071410.0,
1760
  "step": 1740
 
 
 
 
 
 
 
 
 
 
1761
  }
1762
  ],
1763
  "logging_steps": 10,
@@ -1777,7 +1787,7 @@
1777
  "attributes": {}
1778
  }
1779
  },
1780
- "total_flos": 3.830219015852237e+16,
1781
  "train_batch_size": 4,
1782
  "trial_name": null,
1783
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.37333333333333335,
6
  "eval_steps": 500,
7
+ "global_step": 1750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1758
  "mean_token_accuracy": 0.7833232149481774,
1759
  "num_tokens": 8071410.0,
1760
  "step": 1740
1761
+ },
1762
+ {
1763
+ "entropy": 0.8095596194267273,
1764
+ "epoch": 0.37333333333333335,
1765
+ "grad_norm": 0.36941683292388916,
1766
+ "learning_rate": 9.38416551705078e-05,
1767
+ "loss": 0.8782508850097657,
1768
+ "mean_token_accuracy": 0.7895680025219918,
1769
+ "num_tokens": 8113695.0,
1770
+ "step": 1750
1771
  }
1772
  ],
1773
  "logging_steps": 10,
 
1787
  "attributes": {}
1788
  }
1789
  },
1790
+ "total_flos": 3.849319671502541e+16,
1791
  "train_batch_size": 4,
1792
  "trial_name": null,
1793
  "trial_params": null