moos124 commited on
Commit
78c2eb6
·
verified ·
1 Parent(s): f06ce77

Training in progress, step 2270, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:144677f64bb722a75657910254c9b53aec73907bb30f7fae21289a37c6ba9fcb
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:767c3437730d367c98d8021d358424909c63e917867d405b1d74b84fd447037d
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25309e1202347ad8fc1909537945da38c655b35564d3016adc8c6918761c8845
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc2fc9946e3bab4c307a0add4a2fec60dbc15f93133c4257724c1ec1e757b237
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54ff895d49694534dbcf457c4364135e9e4df0a9c7bb41cb2b8a6e65dea20252
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a148e5b5499e18201681483d67b16d0b5f2e270b8f456322e9fcbccaeb99c239
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43e4106416f0e18c625209614bc82c2d49db0dbb5dcdfd6ed70850245c8dbfa1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c215e603d9ed5e976f07429a149078377d8ddbea7b9805e3a7512502cd8918
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.48213333333333336,
6
  "eval_steps": 500,
7
- "global_step": 2260,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2278,6 +2278,16 @@
2278
  "mean_token_accuracy": 0.7805656388401985,
2279
  "num_tokens": 10496592.0,
2280
  "step": 2260
 
 
 
 
 
 
 
 
 
 
2281
  }
2282
  ],
2283
  "logging_steps": 10,
@@ -2297,7 +2307,7 @@
2297
  "attributes": {}
2298
  }
2299
  },
2300
- "total_flos": 4.977467747646259e+16,
2301
  "train_batch_size": 4,
2302
  "trial_name": null,
2303
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4842666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 2270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2278
  "mean_token_accuracy": 0.7805656388401985,
2279
  "num_tokens": 10496592.0,
2280
  "step": 2260
2281
+ },
2282
+ {
2283
+ "entropy": 1.0050086982548236,
2284
+ "epoch": 0.4842666666666667,
2285
+ "grad_norm": 0.31434357166290283,
2286
+ "learning_rate": 8.882969073338833e-05,
2287
+ "loss": 1.1325186729431151,
2288
+ "mean_token_accuracy": 0.7497815892100335,
2289
+ "num_tokens": 10546937.0,
2290
+ "step": 2270
2291
  }
2292
  ],
2293
  "logging_steps": 10,
 
2307
  "attributes": {}
2308
  }
2309
  },
2310
+ "total_flos": 5.000876076521779e+16,
2311
  "train_batch_size": 4,
2312
  "trial_name": null,
2313
  "trial_params": null