moos124 committed on
Commit
fccebb9
·
verified ·
1 Parent(s): 195f198

Training in progress, step 2280, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:767c3437730d367c98d8021d358424909c63e917867d405b1d74b84fd447037d
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b72056db0382ea8589dcdec7cedf3395fedf882cf675eda2460891eb94d530e1
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc2fc9946e3bab4c307a0add4a2fec60dbc15f93133c4257724c1ec1e757b237
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c44a07870ae6c6e52e53668edeb3f5d8a503e66a45882c9d8cb25da282987f7
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a148e5b5499e18201681483d67b16d0b5f2e270b8f456322e9fcbccaeb99c239
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d168c89865870f500efe4dec2b2a9427746a7547605abe7e2a50a6a88eaa3dd
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7c215e603d9ed5e976f07429a149078377d8ddbea7b9805e3a7512502cd8918
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0b0d8365d4bf757526408ce349e5758722ce4d781b98bcfe2a9b29bdf5ab70
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4842666666666667,
6
  "eval_steps": 500,
7
- "global_step": 2270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2288,6 +2288,16 @@
2288
  "mean_token_accuracy": 0.7497815892100335,
2289
  "num_tokens": 10546937.0,
2290
  "step": 2270
 
 
 
 
 
 
 
 
 
 
2291
  }
2292
  ],
2293
  "logging_steps": 10,
@@ -2307,7 +2317,7 @@
2307
  "attributes": {}
2308
  }
2309
  },
2310
- "total_flos": 5.000876076521779e+16,
2311
  "train_batch_size": 4,
2312
  "trial_name": null,
2313
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4864,
6
  "eval_steps": 500,
7
+ "global_step": 2280,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2288
  "mean_token_accuracy": 0.7497815892100335,
2289
  "num_tokens": 10546937.0,
2290
  "step": 2270
2291
+ },
2292
+ {
2293
+ "entropy": 0.9999729461967946,
2294
+ "epoch": 0.4864,
2295
+ "grad_norm": 0.23635436594486237,
2296
+ "learning_rate": 8.872042304858412e-05,
2297
+ "loss": 1.0858405113220215,
2298
+ "mean_token_accuracy": 0.7575721621513367,
2299
+ "num_tokens": 10598042.0,
2300
+ "step": 2280
2301
  }
2302
  ],
2303
  "logging_steps": 10,
 
2317
  "attributes": {}
2318
  }
2319
  },
2320
+ "total_flos": 5.02476383681065e+16,
2321
  "train_batch_size": 4,
2322
  "trial_name": null,
2323
  "trial_params": null