moos124 committed on
Commit
42e0285
·
verified ·
1 Parent(s): 5855da2

Training in progress, step 2290, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b72056db0382ea8589dcdec7cedf3395fedf882cf675eda2460891eb94d530e1
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d972981bb70eec13eb230710f7d5fccc5c887035bdfd2f7f15f03673e797dba
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c44a07870ae6c6e52e53668edeb3f5d8a503e66a45882c9d8cb25da282987f7
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7ce96f7d7e489dcdab86e3a3f942398aaee186cd3644e541195f5e52a7cd898
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d168c89865870f500efe4dec2b2a9427746a7547605abe7e2a50a6a88eaa3dd
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c6eb6e7a4b97be96175fc8cab533692b090f8e51f686250e1a39cb0acf203b
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b0b0d8365d4bf757526408ce349e5758722ce4d781b98bcfe2a9b29bdf5ab70
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99294bfd0bbfeb373d87ec34ab1c460706fc9f847c7dc4efa72f661c0a51b705
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.4864,
6
  "eval_steps": 500,
7
- "global_step": 2280,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2298,6 +2298,16 @@
2298
  "mean_token_accuracy": 0.7575721621513367,
2299
  "num_tokens": 10598042.0,
2300
  "step": 2280
 
 
 
 
 
 
 
 
 
 
2301
  }
2302
  ],
2303
  "logging_steps": 10,
@@ -2317,7 +2327,7 @@
2317
  "attributes": {}
2318
  }
2319
  },
2320
- "total_flos": 5.02476383681065e+16,
2321
  "train_batch_size": 4,
2322
  "trial_name": null,
2323
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4885333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 2290,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2298
  "mean_token_accuracy": 0.7575721621513367,
2299
  "num_tokens": 10598042.0,
2300
  "step": 2280
2301
+ },
2302
+ {
2303
+ "entropy": 0.9136553466320038,
2304
+ "epoch": 0.4885333333333333,
2305
+ "grad_norm": 0.2523214519023895,
2306
+ "learning_rate": 8.861069143594423e-05,
2307
+ "loss": 0.9898375511169434,
2308
+ "mean_token_accuracy": 0.7727992206811904,
2309
+ "num_tokens": 10640977.0,
2310
+ "step": 2290
2311
  }
2312
  ],
2313
  "logging_steps": 10,
 
2327
  "attributes": {}
2328
  }
2329
  },
2330
+ "total_flos": 5.045623609225728e+16,
2331
  "train_batch_size": 4,
2332
  "trial_name": null,
2333
  "trial_params": null