moos124 commited on
Commit
2961d8d
·
verified ·
1 Parent(s): 21b77d0

Training in progress, step 2510, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fb2bd4f7a80eb40d2530475250df49d51b1d8a3700a89614c1a2536866086a7
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbe67794af50642f182b9ea8c384905c650abf9cc8b329428898500972747400
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ec9783d423282cb76fb52866c8a208f9223b796eeeb00ddb08827afbec228e
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5619a0f072aab8b47dd7d30c4ada88d0753ce4862a2d555516994d2a847ef155
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9111dc5965b3afd3c9bf84e72d677f38968c6f9466692468cba75e25452a3307
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13bb58902d7911f6b8185a6ef12bf6396763797af9152cc44e85896cb9480a07
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8aedc8c45f41bf64919c375727c80db9e0aeff0f8950601fa0a0a492e6b25a1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e0e6092e19618a2ff351245c56be6866db27a4f7763b7af3f3d7624941a3665
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5333333333333333,
6
  "eval_steps": 500,
7
- "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2518,6 +2518,16 @@
2518
  "mean_token_accuracy": 0.7651191413402557,
2519
  "num_tokens": 11616235.0,
2520
  "step": 2500
 
 
 
 
 
 
 
 
 
 
2521
  }
2522
  ],
2523
  "logging_steps": 10,
@@ -2537,7 +2547,7 @@
2537
  "attributes": {}
2538
  }
2539
  },
2540
- "total_flos": 5.513579309009203e+16,
2541
  "train_batch_size": 4,
2542
  "trial_name": null,
2543
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5354666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 2510,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2518
  "mean_token_accuracy": 0.7651191413402557,
2519
  "num_tokens": 11616235.0,
2520
  "step": 2500
2521
+ },
2522
+ {
2523
+ "entropy": 0.9308743372559547,
2524
+ "epoch": 0.5354666666666666,
2525
+ "grad_norm": 0.29301849007606506,
2526
+ "learning_rate": 8.60819414792639e-05,
2527
+ "loss": 1.0315680503845215,
2528
+ "mean_token_accuracy": 0.7692675769329071,
2529
+ "num_tokens": 11666713.0,
2530
+ "step": 2510
2531
  }
2532
  ],
2533
  "logging_steps": 10,
 
2547
  "attributes": {}
2548
  }
2549
  },
2550
+ "total_flos": 5.535017643393024e+16,
2551
  "train_batch_size": 4,
2552
  "trial_name": null,
2553
  "trial_params": null