moos124 committed on
Commit
e9f28de
·
verified ·
1 Parent(s): a333f17

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:891e02183334d2d6e14b763e2d18c5efd2d4d7210faa9668a08340f77ff5895a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fb2bd4f7a80eb40d2530475250df49d51b1d8a3700a89614c1a2536866086a7
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:658b5ccd31f41720bde98eec6bd91a87a8dee408f3864b9bba97359fb4622a38
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9ec9783d423282cb76fb52866c8a208f9223b796eeeb00ddb08827afbec228e
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea52b5ecaaf31a006976c86d0d9921770b6b3dc3a8c41a05914e1a191fa1d3ea
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9111dc5965b3afd3c9bf84e72d677f38968c6f9466692468cba75e25452a3307
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:248c62dcb08df8749d40de382b7d31f14b7567698229aafdff21ecbdcedc42b0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8aedc8c45f41bf64919c375727c80db9e0aeff0f8950601fa0a0a492e6b25a1
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5312,
6
  "eval_steps": 500,
7
- "global_step": 2490,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2508,6 +2508,16 @@
2508
  "mean_token_accuracy": 0.7662723585963249,
2509
  "num_tokens": 11568739.0,
2510
  "step": 2490
 
 
 
 
 
 
 
 
 
 
2511
  }
2512
  ],
2513
  "logging_steps": 10,
@@ -2527,7 +2537,7 @@
2527
  "attributes": {}
2528
  }
2529
  },
2530
- "total_flos": 5.489113887938458e+16,
2531
  "train_batch_size": 4,
2532
  "trial_name": null,
2533
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5333333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 2500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2508
  "mean_token_accuracy": 0.7662723585963249,
2509
  "num_tokens": 11568739.0,
2510
  "step": 2490
2511
+ },
2512
+ {
2513
+ "entropy": 0.9251207195222377,
2514
+ "epoch": 0.5333333333333333,
2515
+ "grad_norm": 0.23416800796985626,
2516
+ "learning_rate": 8.620153719937535e-05,
2517
+ "loss": 0.9998083114624023,
2518
+ "mean_token_accuracy": 0.7651191413402557,
2519
+ "num_tokens": 11616235.0,
2520
+ "step": 2500
2521
  }
2522
  ],
2523
  "logging_steps": 10,
 
2537
  "attributes": {}
2538
  }
2539
  },
2540
+ "total_flos": 5.513579309009203e+16,
2541
  "train_batch_size": 4,
2542
  "trial_name": null,
2543
  "trial_params": null