moos124 commited on
Commit
63e1a0f
·
verified ·
1 Parent(s): f79a212

Training in progress, step 2490, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58df8933593be515378f4ed26531652372f20b456ad4d4f899eb46e2ffc95ce1
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891e02183334d2d6e14b763e2d18c5efd2d4d7210faa9668a08340f77ff5895a
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48c9904e7e622e8f841d2638f3478e33d1225746b6513ebf98cba149ef2ca639
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:658b5ccd31f41720bde98eec6bd91a87a8dee408f3864b9bba97359fb4622a38
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c0f664a3363567917c69321a2cc9180a5dc58a60087edf86b1fb088a0865651
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea52b5ecaaf31a006976c86d0d9921770b6b3dc3a8c41a05914e1a191fa1d3ea
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d3a91cf25ce58560d2917160a1aaffcee398f1ff65559c5d58abcd7610b9f59
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:248c62dcb08df8749d40de382b7d31f14b7567698229aafdff21ecbdcedc42b0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5290666666666667,
6
  "eval_steps": 500,
7
- "global_step": 2480,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2498,6 +2498,16 @@
2498
  "mean_token_accuracy": 0.7577844798564911,
2499
  "num_tokens": 11526756.0,
2500
  "step": 2480
 
 
 
 
 
 
 
 
 
 
2501
  }
2502
  ],
2503
  "logging_steps": 10,
@@ -2517,7 +2527,7 @@
2517
  "attributes": {}
2518
  }
2519
  },
2520
- "total_flos": 5.468820962062848e+16,
2521
  "train_batch_size": 4,
2522
  "trial_name": null,
2523
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5312,
6
  "eval_steps": 500,
7
+ "global_step": 2490,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2498
  "mean_token_accuracy": 0.7577844798564911,
2499
  "num_tokens": 11526756.0,
2500
  "step": 2480
2501
+ },
2502
+ {
2503
+ "entropy": 0.929996844381094,
2504
+ "epoch": 0.5312,
2505
+ "grad_norm": 0.29296958446502686,
2506
+ "learning_rate": 8.632069917162255e-05,
2507
+ "loss": 1.0086584091186523,
2508
+ "mean_token_accuracy": 0.7662723585963249,
2509
+ "num_tokens": 11568739.0,
2510
+ "step": 2490
2511
  }
2512
  ],
2513
  "logging_steps": 10,
 
2527
  "attributes": {}
2528
  }
2529
  },
2530
+ "total_flos": 5.489113887938458e+16,
2531
  "train_batch_size": 4,
2532
  "trial_name": null,
2533
  "trial_params": null