moos124 committed on
Commit
de6bca3
·
verified ·
1 Parent(s): 8bea8c6

Training in progress, step 2360, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f79f8bb9d5f7dc15c04fa86ab72d4b1d25c7f112febc4658c22f8f3fae5a8034
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fd7170591f5571b3bbcf9d603466417fc7136a3ac93e65d5337338d466eddf0
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51af2c09efe30f822ec7955da569a14d92bc67173a2ad6375838b7d2a18fde9a
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0e4d233e51d0716f11aa3c933ce7220053d5215cb79abc561196901c1ec7c5
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf2f0f561cb542533e93676afd9598125fe0373b36b6381b7d48a9e8d03bca97
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e7fdaec812b106b1310ee19a34a72066d9e146e7175dc3be750da56496929e0
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:632011de483a2102d4f0d1947c4329c3cd8dce410c54d596291881aacfbc03d9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38e6f7582b579bd50731564e68a4d878d6c62c900bebe47f92751e2d2be3afb6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5013333333333333,
6
  "eval_steps": 500,
7
- "global_step": 2350,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2368,6 +2368,16 @@
2368
  "mean_token_accuracy": 0.7475039146840572,
2369
  "num_tokens": 10925395.0,
2370
  "step": 2350
 
 
 
 
 
 
 
 
 
 
2371
  }
2372
  ],
2373
  "logging_steps": 10,
@@ -2387,7 +2397,7 @@
2387
  "attributes": {}
2388
  }
2389
  },
2390
- "total_flos": 5.181426161804698e+16,
2391
  "train_batch_size": 4,
2392
  "trial_name": null,
2393
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5034666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 2360,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2368
  "mean_token_accuracy": 0.7475039146840572,
2369
  "num_tokens": 10925395.0,
2370
  "step": 2350
2371
+ },
2372
+ {
2373
+ "entropy": 1.0321477994322776,
2374
+ "epoch": 0.5034666666666666,
2375
+ "grad_norm": 0.2507781386375427,
2376
+ "learning_rate": 8.782969130318358e-05,
2377
+ "loss": 1.1364535331726073,
2378
+ "mean_token_accuracy": 0.7528289645910263,
2379
+ "num_tokens": 10973794.0,
2380
+ "step": 2360
2381
  }
2382
  ],
2383
  "logging_steps": 10,
 
2397
  "attributes": {}
2398
  }
2399
  },
2400
+ "total_flos": 5.204525383584768e+16,
2401
  "train_batch_size": 4,
2402
  "trial_name": null,
2403
  "trial_params": null