mgh6 committed on
Commit
658fd34
·
verified ·
1 Parent(s): d8b5fd7

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b791fbecb6b88664aa79f119dff8f288aa75b9b87dd0dd34e19411dbce854289
3
  size 136000488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05a674fab1302b21ca09374d55300acb133b868799b5a89059807bbe5940e72f
3
  size 136000488
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad68c170613ee4fb6dd9cc941512acc1d3c2e781098dbe881bd4cfe8efeecfc
3
  size 268176506
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5eb28d2aab11186249c608b8dea707f2ff8896e074965b515c45602208d729
3
  size 268176506
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89d109ac320325ce5994a75dba2574533dce01656e0a672c348391e584969cd4
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d98c1d6ddc8d6251f378064e080be6b3b902cbfa14b91558d4aa1686eb53ac1
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e63d039dcd74cce40e5b0ae616878e231197e581fdff19eeed08507ad6e3f131
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c3cf1238c41ea98e467a79fddf8f1f64b327ca5320d4658e9575b16bffd97b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,13 +1,21 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06022067112174173,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
- "log_history": [],
 
 
 
 
 
 
 
 
11
  "logging_steps": 1000,
12
  "max_steps": 249060,
13
  "num_input_tokens_seen": 0,
@@ -25,7 +33,7 @@
25
  "attributes": {}
26
  }
27
  },
28
- "total_flos": 2500969120137216.0,
29
  "train_batch_size": 1,
30
  "trial_name": null,
31
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.12044134224348346,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.12044134224348346,
13
+ "grad_norm": 1.711878776550293,
14
+ "learning_rate": 9.95984903236168e-05,
15
+ "loss": 12057152.512,
16
+ "step": 1000
17
+ }
18
+ ],
19
  "logging_steps": 1000,
20
  "max_steps": 249060,
21
  "num_input_tokens_seen": 0,
 
33
  "attributes": {}
34
  }
35
  },
36
+ "total_flos": 5001938240274432.0,
37
  "train_batch_size": 1,
38
  "trial_name": null,
39
  "trial_params": null