mgh6 committed on
Commit
ac99423
·
verified ·
1 Parent(s): 71c2171

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:507d55726514d36d57a7fdead1e533238594fb1d956a903a49ce02157b571e17
3
  size 2611614300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b0f309b7f4a021aeae9b8c023638b91640c0494b78df53f9799eafce8591ec
3
  size 2611614300
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fb309794c77ba81c5d999193c64bdbf97c4b11eecd10e3754dddae31d948844
3
  size 5213028466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cbefb9ea304174eb6b10d342f4670b2fe3cd40d03615dc555c42ec57dc27c0e
3
  size 5213028466
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:874b9aec013ad321d4edc1c021ca42f8014c7ff34d53714ff8059015e8ee9794
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b5a50d8043ac6976ce002b35434e69f45b0b7b1b32881ce5602e437b14e194b
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c39af0f6970331c0f430d145f5514421d9baa7e90f9cf02971fb62606bf1ff3d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecc763d4f0407011d54bc501a4da5c4c1dfc18e161c6f252fcc58e764d0886a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.024843756065370134,
5
  "eval_steps": 500,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14,6 +14,13 @@
14
  "learning_rate": 0.0009975155279503105,
15
  "loss": 1.0811305421386547e+17,
16
  "step": 100
 
 
 
 
 
 
 
17
  }
18
  ],
19
  "logging_steps": 100,
@@ -33,7 +40,7 @@
33
  "attributes": {}
34
  }
35
  },
36
- "total_flos": 1.9448797327261696e+16,
37
  "train_batch_size": 1,
38
  "trial_name": null,
39
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04968751213074027,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14
  "learning_rate": 0.0009975155279503105,
15
  "loss": 1.0811305421386547e+17,
16
  "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.04968751213074027,
20
+ "grad_norm": 0.9572473764419556,
21
+ "learning_rate": 0.000995031055900621,
22
+ "loss": 1.7117682180722524e+16,
23
+ "step": 200
24
  }
25
  ],
26
  "logging_steps": 100,
 
40
  "attributes": {}
41
  }
42
  },
43
+ "total_flos": 3.889759465452339e+16,
44
  "train_batch_size": 1,
45
  "trial_name": null,
46
  "trial_params": null