fpadovani committed on
Commit
e77efbf
·
verified ·
1 Parent(s): 71feff6

Training in progress, step 1000, checkpoint

Browse files
checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e968dbb3380de7a3cb81dafc561acc0238e3df7f4a2d3593f54687f9b501d6cf
3
  size 435544704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3af484d2981c1d8d51caba3ea0b20da0d73e3958223f010171f35184084a130c
3
  size 435544704
checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88ebb55308c87e3e9097aa3b8979c4cd29b29eeb02dfb4f603a07a47ce4c1d5f
3
  size 871183627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02f0f0a5fe76c042386505973b8005fe81c1768a92f6d6341ae31e07c213c82a
3
  size 871183627
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f46683c066aee705f7b93bdbd4b829ca423162b1fb577c522b9b1c1625a9c1c1
3
  size 14709
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39586f70e92c891dbcaab37909f4cf86c0501394c80c92f12d33b7c2ac0ad3c7
3
  size 14709
checkpoint-1000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ae2a2128444abab378aa06c09a61a84665f758fcc19fc46f5789b0bc1b5665
3
+ size 1383
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3381cb33b25da3196bf5781a9390f345898978fd83e9e4bca1d36cf8c7145e8a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab1794c1f570008e45f746af78e6132c4682c43814cf01f7e4ba820420220475
3
  size 1465
checkpoint-1000/tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
checkpoint-1000/trainer_state.json CHANGED
@@ -1,33 +1,39 @@
1
  {
2
- "best_global_step": 982,
3
- "best_metric": 3.760134696960449,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.0366598778004072,
6
  "eval_steps": 500,
7
  "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 4.097745895385742,
15
- "eval_runtime": 66.4624,
16
- "eval_samples_per_second": 504.782,
17
- "eval_steps_per_second": 1.986,
18
- "step": 491
19
  },
20
  {
21
- "epoch": 2.0,
22
- "eval_loss": 3.760134696960449,
23
- "eval_runtime": 66.4623,
24
- "eval_samples_per_second": 504.783,
25
- "eval_steps_per_second": 1.986,
26
- "step": 982
27
  }
28
  ],
29
- "logging_steps": 4000,
30
- "max_steps": 2455,
31
  "num_input_tokens_seen": 0,
32
  "num_train_epochs": 5,
33
  "save_steps": 500,
@@ -43,7 +49,7 @@
43
  "attributes": {}
44
  }
45
  },
46
- "total_flos": 1.6717202915328e+16,
47
  "train_batch_size": 256,
48
  "trial_name": null,
49
  "trial_params": null
 
1
  {
2
+ "best_global_step": 501,
3
+ "best_metric": 3.908651113510132,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.996007984031936,
6
  "eval_steps": 500,
7
  "global_step": 1000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
+ {
13
+ "epoch": 0.001996007984031936,
14
+ "grad_norm": 30.08072853088379,
15
+ "learning_rate": 0.0,
16
+ "loss": 10.3965,
17
+ "step": 1
18
+ },
19
  {
20
  "epoch": 1.0,
21
+ "grad_norm": 1.243941068649292,
22
+ "learning_rate": 0.0001,
23
+ "loss": 4.8834,
24
+ "step": 501
 
25
  },
26
  {
27
+ "epoch": 1.0,
28
+ "eval_loss": 3.908651113510132,
29
+ "eval_runtime": 19.9819,
30
+ "eval_samples_per_second": 1586.036,
31
+ "eval_steps_per_second": 6.206,
32
+ "step": 501
33
  }
34
  ],
35
+ "logging_steps": 500,
36
+ "max_steps": 2505,
37
  "num_input_tokens_seen": 0,
38
  "num_train_epochs": 5,
39
  "save_steps": 500,
 
49
  "attributes": {}
50
  }
51
  },
52
+ "total_flos": 1.6720991649792e+16,
53
  "train_batch_size": 256,
54
  "trial_name": null,
55
  "trial_params": null
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a57dba9b9c8c3226400ebcad0a2060a021a094dd027a7ac1acf9378f5c6cc27
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7a8afa9c6745812d26be318b1c7d9348962a9c4c0d9cbb9442934f28d74eb6
3
  size 5969