mgh6 committed on
Commit
592356a
·
verified ·
1 Parent(s): 4b2f862

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "tattabio/gLM2_150M",
3
  "architectures": [
4
  "gLM2ForMaskedLM"
5
  ],
@@ -8,10 +8,10 @@
8
  "AutoModel": "modeling_glm2.gLM2Model",
9
  "AutoModelForMaskedLM": "modeling_glm2.gLM2ForMaskedLM"
10
  },
11
- "depth": 30,
12
- "dim": 640,
13
  "ffn_dim_multiplier": null,
14
- "heads": 10,
15
  "model_type": "gLM2",
16
  "norm_eps": 1e-05,
17
  "swiglu_multiple_of": 256,
 
1
  {
2
+ "_name_or_path": "tattabio/gLM2_650M",
3
  "architectures": [
4
  "gLM2ForMaskedLM"
5
  ],
 
8
  "AutoModel": "modeling_glm2.gLM2Model",
9
  "AutoModelForMaskedLM": "modeling_glm2.gLM2ForMaskedLM"
10
  },
11
+ "depth": 33,
12
+ "dim": 1280,
13
  "ffn_dim_multiplier": null,
14
+ "heads": 20,
15
  "model_type": "gLM2",
16
  "norm_eps": 1e-05,
17
  "swiglu_multiple_of": 256,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:516558ed7782de66fc542438abb1c93e159afd70a2aeb6571ce83cca423452b0
3
- size 609855088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228e54ea153feeb0f49b0800638a29264ec8340106787699023f5720254dacea
3
+ size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:812c91eacfd5aea68d8b5decb8b50302d3944860c0aa6ecd636549bd4f072a92
3
- size 1219840058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0006e3e3cc59298369c06ee6e4e8b3272c5752f670d9f969958dfd9e69616dc
3
+ size 5365108834
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f4312d4eb4a3834512b8e6a5f558f7335f936ed9768ab54b18216e62eb5a7d3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:848fdf35f13e1fde847fbd191021c99c0675e5e723a1b65fde4649f2fc9250db
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13ae4134e19f55d5a540bad8977ebfa7de23a5f70c51215224d0742bb2666b1a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9f9fc41c0627d630837221d5c7872d3197c08985ee35f058d5f5e36bfe0249b
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e287e6f80aed910a1d4cb01fb428361df3b7e62045921fccfd519aab7f20c2e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae1449d711371210b0f6284f921f3df183a3c5c6628d3fc2950f5c89910866d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:006e670f373067b7e226643b8cade6148c320aff0b769e7d1532179c7f45b76a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3cc14e80a0475fa4dead8d6a3c6f0af9c5a92c40ad285584d68830834b3a6ea
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59630a3df2ec5543c18897bf2cb0562e6bac8d472d75091b8f7ddabcb069715a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:478ca537cf75a11344e25e46d3c46fdcf2db572bdb8cfff6f1ed3781e47a9787
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,33 +1,89 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0012275351477837237,
5
- "eval_steps": 2,
6
- "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0006137675738918619,
13
- "grad_norm": 158.1446075439453,
14
- "learning_rate": 9.993861264579497e-05,
15
- "loss": 100.2575,
16
- "step": 1
17
  },
18
  {
19
- "epoch": 0.0012275351477837237,
20
- "grad_norm": 105.63041687011719,
21
- "learning_rate": 9.987722529158994e-05,
22
- "loss": 95.2722,
23
- "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
- "logging_steps": 1,
27
- "max_steps": 1629,
28
  "num_input_tokens_seen": 0,
29
  "num_train_epochs": 1,
30
- "save_steps": 2,
31
  "stateful_callbacks": {
32
  "TrainerControl": {
33
  "args": {
@@ -40,8 +96,8 @@
40
  "attributes": {}
41
  }
42
  },
43
- "total_flos": 919791151165440.0,
44
- "train_batch_size": 8,
45
  "trial_name": null,
46
  "trial_params": null
47
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03836047336824137,
5
+ "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0038360473368241363,
13
+ "grad_norm": 44.11674880981445,
14
+ "learning_rate": 9.961638790854688e-05,
15
+ "loss": 78.9796,
16
+ "step": 50
17
  },
18
  {
19
+ "epoch": 0.0076720946736482725,
20
+ "grad_norm": 31.060550689697266,
21
+ "learning_rate": 9.923277581709376e-05,
22
+ "loss": 75.5083,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.011508142010472408,
27
+ "grad_norm": 53.42700958251953,
28
+ "learning_rate": 9.884916372564063e-05,
29
+ "loss": 74.4945,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.015344189347296545,
34
+ "grad_norm": 44.670753479003906,
35
+ "learning_rate": 9.846555163418752e-05,
36
+ "loss": 73.8958,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.019180236684120684,
41
+ "grad_norm": 44.99592971801758,
42
+ "learning_rate": 9.80819395427344e-05,
43
+ "loss": 73.1967,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.023016284020944817,
48
+ "grad_norm": 47.95292663574219,
49
+ "learning_rate": 9.769832745128127e-05,
50
+ "loss": 72.7175,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.026852331357768953,
55
+ "grad_norm": 15.567469596862793,
56
+ "learning_rate": 9.731471535982815e-05,
57
+ "loss": 72.0448,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 0.03068837869459309,
62
+ "grad_norm": 37.817440032958984,
63
+ "learning_rate": 9.693110326837502e-05,
64
+ "loss": 71.9744,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.03452442603141723,
69
+ "grad_norm": 32.989627838134766,
70
+ "learning_rate": 9.65474911769219e-05,
71
+ "loss": 71.4153,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 0.03836047336824137,
76
+ "grad_norm": 44.315311431884766,
77
+ "learning_rate": 9.616387908546877e-05,
78
+ "loss": 71.042,
79
+ "step": 500
80
  }
81
  ],
82
+ "logging_steps": 50,
83
+ "max_steps": 13034,
84
  "num_input_tokens_seen": 0,
85
  "num_train_epochs": 1,
86
+ "save_steps": 500,
87
  "stateful_callbacks": {
88
  "TrainerControl": {
89
  "args": {
 
96
  "attributes": {}
97
  }
98
  },
99
+ "total_flos": 8.568157303923016e+17,
100
+ "train_batch_size": 2,
101
  "trial_name": null,
102
  "trial_params": null
103
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064b240ea07b11fb2a55256aa70c4f515e16a1e7de5972e80b77b98e19219a68
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b0a8ed667ee8b35f22ca4883f52af3ea1273c54ad954652c4052132affac051
3
  size 5240