SystemAdmin123 committed on
Commit
6230b87
·
verified ·
1 Parent(s): cb610ac

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ca635966f2128b90695cdcf1b450ff9388c9812f95f690192973e5b7eefd3c9
3
  size 723674912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b999a352785b09d15860c1320662d8ec817d298e02812f21d68f01e900838dea
3
  size 723674912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef2ab014f8101a1dbbd4564e14dff1cbf3c43dda56a1b19089771de3e5eb2e2f
3
  size 735625370
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2899f09cd2c54fdf5ef96e88d4218eefc8c7ef1b0315b44f21f9a89576e1bb98
3
  size 735625370
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d497aa3968cd2f05db0d0e8c5e1be496a8a5348df0a825e18ed3fdbaa24257ad
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69a04a1208f7a0d6f51f37a136b5c2e55bf3f53b3d0fd57164c5b83ca47a2645
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dda0d87dad890add5a8f3995389ff6a597895845903171a363aa580fa07ac30
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:080a7e72d6be938a9418e60003db90412af8a61e6434f9e9f1b598cca861dbcd
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de656d8e54adb9fa6e0b2ddbe69d4325a775f7e1580ed51c58a759ad9c7520d4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3d114a75d37be476b865187eb2b3d29d9343b131614a08f42be0014f110ce6f
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eaa87e4309aa1a00b739cd637a2ec444ea6c757388c653064a1906e4d8dfb2e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc5a0f78838743362c5d5378dff81ea2f7d0039da53a423f1759e861bc6b233
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca9a25c72339c898b564e0c464a3f6fc75bbeec408008928b7ed05533156b98c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bb34b6c62864960ca7a1a2bf6005b33b4420cc8055506432b79e0fe18bca2cd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 25.0,
5
- "eval_steps": 200,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,9 +11,9 @@
11
  {
12
  "epoch": 0.125,
13
  "eval_loss": 2.5584230422973633,
14
- "eval_runtime": 4.9063,
15
- "eval_samples_per_second": 305.933,
16
- "eval_steps_per_second": 3.465,
17
  "step": 1
18
  },
19
  {
@@ -31,145 +31,19 @@
31
  "step": 20
32
  },
33
  {
34
- "epoch": 3.75,
35
- "grad_norm": 0.1630859375,
36
- "learning_rate": 0.00019458172417006347,
37
- "loss": 2.1785,
38
- "step": 30
39
- },
40
- {
41
- "epoch": 5.0,
42
- "grad_norm": 0.1376953125,
43
- "learning_rate": 0.0001879473751206489,
44
- "loss": 2.1359,
45
- "step": 40
46
- },
47
- {
48
- "epoch": 6.25,
49
- "grad_norm": 0.1376953125,
50
- "learning_rate": 0.00017891405093963938,
51
- "loss": 2.1125,
52
- "step": 50
53
- },
54
- {
55
- "epoch": 7.5,
56
- "grad_norm": 0.1279296875,
57
- "learning_rate": 0.00016772815716257412,
58
- "loss": 2.0939,
59
- "step": 60
60
- },
61
- {
62
- "epoch": 8.75,
63
- "grad_norm": 0.138671875,
64
- "learning_rate": 0.00015469481581224272,
65
- "loss": 2.0638,
66
- "step": 70
67
- },
68
- {
69
- "epoch": 10.0,
70
- "grad_norm": 0.140625,
71
- "learning_rate": 0.00014016954246529696,
72
- "loss": 2.0632,
73
- "step": 80
74
- },
75
- {
76
- "epoch": 11.25,
77
- "grad_norm": 0.1337890625,
78
- "learning_rate": 0.00012454854871407994,
79
- "loss": 2.055,
80
- "step": 90
81
- },
82
- {
83
- "epoch": 12.5,
84
- "grad_norm": 0.14453125,
85
- "learning_rate": 0.00010825793454723325,
86
- "loss": 2.0298,
87
- "step": 100
88
- },
89
- {
90
- "epoch": 13.75,
91
- "grad_norm": 0.142578125,
92
- "learning_rate": 9.174206545276677e-05,
93
- "loss": 2.0271,
94
- "step": 110
95
- },
96
- {
97
- "epoch": 15.0,
98
- "grad_norm": 0.1357421875,
99
- "learning_rate": 7.54514512859201e-05,
100
- "loss": 2.0168,
101
- "step": 120
102
- },
103
- {
104
- "epoch": 16.25,
105
- "grad_norm": 0.1318359375,
106
- "learning_rate": 5.983045753470308e-05,
107
- "loss": 2.0126,
108
- "step": 130
109
- },
110
- {
111
- "epoch": 17.5,
112
- "grad_norm": 0.1376953125,
113
- "learning_rate": 4.530518418775733e-05,
114
- "loss": 2.0188,
115
- "step": 140
116
- },
117
- {
118
- "epoch": 18.75,
119
- "grad_norm": 0.134765625,
120
- "learning_rate": 3.227184283742591e-05,
121
- "loss": 2.009,
122
- "step": 150
123
- },
124
- {
125
- "epoch": 20.0,
126
- "grad_norm": 0.1357421875,
127
- "learning_rate": 2.1085949060360654e-05,
128
- "loss": 2.0108,
129
- "step": 160
130
- },
131
- {
132
- "epoch": 21.25,
133
- "grad_norm": 0.130859375,
134
- "learning_rate": 1.2052624879351104e-05,
135
- "loss": 2.0101,
136
- "step": 170
137
- },
138
- {
139
- "epoch": 22.5,
140
- "grad_norm": 0.146484375,
141
- "learning_rate": 5.418275829936537e-06,
142
- "loss": 2.0168,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 23.75,
147
- "grad_norm": 0.146484375,
148
- "learning_rate": 1.3638696597277679e-06,
149
- "loss": 2.0059,
150
- "step": 190
151
- },
152
- {
153
- "epoch": 25.0,
154
- "grad_norm": 0.1328125,
155
- "learning_rate": 0.0,
156
- "loss": 2.011,
157
- "step": 200
158
- },
159
- {
160
- "epoch": 25.0,
161
- "eval_loss": 2.0616559982299805,
162
- "eval_runtime": 4.9912,
163
- "eval_samples_per_second": 300.731,
164
- "eval_steps_per_second": 3.406,
165
- "step": 200
166
  }
167
  ],
168
  "logging_steps": 10,
169
  "max_steps": 200,
170
  "num_input_tokens_seen": 0,
171
  "num_train_epochs": 25,
172
- "save_steps": 200,
173
  "stateful_callbacks": {
174
  "TrainerControl": {
175
  "args": {
@@ -177,12 +51,12 @@
177
  "should_evaluate": false,
178
  "should_log": false,
179
  "should_save": true,
180
- "should_training_stop": true
181
  },
182
  "attributes": {}
183
  }
184
  },
185
- "total_flos": 7.113876738932736e+16,
186
  "train_batch_size": 23,
187
  "trial_name": null,
188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5,
5
+ "eval_steps": 20,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.125,
13
  "eval_loss": 2.5584230422973633,
14
+ "eval_runtime": 4.8815,
15
+ "eval_samples_per_second": 307.488,
16
+ "eval_steps_per_second": 3.483,
17
  "step": 1
18
  },
19
  {
 
31
  "step": 20
32
  },
33
  {
34
+ "epoch": 2.5,
35
+ "eval_loss": 2.1561896800994873,
36
+ "eval_runtime": 4.8534,
37
+ "eval_samples_per_second": 309.27,
38
+ "eval_steps_per_second": 3.503,
39
+ "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
  ],
42
  "logging_steps": 10,
43
  "max_steps": 200,
44
  "num_input_tokens_seen": 0,
45
  "num_train_epochs": 25,
46
+ "save_steps": 20,
47
  "stateful_callbacks": {
48
  "TrainerControl": {
49
  "args": {
 
51
  "should_evaluate": false,
52
  "should_log": false,
53
  "should_save": true,
54
+ "should_training_stop": false
55
  },
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 7113876738932736.0,
60
  "train_batch_size": 23,
61
  "trial_name": null,
62
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3156bde561d7a483929e0f1d8c097a973dfeb26f4690b823508131f70e6df615
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4c566c5121c40bd8158f38c2b438572e5e9a4a274e5b8c6fc9ce3ebb93d224
3
  size 6840