FormlessAI committed on
Commit
37ad9d6
·
verified ·
1 Parent(s): 0ff152e

Training in progress, step 120, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92c3ca36e9dd4d11ab85588a56c6fb7b33a51db0bfd3d4eb3ee825accee6a828
3
  size 80014016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07bb356c3c42e5cdc07d10d2311f6b5d583b9c0243d40576869a8a8468b6d98
3
  size 80014016
last-checkpoint/global_step120/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66dc06d93f59c002bd6980986bbd53dab716b8588332f09e290885a539e536f3
3
+ size 60296272
last-checkpoint/global_step120/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:274b6848025365e3dda92951d0ca3e5d15f264a98269a8c215418b1e6630f50c
3
+ size 60296400
last-checkpoint/global_step120/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92415e1541c796ef60a91fd9fc6f046a75c96bd56685c357cc821aba8e9ad88
3
+ size 60296400
last-checkpoint/global_step120/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42e7578deced32b54388e031ab71404067972e3c7e6413672d16489b6bb4b9cb
3
+ size 60296400
last-checkpoint/global_step120/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0051d479ecabaf7850bc6a78ce63326f1ec673808c117ceaae6b5af296aa15b8
3
+ size 80296428
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step80
 
1
+ global_step120
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5140c0994a4009e2bd1bdabfcc4d1ef6e2398fa905a081db9641fd12a8aef58e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca446724d1ebce77ccae7f35340aabb59e5bca2ba79e103237fd5f8b74b534ac
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c9018248062ac40731f8f160cde9bcf648a19dc4d6c04ac8c07a5ac8dbc63ab
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72284f3bf1ea5778c38096c11524890c08f7bd83e9ef49537cb678a37a80dc25
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7138d4e5f63e17a11068cd327d0123256938b5c0ac9100060f799e1689debf24
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a8554acf33597a260ef009c9e86cc0718a7640fdf53a7e753811647743bf70a
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03cc86f7684c1db139e3ec7994865b6ab0ab40f5a57ca5af414e11cc912dd03b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b0f0993e706c9ca5d8f708ef0f01e07d0edb3dd3e3db326b06ad6f26f3cf62c
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9c74ea4c289ec2b8f9336a171496a3ab2f72be98f2c877ad703552d865f9922
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d2e54503626585fe3bd1382687ba707fdc17dcc88507487c697e511203f899
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 80,
3
- "best_metric": 1.1793321371078491,
4
- "best_model_checkpoint": "miner_id_24/checkpoint-80",
5
- "epoch": 0.6584362139917695,
6
  "eval_steps": 20,
7
- "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -160,6 +160,78 @@
160
  "eval_samples_per_second": 14.47,
161
  "eval_steps_per_second": 0.907,
162
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
  ],
165
  "logging_steps": 5,
@@ -188,7 +260,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 1.6728695582658396e+18,
192
  "train_batch_size": 4,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
+ "best_global_step": 120,
3
+ "best_metric": 1.1266472339630127,
4
+ "best_model_checkpoint": "miner_id_24/checkpoint-120",
5
+ "epoch": 0.9876543209876543,
6
  "eval_steps": 20,
7
+ "global_step": 120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
160
  "eval_samples_per_second": 14.47,
161
  "eval_steps_per_second": 0.907,
162
  "step": 80
163
+ },
164
+ {
165
+ "epoch": 0.6995884773662552,
166
+ "grad_norm": 0.06558868288993835,
167
+ "learning_rate": 0.00011200000000000001,
168
+ "loss": 1.1719,
169
+ "step": 85
170
+ },
171
+ {
172
+ "epoch": 0.7407407407407407,
173
+ "grad_norm": 0.06846272945404053,
174
+ "learning_rate": 0.00011866666666666669,
175
+ "loss": 1.1555,
176
+ "step": 90
177
+ },
178
+ {
179
+ "epoch": 0.7818930041152263,
180
+ "grad_norm": 0.07038144767284393,
181
+ "learning_rate": 0.00012533333333333334,
182
+ "loss": 1.1683,
183
+ "step": 95
184
+ },
185
+ {
186
+ "epoch": 0.823045267489712,
187
+ "grad_norm": 0.07254977524280548,
188
+ "learning_rate": 0.000132,
189
+ "loss": 1.1592,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.823045267489712,
194
+ "eval_loss": 1.148273229598999,
195
+ "eval_runtime": 274.3966,
196
+ "eval_samples_per_second": 14.413,
197
+ "eval_steps_per_second": 0.904,
198
+ "step": 100
199
+ },
200
+ {
201
+ "epoch": 0.8641975308641975,
202
+ "grad_norm": 0.07729992270469666,
203
+ "learning_rate": 0.00013866666666666669,
204
+ "loss": 1.1482,
205
+ "step": 105
206
+ },
207
+ {
208
+ "epoch": 0.9053497942386831,
209
+ "grad_norm": 0.08038458973169327,
210
+ "learning_rate": 0.00014533333333333333,
211
+ "loss": 1.1458,
212
+ "step": 110
213
+ },
214
+ {
215
+ "epoch": 0.9465020576131687,
216
+ "grad_norm": 0.08932825922966003,
217
+ "learning_rate": 0.000152,
218
+ "loss": 1.1384,
219
+ "step": 115
220
+ },
221
+ {
222
+ "epoch": 0.9876543209876543,
223
+ "grad_norm": 0.08660374581813812,
224
+ "learning_rate": 0.00015866666666666668,
225
+ "loss": 1.129,
226
+ "step": 120
227
+ },
228
+ {
229
+ "epoch": 0.9876543209876543,
230
+ "eval_loss": 1.1266472339630127,
231
+ "eval_runtime": 272.2952,
232
+ "eval_samples_per_second": 14.525,
233
+ "eval_steps_per_second": 0.911,
234
+ "step": 120
235
  }
236
  ],
237
  "logging_steps": 5,
 
260
  "attributes": {}
261
  }
262
  },
263
+ "total_flos": 2.5093043373987594e+18,
264
  "train_batch_size": 4,
265
  "trial_name": null,
266
  "trial_params": null