FormlessAI committed on
Commit
69a8fbd
·
verified ·
1 Parent(s): 5783377

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24951095bad417fe4d5f10cb313dac59dfcfbd07da754ff9cd607089f1464f20
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e3262f321176d26c6fa474fc73619945ffe32c1298cb3cb4c93f063b4f7e1cf
3
  size 98088784
last-checkpoint/global_step200/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec8e59fbf3708d2906da33455599161e0778f4619d48bb6157cedda10deab60
3
+ size 73939813
last-checkpoint/global_step200/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ab01ffc6b882076e872ab72450ac68242a1066b012c62a53f3ed6b1ee60adc
3
+ size 73939813
last-checkpoint/global_step200/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54276ca1dff9468478cfda5eb668a4aef50344168b9dc23d6fb0362f4e7d8ee6
3
+ size 73939877
last-checkpoint/global_step200/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f05419254fc2f3c82d8daf23ce7e4fe0bd7fee03230281b099cb94b368801a0c
3
+ size 73939877
last-checkpoint/global_step200/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7047707b41a4769d6c0db180802b6e4117b608ca31cdf43846bd53d4cf1fc7d8
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step150
 
1
+ global_step200
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71943a7534a603fc1073a728ef828fea525cdf5f25bf2fd0efc0240e6f46ebfc
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe71a984f3dec9f68d5e0c8d287b709dc94679e36c2f73fcd4bb577f118dc56b
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63b213587c483eed36cb75309049fa83568935186952fbec8cb20c41657ce583
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073d3d50c553c506414b06b3236a0e47ab676db77072aa4ada6dbd9ccae3efe8
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a070f5332db568fae86334a08fc1d346399db4a28d7560e846aae021f5cf8fe0
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4448dc49072e0161b26ca29cf232331ae40e44f83c87e97c7b098177ab3622b
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13ce84bc4f574a8a3540d6a08d280ebfeea47cd3c1b20a8bcc24e2a3baed0176
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad727f18a1ed5b11da2bd5fee112f9da1775401ade913bb7443084b78e34672
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51b8ced64010d7c63b9599d6610f10182005fca711e469c3a69c02f6738b2037
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267a8256d64aed80d8aed25b49adf5bf4d92086abb3964c902d2e45f4a76d74e
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.7854999899864197,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.14696876913655849,
6
  "eval_steps": 50,
7
- "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -242,6 +242,84 @@
242
  "eval_samples_per_second": 130.904,
243
  "eval_steps_per_second": 16.371,
244
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
  }
246
  ],
247
  "logging_steps": 5,
@@ -270,7 +348,7 @@
270
  "attributes": {}
271
  }
272
  },
273
- "total_flos": 7.859423388316467e+16,
274
  "train_batch_size": 2,
275
  "trial_name": null,
276
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.7729406356811523,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.19595835884874463,
6
  "eval_steps": 50,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
242
  "eval_samples_per_second": 130.904,
243
  "eval_steps_per_second": 16.371,
244
  "step": 150
245
+ },
246
+ {
247
+ "epoch": 0.1518677281077771,
248
+ "grad_norm": 0.2067604809999466,
249
+ "learning_rate": 0.00011076407880230177,
250
+ "loss": 0.7802,
251
+ "step": 155
252
+ },
253
+ {
254
+ "epoch": 0.15676668707899571,
255
+ "grad_norm": 0.23229490220546722,
256
+ "learning_rate": 0.00011075764704038947,
257
+ "loss": 0.7935,
258
+ "step": 160
259
+ },
260
+ {
261
+ "epoch": 0.16166564605021433,
262
+ "grad_norm": 0.26088929176330566,
263
+ "learning_rate": 0.00011075064637901441,
264
+ "loss": 0.7996,
265
+ "step": 165
266
+ },
267
+ {
268
+ "epoch": 0.16656460502143294,
269
+ "grad_norm": 0.23547124862670898,
270
+ "learning_rate": 0.00011074307689011918,
271
+ "loss": 0.8011,
272
+ "step": 170
273
+ },
274
+ {
275
+ "epoch": 0.17146356399265156,
276
+ "grad_norm": 0.24739272892475128,
277
+ "learning_rate": 0.00011073493865149204,
278
+ "loss": 0.7854,
279
+ "step": 175
280
+ },
281
+ {
282
+ "epoch": 0.17636252296387017,
283
+ "grad_norm": 0.222539022564888,
284
+ "learning_rate": 0.00011072623174676594,
285
+ "loss": 0.7962,
286
+ "step": 180
287
+ },
288
+ {
289
+ "epoch": 0.1812614819350888,
290
+ "grad_norm": 0.2204458862543106,
291
+ "learning_rate": 0.00011071695626541782,
292
+ "loss": 0.7958,
293
+ "step": 185
294
+ },
295
+ {
296
+ "epoch": 0.1861604409063074,
297
+ "grad_norm": 0.2629190683364868,
298
+ "learning_rate": 0.00011070711230276759,
299
+ "loss": 0.7779,
300
+ "step": 190
301
+ },
302
+ {
303
+ "epoch": 0.19105939987752601,
304
+ "grad_norm": 0.22338400781154633,
305
+ "learning_rate": 0.00011069669995997721,
306
+ "loss": 0.7972,
307
+ "step": 195
308
+ },
309
+ {
310
+ "epoch": 0.19595835884874463,
311
+ "grad_norm": 0.20890545845031738,
312
+ "learning_rate": 0.00011068571934404962,
313
+ "loss": 0.7769,
314
+ "step": 200
315
+ },
316
+ {
317
+ "epoch": 0.19595835884874463,
318
+ "eval_loss": 0.7729406356811523,
319
+ "eval_runtime": 15.0332,
320
+ "eval_samples_per_second": 130.311,
321
+ "eval_steps_per_second": 16.297,
322
+ "step": 200
323
  }
324
  ],
325
  "logging_steps": 5,
 
348
  "attributes": {}
349
  }
350
  },
351
+ "total_flos": 1.0404528961408205e+17,
352
  "train_batch_size": 2,
353
  "trial_name": null,
354
  "trial_params": null