PhoenixB committed on
Commit
40c3793
·
verified ·
1 Parent(s): dc622de

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c503048216b577a639c3e7199d6c8e0dc5320af80cf66353a78052840963a44
3
  size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef31bd5a132dc63059267ab5a04a10fe9f1676b33345214450050e8f34f3ec8
3
  size 83946192
last-checkpoint/global_step150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1893317a4c5a150ef714767ec2bb7a323f7b7ce3e4e52a45098e4fbd40d3ff7
3
+ size 251686096
last-checkpoint/global_step150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e84ad5ab5d56709c933eb3e76fda7609114ea7257d8c79662462699ccc15306
3
+ size 251686224
last-checkpoint/global_step150/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c43ceb50d2abb1069848965147a144b507ea964f7f1eecee6eb98362e89af5ed
3
+ size 84231276
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step140
 
1
+ global_step150
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26e86d514dddce0b14f45dd634a478c13db09aafb21ec02229544713a63bc1e7
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34572b4ea0c45cec898917df37b81bb05851f4de5cc1bcbafb774bab1bd5668
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2e5617e3007c4a85897d5bc30efdcdd93ae16461f578795dfc04877a4238ff6
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bad1eb070839a232fe7250717e387d5d3120d7edacea87879c84fecc74267f8
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d9ec9defbf9d184fe9bf33fb5021004937dc6216d5af83e76aa562e2a036d91
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0edab479cf5df2fd0e0eb08833b9040a0342b7b3b1ce5f746c88e4c78156c68
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.2121437788009644,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-140",
4
- "epoch": 0.021387923461788184,
5
  "eval_steps": 10,
6
- "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -323,6 +323,28 @@
323
  "eval_samples_per_second": 7.595,
324
  "eval_steps_per_second": 1.9,
325
  "step": 140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  }
327
  ],
328
  "logging_steps": 5,
@@ -337,7 +359,7 @@
337
  "early_stopping_threshold": 0.0
338
  },
339
  "attributes": {
340
- "early_stopping_patience_counter": 0
341
  }
342
  },
343
  "TrainerControl": {
@@ -346,12 +368,12 @@
346
  "should_evaluate": false,
347
  "should_log": false,
348
  "should_save": true,
349
- "should_training_stop": false
350
  },
351
  "attributes": {}
352
  }
353
  },
354
- "total_flos": 8.309137876524728e+17,
355
  "train_batch_size": 2,
356
  "trial_name": null,
357
  "trial_params": null
 
1
  {
2
  "best_metric": 1.2121437788009644,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-140",
4
+ "epoch": 0.02291563228048734,
5
  "eval_steps": 10,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
323
  "eval_samples_per_second": 7.595,
324
  "eval_steps_per_second": 1.9,
325
  "step": 140
326
+ },
327
+ {
328
+ "epoch": 0.02215177787113776,
329
+ "grad_norm": 0.24214355647563934,
330
+ "learning_rate": 3.6455629509730136e-07,
331
+ "loss": 1.3213,
332
+ "step": 145
333
+ },
334
+ {
335
+ "epoch": 0.02291563228048734,
336
+ "grad_norm": 0.40569353103637695,
337
+ "learning_rate": 0.0,
338
+ "loss": 1.0569,
339
+ "step": 150
340
+ },
341
+ {
342
+ "epoch": 0.02291563228048734,
343
+ "eval_loss": 1.2136216163635254,
344
+ "eval_runtime": 722.8743,
345
+ "eval_samples_per_second": 7.626,
346
+ "eval_steps_per_second": 1.908,
347
+ "step": 150
348
  }
349
  ],
350
  "logging_steps": 5,
 
359
  "early_stopping_threshold": 0.0
360
  },
361
  "attributes": {
362
+ "early_stopping_patience_counter": 1
363
  }
364
  },
365
  "TrainerControl": {
 
368
  "should_evaluate": false,
369
  "should_log": false,
370
  "should_save": true,
371
+ "should_training_stop": true
372
  },
373
  "attributes": {}
374
  }
375
  },
376
+ "total_flos": 8.902647724847923e+17,
377
  "train_batch_size": 2,
378
  "trial_name": null,
379
  "trial_params": null