brian-todd committed on
Commit
32702df
·
1 Parent(s): d577c26

Training in progress, step 170

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f17325da574bc4de45d40e009daa3f7adc44be55823cfbef4b8be9afdcd36663
3
  size 522284877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b731b33a663df9b5d325ea24811ab15b363e54445aff4ec4bd6fe507e6467d7
3
  size 522284877
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f17325da574bc4de45d40e009daa3f7adc44be55823cfbef4b8be9afdcd36663
3
  size 522284877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b731b33a663df9b5d325ea24811ab15b363e54445aff4ec4bd6fe507e6467d7
3
  size 522284877
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46a23fcd6874a1853674994bee58a2a786096d6a41cbad082a14e2a21191dc30
3
  size 1044539653
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51b0f686c1f2bee0e68037a4cca40921cdb05f7528cabced5713e8ae0cb43297
3
  size 1044539653
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c787baf8f99d6b2efc301f4aff54d4957ec120513fe6d10f749a942c07bf7140
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb77c5432001f75ca5c8c8d2df598ea67c727a71f0a1359228917568a1d0916
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e87ab8212c9c1b5c9ac2af513be72fc40777cbdb25e7df360e56de5cd75aab0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3841b7d3094f97af299f97dddaa8fe1e5f1142ed0601dee132ea7f776f6ffff
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 40.0,
5
- "global_step": 160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -102,11 +102,17 @@
102
  "learning_rate": 0.0002,
103
  "loss": 0.009,
104
  "step": 160
 
 
 
 
 
 
105
  }
106
  ],
107
  "max_steps": 200,
108
  "num_train_epochs": 50,
109
- "total_flos": 5.23073568374784e+16,
110
  "trial_name": null,
111
  "trial_params": null
112
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 42.5,
5
+ "global_step": 170,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
102
  "learning_rate": 0.0002,
103
  "loss": 0.009,
104
  "step": 160
105
+ },
106
+ {
107
+ "epoch": 42.5,
108
+ "learning_rate": 0.0002,
109
+ "loss": 0.0088,
110
+ "step": 170
111
  }
112
  ],
113
  "max_steps": 200,
114
  "num_train_epochs": 50,
115
+ "total_flos": 5.558694508363776e+16,
116
  "trial_name": null,
117
  "trial_params": null
118
  }