alicegoesdown committed on
Commit
5ea5cf3
·
verified ·
1 Parent(s): 6e04ffd

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/lora_lower/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1843627ca7f5c8892cc27ec365b7b71475f9c7c28bf3db1528f68975cab934af
3
  size 2058899176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:462b6bff6479bc4430adab26dfb4c275946fb47f94c48e49edfad477094d0422
3
  size 2058899176
last-checkpoint/lora_top/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a788bb0ebd30119e21444b5ca652ef18fb146fe392e34129c932ff9be592f7a
3
  size 2058359328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4dcaaf734b9b1a56cb2294cf0fa8500e082ee74b2b20b5cd2c67e1122555870
3
  size 2058359328
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:839291e7974e74a87e53a123a3881b8e276d083b1f2193ef431e4dc80bb107c3
3
  size 2061522259
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c82dcc7cfa9a457a5aa17056d6b326ba023342eb225aaf0898e6e54c2bb6077
3
  size 2061522259
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5925e82888e152f6444154d23c1b4f62c064f08b33b27aebad540c586011183c
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99714ae94e1721c655a4d2e5fffbbc6ed7e1f5ed893f7bf8f89ada975f3ed81f
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6f51347b38751994e31f402f6d1cbfdce41c21e3b2e0fd15f1fdf02faa3c7d5
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f41cbca3e8e87d3857cac1912cf18c05169bb171e7530cf76d23b482cbc432c
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 4.046905040740967,
3
- "best_model_checkpoint": "./output/checkpoint-150",
4
- "epoch": 0.02449779519843214,
5
  "eval_steps": 150,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -120,6 +120,119 @@
120
  "eval_samples_per_second": 39.533,
121
  "eval_steps_per_second": 39.533,
122
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  }
124
  ],
125
  "logging_steps": 10,
@@ -139,7 +252,7 @@
139
  "attributes": {}
140
  }
141
  },
142
- "total_flos": 7269696958758912.0,
143
  "train_batch_size": 16,
144
  "trial_name": null,
145
  "trial_params": null
 
1
  {
2
+ "best_metric": 3.867088794708252,
3
+ "best_model_checkpoint": "./output/checkpoint-300",
4
+ "epoch": 0.04899559039686428,
5
  "eval_steps": 150,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
120
  "eval_samples_per_second": 39.533,
121
  "eval_steps_per_second": 39.533,
122
  "step": 150
123
+ },
124
+ {
125
+ "epoch": 0.026130981544994283,
126
+ "grad_norm": 39.35482406616211,
127
+ "learning_rate": 2.1536377422415337e-05,
128
+ "loss": 3.9314,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 0.027764167891556425,
133
+ "grad_norm": 49.44642639160156,
134
+ "learning_rate": 2.1533500038449112e-05,
135
+ "loss": 3.9469,
136
+ "step": 170
137
+ },
138
+ {
139
+ "epoch": 0.02939735423811857,
140
+ "grad_norm": 47.13311004638672,
141
+ "learning_rate": 2.1530180298496075e-05,
142
+ "loss": 3.8316,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 0.031030540584680712,
147
+ "grad_norm": 45.86495590209961,
148
+ "learning_rate": 2.1526418339017734e-05,
149
+ "loss": 3.9398,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 0.03266372693124286,
154
+ "grad_norm": 42.1937141418457,
155
+ "learning_rate": 2.152221431465351e-05,
156
+ "loss": 3.8032,
157
+ "step": 200
158
+ },
159
+ {
160
+ "epoch": 0.034296913277805,
161
+ "grad_norm": 43.07133865356445,
162
+ "learning_rate": 2.1517568398214374e-05,
163
+ "loss": 3.823,
164
+ "step": 210
165
+ },
166
+ {
167
+ "epoch": 0.03593009962436714,
168
+ "grad_norm": 52.06683349609375,
169
+ "learning_rate": 2.1512480780675756e-05,
170
+ "loss": 3.6545,
171
+ "step": 220
172
+ },
173
+ {
174
+ "epoch": 0.03756328597092928,
175
+ "grad_norm": 43.83436965942383,
176
+ "learning_rate": 2.150695167116969e-05,
177
+ "loss": 3.8256,
178
+ "step": 230
179
+ },
180
+ {
181
+ "epoch": 0.039196472317491425,
182
+ "grad_norm": 46.70238494873047,
183
+ "learning_rate": 2.1500981296976207e-05,
184
+ "loss": 3.8661,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 0.04082965866405357,
189
+ "grad_norm": 47.87880325317383,
190
+ "learning_rate": 2.1494569903514006e-05,
191
+ "loss": 3.7335,
192
+ "step": 250
193
+ },
194
+ {
195
+ "epoch": 0.04246284501061571,
196
+ "grad_norm": 42.191898345947266,
197
+ "learning_rate": 2.1487717754330366e-05,
198
+ "loss": 3.7399,
199
+ "step": 260
200
+ },
201
+ {
202
+ "epoch": 0.04409603135717785,
203
+ "grad_norm": 41.03351974487305,
204
+ "learning_rate": 2.1480425131090295e-05,
205
+ "loss": 3.703,
206
+ "step": 270
207
+ },
208
+ {
209
+ "epoch": 0.04572921770374,
210
+ "grad_norm": 41.71271896362305,
211
+ "learning_rate": 2.1472692333564976e-05,
212
+ "loss": 3.6198,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 0.04736240405030214,
217
+ "grad_norm": 40.65316390991211,
218
+ "learning_rate": 2.1464519679619426e-05,
219
+ "loss": 3.6979,
220
+ "step": 290
221
+ },
222
+ {
223
+ "epoch": 0.04899559039686428,
224
+ "grad_norm": 37.19804382324219,
225
+ "learning_rate": 2.1455907505199437e-05,
226
+ "loss": 3.638,
227
+ "step": 300
228
+ },
229
+ {
230
+ "epoch": 0.04899559039686428,
231
+ "eval_loss": 3.867088794708252,
232
+ "eval_runtime": 17.9999,
233
+ "eval_samples_per_second": 27.778,
234
+ "eval_steps_per_second": 27.778,
235
+ "step": 300
236
  }
237
  ],
238
  "logging_steps": 10,
 
252
  "attributes": {}
253
  }
254
  },
255
+ "total_flos": 1.4443567455141888e+16,
256
  "train_batch_size": 16,
257
  "trial_name": null,
258
  "trial_params": null