Kajlid commited on
Commit
5d92467
·
verified ·
1 Parent(s): 3fb0549

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:973f5003702306b371cda780db67d7e038379c8b244d67c35ce938c8ab0bf96b
3
  size 262219392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6622ac26fe235a79bb8b295cc951d493bb49eff64de84ffd09c19e199f2e0b46
3
  size 262219392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4b96ffd460a0e1c3ba4e20bc59a3c67d71b5803fdaf98befbfd7b2a4a266160
3
  size 133778341
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcdaff135da8696151ceb5a5ee918bf9e78a07f05bdf03cba9ba121111ce1fc8
3
  size 133778341
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fee0874fa9afae54661807fadac685c3d3f843473b6af99cc43d812ec6e1b36
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20ab57b9b26fc7cb4418a4e1198e25ebb1da623aea7693e1fc71ff284d45724b
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07fd6c02ecd99f61cbdc485d41ab67693d370b3b850c6938a1f7e3f349931355
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f5bbb5b4ba44d34a455960920ef8eaf75574205648b376b9b795cb9f0b32ae
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0016,
6
  "eval_steps": 500,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -148,6 +148,76 @@
148
  "learning_rate": 8.800000000000001e-05,
149
  "loss": 0.7709,
150
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
  ],
153
  "logging_steps": 1,
@@ -162,12 +232,12 @@
162
  "should_evaluate": false,
163
  "should_log": false,
164
  "should_save": true,
165
- "should_training_stop": false
166
  },
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 9759882376581120.0,
171
  "train_batch_size": 2,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0024,
6
  "eval_steps": 500,
7
+ "global_step": 30,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
148
  "learning_rate": 8.800000000000001e-05,
149
  "loss": 0.7709,
150
  "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.00168,
154
+ "grad_norm": 0.07268717885017395,
155
+ "learning_rate": 8e-05,
156
+ "loss": 0.7329,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.00176,
161
+ "grad_norm": 0.07336169481277466,
162
+ "learning_rate": 7.2e-05,
163
+ "loss": 0.5357,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.00184,
168
+ "grad_norm": 0.15232133865356445,
169
+ "learning_rate": 6.400000000000001e-05,
170
+ "loss": 0.9113,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.00192,
175
+ "grad_norm": 0.18304885923862457,
176
+ "learning_rate": 5.6000000000000006e-05,
177
+ "loss": 0.7212,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.002,
182
+ "grad_norm": 0.11778385192155838,
183
+ "learning_rate": 4.8e-05,
184
+ "loss": 0.5004,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.00208,
189
+ "grad_norm": 0.08693696558475494,
190
+ "learning_rate": 4e-05,
191
+ "loss": 0.5905,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.00216,
196
+ "grad_norm": 0.10036703199148178,
197
+ "learning_rate": 3.2000000000000005e-05,
198
+ "loss": 0.659,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.00224,
203
+ "grad_norm": 0.16843527555465698,
204
+ "learning_rate": 2.4e-05,
205
+ "loss": 0.6575,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.00232,
210
+ "grad_norm": 0.1020098477602005,
211
+ "learning_rate": 1.6000000000000003e-05,
212
+ "loss": 0.6988,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.0024,
217
+ "grad_norm": 0.07744976878166199,
218
+ "learning_rate": 8.000000000000001e-06,
219
+ "loss": 0.7703,
220
+ "step": 30
221
  }
222
  ],
223
  "logging_steps": 1,
 
232
  "should_evaluate": false,
233
  "should_log": false,
234
  "should_save": true,
235
+ "should_training_stop": true
236
  },
237
  "attributes": {}
238
  }
239
  },
240
+ "total_flos": 1.4619103575552e+16,
241
  "train_batch_size": 2,
242
  "trial_name": null,
243
  "trial_params": null