ljcamargo commited on
Commit
ea7ccf1
·
verified ·
1 Parent(s): 742a48b

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccd1607970824f8435dff0ec490e2197f145ee38b543dfac0f375baae91f3e84
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12de36f02475ba36424b6cbbc78a99fb5d247b1f59b0671ec136b90196dbc42e
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05aa8a7bf492a3ad20e0d6edfff8c0717a4f0bbc1219a587cbf095503cf2d00e
3
- size 2479122661
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71a973f442004b75157ae01481531805c844e77a68190e59a5218c09d8d6df94
3
+ size 2479123301
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09b7290d8ca07041eee19d5fa227aba688d13ea17f7d6f3c0e4a7903d483d295
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1a0da602f8abf4bf342932694d528cc1f0baa4d5027de58ad34f4d9855d085
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19ed95effe7569c75627b601fa080ba53727e518015156dc63042342eab93ca8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38dd85de4e747e5477e492c54af5b212cebc40d19045c2dfc5361392de0ed8a7
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08,
6
  "eval_steps": 500,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -148,6 +148,76 @@
148
  "learning_rate": 4.617234468937876e-05,
149
  "loss": 1.3504,
150
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
  ],
153
  "logging_steps": 10,
@@ -167,7 +237,7 @@
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 3595273633732608.0,
171
  "train_batch_size": 2,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.12,
6
  "eval_steps": 500,
7
+ "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
148
  "learning_rate": 4.617234468937876e-05,
149
  "loss": 1.3504,
150
  "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.084,
154
+ "grad_norm": 33.88198471069336,
155
+ "learning_rate": 4.5971943887775554e-05,
156
+ "loss": 1.5029,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.088,
161
+ "grad_norm": 24.533716201782227,
162
+ "learning_rate": 4.5771543086172346e-05,
163
+ "loss": 1.2978,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.092,
168
+ "grad_norm": 27.563339233398438,
169
+ "learning_rate": 4.557114228456914e-05,
170
+ "loss": 1.7014,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.096,
175
+ "grad_norm": 29.428752899169922,
176
+ "learning_rate": 4.5370741482965936e-05,
177
+ "loss": 1.3845,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.1,
182
+ "grad_norm": 20.272520065307617,
183
+ "learning_rate": 4.517034068136273e-05,
184
+ "loss": 1.2192,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.104,
189
+ "grad_norm": 47.12469482421875,
190
+ "learning_rate": 4.496993987975952e-05,
191
+ "loss": 1.2814,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.108,
196
+ "grad_norm": 18.20330238342285,
197
+ "learning_rate": 4.476953907815631e-05,
198
+ "loss": 1.2717,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 0.112,
203
+ "grad_norm": 39.07451248168945,
204
+ "learning_rate": 4.456913827655311e-05,
205
+ "loss": 1.3291,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 0.116,
210
+ "grad_norm": 50.37272644042969,
211
+ "learning_rate": 4.43687374749499e-05,
212
+ "loss": 1.3691,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 0.12,
217
+ "grad_norm": 23.233367919921875,
218
+ "learning_rate": 4.4168336673346694e-05,
219
+ "loss": 1.4183,
220
+ "step": 300
221
  }
222
  ],
223
  "logging_steps": 10,
 
237
  "attributes": {}
238
  }
239
  },
240
+ "total_flos": 5404828786993152.0,
241
  "train_batch_size": 2,
242
  "trial_name": null,
243
  "trial_params": null