Dohahemdann commited on
Commit
67f8137
·
verified ·
1 Parent(s): 30357c6

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5fb75a4a36553b73994dbd2353c33c1160b78c14db20d5475338392d9c4157d
3
  size 75579856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0120a8149bd4c9c80c434ee1d50d17d11b5ff6a9a540930d8477b8330665d7c6
3
  size 75579856
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f97ab80c0e9ed284bc1431c9250044bcd13f5ab7aee843eb60252e95c35c049
3
  size 39015574
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf508cce684f141d23779498e2107e05fe761d23d480c911d36daef0444c4a1
3
  size 39015574
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b432237b00d1d90d6724248db250f29e52844d78501ed7ee56e7727e8a88c36f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:923d278f6956e6477248046106bcc976164d13b97aca384b8661e5bb580c9f50
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a625473bb0a513e1727ebc85c0930216deed7d5ec1e4fd299b7895bbd369bcc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ccbb7e9b2bee49a3a83a2cbe68b9ea55fce3c23d9b7073b07177a5b6c6d109
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.01383365035448729,
6
  "eval_steps": 500,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -148,6 +148,76 @@
148
  "learning_rate": 2.6027397260273973e-05,
149
  "loss": 2.5836,
150
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
  ],
153
  "logging_steps": 1,
@@ -167,7 +237,7 @@
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 673991251476480.0,
171
  "train_batch_size": 2,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.020750475531730934,
6
  "eval_steps": 500,
7
+ "global_step": 30,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
148
  "learning_rate": 2.6027397260273973e-05,
149
  "loss": 2.5836,
150
  "step": 20
151
+ },
152
+ {
153
+ "epoch": 0.014525332872211656,
154
+ "grad_norm": 0.2680312991142273,
155
+ "learning_rate": 2.7397260273972603e-05,
156
+ "loss": 2.748,
157
+ "step": 21
158
+ },
159
+ {
160
+ "epoch": 0.01521701538993602,
161
+ "grad_norm": 0.2818455398082733,
162
+ "learning_rate": 2.8767123287671234e-05,
163
+ "loss": 2.5701,
164
+ "step": 22
165
+ },
166
+ {
167
+ "epoch": 0.015908697907660384,
168
+ "grad_norm": 0.2324114739894867,
169
+ "learning_rate": 3.0136986301369862e-05,
170
+ "loss": 1.8337,
171
+ "step": 23
172
+ },
173
+ {
174
+ "epoch": 0.01660038042538475,
175
+ "grad_norm": 0.2770012617111206,
176
+ "learning_rate": 3.1506849315068496e-05,
177
+ "loss": 2.7497,
178
+ "step": 24
179
+ },
180
+ {
181
+ "epoch": 0.017292062943109112,
182
+ "grad_norm": 0.27101394534111023,
183
+ "learning_rate": 3.287671232876712e-05,
184
+ "loss": 2.8125,
185
+ "step": 25
186
+ },
187
+ {
188
+ "epoch": 0.017983745460833478,
189
+ "grad_norm": 0.27373552322387695,
190
+ "learning_rate": 3.424657534246575e-05,
191
+ "loss": 2.7676,
192
+ "step": 26
193
+ },
194
+ {
195
+ "epoch": 0.01867542797855784,
196
+ "grad_norm": 0.2803725302219391,
197
+ "learning_rate": 3.561643835616438e-05,
198
+ "loss": 2.2638,
199
+ "step": 27
200
+ },
201
+ {
202
+ "epoch": 0.019367110496282206,
203
+ "grad_norm": 0.25683945417404175,
204
+ "learning_rate": 3.698630136986301e-05,
205
+ "loss": 2.8515,
206
+ "step": 28
207
+ },
208
+ {
209
+ "epoch": 0.020058793014006572,
210
+ "grad_norm": 0.32016420364379883,
211
+ "learning_rate": 3.8356164383561644e-05,
212
+ "loss": 2.8561,
213
+ "step": 29
214
+ },
215
+ {
216
+ "epoch": 0.020750475531730934,
217
+ "grad_norm": 0.4947223663330078,
218
+ "learning_rate": 3.9726027397260274e-05,
219
+ "loss": 2.9034,
220
+ "step": 30
221
  }
222
  ],
223
  "logging_steps": 1,
 
237
  "attributes": {}
238
  }
239
  },
240
+ "total_flos": 1008346610073600.0,
241
  "train_batch_size": 2,
242
  "trial_name": null,
243
  "trial_params": null