Seynro committed on
Commit
0c8c663
·
verified ·
1 Parent(s): 037afcb

Training in progress, step 1400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d838a5ea1d7a4dc8078b8d4b929ed97c3470e3371a8bb2486e51d5dfd7c4fd84
3
  size 540001920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb2357db4d5dbc8b50b73aee33258be5078a83ac5942fbb18c3611064b0ba01e
3
  size 540001920
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7fff9e74897ca1492335c9d8fe5b040902cbb283142b17c7520e36732329301
3
  size 1080097722
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:799e6d327ebe8e5729bbf02861a8f0503d38df901843c264b5fa151bb9f8f1f0
3
  size 1080097722
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bed33057040b93d153a6ff39538a6442d5d06dbf384014b88b5824efe94b6e35
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56c984d039682da209f1f0b16ab958ba4afe2a245e0b9d729f68d8024510ead0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e9157fdd19d09ebce63a8190c2be1a0db31dfdda72875e01c571181cfafcf28
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a18a679691bf356f5e92141e3d0862158121dd0dcde0cf358005325db8d44228
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.0140105078809105,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -147,6 +147,62 @@
147
  "learning_rate": 6.747099073871009e-06,
148
  "loss": 0.3459,
149
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  }
151
  ],
152
  "logging_steps": 50,
@@ -175,7 +231,7 @@
175
  "attributes": {}
176
  }
177
  },
178
- "total_flos": 6260818378752000.0,
179
  "train_batch_size": 6,
180
  "trial_name": null,
181
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.812609457092819,
5
  "eval_steps": 500,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
147
  "learning_rate": 6.747099073871009e-06,
148
  "loss": 0.3459,
149
  "step": 1000
150
+ },
151
+ {
152
+ "epoch": 7.364273204903678,
153
+ "grad_norm": 3.011735200881958,
154
+ "learning_rate": 1.8115901680389373e-06,
155
+ "loss": 0.2136,
156
+ "step": 1050
157
+ },
158
+ {
159
+ "epoch": 7.714535901926444,
160
+ "grad_norm": 1.369733452796936,
161
+ "learning_rate": 1.8128355854463153e-10,
162
+ "loss": 0.2193,
163
+ "step": 1100
164
+ },
165
+ {
166
+ "epoch": 8.063047285464098,
167
+ "grad_norm": 2.204275369644165,
168
+ "learning_rate": 1.7419630088165716e-06,
169
+ "loss": 0.2068,
170
+ "step": 1150
171
+ },
172
+ {
173
+ "epoch": 8.413309982486865,
174
+ "grad_norm": 3.1148300170898438,
175
+ "learning_rate": 6.624338192647677e-06,
176
+ "loss": 0.2043,
177
+ "step": 1200
178
+ },
179
+ {
180
+ "epoch": 8.763572679509632,
181
+ "grad_norm": 2.349841356277466,
182
+ "learning_rate": 1.3490758880415994e-05,
183
+ "loss": 0.1968,
184
+ "step": 1250
185
+ },
186
+ {
187
+ "epoch": 9.112084063047286,
188
+ "grad_norm": 2.6443629264831543,
189
+ "learning_rate": 2.07146919805216e-05,
190
+ "loss": 0.1814,
191
+ "step": 1300
192
+ },
193
+ {
194
+ "epoch": 9.462346760070053,
195
+ "grad_norm": 2.4111177921295166,
196
+ "learning_rate": 2.65849160614322e-05,
197
+ "loss": 0.1568,
198
+ "step": 1350
199
+ },
200
+ {
201
+ "epoch": 9.812609457092819,
202
+ "grad_norm": 2.6896817684173584,
203
+ "learning_rate": 2.9710879312112288e-05,
204
+ "loss": 0.184,
205
+ "step": 1400
206
  }
207
  ],
208
  "logging_steps": 50,
 
231
  "attributes": {}
232
  }
233
  },
234
+ "total_flos": 8764518629376000.0,
235
  "train_batch_size": 6,
236
  "trial_name": null,
237
  "trial_params": null