Phonsiri commited on
Commit
faee915
·
verified ·
1 Parent(s): e5a6813

Training in progress, step 275, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b30428edb2ec353fb00d980723dab87d47a3a57a437c530816fd877e52bf44df
3
  size 6171927112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67d3498e837d0f50c42422fc84f1f7baafb513956ff5819ed74aa854a3f03ca9
3
  size 6171927112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:573f39757dc714be30e45637a17b4dc1aaf790d8c76a677b9cc40389734d5bbc
3
  size 12344133221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bc7c892ca343b720a60812170b7c972378e9279e1b626c85c799d1abdf71c3
3
  size 12344133221
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:715563349ae345b2e3188b314e826936503719f9ac8cb3873b644d2566f112eb
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c125ae2c56e328b466f25a9167f095fce27b7bcd4cd7957a7b20020419f7855
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.26666666666666666,
6
  "eval_steps": 500,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -148,10 +148,59 @@
148
  "learning_rate": 1.768888888888889e-05,
149
  "loss": 4.100537109375,
150
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  }
152
  ],
153
  "logging_steps": 10,
154
- "max_steps": 2250,
155
  "num_input_tokens_seen": 0,
156
  "num_train_epochs": 3,
157
  "save_steps": 25,
@@ -167,7 +216,7 @@
167
  "attributes": {}
168
  }
169
  },
170
- "total_flos": 4.36434175524864e+17,
171
  "train_batch_size": 4,
172
  "trial_name": null,
173
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.09651662718259191,
6
  "eval_steps": 500,
7
+ "global_step": 275,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
148
  "learning_rate": 1.768888888888889e-05,
149
  "loss": 4.100537109375,
150
  "step": 200
151
+ },
152
+ {
153
+ "epoch": 0.0737036062121611,
154
+ "grad_norm": 5.625,
155
+ "learning_rate": 4.888888888888889e-06,
156
+ "loss": 6.024441146850586,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 0.07721330174607353,
161
+ "grad_norm": 5.96875,
162
+ "learning_rate": 5.12280701754386e-06,
163
+ "loss": 5.444968414306641,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 0.08072299727998596,
168
+ "grad_norm": 5.78125,
169
+ "learning_rate": 5.356725146198831e-06,
170
+ "loss": 5.957166290283203,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 0.08423269281389839,
175
+ "grad_norm": 3.484375,
176
+ "learning_rate": 5.590643274853802e-06,
177
+ "loss": 5.767229080200195,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 0.08774238834781083,
182
+ "grad_norm": 6.25,
183
+ "learning_rate": 5.824561403508773e-06,
184
+ "loss": 5.761946487426758,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 0.09125208388172326,
189
+ "grad_norm": 8.375,
190
+ "learning_rate": 6.058479532163744e-06,
191
+ "loss": 5.641996765136719,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 0.0947617794156357,
196
+ "grad_norm": 4.9375,
197
+ "learning_rate": 6.292397660818715e-06,
198
+ "loss": 5.592499542236328,
199
+ "step": 270
200
  }
201
  ],
202
  "logging_steps": 10,
203
+ "max_steps": 8550,
204
  "num_input_tokens_seen": 0,
205
  "num_train_epochs": 3,
206
  "save_steps": 25,
 
216
  "attributes": {}
217
  }
218
  },
219
+ "total_flos": 6.00096991346688e+17,
220
  "train_batch_size": 4,
221
  "trial_name": null,
222
  "trial_params": null