abdo-Mansour commited on
Commit
26dc9e9
·
verified ·
1 Parent(s): 82ddf41

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a95a9f8b7fcf070707492d0022f4160abddf649685c62bd0acb0b2191db9f5e
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2439e9d102dd0282d5be625a5206ace7cb88705000d6860c90b7dd6e87479dda
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e89b0e30ab40bd6b04823383a4786638df8d21340402e1007a60c2792997f10f
3
  size 323298107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7e6893629af82f9fee403650d1001e8491330b5ba33f3da9b1fb841e78e5625
3
  size 323298107
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a57f5c296f52913583d889b2b838ab564aea78743337131e3f761d182b38830
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0508ea4bdd39a5b7fea7db06c94302ab81c11bf628134ea211975658de537b4
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:260ec53777947eef3f02712019ac0cb83b4c23f3edf0a8e24296a881ab7c1aa6
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:465843e6fd6c63a8484074f7251981a985fb35e33f610f0fb99145df1ac998bc
3
  size 14917
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0e56cd39bf95ce68a94b21421aa9b66e0b651c8e26272987fddafbf9f0a90ff
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f2fe62185040eb3e7aa577bdc5ab136376cdc8488f5c2c0e5ad1f929361d62
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eeaf5d2be120879c36c1f532c4608c5e2ddaf2ba45ca3f2b9987ec0786c20625
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9a3a7f4dfbdc75f44961641f4d80fe3f960908acd6fd5ff2a8c33e958c30dc5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 200,
3
- "best_metric": 0.3187030255794525,
4
- "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-200",
5
- "epoch": 0.471976401179941,
6
  "eval_steps": 50,
7
- "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -180,6 +180,49 @@
180
  "eval_samples_per_second": 3.138,
181
  "eval_steps_per_second": 0.785,
182
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  }
184
  ],
185
  "logging_steps": 10,
@@ -199,7 +242,7 @@
199
  "attributes": {}
200
  }
201
  },
202
- "total_flos": 4.822198368652493e+16,
203
  "train_batch_size": 2,
204
  "trial_name": null,
205
  "trial_params": null
 
1
  {
2
+ "best_global_step": 250,
3
+ "best_metric": 0.3116294741630554,
4
+ "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-250",
5
+ "epoch": 0.5899705014749262,
6
  "eval_steps": 50,
7
+ "global_step": 250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
180
  "eval_samples_per_second": 3.138,
181
  "eval_steps_per_second": 0.785,
182
  "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.49557522123893805,
186
+ "grad_norm": 5.596831798553467,
187
+ "learning_rate": 1.5916002290209964e-05,
188
+ "loss": 0.2322,
189
+ "step": 210
190
+ },
191
+ {
192
+ "epoch": 0.5191740412979351,
193
+ "grad_norm": 2.483771562576294,
194
+ "learning_rate": 1.5738741478676192e-05,
195
+ "loss": 0.2322,
196
+ "step": 220
197
+ },
198
+ {
199
+ "epoch": 0.5427728613569321,
200
+ "grad_norm": 2.2052314281463623,
201
+ "learning_rate": 1.5549210446287542e-05,
202
+ "loss": 0.2337,
203
+ "step": 230
204
+ },
205
+ {
206
+ "epoch": 0.5663716814159292,
207
+ "grad_norm": 4.261275768280029,
208
+ "learning_rate": 1.534773046265049e-05,
209
+ "loss": 0.2147,
210
+ "step": 240
211
+ },
212
+ {
213
+ "epoch": 0.5899705014749262,
214
+ "grad_norm": 2.935073137283325,
215
+ "learning_rate": 1.5134643051758448e-05,
216
+ "loss": 0.2523,
217
+ "step": 250
218
+ },
219
+ {
220
+ "epoch": 0.5899705014749262,
221
+ "eval_loss": 0.3116294741630554,
222
+ "eval_runtime": 375.6346,
223
+ "eval_samples_per_second": 3.141,
224
+ "eval_steps_per_second": 0.785,
225
+ "step": 250
226
  }
227
  ],
228
  "logging_steps": 10,
 
242
  "attributes": {}
243
  }
244
  },
245
+ "total_flos": 6.021815188271923e+16,
246
  "train_batch_size": 2,
247
  "trial_name": null,
248
  "trial_params": null