abdo-Mansour commited on
Commit
5a3f6d4
·
verified ·
1 Parent(s): dd94095

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7899771a087474a43e06bd28a8f45f3cd059337cd48735439b8ca48c96012a96
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5115a33042e3fd397dd32d66d32a19c0c16be57ac770db5f8d1d1f33faa5fc9
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e871bc1215fd69846b0c86915f65f94a04012ae84b81406e0748cd8df5d5c493
3
  size 323298107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec98cd6c428dfc7f63b47f4b24f7c520934f5445ba14cb6244683725a7da0cd
3
  size 323298107
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d01b6bcf1ce8d302bf604d2caa30f647c31e144636452ecbabe94efa6e1ff85
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b7b75229400998a7d66acd1bde59e0297fb4e980d83e44d61fd3bb4d4fabf86
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a1b94068eaea136d63c62e77d4a2297e17922006283b618e301589c374ba369
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acdc4719af55e3178da4e7dfea565c4d31664a72cb7973326a772c14ba81cc93
3
  size 14917
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04a843a996a1f9dfcf8170c57ee770c1bf26ad7bacf1e9c3a455b70d583baef0
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3defd38fd8f62a7a3f6971d154c9135f2e1818e83bbd2a48dbbd5417f2bd284
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:446221154b37e4dc88e7ffc37fc20cc194db931246268ef3708102ef7a9ef6f4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aca282f4652e04d966ebc751281f63a231106b830e64c0934d14aac4688fe840
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 400,
3
- "best_metric": 0.2940136194229126,
4
- "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-400",
5
- "epoch": 0.943952802359882,
6
  "eval_steps": 50,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -352,6 +352,49 @@
352
  "eval_samples_per_second": 3.134,
353
  "eval_steps_per_second": 0.784,
354
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
355
  }
356
  ],
357
  "logging_steps": 10,
@@ -371,7 +414,7 @@
371
  "attributes": {}
372
  }
373
  },
374
- "total_flos": 9.618911528799437e+16,
375
  "train_batch_size": 2,
376
  "trial_name": null,
377
  "trial_params": null
 
1
  {
2
+ "best_global_step": 450,
3
+ "best_metric": 0.29345282912254333,
4
+ "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-450",
5
+ "epoch": 1.0613569321533922,
6
  "eval_steps": 50,
7
+ "global_step": 450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
352
  "eval_samples_per_second": 3.134,
353
  "eval_steps_per_second": 0.784,
354
  "step": 400
355
+ },
356
+ {
357
+ "epoch": 0.967551622418879,
358
+ "grad_norm": 1.4067103862762451,
359
+ "learning_rate": 1.0493642866720528e-05,
360
+ "loss": 0.206,
361
+ "step": 410
362
+ },
363
+ {
364
+ "epoch": 0.9911504424778761,
365
+ "grad_norm": 2.350339651107788,
366
+ "learning_rate": 1.0151831192678511e-05,
367
+ "loss": 0.2531,
368
+ "step": 420
369
+ },
370
+ {
371
+ "epoch": 1.0141592920353983,
372
+ "grad_norm": 2.0622878074645996,
373
+ "learning_rate": 9.807219538334829e-06,
374
+ "loss": 0.2002,
375
+ "step": 430
376
+ },
377
+ {
378
+ "epoch": 1.0377581120943953,
379
+ "grad_norm": 1.5239025354385376,
380
+ "learning_rate": 9.460392046819943e-06,
381
+ "loss": 0.1457,
382
+ "step": 440
383
+ },
384
+ {
385
+ "epoch": 1.0613569321533922,
386
+ "grad_norm": 1.3972654342651367,
387
+ "learning_rate": 9.11193661727792e-06,
388
+ "loss": 0.1686,
389
+ "step": 450
390
+ },
391
+ {
392
+ "epoch": 1.0613569321533922,
393
+ "eval_loss": 0.29345282912254333,
394
+ "eval_runtime": 376.3252,
395
+ "eval_samples_per_second": 3.136,
396
+ "eval_steps_per_second": 0.784,
397
+ "step": 450
398
  }
399
  ],
400
  "logging_steps": 10,
 
414
  "attributes": {}
415
  }
416
  },
417
+ "total_flos": 1.0824644623440282e+17,
418
  "train_batch_size": 2,
419
  "trial_name": null,
420
  "trial_params": null