alicegoesdown committed on
Commit
c4ea699
·
verified ·
1 Parent(s): 0ef03e8

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/lora_top/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a3e1ce388fbb04bcc402f98b97bdba57b483a337fa2db42078ad43f1180290
3
  size 12591264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74c935a4db11d9954248a9ac8c7f1ed129a2d53ead6a4137b79d300258c6b6fd
3
  size 12591264
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e52720f955729f297c3cd82dd0dc7537ca06ad2f31133e703e5376940771e590
3
  size 25206586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c794acd8ed44a50952d429716499bc5f43af1db5a3592ccabf56d02c4146e251
3
  size 25206586
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51cb429aa9ea83b39fbadd5af13147f084df52d8164a214fef2929a47c6ac353
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe75e0f6d5068d897d45b2b460d1ca19c86e7c11996a5acadcd9e0af782efe20
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce61aeb6f544a7922e9c19d6f330ea70f32eaee2533d00b2d71d15e1a48daef0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e6f142d93dbd6e62ebe01d74ed2ceb68210344a36e52cda5c9422f1f0eeb827
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 3.8206655979156494,
3
- "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 0.6944444444444444,
5
  "eval_steps": 150,
6
- "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -346,6 +346,119 @@
346
  "eval_samples_per_second": 38.779,
347
  "eval_steps_per_second": 38.779,
348
  "step": 450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  }
350
  ],
351
  "logging_steps": 10,
@@ -365,7 +478,7 @@
365
  "attributes": {}
366
  }
367
  },
368
- "total_flos": 1716594654314496.0,
369
  "train_batch_size": 16,
370
  "trial_name": null,
371
  "trial_params": null
 
1
  {
2
+ "best_metric": 3.631777286529541,
3
+ "best_model_checkpoint": "./output/checkpoint-600",
4
+ "epoch": 0.9259259259259259,
5
  "eval_steps": 150,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
346
  "eval_samples_per_second": 38.779,
347
  "eval_steps_per_second": 38.779,
348
  "step": 450
349
+ },
350
+ {
351
+ "epoch": 0.7098765432098766,
352
+ "grad_norm": 2.024686813354492,
353
+ "learning_rate": 0.0002960221815726757,
354
+ "loss": 3.69,
355
+ "step": 460
356
+ },
357
+ {
358
+ "epoch": 0.7253086419753086,
359
+ "grad_norm": 2.0512404441833496,
360
+ "learning_rate": 0.00029579917391812314,
361
+ "loss": 3.6132,
362
+ "step": 470
363
+ },
364
+ {
365
+ "epoch": 0.7407407407407407,
366
+ "grad_norm": 2.0870227813720703,
367
+ "learning_rate": 0.0002955701730305872,
368
+ "loss": 3.6681,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 0.7561728395061729,
373
+ "grad_norm": 2.38336443901062,
374
+ "learning_rate": 0.00029533518832339727,
375
+ "loss": 3.5982,
376
+ "step": 490
377
+ },
378
+ {
379
+ "epoch": 0.7716049382716049,
380
+ "grad_norm": 3.0686960220336914,
381
+ "learning_rate": 0.0002950942294558544,
382
+ "loss": 3.7158,
383
+ "step": 500
384
+ },
385
+ {
386
+ "epoch": 0.7870370370370371,
387
+ "grad_norm": 1.8391352891921997,
388
+ "learning_rate": 0.0002948473063328338,
389
+ "loss": 3.4874,
390
+ "step": 510
391
+ },
392
+ {
393
+ "epoch": 0.8024691358024691,
394
+ "grad_norm": 3.0002377033233643,
395
+ "learning_rate": 0.00029459442910437797,
396
+ "loss": 3.5523,
397
+ "step": 520
398
+ },
399
+ {
400
+ "epoch": 0.8179012345679012,
401
+ "grad_norm": 2.9197888374328613,
402
+ "learning_rate": 0.0002943356081652793,
403
+ "loss": 3.5964,
404
+ "step": 530
405
+ },
406
+ {
407
+ "epoch": 0.8333333333333334,
408
+ "grad_norm": 2.4608469009399414,
409
+ "learning_rate": 0.0002940708541546529,
410
+ "loss": 3.6013,
411
+ "step": 540
412
+ },
413
+ {
414
+ "epoch": 0.8487654320987654,
415
+ "grad_norm": 2.6457505226135254,
416
+ "learning_rate": 0.00029380017795549906,
417
+ "loss": 3.5637,
418
+ "step": 550
419
+ },
420
+ {
421
+ "epoch": 0.8641975308641975,
422
+ "grad_norm": 2.789729356765747,
423
+ "learning_rate": 0.0002935235906942563,
424
+ "loss": 3.6736,
425
+ "step": 560
426
+ },
427
+ {
428
+ "epoch": 0.8796296296296297,
429
+ "grad_norm": 2.2199699878692627,
430
+ "learning_rate": 0.00029324110374034354,
431
+ "loss": 3.6183,
432
+ "step": 570
433
+ },
434
+ {
435
+ "epoch": 0.8950617283950617,
436
+ "grad_norm": 2.9708974361419678,
437
+ "learning_rate": 0.00029295272870569303,
438
+ "loss": 3.4817,
439
+ "step": 580
440
+ },
441
+ {
442
+ "epoch": 0.9104938271604939,
443
+ "grad_norm": 2.5598032474517822,
444
+ "learning_rate": 0.00029265847744427303,
445
+ "loss": 3.3191,
446
+ "step": 590
447
+ },
448
+ {
449
+ "epoch": 0.9259259259259259,
450
+ "grad_norm": 2.723311424255371,
451
+ "learning_rate": 0.0002923583620516003,
452
+ "loss": 3.6747,
453
+ "step": 600
454
+ },
455
+ {
456
+ "epoch": 0.9259259259259259,
457
+ "eval_loss": 3.631777286529541,
458
+ "eval_runtime": 14.5253,
459
+ "eval_samples_per_second": 34.423,
460
+ "eval_steps_per_second": 34.423,
461
+ "step": 600
462
  }
463
  ],
464
  "logging_steps": 10,
 
478
  "attributes": {}
479
  }
480
  },
481
+ "total_flos": 2268906529357824.0,
482
  "train_batch_size": 16,
483
  "trial_name": null,
484
  "trial_params": null