Rakhman16 commited on
Commit
cbc8afe
·
verified ·
1 Parent(s): 54a198b

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:764348990305a0c0352faa89e8736769c0d238ad2078665aa29fe01daedcbf47
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a6445366fb72dd64a30a112d0cfda302649ce8c29f7108f12ace64035585c67
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec02be8051eecb7fa903a5be0dd57bd83e957d928502ecb4301d03c3b20fb547
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebbdc69304c59243d4149daaa710b1485f7a643fa3ccf2294412ebf232abae9
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:823b0b5098205a0b28558e0dded4fca169974b462ac985081136e74ff9997e92
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca2a334725c06878338083a325b1190cf2f41e51415271d4e791a39c574e91d1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bc3df46a8b91726d99d86dcfb610586aab9195760e93dc8011c603031432524
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eedbad439d02a3d9e01c697324385512054df489e38825f0e26bf6d99fe88ac
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.20700447261333466,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-2000",
4
- "epoch": 1.4049877063575693,
5
  "eval_steps": 100,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -447,6 +447,116 @@
447
  "eval_samples_per_second": 66.281,
448
  "eval_steps_per_second": 2.081,
449
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
450
  }
451
  ],
452
  "logging_steps": 50,
@@ -466,7 +576,7 @@
466
  "attributes": {}
467
  }
468
  },
469
- "total_flos": 1.94843689058304e+16,
470
  "train_batch_size": 32,
471
  "trial_name": null,
472
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.20481644570827484,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-2500",
4
+ "epoch": 1.7562346329469616,
5
  "eval_steps": 100,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
447
  "eval_samples_per_second": 66.281,
448
  "eval_steps_per_second": 2.081,
449
  "step": 2000
450
+ },
451
+ {
452
+ "epoch": 1.4401123990165086,
453
+ "grad_norm": 25767.197265625,
454
+ "learning_rate": 1.9195361911454674e-05,
455
+ "loss": 0.2054,
456
+ "step": 2050
457
+ },
458
+ {
459
+ "epoch": 1.4752370916754478,
460
+ "grad_norm": 22215.111328125,
461
+ "learning_rate": 1.893183415319747e-05,
462
+ "loss": 0.2082,
463
+ "step": 2100
464
+ },
465
+ {
466
+ "epoch": 1.4752370916754478,
467
+ "eval_loss": 0.20631250739097595,
468
+ "eval_runtime": 67.1038,
469
+ "eval_samples_per_second": 66.464,
470
+ "eval_steps_per_second": 2.086,
471
+ "step": 2100
472
+ },
473
+ {
474
+ "epoch": 1.510361784334387,
475
+ "grad_norm": 27927.373046875,
476
+ "learning_rate": 1.8668306394940267e-05,
477
+ "loss": 0.2128,
478
+ "step": 2150
479
+ },
480
+ {
481
+ "epoch": 1.5454864769933263,
482
+ "grad_norm": 25635.267578125,
483
+ "learning_rate": 1.8404778636683066e-05,
484
+ "loss": 0.2078,
485
+ "step": 2200
486
+ },
487
+ {
488
+ "epoch": 1.5454864769933263,
489
+ "eval_loss": 0.20582793653011322,
490
+ "eval_runtime": 67.2723,
491
+ "eval_samples_per_second": 66.298,
492
+ "eval_steps_per_second": 2.081,
493
+ "step": 2200
494
+ },
495
+ {
496
+ "epoch": 1.5806111696522656,
497
+ "grad_norm": 25550.1171875,
498
+ "learning_rate": 1.814125087842586e-05,
499
+ "loss": 0.2058,
500
+ "step": 2250
501
+ },
502
+ {
503
+ "epoch": 1.6157358623112048,
504
+ "grad_norm": 21671.251953125,
505
+ "learning_rate": 1.787772312016866e-05,
506
+ "loss": 0.206,
507
+ "step": 2300
508
+ },
509
+ {
510
+ "epoch": 1.6157358623112048,
511
+ "eval_loss": 0.2059122622013092,
512
+ "eval_runtime": 67.4662,
513
+ "eval_samples_per_second": 66.107,
514
+ "eval_steps_per_second": 2.075,
515
+ "step": 2300
516
+ },
517
+ {
518
+ "epoch": 1.650860554970144,
519
+ "grad_norm": 21685.947265625,
520
+ "learning_rate": 1.7614195361911453e-05,
521
+ "loss": 0.2086,
522
+ "step": 2350
523
+ },
524
+ {
525
+ "epoch": 1.685985247629083,
526
+ "grad_norm": 24516.828125,
527
+ "learning_rate": 1.7350667603654252e-05,
528
+ "loss": 0.2069,
529
+ "step": 2400
530
+ },
531
+ {
532
+ "epoch": 1.685985247629083,
533
+ "eval_loss": 0.20495346188545227,
534
+ "eval_runtime": 67.1671,
535
+ "eval_samples_per_second": 66.402,
536
+ "eval_steps_per_second": 2.084,
537
+ "step": 2400
538
+ },
539
+ {
540
+ "epoch": 1.7211099402880223,
541
+ "grad_norm": 22610.7734375,
542
+ "learning_rate": 1.708713984539705e-05,
543
+ "loss": 0.2052,
544
+ "step": 2450
545
+ },
546
+ {
547
+ "epoch": 1.7562346329469616,
548
+ "grad_norm": 35525.84765625,
549
+ "learning_rate": 1.6823612087139845e-05,
550
+ "loss": 0.2051,
551
+ "step": 2500
552
+ },
553
+ {
554
+ "epoch": 1.7562346329469616,
555
+ "eval_loss": 0.20481644570827484,
556
+ "eval_runtime": 67.1059,
557
+ "eval_samples_per_second": 66.462,
558
+ "eval_steps_per_second": 2.086,
559
+ "step": 2500
560
  }
561
  ],
562
  "logging_steps": 50,
 
576
  "attributes": {}
577
  }
578
  },
579
+ "total_flos": 2.43560320303104e+16,
580
  "train_batch_size": 32,
581
  "trial_name": null,
582
  "trial_params": null