FredericFan committed on
Commit
32b0651
·
verified ·
1 Parent(s): 52253d2

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c001192b49ac33ee8f47a5309a370cfdfa4c3a70dda715a94b188da6ca70162
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d826c309b9f9492a58d67f6f2eecf7cdd6615d329adca02ac882e7b2d1de12cb
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7637d351cab3d505a7c81380e8e251c1b0641e2bf3a5583d4012a983c8e533e9
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3a553addc7ddb7e0af58a94d7cce4e00b2edf20465e6287f926268d68186099
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0233e0c60d0eeda67a49aa9719818a64279caead7bb9a9439f543fe78e64a894
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a7290491808e872bdfba856354703f0501eed54b9da7aaa39843c9c248c6159
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e79009f389b4fe7467240d87a72862340c60d6549edf4f6a20e75882e0962455
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c790c63fb7345bfd79d855efc8355667eb23eb3bcba2a26737d1f0ffb0ba07b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08755213767290115,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-3000",
4
- "epoch": 0.24,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -475,6 +475,84 @@
475
  "eval_samples_per_second": 22.789,
476
  "eval_steps_per_second": 5.697,
477
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  }
479
  ],
480
  "logging_steps": 50,
@@ -494,7 +572,7 @@
494
  "attributes": {}
495
  }
496
  },
497
- "total_flos": 7307494686720000.0,
498
  "train_batch_size": 4,
499
  "trial_name": null,
500
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.0869474709033966,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-3500",
4
+ "epoch": 0.28,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
475
  "eval_samples_per_second": 22.789,
476
  "eval_steps_per_second": 5.697,
477
  "step": 3000
478
+ },
479
+ {
480
+ "epoch": 0.244,
481
+ "grad_norm": 0.18366344273090363,
482
+ "learning_rate": 2.63424e-05,
483
+ "loss": 0.0781,
484
+ "step": 3050
485
+ },
486
+ {
487
+ "epoch": 0.248,
488
+ "grad_norm": 0.15975314378738403,
489
+ "learning_rate": 2.62824e-05,
490
+ "loss": 0.0771,
491
+ "step": 3100
492
+ },
493
+ {
494
+ "epoch": 0.252,
495
+ "grad_norm": 0.14510446786880493,
496
+ "learning_rate": 2.6222399999999998e-05,
497
+ "loss": 0.0734,
498
+ "step": 3150
499
+ },
500
+ {
501
+ "epoch": 0.256,
502
+ "grad_norm": 0.10040156543254852,
503
+ "learning_rate": 2.61624e-05,
504
+ "loss": 0.0624,
505
+ "step": 3200
506
+ },
507
+ {
508
+ "epoch": 0.26,
509
+ "grad_norm": 0.1453912854194641,
510
+ "learning_rate": 2.61024e-05,
511
+ "loss": 0.0661,
512
+ "step": 3250
513
+ },
514
+ {
515
+ "epoch": 0.264,
516
+ "grad_norm": 0.13999666273593903,
517
+ "learning_rate": 2.60424e-05,
518
+ "loss": 0.0694,
519
+ "step": 3300
520
+ },
521
+ {
522
+ "epoch": 0.268,
523
+ "grad_norm": 0.13396582007408142,
524
+ "learning_rate": 2.59824e-05,
525
+ "loss": 0.0692,
526
+ "step": 3350
527
+ },
528
+ {
529
+ "epoch": 0.272,
530
+ "grad_norm": 0.1334969699382782,
531
+ "learning_rate": 2.59224e-05,
532
+ "loss": 0.0629,
533
+ "step": 3400
534
+ },
535
+ {
536
+ "epoch": 0.276,
537
+ "grad_norm": 0.16296976804733276,
538
+ "learning_rate": 2.5862399999999998e-05,
539
+ "loss": 0.0679,
540
+ "step": 3450
541
+ },
542
+ {
543
+ "epoch": 0.28,
544
+ "grad_norm": 0.1321544647216797,
545
+ "learning_rate": 2.58024e-05,
546
+ "loss": 0.0764,
547
+ "step": 3500
548
+ },
549
+ {
550
+ "epoch": 0.28,
551
+ "eval_loss": 0.0869474709033966,
552
+ "eval_runtime": 87.6679,
553
+ "eval_samples_per_second": 22.813,
554
+ "eval_steps_per_second": 5.703,
555
+ "step": 3500
556
  }
557
  ],
558
  "logging_steps": 50,
 
572
  "attributes": {}
573
  }
574
  },
575
+ "total_flos": 8525410467840000.0,
576
  "train_batch_size": 4,
577
  "trial_name": null,
578
  "trial_params": null