Arittro2 committed on
Commit
acd0bbf
·
verified ·
1 Parent(s): b8bd41e

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74a7fbfd1065c7efe650cbe07ca6888be4c9c4026201b6e3d687e19008471a74
3
  size 262406656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93fe08fda954a8d19235305b943a8e691ee131a7294b52f2b5fb23bd46716507
3
  size 262406656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d13ac8ea643c9fa2eb6e074fedbff66a8ad842ec5de19c941bf75cc87d544fb7
3
  size 122872331
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eb3975da2fc0d9c7f4f4e9652c42e390d2d5a328fc9fe84b2a3069c7e905c7b
3
  size 122872331
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa961fa9e506668d35c6cfd8cf85f9299717888b30062742b0fd9e2da10b1c98
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcfe3a31388de42cdb8e2ba831ae4c3ef355515443e6afb9cf07cb38355f83c0
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df3eab020225d79fdb69396f30e0ff17b6980870fce2cd29482a57a0b5aad692
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:744586c5f7157d9fd0b219ae516b1d5cf715a6af929b7cd570b93b36b3eb4887
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.3540979097909791,
6
  "eval_steps": 500,
7
- "global_step": 5150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -13398,11 +13398,271 @@
13398
  "rewards/quality_reward_func/mean": 0.800000011920929,
13399
  "rewards/quality_reward_func/std": 0.0,
13400
  "step": 5150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13401
  }
13402
  ],
13403
  "logging_steps": 10,
13404
  "max_steps": 14544,
13405
- "num_input_tokens_seen": 7376940,
13406
  "num_train_epochs": 1,
13407
  "save_steps": 50,
13408
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.360973597359736,
6
  "eval_steps": 500,
7
+ "global_step": 5250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
13398
  "rewards/quality_reward_func/mean": 0.800000011920929,
13399
  "rewards/quality_reward_func/std": 0.0,
13400
  "step": 5150
13401
+ },
13402
+ {
13403
+ "completion_length": 16.57894736842105,
13404
+ "completions/clipped_ratio": 0.0,
13405
+ "completions/max_length": 16.57894736842105,
13406
+ "completions/max_terminated_length": 16.57894736842105,
13407
+ "completions/mean_length": 15.605263157894736,
13408
+ "completions/mean_terminated_length": 15.605263157894736,
13409
+ "completions/min_length": 15.105263157894736,
13410
+ "completions/min_terminated_length": 15.105263157894736,
13411
+ "epoch": 0.3547854785478548,
13412
+ "frac_reward_zero_std": 1.0,
13413
+ "grad_norm": 0.0,
13414
+ "kl": 1.2172791863742627,
13415
+ "learning_rate": 4.075421804615715e-06,
13416
+ "loss": 0.0,
13417
+ "num_tokens": 7391547.0,
13418
+ "reward": 4.099999904632568,
13419
+ "reward_std": 0.0,
13420
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13421
+ "rewards/coherence_reward_func/std": 0.0,
13422
+ "rewards/formatting_reward_func/mean": 2.0,
13423
+ "rewards/formatting_reward_func/std": 0.0,
13424
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13425
+ "rewards/quality_reward_func/std": 0.0,
13426
+ "step": 5160
13427
+ },
13428
+ {
13429
+ "completion_length": 19.8,
13430
+ "completions/clipped_ratio": 0.0,
13431
+ "completions/max_length": 19.8,
13432
+ "completions/max_terminated_length": 19.8,
13433
+ "completions/mean_length": 17.15,
13434
+ "completions/mean_terminated_length": 17.15,
13435
+ "completions/min_length": 15.1,
13436
+ "completions/min_terminated_length": 15.1,
13437
+ "epoch": 0.35547304730473045,
13438
+ "frac_reward_zero_std": 1.0,
13439
+ "grad_norm": 0.0,
13440
+ "kl": 0.9243504330515862,
13441
+ "learning_rate": 4.070758176794378e-06,
13442
+ "loss": 0.0,
13443
+ "num_tokens": 7406345.0,
13444
+ "reward": 4.099999904632568,
13445
+ "reward_std": 0.0,
13446
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13447
+ "rewards/coherence_reward_func/std": 0.0,
13448
+ "rewards/formatting_reward_func/mean": 2.0,
13449
+ "rewards/formatting_reward_func/std": 0.0,
13450
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13451
+ "rewards/quality_reward_func/std": 0.0,
13452
+ "step": 5170
13453
+ },
13454
+ {
13455
+ "completion_length": 20.5,
13456
+ "completions/clipped_ratio": 0.0,
13457
+ "completions/max_length": 20.5,
13458
+ "completions/max_terminated_length": 20.5,
13459
+ "completions/mean_length": 18.55,
13460
+ "completions/mean_terminated_length": 18.55,
13461
+ "completions/min_length": 16.7,
13462
+ "completions/min_terminated_length": 16.7,
13463
+ "epoch": 0.35616061606160615,
13464
+ "frac_reward_zero_std": 1.0,
13465
+ "grad_norm": 0.0,
13466
+ "kl": 1.3112298667430877,
13467
+ "learning_rate": 4.066085500070087e-06,
13468
+ "loss": 0.0001,
13469
+ "num_tokens": 7420439.0,
13470
+ "reward": 4.099999904632568,
13471
+ "reward_std": 0.0,
13472
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13473
+ "rewards/coherence_reward_func/std": 0.0,
13474
+ "rewards/formatting_reward_func/mean": 2.0,
13475
+ "rewards/formatting_reward_func/std": 0.0,
13476
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13477
+ "rewards/quality_reward_func/std": 0.0,
13478
+ "step": 5180
13479
+ },
13480
+ {
13481
+ "completion_length": 18.2,
13482
+ "completions/clipped_ratio": 0.0,
13483
+ "completions/max_length": 18.2,
13484
+ "completions/max_terminated_length": 18.2,
13485
+ "completions/mean_length": 16.875,
13486
+ "completions/mean_terminated_length": 16.875,
13487
+ "completions/min_length": 16.0,
13488
+ "completions/min_terminated_length": 16.0,
13489
+ "epoch": 0.35684818481848185,
13490
+ "frac_reward_zero_std": 1.0,
13491
+ "grad_norm": 0.0,
13492
+ "kl": 1.347011312842369,
13493
+ "learning_rate": 4.061403801361432e-06,
13494
+ "loss": 0.0,
13495
+ "num_tokens": 7435522.0,
13496
+ "reward": 4.099999904632568,
13497
+ "reward_std": 0.0,
13498
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13499
+ "rewards/coherence_reward_func/std": 0.0,
13500
+ "rewards/formatting_reward_func/mean": 2.0,
13501
+ "rewards/formatting_reward_func/std": 0.0,
13502
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13503
+ "rewards/quality_reward_func/std": 0.0,
13504
+ "step": 5190
13505
+ },
13506
+ {
13507
+ "completion_length": 20.8,
13508
+ "completions/clipped_ratio": 0.0,
13509
+ "completions/max_length": 20.8,
13510
+ "completions/max_terminated_length": 20.8,
13511
+ "completions/mean_length": 18.325,
13512
+ "completions/mean_terminated_length": 18.325,
13513
+ "completions/min_length": 16.8,
13514
+ "completions/min_terminated_length": 16.8,
13515
+ "epoch": 0.35753575357535755,
13516
+ "frac_reward_zero_std": 1.0,
13517
+ "grad_norm": 0.0,
13518
+ "kl": 1.2339761398732663,
13519
+ "learning_rate": 4.0567131076389795e-06,
13520
+ "loss": 0.0,
13521
+ "num_tokens": 7451491.0,
13522
+ "reward": 4.099999904632568,
13523
+ "reward_std": 0.0,
13524
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13525
+ "rewards/coherence_reward_func/std": 0.0,
13526
+ "rewards/formatting_reward_func/mean": 2.0,
13527
+ "rewards/formatting_reward_func/std": 0.0,
13528
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13529
+ "rewards/quality_reward_func/std": 0.0,
13530
+ "step": 5200
13531
+ },
13532
+ {
13533
+ "completion_length": 18.0,
13534
+ "completions/clipped_ratio": 0.0,
13535
+ "completions/max_length": 18.0,
13536
+ "completions/max_terminated_length": 18.0,
13537
+ "completions/mean_length": 16.1,
13538
+ "completions/mean_terminated_length": 16.1,
13539
+ "completions/min_length": 14.3,
13540
+ "completions/min_terminated_length": 14.3,
13541
+ "epoch": 0.3582233223322332,
13542
+ "frac_reward_zero_std": 1.0,
13543
+ "grad_norm": 0.0,
13544
+ "kl": 1.1732663363218307,
13545
+ "learning_rate": 4.052013445925116e-06,
13546
+ "loss": 0.0,
13547
+ "num_tokens": 7466091.0,
13548
+ "reward": 4.099999904632568,
13549
+ "reward_std": 0.0,
13550
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13551
+ "rewards/coherence_reward_func/std": 0.0,
13552
+ "rewards/formatting_reward_func/mean": 2.0,
13553
+ "rewards/formatting_reward_func/std": 0.0,
13554
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13555
+ "rewards/quality_reward_func/std": 0.0,
13556
+ "step": 5210
13557
+ },
13558
+ {
13559
+ "completion_length": 18.8,
13560
+ "completions/clipped_ratio": 0.0,
13561
+ "completions/max_length": 18.8,
13562
+ "completions/max_terminated_length": 18.8,
13563
+ "completions/mean_length": 17.125,
13564
+ "completions/mean_terminated_length": 17.125,
13565
+ "completions/min_length": 15.7,
13566
+ "completions/min_terminated_length": 15.7,
13567
+ "epoch": 0.3589108910891089,
13568
+ "frac_reward_zero_std": 1.0,
13569
+ "grad_norm": 0.0,
13570
+ "kl": 1.2563588432967663,
13571
+ "learning_rate": 4.0473048432938875e-06,
13572
+ "loss": 0.0,
13573
+ "num_tokens": 7481236.0,
13574
+ "reward": 4.099999904632568,
13575
+ "reward_std": 0.0,
13576
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13577
+ "rewards/coherence_reward_func/std": 0.0,
13578
+ "rewards/formatting_reward_func/mean": 2.0,
13579
+ "rewards/formatting_reward_func/std": 0.0,
13580
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13581
+ "rewards/quality_reward_func/std": 0.0,
13582
+ "step": 5220
13583
+ },
13584
+ {
13585
+ "completion_length": 20.5,
13586
+ "completions/clipped_ratio": 0.0,
13587
+ "completions/max_length": 20.5,
13588
+ "completions/max_terminated_length": 20.5,
13589
+ "completions/mean_length": 18.2,
13590
+ "completions/mean_terminated_length": 18.2,
13591
+ "completions/min_length": 16.7,
13592
+ "completions/min_terminated_length": 16.7,
13593
+ "epoch": 0.3595984598459846,
13594
+ "frac_reward_zero_std": 1.0,
13595
+ "grad_norm": 0.0,
13596
+ "kl": 1.28299700319767,
13597
+ "learning_rate": 4.042587326870851e-06,
13598
+ "loss": 0.0001,
13599
+ "num_tokens": 7495776.0,
13600
+ "reward": 4.099999904632568,
13601
+ "reward_std": 0.0,
13602
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13603
+ "rewards/coherence_reward_func/std": 0.0,
13604
+ "rewards/formatting_reward_func/mean": 2.0,
13605
+ "rewards/formatting_reward_func/std": 0.0,
13606
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13607
+ "rewards/quality_reward_func/std": 0.0,
13608
+ "step": 5230
13609
+ },
13610
+ {
13611
+ "completion_length": 22.2,
13612
+ "completions/clipped_ratio": 0.0,
13613
+ "completions/max_length": 22.2,
13614
+ "completions/max_terminated_length": 22.2,
13615
+ "completions/mean_length": 20.35,
13616
+ "completions/mean_terminated_length": 20.35,
13617
+ "completions/min_length": 18.9,
13618
+ "completions/min_terminated_length": 18.9,
13619
+ "epoch": 0.3602860286028603,
13620
+ "frac_reward_zero_std": 1.0,
13621
+ "grad_norm": 0.0,
13622
+ "kl": 1.4640183687210082,
13623
+ "learning_rate": 4.037860923832913e-06,
13624
+ "loss": 0.0001,
13625
+ "num_tokens": 7511746.0,
13626
+ "reward": 4.099999904632568,
13627
+ "reward_std": 0.0,
13628
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13629
+ "rewards/coherence_reward_func/std": 0.0,
13630
+ "rewards/formatting_reward_func/mean": 2.0,
13631
+ "rewards/formatting_reward_func/std": 0.0,
13632
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13633
+ "rewards/quality_reward_func/std": 0.0,
13634
+ "step": 5240
13635
+ },
13636
+ {
13637
+ "completion_length": 19.1,
13638
+ "completions/clipped_ratio": 0.0,
13639
+ "completions/max_length": 19.1,
13640
+ "completions/max_terminated_length": 19.1,
13641
+ "completions/mean_length": 16.8,
13642
+ "completions/mean_terminated_length": 16.8,
13643
+ "completions/min_length": 15.6,
13644
+ "completions/min_terminated_length": 15.6,
13645
+ "epoch": 0.360973597359736,
13646
+ "frac_reward_zero_std": 1.0,
13647
+ "grad_norm": 0.0,
13648
+ "kl": 1.186241403222084,
13649
+ "learning_rate": 4.0331256614081735e-06,
13650
+ "loss": 0.0,
13651
+ "num_tokens": 7525094.0,
13652
+ "reward": 4.099999904632568,
13653
+ "reward_std": 0.0,
13654
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
13655
+ "rewards/coherence_reward_func/std": 0.0,
13656
+ "rewards/formatting_reward_func/mean": 2.0,
13657
+ "rewards/formatting_reward_func/std": 0.0,
13658
+ "rewards/quality_reward_func/mean": 0.800000011920929,
13659
+ "rewards/quality_reward_func/std": 0.0,
13660
+ "step": 5250
13661
  }
13662
  ],
13663
  "logging_steps": 10,
13664
  "max_steps": 14544,
13665
+ "num_input_tokens_seen": 7525094,
13666
  "num_train_epochs": 1,
13667
  "save_steps": 50,
13668
  "stateful_callbacks": {