Arittro2 commited on
Commit
f58b136
·
verified ·
1 Parent(s): 3f32de9

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -29,12 +29,12 @@
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
 
 
 
32
  "up_proj",
33
  "k_proj",
34
- "down_proj",
35
- "gate_proj",
36
  "o_proj",
37
- "v_proj",
38
  "q_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
 
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "v_proj",
33
+ "gate_proj",
34
+ "down_proj",
35
  "up_proj",
36
  "k_proj",
 
 
37
  "o_proj",
 
38
  "q_proj"
39
  ],
40
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e72eda942629c3047aa40a8d87716a3ee98493a90889d48e85dd66dac3c06797
3
  size 262406656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b02836ae885c9a379cab7d418669e282e39824c3e8442660ff95b5af732c1d3
3
  size 262406656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4de02450276a367d03e33824327be68771c8e4800ee949ada9830b28f1a60a46
3
  size 122872331
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10cc964c60bfde918121f701449487ada51f8d814bd8c940c07460e711c0350
3
  size 122872331
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8c8c2a4a5365e4d2f815f01c10e4be6327777e8d35b0484332d334eea008ef0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b86a856532e9dbe010592bfb956348463afea72b9792aedba460c25c8145b5ae
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6b658c31a612ef44d06fbdd6338f43d25c77380e7f2f9a28a42ad6a24fa1046
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a4abf5cec20ca257e82b8cc70b248ece9e4b9d912fb1d104f5f1a77d9ff1ea
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9866611661166117,
6
  "eval_steps": 500,
7
- "global_step": 14350,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -37318,11 +37318,271 @@
37318
  "rewards/quality_reward_func/mean": 0.800000011920929,
37319
  "rewards/quality_reward_func/std": 0.0,
37320
  "step": 14350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37321
  }
37322
  ],
37323
  "logging_steps": 10,
37324
  "max_steps": 14544,
37325
- "num_input_tokens_seen": 20628250,
37326
  "num_train_epochs": 1,
37327
  "save_steps": 50,
37328
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9935368536853685,
6
  "eval_steps": 500,
7
+ "global_step": 14450,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
37318
  "rewards/quality_reward_func/mean": 0.800000011920929,
37319
  "rewards/quality_reward_func/std": 0.0,
37320
  "step": 14350
37321
+ },
37322
+ {
37323
+ "completion_length": 18.3,
37324
+ "completions/clipped_ratio": 0.0,
37325
+ "completions/max_length": 18.3,
37326
+ "completions/max_terminated_length": 18.3,
37327
+ "completions/mean_length": 16.3,
37328
+ "completions/mean_terminated_length": 16.3,
37329
+ "completions/min_length": 15.1,
37330
+ "completions/min_terminated_length": 15.1,
37331
+ "epoch": 0.9873487348734874,
37332
+ "frac_reward_zero_std": 1.0,
37333
+ "grad_norm": 0.0,
37334
+ "kl": 1.2089169837534428,
37335
+ "learning_rate": 2.464160105727642e-09,
37336
+ "loss": 0.0,
37337
+ "num_tokens": 20642162.0,
37338
+ "reward": 4.099999904632568,
37339
+ "reward_std": 0.0,
37340
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37341
+ "rewards/coherence_reward_func/std": 0.0,
37342
+ "rewards/formatting_reward_func/mean": 2.0,
37343
+ "rewards/formatting_reward_func/std": 0.0,
37344
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37345
+ "rewards/quality_reward_func/std": 0.0,
37346
+ "step": 14360
37347
+ },
37348
+ {
37349
+ "completion_length": 19.2,
37350
+ "completions/clipped_ratio": 0.0,
37351
+ "completions/max_length": 19.2,
37352
+ "completions/max_terminated_length": 19.2,
37353
+ "completions/mean_length": 17.275,
37354
+ "completions/mean_terminated_length": 17.275,
37355
+ "completions/min_length": 16.2,
37356
+ "completions/min_terminated_length": 16.2,
37357
+ "epoch": 0.988036303630363,
37358
+ "frac_reward_zero_std": 1.0,
37359
+ "grad_norm": 0.0,
37360
+ "kl": 1.3353900104761123,
37361
+ "learning_rate": 2.2050024110001345e-09,
37362
+ "loss": 0.0,
37363
+ "num_tokens": 20656053.0,
37364
+ "reward": 4.099999904632568,
37365
+ "reward_std": 0.0,
37366
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37367
+ "rewards/coherence_reward_func/std": 0.0,
37368
+ "rewards/formatting_reward_func/mean": 2.0,
37369
+ "rewards/formatting_reward_func/std": 0.0,
37370
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37371
+ "rewards/quality_reward_func/std": 0.0,
37372
+ "step": 14370
37373
+ },
37374
+ {
37375
+ "completion_length": 17.4,
37376
+ "completions/clipped_ratio": 0.0,
37377
+ "completions/max_length": 17.4,
37378
+ "completions/max_terminated_length": 17.4,
37379
+ "completions/mean_length": 16.175,
37380
+ "completions/mean_terminated_length": 16.175,
37381
+ "completions/min_length": 15.6,
37382
+ "completions/min_terminated_length": 15.6,
37383
+ "epoch": 0.9887238723872387,
37384
+ "frac_reward_zero_std": 1.0,
37385
+ "grad_norm": 0.0,
37386
+ "kl": 1.1925065904855727,
37387
+ "learning_rate": 1.9602341397173542e-09,
37388
+ "loss": 0.0,
37389
+ "num_tokens": 20669724.0,
37390
+ "reward": 4.099999904632568,
37391
+ "reward_std": 0.0,
37392
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37393
+ "rewards/coherence_reward_func/std": 0.0,
37394
+ "rewards/formatting_reward_func/mean": 2.0,
37395
+ "rewards/formatting_reward_func/std": 0.0,
37396
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37397
+ "rewards/quality_reward_func/std": 0.0,
37398
+ "step": 14380
37399
+ },
37400
+ {
37401
+ "completion_length": 16.5,
37402
+ "completions/clipped_ratio": 0.0,
37403
+ "completions/max_length": 16.5,
37404
+ "completions/max_terminated_length": 16.5,
37405
+ "completions/mean_length": 15.575,
37406
+ "completions/mean_terminated_length": 15.575,
37407
+ "completions/min_length": 14.6,
37408
+ "completions/min_terminated_length": 14.6,
37409
+ "epoch": 0.9894114411441144,
37410
+ "frac_reward_zero_std": 1.0,
37411
+ "grad_norm": 0.0,
37412
+ "kl": 1.3791985176503658,
37413
+ "learning_rate": 1.7298567019527479e-09,
37414
+ "loss": 0.0,
37415
+ "num_tokens": 20687811.0,
37416
+ "reward": 4.099999904632568,
37417
+ "reward_std": 0.0,
37418
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37419
+ "rewards/coherence_reward_func/std": 0.0,
37420
+ "rewards/formatting_reward_func/mean": 2.0,
37421
+ "rewards/formatting_reward_func/std": 0.0,
37422
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37423
+ "rewards/quality_reward_func/std": 0.0,
37424
+ "step": 14390
37425
+ },
37426
+ {
37427
+ "completion_length": 17.3,
37428
+ "completions/clipped_ratio": 0.0,
37429
+ "completions/max_length": 17.3,
37430
+ "completions/max_terminated_length": 17.3,
37431
+ "completions/mean_length": 15.5,
37432
+ "completions/mean_terminated_length": 15.5,
37433
+ "completions/min_length": 14.4,
37434
+ "completions/min_terminated_length": 14.4,
37435
+ "epoch": 0.9900990099009901,
37436
+ "frac_reward_zero_std": 1.0,
37437
+ "grad_norm": 0.0,
37438
+ "kl": 1.4439594164490699,
37439
+ "learning_rate": 1.513871424876079e-09,
37440
+ "loss": 0.0,
37441
+ "num_tokens": 20703267.0,
37442
+ "reward": 4.099999904632568,
37443
+ "reward_std": 0.0,
37444
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37445
+ "rewards/coherence_reward_func/std": 0.0,
37446
+ "rewards/formatting_reward_func/mean": 2.0,
37447
+ "rewards/formatting_reward_func/std": 0.0,
37448
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37449
+ "rewards/quality_reward_func/std": 0.0,
37450
+ "step": 14400
37451
+ },
37452
+ {
37453
+ "completion_length": 20.4,
37454
+ "completions/clipped_ratio": 0.0,
37455
+ "completions/max_length": 20.4,
37456
+ "completions/max_terminated_length": 20.4,
37457
+ "completions/mean_length": 18.975,
37458
+ "completions/mean_terminated_length": 18.975,
37459
+ "completions/min_length": 17.6,
37460
+ "completions/min_terminated_length": 17.6,
37461
+ "epoch": 0.9907865786578658,
37462
+ "frac_reward_zero_std": 1.0,
37463
+ "grad_norm": 0.0,
37464
+ "kl": 1.0520384900271893,
37465
+ "learning_rate": 1.3122795527467647e-09,
37466
+ "loss": 0.0,
37467
+ "num_tokens": 20715926.0,
37468
+ "reward": 4.099999904632568,
37469
+ "reward_std": 0.0,
37470
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37471
+ "rewards/coherence_reward_func/std": 0.0,
37472
+ "rewards/formatting_reward_func/mean": 2.0,
37473
+ "rewards/formatting_reward_func/std": 0.0,
37474
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37475
+ "rewards/quality_reward_func/std": 0.0,
37476
+ "step": 14410
37477
+ },
37478
+ {
37479
+ "completion_length": 16.3,
37480
+ "completions/clipped_ratio": 0.0,
37481
+ "completions/max_length": 16.3,
37482
+ "completions/max_terminated_length": 16.3,
37483
+ "completions/mean_length": 15.575,
37484
+ "completions/mean_terminated_length": 15.575,
37485
+ "completions/min_length": 14.8,
37486
+ "completions/min_terminated_length": 14.8,
37487
+ "epoch": 0.9914741474147415,
37488
+ "frac_reward_zero_std": 1.0,
37489
+ "grad_norm": 0.0,
37490
+ "kl": 1.1841505281394347,
37491
+ "learning_rate": 1.125082246904996e-09,
37492
+ "loss": 0.0,
37493
+ "num_tokens": 20729649.0,
37494
+ "reward": 4.099999904632568,
37495
+ "reward_std": 0.0,
37496
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37497
+ "rewards/coherence_reward_func/std": 0.0,
37498
+ "rewards/formatting_reward_func/mean": 2.0,
37499
+ "rewards/formatting_reward_func/std": 0.0,
37500
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37501
+ "rewards/quality_reward_func/std": 0.0,
37502
+ "step": 14420
37503
+ },
37504
+ {
37505
+ "completion_length": 18.1,
37506
+ "completions/clipped_ratio": 0.0,
37507
+ "completions/max_length": 18.1,
37508
+ "completions/max_terminated_length": 18.1,
37509
+ "completions/mean_length": 17.3,
37510
+ "completions/mean_terminated_length": 17.3,
37511
+ "completions/min_length": 16.4,
37512
+ "completions/min_terminated_length": 16.4,
37513
+ "epoch": 0.9921617161716172,
37514
+ "frac_reward_zero_std": 1.0,
37515
+ "grad_norm": 0.0,
37516
+ "kl": 1.377218122780323,
37517
+ "learning_rate": 9.52280585766463e-10,
37518
+ "loss": 0.0,
37519
+ "num_tokens": 20747045.0,
37520
+ "reward": 4.099999904632568,
37521
+ "reward_std": 0.0,
37522
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37523
+ "rewards/coherence_reward_func/std": 0.0,
37524
+ "rewards/formatting_reward_func/mean": 2.0,
37525
+ "rewards/formatting_reward_func/std": 0.0,
37526
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37527
+ "rewards/quality_reward_func/std": 0.0,
37528
+ "step": 14430
37529
+ },
37530
+ {
37531
+ "completion_length": 16.3,
37532
+ "completions/clipped_ratio": 0.0,
37533
+ "completions/max_length": 16.3,
37534
+ "completions/max_terminated_length": 16.3,
37535
+ "completions/mean_length": 15.275,
37536
+ "completions/mean_terminated_length": 15.275,
37537
+ "completions/min_length": 14.4,
37538
+ "completions/min_terminated_length": 14.4,
37539
+ "epoch": 0.9928492849284929,
37540
+ "frac_reward_zero_std": 1.0,
37541
+ "grad_norm": 0.0,
37542
+ "kl": 1.112162598967552,
37543
+ "learning_rate": 7.938755648156938e-10,
37544
+ "loss": 0.0,
37545
+ "num_tokens": 20758908.0,
37546
+ "reward": 4.099999904632568,
37547
+ "reward_std": 0.0,
37548
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37549
+ "rewards/coherence_reward_func/std": 0.0,
37550
+ "rewards/formatting_reward_func/mean": 2.0,
37551
+ "rewards/formatting_reward_func/std": 0.0,
37552
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37553
+ "rewards/quality_reward_func/std": 0.0,
37554
+ "step": 14440
37555
+ },
37556
+ {
37557
+ "completion_length": 19.2,
37558
+ "completions/clipped_ratio": 0.0,
37559
+ "completions/max_length": 19.2,
37560
+ "completions/max_terminated_length": 19.2,
37561
+ "completions/mean_length": 16.7,
37562
+ "completions/mean_terminated_length": 16.7,
37563
+ "completions/min_length": 14.4,
37564
+ "completions/min_terminated_length": 14.4,
37565
+ "epoch": 0.9935368536853685,
37566
+ "frac_reward_zero_std": 1.0,
37567
+ "grad_norm": 0.0,
37568
+ "kl": 0.9231208987534046,
37569
+ "learning_rate": 6.498680966007809e-10,
37570
+ "loss": 0.0,
37571
+ "num_tokens": 20774668.0,
37572
+ "reward": 4.099999904632568,
37573
+ "reward_std": 0.0,
37574
+ "rewards/coherence_reward_func/mean": 1.2999999523162842,
37575
+ "rewards/coherence_reward_func/std": 0.0,
37576
+ "rewards/formatting_reward_func/mean": 2.0,
37577
+ "rewards/formatting_reward_func/std": 0.0,
37578
+ "rewards/quality_reward_func/mean": 0.800000011920929,
37579
+ "rewards/quality_reward_func/std": 0.0,
37580
+ "step": 14450
37581
  }
37582
  ],
37583
  "logging_steps": 10,
37584
  "max_steps": 14544,
37585
+ "num_input_tokens_seen": 20774668,
37586
  "num_train_epochs": 1,
37587
  "save_steps": 50,
37588
  "stateful_callbacks": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:834eb2c805e6cbc223d894fc25ac7ea495fdb7ae28416408c469f17593a544fc
3
  size 7057
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88b07759f83baf72651a5497140bc0ce24b06811a14c3e8fba0534eb4ac791ea
3
  size 7057