AymenELKani commited on
Commit
e189061
·
verified ·
1 Parent(s): 9109f16

Training in progress, step 3354, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:483e158910cfe03a442d52ce93638ab5bf380d13b770b525afb53777bfc5194e
3
  size 9449344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d805d224770203f87507cd409ca03c7d379294618bc6df66dcdb8923b5024d
3
  size 9449344
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ace7ce133128b7e6644f91e18acfbbcc2d1f1d1046c964b71b6e34cbcf92b3a2
3
  size 18957003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cf95319f4c58dbfc6de8610b486ecae741191a50a79dd6828f045be1cd3b60e
3
  size 18957003
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21ff9a64fc3b709e3db36b3c7c3181c1a80df94bf916bb90382695562b4876a7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd6db25db4cf05e7919262d88ce7dfa06447501cda328a167e8debfb09b35275
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9083047562248399,
6
  "eval_steps": 500,
7
- "global_step": 3200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4808,6 +4808,231 @@
4808
  "rewards/margins": 25.77750015258789,
4809
  "rewards/rejected": -12.244199752807617,
4810
  "step": 3200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4811
  }
4812
  ],
4813
  "logging_steps": 10,
@@ -4822,7 +5047,7 @@
4822
  "should_evaluate": false,
4823
  "should_log": false,
4824
  "should_save": true,
4825
- "should_training_stop": false
4826
  },
4827
  "attributes": {}
4828
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
  "eval_steps": 500,
7
+ "global_step": 3354,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4808
  "rewards/margins": 25.77750015258789,
4809
  "rewards/rejected": -12.244199752807617,
4810
  "step": 3200
4811
+ },
4812
+ {
4813
+ "epoch": 1.914268674519159,
4814
+ "grad_norm": 3.401453625428985e-07,
4815
+ "learning_rate": 2.4456893515647507e-07,
4816
+ "logits/chosen": -55.8200569152832,
4817
+ "logits/rejected": -60.309791564941406,
4818
+ "logps/chosen": -3641.48876953125,
4819
+ "logps/rejected": -4029.09130859375,
4820
+ "loss": 0.0,
4821
+ "rewards/accuracies": 1.0,
4822
+ "rewards/chosen": 12.986165046691895,
4823
+ "rewards/margins": 28.325210571289062,
4824
+ "rewards/rejected": -15.339044570922852,
4825
+ "step": 3210
4826
+ },
4827
+ {
4828
+ "epoch": 1.9202325928134785,
4829
+ "grad_norm": 7.95227883543248e-09,
4830
+ "learning_rate": 2.1204464497393828e-07,
4831
+ "logits/chosen": -58.28154754638672,
4832
+ "logits/rejected": -63.397918701171875,
4833
+ "logps/chosen": -3738.774169921875,
4834
+ "logps/rejected": -4127.396484375,
4835
+ "loss": 0.0,
4836
+ "rewards/accuracies": 1.0,
4837
+ "rewards/chosen": 12.601153373718262,
4838
+ "rewards/margins": 32.9376106262207,
4839
+ "rewards/rejected": -20.33645248413086,
4840
+ "step": 3220
4841
+ },
4842
+ {
4843
+ "epoch": 1.9261965111077979,
4844
+ "grad_norm": 5.5718683142913505e-06,
4845
+ "learning_rate": 1.818308335493707e-07,
4846
+ "logits/chosen": -57.85089874267578,
4847
+ "logits/rejected": -62.124717712402344,
4848
+ "logps/chosen": -3759.594482421875,
4849
+ "logps/rejected": -4074.05859375,
4850
+ "loss": 0.0,
4851
+ "rewards/accuracies": 1.0,
4852
+ "rewards/chosen": 12.022012710571289,
4853
+ "rewards/margins": 27.768798828125,
4854
+ "rewards/rejected": -15.746786117553711,
4855
+ "step": 3230
4856
+ },
4857
+ {
4858
+ "epoch": 1.932160429402117,
4859
+ "grad_norm": 2.9493682518477726e-07,
4860
+ "learning_rate": 1.539303171041423e-07,
4861
+ "logits/chosen": -56.495628356933594,
4862
+ "logits/rejected": -59.803489685058594,
4863
+ "logps/chosen": -3937.475341796875,
4864
+ "logps/rejected": -4117.28955078125,
4865
+ "loss": 0.0181,
4866
+ "rewards/accuracies": 0.9750000238418579,
4867
+ "rewards/chosen": 13.402850151062012,
4868
+ "rewards/margins": 29.56199073791504,
4869
+ "rewards/rejected": -16.159137725830078,
4870
+ "step": 3240
4871
+ },
4872
+ {
4873
+ "epoch": 1.9381243476964367,
4874
+ "grad_norm": 2.0169439451933613e-09,
4875
+ "learning_rate": 1.2834569623800806e-07,
4876
+ "logits/chosen": -56.14719772338867,
4877
+ "logits/rejected": -61.06328201293945,
4878
+ "logps/chosen": -3900.921142578125,
4879
+ "logps/rejected": -3950.700439453125,
4880
+ "loss": 0.0,
4881
+ "rewards/accuracies": 1.0,
4882
+ "rewards/chosen": 11.294220924377441,
4883
+ "rewards/margins": 27.905467987060547,
4884
+ "rewards/rejected": -16.611248016357422,
4885
+ "step": 3250
4886
+ },
4887
+ {
4888
+ "epoch": 1.944088265990756,
4889
+ "grad_norm": 2.454358383197075e-10,
4890
+ "learning_rate": 1.0507935568670469e-07,
4891
+ "logits/chosen": -59.91279983520508,
4892
+ "logits/rejected": -64.65019226074219,
4893
+ "logps/chosen": -4057.24365234375,
4894
+ "logps/rejected": -4658.7998046875,
4895
+ "loss": 0.0,
4896
+ "rewards/accuracies": 1.0,
4897
+ "rewards/chosen": 13.831171035766602,
4898
+ "rewards/margins": 30.55777359008789,
4899
+ "rewards/rejected": -16.726600646972656,
4900
+ "step": 3260
4901
+ },
4902
+ {
4903
+ "epoch": 1.9500521842850753,
4904
+ "grad_norm": 6.98909570928663e-05,
4905
+ "learning_rate": 8.413346409967548e-08,
4906
+ "logits/chosen": -57.639564514160156,
4907
+ "logits/rejected": -60.486106872558594,
4908
+ "logps/chosen": -3805.05908203125,
4909
+ "logps/rejected": -3502.89208984375,
4910
+ "loss": 0.0,
4911
+ "rewards/accuracies": 1.0,
4912
+ "rewards/chosen": 13.168159484863281,
4913
+ "rewards/margins": 26.42257308959961,
4914
+ "rewards/rejected": -13.254412651062012,
4915
+ "step": 3270
4916
+ },
4917
+ {
4918
+ "epoch": 1.9560161025793947,
4919
+ "grad_norm": 9.799998590409587e-10,
4920
+ "learning_rate": 6.5509973837935e-08,
4921
+ "logits/chosen": -56.7869987487793,
4922
+ "logits/rejected": -63.3160285949707,
4923
+ "logps/chosen": -3626.40771484375,
4924
+ "logps/rejected": -4573.03466796875,
4925
+ "loss": 0.0,
4926
+ "rewards/accuracies": 1.0,
4927
+ "rewards/chosen": 14.337489128112793,
4928
+ "rewards/margins": 31.131816864013672,
4929
+ "rewards/rejected": -16.794330596923828,
4930
+ "step": 3280
4931
+ },
4932
+ {
4933
+ "epoch": 1.961980020873714,
4934
+ "grad_norm": 8.010190867935307e-06,
4935
+ "learning_rate": 4.921062079207839e-08,
4936
+ "logits/chosen": -58.120140075683594,
4937
+ "logits/rejected": -62.119712829589844,
4938
+ "logps/chosen": -3722.432861328125,
4939
+ "logps/rejected": -4000.43701171875,
4940
+ "loss": 0.0,
4941
+ "rewards/accuracies": 1.0,
4942
+ "rewards/chosen": 15.0454740524292,
4943
+ "rewards/margins": 28.300750732421875,
4944
+ "rewards/rejected": -13.255276679992676,
4945
+ "step": 3290
4946
+ },
4947
+ {
4948
+ "epoch": 1.9679439391680336,
4949
+ "grad_norm": 0.012528502382338047,
4950
+ "learning_rate": 3.5236924220494186e-08,
4951
+ "logits/chosen": -57.49330520629883,
4952
+ "logits/rejected": -60.7618522644043,
4953
+ "logps/chosen": -4048.438232421875,
4954
+ "logps/rejected": -4347.9658203125,
4955
+ "loss": 0.0,
4956
+ "rewards/accuracies": 1.0,
4957
+ "rewards/chosen": 13.4490966796875,
4958
+ "rewards/margins": 29.223918914794922,
4959
+ "rewards/rejected": -15.774820327758789,
4960
+ "step": 3300
4961
+ },
4962
+ {
4963
+ "epoch": 1.9739078574623528,
4964
+ "grad_norm": 5.5174933066837184e-08,
4965
+ "learning_rate": 2.3590186607733154e-08,
4966
+ "logits/chosen": -57.35243606567383,
4967
+ "logits/rejected": -63.4393310546875,
4968
+ "logps/chosen": -3737.04150390625,
4969
+ "logps/rejected": -4001.02978515625,
4970
+ "loss": 0.0,
4971
+ "rewards/accuracies": 1.0,
4972
+ "rewards/chosen": 14.624621391296387,
4973
+ "rewards/margins": 27.510913848876953,
4974
+ "rewards/rejected": -12.886293411254883,
4975
+ "step": 3310
4976
+ },
4977
+ {
4978
+ "epoch": 1.9798717757566722,
4979
+ "grad_norm": 2.4484758665010986e-10,
4980
+ "learning_rate": 1.4271493543133174e-08,
4981
+ "logits/chosen": -59.93109130859375,
4982
+ "logits/rejected": -63.264732360839844,
4983
+ "logps/chosen": -3958.082763671875,
4984
+ "logps/rejected": -4254.8232421875,
4985
+ "loss": 0.0,
4986
+ "rewards/accuracies": 1.0,
4987
+ "rewards/chosen": 13.18006420135498,
4988
+ "rewards/margins": 28.5799503326416,
4989
+ "rewards/rejected": -15.399887084960938,
4990
+ "step": 3320
4991
+ },
4992
+ {
4993
+ "epoch": 1.9858356940509916,
4994
+ "grad_norm": 4.756313121134781e-09,
4995
+ "learning_rate": 7.281713619605723e-09,
4996
+ "logits/chosen": -56.019737243652344,
4997
+ "logits/rejected": -59.69663619995117,
4998
+ "logps/chosen": -4050.05908203125,
4999
+ "logps/rejected": -4002.97802734375,
5000
+ "loss": 0.0,
5001
+ "rewards/accuracies": 1.0,
5002
+ "rewards/chosen": 13.66505241394043,
5003
+ "rewards/margins": 27.011245727539062,
5004
+ "rewards/rejected": -13.346193313598633,
5005
+ "step": 3330
5006
+ },
5007
+ {
5008
+ "epoch": 1.9917996123453108,
5009
+ "grad_norm": 1.108175638364628e-05,
5010
+ "learning_rate": 2.6214983526867686e-09,
5011
+ "logits/chosen": -56.985069274902344,
5012
+ "logits/rejected": -62.2716178894043,
5013
+ "logps/chosen": -3683.37255859375,
5014
+ "logps/rejected": -3870.92041015625,
5015
+ "loss": 0.0,
5016
+ "rewards/accuracies": 1.0,
5017
+ "rewards/chosen": 13.052160263061523,
5018
+ "rewards/margins": 27.678844451904297,
5019
+ "rewards/rejected": -14.626681327819824,
5020
+ "step": 3340
5021
+ },
5022
+ {
5023
+ "epoch": 1.9977635306396302,
5024
+ "grad_norm": 2.601581456929125e-07,
5025
+ "learning_rate": 2.912821198075566e-10,
5026
+ "logits/chosen": -56.11452102661133,
5027
+ "logits/rejected": -60.453453063964844,
5028
+ "logps/chosen": -3346.564453125,
5029
+ "logps/rejected": -3288.348388671875,
5030
+ "loss": 0.0,
5031
+ "rewards/accuracies": 1.0,
5032
+ "rewards/chosen": 12.876147270202637,
5033
+ "rewards/margins": 26.675273895263672,
5034
+ "rewards/rejected": -13.799127578735352,
5035
+ "step": 3350
5036
  }
5037
  ],
5038
  "logging_steps": 10,
 
5047
  "should_evaluate": false,
5048
  "should_log": false,
5049
  "should_save": true,
5050
+ "should_training_stop": true
5051
  },
5052
  "attributes": {}
5053
  }