amos1088 commited on
Commit
4e5b3fe
·
verified ·
1 Parent(s): fc1cf89

Training in progress, step 6700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:944808fe783edfd9b3c65bab6e9419c6453c9834fc1973f7c62ab61a3de9aae9
3
  size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cad0a278102bf9770bc41be47d29e569bd4f2693c87eb7700d1c115f88c29cf
3
  size 35668592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db9cdb407f41fd1b11ccd9ecdfd0c6e94912dd1e472526a67c2e3e236dade180
3
  size 18257163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b424d2e19ea94a34391764dcea1aed8baff5e017c96bfe359256150da0292cd4
3
  size 18257163
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4229669a62a33714a49d253749de55184a0e99902f7bb8d86266ba2b372b0ec4
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8dde9f5e22222ecfd67853fa8d0b9ba72a03a3881426f7471f24522b0e590e
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a62fbf57777c52a276ea0715fd511e46f7d6fae15daac2dd598d49f57d5374b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7b581468d312885ca28af1cc20f9f1ad44a8af1db5ffa003b6beff388c6ef3
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.572585929588372,
6
  "eval_steps": 500,
7
- "global_step": 6600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -9908,6 +9908,156 @@
9908
  "rewards/margins": 17.756301879882812,
9909
  "rewards/rejected": -16.33933448791504,
9910
  "step": 6600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9911
  }
9912
  ],
9913
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.5964138917019122,
6
  "eval_steps": 500,
7
+ "global_step": 6700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
9908
  "rewards/margins": 17.756301879882812,
9909
  "rewards/rejected": -16.33933448791504,
9910
  "step": 6600
9911
+ },
9912
+ {
9913
+ "epoch": 1.574968725799726,
9914
+ "grad_norm": 1.6681083252478857e-06,
9915
+ "learning_rate": 2.7190977122535254e-05,
9916
+ "logits/chosen": 9.289986610412598,
9917
+ "logits/rejected": 8.851910591125488,
9918
+ "logps/chosen": -3.9985766410827637,
9919
+ "logps/rejected": -181.04971313476562,
9920
+ "loss": 0.0,
9921
+ "rewards/accuracies": 1.0,
9922
+ "rewards/chosen": 1.4206149578094482,
9923
+ "rewards/margins": 17.674718856811523,
9924
+ "rewards/rejected": -16.254104614257812,
9925
+ "step": 6610
9926
+ },
9927
+ {
9928
+ "epoch": 1.5773515220110799,
9929
+ "grad_norm": 8.596234692959115e-06,
9930
+ "learning_rate": 2.7121921381742445e-05,
9931
+ "logits/chosen": 9.695623397827148,
9932
+ "logits/rejected": 9.04432487487793,
9933
+ "logps/chosen": -3.8987979888916016,
9934
+ "logps/rejected": -181.49603271484375,
9935
+ "loss": 0.0,
9936
+ "rewards/accuracies": 1.0,
9937
+ "rewards/chosen": 1.3905894756317139,
9938
+ "rewards/margins": 17.71683120727539,
9939
+ "rewards/rejected": -16.326242446899414,
9940
+ "step": 6620
9941
+ },
9942
+ {
9943
+ "epoch": 1.579734318222434,
9944
+ "grad_norm": 9.528152986604255e-07,
9945
+ "learning_rate": 2.7052849329510616e-05,
9946
+ "logits/chosen": 9.253401756286621,
9947
+ "logits/rejected": 8.395661354064941,
9948
+ "logps/chosen": -4.091588497161865,
9949
+ "logps/rejected": -182.94107055664062,
9950
+ "loss": 0.0,
9951
+ "rewards/accuracies": 1.0,
9952
+ "rewards/chosen": 1.3835569620132446,
9953
+ "rewards/margins": 17.877513885498047,
9954
+ "rewards/rejected": -16.493959426879883,
9955
+ "step": 6630
9956
+ },
9957
+ {
9958
+ "epoch": 1.582117114433788,
9959
+ "grad_norm": 3.34082760673482e-05,
9960
+ "learning_rate": 2.698376149680408e-05,
9961
+ "logits/chosen": 9.560027122497559,
9962
+ "logits/rejected": 9.066927909851074,
9963
+ "logps/chosen": -3.810729503631592,
9964
+ "logps/rejected": -178.86024475097656,
9965
+ "loss": 0.0,
9966
+ "rewards/accuracies": 1.0,
9967
+ "rewards/chosen": 1.3871489763259888,
9968
+ "rewards/margins": 17.55055809020996,
9969
+ "rewards/rejected": -16.163410186767578,
9970
+ "step": 6640
9971
+ },
9972
+ {
9973
+ "epoch": 1.584499910645142,
9974
+ "grad_norm": 2.7486205453897128e-06,
9975
+ "learning_rate": 2.6914658414708483e-05,
9976
+ "logits/chosen": 9.511039733886719,
9977
+ "logits/rejected": 8.858368873596191,
9978
+ "logps/chosen": -4.18659782409668,
9979
+ "logps/rejected": -179.6385955810547,
9980
+ "loss": 0.0,
9981
+ "rewards/accuracies": 1.0,
9982
+ "rewards/chosen": 1.388300895690918,
9983
+ "rewards/margins": 17.517656326293945,
9984
+ "rewards/rejected": -16.12935447692871,
9985
+ "step": 6650
9986
+ },
9987
+ {
9988
+ "epoch": 1.5868827068564961,
9989
+ "grad_norm": 2.875137852242915e-06,
9990
+ "learning_rate": 2.684554061442669e-05,
9991
+ "logits/chosen": 8.98546314239502,
9992
+ "logits/rejected": 8.42377758026123,
9993
+ "logps/chosen": -4.1340131759643555,
9994
+ "logps/rejected": -183.18992614746094,
9995
+ "loss": 0.0,
9996
+ "rewards/accuracies": 1.0,
9997
+ "rewards/chosen": 1.3774447441101074,
9998
+ "rewards/margins": 17.857067108154297,
9999
+ "rewards/rejected": -16.47962188720703,
10000
+ "step": 6660
10001
+ },
10002
+ {
10003
+ "epoch": 1.5892655030678502,
10004
+ "grad_norm": 8.891835818758409e-07,
10005
+ "learning_rate": 2.6776408627274703e-05,
10006
+ "logits/chosen": 8.80525016784668,
10007
+ "logits/rejected": 8.30695629119873,
10008
+ "logps/chosen": -4.148054599761963,
10009
+ "logps/rejected": -184.2656707763672,
10010
+ "loss": 0.0,
10011
+ "rewards/accuracies": 1.0,
10012
+ "rewards/chosen": 1.4196046590805054,
10013
+ "rewards/margins": 17.907718658447266,
10014
+ "rewards/rejected": -16.48811149597168,
10015
+ "step": 6670
10016
+ },
10017
+ {
10018
+ "epoch": 1.5916482992792043,
10019
+ "grad_norm": 5.344471446733223e-06,
10020
+ "learning_rate": 2.6707262984677573e-05,
10021
+ "logits/chosen": 9.732979774475098,
10022
+ "logits/rejected": 9.241010665893555,
10023
+ "logps/chosen": -3.8970329761505127,
10024
+ "logps/rejected": -179.18814086914062,
10025
+ "loss": 0.0,
10026
+ "rewards/accuracies": 1.0,
10027
+ "rewards/chosen": 1.3821308612823486,
10028
+ "rewards/margins": 17.509174346923828,
10029
+ "rewards/rejected": -16.12704086303711,
10030
+ "step": 6680
10031
+ },
10032
+ {
10033
+ "epoch": 1.5940310954905583,
10034
+ "grad_norm": 1.1200794688193128e-05,
10035
+ "learning_rate": 2.6638104218165332e-05,
10036
+ "logits/chosen": 8.704614639282227,
10037
+ "logits/rejected": 8.213298797607422,
10038
+ "logps/chosen": -4.218519687652588,
10039
+ "logps/rejected": -181.6812744140625,
10040
+ "loss": 0.0,
10041
+ "rewards/accuracies": 1.0,
10042
+ "rewards/chosen": 1.414750576019287,
10043
+ "rewards/margins": 17.60392189025879,
10044
+ "rewards/rejected": -16.189170837402344,
10045
+ "step": 6690
10046
+ },
10047
+ {
10048
+ "epoch": 1.5964138917019122,
10049
+ "grad_norm": 3.7125805647519883e-06,
10050
+ "learning_rate": 2.6568932859368905e-05,
10051
+ "logits/chosen": 9.12821102142334,
10052
+ "logits/rejected": 8.672597885131836,
10053
+ "logps/chosen": -4.364258289337158,
10054
+ "logps/rejected": -182.12783813476562,
10055
+ "loss": 0.0,
10056
+ "rewards/accuracies": 1.0,
10057
+ "rewards/chosen": 1.3750208616256714,
10058
+ "rewards/margins": 17.705333709716797,
10059
+ "rewards/rejected": -16.330312728881836,
10060
+ "step": 6700
10061
  }
10062
  ],
10063
  "logging_steps": 10,