Muhammed164 commited on
Commit
1485fb5
·
verified ·
1 Parent(s): 0b5fd91

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ca6efbed2efe6156b8c59735fb4dfca38ce2a5db135322b35d06c0d9ece4461
3
  size 204500912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d24df5eae34ec3fdf38b1f2b6c298e42ad8fb07bed978b6111473d86177209f2
3
  size 204500912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3228e0321659c4502abc82fa473ef10d695f47b297d8c49e209a320ec0d862e0
3
  size 104062923
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:789ad1dcf0738c2b103827b3bf6b47de957c032a3a1cd442af3b12552cfbbd4f
3
  size 104062923
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:181c5f0270cf39930062ddfa3767a2481d0c360f120b11f8e25dbf533a1cdaba
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d6d8fafcd1ee268414be5acf0366296af5b03d60871978712eac1979cb42d65
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:568b50080100a449a915d52bb8c381d309ce62e67e1fc337145dd1ba27bc34ba
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d72352d4b2a5c34b9f8314745f4274a6ceaf25c7fd28797dfe03562830d2faba
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.5928880026586905,
6
  "eval_steps": 500,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -908,6 +908,306 @@
908
  "rewards/margins": 149.69699096679688,
909
  "rewards/rejected": -148.73023986816406,
910
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
911
  }
912
  ],
913
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.122299767364573,
6
  "eval_steps": 500,
7
+ "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
908
  "rewards/margins": 149.69699096679688,
909
  "rewards/rejected": -148.73023986816406,
910
  "step": 600
911
+ },
912
+ {
913
+ "epoch": 1.6194749086075109,
914
+ "grad_norm": 66.45056915283203,
915
+ "learning_rate": 4.344444444444444e-07,
916
+ "logits/chosen": 7.270118713378906,
917
+ "logits/rejected": 7.5960588455200195,
918
+ "logps/chosen": -503.7193298339844,
919
+ "logps/rejected": -1094.6021728515625,
920
+ "loss": 1.5946972846984864,
921
+ "rewards/accuracies": 0.9437500238418579,
922
+ "rewards/chosen": -3.655120372772217,
923
+ "rewards/margins": 125.68217468261719,
924
+ "rewards/rejected": -129.33731079101562,
925
+ "step": 610
926
+ },
927
+ {
928
+ "epoch": 1.646061814556331,
929
+ "grad_norm": 280.2427978515625,
930
+ "learning_rate": 4.2333333333333334e-07,
931
+ "logits/chosen": 7.251768589019775,
932
+ "logits/rejected": 7.520864009857178,
933
+ "logps/chosen": -517.1514892578125,
934
+ "logps/rejected": -1172.587158203125,
935
+ "loss": 2.4477691650390625,
936
+ "rewards/accuracies": 0.925000011920929,
937
+ "rewards/chosen": 0.9268826246261597,
938
+ "rewards/margins": 142.18368530273438,
939
+ "rewards/rejected": -141.2567901611328,
940
+ "step": 620
941
+ },
942
+ {
943
+ "epoch": 1.6726487205051512,
944
+ "grad_norm": 1.0393255949020386,
945
+ "learning_rate": 4.122222222222222e-07,
946
+ "logits/chosen": 7.011075019836426,
947
+ "logits/rejected": 7.46621561050415,
948
+ "logps/chosen": -447.34124755859375,
949
+ "logps/rejected": -1143.3458251953125,
950
+ "loss": 1.0738434791564941,
951
+ "rewards/accuracies": 0.9437500238418579,
952
+ "rewards/chosen": 1.6229969263076782,
953
+ "rewards/margins": 142.9796600341797,
954
+ "rewards/rejected": -141.35665893554688,
955
+ "step": 630
956
+ },
957
+ {
958
+ "epoch": 1.6992356264539714,
959
+ "grad_norm": 119.75847625732422,
960
+ "learning_rate": 4.0111111111111106e-07,
961
+ "logits/chosen": 6.9999237060546875,
962
+ "logits/rejected": 7.578449249267578,
963
+ "logps/chosen": -469.19012451171875,
964
+ "logps/rejected": -1200.680419921875,
965
+ "loss": 0.9937694549560547,
966
+ "rewards/accuracies": 0.949999988079071,
967
+ "rewards/chosen": 1.2244775295257568,
968
+ "rewards/margins": 147.14993286132812,
969
+ "rewards/rejected": -145.92544555664062,
970
+ "step": 640
971
+ },
972
+ {
973
+ "epoch": 1.7258225324027916,
974
+ "grad_norm": 27.75540542602539,
975
+ "learning_rate": 3.8999999999999997e-07,
976
+ "logits/chosen": 6.937554359436035,
977
+ "logits/rejected": 7.374237060546875,
978
+ "logps/chosen": -463.05438232421875,
979
+ "logps/rejected": -1168.0521240234375,
980
+ "loss": 0.39649856090545654,
981
+ "rewards/accuracies": 0.981249988079071,
982
+ "rewards/chosen": 3.329749345779419,
983
+ "rewards/margins": 145.75735473632812,
984
+ "rewards/rejected": -142.4276123046875,
985
+ "step": 650
986
+ },
987
+ {
988
+ "epoch": 1.7524094383516118,
989
+ "grad_norm": 31.218721389770508,
990
+ "learning_rate": 3.788888888888889e-07,
991
+ "logits/chosen": 7.072316646575928,
992
+ "logits/rejected": 7.550895690917969,
993
+ "logps/chosen": -483.40234375,
994
+ "logps/rejected": -1184.9073486328125,
995
+ "loss": 0.25033409595489503,
996
+ "rewards/accuracies": 0.9624999761581421,
997
+ "rewards/chosen": -2.29609751701355,
998
+ "rewards/margins": 147.64785766601562,
999
+ "rewards/rejected": -149.94395446777344,
1000
+ "step": 660
1001
+ },
1002
+ {
1003
+ "epoch": 1.778996344300432,
1004
+ "grad_norm": 6.414053359549143e-07,
1005
+ "learning_rate": 3.6777777777777774e-07,
1006
+ "logits/chosen": 7.303959846496582,
1007
+ "logits/rejected": 7.623525142669678,
1008
+ "logps/chosen": -508.6453552246094,
1009
+ "logps/rejected": -1223.940673828125,
1010
+ "loss": 0.31205618381500244,
1011
+ "rewards/accuracies": 0.9624999761581421,
1012
+ "rewards/chosen": -2.5321922302246094,
1013
+ "rewards/margins": 150.60842895507812,
1014
+ "rewards/rejected": -153.14060974121094,
1015
+ "step": 670
1016
+ },
1017
+ {
1018
+ "epoch": 1.8055832502492524,
1019
+ "grad_norm": 1.0985974499902462e-12,
1020
+ "learning_rate": 3.5666666666666666e-07,
1021
+ "logits/chosen": 7.344334602355957,
1022
+ "logits/rejected": 7.8254547119140625,
1023
+ "logps/chosen": -532.2833251953125,
1024
+ "logps/rejected": -1228.1844482421875,
1025
+ "loss": 1.0204992294311523,
1026
+ "rewards/accuracies": 0.956250011920929,
1027
+ "rewards/chosen": 3.5795791149139404,
1028
+ "rewards/margins": 159.29647827148438,
1029
+ "rewards/rejected": -155.71688842773438,
1030
+ "step": 680
1031
+ },
1032
+ {
1033
+ "epoch": 1.8321701561980723,
1034
+ "grad_norm": 66.1689453125,
1035
+ "learning_rate": 3.4555555555555557e-07,
1036
+ "logits/chosen": 7.0121636390686035,
1037
+ "logits/rejected": 7.367627143859863,
1038
+ "logps/chosen": -453.792236328125,
1039
+ "logps/rejected": -1141.1865234375,
1040
+ "loss": 0.38547022342681886,
1041
+ "rewards/accuracies": 0.9437500238418579,
1042
+ "rewards/chosen": -0.3728172183036804,
1043
+ "rewards/margins": 139.9238739013672,
1044
+ "rewards/rejected": -140.29669189453125,
1045
+ "step": 690
1046
+ },
1047
+ {
1048
+ "epoch": 1.8587570621468927,
1049
+ "grad_norm": 1.7826409438004044e-20,
1050
+ "learning_rate": 3.3444444444444443e-07,
1051
+ "logits/chosen": 6.914497375488281,
1052
+ "logits/rejected": 7.344313144683838,
1053
+ "logps/chosen": -456.8873596191406,
1054
+ "logps/rejected": -1159.482666015625,
1055
+ "loss": 0.2864746332168579,
1056
+ "rewards/accuracies": 0.9750000238418579,
1057
+ "rewards/chosen": -0.41872739791870117,
1058
+ "rewards/margins": 139.0840606689453,
1059
+ "rewards/rejected": -139.50277709960938,
1060
+ "step": 700
1061
+ },
1062
+ {
1063
+ "epoch": 1.8853439680957127,
1064
+ "grad_norm": 0.6577161550521851,
1065
+ "learning_rate": 3.233333333333333e-07,
1066
+ "logits/chosen": 7.24100399017334,
1067
+ "logits/rejected": 7.729971408843994,
1068
+ "logps/chosen": -453.352783203125,
1069
+ "logps/rejected": -1139.1920166015625,
1070
+ "loss": 0.40453357696533204,
1071
+ "rewards/accuracies": 0.949999988079071,
1072
+ "rewards/chosen": -0.5374351739883423,
1073
+ "rewards/margins": 134.17910766601562,
1074
+ "rewards/rejected": -134.71653747558594,
1075
+ "step": 710
1076
+ },
1077
+ {
1078
+ "epoch": 1.911930874044533,
1079
+ "grad_norm": 58.065155029296875,
1080
+ "learning_rate": 3.122222222222222e-07,
1081
+ "logits/chosen": 7.2779541015625,
1082
+ "logits/rejected": 7.5862250328063965,
1083
+ "logps/chosen": -502.33489990234375,
1084
+ "logps/rejected": -1174.3145751953125,
1085
+ "loss": 0.2982128143310547,
1086
+ "rewards/accuracies": 0.949999988079071,
1087
+ "rewards/chosen": 0.528018593788147,
1088
+ "rewards/margins": 134.43031311035156,
1089
+ "rewards/rejected": -133.9022979736328,
1090
+ "step": 720
1091
+ },
1092
+ {
1093
+ "epoch": 1.9385177799933533,
1094
+ "grad_norm": 3.057793140411377,
1095
+ "learning_rate": 3.011111111111111e-07,
1096
+ "logits/chosen": 7.26782751083374,
1097
+ "logits/rejected": 7.741539001464844,
1098
+ "logps/chosen": -496.8504943847656,
1099
+ "logps/rejected": -1235.9169921875,
1100
+ "loss": 0.8299455642700195,
1101
+ "rewards/accuracies": 0.9750000238418579,
1102
+ "rewards/chosen": -0.2519731819629669,
1103
+ "rewards/margins": 152.95582580566406,
1104
+ "rewards/rejected": -153.20779418945312,
1105
+ "step": 730
1106
+ },
1107
+ {
1108
+ "epoch": 1.9651046859421735,
1109
+ "grad_norm": 185.18174743652344,
1110
+ "learning_rate": 2.9e-07,
1111
+ "logits/chosen": 7.201784610748291,
1112
+ "logits/rejected": 7.585198402404785,
1113
+ "logps/chosen": -474.599853515625,
1114
+ "logps/rejected": -1184.7464599609375,
1115
+ "loss": 1.5328912734985352,
1116
+ "rewards/accuracies": 0.9312499761581421,
1117
+ "rewards/chosen": -3.7899742126464844,
1118
+ "rewards/margins": 142.3949432373047,
1119
+ "rewards/rejected": -146.1849365234375,
1120
+ "step": 740
1121
+ },
1122
+ {
1123
+ "epoch": 1.9916915918909937,
1124
+ "grad_norm": 173.19436645507812,
1125
+ "learning_rate": 2.788888888888889e-07,
1126
+ "logits/chosen": 7.347403526306152,
1127
+ "logits/rejected": 7.8731865882873535,
1128
+ "logps/chosen": -484.5741271972656,
1129
+ "logps/rejected": -1241.9910888671875,
1130
+ "loss": 0.9414227485656739,
1131
+ "rewards/accuracies": 0.9624999761581421,
1132
+ "rewards/chosen": -0.26702070236206055,
1133
+ "rewards/margins": 149.2693328857422,
1134
+ "rewards/rejected": -149.53634643554688,
1135
+ "step": 750
1136
+ },
1137
+ {
1138
+ "epoch": 2.015952143569292,
1139
+ "grad_norm": 277.53521728515625,
1140
+ "learning_rate": 2.6777777777777775e-07,
1141
+ "logits/chosen": 7.591332912445068,
1142
+ "logits/rejected": 7.759430408477783,
1143
+ "logps/chosen": -578.1312866210938,
1144
+ "logps/rejected": -1104.3414306640625,
1145
+ "loss": 0.3479891538619995,
1146
+ "rewards/accuracies": 0.9726027250289917,
1147
+ "rewards/chosen": -3.462564468383789,
1148
+ "rewards/margins": 117.48858642578125,
1149
+ "rewards/rejected": -120.95115661621094,
1150
+ "step": 760
1151
+ },
1152
+ {
1153
+ "epoch": 2.0425390495181124,
1154
+ "grad_norm": 81.4224624633789,
1155
+ "learning_rate": 2.5666666666666666e-07,
1156
+ "logits/chosen": 6.820937156677246,
1157
+ "logits/rejected": 7.438076972961426,
1158
+ "logps/chosen": -445.38592529296875,
1159
+ "logps/rejected": -1255.2547607421875,
1160
+ "loss": 0.7632743835449218,
1161
+ "rewards/accuracies": 0.9624999761581421,
1162
+ "rewards/chosen": -1.3056232929229736,
1163
+ "rewards/margins": 162.7215576171875,
1164
+ "rewards/rejected": -164.02719116210938,
1165
+ "step": 770
1166
+ },
1167
+ {
1168
+ "epoch": 2.0691259554669323,
1169
+ "grad_norm": 80.839111328125,
1170
+ "learning_rate": 2.455555555555555e-07,
1171
+ "logits/chosen": 7.089077949523926,
1172
+ "logits/rejected": 7.562623500823975,
1173
+ "logps/chosen": -479.9771423339844,
1174
+ "logps/rejected": -1212.7470703125,
1175
+ "loss": 0.37755522727966306,
1176
+ "rewards/accuracies": 0.96875,
1177
+ "rewards/chosen": -0.4610620439052582,
1178
+ "rewards/margins": 147.49462890625,
1179
+ "rewards/rejected": -147.9556884765625,
1180
+ "step": 780
1181
+ },
1182
+ {
1183
+ "epoch": 2.0957128614157527,
1184
+ "grad_norm": 102.21258544921875,
1185
+ "learning_rate": 2.3444444444444444e-07,
1186
+ "logits/chosen": 7.348860740661621,
1187
+ "logits/rejected": 7.812272548675537,
1188
+ "logps/chosen": -545.9133911132812,
1189
+ "logps/rejected": -1209.3829345703125,
1190
+ "loss": 0.8432134628295899,
1191
+ "rewards/accuracies": 0.9437500238418579,
1192
+ "rewards/chosen": -4.405800819396973,
1193
+ "rewards/margins": 136.8076629638672,
1194
+ "rewards/rejected": -141.2134552001953,
1195
+ "step": 790
1196
+ },
1197
+ {
1198
+ "epoch": 2.122299767364573,
1199
+ "grad_norm": 7.490438461303711,
1200
+ "learning_rate": 2.2333333333333332e-07,
1201
+ "logits/chosen": 6.928166389465332,
1202
+ "logits/rejected": 7.589695930480957,
1203
+ "logps/chosen": -411.7215881347656,
1204
+ "logps/rejected": -1234.5572509765625,
1205
+ "loss": 0.08373026251792907,
1206
+ "rewards/accuracies": 0.9750000238418579,
1207
+ "rewards/chosen": -1.1743253469467163,
1208
+ "rewards/margins": 159.31773376464844,
1209
+ "rewards/rejected": -160.49208068847656,
1210
+ "step": 800
1211
  }
1212
  ],
1213
  "logging_steps": 10,