yangjunxiao2021's picture
Upload 21 files
d86e382 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.9402985074626864,
"eval_steps": 500,
"global_step": 132,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 1.230027198791504,
"Normal prob": -1.230027198791504,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0,
"step": 0
},
{
"DPO Loss": 0.34657145664029176,
"Negative Geometric Mean": -0.9493766827771892,
"Negative prob": -0.9493766827771892,
"Normal Loss": 1.0090343952178955,
"Normal prob": -1.0090343952178955,
"Positive Loss": 0.37890636920928955,
"Positive prob": -0.37890636920928955,
"epoch": 0,
"step": 0
},
{
"epoch": 0.029850746268656716,
"grad_norm": 52.804205501572916,
"learning_rate": 2.9772727272727274e-06,
"loss": 1.4155,
"step": 1
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 1.092234492301941,
"Normal prob": -1.092234492301941,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.029850746268656716,
"step": 1
},
{
"DPO Loss": 0.3577462635421841,
"Negative Geometric Mean": -0.3783103787169165,
"Negative prob": -0.3783103787169165,
"Normal Loss": 0.8107529282569885,
"Normal prob": -0.8107529282569885,
"Positive Loss": 0.07748764753341675,
"Positive prob": -0.07748764753341675,
"epoch": 0.029850746268656716,
"step": 1
},
{
"epoch": 0.05970149253731343,
"grad_norm": 22.382695244020965,
"learning_rate": 2.9545454545454547e-06,
"loss": 1.1517,
"step": 2
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8582519292831421,
"Normal prob": -0.8582519292831421,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.05970149253731343,
"step": 2
},
{
"DPO Loss": 0.3623313108408915,
"Negative Geometric Mean": -1.0158045398410653,
"Negative prob": -1.0158045398410653,
"Normal Loss": 0.7299004793167114,
"Normal prob": -0.7299004793167114,
"Positive Loss": 0.07864368706941605,
"Positive prob": -0.07864368706941605,
"epoch": 0.05970149253731343,
"step": 2
},
{
"epoch": 0.08955223880597014,
"grad_norm": 12.355752020889257,
"learning_rate": 2.931818181818182e-06,
"loss": 1.0958,
"step": 3
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9027985334396362,
"Normal prob": -0.9027985334396362,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.08955223880597014,
"step": 3
},
{
"DPO Loss": 0.37659024799225,
"Negative Geometric Mean": -0.6431755968319472,
"Negative prob": -0.6431755968319472,
"Normal Loss": 0.8552955389022827,
"Normal prob": -0.8552955389022827,
"Positive Loss": 0.0777173787355423,
"Positive prob": -0.0777173787355423,
"epoch": 0.08955223880597014,
"step": 3
},
{
"epoch": 0.11940298507462686,
"grad_norm": 13.671836146708186,
"learning_rate": 2.9090909090909093e-06,
"loss": 1.065,
"step": 4
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9766985177993774,
"Normal prob": -0.9766985177993774,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.11940298507462686,
"step": 4
},
{
"DPO Loss": 0.35204142219855233,
"Negative Geometric Mean": -0.8325932820638021,
"Negative prob": -0.8325932820638021,
"Normal Loss": 1.0553878545761108,
"Normal prob": -1.0553878545761108,
"Positive Loss": 0.09103336185216904,
"Positive prob": -0.09103336185216904,
"epoch": 0.11940298507462686,
"step": 4
},
{
"epoch": 0.14925373134328357,
"grad_norm": 9.500922911135007,
"learning_rate": 2.8863636363636366e-06,
"loss": 1.2053,
"step": 5
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9637267589569092,
"Normal prob": -0.9637267589569092,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.14925373134328357,
"step": 5
},
{
"DPO Loss": 0.3369350566486365,
"Negative Geometric Mean": -0.8089229916566171,
"Negative prob": -0.8089229916566171,
"Normal Loss": 0.809516429901123,
"Normal prob": -0.809516429901123,
"Positive Loss": 0.08676248043775558,
"Positive prob": -0.08676248043775558,
"epoch": 0.14925373134328357,
"step": 5
},
{
"epoch": 0.1791044776119403,
"grad_norm": 9.50990015460849,
"learning_rate": 2.863636363636364e-06,
"loss": 1.1133,
"step": 6
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.832047700881958,
"Normal prob": -0.832047700881958,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.1791044776119403,
"step": 6
},
{
"DPO Loss": 0.3370728580941308,
"Negative Geometric Mean": -0.8685310105770683,
"Negative prob": -0.8685310105770683,
"Normal Loss": 0.8815029263496399,
"Normal prob": -0.8815029263496399,
"Positive Loss": 0.07432825863361359,
"Positive prob": -0.07432825863361359,
"epoch": 0.1791044776119403,
"step": 6
},
{
"epoch": 0.208955223880597,
"grad_norm": 8.039098978654936,
"learning_rate": 2.840909090909091e-06,
"loss": 1.2064,
"step": 7
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9996135830879211,
"Normal prob": -0.9996135830879211,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.208955223880597,
"step": 7
},
{
"DPO Loss": 0.35429090257439527,
"Negative Geometric Mean": -0.610354295417444,
"Negative prob": -0.610354295417444,
"Normal Loss": 0.716498851776123,
"Normal prob": -0.716498851776123,
"Positive Loss": 0.09210512042045593,
"Positive prob": -0.09210512042045593,
"epoch": 0.208955223880597,
"step": 7
},
{
"epoch": 0.23880597014925373,
"grad_norm": 9.566983237882555,
"learning_rate": 2.8181818181818185e-06,
"loss": 1.067,
"step": 8
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8073037266731262,
"Normal prob": -0.8073037266731262,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.23880597014925373,
"step": 8
},
{
"DPO Loss": 0.31094894833121156,
"Negative Geometric Mean": -0.8462985688606195,
"Negative prob": -0.8462985688606195,
"Normal Loss": 0.5099501013755798,
"Normal prob": -0.5099501013755798,
"Positive Loss": 0.09047375619411469,
"Positive prob": -0.09047375619411469,
"epoch": 0.23880597014925373,
"step": 8
},
{
"epoch": 0.26865671641791045,
"grad_norm": 7.87857077645135,
"learning_rate": 2.7954545454545453e-06,
"loss": 1.0028,
"step": 9
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.850799024105072,
"Normal prob": -0.850799024105072,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.26865671641791045,
"step": 9
},
{
"DPO Loss": 0.30296717585093275,
"Negative Geometric Mean": -0.9634897133738725,
"Negative prob": -0.9634897133738725,
"Normal Loss": 1.2854121923446655,
"Normal prob": -1.2854121923446655,
"Positive Loss": 0.07252619415521622,
"Positive prob": -0.07252619415521622,
"epoch": 0.26865671641791045,
"step": 9
},
{
"epoch": 0.29850746268656714,
"grad_norm": 9.469089262773355,
"learning_rate": 2.7727272727272726e-06,
"loss": 1.1173,
"step": 10
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8608676195144653,
"Normal prob": -0.8608676195144653,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.29850746268656714,
"step": 10
},
{
"DPO Loss": 0.26851745737184274,
"Negative Geometric Mean": -1.0714603639998526,
"Negative prob": -1.0714603639998526,
"Normal Loss": 0.7984556555747986,
"Normal prob": -0.7984556555747986,
"Positive Loss": 0.05822120979428291,
"Positive prob": -0.05822120979428291,
"epoch": 0.29850746268656714,
"step": 10
},
{
"epoch": 0.3283582089552239,
"grad_norm": 7.287903263195378,
"learning_rate": 2.75e-06,
"loss": 1.013,
"step": 11
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8310959339141846,
"Normal prob": -0.8310959339141846,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.3283582089552239,
"step": 11
},
{
"DPO Loss": 0.26161430097353844,
"Negative Geometric Mean": -1.1722790406482055,
"Negative prob": -1.1722790406482055,
"Normal Loss": 0.8640764355659485,
"Normal prob": -0.8640764355659485,
"Positive Loss": 0.07576876878738403,
"Positive prob": -0.07576876878738403,
"epoch": 0.3283582089552239,
"step": 11
},
{
"epoch": 0.3582089552238806,
"grad_norm": 12.679434481744659,
"learning_rate": 2.7272727272727272e-06,
"loss": 1.1083,
"step": 12
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9239760637283325,
"Normal prob": -0.9239760637283325,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.3582089552238806,
"step": 12
},
{
"DPO Loss": 0.3063912470938362,
"Negative Geometric Mean": -0.7924091110304511,
"Negative prob": -0.7924091110304511,
"Normal Loss": 0.8149660229682922,
"Normal prob": -0.8149660229682922,
"Positive Loss": 0.06200961023569107,
"Positive prob": -0.06200961023569107,
"epoch": 0.3582089552238806,
"step": 12
},
{
"epoch": 0.3880597014925373,
"grad_norm": 8.690620794915942,
"learning_rate": 2.7045454545454545e-06,
"loss": 1.0424,
"step": 13
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7188159227371216,
"Normal prob": -0.7188159227371216,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.3880597014925373,
"step": 13
},
{
"DPO Loss": 0.2937756023461591,
"Negative Geometric Mean": -0.4400056991696919,
"Negative prob": -0.4400056991696919,
"Normal Loss": 0.8943111300468445,
"Normal prob": -0.8943111300468445,
"Positive Loss": 0.09851977974176407,
"Positive prob": -0.09851977974176407,
"epoch": 0.3880597014925373,
"step": 13
},
{
"epoch": 0.417910447761194,
"grad_norm": 8.116923440818201,
"learning_rate": 2.681818181818182e-06,
"loss": 1.019,
"step": 14
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7557149529457092,
"Normal prob": -0.7557149529457092,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.417910447761194,
"step": 14
},
{
"DPO Loss": 0.3186842313017755,
"Negative Geometric Mean": -1.1004963759258488,
"Negative prob": -1.1004963759258488,
"Normal Loss": 0.8593255877494812,
"Normal prob": -0.8593255877494812,
"Positive Loss": 0.04907805845141411,
"Positive prob": -0.04907805845141411,
"epoch": 0.417910447761194,
"step": 14
},
{
"epoch": 0.44776119402985076,
"grad_norm": 8.007172349603502,
"learning_rate": 2.659090909090909e-06,
"loss": 1.0353,
"step": 15
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.897316038608551,
"Normal prob": -0.897316038608551,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.44776119402985076,
"step": 15
},
{
"DPO Loss": 0.1223304973432013,
"Negative Geometric Mean": -1.9276966850878456,
"Negative prob": -1.9276966850878456,
"Normal Loss": 0.6334409117698669,
"Normal prob": -0.6334409117698669,
"Positive Loss": 0.05224687606096268,
"Positive prob": -0.05224687606096268,
"epoch": 0.44776119402985076,
"step": 15
},
{
"epoch": 0.47761194029850745,
"grad_norm": 8.24043100452026,
"learning_rate": 2.6363636363636364e-06,
"loss": 0.9134,
"step": 16
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9187679886817932,
"Normal prob": -0.9187679886817932,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.47761194029850745,
"step": 16
},
{
"DPO Loss": 0.055564984135369914,
"Negative Geometric Mean": -3.0230499623024127,
"Negative prob": -3.0230499623024127,
"Normal Loss": 1.1529364585876465,
"Normal prob": -1.1529364585876465,
"Positive Loss": 0.08235401660203934,
"Positive prob": -0.08235401660203934,
"epoch": 0.47761194029850745,
"step": 16
},
{
"epoch": 0.5074626865671642,
"grad_norm": 8.688253054467179,
"learning_rate": 2.6136363636363637e-06,
"loss": 1.0468,
"step": 17
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7582242488861084,
"Normal prob": -0.7582242488861084,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.5074626865671642,
"step": 17
},
{
"DPO Loss": 0.12820626634228,
"Negative Geometric Mean": -2.269475635729338,
"Negative prob": -2.269475635729338,
"Normal Loss": 0.3805200755596161,
"Normal prob": -0.3805200755596161,
"Positive Loss": 0.07097562402486801,
"Positive prob": -0.07097562402486801,
"epoch": 0.5074626865671642,
"step": 17
},
{
"epoch": 0.5373134328358209,
"grad_norm": 8.727946309173646,
"learning_rate": 2.590909090909091e-06,
"loss": 0.8819,
"step": 18
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 1.0369868278503418,
"Normal prob": -1.0369868278503418,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.5373134328358209,
"step": 18
},
{
"DPO Loss": 0.06657070366779086,
"Negative Geometric Mean": -2.8913005754059435,
"Negative prob": -2.8913005754059435,
"Normal Loss": 0.7784830331802368,
"Normal prob": -0.7784830331802368,
"Positive Loss": 0.04361863434314728,
"Positive prob": -0.04361863434314728,
"epoch": 0.5373134328358209,
"step": 18
},
{
"epoch": 0.5671641791044776,
"grad_norm": 8.562076920129734,
"learning_rate": 2.5681818181818183e-06,
"loss": 0.8626,
"step": 19
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9392335414886475,
"Normal prob": -0.9392335414886475,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.5671641791044776,
"step": 19
},
{
"DPO Loss": 0.004452790854185025,
"Negative Geometric Mean": -5.43384802094139,
"Negative prob": -5.43384802094139,
"Normal Loss": 0.8345220685005188,
"Normal prob": -0.8345220685005188,
"Positive Loss": 0.07906413078308105,
"Positive prob": -0.07906413078308105,
"epoch": 0.5671641791044776,
"step": 19
},
{
"epoch": 0.5970149253731343,
"grad_norm": 9.154040073172876,
"learning_rate": 2.5454545454545456e-06,
"loss": 0.9584,
"step": 20
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8151698112487793,
"Normal prob": -0.8151698112487793,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.5970149253731343,
"step": 20
},
{
"DPO Loss": 0.0007850244175145432,
"Negative Geometric Mean": -7.495313533399471,
"Negative prob": -7.495313533399471,
"Normal Loss": 0.8681256771087646,
"Normal prob": -0.8681256771087646,
"Positive Loss": 0.05661068111658096,
"Positive prob": -0.05661068111658096,
"epoch": 0.5970149253731343,
"step": 20
},
{
"epoch": 0.6268656716417911,
"grad_norm": 19.32249471061477,
"learning_rate": 2.522727272727273e-06,
"loss": 0.9313,
"step": 21
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8350412249565125,
"Normal prob": -0.8350412249565125,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.6268656716417911,
"step": 21
},
{
"DPO Loss": 0.013323540066809244,
"Negative Geometric Mean": -4.539878773912091,
"Negative prob": -4.539878773912091,
"Normal Loss": 0.8549312949180603,
"Normal prob": -0.8549312949180603,
"Positive Loss": 0.0937165841460228,
"Positive prob": -0.0937165841460228,
"epoch": 0.6268656716417911,
"step": 21
},
{
"epoch": 0.6567164179104478,
"grad_norm": 18.4720664879043,
"learning_rate": 2.5e-06,
"loss": 0.9518,
"step": 22
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.83185875415802,
"Normal prob": -0.83185875415802,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.6567164179104478,
"step": 22
},
{
"DPO Loss": 7.441585176947715e-05,
"Negative Geometric Mean": -9.482926878210616,
"Negative prob": -9.482926878210616,
"Normal Loss": 1.0035439729690552,
"Normal prob": -1.0035439729690552,
"Positive Loss": 0.1059131771326065,
"Positive prob": -0.1059131771326065,
"epoch": 0.6567164179104478,
"step": 22
},
{
"epoch": 0.6865671641791045,
"grad_norm": 7.4403596397354015,
"learning_rate": 2.4772727272727275e-06,
"loss": 0.9741,
"step": 23
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 1.1941570043563843,
"Normal prob": -1.1941570043563843,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.6865671641791045,
"step": 23
},
{
"DPO Loss": 5.767926370817813e-06,
"Negative Geometric Mean": -12.173237529280238,
"Negative prob": -12.173237529280238,
"Normal Loss": 0.7278223037719727,
"Normal prob": -0.7278223037719727,
"Positive Loss": 0.07192976027727127,
"Positive prob": -0.07192976027727127,
"epoch": 0.6865671641791045,
"step": 23
},
{
"epoch": 0.7164179104477612,
"grad_norm": 7.330688093396721,
"learning_rate": 2.454545454545455e-06,
"loss": 0.9927,
"step": 24
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9865759611129761,
"Normal prob": -0.9865759611129761,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.7164179104477612,
"step": 24
},
{
"DPO Loss": 1.0947351633426092e-05,
"Negative Geometric Mean": -11.567188243575508,
"Negative prob": -11.567188243575508,
"Normal Loss": 0.8570675253868103,
"Normal prob": -0.8570675253868103,
"Positive Loss": 0.06724032014608383,
"Positive prob": -0.06724032014608383,
"epoch": 0.7164179104477612,
"step": 24
},
{
"epoch": 0.746268656716418,
"grad_norm": 7.557781355223681,
"learning_rate": 2.4318181818181817e-06,
"loss": 0.9221,
"step": 25
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9384709000587463,
"Normal prob": -0.9384709000587463,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.746268656716418,
"step": 25
},
{
"DPO Loss": 0.0003560853089358333,
"Negative Geometric Mean": -8.138521756169094,
"Negative prob": -8.138521756169094,
"Normal Loss": 0.8964889049530029,
"Normal prob": -0.8964889049530029,
"Positive Loss": 0.06566499173641205,
"Positive prob": -0.06566499173641205,
"epoch": 0.746268656716418,
"step": 25
},
{
"epoch": 0.7761194029850746,
"grad_norm": 7.046101597469539,
"learning_rate": 2.409090909090909e-06,
"loss": 0.9733,
"step": 26
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9506588578224182,
"Normal prob": -0.9506588578224182,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.7761194029850746,
"step": 26
},
{
"DPO Loss": 8.255091540571396e-06,
"Negative Geometric Mean": -11.822831423895254,
"Negative prob": -11.822831423895254,
"Normal Loss": 0.9992624521255493,
"Normal prob": -0.9992624521255493,
"Positive Loss": 0.093373604118824,
"Positive prob": -0.093373604118824,
"epoch": 0.7761194029850746,
"step": 26
},
{
"epoch": 0.8059701492537313,
"grad_norm": 6.576814099531671,
"learning_rate": 2.3863636363636363e-06,
"loss": 0.9531,
"step": 27
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.964348316192627,
"Normal prob": -0.964348316192627,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.8059701492537313,
"step": 27
},
{
"DPO Loss": 1.038318288302413e-05,
"Negative Geometric Mean": -12.70634533110119,
"Negative prob": -12.70634533110119,
"Normal Loss": 0.8761529326438904,
"Normal prob": -0.8761529326438904,
"Positive Loss": 0.048190467059612274,
"Positive prob": -0.048190467059612274,
"epoch": 0.8059701492537313,
"step": 27
},
{
"epoch": 0.835820895522388,
"grad_norm": 7.041162886647003,
"learning_rate": 2.3636363636363636e-06,
"loss": 0.9254,
"step": 28
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 1.0659770965576172,
"Normal prob": -1.0659770965576172,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.835820895522388,
"step": 28
},
{
"DPO Loss": 4.745458003421849e-06,
"Negative Geometric Mean": -12.22632121816452,
"Negative prob": -12.22632121816452,
"Normal Loss": 0.877432644367218,
"Normal prob": -0.877432644367218,
"Positive Loss": 0.07441161572933197,
"Positive prob": -0.07441161572933197,
"epoch": 0.835820895522388,
"step": 28
},
{
"epoch": 0.8656716417910447,
"grad_norm": 6.123530075257292,
"learning_rate": 2.340909090909091e-06,
"loss": 0.9906,
"step": 29
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.9131932854652405,
"Normal prob": -0.9131932854652405,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.8656716417910447,
"step": 29
},
{
"DPO Loss": 1.6460282004165447e-05,
"Negative Geometric Mean": -11.04015827178955,
"Negative prob": -11.04015827178955,
"Normal Loss": 1.063417673110962,
"Normal prob": -1.063417673110962,
"Positive Loss": 0.050095487385988235,
"Positive prob": -0.050095487385988235,
"epoch": 0.8656716417910447,
"step": 29
},
{
"epoch": 0.8955223880597015,
"grad_norm": 6.118956679067234,
"learning_rate": 2.318181818181818e-06,
"loss": 0.9193,
"step": 30
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8643659949302673,
"Normal prob": -0.8643659949302673,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.8955223880597015,
"step": 30
},
{
"DPO Loss": 9.904635203296299e-06,
"Negative Geometric Mean": -11.622193305258186,
"Negative prob": -11.622193305258186,
"Normal Loss": 0.7562206983566284,
"Normal prob": -0.7562206983566284,
"Positive Loss": 0.05644884705543518,
"Positive prob": -0.05644884705543518,
"epoch": 0.8955223880597015,
"step": 30
},
{
"epoch": 0.9253731343283582,
"grad_norm": 6.536101223333384,
"learning_rate": 2.2954545454545455e-06,
"loss": 0.8934,
"step": 31
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8608868718147278,
"Normal prob": -0.8608868718147278,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.9253731343283582,
"step": 31
},
{
"DPO Loss": 2.137509260823493e-05,
"Negative Geometric Mean": -10.705368454391891,
"Negative prob": -10.705368454391891,
"Normal Loss": 0.71401047706604,
"Normal prob": -0.71401047706604,
"Positive Loss": 0.06425323337316513,
"Positive prob": -0.06425323337316513,
"epoch": 0.9253731343283582,
"step": 31
},
{
"epoch": 0.9552238805970149,
"grad_norm": 7.040623160546982,
"learning_rate": 2.2727272727272728e-06,
"loss": 0.8664,
"step": 32
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7989053130149841,
"Normal prob": -0.7989053130149841,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.9552238805970149,
"step": 32
},
{
"DPO Loss": 2.7078393532067353e-05,
"Negative Geometric Mean": -10.612409125434027,
"Negative prob": -10.612409125434027,
"Normal Loss": 0.9129906892776489,
"Normal prob": -0.9129906892776489,
"Positive Loss": 0.10737287253141403,
"Positive prob": -0.10737287253141403,
"epoch": 0.9552238805970149,
"step": 32
},
{
"epoch": 0.9850746268656716,
"grad_norm": 6.585654111220224,
"learning_rate": 2.25e-06,
"loss": 0.842,
"step": 33
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.8037691712379456,
"Normal prob": -0.8037691712379456,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.9850746268656716,
"step": 33
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7143898606300354,
"Normal prob": -0.7143898606300354,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 0.9850746268656716,
"step": 33
},
{
"epoch": 1.0149253731343284,
"grad_norm": 9.914782438598868,
"learning_rate": 2.2272727272727274e-06,
"loss": 0.6917,
"step": 34
},
{
"DPO Loss": 3.654100751997121e-05,
"Negative Geometric Mean": -10.674591619318182,
"Negative prob": -10.674591619318182,
"Normal Loss": 0.48714742064476013,
"Normal prob": -0.48714742064476013,
"Positive Loss": 0.03236498683691025,
"Positive prob": -0.03236498683691025,
"epoch": 1.0149253731343284,
"step": 34
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5524182915687561,
"Normal prob": -0.5524182915687561,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.0149253731343284,
"step": 34
},
{
"epoch": 1.044776119402985,
"grad_norm": 7.2083835894058375,
"learning_rate": 2.2045454545454547e-06,
"loss": 0.6615,
"step": 35
},
{
"DPO Loss": 5.3735510809371045e-05,
"Negative Geometric Mean": -10.087603725282486,
"Negative prob": -10.087603725282486,
"Normal Loss": 0.47449687123298645,
"Normal prob": -0.47449687123298645,
"Positive Loss": 0.02946843020617962,
"Positive prob": -0.02946843020617962,
"epoch": 1.044776119402985,
"step": 35
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5653090476989746,
"Normal prob": -0.5653090476989746,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.044776119402985,
"step": 35
},
{
"epoch": 1.0746268656716418,
"grad_norm": 7.509154772376704,
"learning_rate": 2.181818181818182e-06,
"loss": 0.5463,
"step": 36
},
{
"DPO Loss": 4.57076718186167e-05,
"Negative Geometric Mean": -10.369059509873779,
"Negative prob": -10.369059509873779,
"Normal Loss": 0.7275592684745789,
"Normal prob": -0.7275592684745789,
"Positive Loss": 0.022183816879987717,
"Positive prob": -0.022183816879987717,
"epoch": 1.0746268656716418,
"step": 36
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.48975300788879395,
"Normal prob": -0.48975300788879395,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.0746268656716418,
"step": 36
},
{
"epoch": 1.1044776119402986,
"grad_norm": 5.921019640825061,
"learning_rate": 2.1590909090909092e-06,
"loss": 0.6523,
"step": 37
},
{
"DPO Loss": 7.289560432171723e-05,
"Negative Geometric Mean": -9.601848503888467,
"Negative prob": -9.601848503888467,
"Normal Loss": 0.8984713554382324,
"Normal prob": -0.8984713554382324,
"Positive Loss": 0.029801441356539726,
"Positive prob": -0.029801441356539726,
"epoch": 1.1044776119402986,
"step": 37
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.709186851978302,
"Normal prob": -0.709186851978302,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.1044776119402986,
"step": 37
},
{
"epoch": 1.1343283582089552,
"grad_norm": 7.446657265486741,
"learning_rate": 2.1363636363636365e-06,
"loss": 0.6967,
"step": 38
},
{
"DPO Loss": 2.6440661378327594e-05,
"Negative Geometric Mean": -10.957384672619048,
"Negative prob": -10.957384672619048,
"Normal Loss": 0.44052013754844666,
"Normal prob": -0.44052013754844666,
"Positive Loss": 0.02077590487897396,
"Positive prob": -0.02077590487897396,
"epoch": 1.1343283582089552,
"step": 38
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.39025214314460754,
"Normal prob": -0.39025214314460754,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.1343283582089552,
"step": 38
},
{
"epoch": 1.164179104477612,
"grad_norm": 6.65773645557663,
"learning_rate": 2.113636363636364e-06,
"loss": 0.5978,
"step": 39
},
{
"DPO Loss": 2.123153925438824e-05,
"Negative Geometric Mean": -10.680970389887971,
"Negative prob": -10.680970389887971,
"Normal Loss": 0.6101383566856384,
"Normal prob": -0.6101383566856384,
"Positive Loss": 0.03984152898192406,
"Positive prob": -0.03984152898192406,
"epoch": 1.164179104477612,
"step": 39
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5792780518531799,
"Normal prob": -0.5792780518531799,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.164179104477612,
"step": 39
},
{
"epoch": 1.1940298507462686,
"grad_norm": 6.924093024562789,
"learning_rate": 2.090909090909091e-06,
"loss": 0.6592,
"step": 40
},
{
"DPO Loss": 1.6680911890968927e-05,
"Negative Geometric Mean": -10.952719974078342,
"Negative prob": -10.952719974078342,
"Normal Loss": 0.4338739216327667,
"Normal prob": -0.4338739216327667,
"Positive Loss": 0.03350961208343506,
"Positive prob": -0.03350961208343506,
"epoch": 1.1940298507462686,
"step": 40
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3462405800819397,
"Normal prob": -0.3462405800819397,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.1940298507462686,
"step": 40
},
{
"epoch": 1.2238805970149254,
"grad_norm": 6.295634713144118,
"learning_rate": 2.0681818181818184e-06,
"loss": 0.5281,
"step": 41
},
{
"DPO Loss": 1.9216125147544902e-05,
"Negative Geometric Mean": -10.949885493970315,
"Negative prob": -10.949885493970315,
"Normal Loss": 0.6209268569946289,
"Normal prob": -0.6209268569946289,
"Positive Loss": 0.010221516713500023,
"Positive prob": -0.010221516713500023,
"epoch": 1.2238805970149254,
"step": 41
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4472298324108124,
"Normal prob": -0.4472298324108124,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.2238805970149254,
"step": 41
},
{
"epoch": 1.2537313432835822,
"grad_norm": 5.857596369043,
"learning_rate": 2.0454545454545453e-06,
"loss": 0.5837,
"step": 42
},
{
"DPO Loss": 3.1575882722812355e-05,
"Negative Geometric Mean": -10.842009715544872,
"Negative prob": -10.842009715544872,
"Normal Loss": 0.3952675461769104,
"Normal prob": -0.3952675461769104,
"Positive Loss": 0.06998435407876968,
"Positive prob": -0.06998435407876968,
"epoch": 1.2537313432835822,
"step": 42
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3682936131954193,
"Normal prob": -0.3682936131954193,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.2537313432835822,
"step": 42
},
{
"epoch": 1.2835820895522387,
"grad_norm": 6.19946111675221,
"learning_rate": 2.0227272727272726e-06,
"loss": 0.4489,
"step": 43
},
{
"DPO Loss": 7.918896147509772e-06,
"Negative Geometric Mean": -11.86809765625,
"Negative prob": -11.86809765625,
"Normal Loss": 0.7341710329055786,
"Normal prob": -0.7341710329055786,
"Positive Loss": 0.023408204317092896,
"Positive prob": -0.023408204317092896,
"epoch": 1.2835820895522387,
"step": 43
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5852903723716736,
"Normal prob": -0.5852903723716736,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.2835820895522387,
"step": 43
},
{
"epoch": 1.3134328358208955,
"grad_norm": 7.299737993528941,
"learning_rate": 2e-06,
"loss": 0.6287,
"step": 44
},
{
"DPO Loss": 1.81222332665437e-05,
"Negative Geometric Mean": -10.647718364689625,
"Negative prob": -10.647718364689625,
"Normal Loss": 0.4992733597755432,
"Normal prob": -0.4992733597755432,
"Positive Loss": 0.05666818842291832,
"Positive prob": -0.05666818842291832,
"epoch": 1.3134328358208955,
"step": 44
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7956355810165405,
"Normal prob": -0.7956355810165405,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.3134328358208955,
"step": 44
},
{
"epoch": 1.3432835820895521,
"grad_norm": 7.900970686658878,
"learning_rate": 1.977272727272727e-06,
"loss": 0.7441,
"step": 45
},
{
"DPO Loss": 6.092015148833826e-05,
"Negative Geometric Mean": -9.997283935546875,
"Negative prob": -9.997283935546875,
"Normal Loss": 0.5293800830841064,
"Normal prob": -0.5293800830841064,
"Positive Loss": 0.012493799440562725,
"Positive prob": -0.012493799440562725,
"epoch": 1.3432835820895521,
"step": 45
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.6335676908493042,
"Normal prob": -0.6335676908493042,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.3432835820895521,
"step": 45
},
{
"epoch": 1.373134328358209,
"grad_norm": 6.033587067188048,
"learning_rate": 1.9545454545454545e-06,
"loss": 0.6688,
"step": 46
},
{
"DPO Loss": 2.075863324394268e-05,
"Negative Geometric Mean": -11.443209795884684,
"Negative prob": -11.443209795884684,
"Normal Loss": 0.9459198713302612,
"Normal prob": -0.9459198713302612,
"Positive Loss": 0.019672967493534088,
"Positive prob": -0.019672967493534088,
"epoch": 1.373134328358209,
"step": 46
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5753485560417175,
"Normal prob": -0.5753485560417175,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.373134328358209,
"step": 46
},
{
"epoch": 1.4029850746268657,
"grad_norm": 6.437116667065512,
"learning_rate": 1.931818181818182e-06,
"loss": 0.7138,
"step": 47
},
{
"DPO Loss": 5.24218732737661e-05,
"Negative Geometric Mean": -10.045061616056572,
"Negative prob": -10.045061616056572,
"Normal Loss": 0.6808024644851685,
"Normal prob": -0.6808024644851685,
"Positive Loss": 0.023415615782141685,
"Positive prob": -0.023415615782141685,
"epoch": 1.4029850746268657,
"step": 47
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.6357601284980774,
"Normal prob": -0.6357601284980774,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.4029850746268657,
"step": 47
},
{
"epoch": 1.4328358208955223,
"grad_norm": 7.152119639795567,
"learning_rate": 1.909090909090909e-06,
"loss": 0.6079,
"step": 48
},
{
"DPO Loss": 7.2306889216542525e-06,
"Negative Geometric Mean": -12.073476457210242,
"Negative prob": -12.073476457210242,
"Normal Loss": 0.5705257058143616,
"Normal prob": -0.5705257058143616,
"Positive Loss": 0.02072186954319477,
"Positive prob": -0.02072186954319477,
"epoch": 1.4328358208955223,
"step": 48
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5705331563949585,
"Normal prob": -0.5705331563949585,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.4328358208955223,
"step": 48
},
{
"epoch": 1.462686567164179,
"grad_norm": 8.435206603146995,
"learning_rate": 1.8863636363636364e-06,
"loss": 0.4921,
"step": 49
},
{
"DPO Loss": 1.2005791148960418e-05,
"Negative Geometric Mean": -11.335293660121682,
"Negative prob": -11.335293660121682,
"Normal Loss": 0.4985297918319702,
"Normal prob": -0.4985297918319702,
"Positive Loss": 0.03511481359601021,
"Positive prob": -0.03511481359601021,
"epoch": 1.462686567164179,
"step": 49
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5165051221847534,
"Normal prob": -0.5165051221847534,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.462686567164179,
"step": 49
},
{
"epoch": 1.4925373134328357,
"grad_norm": 6.663071553118176,
"learning_rate": 1.8636363636363637e-06,
"loss": 0.582,
"step": 50
},
{
"DPO Loss": 8.121549918893668e-06,
"Negative Geometric Mean": -11.646775242426388,
"Negative prob": -11.646775242426388,
"Normal Loss": 0.7396381497383118,
"Normal prob": -0.7396381497383118,
"Positive Loss": 0.046656664460897446,
"Positive prob": -0.046656664460897446,
"epoch": 1.4925373134328357,
"step": 50
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.46003180742263794,
"Normal prob": -0.46003180742263794,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.4925373134328357,
"step": 50
},
{
"epoch": 1.5223880597014925,
"grad_norm": 6.8924034055431225,
"learning_rate": 1.840909090909091e-06,
"loss": 0.5667,
"step": 51
},
{
"DPO Loss": 3.960602457920955e-05,
"Negative Geometric Mean": -10.694715555003613,
"Negative prob": -10.694715555003613,
"Normal Loss": 0.32585108280181885,
"Normal prob": -0.32585108280181885,
"Positive Loss": 0.04867149889469147,
"Positive prob": -0.04867149889469147,
"epoch": 1.5223880597014925,
"step": 51
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4816523492336273,
"Normal prob": -0.4816523492336273,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.5223880597014925,
"step": 51
},
{
"epoch": 1.5522388059701493,
"grad_norm": 7.4924421694754075,
"learning_rate": 1.8181818181818183e-06,
"loss": 0.4917,
"step": 52
},
{
"DPO Loss": 5.1645393655010374e-05,
"Negative Geometric Mean": -10.168975942689114,
"Negative prob": -10.168975942689114,
"Normal Loss": 0.30011507868766785,
"Normal prob": -0.30011507868766785,
"Positive Loss": 0.02231639437377453,
"Positive prob": -0.02231639437377453,
"epoch": 1.5522388059701493,
"step": 52
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4937782287597656,
"Normal prob": -0.4937782287597656,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.5522388059701493,
"step": 52
},
{
"epoch": 1.582089552238806,
"grad_norm": 7.184478655478447,
"learning_rate": 1.7954545454545456e-06,
"loss": 0.5295,
"step": 53
},
{
"DPO Loss": 5.356822072205326e-06,
"Negative Geometric Mean": -12.203828545026882,
"Negative prob": -12.203828545026882,
"Normal Loss": 0.5068655014038086,
"Normal prob": -0.5068655014038086,
"Positive Loss": 0.02591904066503048,
"Positive prob": -0.02591904066503048,
"epoch": 1.582089552238806,
"step": 53
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.44835716485977173,
"Normal prob": -0.44835716485977173,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.582089552238806,
"step": 53
},
{
"epoch": 1.6119402985074627,
"grad_norm": 6.665246283694876,
"learning_rate": 1.7727272727272729e-06,
"loss": 0.5862,
"step": 54
},
{
"DPO Loss": 2.7645910456594184e-05,
"Negative Geometric Mean": -10.52684736755279,
"Negative prob": -10.52684736755279,
"Normal Loss": 0.4901617169380188,
"Normal prob": -0.4901617169380188,
"Positive Loss": 0.031082332134246826,
"Positive prob": -0.031082332134246826,
"epoch": 1.6119402985074627,
"step": 54
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3990895450115204,
"Normal prob": -0.3990895450115204,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.6119402985074627,
"step": 54
},
{
"epoch": 1.6417910447761193,
"grad_norm": 7.251995962906654,
"learning_rate": 1.7500000000000002e-06,
"loss": 0.5512,
"step": 55
},
{
"DPO Loss": 5.845775193481474e-06,
"Negative Geometric Mean": -11.981290714110127,
"Negative prob": -11.981290714110127,
"Normal Loss": 0.36946558952331543,
"Normal prob": -0.36946558952331543,
"Positive Loss": 0.03706742450594902,
"Positive prob": -0.03706742450594902,
"epoch": 1.6417910447761193,
"step": 55
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.6298558712005615,
"Normal prob": -0.6298558712005615,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.6417910447761193,
"step": 55
},
{
"epoch": 1.671641791044776,
"grad_norm": 6.5830406614829995,
"learning_rate": 1.7272727272727275e-06,
"loss": 0.5497,
"step": 56
},
{
"DPO Loss": 3.2969348642918384e-05,
"Negative Geometric Mean": -10.456912128245772,
"Negative prob": -10.456912128245772,
"Normal Loss": 0.6241900324821472,
"Normal prob": -0.6241900324821472,
"Positive Loss": 0.027565686032176018,
"Positive prob": -0.027565686032176018,
"epoch": 1.671641791044776,
"step": 56
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.49294447898864746,
"Normal prob": -0.49294447898864746,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.671641791044776,
"step": 56
},
{
"epoch": 1.7014925373134329,
"grad_norm": 7.489375090765791,
"learning_rate": 1.7045454545454548e-06,
"loss": 0.5473,
"step": 57
},
{
"DPO Loss": 9.641101792233715e-06,
"Negative Geometric Mean": -11.58503936609456,
"Negative prob": -11.58503936609456,
"Normal Loss": 0.4547930359840393,
"Normal prob": -0.4547930359840393,
"Positive Loss": 0.02409125678241253,
"Positive prob": -0.02409125678241253,
"epoch": 1.7014925373134329,
"step": 57
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5437726378440857,
"Normal prob": -0.5437726378440857,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.7014925373134329,
"step": 57
},
{
"epoch": 1.7313432835820897,
"grad_norm": 7.870395979704569,
"learning_rate": 1.6818181818181817e-06,
"loss": 0.6139,
"step": 58
},
{
"DPO Loss": 1.2307788643174536e-05,
"Negative Geometric Mean": -11.400927734375,
"Negative prob": -11.400927734375,
"Normal Loss": 0.4675034284591675,
"Normal prob": -0.4675034284591675,
"Positive Loss": 0.02847522310912609,
"Positive prob": -0.02847522310912609,
"epoch": 1.7313432835820897,
"step": 58
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5951191186904907,
"Normal prob": -0.5951191186904907,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.7313432835820897,
"step": 58
},
{
"epoch": 1.7611940298507462,
"grad_norm": 7.248327621413268,
"learning_rate": 1.659090909090909e-06,
"loss": 0.5501,
"step": 59
},
{
"DPO Loss": 6.131353933599495e-06,
"Negative Geometric Mean": -12.089666559278351,
"Negative prob": -12.089666559278351,
"Normal Loss": 0.6625760793685913,
"Normal prob": -0.6625760793685913,
"Positive Loss": 0.024925949051976204,
"Positive prob": -0.024925949051976204,
"epoch": 1.7611940298507462,
"step": 59
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.574043333530426,
"Normal prob": -0.574043333530426,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.7611940298507462,
"step": 59
},
{
"epoch": 1.7910447761194028,
"grad_norm": 5.45423423175427,
"learning_rate": 1.6363636363636363e-06,
"loss": 0.5803,
"step": 60
},
{
"DPO Loss": 6.456255345351767e-06,
"Negative Geometric Mean": -12.335179908988403,
"Negative prob": -12.335179908988403,
"Normal Loss": 0.5476536750793457,
"Normal prob": -0.5476536750793457,
"Positive Loss": 0.03484680876135826,
"Positive prob": -0.03484680876135826,
"epoch": 1.7910447761194028,
"step": 60
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4749366343021393,
"Normal prob": -0.4749366343021393,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.7910447761194028,
"step": 60
},
{
"epoch": 1.8208955223880596,
"grad_norm": 5.8299300029602845,
"learning_rate": 1.6136363636363635e-06,
"loss": 0.5286,
"step": 61
},
{
"DPO Loss": 6.855680101123193e-06,
"Negative Geometric Mean": -12.133075664569805,
"Negative prob": -12.133075664569805,
"Normal Loss": 0.49556368589401245,
"Normal prob": -0.49556368589401245,
"Positive Loss": 0.031043315306305885,
"Positive prob": -0.031043315306305885,
"epoch": 1.8208955223880596,
"step": 61
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7167157530784607,
"Normal prob": -0.7167157530784607,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.8208955223880596,
"step": 61
},
{
"epoch": 1.8507462686567164,
"grad_norm": 6.451470188285151,
"learning_rate": 1.5909090909090908e-06,
"loss": 0.5513,
"step": 62
},
{
"DPO Loss": 1.236436099783623e-05,
"Negative Geometric Mean": -11.445466172271574,
"Negative prob": -11.445466172271574,
"Normal Loss": 0.5918139219284058,
"Normal prob": -0.5918139219284058,
"Positive Loss": 0.026092026382684708,
"Positive prob": -0.026092026382684708,
"epoch": 1.8507462686567164,
"step": 62
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4669744670391083,
"Normal prob": -0.4669744670391083,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.8507462686567164,
"step": 62
},
{
"epoch": 1.8805970149253732,
"grad_norm": 6.794192852617203,
"learning_rate": 1.5681818181818181e-06,
"loss": 0.4827,
"step": 63
},
{
"DPO Loss": 1.3348207631132665e-05,
"Negative Geometric Mean": -11.5704201146176,
"Negative prob": -11.5704201146176,
"Normal Loss": 0.7752443552017212,
"Normal prob": -0.7752443552017212,
"Positive Loss": 0.03313179686665535,
"Positive prob": -0.03313179686665535,
"epoch": 1.8805970149253732,
"step": 63
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4319833815097809,
"Normal prob": -0.4319833815097809,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.8805970149253732,
"step": 63
},
{
"epoch": 1.9104477611940298,
"grad_norm": 6.583453632012116,
"learning_rate": 1.5454545454545454e-06,
"loss": 0.5891,
"step": 64
},
{
"DPO Loss": 4.963582076406908e-06,
"Negative Geometric Mean": -12.19005351163903,
"Negative prob": -12.19005351163903,
"Normal Loss": 0.7006582617759705,
"Normal prob": -0.7006582617759705,
"Positive Loss": 0.05256428197026253,
"Positive prob": -0.05256428197026253,
"epoch": 1.9104477611940298,
"step": 64
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5088911652565002,
"Normal prob": -0.5088911652565002,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.9104477611940298,
"step": 64
},
{
"epoch": 1.9402985074626866,
"grad_norm": 6.441815309507991,
"learning_rate": 1.5227272727272727e-06,
"loss": 0.676,
"step": 65
},
{
"DPO Loss": 3.909155035241524e-06,
"Negative Geometric Mean": -12.52902815645973,
"Negative prob": -12.52902815645973,
"Normal Loss": 0.4071587920188904,
"Normal prob": -0.4071587920188904,
"Positive Loss": 0.029172131791710854,
"Positive prob": -0.029172131791710854,
"epoch": 1.9402985074626866,
"step": 65
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5863581299781799,
"Normal prob": -0.5863581299781799,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.9402985074626866,
"step": 65
},
{
"epoch": 1.9701492537313432,
"grad_norm": 6.3544200742459935,
"learning_rate": 1.5e-06,
"loss": 0.4844,
"step": 66
},
{
"DPO Loss": 4.394697707115605e-06,
"Negative Geometric Mean": -12.445152789608176,
"Negative prob": -12.445152789608176,
"Normal Loss": 0.5812058448791504,
"Normal prob": -0.5812058448791504,
"Positive Loss": 0.025852346792817116,
"Positive prob": -0.025852346792817116,
"epoch": 1.9701492537313432,
"step": 66
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7593735456466675,
"Normal prob": -0.7593735456466675,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 1.9701492537313432,
"step": 66
},
{
"epoch": 2.0,
"grad_norm": 7.0108110525699985,
"learning_rate": 1.4772727272727273e-06,
"loss": 0.6144,
"step": 67
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.34294071793556213,
"Normal prob": -0.34294071793556213,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.0,
"step": 67
},
{
"DPO Loss": 2.4070561719272564e-06,
"Negative Geometric Mean": -13.018313531479217,
"Negative prob": -13.018313531479217,
"Normal Loss": 0.3513112962245941,
"Normal prob": -0.3513112962245941,
"Positive Loss": 0.013781579211354256,
"Positive prob": -0.013781579211354256,
"epoch": 2.0,
"step": 67
},
{
"epoch": 2.029850746268657,
"grad_norm": 7.07209986229336,
"learning_rate": 1.4545454545454546e-06,
"loss": 0.3488,
"step": 68
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.17317090928554535,
"Normal prob": -0.17317090928554535,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.029850746268657,
"step": 68
},
{
"DPO Loss": 2.6232480269051795e-05,
"Negative Geometric Mean": -10.84640401579797,
"Negative prob": -10.84640401579797,
"Normal Loss": 0.2502392828464508,
"Normal prob": -0.2502392828464508,
"Positive Loss": 0.009016763418912888,
"Positive prob": -0.009016763418912888,
"epoch": 2.029850746268657,
"step": 68
},
{
"epoch": 2.0597014925373136,
"grad_norm": 6.718484882180734,
"learning_rate": 1.431818181818182e-06,
"loss": 0.3109,
"step": 69
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.25603172183036804,
"Normal prob": -0.25603172183036804,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.0597014925373136,
"step": 69
},
{
"DPO Loss": 2.4722913198806987e-06,
"Negative Geometric Mean": -13.571044921875,
"Negative prob": -13.571044921875,
"Normal Loss": 0.6462356448173523,
"Normal prob": -0.6462356448173523,
"Positive Loss": 0.004487407859414816,
"Positive prob": -0.004487407859414816,
"epoch": 2.0597014925373136,
"step": 69
},
{
"epoch": 2.08955223880597,
"grad_norm": 6.391521213804556,
"learning_rate": 1.4090909090909092e-06,
"loss": 0.4603,
"step": 70
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2792622148990631,
"Normal prob": -0.2792622148990631,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.08955223880597,
"step": 70
},
{
"DPO Loss": 2.30209765905298e-06,
"Negative Geometric Mean": -13.15788681702044,
"Negative prob": -13.15788681702044,
"Normal Loss": 0.297980934381485,
"Normal prob": -0.297980934381485,
"Positive Loss": 0.011687587015330791,
"Positive prob": -0.011687587015330791,
"epoch": 2.08955223880597,
"step": 70
},
{
"epoch": 2.1194029850746268,
"grad_norm": 6.000476341412616,
"learning_rate": 1.3863636363636363e-06,
"loss": 0.426,
"step": 71
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.31911832094192505,
"Normal prob": -0.31911832094192505,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.1194029850746268,
"step": 71
},
{
"DPO Loss": 9.623063611478237e-07,
"Negative Geometric Mean": -13.79306566782845,
"Negative prob": -13.79306566782845,
"Normal Loss": 0.1256338506937027,
"Normal prob": -0.1256338506937027,
"Positive Loss": 0.014073642902076244,
"Positive prob": -0.014073642902076244,
"epoch": 2.1194029850746268,
"step": 71
},
{
"epoch": 2.1492537313432836,
"grad_norm": 5.665938957087509,
"learning_rate": 1.3636363636363636e-06,
"loss": 0.31,
"step": 72
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.5485053062438965,
"Normal prob": -0.5485053062438965,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.1492537313432836,
"step": 72
},
{
"DPO Loss": 5.508823305986437e-06,
"Negative Geometric Mean": -12.17880211034751,
"Negative prob": -12.17880211034751,
"Normal Loss": 0.18863847851753235,
"Normal prob": -0.18863847851753235,
"Positive Loss": 0.011334001086652279,
"Positive prob": -0.011334001086652279,
"epoch": 2.1492537313432836,
"step": 72
},
{
"epoch": 2.1791044776119404,
"grad_norm": 6.60404865468319,
"learning_rate": 1.340909090909091e-06,
"loss": 0.3371,
"step": 73
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2698725759983063,
"Normal prob": -0.2698725759983063,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.1791044776119404,
"step": 73
},
{
"DPO Loss": 2.5343320997566906e-06,
"Negative Geometric Mean": -13.084115531452266,
"Negative prob": -13.084115531452266,
"Normal Loss": 0.3101830780506134,
"Normal prob": -0.3101830780506134,
"Positive Loss": 0.007303276099264622,
"Positive prob": -0.007303276099264622,
"epoch": 2.1791044776119404,
"step": 73
},
{
"epoch": 2.208955223880597,
"grad_norm": 7.537056674857057,
"learning_rate": 1.3181818181818182e-06,
"loss": 0.3896,
"step": 74
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2088720202445984,
"Normal prob": -0.2088720202445984,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.208955223880597,
"step": 74
},
{
"DPO Loss": 6.722595604209246e-06,
"Negative Geometric Mean": -12.173159354073661,
"Negative prob": -12.173159354073661,
"Normal Loss": 0.40398481488227844,
"Normal prob": -0.40398481488227844,
"Positive Loss": 0.01838095672428608,
"Positive prob": -0.01838095672428608,
"epoch": 2.208955223880597,
"step": 74
},
{
"epoch": 2.2388059701492535,
"grad_norm": 7.724539990601786,
"learning_rate": 1.2954545454545455e-06,
"loss": 0.3009,
"step": 75
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.48262086510658264,
"Normal prob": -0.48262086510658264,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.2388059701492535,
"step": 75
},
{
"DPO Loss": 5.197786322988637e-07,
"Negative Geometric Mean": -14.51572339888308,
"Negative prob": -14.51572339888308,
"Normal Loss": 0.23035627603530884,
"Normal prob": -0.23035627603530884,
"Positive Loss": 0.008795712143182755,
"Positive prob": -0.008795712143182755,
"epoch": 2.2388059701492535,
"step": 75
},
{
"epoch": 2.2686567164179103,
"grad_norm": 8.221187128676613,
"learning_rate": 1.2727272727272728e-06,
"loss": 0.3589,
"step": 76
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2785874009132385,
"Normal prob": -0.2785874009132385,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.2686567164179103,
"step": 76
},
{
"DPO Loss": 1.621094342365068e-06,
"Negative Geometric Mean": -13.028692859266869,
"Negative prob": -13.028692859266869,
"Normal Loss": 0.29848527908325195,
"Normal prob": -0.29848527908325195,
"Positive Loss": 0.004369077738374472,
"Positive prob": -0.004369077738374472,
"epoch": 2.2686567164179103,
"step": 76
},
{
"epoch": 2.298507462686567,
"grad_norm": 6.89968090148801,
"learning_rate": 1.25e-06,
"loss": 0.3104,
"step": 77
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.31930315494537354,
"Normal prob": -0.31930315494537354,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.298507462686567,
"step": 77
},
{
"DPO Loss": 1.3040399279024244e-05,
"Negative Geometric Mean": -11.366265677550448,
"Negative prob": -11.366265677550448,
"Normal Loss": 0.4552519917488098,
"Normal prob": -0.4552519917488098,
"Positive Loss": 0.020085470750927925,
"Positive prob": -0.020085470750927925,
"epoch": 2.298507462686567,
"step": 77
},
{
"epoch": 2.328358208955224,
"grad_norm": 7.22395223128197,
"learning_rate": 1.2272727272727274e-06,
"loss": 0.3699,
"step": 78
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4527321457862854,
"Normal prob": -0.4527321457862854,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.328358208955224,
"step": 78
},
{
"DPO Loss": 3.5797003005450865e-06,
"Negative Geometric Mean": -12.924953185405927,
"Negative prob": -12.924953185405927,
"Normal Loss": 0.49810460209846497,
"Normal prob": -0.49810460209846497,
"Positive Loss": 0.0035452607553452253,
"Positive prob": -0.0035452607553452253,
"epoch": 2.328358208955224,
"step": 78
},
{
"epoch": 2.3582089552238807,
"grad_norm": 7.0300487933358,
"learning_rate": 1.2045454545454545e-06,
"loss": 0.3254,
"step": 79
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3027646243572235,
"Normal prob": -0.3027646243572235,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.3582089552238807,
"step": 79
},
{
"DPO Loss": 5.243361513598499e-06,
"Negative Geometric Mean": -12.248686441906308,
"Negative prob": -12.248686441906308,
"Normal Loss": 0.19559913873672485,
"Normal prob": -0.19559913873672485,
"Positive Loss": 0.0036265316884964705,
"Positive prob": -0.0036265316884964705,
"epoch": 2.3582089552238807,
"step": 79
},
{
"epoch": 2.388059701492537,
"grad_norm": 7.018808301104353,
"learning_rate": 1.1818181818181818e-06,
"loss": 0.2924,
"step": 80
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.37363290786743164,
"Normal prob": -0.37363290786743164,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.388059701492537,
"step": 80
},
{
"DPO Loss": 8.039128102474587e-06,
"Negative Geometric Mean": -12.245501740608809,
"Negative prob": -12.245501740608809,
"Normal Loss": 0.38694456219673157,
"Normal prob": -0.38694456219673157,
"Positive Loss": 0.007913284935057163,
"Positive prob": -0.007913284935057163,
"epoch": 2.388059701492537,
"step": 80
},
{
"epoch": 2.417910447761194,
"grad_norm": 7.1316719605682595,
"learning_rate": 1.159090909090909e-06,
"loss": 0.373,
"step": 81
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.23960407078266144,
"Normal prob": -0.23960407078266144,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.417910447761194,
"step": 81
},
{
"DPO Loss": 3.643317578642959e-06,
"Negative Geometric Mean": -12.694272748161765,
"Negative prob": -12.694272748161765,
"Normal Loss": 0.3460986912250519,
"Normal prob": -0.3460986912250519,
"Positive Loss": 0.03607124090194702,
"Positive prob": -0.03607124090194702,
"epoch": 2.417910447761194,
"step": 81
},
{
"epoch": 2.4477611940298507,
"grad_norm": 6.568519556302587,
"learning_rate": 1.1363636363636364e-06,
"loss": 0.2968,
"step": 82
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3786263167858124,
"Normal prob": -0.3786263167858124,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.4477611940298507,
"step": 82
},
{
"DPO Loss": 1.17810282746522e-05,
"Negative Geometric Mean": -11.290989731297348,
"Negative prob": -11.290989731297348,
"Normal Loss": 0.3233850598335266,
"Normal prob": -0.3233850598335266,
"Positive Loss": 0.014756398275494576,
"Positive prob": -0.014756398275494576,
"epoch": 2.4477611940298507,
"step": 82
},
{
"epoch": 2.4776119402985075,
"grad_norm": 6.961788241099841,
"learning_rate": 1.1136363636363637e-06,
"loss": 0.3067,
"step": 83
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.7298503518104553,
"Normal prob": -0.7298503518104553,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.4776119402985075,
"step": 83
},
{
"DPO Loss": 6.489746401144139e-06,
"Negative Geometric Mean": -12.0409423828125,
"Negative prob": -12.0409423828125,
"Normal Loss": 0.36162418127059937,
"Normal prob": -0.36162418127059937,
"Positive Loss": 0.007005380000919104,
"Positive prob": -0.007005380000919104,
"epoch": 2.4776119402985075,
"step": 83
},
{
"epoch": 2.5074626865671643,
"grad_norm": 6.528351208906881,
"learning_rate": 1.090909090909091e-06,
"loss": 0.4822,
"step": 84
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.43608808517456055,
"Normal prob": -0.43608808517456055,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.5074626865671643,
"step": 84
},
{
"DPO Loss": 1.8647181536166908e-06,
"Negative Geometric Mean": -13.310558063113747,
"Negative prob": -13.310558063113747,
"Normal Loss": 0.2762463092803955,
"Normal prob": -0.2762463092803955,
"Positive Loss": 0.015207285061478615,
"Positive prob": -0.015207285061478615,
"epoch": 2.5074626865671643,
"step": 84
},
{
"epoch": 2.5373134328358207,
"grad_norm": 6.696386369118086,
"learning_rate": 1.0681818181818183e-06,
"loss": 0.3106,
"step": 85
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3587005138397217,
"Normal prob": -0.3587005138397217,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.5373134328358207,
"step": 85
},
{
"DPO Loss": 1.0392724887777463e-05,
"Negative Geometric Mean": -11.663321547420965,
"Negative prob": -11.663321547420965,
"Normal Loss": 0.38271617889404297,
"Normal prob": -0.38271617889404297,
"Positive Loss": 0.007094533648341894,
"Positive prob": -0.007094533648341894,
"epoch": 2.5373134328358207,
"step": 85
},
{
"epoch": 2.5671641791044775,
"grad_norm": 7.213029254290765,
"learning_rate": 1.0454545454545456e-06,
"loss": 0.317,
"step": 86
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3641352355480194,
"Normal prob": -0.3641352355480194,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.5671641791044775,
"step": 86
},
{
"DPO Loss": 3.5987715729513327e-06,
"Negative Geometric Mean": -12.57204106168927,
"Negative prob": -12.57204106168927,
"Normal Loss": 0.46394774317741394,
"Normal prob": -0.46394774317741394,
"Positive Loss": 0.0050806887447834015,
"Positive prob": -0.0050806887447834015,
"epoch": 2.5671641791044775,
"step": 86
},
{
"epoch": 2.5970149253731343,
"grad_norm": 7.901045462084173,
"learning_rate": 1.0227272727272727e-06,
"loss": 0.3702,
"step": 87
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.38809868693351746,
"Normal prob": -0.38809868693351746,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.5970149253731343,
"step": 87
},
{
"DPO Loss": 1.6651211340411204e-06,
"Negative Geometric Mean": -13.372305265036962,
"Negative prob": -13.372305265036962,
"Normal Loss": 0.27421802282333374,
"Normal prob": -0.27421802282333374,
"Positive Loss": 0.017523737624287605,
"Positive prob": -0.017523737624287605,
"epoch": 2.5970149253731343,
"step": 87
},
{
"epoch": 2.626865671641791,
"grad_norm": 6.883433596486567,
"learning_rate": 1e-06,
"loss": 0.3336,
"step": 88
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.28301262855529785,
"Normal prob": -0.28301262855529785,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.626865671641791,
"step": 88
},
{
"DPO Loss": 2.9454008504157696e-06,
"Negative Geometric Mean": -12.71242241010274,
"Negative prob": -12.71242241010274,
"Normal Loss": 0.35078540444374084,
"Normal prob": -0.35078540444374084,
"Positive Loss": 0.01793888583779335,
"Positive prob": -0.01793888583779335,
"epoch": 2.626865671641791,
"step": 88
},
{
"epoch": 2.656716417910448,
"grad_norm": 6.603176465896816,
"learning_rate": 9.772727272727273e-07,
"loss": 0.4153,
"step": 89
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.29014265537261963,
"Normal prob": -0.29014265537261963,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.656716417910448,
"step": 89
},
{
"DPO Loss": 1.4884178069432536e-05,
"Negative Geometric Mean": -11.291460420642494,
"Negative prob": -11.291460420642494,
"Normal Loss": 0.27552318572998047,
"Normal prob": -0.27552318572998047,
"Positive Loss": 0.006887962110340595,
"Positive prob": -0.006887962110340595,
"epoch": 2.656716417910448,
"step": 89
},
{
"epoch": 2.6865671641791042,
"grad_norm": 7.071652347635012,
"learning_rate": 9.545454545454546e-07,
"loss": 0.3525,
"step": 90
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3731546103954315,
"Normal prob": -0.3731546103954315,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.6865671641791042,
"step": 90
},
{
"DPO Loss": 6.442647626957985e-06,
"Negative Geometric Mean": -12.235858669051204,
"Negative prob": -12.235858669051204,
"Normal Loss": 0.21569418907165527,
"Normal prob": -0.21569418907165527,
"Positive Loss": 0.005633717868477106,
"Positive prob": -0.005633717868477106,
"epoch": 2.6865671641791042,
"step": 90
},
{
"epoch": 2.716417910447761,
"grad_norm": 6.08004163712123,
"learning_rate": 9.318181818181818e-07,
"loss": 0.3038,
"step": 91
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.35742098093032837,
"Normal prob": -0.35742098093032837,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.716417910447761,
"step": 91
},
{
"DPO Loss": 4.149841651615268e-06,
"Negative Geometric Mean": -12.598418855144757,
"Negative prob": -12.598418855144757,
"Normal Loss": 0.28535205125808716,
"Normal prob": -0.28535205125808716,
"Positive Loss": 0.012952926568686962,
"Positive prob": -0.012952926568686962,
"epoch": 2.716417910447761,
"step": 91
},
{
"epoch": 2.746268656716418,
"grad_norm": 7.098164063144904,
"learning_rate": 9.090909090909091e-07,
"loss": 0.3625,
"step": 92
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.28534939885139465,
"Normal prob": -0.28534939885139465,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.746268656716418,
"step": 92
},
{
"DPO Loss": 1.2248438490094142e-06,
"Negative Geometric Mean": -13.854903100242078,
"Negative prob": -13.854903100242078,
"Normal Loss": 0.36843106150627136,
"Normal prob": -0.36843106150627136,
"Positive Loss": 0.03612969443202019,
"Positive prob": -0.03612969443202019,
"epoch": 2.746268656716418,
"step": 92
},
{
"epoch": 2.7761194029850746,
"grad_norm": 7.081778856883454,
"learning_rate": 8.863636363636364e-07,
"loss": 0.4371,
"step": 93
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.4569069743156433,
"Normal prob": -0.4569069743156433,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.7761194029850746,
"step": 93
},
{
"DPO Loss": 4.104862759699108e-06,
"Negative Geometric Mean": -12.369599921518265,
"Negative prob": -12.369599921518265,
"Normal Loss": 0.2255462110042572,
"Normal prob": -0.2255462110042572,
"Positive Loss": 0.018570953980088234,
"Positive prob": -0.018570953980088234,
"epoch": 2.7761194029850746,
"step": 93
},
{
"epoch": 2.8059701492537314,
"grad_norm": 6.9825157063188374,
"learning_rate": 8.636363636363637e-07,
"loss": 0.3096,
"step": 94
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2608332633972168,
"Normal prob": -0.2608332633972168,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.8059701492537314,
"step": 94
},
{
"DPO Loss": 1.6469150536061094e-06,
"Negative Geometric Mean": -13.248110250737463,
"Negative prob": -13.248110250737463,
"Normal Loss": 0.24679048359394073,
"Normal prob": -0.24679048359394073,
"Positive Loss": 0.014129209332168102,
"Positive prob": -0.014129209332168102,
"epoch": 2.8059701492537314,
"step": 94
},
{
"epoch": 2.835820895522388,
"grad_norm": 7.604649982159979,
"learning_rate": 8.409090909090908e-07,
"loss": 0.2674,
"step": 95
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2900543212890625,
"Normal prob": -0.2900543212890625,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.835820895522388,
"step": 95
},
{
"DPO Loss": 7.456183395726831e-07,
"Negative Geometric Mean": -13.86903901734104,
"Negative prob": -13.86903901734104,
"Normal Loss": 0.4645146429538727,
"Normal prob": -0.4645146429538727,
"Positive Loss": 0.006549107376486063,
"Positive prob": -0.006549107376486063,
"epoch": 2.835820895522388,
"step": 95
},
{
"epoch": 2.8656716417910446,
"grad_norm": 6.428480514761495,
"learning_rate": 8.181818181818181e-07,
"loss": 0.3561,
"step": 96
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.17966699600219727,
"Normal prob": -0.17966699600219727,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.8656716417910446,
"step": 96
},
{
"DPO Loss": 1.1826854588610623e-05,
"Negative Geometric Mean": -11.482138813405797,
"Negative prob": -11.482138813405797,
"Normal Loss": 0.4035150110721588,
"Normal prob": -0.4035150110721588,
"Positive Loss": 0.01260466780513525,
"Positive prob": -0.01260466780513525,
"epoch": 2.8656716417910446,
"step": 96
},
{
"epoch": 2.8955223880597014,
"grad_norm": 6.355093862289104,
"learning_rate": 7.954545454545454e-07,
"loss": 0.3552,
"step": 97
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.1942460834980011,
"Normal prob": -0.1942460834980011,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.8955223880597014,
"step": 97
},
{
"DPO Loss": 2.1722275431802666e-06,
"Negative Geometric Mean": -12.907205766876064,
"Negative prob": -12.907205766876064,
"Normal Loss": 0.3104533553123474,
"Normal prob": -0.3104533553123474,
"Positive Loss": 0.004998633172363043,
"Positive prob": -0.004998633172363043,
"epoch": 2.8955223880597014,
"step": 97
},
{
"epoch": 2.925373134328358,
"grad_norm": 6.623833463639339,
"learning_rate": 7.727272727272727e-07,
"loss": 0.3507,
"step": 98
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.33913183212280273,
"Normal prob": -0.33913183212280273,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.925373134328358,
"step": 98
},
{
"DPO Loss": 1.1818778170094944e-06,
"Negative Geometric Mean": -13.596246585154585,
"Negative prob": -13.596246585154585,
"Normal Loss": 0.3668951988220215,
"Normal prob": -0.3668951988220215,
"Positive Loss": 0.016438201069831848,
"Positive prob": -0.016438201069831848,
"epoch": 2.925373134328358,
"step": 98
},
{
"epoch": 2.955223880597015,
"grad_norm": 7.342348492585064,
"learning_rate": 7.5e-07,
"loss": 0.4162,
"step": 99
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2540174424648285,
"Normal prob": -0.2540174424648285,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.955223880597015,
"step": 99
},
{
"DPO Loss": 2.3274892548031074e-06,
"Negative Geometric Mean": -13.38752170138889,
"Negative prob": -13.38752170138889,
"Normal Loss": 0.6933973431587219,
"Normal prob": -0.6933973431587219,
"Positive Loss": 0.004921761341392994,
"Positive prob": -0.004921761341392994,
"epoch": 2.955223880597015,
"step": 99
},
{
"epoch": 2.9850746268656714,
"grad_norm": 6.242023928985393,
"learning_rate": 7.272727272727273e-07,
"loss": 0.4423,
"step": 100
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.32996082305908203,
"Normal prob": -0.32996082305908203,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.9850746268656714,
"step": 100
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.27658382058143616,
"Normal prob": -0.27658382058143616,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 2.9850746268656714,
"step": 100
},
{
"epoch": 3.014925373134328,
"grad_norm": 5.853608572027528,
"learning_rate": 7.045454545454546e-07,
"loss": 0.288,
"step": 101
},
{
"DPO Loss": 1.1678178546410005e-06,
"Negative Geometric Mean": -14.891405087425595,
"Negative prob": -14.891405087425595,
"Normal Loss": 0.11027539521455765,
"Normal prob": -0.11027539521455765,
"Positive Loss": 0.0027892631478607655,
"Positive prob": -0.0027892631478607655,
"epoch": 3.014925373134328,
"step": 101
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.26463809609413147,
"Normal prob": -0.26463809609413147,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.014925373134328,
"step": 101
},
{
"epoch": 3.044776119402985,
"grad_norm": 6.514265106044286,
"learning_rate": 6.818181818181818e-07,
"loss": 0.1912,
"step": 102
},
{
"DPO Loss": 3.6430315872713267e-06,
"Negative Geometric Mean": -13.001615084134615,
"Negative prob": -13.001615084134615,
"Normal Loss": 0.10679034143686295,
"Normal prob": -0.10679034143686295,
"Positive Loss": 0.01767848990857601,
"Positive prob": -0.01767848990857601,
"epoch": 3.044776119402985,
"step": 102
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.1798650622367859,
"Normal prob": -0.1798650622367859,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.044776119402985,
"step": 102
},
{
"epoch": 3.074626865671642,
"grad_norm": 6.098513301658777,
"learning_rate": 6.590909090909091e-07,
"loss": 0.2014,
"step": 103
},
{
"DPO Loss": 1.1857304744373281e-05,
"Negative Geometric Mean": -11.364407111528822,
"Negative prob": -11.364407111528822,
"Normal Loss": 0.5280313491821289,
"Normal prob": -0.5280313491821289,
"Positive Loss": 0.0045397402718663216,
"Positive prob": -0.0045397402718663216,
"epoch": 3.074626865671642,
"step": 103
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.12902340292930603,
"Normal prob": -0.12902340292930603,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.074626865671642,
"step": 103
},
{
"epoch": 3.1044776119402986,
"grad_norm": 6.268435906008225,
"learning_rate": 6.363636363636364e-07,
"loss": 0.2413,
"step": 104
},
{
"DPO Loss": 8.258820908422388e-07,
"Negative Geometric Mean": -13.974816351361241,
"Negative prob": -13.974816351361241,
"Normal Loss": 0.08596272766590118,
"Normal prob": -0.08596272766590118,
"Positive Loss": 0.0037321026902645826,
"Positive prob": -0.0037321026902645826,
"epoch": 3.1044776119402986,
"step": 104
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.12673968076705933,
"Normal prob": -0.12673968076705933,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.1044776119402986,
"step": 104
},
{
"epoch": 3.1343283582089554,
"grad_norm": 4.849865946932611,
"learning_rate": 6.136363636363637e-07,
"loss": 0.2016,
"step": 105
},
{
"DPO Loss": 1.2510054247133794e-05,
"Negative Geometric Mean": -11.465228908237913,
"Negative prob": -11.465228908237913,
"Normal Loss": 0.35748380422592163,
"Normal prob": -0.35748380422592163,
"Positive Loss": 0.0027046226896345615,
"Positive prob": -0.0027046226896345615,
"epoch": 3.1343283582089554,
"step": 105
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2695090174674988,
"Normal prob": -0.2695090174674988,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.1343283582089554,
"step": 105
},
{
"epoch": 3.1641791044776117,
"grad_norm": 5.314522862984474,
"learning_rate": 5.909090909090909e-07,
"loss": 0.3061,
"step": 106
},
{
"DPO Loss": 1.1451636416948107e-06,
"Negative Geometric Mean": -13.922169000330106,
"Negative prob": -13.922169000330106,
"Normal Loss": 0.15261346101760864,
"Normal prob": -0.15261346101760864,
"Positive Loss": 0.004130078945308924,
"Positive prob": -0.004130078945308924,
"epoch": 3.1641791044776117,
"step": 106
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.3225187063217163,
"Normal prob": -0.3225187063217163,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.1641791044776117,
"step": 106
},
{
"epoch": 3.1940298507462686,
"grad_norm": 5.845924095415361,
"learning_rate": 5.681818181818182e-07,
"loss": 0.2219,
"step": 107
},
{
"DPO Loss": 3.406975256320534e-06,
"Negative Geometric Mean": -13.250364491637324,
"Negative prob": -13.250364491637324,
"Normal Loss": 0.2123008817434311,
"Normal prob": -0.2123008817434311,
"Positive Loss": 0.00209601828828454,
"Positive prob": -0.00209601828828454,
"epoch": 3.1940298507462686,
"step": 107
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2501071095466614,
"Normal prob": -0.2501071095466614,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.1940298507462686,
"step": 107
},
{
"epoch": 3.2238805970149254,
"grad_norm": 6.273812820779774,
"learning_rate": 5.454545454545455e-07,
"loss": 0.2085,
"step": 108
},
{
"DPO Loss": 7.219691741892465e-06,
"Negative Geometric Mean": -12.184994006283068,
"Negative prob": -12.184994006283068,
"Normal Loss": 0.11331921815872192,
"Normal prob": -0.11331921815872192,
"Positive Loss": 0.00535797793418169,
"Positive prob": -0.00535797793418169,
"epoch": 3.2238805970149254,
"step": 108
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2601730525493622,
"Normal prob": -0.2601730525493622,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.2238805970149254,
"step": 108
},
{
"epoch": 3.253731343283582,
"grad_norm": 6.61609605527567,
"learning_rate": 5.227272727272728e-07,
"loss": 0.2738,
"step": 109
},
{
"DPO Loss": 3.8013957323267827e-06,
"Negative Geometric Mean": -12.432244078240172,
"Negative prob": -12.432244078240172,
"Normal Loss": 0.212859645485878,
"Normal prob": -0.212859645485878,
"Positive Loss": 0.0058334325440227985,
"Positive prob": -0.0058334325440227985,
"epoch": 3.253731343283582,
"step": 109
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.1951064020395279,
"Normal prob": -0.1951064020395279,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.253731343283582,
"step": 109
},
{
"epoch": 3.283582089552239,
"grad_norm": 6.461685783109346,
"learning_rate": 5e-07,
"loss": 0.1872,
"step": 110
},
{
"DPO Loss": 9.760423619643666e-07,
"Negative Geometric Mean": -13.36294397566719,
"Negative prob": -13.36294397566719,
"Normal Loss": 0.16994960606098175,
"Normal prob": -0.16994960606098175,
"Positive Loss": 0.007101885508745909,
"Positive prob": -0.007101885508745909,
"epoch": 3.283582089552239,
"step": 110
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.25234583020210266,
"Normal prob": -0.25234583020210266,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.283582089552239,
"step": 110
},
{
"epoch": 3.3134328358208958,
"grad_norm": 6.064062487418674,
"learning_rate": 4.772727272727273e-07,
"loss": 0.1877,
"step": 111
},
{
"DPO Loss": 3.0380582299826617e-06,
"Negative Geometric Mean": -12.93929797021028,
"Negative prob": -12.93929797021028,
"Normal Loss": 0.07496587187051773,
"Normal prob": -0.07496587187051773,
"Positive Loss": 0.003302493365481496,
"Positive prob": -0.003302493365481496,
"epoch": 3.3134328358208958,
"step": 111
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.116237573325634,
"Normal prob": -0.116237573325634,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.3134328358208958,
"step": 111
},
{
"epoch": 3.343283582089552,
"grad_norm": 11.774507449917868,
"learning_rate": 4.5454545454545457e-07,
"loss": 0.1455,
"step": 112
},
{
"DPO Loss": 1.8040673343906892e-06,
"Negative Geometric Mean": -13.262591667895046,
"Negative prob": -13.262591667895046,
"Normal Loss": 0.20686665177345276,
"Normal prob": -0.20686665177345276,
"Positive Loss": 0.0036297321785241365,
"Positive prob": -0.0036297321785241365,
"epoch": 3.343283582089552,
"step": 112
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.07100074738264084,
"Normal prob": -0.07100074738264084,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.343283582089552,
"step": 112
},
{
"epoch": 3.373134328358209,
"grad_norm": 6.197973269249537,
"learning_rate": 4.3181818181818187e-07,
"loss": 0.2737,
"step": 113
},
{
"DPO Loss": 1.8399912201802113e-05,
"Negative Geometric Mean": -11.161076035610465,
"Negative prob": -11.161076035610465,
"Normal Loss": 0.3017271161079407,
"Normal prob": -0.3017271161079407,
"Positive Loss": 0.0018219746416434646,
"Positive prob": -0.0018219746416434646,
"epoch": 3.373134328358209,
"step": 113
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.40800532698631287,
"Normal prob": -0.40800532698631287,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.373134328358209,
"step": 113
},
{
"epoch": 3.4029850746268657,
"grad_norm": 6.686699543045222,
"learning_rate": 4.0909090909090906e-07,
"loss": 0.2495,
"step": 114
},
{
"DPO Loss": 7.636819721194287e-06,
"Negative Geometric Mean": -11.901342007076794,
"Negative prob": -11.901342007076794,
"Normal Loss": 0.12182455509901047,
"Normal prob": -0.12182455509901047,
"Positive Loss": 0.008607598952949047,
"Positive prob": -0.008607598952949047,
"epoch": 3.4029850746268657,
"step": 114
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.18401654064655304,
"Normal prob": -0.18401654064655304,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.4029850746268657,
"step": 114
},
{
"epoch": 3.4328358208955225,
"grad_norm": 6.81794821888112,
"learning_rate": 3.8636363636363636e-07,
"loss": 0.2007,
"step": 115
},
{
"DPO Loss": 6.554748941648e-06,
"Negative Geometric Mean": -12.335026873289234,
"Negative prob": -12.335026873289234,
"Normal Loss": 0.21853935718536377,
"Normal prob": -0.21853935718536377,
"Positive Loss": 0.004862755537033081,
"Positive prob": -0.004862755537033081,
"epoch": 3.4328358208955225,
"step": 115
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.135187566280365,
"Normal prob": -0.135187566280365,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.4328358208955225,
"step": 115
},
{
"epoch": 3.4626865671641793,
"grad_norm": 6.189730848953201,
"learning_rate": 3.6363636363636366e-07,
"loss": 0.2086,
"step": 116
},
{
"DPO Loss": 3.6050653288985906e-06,
"Negative Geometric Mean": -12.796296909877233,
"Negative prob": -12.796296909877233,
"Normal Loss": 0.2358456403017044,
"Normal prob": -0.2358456403017044,
"Positive Loss": 0.010835876688361168,
"Positive prob": -0.010835876688361168,
"epoch": 3.4626865671641793,
"step": 116
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.14767657220363617,
"Normal prob": -0.14767657220363617,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.4626865671641793,
"step": 116
},
{
"epoch": 3.4925373134328357,
"grad_norm": 6.979155772945575,
"learning_rate": 3.409090909090909e-07,
"loss": 0.2849,
"step": 117
},
{
"DPO Loss": 6.702416418876966e-06,
"Negative Geometric Mean": -12.413108648255815,
"Negative prob": -12.413108648255815,
"Normal Loss": 0.14713706076145172,
"Normal prob": -0.14713706076145172,
"Positive Loss": 0.003462533000856638,
"Positive prob": -0.003462533000856638,
"epoch": 3.4925373134328357,
"step": 117
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.23745860159397125,
"Normal prob": -0.23745860159397125,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.4925373134328357,
"step": 117
},
{
"epoch": 3.5223880597014925,
"grad_norm": 6.728854419168043,
"learning_rate": 3.181818181818182e-07,
"loss": 0.1677,
"step": 118
},
{
"DPO Loss": 2.470218665968806e-06,
"Negative Geometric Mean": -13.20369715379494,
"Negative prob": -13.20369715379494,
"Normal Loss": 0.4334864020347595,
"Normal prob": -0.4334864020347595,
"Positive Loss": 0.005433392245322466,
"Positive prob": -0.005433392245322466,
"epoch": 3.5223880597014925,
"step": 118
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.1629062443971634,
"Normal prob": -0.1629062443971634,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.5223880597014925,
"step": 118
},
{
"epoch": 3.5522388059701493,
"grad_norm": 5.9093975782845645,
"learning_rate": 2.9545454545454545e-07,
"loss": 0.2375,
"step": 119
},
{
"DPO Loss": 1.9495445800859506e-06,
"Negative Geometric Mean": -13.439311124840561,
"Negative prob": -13.439311124840561,
"Normal Loss": 0.3102337718009949,
"Normal prob": -0.3102337718009949,
"Positive Loss": 0.001397938933223486,
"Positive prob": -0.001397938933223486,
"epoch": 3.5522388059701493,
"step": 119
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.10538414120674133,
"Normal prob": -0.10538414120674133,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.5522388059701493,
"step": 119
},
{
"epoch": 3.582089552238806,
"grad_norm": 5.432045886830493,
"learning_rate": 2.7272727272727274e-07,
"loss": 0.1749,
"step": 120
},
{
"DPO Loss": 2.111671823116432e-05,
"Negative Geometric Mean": -11.482684536637931,
"Negative prob": -11.482684536637931,
"Normal Loss": 0.09432564675807953,
"Normal prob": -0.09432564675807953,
"Positive Loss": 0.003968758508563042,
"Positive prob": -0.003968758508563042,
"epoch": 3.582089552238806,
"step": 120
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2578660249710083,
"Normal prob": -0.2578660249710083,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.582089552238806,
"step": 120
},
{
"epoch": 3.611940298507463,
"grad_norm": 6.329624233904299,
"learning_rate": 2.5e-07,
"loss": 0.1958,
"step": 121
},
{
"DPO Loss": 2.2736615595795564e-06,
"Negative Geometric Mean": -13.00193465573286,
"Negative prob": -13.00193465573286,
"Normal Loss": 0.11790954321622849,
"Normal prob": -0.11790954321622849,
"Positive Loss": 0.015944618731737137,
"Positive prob": -0.015944618731737137,
"epoch": 3.611940298507463,
"step": 121
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.2980431020259857,
"Normal prob": -0.2980431020259857,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.611940298507463,
"step": 121
},
{
"epoch": 3.6417910447761193,
"grad_norm": 6.390125283101109,
"learning_rate": 2.2727272727272729e-07,
"loss": 0.2578,
"step": 122
},
{
"DPO Loss": 1.8224405365341362e-06,
"Negative Geometric Mean": -13.443804791865459,
"Negative prob": -13.443804791865459,
"Normal Loss": 0.2082529067993164,
"Normal prob": -0.2082529067993164,
"Positive Loss": 0.0020329623948782682,
"Positive prob": -0.0020329623948782682,
"epoch": 3.6417910447761193,
"step": 122
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.27499350905418396,
"Normal prob": -0.27499350905418396,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.6417910447761193,
"step": 122
},
{
"epoch": 3.671641791044776,
"grad_norm": 7.334639331815002,
"learning_rate": 2.0454545454545453e-07,
"loss": 0.1928,
"step": 123
},
{
"DPO Loss": 3.887408166527688e-06,
"Negative Geometric Mean": -12.602550216132615,
"Negative prob": -12.602550216132615,
"Normal Loss": 0.3751141130924225,
"Normal prob": -0.3751141130924225,
"Positive Loss": 0.0016299609560519457,
"Positive prob": -0.0016299609560519457,
"epoch": 3.671641791044776,
"step": 123
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.14575666189193726,
"Normal prob": -0.14575666189193726,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.671641791044776,
"step": 123
},
{
"epoch": 3.701492537313433,
"grad_norm": 5.602697068663161,
"learning_rate": 1.8181818181818183e-07,
"loss": 0.1898,
"step": 124
},
{
"DPO Loss": 1.1130948677477009e-06,
"Negative Geometric Mean": -13.74802903824201,
"Negative prob": -13.74802903824201,
"Normal Loss": 0.09506483376026154,
"Normal prob": -0.09506483376026154,
"Positive Loss": 0.0032382213976234198,
"Positive prob": -0.0032382213976234198,
"epoch": 3.701492537313433,
"step": 124
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.15355288982391357,
"Normal prob": -0.15355288982391357,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.701492537313433,
"step": 124
},
{
"epoch": 3.7313432835820897,
"grad_norm": 7.94396763225081,
"learning_rate": 1.590909090909091e-07,
"loss": 0.153,
"step": 125
},
{
"DPO Loss": 4.4835976933222324e-07,
"Negative Geometric Mean": -14.556803077741021,
"Negative prob": -14.556803077741021,
"Normal Loss": 0.09771548211574554,
"Normal prob": -0.09771548211574554,
"Positive Loss": 0.012089760042726994,
"Positive prob": -0.012089760042726994,
"epoch": 3.7313432835820897,
"step": 125
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.17557981610298157,
"Normal prob": -0.17557981610298157,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.7313432835820897,
"step": 125
},
{
"epoch": 3.7611940298507465,
"grad_norm": 5.746760679085967,
"learning_rate": 1.3636363636363637e-07,
"loss": 0.2027,
"step": 126
},
{
"DPO Loss": 2.5731311695920285e-06,
"Negative Geometric Mean": -12.94003257909751,
"Negative prob": -12.94003257909751,
"Normal Loss": 0.2374420166015625,
"Normal prob": -0.2374420166015625,
"Positive Loss": 0.006095151882618666,
"Positive prob": -0.006095151882618666,
"epoch": 3.7611940298507465,
"step": 126
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.12284944206476212,
"Normal prob": -0.12284944206476212,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.7611940298507465,
"step": 126
},
{
"epoch": 3.791044776119403,
"grad_norm": 6.172054742054878,
"learning_rate": 1.1363636363636364e-07,
"loss": 0.1587,
"step": 127
},
{
"DPO Loss": 3.012714219508236e-06,
"Negative Geometric Mean": -12.808327907986111,
"Negative prob": -12.808327907986111,
"Normal Loss": 0.07416192442178726,
"Normal prob": -0.07416192442178726,
"Positive Loss": 0.0024134027771651745,
"Positive prob": -0.0024134027771651745,
"epoch": 3.791044776119403,
"step": 127
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.292096883058548,
"Normal prob": -0.292096883058548,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.791044776119403,
"step": 127
},
{
"epoch": 3.8208955223880596,
"grad_norm": 6.689741596768201,
"learning_rate": 9.090909090909091e-08,
"loss": 0.1959,
"step": 128
},
{
"DPO Loss": 3.060298655777367e-06,
"Negative Geometric Mean": -12.873194280660377,
"Negative prob": -12.873194280660377,
"Normal Loss": 0.33571678400039673,
"Normal prob": -0.33571678400039673,
"Positive Loss": 0.0060377782210707664,
"Positive prob": -0.0060377782210707664,
"epoch": 3.8208955223880596,
"step": 128
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.22519126534461975,
"Normal prob": -0.22519126534461975,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.8208955223880596,
"step": 128
},
{
"epoch": 3.8507462686567164,
"grad_norm": 7.002612239614997,
"learning_rate": 6.818181818181819e-08,
"loss": 0.2147,
"step": 129
},
{
"DPO Loss": 1.0818999409674698e-06,
"Negative Geometric Mean": -13.897989908854166,
"Negative prob": -13.897989908854166,
"Normal Loss": 0.2647945284843445,
"Normal prob": -0.2647945284843445,
"Positive Loss": 0.005775726865977049,
"Positive prob": -0.005775726865977049,
"epoch": 3.8507462686567164,
"step": 129
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.14088931679725647,
"Normal prob": -0.14088931679725647,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.8507462686567164,
"step": 129
},
{
"epoch": 3.8805970149253732,
"grad_norm": 7.106870960626619,
"learning_rate": 4.545454545454546e-08,
"loss": 0.2408,
"step": 130
},
{
"DPO Loss": 3.805466487272458e-06,
"Negative Geometric Mean": -12.980504410990168,
"Negative prob": -12.980504410990168,
"Normal Loss": 0.3013966977596283,
"Normal prob": -0.3013966977596283,
"Positive Loss": 0.003041935386136174,
"Positive prob": -0.003041935386136174,
"epoch": 3.8805970149253732,
"step": 130
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.27534565329551697,
"Normal prob": -0.27534565329551697,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.8805970149253732,
"step": 130
},
{
"epoch": 3.91044776119403,
"grad_norm": 5.966195743329273,
"learning_rate": 2.272727272727273e-08,
"loss": 0.2029,
"step": 131
},
{
"DPO Loss": 1.5273080011035291e-06,
"Negative Geometric Mean": -13.34349348358295,
"Negative prob": -13.34349348358295,
"Normal Loss": 0.20480337738990784,
"Normal prob": -0.20480337738990784,
"Positive Loss": 0.0037362114526331425,
"Positive prob": -0.0037362114526331425,
"epoch": 3.91044776119403,
"step": 131
},
{
"DPO Loss": 0.0,
"Negative Geometric Mean": 0.0,
"Negative prob": 0.0,
"Normal Loss": 0.19383595883846283,
"Normal prob": -0.19383595883846283,
"Positive Loss": 0.0,
"Positive prob": 0.0,
"epoch": 3.91044776119403,
"step": 131
},
{
"epoch": 3.9402985074626864,
"grad_norm": 6.0018477972181445,
"learning_rate": 0.0,
"loss": 0.1864,
"step": 132
}
],
"logging_steps": 1,
"max_steps": 132,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 11615663554560.0,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}