{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9402985074626864, "eval_steps": 500, "global_step": 132, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.230027198791504, "Normal prob": -1.230027198791504, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0, "step": 0 }, { "DPO Loss": 0.34657145664029176, "Negative Geometric Mean": -0.9493766827771892, "Negative prob": -0.9493766827771892, "Normal Loss": 1.0090343952178955, "Normal prob": -1.0090343952178955, "Positive Loss": 0.37890636920928955, "Positive prob": -0.37890636920928955, "epoch": 0, "step": 0 }, { "epoch": 0.029850746268656716, "grad_norm": 52.804205501572916, "learning_rate": 2.9772727272727274e-06, "loss": 1.4155, "step": 1 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.092234492301941, "Normal prob": -1.092234492301941, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.029850746268656716, "step": 1 }, { "DPO Loss": 0.3577462635421841, "Negative Geometric Mean": -0.3783103787169165, "Negative prob": -0.3783103787169165, "Normal Loss": 0.8107529282569885, "Normal prob": -0.8107529282569885, "Positive Loss": 0.07748764753341675, "Positive prob": -0.07748764753341675, "epoch": 0.029850746268656716, "step": 1 }, { "epoch": 0.05970149253731343, "grad_norm": 22.382695244020965, "learning_rate": 2.9545454545454547e-06, "loss": 1.1517, "step": 2 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8582519292831421, "Normal prob": -0.8582519292831421, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.05970149253731343, "step": 2 }, { "DPO Loss": 0.3623313108408915, "Negative Geometric Mean": -1.0158045398410653, "Negative prob": -1.0158045398410653, "Normal Loss": 0.7299004793167114, "Normal prob": -0.7299004793167114, "Positive Loss": 0.07864368706941605, "Positive prob": -0.07864368706941605, "epoch": 0.05970149253731343, "step": 2 }, { "epoch": 0.08955223880597014, "grad_norm": 12.355752020889257, "learning_rate": 2.931818181818182e-06, "loss": 1.0958, "step": 3 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9027985334396362, "Normal prob": -0.9027985334396362, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.08955223880597014, "step": 3 }, { "DPO Loss": 0.37659024799225, "Negative Geometric Mean": -0.6431755968319472, "Negative prob": -0.6431755968319472, "Normal Loss": 0.8552955389022827, "Normal prob": -0.8552955389022827, "Positive Loss": 0.0777173787355423, "Positive prob": -0.0777173787355423, "epoch": 0.08955223880597014, "step": 3 }, { "epoch": 0.11940298507462686, "grad_norm": 13.671836146708186, "learning_rate": 2.9090909090909093e-06, "loss": 1.065, "step": 4 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9766985177993774, "Normal prob": -0.9766985177993774, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.11940298507462686, "step": 4 }, { "DPO Loss": 0.35204142219855233, "Negative Geometric Mean": -0.8325932820638021, "Negative prob": -0.8325932820638021, "Normal Loss": 1.0553878545761108, "Normal prob": -1.0553878545761108, "Positive Loss": 0.09103336185216904, "Positive prob": -0.09103336185216904, "epoch": 0.11940298507462686, "step": 4 }, { "epoch": 0.14925373134328357, "grad_norm": 9.500922911135007, "learning_rate": 2.8863636363636366e-06, "loss": 1.2053, "step": 5 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9637267589569092, "Normal prob": -0.9637267589569092, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.14925373134328357, "step": 5 }, { "DPO Loss": 0.3369350566486365, "Negative Geometric Mean": -0.8089229916566171, "Negative prob": -0.8089229916566171, "Normal Loss": 0.809516429901123, "Normal prob": -0.809516429901123, "Positive Loss": 0.08676248043775558, "Positive prob": -0.08676248043775558, "epoch": 0.14925373134328357, "step": 5 }, { "epoch": 0.1791044776119403, "grad_norm": 9.50990015460849, "learning_rate": 2.863636363636364e-06, "loss": 1.1133, "step": 6 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.832047700881958, "Normal prob": -0.832047700881958, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.1791044776119403, "step": 6 }, { "DPO Loss": 0.3370728580941308, "Negative Geometric Mean": -0.8685310105770683, "Negative prob": -0.8685310105770683, "Normal Loss": 0.8815029263496399, "Normal prob": -0.8815029263496399, "Positive Loss": 0.07432825863361359, "Positive prob": -0.07432825863361359, "epoch": 0.1791044776119403, "step": 6 }, { "epoch": 0.208955223880597, "grad_norm": 8.039098978654936, "learning_rate": 2.840909090909091e-06, "loss": 1.2064, "step": 7 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9996135830879211, "Normal prob": -0.9996135830879211, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.208955223880597, "step": 7 }, { "DPO Loss": 0.35429090257439527, "Negative Geometric Mean": -0.610354295417444, "Negative prob": -0.610354295417444, "Normal Loss": 0.716498851776123, "Normal prob": -0.716498851776123, "Positive Loss": 0.09210512042045593, "Positive prob": -0.09210512042045593, "epoch": 0.208955223880597, "step": 7 }, { "epoch": 0.23880597014925373, "grad_norm": 9.566983237882555, "learning_rate": 2.8181818181818185e-06, "loss": 1.067, "step": 8 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8073037266731262, "Normal prob": -0.8073037266731262, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.23880597014925373, "step": 8 }, { "DPO Loss": 0.31094894833121156, "Negative Geometric Mean": -0.8462985688606195, "Negative prob": -0.8462985688606195, "Normal Loss": 0.5099501013755798, "Normal prob": -0.5099501013755798, "Positive Loss": 0.09047375619411469, "Positive prob": -0.09047375619411469, "epoch": 0.23880597014925373, "step": 8 }, { "epoch": 0.26865671641791045, "grad_norm": 7.87857077645135, "learning_rate": 2.7954545454545453e-06, "loss": 1.0028, "step": 9 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.850799024105072, "Normal prob": -0.850799024105072, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.26865671641791045, "step": 9 }, { "DPO Loss": 0.30296717585093275, "Negative Geometric Mean": -0.9634897133738725, "Negative prob": -0.9634897133738725, "Normal Loss": 1.2854121923446655, "Normal prob": -1.2854121923446655, "Positive Loss": 0.07252619415521622, "Positive prob": -0.07252619415521622, "epoch": 0.26865671641791045, "step": 9 }, { "epoch": 0.29850746268656714, "grad_norm": 9.469089262773355, "learning_rate": 2.7727272727272726e-06, "loss": 1.1173, "step": 10 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8608676195144653, "Normal prob": -0.8608676195144653, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.29850746268656714, "step": 10 }, { "DPO Loss": 0.26851745737184274, "Negative Geometric Mean": -1.0714603639998526, "Negative prob": -1.0714603639998526, "Normal Loss": 0.7984556555747986, "Normal prob": -0.7984556555747986, "Positive Loss": 0.05822120979428291, "Positive prob": -0.05822120979428291, "epoch": 0.29850746268656714, "step": 10 }, { "epoch": 0.3283582089552239, "grad_norm": 7.287903263195378, "learning_rate": 2.75e-06, "loss": 1.013, "step": 11 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8310959339141846, "Normal prob": -0.8310959339141846, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.3283582089552239, "step": 11 }, { "DPO Loss": 0.26161430097353844, "Negative Geometric Mean": -1.1722790406482055, "Negative prob": -1.1722790406482055, "Normal Loss": 0.8640764355659485, "Normal prob": -0.8640764355659485, "Positive Loss": 0.07576876878738403, "Positive prob": -0.07576876878738403, "epoch": 0.3283582089552239, "step": 11 }, { "epoch": 0.3582089552238806, "grad_norm": 12.679434481744659, "learning_rate": 2.7272727272727272e-06, "loss": 1.1083, "step": 12 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9239760637283325, "Normal prob": -0.9239760637283325, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.3582089552238806, "step": 12 }, { "DPO Loss": 0.3063912470938362, "Negative Geometric Mean": -0.7924091110304511, "Negative prob": -0.7924091110304511, "Normal Loss": 0.8149660229682922, "Normal prob": -0.8149660229682922, "Positive Loss": 0.06200961023569107, "Positive prob": -0.06200961023569107, "epoch": 0.3582089552238806, "step": 12 }, { "epoch": 0.3880597014925373, "grad_norm": 8.690620794915942, "learning_rate": 2.7045454545454545e-06, "loss": 1.0424, "step": 13 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7188159227371216, "Normal prob": -0.7188159227371216, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.3880597014925373, "step": 13 }, { "DPO Loss": 0.2937756023461591, "Negative Geometric Mean": -0.4400056991696919, "Negative prob": -0.4400056991696919, "Normal Loss": 0.8943111300468445, "Normal prob": -0.8943111300468445, "Positive Loss": 0.09851977974176407, "Positive prob": -0.09851977974176407, "epoch": 0.3880597014925373, "step": 13 }, { "epoch": 0.417910447761194, "grad_norm": 8.116923440818201, "learning_rate": 2.681818181818182e-06, "loss": 1.019, "step": 14 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7557149529457092, "Normal prob": -0.7557149529457092, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.417910447761194, "step": 14 }, { "DPO Loss": 0.3186842313017755, "Negative Geometric Mean": -1.1004963759258488, "Negative prob": -1.1004963759258488, "Normal Loss": 0.8593255877494812, "Normal prob": -0.8593255877494812, "Positive Loss": 0.04907805845141411, "Positive prob": -0.04907805845141411, "epoch": 0.417910447761194, "step": 14 }, { "epoch": 0.44776119402985076, "grad_norm": 8.007172349603502, "learning_rate": 2.659090909090909e-06, "loss": 1.0353, "step": 15 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.897316038608551, "Normal prob": -0.897316038608551, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.44776119402985076, "step": 15 }, { "DPO Loss": 0.1223304973432013, "Negative Geometric Mean": -1.9276966850878456, "Negative prob": -1.9276966850878456, "Normal Loss": 0.6334409117698669, "Normal prob": -0.6334409117698669, "Positive Loss": 0.05224687606096268, "Positive prob": -0.05224687606096268, "epoch": 0.44776119402985076, "step": 15 }, { "epoch": 0.47761194029850745, "grad_norm": 8.24043100452026, "learning_rate": 2.6363636363636364e-06, "loss": 0.9134, "step": 16 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9187679886817932, "Normal prob": -0.9187679886817932, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.47761194029850745, "step": 16 }, { "DPO Loss": 0.055564984135369914, "Negative Geometric Mean": -3.0230499623024127, "Negative prob": -3.0230499623024127, "Normal Loss": 1.1529364585876465, "Normal prob": -1.1529364585876465, "Positive Loss": 0.08235401660203934, "Positive prob": -0.08235401660203934, "epoch": 0.47761194029850745, "step": 16 }, { "epoch": 0.5074626865671642, "grad_norm": 8.688253054467179, "learning_rate": 2.6136363636363637e-06, "loss": 1.0468, "step": 17 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7582242488861084, "Normal prob": -0.7582242488861084, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5074626865671642, "step": 17 }, { "DPO Loss": 0.12820626634228, "Negative Geometric Mean": -2.269475635729338, "Negative prob": -2.269475635729338, "Normal Loss": 0.3805200755596161, "Normal prob": -0.3805200755596161, "Positive Loss": 0.07097562402486801, "Positive prob": -0.07097562402486801, "epoch": 0.5074626865671642, "step": 17 }, { "epoch": 0.5373134328358209, "grad_norm": 8.727946309173646, "learning_rate": 2.590909090909091e-06, "loss": 0.8819, "step": 18 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0369868278503418, "Normal prob": -1.0369868278503418, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5373134328358209, "step": 18 }, { "DPO Loss": 0.06657070366779086, "Negative Geometric Mean": -2.8913005754059435, "Negative prob": -2.8913005754059435, "Normal Loss": 0.7784830331802368, "Normal prob": -0.7784830331802368, "Positive Loss": 0.04361863434314728, "Positive prob": -0.04361863434314728, "epoch": 0.5373134328358209, "step": 18 }, { "epoch": 0.5671641791044776, "grad_norm": 8.562076920129734, "learning_rate": 2.5681818181818183e-06, "loss": 0.8626, "step": 19 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9392335414886475, "Normal prob": -0.9392335414886475, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5671641791044776, "step": 19 }, { "DPO Loss": 0.004452790854185025, "Negative Geometric Mean": -5.43384802094139, "Negative prob": -5.43384802094139, "Normal Loss": 0.8345220685005188, "Normal prob": -0.8345220685005188, "Positive Loss": 0.07906413078308105, "Positive prob": -0.07906413078308105, "epoch": 0.5671641791044776, "step": 19 }, { "epoch": 0.5970149253731343, "grad_norm": 9.154040073172876, "learning_rate": 2.5454545454545456e-06, "loss": 0.9584, "step": 20 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8151698112487793, "Normal prob": -0.8151698112487793, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5970149253731343, "step": 20 }, { "DPO Loss": 0.0007850244175145432, "Negative Geometric Mean": -7.495313533399471, "Negative prob": -7.495313533399471, "Normal Loss": 0.8681256771087646, "Normal prob": -0.8681256771087646, "Positive Loss": 0.05661068111658096, "Positive prob": -0.05661068111658096, "epoch": 0.5970149253731343, "step": 20 }, { "epoch": 0.6268656716417911, "grad_norm": 19.32249471061477, "learning_rate": 2.522727272727273e-06, "loss": 0.9313, "step": 21 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8350412249565125, "Normal prob": -0.8350412249565125, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.6268656716417911, "step": 21 }, { "DPO Loss": 0.013323540066809244, "Negative Geometric Mean": -4.539878773912091, "Negative prob": -4.539878773912091, "Normal Loss": 0.8549312949180603, "Normal prob": -0.8549312949180603, "Positive Loss": 0.0937165841460228, "Positive prob": -0.0937165841460228, "epoch": 0.6268656716417911, "step": 21 }, { "epoch": 0.6567164179104478, "grad_norm": 18.4720664879043, "learning_rate": 2.5e-06, "loss": 0.9518, "step": 22 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.83185875415802, "Normal prob": -0.83185875415802, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.6567164179104478, "step": 22 }, { "DPO Loss": 7.441585176947715e-05, "Negative Geometric Mean": -9.482926878210616, "Negative prob": -9.482926878210616, "Normal Loss": 1.0035439729690552, "Normal prob": -1.0035439729690552, "Positive Loss": 0.1059131771326065, "Positive prob": -0.1059131771326065, "epoch": 0.6567164179104478, "step": 22 }, { "epoch": 0.6865671641791045, "grad_norm": 7.4403596397354015, "learning_rate": 2.4772727272727275e-06, "loss": 0.9741, "step": 23 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.1941570043563843, "Normal prob": -1.1941570043563843, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.6865671641791045, "step": 23 }, { "DPO Loss": 5.767926370817813e-06, "Negative Geometric Mean": -12.173237529280238, "Negative prob": -12.173237529280238, "Normal Loss": 0.7278223037719727, "Normal prob": -0.7278223037719727, "Positive Loss": 0.07192976027727127, "Positive prob": -0.07192976027727127, "epoch": 0.6865671641791045, "step": 23 }, { "epoch": 0.7164179104477612, "grad_norm": 7.330688093396721, "learning_rate": 2.454545454545455e-06, "loss": 0.9927, "step": 24 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9865759611129761, "Normal prob": -0.9865759611129761, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.7164179104477612, "step": 24 }, { "DPO Loss": 1.0947351633426092e-05, "Negative Geometric Mean": -11.567188243575508, "Negative prob": -11.567188243575508, "Normal Loss": 0.8570675253868103, "Normal prob": -0.8570675253868103, "Positive Loss": 0.06724032014608383, "Positive prob": -0.06724032014608383, "epoch": 0.7164179104477612, "step": 24 }, { "epoch": 0.746268656716418, "grad_norm": 7.557781355223681, "learning_rate": 2.4318181818181817e-06, "loss": 0.9221, "step": 25 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9384709000587463, "Normal prob": -0.9384709000587463, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.746268656716418, "step": 25 }, { "DPO Loss": 0.0003560853089358333, "Negative Geometric Mean": -8.138521756169094, "Negative prob": -8.138521756169094, "Normal Loss": 0.8964889049530029, "Normal prob": -0.8964889049530029, "Positive Loss": 0.06566499173641205, "Positive prob": -0.06566499173641205, "epoch": 0.746268656716418, "step": 25 }, { "epoch": 0.7761194029850746, "grad_norm": 7.046101597469539, "learning_rate": 2.409090909090909e-06, "loss": 0.9733, "step": 26 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9506588578224182, "Normal prob": -0.9506588578224182, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.7761194029850746, "step": 26 }, { "DPO Loss": 8.255091540571396e-06, "Negative Geometric Mean": -11.822831423895254, "Negative prob": -11.822831423895254, "Normal Loss": 0.9992624521255493, "Normal prob": -0.9992624521255493, "Positive Loss": 0.093373604118824, "Positive prob": -0.093373604118824, "epoch": 0.7761194029850746, "step": 26 }, { "epoch": 0.8059701492537313, "grad_norm": 6.576814099531671, "learning_rate": 2.3863636363636363e-06, "loss": 0.9531, "step": 27 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.964348316192627, "Normal prob": -0.964348316192627, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.8059701492537313, "step": 27 }, { "DPO Loss": 1.038318288302413e-05, "Negative Geometric Mean": -12.70634533110119, "Negative prob": -12.70634533110119, "Normal Loss": 0.8761529326438904, "Normal prob": -0.8761529326438904, "Positive Loss": 0.048190467059612274, "Positive prob": -0.048190467059612274, "epoch": 0.8059701492537313, "step": 27 }, { "epoch": 0.835820895522388, "grad_norm": 7.041162886647003, "learning_rate": 2.3636363636363636e-06, "loss": 0.9254, "step": 28 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0659770965576172, "Normal prob": -1.0659770965576172, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.835820895522388, "step": 28 }, { "DPO Loss": 4.745458003421849e-06, "Negative Geometric Mean": -12.22632121816452, "Negative prob": -12.22632121816452, "Normal Loss": 0.877432644367218, "Normal prob": -0.877432644367218, "Positive Loss": 0.07441161572933197, "Positive prob": -0.07441161572933197, "epoch": 0.835820895522388, "step": 28 }, { "epoch": 0.8656716417910447, "grad_norm": 6.123530075257292, "learning_rate": 2.340909090909091e-06, "loss": 0.9906, "step": 29 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9131932854652405, "Normal prob": -0.9131932854652405, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.8656716417910447, "step": 29 }, { "DPO Loss": 1.6460282004165447e-05, "Negative Geometric Mean": -11.04015827178955, "Negative prob": -11.04015827178955, "Normal Loss": 1.063417673110962, "Normal prob": -1.063417673110962, "Positive Loss": 0.050095487385988235, "Positive prob": -0.050095487385988235, "epoch": 0.8656716417910447, "step": 29 }, { "epoch": 0.8955223880597015, "grad_norm": 6.118956679067234, "learning_rate": 2.318181818181818e-06, "loss": 0.9193, "step": 30 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8643659949302673, "Normal prob": -0.8643659949302673, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.8955223880597015, "step": 30 }, { "DPO Loss": 9.904635203296299e-06, "Negative Geometric Mean": -11.622193305258186, "Negative prob": -11.622193305258186, "Normal Loss": 0.7562206983566284, "Normal prob": -0.7562206983566284, "Positive Loss": 0.05644884705543518, "Positive prob": -0.05644884705543518, "epoch": 0.8955223880597015, "step": 30 }, { "epoch": 0.9253731343283582, "grad_norm": 6.536101223333384, "learning_rate": 2.2954545454545455e-06, "loss": 0.8934, "step": 31 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8608868718147278, "Normal prob": -0.8608868718147278, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9253731343283582, "step": 31 }, { "DPO Loss": 2.137509260823493e-05, "Negative Geometric Mean": -10.705368454391891, "Negative prob": -10.705368454391891, "Normal Loss": 0.71401047706604, "Normal prob": -0.71401047706604, "Positive Loss": 0.06425323337316513, "Positive prob": -0.06425323337316513, "epoch": 0.9253731343283582, "step": 31 }, { "epoch": 0.9552238805970149, "grad_norm": 7.040623160546982, "learning_rate": 2.2727272727272728e-06, "loss": 0.8664, "step": 32 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7989053130149841, "Normal prob": -0.7989053130149841, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9552238805970149, "step": 32 }, { "DPO Loss": 2.7078393532067353e-05, "Negative Geometric Mean": -10.612409125434027, "Negative prob": -10.612409125434027, "Normal Loss": 0.9129906892776489, "Normal prob": -0.9129906892776489, "Positive Loss": 0.10737287253141403, "Positive prob": -0.10737287253141403, "epoch": 0.9552238805970149, "step": 32 }, { "epoch": 0.9850746268656716, "grad_norm": 6.585654111220224, "learning_rate": 2.25e-06, "loss": 0.842, "step": 33 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8037691712379456, "Normal prob": -0.8037691712379456, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9850746268656716, "step": 33 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7143898606300354, "Normal prob": -0.7143898606300354, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9850746268656716, "step": 33 }, { "epoch": 1.0149253731343284, "grad_norm": 9.914782438598868, "learning_rate": 2.2272727272727274e-06, "loss": 0.6917, "step": 34 }, { "DPO Loss": 3.654100751997121e-05, "Negative Geometric Mean": -10.674591619318182, "Negative prob": -10.674591619318182, "Normal Loss": 0.48714742064476013, "Normal prob": -0.48714742064476013, "Positive Loss": 0.03236498683691025, "Positive prob": -0.03236498683691025, "epoch": 1.0149253731343284, "step": 34 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5524182915687561, "Normal prob": -0.5524182915687561, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.0149253731343284, "step": 34 }, { "epoch": 1.044776119402985, "grad_norm": 7.2083835894058375, "learning_rate": 2.2045454545454547e-06, "loss": 0.6615, "step": 35 }, { "DPO Loss": 5.3735510809371045e-05, "Negative Geometric Mean": -10.087603725282486, "Negative prob": -10.087603725282486, "Normal Loss": 0.47449687123298645, "Normal prob": -0.47449687123298645, "Positive Loss": 0.02946843020617962, "Positive prob": -0.02946843020617962, "epoch": 1.044776119402985, "step": 35 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5653090476989746, "Normal prob": -0.5653090476989746, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.044776119402985, "step": 35 }, { "epoch": 1.0746268656716418, "grad_norm": 7.509154772376704, "learning_rate": 2.181818181818182e-06, "loss": 0.5463, "step": 36 }, { "DPO Loss": 4.57076718186167e-05, "Negative Geometric Mean": -10.369059509873779, "Negative prob": -10.369059509873779, "Normal Loss": 0.7275592684745789, "Normal prob": -0.7275592684745789, "Positive Loss": 0.022183816879987717, "Positive prob": -0.022183816879987717, "epoch": 1.0746268656716418, "step": 36 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.48975300788879395, "Normal prob": -0.48975300788879395, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.0746268656716418, "step": 36 }, { "epoch": 1.1044776119402986, "grad_norm": 5.921019640825061, "learning_rate": 2.1590909090909092e-06, "loss": 0.6523, "step": 37 }, { "DPO Loss": 7.289560432171723e-05, "Negative Geometric Mean": -9.601848503888467, "Negative prob": -9.601848503888467, "Normal Loss": 0.8984713554382324, "Normal prob": -0.8984713554382324, "Positive Loss": 0.029801441356539726, "Positive prob": -0.029801441356539726, "epoch": 1.1044776119402986, "step": 37 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.709186851978302, "Normal prob": -0.709186851978302, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.1044776119402986, "step": 37 }, { "epoch": 1.1343283582089552, "grad_norm": 7.446657265486741, "learning_rate": 2.1363636363636365e-06, "loss": 0.6967, "step": 38 }, { "DPO Loss": 2.6440661378327594e-05, "Negative Geometric Mean": -10.957384672619048, "Negative prob": -10.957384672619048, "Normal Loss": 0.44052013754844666, "Normal prob": -0.44052013754844666, "Positive Loss": 0.02077590487897396, "Positive prob": -0.02077590487897396, "epoch": 1.1343283582089552, "step": 38 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.39025214314460754, "Normal prob": -0.39025214314460754, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.1343283582089552, "step": 38 }, { "epoch": 1.164179104477612, "grad_norm": 6.65773645557663, "learning_rate": 2.113636363636364e-06, "loss": 0.5978, "step": 39 }, { "DPO Loss": 2.123153925438824e-05, "Negative Geometric Mean": -10.680970389887971, "Negative prob": -10.680970389887971, "Normal Loss": 0.6101383566856384, "Normal prob": -0.6101383566856384, "Positive Loss": 0.03984152898192406, "Positive prob": -0.03984152898192406, "epoch": 1.164179104477612, "step": 39 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5792780518531799, "Normal prob": -0.5792780518531799, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.164179104477612, "step": 39 }, { "epoch": 1.1940298507462686, "grad_norm": 6.924093024562789, "learning_rate": 2.090909090909091e-06, "loss": 0.6592, "step": 40 }, { "DPO Loss": 1.6680911890968927e-05, "Negative Geometric Mean": -10.952719974078342, "Negative prob": -10.952719974078342, "Normal Loss": 0.4338739216327667, "Normal prob": -0.4338739216327667, "Positive Loss": 0.03350961208343506, "Positive prob": -0.03350961208343506, "epoch": 1.1940298507462686, "step": 40 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3462405800819397, "Normal prob": -0.3462405800819397, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.1940298507462686, "step": 40 }, { "epoch": 1.2238805970149254, "grad_norm": 6.295634713144118, "learning_rate": 2.0681818181818184e-06, "loss": 0.5281, "step": 41 }, { "DPO Loss": 1.9216125147544902e-05, "Negative Geometric Mean": -10.949885493970315, "Negative prob": -10.949885493970315, "Normal Loss": 0.6209268569946289, "Normal prob": -0.6209268569946289, "Positive Loss": 0.010221516713500023, "Positive prob": -0.010221516713500023, "epoch": 1.2238805970149254, "step": 41 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4472298324108124, "Normal prob": -0.4472298324108124, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.2238805970149254, "step": 41 }, { "epoch": 1.2537313432835822, "grad_norm": 5.857596369043, "learning_rate": 2.0454545454545453e-06, "loss": 0.5837, "step": 42 }, { "DPO Loss": 3.1575882722812355e-05, "Negative Geometric Mean": -10.842009715544872, "Negative prob": -10.842009715544872, "Normal Loss": 0.3952675461769104, "Normal prob": -0.3952675461769104, "Positive Loss": 0.06998435407876968, "Positive prob": -0.06998435407876968, "epoch": 1.2537313432835822, "step": 42 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3682936131954193, "Normal prob": -0.3682936131954193, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.2537313432835822, "step": 42 }, { "epoch": 1.2835820895522387, "grad_norm": 6.19946111675221, "learning_rate": 2.0227272727272726e-06, "loss": 0.4489, "step": 43 }, { "DPO Loss": 7.918896147509772e-06, "Negative Geometric Mean": -11.86809765625, "Negative prob": -11.86809765625, "Normal Loss": 0.7341710329055786, "Normal prob": -0.7341710329055786, "Positive Loss": 0.023408204317092896, "Positive prob": -0.023408204317092896, "epoch": 1.2835820895522387, "step": 43 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5852903723716736, "Normal prob": -0.5852903723716736, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.2835820895522387, "step": 43 }, { "epoch": 1.3134328358208955, "grad_norm": 7.299737993528941, "learning_rate": 2e-06, "loss": 0.6287, "step": 44 }, { "DPO Loss": 1.81222332665437e-05, "Negative Geometric Mean": -10.647718364689625, "Negative prob": -10.647718364689625, "Normal Loss": 0.4992733597755432, "Normal prob": -0.4992733597755432, "Positive Loss": 0.05666818842291832, "Positive prob": -0.05666818842291832, "epoch": 1.3134328358208955, "step": 44 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7956355810165405, "Normal prob": -0.7956355810165405, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.3134328358208955, "step": 44 }, { "epoch": 1.3432835820895521, "grad_norm": 7.900970686658878, "learning_rate": 1.977272727272727e-06, "loss": 0.7441, "step": 45 }, { "DPO Loss": 6.092015148833826e-05, "Negative Geometric Mean": -9.997283935546875, "Negative prob": -9.997283935546875, "Normal Loss": 0.5293800830841064, "Normal prob": -0.5293800830841064, "Positive Loss": 0.012493799440562725, "Positive prob": -0.012493799440562725, "epoch": 1.3432835820895521, "step": 45 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6335676908493042, "Normal prob": -0.6335676908493042, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.3432835820895521, "step": 45 }, { "epoch": 1.373134328358209, "grad_norm": 6.033587067188048, "learning_rate": 1.9545454545454545e-06, "loss": 0.6688, "step": 46 }, { "DPO Loss": 2.075863324394268e-05, "Negative Geometric Mean": -11.443209795884684, "Negative prob": -11.443209795884684, "Normal Loss": 0.9459198713302612, "Normal prob": -0.9459198713302612, "Positive Loss": 0.019672967493534088, "Positive prob": -0.019672967493534088, "epoch": 1.373134328358209, "step": 46 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5753485560417175, "Normal prob": -0.5753485560417175, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.373134328358209, "step": 46 }, { "epoch": 1.4029850746268657, "grad_norm": 6.437116667065512, "learning_rate": 1.931818181818182e-06, "loss": 0.7138, "step": 47 }, { "DPO Loss": 5.24218732737661e-05, "Negative Geometric Mean": -10.045061616056572, "Negative prob": -10.045061616056572, "Normal Loss": 0.6808024644851685, "Normal prob": -0.6808024644851685, "Positive Loss": 0.023415615782141685, "Positive prob": -0.023415615782141685, "epoch": 1.4029850746268657, "step": 47 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6357601284980774, "Normal prob": -0.6357601284980774, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.4029850746268657, "step": 47 }, { "epoch": 1.4328358208955223, "grad_norm": 7.152119639795567, "learning_rate": 1.909090909090909e-06, "loss": 0.6079, "step": 48 }, { "DPO Loss": 7.2306889216542525e-06, "Negative Geometric Mean": -12.073476457210242, "Negative prob": -12.073476457210242, "Normal Loss": 0.5705257058143616, "Normal prob": -0.5705257058143616, "Positive Loss": 0.02072186954319477, "Positive prob": -0.02072186954319477, "epoch": 1.4328358208955223, "step": 48 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5705331563949585, "Normal prob": -0.5705331563949585, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.4328358208955223, "step": 48 }, { "epoch": 1.462686567164179, "grad_norm": 8.435206603146995, "learning_rate": 1.8863636363636364e-06, "loss": 0.4921, "step": 49 }, { "DPO Loss": 1.2005791148960418e-05, "Negative Geometric Mean": -11.335293660121682, "Negative prob": -11.335293660121682, "Normal Loss": 0.4985297918319702, "Normal prob": -0.4985297918319702, "Positive Loss": 0.03511481359601021, "Positive prob": -0.03511481359601021, "epoch": 1.462686567164179, "step": 49 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5165051221847534, "Normal prob": -0.5165051221847534, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.462686567164179, "step": 49 }, { "epoch": 1.4925373134328357, "grad_norm": 6.663071553118176, "learning_rate": 1.8636363636363637e-06, "loss": 0.582, "step": 50 }, { "DPO Loss": 8.121549918893668e-06, "Negative Geometric Mean": -11.646775242426388, "Negative prob": -11.646775242426388, "Normal Loss": 0.7396381497383118, "Normal prob": -0.7396381497383118, "Positive Loss": 0.046656664460897446, "Positive prob": -0.046656664460897446, "epoch": 1.4925373134328357, "step": 50 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.46003180742263794, "Normal prob": -0.46003180742263794, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.4925373134328357, "step": 50 }, { "epoch": 1.5223880597014925, "grad_norm": 6.8924034055431225, "learning_rate": 1.840909090909091e-06, "loss": 0.5667, "step": 51 }, { "DPO Loss": 3.960602457920955e-05, "Negative Geometric Mean": -10.694715555003613, "Negative prob": -10.694715555003613, "Normal Loss": 0.32585108280181885, "Normal prob": -0.32585108280181885, "Positive Loss": 0.04867149889469147, "Positive prob": -0.04867149889469147, "epoch": 1.5223880597014925, "step": 51 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4816523492336273, "Normal prob": -0.4816523492336273, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.5223880597014925, "step": 51 }, { "epoch": 1.5522388059701493, "grad_norm": 7.4924421694754075, "learning_rate": 1.8181818181818183e-06, "loss": 0.4917, "step": 52 }, { "DPO Loss": 5.1645393655010374e-05, "Negative Geometric Mean": -10.168975942689114, "Negative prob": -10.168975942689114, "Normal Loss": 0.30011507868766785, "Normal prob": -0.30011507868766785, "Positive Loss": 0.02231639437377453, "Positive prob": -0.02231639437377453, "epoch": 1.5522388059701493, "step": 52 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4937782287597656, "Normal prob": -0.4937782287597656, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.5522388059701493, "step": 52 }, { "epoch": 1.582089552238806, "grad_norm": 7.184478655478447, "learning_rate": 1.7954545454545456e-06, "loss": 0.5295, "step": 53 }, { "DPO Loss": 5.356822072205326e-06, "Negative Geometric Mean": -12.203828545026882, "Negative prob": -12.203828545026882, "Normal Loss": 0.5068655014038086, "Normal prob": -0.5068655014038086, "Positive Loss": 0.02591904066503048, "Positive prob": -0.02591904066503048, "epoch": 1.582089552238806, "step": 53 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.44835716485977173, "Normal prob": -0.44835716485977173, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.582089552238806, "step": 53 }, { "epoch": 1.6119402985074627, "grad_norm": 6.665246283694876, "learning_rate": 1.7727272727272729e-06, "loss": 0.5862, "step": 54 }, { "DPO Loss": 2.7645910456594184e-05, "Negative Geometric Mean": -10.52684736755279, "Negative prob": -10.52684736755279, "Normal Loss": 0.4901617169380188, "Normal prob": -0.4901617169380188, "Positive Loss": 0.031082332134246826, "Positive prob": -0.031082332134246826, "epoch": 1.6119402985074627, "step": 54 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3990895450115204, "Normal prob": -0.3990895450115204, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.6119402985074627, "step": 54 }, { "epoch": 1.6417910447761193, "grad_norm": 7.251995962906654, "learning_rate": 1.7500000000000002e-06, "loss": 0.5512, "step": 55 }, { "DPO Loss": 5.845775193481474e-06, "Negative Geometric Mean": -11.981290714110127, "Negative prob": -11.981290714110127, "Normal Loss": 0.36946558952331543, "Normal prob": -0.36946558952331543, "Positive Loss": 0.03706742450594902, "Positive prob": -0.03706742450594902, "epoch": 1.6417910447761193, "step": 55 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6298558712005615, "Normal prob": -0.6298558712005615, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.6417910447761193, "step": 55 }, { "epoch": 1.671641791044776, "grad_norm": 6.5830406614829995, "learning_rate": 1.7272727272727275e-06, "loss": 0.5497, "step": 56 }, { "DPO Loss": 3.2969348642918384e-05, "Negative Geometric Mean": -10.456912128245772, "Negative prob": -10.456912128245772, "Normal Loss": 0.6241900324821472, "Normal prob": -0.6241900324821472, "Positive Loss": 0.027565686032176018, "Positive prob": -0.027565686032176018, "epoch": 1.671641791044776, "step": 56 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.49294447898864746, "Normal prob": -0.49294447898864746, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.671641791044776, "step": 56 }, { "epoch": 1.7014925373134329, "grad_norm": 7.489375090765791, "learning_rate": 1.7045454545454548e-06, "loss": 0.5473, "step": 57 }, { "DPO Loss": 9.641101792233715e-06, "Negative Geometric Mean": -11.58503936609456, "Negative prob": -11.58503936609456, "Normal Loss": 0.4547930359840393, "Normal prob": -0.4547930359840393, "Positive Loss": 0.02409125678241253, "Positive prob": -0.02409125678241253, "epoch": 1.7014925373134329, "step": 57 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5437726378440857, "Normal prob": -0.5437726378440857, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7014925373134329, "step": 57 }, { "epoch": 1.7313432835820897, "grad_norm": 7.870395979704569, "learning_rate": 1.6818181818181817e-06, "loss": 0.6139, "step": 58 }, { "DPO Loss": 1.2307788643174536e-05, "Negative Geometric Mean": -11.400927734375, "Negative prob": -11.400927734375, "Normal Loss": 0.4675034284591675, "Normal prob": -0.4675034284591675, "Positive Loss": 0.02847522310912609, "Positive prob": -0.02847522310912609, "epoch": 1.7313432835820897, "step": 58 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5951191186904907, "Normal prob": -0.5951191186904907, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7313432835820897, "step": 58 }, { "epoch": 1.7611940298507462, "grad_norm": 7.248327621413268, "learning_rate": 1.659090909090909e-06, "loss": 0.5501, "step": 59 }, { "DPO Loss": 6.131353933599495e-06, "Negative Geometric Mean": -12.089666559278351, "Negative prob": -12.089666559278351, "Normal Loss": 0.6625760793685913, "Normal prob": -0.6625760793685913, "Positive Loss": 0.024925949051976204, "Positive prob": -0.024925949051976204, "epoch": 1.7611940298507462, "step": 59 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.574043333530426, "Normal prob": -0.574043333530426, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7611940298507462, "step": 59 }, { "epoch": 1.7910447761194028, "grad_norm": 5.45423423175427, "learning_rate": 1.6363636363636363e-06, "loss": 0.5803, "step": 60 }, { "DPO Loss": 6.456255345351767e-06, "Negative Geometric Mean": -12.335179908988403, "Negative prob": -12.335179908988403, "Normal Loss": 0.5476536750793457, "Normal prob": -0.5476536750793457, "Positive Loss": 0.03484680876135826, "Positive prob": -0.03484680876135826, "epoch": 1.7910447761194028, "step": 60 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4749366343021393, "Normal prob": -0.4749366343021393, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7910447761194028, "step": 60 }, { "epoch": 1.8208955223880596, "grad_norm": 5.8299300029602845, "learning_rate": 1.6136363636363635e-06, "loss": 0.5286, "step": 61 }, { "DPO Loss": 6.855680101123193e-06, "Negative Geometric Mean": -12.133075664569805, "Negative prob": -12.133075664569805, "Normal Loss": 0.49556368589401245, "Normal prob": -0.49556368589401245, "Positive Loss": 0.031043315306305885, "Positive prob": -0.031043315306305885, "epoch": 1.8208955223880596, "step": 61 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7167157530784607, "Normal prob": -0.7167157530784607, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.8208955223880596, "step": 61 }, { "epoch": 1.8507462686567164, "grad_norm": 6.451470188285151, "learning_rate": 1.5909090909090908e-06, "loss": 0.5513, "step": 62 }, { "DPO Loss": 1.236436099783623e-05, "Negative Geometric Mean": -11.445466172271574, "Negative prob": -11.445466172271574, "Normal Loss": 0.5918139219284058, "Normal prob": -0.5918139219284058, "Positive Loss": 0.026092026382684708, "Positive prob": -0.026092026382684708, "epoch": 1.8507462686567164, "step": 62 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4669744670391083, "Normal prob": -0.4669744670391083, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.8507462686567164, "step": 62 }, { "epoch": 1.8805970149253732, "grad_norm": 6.794192852617203, "learning_rate": 1.5681818181818181e-06, "loss": 0.4827, "step": 63 }, { "DPO Loss": 1.3348207631132665e-05, "Negative Geometric Mean": -11.5704201146176, "Negative prob": -11.5704201146176, "Normal Loss": 0.7752443552017212, "Normal prob": -0.7752443552017212, "Positive Loss": 0.03313179686665535, "Positive prob": -0.03313179686665535, "epoch": 1.8805970149253732, "step": 63 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4319833815097809, "Normal prob": -0.4319833815097809, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.8805970149253732, "step": 63 }, { "epoch": 1.9104477611940298, "grad_norm": 6.583453632012116, "learning_rate": 1.5454545454545454e-06, "loss": 0.5891, "step": 64 }, { "DPO Loss": 4.963582076406908e-06, "Negative Geometric Mean": -12.19005351163903, "Negative prob": -12.19005351163903, "Normal Loss": 0.7006582617759705, "Normal prob": -0.7006582617759705, "Positive Loss": 0.05256428197026253, "Positive prob": -0.05256428197026253, "epoch": 1.9104477611940298, "step": 64 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5088911652565002, "Normal prob": -0.5088911652565002, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.9104477611940298, "step": 64 }, { "epoch": 1.9402985074626866, "grad_norm": 6.441815309507991, "learning_rate": 1.5227272727272727e-06, "loss": 0.676, "step": 65 }, { "DPO Loss": 3.909155035241524e-06, "Negative Geometric Mean": -12.52902815645973, "Negative prob": -12.52902815645973, "Normal Loss": 0.4071587920188904, "Normal prob": -0.4071587920188904, "Positive Loss": 0.029172131791710854, "Positive prob": -0.029172131791710854, "epoch": 1.9402985074626866, "step": 65 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5863581299781799, "Normal prob": -0.5863581299781799, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.9402985074626866, "step": 65 }, { "epoch": 1.9701492537313432, "grad_norm": 6.3544200742459935, "learning_rate": 1.5e-06, "loss": 0.4844, "step": 66 }, { "DPO Loss": 4.394697707115605e-06, "Negative Geometric Mean": -12.445152789608176, "Negative prob": -12.445152789608176, "Normal Loss": 0.5812058448791504, "Normal prob": -0.5812058448791504, "Positive Loss": 0.025852346792817116, "Positive prob": -0.025852346792817116, "epoch": 1.9701492537313432, "step": 66 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7593735456466675, "Normal prob": -0.7593735456466675, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.9701492537313432, "step": 66 }, { "epoch": 2.0, "grad_norm": 7.0108110525699985, "learning_rate": 1.4772727272727273e-06, "loss": 0.6144, "step": 67 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.34294071793556213, "Normal prob": -0.34294071793556213, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.0, "step": 67 }, { "DPO Loss": 2.4070561719272564e-06, "Negative Geometric Mean": -13.018313531479217, "Negative prob": -13.018313531479217, "Normal Loss": 0.3513112962245941, "Normal prob": -0.3513112962245941, "Positive Loss": 0.013781579211354256, "Positive prob": -0.013781579211354256, "epoch": 2.0, "step": 67 }, { "epoch": 2.029850746268657, "grad_norm": 7.07209986229336, "learning_rate": 1.4545454545454546e-06, "loss": 0.3488, "step": 68 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.17317090928554535, "Normal prob": -0.17317090928554535, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.029850746268657, "step": 68 }, { "DPO Loss": 2.6232480269051795e-05, "Negative Geometric Mean": -10.84640401579797, "Negative prob": -10.84640401579797, "Normal Loss": 0.2502392828464508, "Normal prob": -0.2502392828464508, "Positive Loss": 0.009016763418912888, "Positive prob": -0.009016763418912888, "epoch": 2.029850746268657, "step": 68 }, { "epoch": 2.0597014925373136, "grad_norm": 6.718484882180734, "learning_rate": 1.431818181818182e-06, "loss": 0.3109, "step": 69 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.25603172183036804, "Normal prob": -0.25603172183036804, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.0597014925373136, "step": 69 }, { "DPO Loss": 2.4722913198806987e-06, "Negative Geometric Mean": -13.571044921875, "Negative prob": -13.571044921875, "Normal Loss": 0.6462356448173523, "Normal prob": -0.6462356448173523, "Positive Loss": 0.004487407859414816, "Positive prob": -0.004487407859414816, "epoch": 2.0597014925373136, "step": 69 }, { "epoch": 2.08955223880597, "grad_norm": 6.391521213804556, "learning_rate": 1.4090909090909092e-06, "loss": 0.4603, "step": 70 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2792622148990631, "Normal prob": -0.2792622148990631, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.08955223880597, "step": 70 }, { "DPO Loss": 2.30209765905298e-06, "Negative Geometric Mean": -13.15788681702044, "Negative prob": -13.15788681702044, "Normal Loss": 0.297980934381485, "Normal prob": -0.297980934381485, "Positive Loss": 0.011687587015330791, "Positive prob": -0.011687587015330791, "epoch": 2.08955223880597, "step": 70 }, { "epoch": 2.1194029850746268, "grad_norm": 6.000476341412616, "learning_rate": 1.3863636363636363e-06, "loss": 0.426, "step": 71 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.31911832094192505, "Normal prob": -0.31911832094192505, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.1194029850746268, "step": 71 }, { "DPO Loss": 9.623063611478237e-07, "Negative Geometric Mean": -13.79306566782845, "Negative prob": -13.79306566782845, "Normal Loss": 0.1256338506937027, "Normal prob": -0.1256338506937027, "Positive Loss": 0.014073642902076244, "Positive prob": -0.014073642902076244, "epoch": 2.1194029850746268, "step": 71 }, { "epoch": 2.1492537313432836, "grad_norm": 5.665938957087509, "learning_rate": 1.3636363636363636e-06, "loss": 0.31, "step": 72 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5485053062438965, "Normal prob": -0.5485053062438965, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.1492537313432836, "step": 72 }, { "DPO Loss": 5.508823305986437e-06, "Negative Geometric Mean": -12.17880211034751, "Negative prob": -12.17880211034751, "Normal Loss": 0.18863847851753235, "Normal prob": -0.18863847851753235, "Positive Loss": 0.011334001086652279, "Positive prob": -0.011334001086652279, "epoch": 2.1492537313432836, "step": 72 }, { "epoch": 2.1791044776119404, "grad_norm": 6.60404865468319, "learning_rate": 1.340909090909091e-06, "loss": 0.3371, "step": 73 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2698725759983063, "Normal prob": -0.2698725759983063, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.1791044776119404, "step": 73 }, { "DPO Loss": 2.5343320997566906e-06, "Negative Geometric Mean": -13.084115531452266, "Negative prob": -13.084115531452266, "Normal Loss": 0.3101830780506134, "Normal prob": -0.3101830780506134, "Positive Loss": 0.007303276099264622, "Positive prob": -0.007303276099264622, "epoch": 2.1791044776119404, "step": 73 }, { "epoch": 2.208955223880597, "grad_norm": 7.537056674857057, "learning_rate": 1.3181818181818182e-06, "loss": 0.3896, "step": 74 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2088720202445984, "Normal prob": -0.2088720202445984, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.208955223880597, "step": 74 }, { "DPO Loss": 6.722595604209246e-06, "Negative Geometric Mean": -12.173159354073661, "Negative prob": -12.173159354073661, "Normal Loss": 0.40398481488227844, "Normal prob": -0.40398481488227844, "Positive Loss": 0.01838095672428608, "Positive prob": -0.01838095672428608, "epoch": 2.208955223880597, "step": 74 }, { "epoch": 2.2388059701492535, "grad_norm": 7.724539990601786, "learning_rate": 1.2954545454545455e-06, "loss": 0.3009, "step": 75 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.48262086510658264, "Normal prob": -0.48262086510658264, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.2388059701492535, "step": 75 }, { "DPO Loss": 5.197786322988637e-07, "Negative Geometric Mean": -14.51572339888308, "Negative prob": -14.51572339888308, "Normal Loss": 0.23035627603530884, "Normal prob": -0.23035627603530884, "Positive Loss": 0.008795712143182755, "Positive prob": -0.008795712143182755, "epoch": 2.2388059701492535, "step": 75 }, { "epoch": 2.2686567164179103, "grad_norm": 8.221187128676613, "learning_rate": 1.2727272727272728e-06, "loss": 0.3589, "step": 76 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2785874009132385, "Normal prob": -0.2785874009132385, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.2686567164179103, "step": 76 }, { "DPO Loss": 1.621094342365068e-06, "Negative Geometric Mean": -13.028692859266869, "Negative prob": -13.028692859266869, "Normal Loss": 0.29848527908325195, "Normal prob": -0.29848527908325195, "Positive Loss": 0.004369077738374472, "Positive prob": -0.004369077738374472, "epoch": 2.2686567164179103, "step": 76 }, { "epoch": 2.298507462686567, "grad_norm": 6.89968090148801, "learning_rate": 1.25e-06, "loss": 0.3104, "step": 77 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.31930315494537354, "Normal prob": -0.31930315494537354, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.298507462686567, "step": 77 }, { "DPO Loss": 1.3040399279024244e-05, "Negative Geometric Mean": -11.366265677550448, "Negative prob": -11.366265677550448, "Normal Loss": 0.4552519917488098, "Normal prob": -0.4552519917488098, "Positive Loss": 0.020085470750927925, "Positive prob": -0.020085470750927925, "epoch": 2.298507462686567, "step": 77 }, { "epoch": 2.328358208955224, "grad_norm": 7.22395223128197, "learning_rate": 1.2272727272727274e-06, "loss": 0.3699, "step": 78 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4527321457862854, "Normal prob": -0.4527321457862854, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.328358208955224, "step": 78 }, { "DPO Loss": 3.5797003005450865e-06, "Negative Geometric Mean": -12.924953185405927, "Negative prob": -12.924953185405927, "Normal Loss": 0.49810460209846497, "Normal prob": -0.49810460209846497, "Positive Loss": 0.0035452607553452253, "Positive prob": -0.0035452607553452253, "epoch": 2.328358208955224, "step": 78 }, { "epoch": 2.3582089552238807, "grad_norm": 7.0300487933358, "learning_rate": 1.2045454545454545e-06, "loss": 0.3254, "step": 79 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3027646243572235, "Normal prob": -0.3027646243572235, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.3582089552238807, "step": 79 }, { "DPO Loss": 5.243361513598499e-06, "Negative Geometric Mean": -12.248686441906308, "Negative prob": -12.248686441906308, "Normal Loss": 0.19559913873672485, "Normal prob": -0.19559913873672485, "Positive Loss": 0.0036265316884964705, "Positive prob": -0.0036265316884964705, "epoch": 2.3582089552238807, "step": 79 }, { "epoch": 2.388059701492537, "grad_norm": 7.018808301104353, "learning_rate": 1.1818181818181818e-06, "loss": 0.2924, "step": 80 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.37363290786743164, "Normal prob": -0.37363290786743164, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.388059701492537, "step": 80 }, { "DPO Loss": 8.039128102474587e-06, "Negative Geometric Mean": -12.245501740608809, "Negative prob": -12.245501740608809, "Normal Loss": 0.38694456219673157, "Normal prob": -0.38694456219673157, "Positive Loss": 0.007913284935057163, "Positive prob": -0.007913284935057163, "epoch": 2.388059701492537, "step": 80 }, { "epoch": 2.417910447761194, "grad_norm": 7.1316719605682595, "learning_rate": 1.159090909090909e-06, "loss": 0.373, "step": 81 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.23960407078266144, "Normal prob": -0.23960407078266144, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.417910447761194, "step": 81 }, { "DPO Loss": 3.643317578642959e-06, "Negative Geometric Mean": -12.694272748161765, "Negative prob": -12.694272748161765, "Normal Loss": 0.3460986912250519, "Normal prob": -0.3460986912250519, "Positive Loss": 0.03607124090194702, "Positive prob": -0.03607124090194702, "epoch": 2.417910447761194, "step": 81 }, { "epoch": 2.4477611940298507, "grad_norm": 6.568519556302587, "learning_rate": 1.1363636363636364e-06, "loss": 0.2968, "step": 82 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3786263167858124, "Normal prob": -0.3786263167858124, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.4477611940298507, "step": 82 }, { "DPO Loss": 1.17810282746522e-05, "Negative Geometric Mean": -11.290989731297348, "Negative prob": -11.290989731297348, "Normal Loss": 0.3233850598335266, "Normal prob": -0.3233850598335266, "Positive Loss": 0.014756398275494576, "Positive prob": -0.014756398275494576, "epoch": 2.4477611940298507, "step": 82 }, { "epoch": 2.4776119402985075, "grad_norm": 6.961788241099841, "learning_rate": 1.1136363636363637e-06, "loss": 0.3067, "step": 83 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7298503518104553, "Normal prob": -0.7298503518104553, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.4776119402985075, "step": 83 }, { "DPO Loss": 6.489746401144139e-06, "Negative Geometric Mean": -12.0409423828125, "Negative prob": -12.0409423828125, "Normal Loss": 0.36162418127059937, "Normal prob": -0.36162418127059937, "Positive Loss": 0.007005380000919104, "Positive prob": -0.007005380000919104, "epoch": 2.4776119402985075, "step": 83 }, { "epoch": 2.5074626865671643, "grad_norm": 6.528351208906881, "learning_rate": 1.090909090909091e-06, "loss": 0.4822, "step": 84 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.43608808517456055, "Normal prob": -0.43608808517456055, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5074626865671643, "step": 84 }, { "DPO Loss": 1.8647181536166908e-06, "Negative Geometric Mean": -13.310558063113747, "Negative prob": -13.310558063113747, "Normal Loss": 0.2762463092803955, "Normal prob": -0.2762463092803955, "Positive Loss": 0.015207285061478615, "Positive prob": -0.015207285061478615, "epoch": 2.5074626865671643, "step": 84 }, { "epoch": 2.5373134328358207, "grad_norm": 6.696386369118086, "learning_rate": 1.0681818181818183e-06, "loss": 0.3106, "step": 85 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3587005138397217, "Normal prob": -0.3587005138397217, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5373134328358207, "step": 85 }, { "DPO Loss": 1.0392724887777463e-05, "Negative Geometric Mean": -11.663321547420965, "Negative prob": -11.663321547420965, "Normal Loss": 0.38271617889404297, "Normal prob": -0.38271617889404297, "Positive Loss": 0.007094533648341894, "Positive prob": -0.007094533648341894, "epoch": 2.5373134328358207, "step": 85 }, { "epoch": 2.5671641791044775, "grad_norm": 7.213029254290765, "learning_rate": 1.0454545454545456e-06, "loss": 0.317, "step": 86 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3641352355480194, "Normal prob": -0.3641352355480194, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5671641791044775, "step": 86 }, { "DPO Loss": 3.5987715729513327e-06, "Negative Geometric Mean": -12.57204106168927, "Negative prob": -12.57204106168927, "Normal Loss": 0.46394774317741394, "Normal prob": -0.46394774317741394, "Positive Loss": 0.0050806887447834015, "Positive prob": -0.0050806887447834015, "epoch": 2.5671641791044775, "step": 86 }, { "epoch": 2.5970149253731343, "grad_norm": 7.901045462084173, "learning_rate": 1.0227272727272727e-06, "loss": 0.3702, "step": 87 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.38809868693351746, "Normal prob": -0.38809868693351746, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5970149253731343, "step": 87 }, { "DPO Loss": 1.6651211340411204e-06, "Negative Geometric Mean": -13.372305265036962, "Negative prob": -13.372305265036962, "Normal Loss": 0.27421802282333374, "Normal prob": -0.27421802282333374, "Positive Loss": 0.017523737624287605, "Positive prob": -0.017523737624287605, "epoch": 2.5970149253731343, "step": 87 }, { "epoch": 2.626865671641791, "grad_norm": 6.883433596486567, "learning_rate": 1e-06, "loss": 0.3336, "step": 88 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.28301262855529785, "Normal prob": -0.28301262855529785, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.626865671641791, "step": 88 }, { "DPO Loss": 2.9454008504157696e-06, "Negative Geometric Mean": -12.71242241010274, "Negative prob": -12.71242241010274, "Normal Loss": 0.35078540444374084, "Normal prob": -0.35078540444374084, "Positive Loss": 0.01793888583779335, "Positive prob": -0.01793888583779335, "epoch": 2.626865671641791, "step": 88 }, { "epoch": 2.656716417910448, "grad_norm": 6.603176465896816, "learning_rate": 9.772727272727273e-07, "loss": 0.4153, "step": 89 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.29014265537261963, "Normal prob": -0.29014265537261963, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.656716417910448, "step": 89 }, { "DPO Loss": 1.4884178069432536e-05, "Negative Geometric Mean": -11.291460420642494, "Negative prob": -11.291460420642494, "Normal Loss": 0.27552318572998047, "Normal prob": -0.27552318572998047, "Positive Loss": 0.006887962110340595, "Positive prob": -0.006887962110340595, "epoch": 2.656716417910448, "step": 89 }, { "epoch": 2.6865671641791042, "grad_norm": 7.071652347635012, "learning_rate": 9.545454545454546e-07, "loss": 0.3525, "step": 90 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3731546103954315, "Normal prob": -0.3731546103954315, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.6865671641791042, "step": 90 }, { "DPO Loss": 6.442647626957985e-06, "Negative Geometric Mean": -12.235858669051204, "Negative prob": -12.235858669051204, "Normal Loss": 0.21569418907165527, "Normal prob": -0.21569418907165527, "Positive Loss": 0.005633717868477106, "Positive prob": -0.005633717868477106, "epoch": 2.6865671641791042, "step": 90 }, { "epoch": 2.716417910447761, "grad_norm": 6.08004163712123, "learning_rate": 9.318181818181818e-07, "loss": 0.3038, "step": 91 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.35742098093032837, "Normal prob": -0.35742098093032837, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.716417910447761, "step": 91 }, { "DPO Loss": 4.149841651615268e-06, "Negative Geometric Mean": -12.598418855144757, "Negative prob": -12.598418855144757, "Normal Loss": 0.28535205125808716, "Normal prob": -0.28535205125808716, "Positive Loss": 0.012952926568686962, "Positive prob": -0.012952926568686962, "epoch": 2.716417910447761, "step": 91 }, { "epoch": 2.746268656716418, "grad_norm": 7.098164063144904, "learning_rate": 9.090909090909091e-07, "loss": 0.3625, "step": 92 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.28534939885139465, "Normal prob": -0.28534939885139465, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.746268656716418, "step": 92 }, { "DPO Loss": 1.2248438490094142e-06, "Negative Geometric Mean": -13.854903100242078, "Negative prob": -13.854903100242078, "Normal Loss": 0.36843106150627136, "Normal prob": -0.36843106150627136, "Positive Loss": 0.03612969443202019, "Positive prob": -0.03612969443202019, "epoch": 2.746268656716418, "step": 92 }, { "epoch": 2.7761194029850746, "grad_norm": 7.081778856883454, "learning_rate": 8.863636363636364e-07, "loss": 0.4371, "step": 93 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4569069743156433, "Normal prob": -0.4569069743156433, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.7761194029850746, "step": 93 }, { "DPO Loss": 4.104862759699108e-06, "Negative Geometric Mean": -12.369599921518265, "Negative prob": -12.369599921518265, "Normal Loss": 0.2255462110042572, "Normal prob": -0.2255462110042572, "Positive Loss": 0.018570953980088234, "Positive prob": -0.018570953980088234, "epoch": 2.7761194029850746, "step": 93 }, { "epoch": 2.8059701492537314, "grad_norm": 6.9825157063188374, "learning_rate": 8.636363636363637e-07, "loss": 0.3096, "step": 94 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2608332633972168, "Normal prob": -0.2608332633972168, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.8059701492537314, "step": 94 }, { "DPO Loss": 1.6469150536061094e-06, "Negative Geometric Mean": -13.248110250737463, "Negative prob": -13.248110250737463, "Normal Loss": 0.24679048359394073, "Normal prob": -0.24679048359394073, "Positive Loss": 0.014129209332168102, "Positive prob": -0.014129209332168102, "epoch": 2.8059701492537314, "step": 94 }, { "epoch": 2.835820895522388, "grad_norm": 7.604649982159979, "learning_rate": 8.409090909090908e-07, "loss": 0.2674, "step": 95 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2900543212890625, "Normal prob": -0.2900543212890625, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.835820895522388, "step": 95 }, { "DPO Loss": 7.456183395726831e-07, "Negative Geometric Mean": -13.86903901734104, "Negative prob": -13.86903901734104, "Normal Loss": 0.4645146429538727, "Normal prob": -0.4645146429538727, "Positive Loss": 0.006549107376486063, "Positive prob": -0.006549107376486063, "epoch": 2.835820895522388, "step": 95 }, { "epoch": 2.8656716417910446, "grad_norm": 6.428480514761495, "learning_rate": 8.181818181818181e-07, "loss": 0.3561, "step": 96 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.17966699600219727, "Normal prob": -0.17966699600219727, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.8656716417910446, "step": 96 }, { "DPO Loss": 1.1826854588610623e-05, "Negative Geometric Mean": -11.482138813405797, "Negative prob": -11.482138813405797, "Normal Loss": 0.4035150110721588, "Normal prob": -0.4035150110721588, "Positive Loss": 0.01260466780513525, "Positive prob": -0.01260466780513525, "epoch": 2.8656716417910446, "step": 96 }, { "epoch": 2.8955223880597014, "grad_norm": 6.355093862289104, "learning_rate": 7.954545454545454e-07, "loss": 0.3552, "step": 97 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.1942460834980011, "Normal prob": -0.1942460834980011, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.8955223880597014, "step": 97 }, { "DPO Loss": 2.1722275431802666e-06, "Negative Geometric Mean": -12.907205766876064, "Negative prob": -12.907205766876064, "Normal Loss": 0.3104533553123474, "Normal prob": -0.3104533553123474, "Positive Loss": 0.004998633172363043, "Positive prob": -0.004998633172363043, "epoch": 2.8955223880597014, "step": 97 }, { "epoch": 2.925373134328358, "grad_norm": 6.623833463639339, "learning_rate": 7.727272727272727e-07, "loss": 0.3507, "step": 98 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.33913183212280273, "Normal prob": -0.33913183212280273, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.925373134328358, "step": 98 }, { "DPO Loss": 1.1818778170094944e-06, "Negative Geometric Mean": -13.596246585154585, "Negative prob": -13.596246585154585, "Normal Loss": 0.3668951988220215, "Normal prob": -0.3668951988220215, "Positive Loss": 0.016438201069831848, "Positive prob": -0.016438201069831848, "epoch": 2.925373134328358, "step": 98 }, { "epoch": 2.955223880597015, "grad_norm": 7.342348492585064, "learning_rate": 7.5e-07, "loss": 0.4162, "step": 99 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2540174424648285, "Normal prob": -0.2540174424648285, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.955223880597015, "step": 99 }, { "DPO Loss": 2.3274892548031074e-06, "Negative Geometric Mean": -13.38752170138889, "Negative prob": -13.38752170138889, "Normal Loss": 0.6933973431587219, "Normal prob": -0.6933973431587219, "Positive Loss": 0.004921761341392994, "Positive prob": -0.004921761341392994, "epoch": 2.955223880597015, "step": 99 }, { "epoch": 2.9850746268656714, "grad_norm": 6.242023928985393, "learning_rate": 7.272727272727273e-07, "loss": 0.4423, "step": 100 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.32996082305908203, "Normal prob": -0.32996082305908203, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.9850746268656714, "step": 100 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.27658382058143616, "Normal prob": -0.27658382058143616, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.9850746268656714, "step": 100 }, { "epoch": 3.014925373134328, "grad_norm": 5.853608572027528, "learning_rate": 7.045454545454546e-07, "loss": 0.288, "step": 101 }, { "DPO Loss": 1.1678178546410005e-06, "Negative Geometric Mean": -14.891405087425595, "Negative prob": -14.891405087425595, "Normal Loss": 0.11027539521455765, "Normal prob": -0.11027539521455765, "Positive Loss": 0.0027892631478607655, "Positive prob": -0.0027892631478607655, "epoch": 3.014925373134328, "step": 101 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.26463809609413147, "Normal prob": -0.26463809609413147, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.014925373134328, "step": 101 }, { "epoch": 3.044776119402985, "grad_norm": 6.514265106044286, "learning_rate": 6.818181818181818e-07, "loss": 0.1912, "step": 102 }, { "DPO Loss": 3.6430315872713267e-06, "Negative Geometric Mean": -13.001615084134615, "Negative prob": -13.001615084134615, "Normal Loss": 0.10679034143686295, "Normal prob": -0.10679034143686295, "Positive Loss": 0.01767848990857601, "Positive prob": -0.01767848990857601, "epoch": 3.044776119402985, "step": 102 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.1798650622367859, "Normal prob": -0.1798650622367859, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.044776119402985, "step": 102 }, { "epoch": 3.074626865671642, "grad_norm": 6.098513301658777, "learning_rate": 6.590909090909091e-07, "loss": 0.2014, "step": 103 }, { "DPO Loss": 1.1857304744373281e-05, "Negative Geometric Mean": -11.364407111528822, "Negative prob": -11.364407111528822, "Normal Loss": 0.5280313491821289, "Normal prob": -0.5280313491821289, "Positive Loss": 0.0045397402718663216, "Positive prob": -0.0045397402718663216, "epoch": 3.074626865671642, "step": 103 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.12902340292930603, "Normal prob": -0.12902340292930603, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.074626865671642, "step": 103 }, { "epoch": 3.1044776119402986, "grad_norm": 6.268435906008225, "learning_rate": 6.363636363636364e-07, "loss": 0.2413, "step": 104 }, { "DPO Loss": 8.258820908422388e-07, "Negative Geometric Mean": -13.974816351361241, "Negative prob": -13.974816351361241, "Normal Loss": 0.08596272766590118, "Normal prob": -0.08596272766590118, "Positive Loss": 0.0037321026902645826, "Positive prob": -0.0037321026902645826, "epoch": 3.1044776119402986, "step": 104 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.12673968076705933, "Normal prob": -0.12673968076705933, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1044776119402986, "step": 104 }, { "epoch": 3.1343283582089554, "grad_norm": 4.849865946932611, "learning_rate": 6.136363636363637e-07, "loss": 0.2016, "step": 105 }, { "DPO Loss": 1.2510054247133794e-05, "Negative Geometric Mean": -11.465228908237913, "Negative prob": -11.465228908237913, "Normal Loss": 0.35748380422592163, "Normal prob": -0.35748380422592163, "Positive Loss": 0.0027046226896345615, "Positive prob": -0.0027046226896345615, "epoch": 3.1343283582089554, "step": 105 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2695090174674988, "Normal prob": -0.2695090174674988, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1343283582089554, "step": 105 }, { "epoch": 3.1641791044776117, "grad_norm": 5.314522862984474, "learning_rate": 5.909090909090909e-07, "loss": 0.3061, "step": 106 }, { "DPO Loss": 1.1451636416948107e-06, "Negative Geometric Mean": -13.922169000330106, "Negative prob": -13.922169000330106, "Normal Loss": 0.15261346101760864, "Normal prob": -0.15261346101760864, "Positive Loss": 0.004130078945308924, "Positive prob": -0.004130078945308924, "epoch": 3.1641791044776117, "step": 106 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3225187063217163, "Normal prob": -0.3225187063217163, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1641791044776117, "step": 106 }, { "epoch": 3.1940298507462686, "grad_norm": 5.845924095415361, "learning_rate": 5.681818181818182e-07, "loss": 0.2219, "step": 107 }, { "DPO Loss": 3.406975256320534e-06, "Negative Geometric Mean": -13.250364491637324, "Negative prob": -13.250364491637324, "Normal Loss": 0.2123008817434311, "Normal prob": -0.2123008817434311, "Positive Loss": 0.00209601828828454, "Positive prob": -0.00209601828828454, "epoch": 3.1940298507462686, "step": 107 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2501071095466614, "Normal prob": -0.2501071095466614, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1940298507462686, "step": 107 }, { "epoch": 3.2238805970149254, "grad_norm": 6.273812820779774, "learning_rate": 5.454545454545455e-07, "loss": 0.2085, "step": 108 }, { "DPO Loss": 7.219691741892465e-06, "Negative Geometric Mean": -12.184994006283068, "Negative prob": -12.184994006283068, "Normal Loss": 0.11331921815872192, "Normal prob": -0.11331921815872192, "Positive Loss": 0.00535797793418169, "Positive prob": -0.00535797793418169, "epoch": 3.2238805970149254, "step": 108 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2601730525493622, "Normal prob": -0.2601730525493622, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.2238805970149254, "step": 108 }, { "epoch": 3.253731343283582, "grad_norm": 6.61609605527567, "learning_rate": 5.227272727272728e-07, "loss": 0.2738, "step": 109 }, { "DPO Loss": 3.8013957323267827e-06, "Negative Geometric Mean": -12.432244078240172, "Negative prob": -12.432244078240172, "Normal Loss": 0.212859645485878, "Normal prob": -0.212859645485878, "Positive Loss": 0.0058334325440227985, "Positive prob": -0.0058334325440227985, "epoch": 3.253731343283582, "step": 109 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.1951064020395279, "Normal prob": -0.1951064020395279, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.253731343283582, "step": 109 }, { "epoch": 3.283582089552239, "grad_norm": 6.461685783109346, "learning_rate": 5e-07, "loss": 0.1872, "step": 110 }, { "DPO Loss": 9.760423619643666e-07, "Negative Geometric Mean": -13.36294397566719, "Negative prob": -13.36294397566719, "Normal Loss": 0.16994960606098175, "Normal prob": -0.16994960606098175, "Positive Loss": 0.007101885508745909, "Positive prob": -0.007101885508745909, "epoch": 3.283582089552239, "step": 110 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.25234583020210266, "Normal prob": -0.25234583020210266, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.283582089552239, "step": 110 }, { "epoch": 3.3134328358208958, "grad_norm": 6.064062487418674, "learning_rate": 4.772727272727273e-07, "loss": 0.1877, "step": 111 }, { "DPO Loss": 3.0380582299826617e-06, "Negative Geometric Mean": -12.93929797021028, "Negative prob": -12.93929797021028, "Normal Loss": 0.07496587187051773, "Normal prob": -0.07496587187051773, "Positive Loss": 0.003302493365481496, "Positive prob": -0.003302493365481496, "epoch": 3.3134328358208958, "step": 111 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.116237573325634, "Normal prob": -0.116237573325634, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.3134328358208958, "step": 111 }, { "epoch": 3.343283582089552, "grad_norm": 11.774507449917868, "learning_rate": 4.5454545454545457e-07, "loss": 0.1455, "step": 112 }, { "DPO Loss": 1.8040673343906892e-06, "Negative Geometric Mean": -13.262591667895046, "Negative prob": -13.262591667895046, "Normal Loss": 0.20686665177345276, "Normal prob": -0.20686665177345276, "Positive Loss": 0.0036297321785241365, "Positive prob": -0.0036297321785241365, "epoch": 3.343283582089552, "step": 112 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.07100074738264084, "Normal prob": -0.07100074738264084, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.343283582089552, "step": 112 }, { "epoch": 3.373134328358209, "grad_norm": 6.197973269249537, "learning_rate": 4.3181818181818187e-07, "loss": 0.2737, "step": 113 }, { "DPO Loss": 1.8399912201802113e-05, "Negative Geometric Mean": -11.161076035610465, "Negative prob": -11.161076035610465, "Normal Loss": 0.3017271161079407, "Normal prob": -0.3017271161079407, "Positive Loss": 0.0018219746416434646, "Positive prob": -0.0018219746416434646, "epoch": 3.373134328358209, "step": 113 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.40800532698631287, "Normal prob": -0.40800532698631287, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.373134328358209, "step": 113 }, { "epoch": 3.4029850746268657, "grad_norm": 6.686699543045222, "learning_rate": 4.0909090909090906e-07, "loss": 0.2495, "step": 114 }, { "DPO Loss": 7.636819721194287e-06, "Negative Geometric Mean": -11.901342007076794, "Negative prob": -11.901342007076794, "Normal Loss": 0.12182455509901047, "Normal prob": -0.12182455509901047, "Positive Loss": 0.008607598952949047, "Positive prob": -0.008607598952949047, "epoch": 3.4029850746268657, "step": 114 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.18401654064655304, "Normal prob": -0.18401654064655304, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4029850746268657, "step": 114 }, { "epoch": 3.4328358208955225, "grad_norm": 6.81794821888112, "learning_rate": 3.8636363636363636e-07, "loss": 0.2007, "step": 115 }, { "DPO Loss": 6.554748941648e-06, "Negative Geometric Mean": -12.335026873289234, "Negative prob": -12.335026873289234, "Normal Loss": 0.21853935718536377, "Normal prob": -0.21853935718536377, "Positive Loss": 0.004862755537033081, "Positive prob": -0.004862755537033081, "epoch": 3.4328358208955225, "step": 115 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.135187566280365, "Normal prob": -0.135187566280365, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4328358208955225, "step": 115 }, { "epoch": 3.4626865671641793, "grad_norm": 6.189730848953201, "learning_rate": 3.6363636363636366e-07, "loss": 0.2086, "step": 116 }, { "DPO Loss": 3.6050653288985906e-06, "Negative Geometric Mean": -12.796296909877233, "Negative prob": -12.796296909877233, "Normal Loss": 0.2358456403017044, "Normal prob": -0.2358456403017044, "Positive Loss": 0.010835876688361168, "Positive prob": -0.010835876688361168, "epoch": 3.4626865671641793, "step": 116 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.14767657220363617, "Normal prob": -0.14767657220363617, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4626865671641793, "step": 116 }, { "epoch": 3.4925373134328357, "grad_norm": 6.979155772945575, "learning_rate": 3.409090909090909e-07, "loss": 0.2849, "step": 117 }, { "DPO Loss": 6.702416418876966e-06, "Negative Geometric Mean": -12.413108648255815, "Negative prob": -12.413108648255815, "Normal Loss": 0.14713706076145172, "Normal prob": -0.14713706076145172, "Positive Loss": 0.003462533000856638, "Positive prob": -0.003462533000856638, "epoch": 3.4925373134328357, "step": 117 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.23745860159397125, "Normal prob": -0.23745860159397125, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4925373134328357, "step": 117 }, { "epoch": 3.5223880597014925, "grad_norm": 6.728854419168043, "learning_rate": 3.181818181818182e-07, "loss": 0.1677, "step": 118 }, { "DPO Loss": 2.470218665968806e-06, "Negative Geometric Mean": -13.20369715379494, "Negative prob": -13.20369715379494, "Normal Loss": 0.4334864020347595, "Normal prob": -0.4334864020347595, "Positive Loss": 0.005433392245322466, "Positive prob": -0.005433392245322466, "epoch": 3.5223880597014925, "step": 118 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.1629062443971634, "Normal prob": -0.1629062443971634, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.5223880597014925, "step": 118 }, { "epoch": 3.5522388059701493, "grad_norm": 5.9093975782845645, "learning_rate": 2.9545454545454545e-07, "loss": 0.2375, "step": 119 }, { "DPO Loss": 1.9495445800859506e-06, "Negative Geometric Mean": -13.439311124840561, "Negative prob": -13.439311124840561, "Normal Loss": 0.3102337718009949, "Normal prob": -0.3102337718009949, "Positive Loss": 0.001397938933223486, "Positive prob": -0.001397938933223486, "epoch": 3.5522388059701493, "step": 119 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.10538414120674133, "Normal prob": -0.10538414120674133, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.5522388059701493, "step": 119 }, { "epoch": 3.582089552238806, "grad_norm": 5.432045886830493, "learning_rate": 2.7272727272727274e-07, "loss": 0.1749, "step": 120 }, { "DPO Loss": 2.111671823116432e-05, "Negative Geometric Mean": -11.482684536637931, "Negative prob": -11.482684536637931, "Normal Loss": 0.09432564675807953, "Normal prob": -0.09432564675807953, "Positive Loss": 0.003968758508563042, "Positive prob": -0.003968758508563042, "epoch": 3.582089552238806, "step": 120 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2578660249710083, "Normal prob": -0.2578660249710083, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.582089552238806, "step": 120 }, { "epoch": 3.611940298507463, "grad_norm": 6.329624233904299, "learning_rate": 2.5e-07, "loss": 0.1958, "step": 121 }, { "DPO Loss": 2.2736615595795564e-06, "Negative Geometric Mean": -13.00193465573286, "Negative prob": -13.00193465573286, "Normal Loss": 0.11790954321622849, "Normal prob": -0.11790954321622849, "Positive Loss": 0.015944618731737137, "Positive prob": -0.015944618731737137, "epoch": 3.611940298507463, "step": 121 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2980431020259857, "Normal prob": -0.2980431020259857, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.611940298507463, "step": 121 }, { "epoch": 3.6417910447761193, "grad_norm": 6.390125283101109, "learning_rate": 2.2727272727272729e-07, "loss": 0.2578, "step": 122 }, { "DPO Loss": 1.8224405365341362e-06, "Negative Geometric Mean": -13.443804791865459, "Negative prob": -13.443804791865459, "Normal Loss": 0.2082529067993164, "Normal prob": -0.2082529067993164, "Positive Loss": 0.0020329623948782682, "Positive prob": -0.0020329623948782682, "epoch": 3.6417910447761193, "step": 122 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.27499350905418396, "Normal prob": -0.27499350905418396, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.6417910447761193, "step": 122 }, { "epoch": 3.671641791044776, "grad_norm": 7.334639331815002, "learning_rate": 2.0454545454545453e-07, "loss": 0.1928, "step": 123 }, { "DPO Loss": 3.887408166527688e-06, "Negative Geometric Mean": -12.602550216132615, "Negative prob": -12.602550216132615, "Normal Loss": 0.3751141130924225, "Normal prob": -0.3751141130924225, "Positive Loss": 0.0016299609560519457, "Positive prob": -0.0016299609560519457, "epoch": 3.671641791044776, "step": 123 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.14575666189193726, "Normal prob": -0.14575666189193726, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.671641791044776, "step": 123 }, { "epoch": 3.701492537313433, "grad_norm": 5.602697068663161, "learning_rate": 1.8181818181818183e-07, "loss": 0.1898, "step": 124 }, { "DPO Loss": 1.1130948677477009e-06, "Negative Geometric Mean": -13.74802903824201, "Negative prob": -13.74802903824201, "Normal Loss": 0.09506483376026154, "Normal prob": -0.09506483376026154, "Positive Loss": 0.0032382213976234198, "Positive prob": -0.0032382213976234198, "epoch": 3.701492537313433, "step": 124 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.15355288982391357, "Normal prob": -0.15355288982391357, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.701492537313433, "step": 124 }, { "epoch": 3.7313432835820897, "grad_norm": 7.94396763225081, "learning_rate": 1.590909090909091e-07, "loss": 0.153, "step": 125 }, { "DPO Loss": 4.4835976933222324e-07, "Negative Geometric Mean": -14.556803077741021, "Negative prob": -14.556803077741021, "Normal Loss": 0.09771548211574554, "Normal prob": -0.09771548211574554, "Positive Loss": 0.012089760042726994, "Positive prob": -0.012089760042726994, "epoch": 3.7313432835820897, "step": 125 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.17557981610298157, "Normal prob": -0.17557981610298157, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.7313432835820897, "step": 125 }, { "epoch": 3.7611940298507465, "grad_norm": 5.746760679085967, "learning_rate": 1.3636363636363637e-07, "loss": 0.2027, "step": 126 }, { "DPO Loss": 2.5731311695920285e-06, "Negative Geometric Mean": -12.94003257909751, "Negative prob": -12.94003257909751, "Normal Loss": 0.2374420166015625, "Normal prob": -0.2374420166015625, "Positive Loss": 0.006095151882618666, "Positive prob": -0.006095151882618666, "epoch": 3.7611940298507465, "step": 126 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.12284944206476212, "Normal prob": -0.12284944206476212, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.7611940298507465, "step": 126 }, { "epoch": 3.791044776119403, "grad_norm": 6.172054742054878, "learning_rate": 1.1363636363636364e-07, "loss": 0.1587, "step": 127 }, { "DPO Loss": 3.012714219508236e-06, "Negative Geometric Mean": -12.808327907986111, "Negative prob": -12.808327907986111, "Normal Loss": 0.07416192442178726, "Normal prob": -0.07416192442178726, "Positive Loss": 0.0024134027771651745, "Positive prob": -0.0024134027771651745, "epoch": 3.791044776119403, "step": 127 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.292096883058548, "Normal prob": -0.292096883058548, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.791044776119403, "step": 127 }, { "epoch": 3.8208955223880596, "grad_norm": 6.689741596768201, "learning_rate": 9.090909090909091e-08, "loss": 0.1959, "step": 128 }, { "DPO Loss": 3.060298655777367e-06, "Negative Geometric Mean": -12.873194280660377, "Negative prob": -12.873194280660377, "Normal Loss": 0.33571678400039673, "Normal prob": -0.33571678400039673, "Positive Loss": 0.0060377782210707664, "Positive prob": -0.0060377782210707664, "epoch": 3.8208955223880596, "step": 128 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.22519126534461975, "Normal prob": -0.22519126534461975, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.8208955223880596, "step": 128 }, { "epoch": 3.8507462686567164, "grad_norm": 7.002612239614997, "learning_rate": 6.818181818181819e-08, "loss": 0.2147, "step": 129 }, { "DPO Loss": 1.0818999409674698e-06, "Negative Geometric Mean": -13.897989908854166, "Negative prob": -13.897989908854166, "Normal Loss": 0.2647945284843445, "Normal prob": -0.2647945284843445, "Positive Loss": 0.005775726865977049, "Positive prob": -0.005775726865977049, "epoch": 3.8507462686567164, "step": 129 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.14088931679725647, "Normal prob": -0.14088931679725647, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.8507462686567164, "step": 129 }, { "epoch": 3.8805970149253732, "grad_norm": 7.106870960626619, "learning_rate": 4.545454545454546e-08, "loss": 0.2408, "step": 130 }, { "DPO Loss": 3.805466487272458e-06, "Negative Geometric Mean": -12.980504410990168, "Negative prob": -12.980504410990168, "Normal Loss": 0.3013966977596283, "Normal prob": -0.3013966977596283, "Positive Loss": 0.003041935386136174, "Positive prob": -0.003041935386136174, "epoch": 3.8805970149253732, "step": 130 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.27534565329551697, "Normal prob": -0.27534565329551697, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.8805970149253732, "step": 130 }, { "epoch": 3.91044776119403, "grad_norm": 5.966195743329273, "learning_rate": 2.272727272727273e-08, "loss": 0.2029, "step": 131 }, { "DPO Loss": 1.5273080011035291e-06, "Negative Geometric Mean": -13.34349348358295, "Negative prob": -13.34349348358295, "Normal Loss": 0.20480337738990784, "Normal prob": -0.20480337738990784, "Positive Loss": 0.0037362114526331425, "Positive prob": -0.0037362114526331425, "epoch": 3.91044776119403, "step": 131 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.19383595883846283, "Normal prob": -0.19383595883846283, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.91044776119403, "step": 131 }, { "epoch": 3.9402985074626864, "grad_norm": 6.0018477972181445, "learning_rate": 0.0, "loss": 0.1864, "step": 132 } ], "logging_steps": 1, "max_steps": 132, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 11615663554560.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }