{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.9402985074626864, "eval_steps": 500, "global_step": 132, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.473748803138733, "Normal prob": -1.473748803138733, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0, "step": 0 }, { "DPO Loss": 0.06949889122647772, "Negative Geometric Mean": -0.7320872274143302, "Negative prob": -0.7320872274143302, "Normal Loss": 1.1129032373428345, "Normal prob": -1.1129032373428345, "Positive Loss": 0.03312499821186066, "Positive prob": -0.03312499821186066, "epoch": 0, "step": 0 }, { "epoch": 0.029850746268656716, "grad_norm": 18.225476683490662, "learning_rate": 4.962121212121213e-06, "loss": 2.7615, "step": 1 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.3676621913909912, "Normal prob": -1.3676621913909912, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.029850746268656716, "step": 1 }, { "DPO Loss": 0.06949427533467885, "Negative Geometric Mean": -0.2709551656920078, "Negative prob": -0.2709551656920078, "Normal Loss": 1.0154451131820679, "Normal prob": -1.0154451131820679, "Positive Loss": 0.010648148134350777, "Positive prob": -0.010648148134350777, "epoch": 0.029850746268656716, "step": 1 }, { "epoch": 0.05970149253731343, "grad_norm": 10.973910370395869, "learning_rate": 4.924242424242425e-06, "loss": 2.5071, "step": 2 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0250481367111206, "Normal prob": -1.0250481367111206, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.05970149253731343, "step": 2 }, { "DPO Loss": 0.07027684572568721, "Negative Geometric Mean": -0.7035573122529645, "Negative prob": -0.7035573122529645, "Normal Loss": 0.9350224733352661, "Normal prob": -0.9350224733352661, "Positive Loss": 0.013161763548851013, "Positive prob": -0.013161763548851013, "epoch": 0.05970149253731343, "step": 2 }, { "epoch": 0.08955223880597014, "grad_norm": 5.286307932780534, "learning_rate": 4.8863636363636365e-06, "loss": 2.1838, "step": 3 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.099656343460083, "Normal prob": -1.099656343460083, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.08955223880597014, "step": 3 }, { "DPO Loss": 0.07000872130537095, "Negative Geometric Mean": -0.50920245398773, "Negative prob": -0.50920245398773, "Normal Loss": 1.012195110321045, "Normal prob": -1.012195110321045, "Positive Loss": 0.010950000025331974, "Positive prob": -0.010950000025331974, "epoch": 0.08955223880597014, "step": 3 }, { "epoch": 0.11940298507462686, "grad_norm": 4.694266529653364, "learning_rate": 4.848484848484849e-06, "loss": 2.0884, "step": 4 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.1125233173370361, "Normal prob": -1.1125233173370361, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.11940298507462686, "step": 4 }, { "DPO Loss": 0.06966151029727026, "Negative Geometric Mean": -0.6059479553903345, "Negative prob": -0.6059479553903345, "Normal Loss": 1.2086811065673828, "Normal prob": -1.2086811065673828, "Positive Loss": 0.013302751816809177, "Positive prob": -0.013302751816809177, "epoch": 0.11940298507462686, "step": 4 }, { "epoch": 0.14925373134328357, "grad_norm": 3.9343608034451956, "learning_rate": 4.810606060606061e-06, "loss": 2.3148, "step": 5 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0461715459823608, "Normal prob": -1.0461715459823608, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.14925373134328357, "step": 5 }, { "DPO Loss": 0.0697616162943168, "Negative Geometric Mean": -0.5313092979127134, "Negative prob": -0.5313092979127134, "Normal Loss": 0.9298596978187561, "Normal prob": -0.9298596978187561, "Positive Loss": 0.009642857126891613, "Positive prob": -0.009642857126891613, "epoch": 0.14925373134328357, "step": 5 }, { "epoch": 0.1791044776119403, "grad_norm": 4.978595566884078, "learning_rate": 4.772727272727273e-06, "loss": 2.1108, "step": 6 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9568106532096863, "Normal prob": -0.9568106532096863, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.1791044776119403, "step": 6 }, { "DPO Loss": 0.07176842575804532, "Negative Geometric Mean": -0.6178861788617886, "Negative prob": -0.6178861788617886, "Normal Loss": 1.025352120399475, "Normal prob": -1.025352120399475, "Positive Loss": 0.008620689623057842, "Positive prob": -0.008620689623057842, "epoch": 0.1791044776119403, "step": 6 }, { "epoch": 0.208955223880597, "grad_norm": 4.189574990356294, "learning_rate": 4.734848484848486e-06, "loss": 2.3772, "step": 7 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.1897211074829102, "Normal prob": -1.1897211074829102, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.208955223880597, "step": 7 }, { "DPO Loss": 0.07036739710471702, "Negative Geometric Mean": -0.3787878787878788, "Negative prob": -0.3787878787878788, "Normal Loss": 0.7370600700378418, "Normal prob": -0.7370600700378418, "Positive Loss": 0.011687500402331352, "Positive prob": -0.011687500402331352, "epoch": 0.208955223880597, "step": 7 }, { "epoch": 0.23880597014925373, "grad_norm": 6.321797893052054, "learning_rate": 4.696969696969698e-06, "loss": 1.9933, "step": 8 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8784773349761963, "Normal prob": -0.8784773349761963, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.23880597014925373, "step": 8 }, { "DPO Loss": 0.06670352572746277, "Negative Geometric Mean": -0.5685483870967742, "Negative prob": -0.5685483870967742, "Normal Loss": 0.5634253621101379, "Normal prob": -0.5634253621101379, "Positive Loss": 0.010757575742900372, "Positive prob": -0.010757575742900372, "epoch": 0.23880597014925373, "step": 8 }, { "epoch": 0.26865671641791045, "grad_norm": 2.925357157601987, "learning_rate": 4.6590909090909095e-06, "loss": 1.8436, "step": 9 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9526916742324829, "Normal prob": -0.9526916742324829, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.26865671641791045, "step": 9 }, { "DPO Loss": 0.06864461465565863, "Negative Geometric Mean": -0.6147959183673469, "Negative prob": -0.6147959183673469, "Normal Loss": 1.429092288017273, "Normal prob": -1.429092288017273, "Positive Loss": 0.007869564928114414, "Positive prob": -0.007869564928114414, "epoch": 0.26865671641791045, "step": 9 }, { "epoch": 0.29850746268656714, "grad_norm": 3.7243122638447597, "learning_rate": 4.621212121212122e-06, "loss": 2.1944, "step": 10 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0172185897827148, "Normal prob": -1.0172185897827148, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.29850746268656714, "step": 10 }, { "DPO Loss": 0.06859313298661615, "Negative Geometric Mean": -0.5025906735751295, "Negative prob": -0.5025906735751295, "Normal Loss": 0.9087347984313965, "Normal prob": -0.9087347984313965, "Positive Loss": 0.007932691834867, "Positive prob": -0.007932691834867, "epoch": 0.29850746268656714, "step": 10 }, { "epoch": 0.3283582089552239, "grad_norm": 3.39821832870912, "learning_rate": 4.583333333333333e-06, "loss": 1.9761, "step": 11 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9474860429763794, "Normal prob": -0.9474860429763794, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.3283582089552239, "step": 11 }, { "DPO Loss": 0.06592769100065496, "Negative Geometric Mean": -0.6051502145922747, "Negative prob": -0.6051502145922747, "Normal Loss": 0.9822975397109985, "Normal prob": -0.9822975397109985, "Positive Loss": 0.009666666388511658, "Positive prob": -0.009666666388511658, "epoch": 0.3283582089552239, "step": 11 }, { "epoch": 0.3582089552238806, "grad_norm": 3.294143205427048, "learning_rate": 4.5454545454545455e-06, "loss": 2.2052, "step": 12 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0535829067230225, "Normal prob": -1.0535829067230225, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.3582089552238806, "step": 12 }, { "DPO Loss": 0.06825827457617649, "Negative Geometric Mean": -0.4282178217821782, "Negative prob": -0.4282178217821782, "Normal Loss": 0.9327846169471741, "Normal prob": -0.9327846169471741, "Positive Loss": 0.010060605593025684, "Positive prob": -0.010060605593025684, "epoch": 0.3582089552238806, "step": 12 }, { "epoch": 0.3880597014925373, "grad_norm": 2.8675837782136466, "learning_rate": 4.507575757575758e-06, "loss": 2.0578, "step": 13 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7751196026802063, "Normal prob": -0.7751196026802063, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.3880597014925373, "step": 13 }, { "DPO Loss": 0.06905548523799253, "Negative Geometric Mean": -0.1627680311890838, "Negative prob": -0.1627680311890838, "Normal Loss": 1.0433595180511475, "Normal prob": -1.0433595180511475, "Positive Loss": 0.011105768382549286, "Positive prob": -0.011105768382549286, "epoch": 0.3880597014925373, "step": 13 }, { "epoch": 0.417910447761194, "grad_norm": 2.1475947055640088, "learning_rate": 4.46969696969697e-06, "loss": 2.0588, "step": 14 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8788169622421265, "Normal prob": -0.8788169622421265, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.417910447761194, "step": 14 }, { "DPO Loss": 0.06639852018123209, "Negative Geometric Mean": -0.6695035460992907, "Negative prob": -0.6695035460992907, "Normal Loss": 0.9589195251464844, "Normal prob": -0.9589195251464844, "Positive Loss": 0.00703846151009202, "Positive prob": -0.00703846151009202, "epoch": 0.417910447761194, "step": 14 }, { "epoch": 0.44776119402985076, "grad_norm": 2.1026025349240416, "learning_rate": 4.4318181818181824e-06, "loss": 2.0814, "step": 15 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.03587806224823, "Normal prob": -1.03587806224823, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.44776119402985076, "step": 15 }, { "DPO Loss": 0.06737810413934098, "Negative Geometric Mean": -0.6141304347826086, "Negative prob": -0.6141304347826086, "Normal Loss": 0.7427144050598145, "Normal prob": -0.7427144050598145, "Positive Loss": 0.00862637348473072, "Positive prob": -0.00862637348473072, "epoch": 0.44776119402985076, "step": 15 }, { "epoch": 0.47761194029850745, "grad_norm": 2.121417596280862, "learning_rate": 4.393939393939394e-06, "loss": 1.9041, "step": 16 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0410958528518677, "Normal prob": -1.0410958528518677, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.47761194029850745, "step": 16 }, { "DPO Loss": 0.06757464144806456, "Negative Geometric Mean": -0.8685714285714285, "Negative prob": -0.8685714285714285, "Normal Loss": 1.2538859844207764, "Normal prob": -1.2538859844207764, "Positive Loss": 0.010104166343808174, "Positive prob": -0.010104166343808174, "epoch": 0.47761194029850745, "step": 16 }, { "epoch": 0.5074626865671642, "grad_norm": 2.0243861101726144, "learning_rate": 4.356060606060606e-06, "loss": 2.2044, "step": 17 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8563380241394043, "Normal prob": -0.8563380241394043, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5074626865671642, "step": 17 }, { "DPO Loss": 0.06507499159121503, "Negative Geometric Mean": -0.92578125, "Negative prob": -0.92578125, "Normal Loss": 0.4478856325149536, "Normal prob": -0.4478856325149536, "Positive Loss": 0.009772727265954018, "Positive prob": -0.009772727265954018, "epoch": 0.5074626865671642, "step": 17 }, { "epoch": 0.5373134328358209, "grad_norm": 1.9874172089797602, "learning_rate": 4.3181818181818185e-06, "loss": 1.8841, "step": 18 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.1045136451721191, "Normal prob": -1.1045136451721191, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5373134328358209, "step": 18 }, { "DPO Loss": 0.06580628996222289, "Negative Geometric Mean": -0.6819407008086253, "Negative prob": -0.6819407008086253, "Normal Loss": 0.8865710496902466, "Normal prob": -0.8865710496902466, "Positive Loss": 0.005851063411682844, "Positive prob": -0.005851063411682844, "epoch": 0.5373134328358209, "step": 18 }, { "epoch": 0.5671641791044776, "grad_norm": 2.3150219841921498, "learning_rate": 4.280303030303031e-06, "loss": 1.8932, "step": 19 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0592763423919678, "Normal prob": -1.0592763423919678, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5671641791044776, "step": 19 }, { "DPO Loss": 0.06432851002636585, "Negative Geometric Mean": -0.6752827140549273, "Negative prob": -0.6752827140549273, "Normal Loss": 0.912404477596283, "Normal prob": -0.912404477596283, "Positive Loss": 0.009065933525562286, "Positive prob": -0.009065933525562286, "epoch": 0.5671641791044776, "step": 19 }, { "epoch": 0.5970149253731343, "grad_norm": 2.2096690829306223, "learning_rate": 4.242424242424243e-06, "loss": 2.0859, "step": 20 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9406824111938477, "Normal prob": -0.9406824111938477, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.5970149253731343, "step": 20 }, { "DPO Loss": 0.060591559873934865, "Negative Geometric Mean": -0.893491124260355, "Negative prob": -0.893491124260355, "Normal Loss": 0.9586926698684692, "Normal prob": -0.9586926698684692, "Positive Loss": 0.00845070369541645, "Positive prob": -0.00845070369541645, "epoch": 0.5970149253731343, "step": 20 }, { "epoch": 0.6268656716417911, "grad_norm": 2.0543289796446245, "learning_rate": 4.204545454545455e-06, "loss": 1.9797, "step": 21 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9250226020812988, "Normal prob": -0.9250226020812988, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.6268656716417911, "step": 21 }, { "DPO Loss": 0.06826819659834403, "Negative Geometric Mean": -0.7664041994750657, "Negative prob": -0.7664041994750657, "Normal Loss": 0.9128367900848389, "Normal prob": -0.9128367900848389, "Positive Loss": 0.009385964833199978, "Positive prob": -0.009385964833199978, "epoch": 0.6268656716417911, "step": 21 }, { "epoch": 0.6567164179104478, "grad_norm": 2.1732788810158947, "learning_rate": 4.166666666666667e-06, "loss": 2.1043, "step": 22 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8822575211524963, "Normal prob": -0.8822575211524963, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.6567164179104478, "step": 22 }, { "DPO Loss": 0.06523964930067404, "Negative Geometric Mean": -0.5416666666666666, "Negative prob": -0.5416666666666666, "Normal Loss": 1.1358203887939453, "Normal prob": -1.1358203887939453, "Positive Loss": 0.010486111044883728, "Positive prob": -0.010486111044883728, "epoch": 0.6567164179104478, "step": 22 }, { "epoch": 0.6865671641791045, "grad_norm": 1.9168421019267101, "learning_rate": 4.128787878787879e-06, "loss": 2.1751, "step": 23 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.3600391149520874, "Normal prob": -1.3600391149520874, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.6865671641791045, "step": 23 }, { "DPO Loss": 0.06455202489822325, "Negative Geometric Mean": -0.6653846153846154, "Negative prob": -0.6653846153846154, "Normal Loss": 0.8008089065551758, "Normal prob": -0.8008089065551758, "Positive Loss": 0.009148147888481617, "Positive prob": -0.009148147888481617, "epoch": 0.6865671641791045, "step": 23 }, { "epoch": 0.7164179104477612, "grad_norm": 2.0532254578506643, "learning_rate": 4.0909090909090915e-06, "loss": 2.202, "step": 24 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.061716914176941, "Normal prob": -1.061716914176941, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.7164179104477612, "step": 24 }, { "DPO Loss": 0.063801133538218, "Negative Geometric Mean": -0.7, "Negative prob": -0.7, "Normal Loss": 0.9242342710494995, "Normal prob": -0.9242342710494995, "Positive Loss": 0.006785714067518711, "Positive prob": -0.006785714067518711, "epoch": 0.7164179104477612, "step": 24 }, { "epoch": 0.746268656716418, "grad_norm": 1.9740851905222436, "learning_rate": 4.053030303030303e-06, "loss": 2.0742, "step": 25 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0656933784484863, "Normal prob": -1.0656933784484863, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.746268656716418, "step": 25 }, { "DPO Loss": 0.06495219940286055, "Negative Geometric Mean": -0.7237076648841355, "Negative prob": -0.7237076648841355, "Normal Loss": 1.0261398553848267, "Normal prob": -1.0261398553848267, "Positive Loss": 0.008142856881022453, "Positive prob": -0.008142856881022453, "epoch": 0.746268656716418, "step": 25 }, { "epoch": 0.7761194029850746, "grad_norm": 2.005923039875544, "learning_rate": 4.015151515151515e-06, "loss": 2.1505, "step": 26 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0470722913742065, "Normal prob": -1.0470722913742065, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.7761194029850746, "step": 26 }, { "DPO Loss": 0.06028949635352483, "Negative Geometric Mean": -0.7204724409448819, "Negative prob": -0.7204724409448819, "Normal Loss": 1.1458784341812134, "Normal prob": -1.1458784341812134, "Positive Loss": 0.010136363096535206, "Positive prob": -0.010136363096535206, "epoch": 0.7761194029850746, "step": 26 }, { "epoch": 0.8059701492537313, "grad_norm": 2.044925450136982, "learning_rate": 3.9772727272727275e-06, "loss": 2.1185, "step": 27 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.04264497756958, "Normal prob": -1.04264497756958, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.8059701492537313, "step": 27 }, { "DPO Loss": 0.05726022616321828, "Negative Geometric Mean": -1.7555555555555555, "Negative prob": -1.7555555555555555, "Normal Loss": 0.9190311431884766, "Normal prob": -0.9190311431884766, "Positive Loss": 0.005844155326485634, "Positive prob": -0.005844155326485634, "epoch": 0.8059701492537313, "step": 27 }, { "epoch": 0.835820895522388, "grad_norm": 2.296980857842588, "learning_rate": 3.93939393939394e-06, "loss": 2.0114, "step": 28 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.1765683889389038, "Normal prob": -1.1765683889389038, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.835820895522388, "step": 28 }, { "DPO Loss": 0.06474883760926352, "Negative Geometric Mean": -0.5363128491620112, "Negative prob": -0.5363128491620112, "Normal Loss": 0.9715909361839294, "Normal prob": -0.9715909361839294, "Positive Loss": 0.007989129982888699, "Positive prob": -0.007989129982888699, "epoch": 0.835820895522388, "step": 28 }, { "epoch": 0.8656716417910447, "grad_norm": 2.0567429049328383, "learning_rate": 3.901515151515151e-06, "loss": 2.2164, "step": 29 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9982737302780151, "Normal prob": -0.9982737302780151, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.8656716417910447, "step": 29 }, { "DPO Loss": 0.0608347556233502, "Negative Geometric Mean": -0.7108167770419426, "Negative prob": -0.7108167770419426, "Normal Loss": 1.1631579399108887, "Normal prob": -1.1631579399108887, "Positive Loss": 0.004755434580147266, "Positive prob": -0.004755434580147266, "epoch": 0.8656716417910447, "step": 29 }, { "epoch": 0.8955223880597015, "grad_norm": 1.8213700432454047, "learning_rate": 3.863636363636364e-06, "loss": 2.0177, "step": 30 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9721886515617371, "Normal prob": -0.9721886515617371, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.8955223880597015, "step": 30 }, { "DPO Loss": 0.06105003324712648, "Negative Geometric Mean": -0.7432835820895523, "Negative prob": -0.7432835820895523, "Normal Loss": 0.8931198716163635, "Normal prob": -0.8931198716163635, "Positive Loss": 0.008076922968029976, "Positive prob": -0.008076922968029976, "epoch": 0.8955223880597015, "step": 30 }, { "epoch": 0.9253731343283582, "grad_norm": 2.0079866997733427, "learning_rate": 3.825757575757576e-06, "loss": 1.9801, "step": 31 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9485834240913391, "Normal prob": -0.9485834240913391, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9253731343283582, "step": 31 }, { "DPO Loss": 0.059690654129860854, "Negative Geometric Mean": -0.7115987460815048, "Negative prob": -0.7115987460815048, "Normal Loss": 0.7757353186607361, "Normal prob": -0.7757353186607361, "Positive Loss": 0.008100775070488453, "Positive prob": -0.008100775070488453, "epoch": 0.9253731343283582, "step": 31 }, { "epoch": 0.9552238805970149, "grad_norm": 2.093650864008497, "learning_rate": 3.7878787878787882e-06, "loss": 1.9138, "step": 32 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8712155222892761, "Normal prob": -0.8712155222892761, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9552238805970149, "step": 32 }, { "DPO Loss": 0.058390165026955355, "Negative Geometric Mean": -0.8195718654434251, "Negative prob": -0.8195718654434251, "Normal Loss": 1.0223194360733032, "Normal prob": -1.0223194360733032, "Positive Loss": 0.009166666306555271, "Positive prob": -0.009166666306555271, "epoch": 0.9552238805970149, "step": 32 }, { "epoch": 0.9850746268656716, "grad_norm": 1.918674743301411, "learning_rate": 3.7500000000000005e-06, "loss": 1.8673, "step": 33 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8764194846153259, "Normal prob": -0.8764194846153259, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9850746268656716, "step": 33 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9049854874610901, "Normal prob": -0.9049854874610901, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 0.9850746268656716, "step": 33 }, { "epoch": 1.0149253731343284, "grad_norm": 2.3870461406394385, "learning_rate": 3.7121212121212124e-06, "loss": 1.6596, "step": 34 }, { "DPO Loss": 0.05161118360714422, "Negative Geometric Mean": -1.1951219512195121, "Negative prob": -1.1951219512195121, "Normal Loss": 0.6912181377410889, "Normal prob": -0.6912181377410889, "Positive Loss": 0.008287036791443825, "Positive prob": -0.008287036791443825, "epoch": 1.0149253731343284, "step": 34 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7368760108947754, "Normal prob": -0.7368760108947754, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.0149253731343284, "step": 34 }, { "epoch": 1.044776119402985, "grad_norm": 2.0096981539582512, "learning_rate": 3.6742424242424247e-06, "loss": 1.7642, "step": 35 }, { "DPO Loss": 0.050134327618006326, "Negative Geometric Mean": -1.1651090342679127, "Negative prob": -1.1651090342679127, "Normal Loss": 0.5865209698677063, "Normal prob": -0.5865209698677063, "Positive Loss": 0.0053385416977107525, "Positive prob": -0.0053385416977107525, "epoch": 1.044776119402985, "step": 35 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7552986741065979, "Normal prob": -0.7552986741065979, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.044776119402985, "step": 35 }, { "epoch": 1.0746268656716418, "grad_norm": 2.257076769713416, "learning_rate": 3.6363636363636366e-06, "loss": 1.3998, "step": 36 }, { "DPO Loss": 0.057914387710718065, "Negative Geometric Mean": -0.9114391143911439, "Negative prob": -0.9114391143911439, "Normal Loss": 0.9425891041755676, "Normal prob": -0.9425891041755676, "Positive Loss": 0.005472221877425909, "Positive prob": -0.005472221877425909, "epoch": 1.0746268656716418, "step": 36 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.690214216709137, "Normal prob": -0.690214216709137, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.0746268656716418, "step": 36 }, { "epoch": 1.1044776119402986, "grad_norm": 1.756066979163819, "learning_rate": 3.598484848484849e-06, "loss": 1.7631, "step": 37 }, { "DPO Loss": 0.05986767521840584, "Negative Geometric Mean": -0.7525083612040134, "Negative prob": -0.7525083612040134, "Normal Loss": 1.1574740409851074, "Normal prob": -1.1574740409851074, "Positive Loss": 0.008390804752707481, "Positive prob": -0.008390804752707481, "epoch": 1.1044776119402986, "step": 37 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.9453551769256592, "Normal prob": -0.9453551769256592, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.1044776119402986, "step": 37 }, { "epoch": 1.1343283582089552, "grad_norm": 2.052189862953305, "learning_rate": 3.560606060606061e-06, "loss": 1.9055, "step": 38 }, { "DPO Loss": 0.05071519177647574, "Negative Geometric Mean": -1.1154529307282415, "Negative prob": -1.1154529307282415, "Normal Loss": 0.6747573018074036, "Normal prob": -0.6747573018074036, "Positive Loss": 0.007083333097398281, "Positive prob": -0.007083333097398281, "epoch": 1.1343283582089552, "step": 38 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5797101259231567, "Normal prob": -0.5797101259231567, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.1343283582089552, "step": 38 }, { "epoch": 1.164179104477612, "grad_norm": 1.8364836375102407, "learning_rate": 3.522727272727273e-06, "loss": 1.6451, "step": 39 }, { "DPO Loss": 0.05363530097738672, "Negative Geometric Mean": -0.7802197802197802, "Negative prob": -0.7802197802197802, "Normal Loss": 0.8341231942176819, "Normal prob": -0.8341231942176819, "Positive Loss": 0.007555555552244186, "Positive prob": -0.007555555552244186, "epoch": 1.164179104477612, "step": 39 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.774144172668457, "Normal prob": -0.774144172668457, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.164179104477612, "step": 39 }, { "epoch": 1.1940298507462686, "grad_norm": 1.752600899095073, "learning_rate": 3.4848484848484854e-06, "loss": 1.7698, "step": 40 }, { "DPO Loss": 0.046149434416349194, "Negative Geometric Mean": -1.108695652173913, "Negative prob": -1.108695652173913, "Normal Loss": 0.6663944721221924, "Normal prob": -0.6663944721221924, "Positive Loss": 0.005934066139161587, "Positive prob": -0.005934066139161587, "epoch": 1.1940298507462686, "step": 40 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5120744705200195, "Normal prob": -0.5120744705200195, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.1940298507462686, "step": 40 }, { "epoch": 1.2238805970149254, "grad_norm": 1.7160554023533308, "learning_rate": 3.4469696969696977e-06, "loss": 1.4923, "step": 41 }, { "DPO Loss": 0.05481914864319287, "Negative Geometric Mean": -0.9079497907949791, "Negative prob": -0.9079497907949791, "Normal Loss": 0.8701958060264587, "Normal prob": -0.8701958060264587, "Positive Loss": 0.005239361431449652, "Positive prob": -0.005239361431449652, "epoch": 1.2238805970149254, "step": 41 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6536540985107422, "Normal prob": -0.6536540985107422, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.2238805970149254, "step": 41 }, { "epoch": 1.2537313432835822, "grad_norm": 1.6657379646847996, "learning_rate": 3.409090909090909e-06, "loss": 1.6994, "step": 42 }, { "DPO Loss": 0.034274153669708436, "Negative Geometric Mean": -1.696078431372549, "Negative prob": -1.696078431372549, "Normal Loss": 0.573955774307251, "Normal prob": -0.573955774307251, "Positive Loss": 0.011519607156515121, "Positive prob": -0.011519607156515121, "epoch": 1.2537313432835822, "step": 42 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5243297219276428, "Normal prob": -0.5243297219276428, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.2537313432835822, "step": 42 }, { "epoch": 1.2835820895522387, "grad_norm": 1.7425846272146088, "learning_rate": 3.3712121212121214e-06, "loss": 1.3121, "step": 43 }, { "DPO Loss": 0.03886750898633557, "Negative Geometric Mean": -1.368421052631579, "Negative prob": -1.368421052631579, "Normal Loss": 0.9870129823684692, "Normal prob": -0.9870129823684692, "Positive Loss": 0.00590425543487072, "Positive prob": -0.00590425543487072, "epoch": 1.2835820895522387, "step": 43 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8093385100364685, "Normal prob": -0.8093385100364685, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.2835820895522387, "step": 43 }, { "epoch": 1.3134328358208955, "grad_norm": 1.9499203274848462, "learning_rate": 3.3333333333333333e-06, "loss": 1.737, "step": 44 }, { "DPO Loss": 0.047065760496309984, "Negative Geometric Mean": -0.783625730994152, "Negative prob": -0.783625730994152, "Normal Loss": 0.7276914119720459, "Normal prob": -0.7276914119720459, "Positive Loss": 0.00962962955236435, "Positive prob": -0.00962962955236435, "epoch": 1.3134328358208955, "step": 44 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0343434810638428, "Normal prob": -1.0343434810638428, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.3134328358208955, "step": 44 }, { "epoch": 1.3432835820895521, "grad_norm": 2.207535556548572, "learning_rate": 3.2954545454545456e-06, "loss": 1.9807, "step": 45 }, { "DPO Loss": 0.03902078617347059, "Negative Geometric Mean": -1.3935860058309038, "Negative prob": -1.3935860058309038, "Normal Loss": 0.7487812638282776, "Normal prob": -0.7487812638282776, "Positive Loss": 0.005891088861972094, "Positive prob": -0.005891088861972094, "epoch": 1.3432835820895521, "step": 45 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8634064197540283, "Normal prob": -0.8634064197540283, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.3432835820895521, "step": 45 }, { "epoch": 1.373134328358209, "grad_norm": 1.733736404057815, "learning_rate": 3.257575757575758e-06, "loss": 1.796, "step": 46 }, { "DPO Loss": 0.008443692622487866, "Negative Geometric Mean": -3.4796747967479673, "Negative prob": -3.4796747967479673, "Normal Loss": 1.2108107805252075, "Normal prob": -1.2108107805252075, "Positive Loss": 0.006159419659525156, "Positive prob": -0.006159419659525156, "epoch": 1.373134328358209, "step": 46 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8496732115745544, "Normal prob": -0.8496732115745544, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.373134328358209, "step": 46 }, { "epoch": 1.4029850746268657, "grad_norm": 2.0551411951408087, "learning_rate": 3.21969696969697e-06, "loss": 1.9398, "step": 47 }, { "DPO Loss": 0.01909205677409713, "Negative Geometric Mean": -2.3923809523809525, "Negative prob": -2.3923809523809525, "Normal Loss": 0.9414006471633911, "Normal prob": -0.9414006471633911, "Positive Loss": 0.008750000037252903, "Positive prob": -0.008750000037252903, "epoch": 1.4029850746268657, "step": 47 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8334981799125671, "Normal prob": -0.8334981799125671, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.4029850746268657, "step": 47 }, { "epoch": 1.4328358208955223, "grad_norm": 1.8388003689411008, "learning_rate": 3.181818181818182e-06, "loss": 1.6728, "step": 48 }, { "DPO Loss": 0.02497623897733731, "Negative Geometric Mean": -2.0987654320987654, "Negative prob": -2.0987654320987654, "Normal Loss": 0.8016359806060791, "Normal prob": -0.8016359806060791, "Positive Loss": 0.006173469126224518, "Positive prob": -0.006173469126224518, "epoch": 1.4328358208955223, "step": 48 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8423645496368408, "Normal prob": -0.8423645496368408, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.4328358208955223, "step": 48 }, { "epoch": 1.462686567164179, "grad_norm": 2.1681118756810696, "learning_rate": 3.143939393939394e-06, "loss": 1.447, "step": 49 }, { "DPO Loss": 0.013682508086219123, "Negative Geometric Mean": -2.435483870967742, "Negative prob": -2.435483870967742, "Normal Loss": 0.7013167142868042, "Normal prob": -0.7013167142868042, "Positive Loss": 0.006237373687326908, "Positive prob": -0.006237373687326908, "epoch": 1.462686567164179, "step": 49 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6980867981910706, "Normal prob": -0.6980867981910706, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.462686567164179, "step": 49 }, { "epoch": 1.4925373134328357, "grad_norm": 1.9846715793493248, "learning_rate": 3.1060606060606063e-06, "loss": 1.6488, "step": 50 }, { "DPO Loss": 0.03537065017993214, "Negative Geometric Mean": -1.2079470198675497, "Negative prob": -1.2079470198675497, "Normal Loss": 0.9708001613616943, "Normal prob": -0.9708001613616943, "Positive Loss": 0.012167300097644329, "Positive prob": -0.012167300097644329, "epoch": 1.4925373134328357, "step": 50 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6297158598899841, "Normal prob": -0.6297158598899841, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.4925373134328357, "step": 50 }, { "epoch": 1.5223880597014925, "grad_norm": 1.889993754274304, "learning_rate": 3.0681818181818186e-06, "loss": 1.5471, "step": 51 }, { "DPO Loss": 0.00511619702516101, "Negative Geometric Mean": -3.75, "Negative prob": -3.75, "Normal Loss": 0.5263843536376953, "Normal prob": -0.5263843536376953, "Positive Loss": 0.008690476417541504, "Positive prob": -0.008690476417541504, "epoch": 1.5223880597014925, "step": 51 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.648101270198822, "Normal prob": -0.648101270198822, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.5223880597014925, "step": 51 }, { "epoch": 1.5522388059701493, "grad_norm": 1.9583224262606371, "learning_rate": 3.0303030303030305e-06, "loss": 1.368, "step": 52 }, { "DPO Loss": 0.023317925217908392, "Negative Geometric Mean": -2.0302375809935205, "Negative prob": -2.0302375809935205, "Normal Loss": 0.5086835026741028, "Normal prob": -0.5086835026741028, "Positive Loss": 0.005246913526207209, "Positive prob": -0.005246913526207209, "epoch": 1.5522388059701493, "step": 52 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7624507546424866, "Normal prob": -0.7624507546424866, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.5522388059701493, "step": 52 }, { "epoch": 1.582089552238806, "grad_norm": 2.0796242286981483, "learning_rate": 2.992424242424243e-06, "loss": 1.5007, "step": 53 }, { "DPO Loss": 0.02533049901224309, "Negative Geometric Mean": -1.7668711656441718, "Negative prob": -1.7668711656441718, "Normal Loss": 0.8126410841941833, "Normal prob": -0.8126410841941833, "Positive Loss": 0.006399999838322401, "Positive prob": -0.006399999838322401, "epoch": 1.582089552238806, "step": 53 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6325581669807434, "Normal prob": -0.6325581669807434, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.582089552238806, "step": 53 }, { "epoch": 1.6119402985074627, "grad_norm": 1.8028010090491322, "learning_rate": 2.954545454545455e-06, "loss": 1.6298, "step": 54 }, { "DPO Loss": 0.008989192406013728, "Negative Geometric Mean": -2.9140893470790377, "Negative prob": -2.9140893470790377, "Normal Loss": 0.7080327272415161, "Normal prob": -0.7080327272415161, "Positive Loss": 0.007157894782721996, "Positive prob": -0.007157894782721996, "epoch": 1.6119402985074627, "step": 54 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5518050789833069, "Normal prob": -0.5518050789833069, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.6119402985074627, "step": 54 }, { "epoch": 1.6417910447761193, "grad_norm": 2.1317703393812013, "learning_rate": 2.916666666666667e-06, "loss": 1.4784, "step": 55 }, { "DPO Loss": 0.0017641501193504069, "Negative Geometric Mean": -4.435643564356436, "Negative prob": -4.435643564356436, "Normal Loss": 0.6120218634605408, "Normal prob": -0.6120218634605408, "Positive Loss": 0.008848484605550766, "Positive prob": -0.008848484605550766, "epoch": 1.6417910447761193, "step": 55 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8250529766082764, "Normal prob": -0.8250529766082764, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.6417910447761193, "step": 55 }, { "epoch": 1.671641791044776, "grad_norm": 1.9295233424895062, "learning_rate": 2.8787878787878793e-06, "loss": 1.4833, "step": 56 }, { "DPO Loss": 0.0008582962926898683, "Negative Geometric Mean": -5.420054200542006, "Negative prob": -5.420054200542006, "Normal Loss": 0.8771640658378601, "Normal prob": -0.8771640658378601, "Positive Loss": 0.005862068850547075, "Positive prob": -0.005862068850547075, "epoch": 1.671641791044776, "step": 56 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7775915861129761, "Normal prob": -0.7775915861129761, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.671641791044776, "step": 56 }, { "epoch": 1.7014925373134329, "grad_norm": 2.1336621561000517, "learning_rate": 2.8409090909090916e-06, "loss": 1.5531, "step": 57 }, { "DPO Loss": 3.0859386382985467e-06, "Negative Geometric Mean": -10.939759036144578, "Negative prob": -10.939759036144578, "Normal Loss": 0.7169179320335388, "Normal prob": -0.7169179320335388, "Positive Loss": 0.004330985713750124, "Positive prob": -0.004330985713750124, "epoch": 1.7014925373134329, "step": 57 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7558799386024475, "Normal prob": -0.7558799386024475, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7014925373134329, "step": 57 }, { "epoch": 1.7313432835820897, "grad_norm": 2.026861999145552, "learning_rate": 2.803030303030303e-06, "loss": 1.6893, "step": 58 }, { "DPO Loss": 9.637681779237705e-06, "Negative Geometric Mean": -9.8348623853211, "Negative prob": -9.8348623853211, "Normal Loss": 0.7010309100151062, "Normal prob": -0.7010309100151062, "Positive Loss": 0.0077380952425301075, "Positive prob": -0.0077380952425301075, "epoch": 1.7313432835820897, "step": 58 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8174257278442383, "Normal prob": -0.8174257278442383, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7313432835820897, "step": 58 }, { "epoch": 1.7611940298507462, "grad_norm": 2.0237400680669233, "learning_rate": 2.7651515151515153e-06, "loss": 1.4707, "step": 59 }, { "DPO Loss": 4.87264787158538e-06, "Negative Geometric Mean": -10.53061224489796, "Negative prob": -10.53061224489796, "Normal Loss": 0.9056603908538818, "Normal prob": -0.9056603908538818, "Positive Loss": 0.005695652216672897, "Positive prob": -0.005695652216672897, "epoch": 1.7611940298507462, "step": 59 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7474267482757568, "Normal prob": -0.7474267482757568, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7611940298507462, "step": 59 }, { "epoch": 1.7910447761194028, "grad_norm": 1.6643331247068862, "learning_rate": 2.7272727272727272e-06, "loss": 1.5398, "step": 60 }, { "DPO Loss": 5.683581101286291e-06, "Negative Geometric Mean": -10.49802371541502, "Negative prob": -10.49802371541502, "Normal Loss": 0.877220094203949, "Normal prob": -0.877220094203949, "Positive Loss": 0.005698529537767172, "Positive prob": -0.005698529537767172, "epoch": 1.7910447761194028, "step": 60 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6736842393875122, "Normal prob": -0.6736842393875122, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.7910447761194028, "step": 60 }, { "epoch": 1.8208955223880596, "grad_norm": 1.7989892631140816, "learning_rate": 2.6893939393939395e-06, "loss": 1.4722, "step": 61 }, { "DPO Loss": 2.137128869937267e-05, "Negative Geometric Mean": -8.969258589511755, "Negative prob": -8.969258589511755, "Normal Loss": 0.7194860577583313, "Normal prob": -0.7194860577583313, "Positive Loss": 0.00649305572733283, "Positive prob": -0.00649305572733283, "epoch": 1.8208955223880596, "step": 61 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.929568350315094, "Normal prob": -0.929568350315094, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.8208955223880596, "step": 61 }, { "epoch": 1.8507462686567164, "grad_norm": 1.9526547595423867, "learning_rate": 2.6515151515151514e-06, "loss": 1.5191, "step": 62 }, { "DPO Loss": 1.0951692777756726e-07, "Negative Geometric Mean": -14.311111111111112, "Negative prob": -14.311111111111112, "Normal Loss": 0.7467652559280396, "Normal prob": -0.7467652559280396, "Positive Loss": 0.0054220776073634624, "Positive prob": -0.0054220776073634624, "epoch": 1.8507462686567164, "step": 62 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6857832670211792, "Normal prob": -0.6857832670211792, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.8507462686567164, "step": 62 }, { "epoch": 1.8805970149253732, "grad_norm": 3.017658278101351, "learning_rate": 2.6136363636363637e-06, "loss": 1.3437, "step": 63 }, { "DPO Loss": 5.283075192742321e-06, "Negative Geometric Mean": -10.6875, "Negative prob": -10.6875, "Normal Loss": 1.0695186853408813, "Normal prob": -1.0695186853408813, "Positive Loss": 0.007590909022837877, "Positive prob": -0.007590909022837877, "epoch": 1.8805970149253732, "step": 63 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.67087721824646, "Normal prob": -0.67087721824646, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.8805970149253732, "step": 63 }, { "epoch": 1.9104477611940298, "grad_norm": 1.9690280536538323, "learning_rate": 2.575757575757576e-06, "loss": 1.6489, "step": 64 }, { "DPO Loss": 6.82746902339858e-09, "Negative Geometric Mean": -17.046728971962615, "Negative prob": -17.046728971962615, "Normal Loss": 0.8287292718887329, "Normal prob": -0.8287292718887329, "Positive Loss": 0.014733541756868362, "Positive prob": -0.014733541756868362, "epoch": 1.9104477611940298, "step": 64 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7296082377433777, "Normal prob": -0.7296082377433777, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.9104477611940298, "step": 64 }, { "epoch": 1.9402985074626866, "grad_norm": 1.980498582311496, "learning_rate": 2.537878787878788e-06, "loss": 1.7853, "step": 65 }, { "DPO Loss": 0.0017371726894365076, "Negative Geometric Mean": -4.584440227703984, "Negative prob": -4.584440227703984, "Normal Loss": 0.6079380512237549, "Normal prob": -0.6079380512237549, "Positive Loss": 0.006488095503300428, "Positive prob": -0.006488095503300428, "epoch": 1.9402985074626866, "step": 65 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8265402913093567, "Normal prob": -0.8265402913093567, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.9402985074626866, "step": 65 }, { "epoch": 1.9701492537313432, "grad_norm": 1.9860225812192471, "learning_rate": 2.5e-06, "loss": 1.3121, "step": 66 }, { "DPO Loss": 1.985446938559249e-08, "Negative Geometric Mean": -16.0, "Negative prob": -16.0, "Normal Loss": 0.8603014945983887, "Normal prob": -0.8603014945983887, "Positive Loss": 0.006925926078110933, "Positive prob": -0.006925926078110933, "epoch": 1.9701492537313432, "step": 66 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0001871585845947, "Normal prob": -1.0001871585845947, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 1.9701492537313432, "step": 66 }, { "epoch": 2.0, "grad_norm": 2.1586739039579803, "learning_rate": 2.4621212121212125e-06, "loss": 1.6944, "step": 67 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.660470187664032, "Normal prob": -0.660470187664032, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.0, "step": 67 }, { "DPO Loss": 2.795109238712382e-09, "Negative Geometric Mean": -17.994459833795013, "Negative prob": -17.994459833795013, "Normal Loss": 0.5924752354621887, "Normal prob": -0.5924752354621887, "Positive Loss": 0.007375000044703484, "Positive prob": -0.007375000044703484, "epoch": 2.0, "step": 67 }, { "epoch": 2.029850746268657, "grad_norm": 2.0311133700966093, "learning_rate": 2.4242424242424244e-06, "loss": 1.2332, "step": 68 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.30464887619018555, "Normal prob": -0.30464887619018555, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.029850746268657, "step": 68 }, { "DPO Loss": 3.3765403651445788e-06, "Negative Geometric Mean": -10.989200863930886, "Negative prob": -10.989200863930886, "Normal Loss": 0.5390244126319885, "Normal prob": -0.5390244126319885, "Positive Loss": 0.004907407332211733, "Positive prob": -0.004907407332211733, "epoch": 2.029850746268657, "step": 68 }, { "epoch": 2.0597014925373136, "grad_norm": 1.9315697106843346, "learning_rate": 2.3863636363636367e-06, "loss": 1.1108, "step": 69 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.49376991391181946, "Normal prob": -0.49376991391181946, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.0597014925373136, "step": 69 }, { "DPO Loss": 1.0930880593796741e-09, "Negative Geometric Mean": -19.382113821138212, "Negative prob": -19.382113821138212, "Normal Loss": 0.956488847732544, "Normal prob": -0.956488847732544, "Positive Loss": 0.005253623239696026, "Positive prob": -0.005253623239696026, "epoch": 2.0597014925373136, "step": 69 }, { "epoch": 2.08955223880597, "grad_norm": 1.7728900772175493, "learning_rate": 2.348484848484849e-06, "loss": 1.5013, "step": 70 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6218942403793335, "Normal prob": -0.6218942403793335, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.08955223880597, "step": 70 }, { "DPO Loss": 3.561967749923965e-05, "Negative Geometric Mean": -8.606498194945848, "Negative prob": -8.606498194945848, "Normal Loss": 0.5527488589286804, "Normal prob": -0.5527488589286804, "Positive Loss": 0.00751700671389699, "Positive prob": -0.00751700671389699, "epoch": 2.08955223880597, "step": 70 }, { "epoch": 2.1194029850746268, "grad_norm": 1.826781792071871, "learning_rate": 2.310606060606061e-06, "loss": 1.454, "step": 71 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.608776867389679, "Normal prob": -0.608776867389679, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.1194029850746268, "step": 71 }, { "DPO Loss": 1.2300846750239835e-09, "Negative Geometric Mean": -18.6280193236715, "Negative prob": -18.6280193236715, "Normal Loss": 0.32436975836753845, "Normal prob": -0.32436975836753845, "Positive Loss": 0.0032526878640055656, "Positive prob": -0.0032526878640055656, "epoch": 2.1194029850746268, "step": 71 }, { "epoch": 2.1492537313432836, "grad_norm": 1.9825051027124971, "learning_rate": 2.2727272727272728e-06, "loss": 1.1902, "step": 72 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8769010305404663, "Normal prob": -0.8769010305404663, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.1492537313432836, "step": 72 }, { "DPO Loss": 3.4191614042999946e-10, "Negative Geometric Mean": -20.10161662817552, "Negative prob": -20.10161662817552, "Normal Loss": 0.36518046259880066, "Normal prob": -0.36518046259880066, "Positive Loss": 0.00548913050442934, "Positive prob": -0.00548913050442934, "epoch": 2.1492537313432836, "step": 72 }, { "epoch": 2.1791044776119404, "grad_norm": 1.901526952951394, "learning_rate": 2.234848484848485e-06, "loss": 1.189, "step": 73 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5985037684440613, "Normal prob": -0.5985037684440613, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.1791044776119404, "step": 73 }, { "DPO Loss": 1.5418231055074745e-05, "Negative Geometric Mean": -9.411764705882353, "Negative prob": -9.411764705882353, "Normal Loss": 0.5556577444076538, "Normal prob": -0.5556577444076538, "Positive Loss": 0.0051428573206067085, "Positive prob": -0.0051428573206067085, "epoch": 2.1791044776119404, "step": 73 }, { "epoch": 2.208955223880597, "grad_norm": 1.9121864314767125, "learning_rate": 2.196969696969697e-06, "loss": 1.4241, "step": 74 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4503432512283325, "Normal prob": -0.4503432512283325, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.208955223880597, "step": 74 }, { "DPO Loss": 1.8289985890922114e-07, "Negative Geometric Mean": -14.016528925619834, "Negative prob": -14.016528925619834, "Normal Loss": 0.7023761868476868, "Normal prob": -0.7023761868476868, "Positive Loss": 0.008802083320915699, "Positive prob": -0.008802083320915699, "epoch": 2.208955223880597, "step": 74 }, { "epoch": 2.2388059701492535, "grad_norm": 2.0475067135738048, "learning_rate": 2.1590909090909092e-06, "loss": 1.0604, "step": 75 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8230820894241333, "Normal prob": -0.8230820894241333, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.2388059701492535, "step": 75 }, { "DPO Loss": 5.0341293179959425e-09, "Negative Geometric Mean": -17.27363184079602, "Negative prob": -17.27363184079602, "Normal Loss": 0.5400981903076172, "Normal prob": -0.5400981903076172, "Positive Loss": 0.005451807286590338, "Positive prob": -0.005451807286590338, "epoch": 2.2388059701492535, "step": 75 }, { "epoch": 2.2686567164179103, "grad_norm": 2.1967311931865887, "learning_rate": 2.1212121212121216e-06, "loss": 1.3192, "step": 76 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5391436815261841, "Normal prob": -0.5391436815261841, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.2686567164179103, "step": 76 }, { "DPO Loss": 2.2653410284413853e-10, "Negative Geometric Mean": -20.210526315789473, "Negative prob": -20.210526315789473, "Normal Loss": 0.5660699009895325, "Normal prob": -0.5660699009895325, "Positive Loss": 0.0037499999161809683, "Positive prob": -0.0037499999161809683, "epoch": 2.2686567164179103, "step": 76 }, { "epoch": 2.298507462686567, "grad_norm": 1.9967518595691778, "learning_rate": 2.0833333333333334e-06, "loss": 1.1468, "step": 77 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6178169846534729, "Normal prob": -0.6178169846534729, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.298507462686567, "step": 77 }, { "DPO Loss": 1.2840515325935314e-10, "Negative Geometric Mean": -21.063291139240505, "Negative prob": -21.063291139240505, "Normal Loss": 0.8663536906242371, "Normal prob": -0.8663536906242371, "Positive Loss": 0.007692307699471712, "Positive prob": -0.007692307699471712, "epoch": 2.298507462686567, "step": 77 }, { "epoch": 2.328358208955224, "grad_norm": 2.049865126116338, "learning_rate": 2.0454545454545457e-06, "loss": 1.3278, "step": 78 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7724137902259827, "Normal prob": -0.7724137902259827, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.328358208955224, "step": 78 }, { "DPO Loss": 5.41321637733525e-09, "Negative Geometric Mean": -17.454545454545453, "Negative prob": -17.454545454545453, "Normal Loss": 0.7771220803260803, "Normal prob": -0.7771220803260803, "Positive Loss": 0.004117647185921669, "Positive prob": -0.004117647185921669, "epoch": 2.328358208955224, "step": 78 }, { "epoch": 2.3582089552238807, "grad_norm": 2.1656269684919547, "learning_rate": 2.0075757575757576e-06, "loss": 1.1424, "step": 79 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6516613364219666, "Normal prob": -0.6516613364219666, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.3582089552238807, "step": 79 }, { "DPO Loss": 1.6457955158574946e-05, "Negative Geometric Mean": -9.305439330543933, "Negative prob": -9.305439330543933, "Normal Loss": 0.3754848837852478, "Normal prob": -0.3754848837852478, "Positive Loss": 0.003989361692219973, "Positive prob": -0.003989361692219973, "epoch": 2.3582089552238807, "step": 79 }, { "epoch": 2.388059701492537, "grad_norm": 2.092361435006071, "learning_rate": 1.96969696969697e-06, "loss": 1.0891, "step": 80 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6648983359336853, "Normal prob": -0.6648983359336853, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.388059701492537, "step": 80 }, { "DPO Loss": 1.8778874028992665e-10, "Negative Geometric Mean": -20.96551724137931, "Negative prob": -20.96551724137931, "Normal Loss": 0.7357211709022522, "Normal prob": -0.7357211709022522, "Positive Loss": 0.004262295085936785, "Positive prob": -0.004262295085936785, "epoch": 2.388059701492537, "step": 80 }, { "epoch": 2.417910447761194, "grad_norm": 2.1628073754065253, "learning_rate": 1.931818181818182e-06, "loss": 1.3322, "step": 81 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4883902966976166, "Normal prob": -0.4883902966976166, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.417910447761194, "step": 81 }, { "DPO Loss": 3.535443508732329e-06, "Negative Geometric Mean": -10.894586894586894, "Negative prob": -10.894586894586894, "Normal Loss": 0.7335423231124878, "Normal prob": -0.7335423231124878, "Positive Loss": 0.015023922547698021, "Positive prob": -0.015023922547698021, "epoch": 2.417910447761194, "step": 81 }, { "epoch": 2.4477611940298507, "grad_norm": 1.9738732649321062, "learning_rate": 1.8939393939393941e-06, "loss": 1.1511, "step": 82 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6825304627418518, "Normal prob": -0.6825304627418518, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.4477611940298507, "step": 82 }, { "DPO Loss": 4.576269364710466e-06, "Negative Geometric Mean": -10.524444444444445, "Negative prob": -10.524444444444445, "Normal Loss": 0.678820013999939, "Normal prob": -0.678820013999939, "Positive Loss": 0.004888392984867096, "Positive prob": -0.004888392984867096, "epoch": 2.4477611940298507, "step": 82 }, { "epoch": 2.4776119402985075, "grad_norm": 1.8948734143444266, "learning_rate": 1.8560606060606062e-06, "loss": 1.1637, "step": 83 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 1.0566037893295288, "Normal prob": -1.0566037893295288, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.4776119402985075, "step": 83 }, { "DPO Loss": 2.1392874161708599e-10, "Negative Geometric Mean": -20.55045871559633, "Negative prob": -20.55045871559633, "Normal Loss": 0.6912804245948792, "Normal prob": -0.6912804245948792, "Positive Loss": 0.006160713732242584, "Positive prob": -0.006160713732242584, "epoch": 2.4776119402985075, "step": 83 }, { "epoch": 2.5074626865671643, "grad_norm": 1.9220719621785174, "learning_rate": 1.8181818181818183e-06, "loss": 1.5396, "step": 84 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7508878707885742, "Normal prob": -0.7508878707885742, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5074626865671643, "step": 84 }, { "DPO Loss": 6.15847736230281e-09, "Negative Geometric Mean": -17.133858267716537, "Negative prob": -17.133858267716537, "Normal Loss": 0.5112016201019287, "Normal prob": -0.5112016201019287, "Positive Loss": 0.008045454509556293, "Positive prob": -0.008045454509556293, "epoch": 2.5074626865671643, "step": 84 }, { "epoch": 2.5373134328358207, "grad_norm": 1.7994903843307968, "learning_rate": 1.7803030303030306e-06, "loss": 1.0922, "step": 85 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6117647290229797, "Normal prob": -0.6117647290229797, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5373134328358207, "step": 85 }, { "DPO Loss": 3.7454324156780054e-10, "Negative Geometric Mean": -20.236190476190476, "Negative prob": -20.236190476190476, "Normal Loss": 0.6880466341972351, "Normal prob": -0.6880466341972351, "Positive Loss": 0.006770832929760218, "Positive prob": -0.006770832929760218, "epoch": 2.5373134328358207, "step": 85 }, { "epoch": 2.5671641791044775, "grad_norm": 2.0725411062109917, "learning_rate": 1.7424242424242427e-06, "loss": 1.2207, "step": 86 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.613942563533783, "Normal prob": -0.613942563533783, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5671641791044775, "step": 86 }, { "DPO Loss": 7.096156997863154e-11, "Negative Geometric Mean": -21.55440414507772, "Negative prob": -21.55440414507772, "Normal Loss": 0.7485380172729492, "Normal prob": -0.7485380172729492, "Positive Loss": 0.0013281250139698386, "Positive prob": -0.0013281250139698386, "epoch": 2.5671641791044775, "step": 86 }, { "epoch": 2.5970149253731343, "grad_norm": 2.267950642529402, "learning_rate": 1.7045454545454546e-06, "loss": 1.2643, "step": 87 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6565449237823486, "Normal prob": -0.6565449237823486, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.5970149253731343, "step": 87 }, { "DPO Loss": 2.436813554454401e-07, "Negative Geometric Mean": -13.447852760736197, "Negative prob": -13.447852760736197, "Normal Loss": 0.5056726336479187, "Normal prob": -0.5056726336479187, "Positive Loss": 0.005249999463558197, "Positive prob": -0.005249999463558197, "epoch": 2.5970149253731343, "step": 87 }, { "epoch": 2.626865671641791, "grad_norm": 2.0183179819909824, "learning_rate": 1.6666666666666667e-06, "loss": 1.1806, "step": 88 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6031413674354553, "Normal prob": -0.6031413674354553, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.626865671641791, "step": 88 }, { "DPO Loss": 3.7345149202262504e-11, "Negative Geometric Mean": -22.166666666666668, "Negative prob": -22.166666666666668, "Normal Loss": 0.5758354663848877, "Normal prob": -0.5758354663848877, "Positive Loss": 0.008958333171904087, "Positive prob": -0.008958333171904087, "epoch": 2.626865671641791, "step": 88 }, { "epoch": 2.656716417910448, "grad_norm": 1.9544945233097493, "learning_rate": 1.628787878787879e-06, "loss": 1.4476, "step": 89 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5048255324363708, "Normal prob": -0.5048255324363708, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.656716417910448, "step": 89 }, { "DPO Loss": 1.0769485739142802e-07, "Negative Geometric Mean": -14.51051051051051, "Negative prob": -14.51051051051051, "Normal Loss": 0.4972894787788391, "Normal prob": -0.4972894787788391, "Positive Loss": 0.006953124888241291, "Positive prob": -0.006953124888241291, "epoch": 2.656716417910448, "step": 89 }, { "epoch": 2.6865671641791042, "grad_norm": 2.0315371233639605, "learning_rate": 1.590909090909091e-06, "loss": 1.2496, "step": 90 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7242281436920166, "Normal prob": -0.7242281436920166, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.6865671641791042, "step": 90 }, { "DPO Loss": 6.913702745333522e-10, "Negative Geometric Mean": -19.575418994413408, "Negative prob": -19.575418994413408, "Normal Loss": 0.5063291192054749, "Normal prob": -0.5063291192054749, "Positive Loss": 0.005833332892507315, "Positive prob": -0.005833332892507315, "epoch": 2.6865671641791042, "step": 90 }, { "epoch": 2.716417910447761, "grad_norm": 1.8740490605509472, "learning_rate": 1.5530303030303032e-06, "loss": 1.1379, "step": 91 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6402753591537476, "Normal prob": -0.6402753591537476, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.716417910447761, "step": 91 }, { "DPO Loss": 7.833580502750123e-08, "Negative Geometric Mean": -14.602495543672013, "Negative prob": -14.602495543672013, "Normal Loss": 0.5074971318244934, "Normal prob": -0.5074971318244934, "Positive Loss": 0.006207864731550217, "Positive prob": -0.006207864731550217, "epoch": 2.716417910447761, "step": 91 }, { "epoch": 2.746268656716418, "grad_norm": 2.0558492126355015, "learning_rate": 1.5151515151515152e-06, "loss": 1.2436, "step": 92 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6279550194740295, "Normal prob": -0.6279550194740295, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.746268656716418, "step": 92 }, { "DPO Loss": 4.4534060233937953e-07, "Negative Geometric Mean": -12.918032786885245, "Negative prob": -12.918032786885245, "Normal Loss": 0.6633106470108032, "Normal prob": -0.6633106470108032, "Positive Loss": 0.006970587652176619, "Positive prob": -0.006970587652176619, "epoch": 2.746268656716418, "step": 92 }, { "epoch": 2.7761194029850746, "grad_norm": 1.9381588902563873, "learning_rate": 1.4772727272727275e-06, "loss": 1.4662, "step": 93 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8252788186073303, "Normal prob": -0.8252788186073303, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.7761194029850746, "step": 93 }, { "DPO Loss": 1.7154770529142717e-09, "Negative Geometric Mean": -18.31686541737649, "Negative prob": -18.31686541737649, "Normal Loss": 0.4556961953639984, "Normal prob": -0.4556961953639984, "Positive Loss": 0.00785046722739935, "Positive prob": -0.00785046722739935, "epoch": 2.7761194029850746, "step": 93 }, { "epoch": 2.8059701492537314, "grad_norm": 2.0354631342670952, "learning_rate": 1.4393939393939396e-06, "loss": 1.2155, "step": 94 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5697329640388489, "Normal prob": -0.5697329640388489, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.8059701492537314, "step": 94 }, { "DPO Loss": 2.1559920081838255e-09, "Negative Geometric Mean": -18.059405940594058, "Negative prob": -18.059405940594058, "Normal Loss": 0.47756874561309814, "Normal prob": -0.47756874561309814, "Positive Loss": 0.006666666828095913, "Positive prob": -0.006666666828095913, "epoch": 2.8059701492537314, "step": 94 }, { "epoch": 2.835820895522388, "grad_norm": 2.070312477732604, "learning_rate": 1.4015151515151515e-06, "loss": 1.0389, "step": 95 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.570941150188446, "Normal prob": -0.570941150188446, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.835820895522388, "step": 95 }, { "DPO Loss": 1.3975915802068097e-08, "Negative Geometric Mean": -16.148423005565864, "Negative prob": -16.148423005565864, "Normal Loss": 0.8247422575950623, "Normal prob": -0.8247422575950623, "Positive Loss": 0.006149999797344208, "Positive prob": -0.006149999797344208, "epoch": 2.835820895522388, "step": 95 }, { "epoch": 2.8656716417910446, "grad_norm": 2.148324382578434, "learning_rate": 1.3636363636363636e-06, "loss": 1.3602, "step": 96 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.41602247953414917, "Normal prob": -0.41602247953414917, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.8656716417910446, "step": 96 }, { "DPO Loss": 1.4832332463114162e-08, "Negative Geometric Mean": -16.390243902439025, "Negative prob": -16.390243902439025, "Normal Loss": 0.6726162433624268, "Normal prob": -0.6726162433624268, "Positive Loss": 0.004892241209745407, "Positive prob": -0.004892241209745407, "epoch": 2.8656716417910446, "step": 96 }, { "epoch": 2.8955223880597014, "grad_norm": 2.0518611453442914, "learning_rate": 1.3257575757575757e-06, "loss": 1.2756, "step": 97 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4884667694568634, "Normal prob": -0.4884667694568634, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.8955223880597014, "step": 97 }, { "DPO Loss": 2.021944833989896e-11, "Negative Geometric Mean": -22.63862332695985, "Negative prob": -22.63862332695985, "Normal Loss": 0.6506666541099548, "Normal prob": -0.6506666541099548, "Positive Loss": 0.0039835162460803986, "Positive prob": -0.0039835162460803986, "epoch": 2.8955223880597014, "step": 97 }, { "epoch": 2.925373134328358, "grad_norm": 1.999284468302419, "learning_rate": 1.287878787878788e-06, "loss": 1.2692, "step": 98 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6194643378257751, "Normal prob": -0.6194643378257751, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.925373134328358, "step": 98 }, { "DPO Loss": 1.609763433792714e-11, "Negative Geometric Mean": -22.949494949494948, "Negative prob": -22.949494949494948, "Normal Loss": 0.6997876763343811, "Normal prob": -0.6997876763343811, "Positive Loss": 0.0046562496572732925, "Positive prob": -0.0046562496572732925, "epoch": 2.925373134328358, "step": 98 }, { "epoch": 2.955223880597015, "grad_norm": 2.219383329638975, "learning_rate": 1.25e-06, "loss": 1.4134, "step": 99 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5010309219360352, "Normal prob": -0.5010309219360352, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.955223880597015, "step": 99 }, { "DPO Loss": 3.5758823653111824e-09, "Negative Geometric Mean": -17.847246891651864, "Negative prob": -17.847246891651864, "Normal Loss": 1.1492958068847656, "Normal prob": -1.1492958068847656, "Positive Loss": 0.00566666666418314, "Positive prob": -0.00566666666418314, "epoch": 2.955223880597015, "step": 99 }, { "epoch": 2.9850746268656714, "grad_norm": 1.866821925385724, "learning_rate": 1.2121212121212122e-06, "loss": 1.5138, "step": 100 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5581395626068115, "Normal prob": -0.5581395626068115, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.9850746268656714, "step": 100 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.613131582736969, "Normal prob": -0.613131582736969, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 2.9850746268656714, "step": 100 }, { "epoch": 3.014925373134328, "grad_norm": 1.9205095889539987, "learning_rate": 1.1742424242424245e-06, "loss": 1.1743, "step": 101 }, { "DPO Loss": 1.5580032831565753e-10, "Negative Geometric Mean": -21.77777777777778, "Negative prob": -21.77777777777778, "Normal Loss": 0.4700824022293091, "Normal prob": -0.4700824022293091, "Positive Loss": 0.003035714151337743, "Positive prob": -0.003035714151337743, "epoch": 3.014925373134328, "step": 101 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5164644718170166, "Normal prob": -0.5164644718170166, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.014925373134328, "step": 101 }, { "epoch": 3.044776119402985, "grad_norm": 2.398506630489912, "learning_rate": 1.1363636363636364e-06, "loss": 0.9691, "step": 102 }, { "DPO Loss": 1.44865623442931e-08, "Negative Geometric Mean": -16.54901960784314, "Negative prob": -16.54901960784314, "Normal Loss": 0.28256332874298096, "Normal prob": -0.28256332874298096, "Positive Loss": 0.011323529295623302, "Positive prob": -0.011323529295623302, "epoch": 3.044776119402985, "step": 102 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4212121069431305, "Normal prob": -0.4212121069431305, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.044776119402985, "step": 102 }, { "epoch": 3.074626865671642, "grad_norm": 2.347890205119244, "learning_rate": 1.0984848484848485e-06, "loss": 0.9348, "step": 103 }, { "DPO Loss": 1.8560680536222348e-06, "Negative Geometric Mean": -11.405940594059405, "Negative prob": -11.405940594059405, "Normal Loss": 0.9324009418487549, "Normal prob": -0.9324009418487549, "Positive Loss": 0.005365168210119009, "Positive prob": -0.005365168210119009, "epoch": 3.074626865671642, "step": 103 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3442211151123047, "Normal prob": -0.3442211151123047, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.074626865671642, "step": 103 }, { "epoch": 3.1044776119402986, "grad_norm": 2.359188892156426, "learning_rate": 1.0606060606060608e-06, "loss": 1.0781, "step": 104 }, { "DPO Loss": 1.9647505846419055e-11, "Negative Geometric Mean": -22.793296089385475, "Negative prob": -22.793296089385475, "Normal Loss": 0.2681061625480652, "Normal prob": -0.2681061625480652, "Positive Loss": 0.005217391066253185, "Positive prob": -0.005217391066253185, "epoch": 3.1044776119402986, "step": 104 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3617348372936249, "Normal prob": -0.3617348372936249, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1044776119402986, "step": 104 }, { "epoch": 3.1343283582089554, "grad_norm": 1.9817390653042575, "learning_rate": 1.0227272727272729e-06, "loss": 0.9282, "step": 105 }, { "DPO Loss": 4.53512291861469e-08, "Negative Geometric Mean": -15.375375375375375, "Negative prob": -15.375375375375375, "Normal Loss": 0.7424152493476868, "Normal prob": -0.7424152493476868, "Positive Loss": 0.005976562388241291, "Positive prob": -0.005976562388241291, "epoch": 3.1343283582089554, "step": 105 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5832012891769409, "Normal prob": -0.5832012891769409, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1343283582089554, "step": 105 }, { "epoch": 3.1641791044776117, "grad_norm": 1.740772729450397, "learning_rate": 9.84848484848485e-07, "loss": 1.3153, "step": 106 }, { "DPO Loss": 2.1648668767386283e-07, "Negative Geometric Mean": -13.639344262295081, "Negative prob": -13.639344262295081, "Normal Loss": 0.5162454843521118, "Normal prob": -0.5162454843521118, "Positive Loss": 0.006029411219060421, "Positive prob": -0.006029411219060421, "epoch": 3.1641791044776117, "step": 106 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.7083825469017029, "Normal prob": -0.7083825469017029, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1641791044776117, "step": 106 }, { "epoch": 3.1940298507462686, "grad_norm": 2.131379055827505, "learning_rate": 9.469696969696971e-07, "loss": 1.0913, "step": 107 }, { "DPO Loss": 1.9380008710813674e-11, "Negative Geometric Mean": -23.414634146341463, "Negative prob": -23.414634146341463, "Normal Loss": 0.5913370847702026, "Normal prob": -0.5913370847702026, "Positive Loss": 0.004039854742586613, "Positive prob": -0.004039854742586613, "epoch": 3.1940298507462686, "step": 107 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5914691686630249, "Normal prob": -0.5914691686630249, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.1940298507462686, "step": 107 }, { "epoch": 3.2238805970149254, "grad_norm": 1.9202006353881724, "learning_rate": 9.090909090909091e-07, "loss": 1.1011, "step": 108 }, { "DPO Loss": 4.580912965076425e-09, "Negative Geometric Mean": -17.609467455621303, "Negative prob": -17.609467455621303, "Normal Loss": 0.42352941632270813, "Normal prob": -0.42352941632270813, "Positive Loss": 0.003362676128745079, "Positive prob": -0.003362676128745079, "epoch": 3.2238805970149254, "step": 108 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6711342930793762, "Normal prob": -0.6711342930793762, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.2238805970149254, "step": 108 }, { "epoch": 3.253731343283582, "grad_norm": 2.1185111009526474, "learning_rate": 8.712121212121213e-07, "loss": 1.2732, "step": 109 }, { "DPO Loss": 4.787739095215943e-11, "Negative Geometric Mean": -21.968652037617556, "Negative prob": -21.968652037617556, "Normal Loss": 0.5764660835266113, "Normal prob": -0.5764660835266113, "Positive Loss": 0.0056589143350720406, "Positive prob": -0.0056589143350720406, "epoch": 3.253731343283582, "step": 109 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.49819204211235046, "Normal prob": -0.49819204211235046, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.253731343283582, "step": 109 }, { "epoch": 3.283582089552239, "grad_norm": 2.2180222345911083, "learning_rate": 8.333333333333333e-07, "loss": 1.0525, "step": 110 }, { "DPO Loss": 3.6084690791622813e-12, "Negative Geometric Mean": -24.202729044834307, "Negative prob": -24.202729044834307, "Normal Loss": 0.4346349835395813, "Normal prob": -0.4346349835395813, "Positive Loss": 0.005865384358912706, "Positive prob": -0.005865384358912706, "epoch": 3.283582089552239, "step": 110 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.47032222151756287, "Normal prob": -0.47032222151756287, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.283582089552239, "step": 110 }, { "epoch": 3.3134328358208958, "grad_norm": 1.9203296198811195, "learning_rate": 7.954545454545455e-07, "loss": 0.9339, "step": 111 }, { "DPO Loss": 2.6897354919129347e-08, "Negative Geometric Mean": -15.874015748031496, "Negative prob": -15.874015748031496, "Normal Loss": 0.22816593945026398, "Normal prob": -0.22816593945026398, "Positive Loss": 0.004649122711271048, "Positive prob": -0.004649122711271048, "epoch": 3.3134328358208958, "step": 111 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4344242513179779, "Normal prob": -0.4344242513179779, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.3134328358208958, "step": 111 }, { "epoch": 3.343283582089552, "grad_norm": 2.1609689345068044, "learning_rate": 7.575757575757576e-07, "loss": 0.8455, "step": 112 }, { "DPO Loss": 2.6240920551036232e-11, "Negative Geometric Mean": -22.54922279792746, "Negative prob": -22.54922279792746, "Normal Loss": 0.5216197371482849, "Normal prob": -0.5216197371482849, "Positive Loss": 0.000907451962120831, "Positive prob": -0.000907451962120831, "epoch": 3.343283582089552, "step": 112 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.2542831301689148, "Normal prob": -0.2542831301689148, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.343283582089552, "step": 112 }, { "epoch": 3.373134328358209, "grad_norm": 2.2176774644389647, "learning_rate": 7.196969696969698e-07, "loss": 1.2494, "step": 113 }, { "DPO Loss": 7.933165156285787e-10, "Negative Geometric Mean": -19.304918032786887, "Negative prob": -19.304918032786887, "Normal Loss": 0.6469248533248901, "Normal prob": -0.6469248533248901, "Positive Loss": 0.004204545170068741, "Positive prob": -0.004204545170068741, "epoch": 3.373134328358209, "step": 113 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.8203546404838562, "Normal prob": -0.8203546404838562, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.373134328358209, "step": 113 }, { "epoch": 3.4029850746268657, "grad_norm": 2.282965185136383, "learning_rate": 6.818181818181818e-07, "loss": 1.1533, "step": 114 }, { "DPO Loss": 1.5624834760585283e-11, "Negative Geometric Mean": -23.169620253164556, "Negative prob": -23.169620253164556, "Normal Loss": 0.35404255986213684, "Normal prob": -0.35404255986213684, "Positive Loss": 0.003926281817257404, "Positive prob": -0.003926281817257404, "epoch": 3.4029850746268657, "step": 114 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4111521542072296, "Normal prob": -0.4111521542072296, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4029850746268657, "step": 114 }, { "epoch": 3.4328358208955225, "grad_norm": 2.5031875324236887, "learning_rate": 6.43939393939394e-07, "loss": 1.0183, "step": 115 }, { "DPO Loss": 6.8512084896570295e-12, "Negative Geometric Mean": -24.387096774193548, "Negative prob": -24.387096774193548, "Normal Loss": 0.4786564111709595, "Normal prob": -0.4786564111709595, "Positive Loss": 0.004000000189989805, "Positive prob": -0.004000000189989805, "epoch": 3.4328358208955225, "step": 115 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.36766186356544495, "Normal prob": -0.36766186356544495, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4328358208955225, "step": 115 }, { "epoch": 3.4626865671641793, "grad_norm": 2.163496184787407, "learning_rate": 6.060606060606061e-07, "loss": 0.9872, "step": 116 }, { "DPO Loss": 1.0654309291727694e-08, "Negative Geometric Mean": -16.859504132231404, "Negative prob": -16.859504132231404, "Normal Loss": 0.604774534702301, "Normal prob": -0.604774534702301, "Positive Loss": 0.007083333097398281, "Positive prob": -0.007083333097398281, "epoch": 3.4626865671641793, "step": 116 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4578120708465576, "Normal prob": -0.4578120708465576, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4626865671641793, "step": 116 }, { "epoch": 3.4925373134328357, "grad_norm": 2.406443323866471, "learning_rate": 5.681818181818182e-07, "loss": 1.207, "step": 117 }, { "DPO Loss": 3.0777376250523913e-10, "Negative Geometric Mean": -20.50485436893204, "Negative prob": -20.50485436893204, "Normal Loss": 0.4896000027656555, "Normal prob": -0.4896000027656555, "Positive Loss": 0.004831932485103607, "Positive prob": -0.004831932485103607, "epoch": 3.4925373134328357, "step": 117 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5456368923187256, "Normal prob": -0.5456368923187256, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.4925373134328357, "step": 117 }, { "epoch": 3.5223880597014925, "grad_norm": 2.2690867581296876, "learning_rate": 5.303030303030304e-07, "loss": 0.8877, "step": 118 }, { "DPO Loss": 2.407718380157414e-05, "Negative Geometric Mean": -8.941843971631206, "Negative prob": -8.941843971631206, "Normal Loss": 0.7953816652297974, "Normal prob": -0.7953816652297974, "Positive Loss": 0.0020000000949949026, "Positive prob": -0.0020000000949949026, "epoch": 3.5223880597014925, "step": 118 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.45923912525177, "Normal prob": -0.45923912525177, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.5223880597014925, "step": 118 }, { "epoch": 3.5522388059701493, "grad_norm": 2.082904753100527, "learning_rate": 4.924242424242425e-07, "loss": 1.1249, "step": 119 }, { "DPO Loss": 9.226459600590658e-11, "Negative Geometric Mean": -21.457725947521865, "Negative prob": -21.457725947521865, "Normal Loss": 0.7009803652763367, "Normal prob": -0.7009803652763367, "Positive Loss": 0.003613861510530114, "Positive prob": -0.003613861510530114, "epoch": 3.5522388059701493, "step": 119 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.40026333928108215, "Normal prob": -0.40026333928108215, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.5522388059701493, "step": 119 }, { "epoch": 3.582089552238806, "grad_norm": 2.073003302129352, "learning_rate": 4.5454545454545457e-07, "loss": 0.9345, "step": 120 }, { "DPO Loss": 1.3578722599998166e-10, "Negative Geometric Mean": -21.41732283464567, "Negative prob": -21.41732283464567, "Normal Loss": 0.27029654383659363, "Normal prob": -0.27029654383659363, "Positive Loss": 0.006812499836087227, "Positive prob": -0.006812499836087227, "epoch": 3.582089552238806, "step": 120 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6213314533233643, "Normal prob": -0.6213314533233643, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.582089552238806, "step": 120 }, { "epoch": 3.611940298507463, "grad_norm": 2.068480170444558, "learning_rate": 4.1666666666666667e-07, "loss": 0.9623, "step": 121 }, { "DPO Loss": 9.670490344190136e-10, "Negative Geometric Mean": -18.966565349544073, "Negative prob": -18.966565349544073, "Normal Loss": 0.3237774074077606, "Normal prob": -0.3237774074077606, "Positive Loss": 0.011749999597668648, "Positive prob": -0.011749999597668648, "epoch": 3.611940298507463, "step": 121 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5593419671058655, "Normal prob": -0.5593419671058655, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.611940298507463, "step": 121 }, { "epoch": 3.6417910447761193, "grad_norm": 2.1524327376787213, "learning_rate": 3.787878787878788e-07, "loss": 1.1055, "step": 122 }, { "DPO Loss": 5.7797366909189613e-11, "Negative Geometric Mean": -21.995565410199557, "Negative prob": -21.995565410199557, "Normal Loss": 0.5489404797554016, "Normal prob": -0.5489404797554016, "Positive Loss": 0.003525280859321356, "Positive prob": -0.003525280859321356, "epoch": 3.6417910447761193, "step": 122 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6111933588981628, "Normal prob": -0.6111933588981628, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.6417910447761193, "step": 122 }, { "epoch": 3.671641791044776, "grad_norm": 2.0175223671713285, "learning_rate": 3.409090909090909e-07, "loss": 1.0023, "step": 123 }, { "DPO Loss": 2.8380273726950947e-10, "Negative Geometric Mean": -20.266666666666666, "Negative prob": -20.266666666666666, "Normal Loss": 0.7033638954162598, "Normal prob": -0.7033638954162598, "Positive Loss": 0.003409090917557478, "Positive prob": -0.003409090917557478, "epoch": 3.671641791044776, "step": 123 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.46175718307495117, "Normal prob": -0.46175718307495117, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.671641791044776, "step": 123 }, { "epoch": 3.701492537313433, "grad_norm": 1.9066080510483667, "learning_rate": 3.0303030303030305e-07, "loss": 0.9467, "step": 124 }, { "DPO Loss": 1.5647971808422086e-11, "Negative Geometric Mean": -23.17837837837838, "Negative prob": -23.17837837837838, "Normal Loss": 0.286995530128479, "Normal prob": -0.286995530128479, "Positive Loss": 0.002824073890224099, "Positive prob": -0.002824073890224099, "epoch": 3.701492537313433, "step": 124 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3671291470527649, "Normal prob": -0.3671291470527649, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.701492537313433, "step": 124 }, { "epoch": 3.7313432835820897, "grad_norm": 2.531083759946281, "learning_rate": 2.651515151515152e-07, "loss": 0.7514, "step": 125 }, { "DPO Loss": 1.7525048080927675e-11, "Negative Geometric Mean": -22.879227053140095, "Negative prob": -22.879227053140095, "Normal Loss": 0.3598875403404236, "Normal prob": -0.3598875403404236, "Positive Loss": 0.0022849461529403925, "Positive prob": -0.0022849461529403925, "epoch": 3.7313432835820897, "step": 125 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.3959847688674927, "Normal prob": -0.3959847688674927, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.7313432835820897, "step": 125 }, { "epoch": 3.7611940298507465, "grad_norm": 2.0517064716337616, "learning_rate": 2.2727272727272729e-07, "loss": 0.9947, "step": 126 }, { "DPO Loss": 2.7711943854600904e-11, "Negative Geometric Mean": -22.61431870669746, "Negative prob": -22.61431870669746, "Normal Loss": 0.5822654962539673, "Normal prob": -0.5822654962539673, "Positive Loss": 0.004266304429620504, "Positive prob": -0.004266304429620504, "epoch": 3.7611940298507465, "step": 126 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.34913185238838196, "Normal prob": -0.34913185238838196, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.7611940298507465, "step": 126 }, { "epoch": 3.791044776119403, "grad_norm": 2.025401136197227, "learning_rate": 1.893939393939394e-07, "loss": 0.87, "step": 127 }, { "DPO Loss": 1.311506237805117e-10, "Negative Geometric Mean": -21.039755351681958, "Negative prob": -21.039755351681958, "Normal Loss": 0.23188406229019165, "Normal prob": -0.23188406229019165, "Positive Loss": 0.0052678571082651615, "Positive prob": -0.0052678571082651615, "epoch": 3.791044776119403, "step": 127 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6681604981422424, "Normal prob": -0.6681604981422424, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.791044776119403, "step": 127 }, { "epoch": 3.8208955223880596, "grad_norm": 2.242147138154214, "learning_rate": 1.5151515151515152e-07, "loss": 0.9392, "step": 128 }, { "DPO Loss": 1.4861272720822845e-06, "Negative Geometric Mean": -11.783393501805055, "Negative prob": -11.783393501805055, "Normal Loss": 0.6964818239212036, "Normal prob": -0.6964818239212036, "Positive Loss": 0.00632653059437871, "Positive prob": -0.00632653059437871, "epoch": 3.8208955223880596, "step": 128 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.5179063081741333, "Normal prob": -0.5179063081741333, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.8208955223880596, "step": 128 }, { "epoch": 3.8507462686567164, "grad_norm": 1.9270522647390511, "learning_rate": 1.1363636363636364e-07, "loss": 1.0487, "step": 129 }, { "DPO Loss": 4.898483852773752e-10, "Negative Geometric Mean": -19.74721189591078, "Negative prob": -19.74721189591078, "Normal Loss": 0.6805970072746277, "Normal prob": -0.6805970072746277, "Positive Loss": 0.006926605477929115, "Positive prob": -0.006926605477929115, "epoch": 3.8507462686567164, "step": 129 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4078023433685303, "Normal prob": -0.4078023433685303, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.8507462686567164, "step": 129 }, { "epoch": 3.8805970149253732, "grad_norm": 2.0715865285120842, "learning_rate": 7.575757575757576e-08, "loss": 1.2189, "step": 130 }, { "DPO Loss": 2.2675528124422485e-11, "Negative Geometric Mean": -23.16883116883117, "Negative prob": -23.16883116883117, "Normal Loss": 0.5700891017913818, "Normal prob": -0.5700891017913818, "Positive Loss": 0.00426666671410203, "Positive prob": -0.00426666671410203, "epoch": 3.8805970149253732, "step": 130 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.6165137887001038, "Normal prob": -0.6165137887001038, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.8805970149253732, "step": 130 }, { "epoch": 3.91044776119403, "grad_norm": 2.135926505561416, "learning_rate": 3.787878787878788e-08, "loss": 1.0232, "step": 131 }, { "DPO Loss": 6.434899282164839e-11, "Negative Geometric Mean": -21.73913043478261, "Negative prob": -21.73913043478261, "Normal Loss": 0.5634146332740784, "Normal prob": -0.5634146332740784, "Positive Loss": 0.004065934102982283, "Positive prob": -0.004065934102982283, "epoch": 3.91044776119403, "step": 131 }, { "DPO Loss": 0.0, "Negative Geometric Mean": 0.0, "Negative prob": 0.0, "Normal Loss": 0.4604148268699646, "Normal prob": -0.4604148268699646, "Positive Loss": 0.0, "Positive prob": 0.0, "epoch": 3.91044776119403, "step": 131 }, { "epoch": 3.9402985074626864, "grad_norm": 2.0527824846286724, "learning_rate": 0.0, "loss": 1.0204, "step": 132 } ], "logging_steps": 1, "max_steps": 132, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 11615663554560.0, "train_batch_size": 6, "trial_name": null, "trial_params": null }