| { |
| "best_metric": 0.4712187647819519, |
| "best_model_checkpoint": "/mnt/yscfs/zhuchiwei/realquestions/ckpt/250212_realquestions_dpo/checkpoint-700", |
| "epoch": 0.99968, |
| "eval_steps": 100, |
| "global_step": 781, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00128, |
| "grad_norm": 7.436504551716031, |
| "learning_rate": 8.860759493670886e-09, |
| "logits/chosen": -1.0859375, |
| "logits/rejected": -1.10498046875, |
| "logps/chosen": -336.5, |
| "logps/rejected": -339.5, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00256, |
| "grad_norm": 7.250237903929739, |
| "learning_rate": 1.772151898734177e-08, |
| "logits/chosen": -1.134765625, |
| "logits/rejected": -1.11767578125, |
| "logps/chosen": -329.75, |
| "logps/rejected": -317.75, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00384, |
| "grad_norm": 7.662424410669392, |
| "learning_rate": 2.658227848101266e-08, |
| "logits/chosen": -1.1162109375, |
| "logits/rejected": -1.1435546875, |
| "logps/chosen": -334.5, |
| "logps/rejected": -305.75, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": -0.0010561943054199219, |
| "rewards/margins": 0.0004811286926269531, |
| "rewards/rejected": -0.0015385150909423828, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.00512, |
| "grad_norm": 7.278826365213056, |
| "learning_rate": 3.544303797468354e-08, |
| "logits/chosen": -1.154296875, |
| "logits/rejected": -1.173828125, |
| "logps/chosen": -312.5, |
| "logps/rejected": -320.625, |
| "loss": 0.692, |
| "rewards/accuracies": 0.34375, |
| "rewards/chosen": 0.00021409988403320312, |
| "rewards/margins": 5.698204040527344e-05, |
| "rewards/rejected": 0.000156402587890625, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0064, |
| "grad_norm": 7.667975226995807, |
| "learning_rate": 4.430379746835443e-08, |
| "logits/chosen": -1.1318359375, |
| "logits/rejected": -1.1826171875, |
| "logps/chosen": -335.0, |
| "logps/rejected": -351.875, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": -3.075599670410156e-05, |
| "rewards/margins": 0.0016710758209228516, |
| "rewards/rejected": -0.0016994476318359375, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.00768, |
| "grad_norm": 7.34407022526262, |
| "learning_rate": 5.316455696202532e-08, |
| "logits/chosen": -1.0146484375, |
| "logits/rejected": -1.1123046875, |
| "logps/chosen": -330.75, |
| "logps/rejected": -351.5, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.3046875, |
| "rewards/chosen": 0.0008311271667480469, |
| "rewards/margins": -0.0005159378051757812, |
| "rewards/rejected": 0.0013489723205566406, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00896, |
| "grad_norm": 6.79379213946981, |
| "learning_rate": 6.20253164556962e-08, |
| "logits/chosen": -1.044921875, |
| "logits/rejected": -1.10986328125, |
| "logps/chosen": -285.875, |
| "logps/rejected": -287.0, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.3046875, |
| "rewards/chosen": -0.00012946128845214844, |
| "rewards/margins": -0.00041294097900390625, |
| "rewards/rejected": 0.0002841949462890625, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01024, |
| "grad_norm": 7.527185660748829, |
| "learning_rate": 7.088607594936708e-08, |
| "logits/chosen": -1.12548828125, |
| "logits/rejected": -1.17578125, |
| "logps/chosen": -337.25, |
| "logps/rejected": -321.0, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.296875, |
| "rewards/chosen": 0.0013508796691894531, |
| "rewards/margins": 0.0003237724304199219, |
| "rewards/rejected": 0.001026153564453125, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01152, |
| "grad_norm": 7.076196519468638, |
| "learning_rate": 7.974683544303797e-08, |
| "logits/chosen": -1.1572265625, |
| "logits/rejected": -1.1826171875, |
| "logps/chosen": -297.0, |
| "logps/rejected": -312.125, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.234375, |
| "rewards/chosen": 0.0007529258728027344, |
| "rewards/margins": -0.0009369850158691406, |
| "rewards/rejected": 0.0016903877258300781, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.0128, |
| "grad_norm": 7.318991452232657, |
| "learning_rate": 8.860759493670886e-08, |
| "logits/chosen": -1.10400390625, |
| "logits/rejected": -1.1416015625, |
| "logps/chosen": -318.75, |
| "logps/rejected": -304.125, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.390625, |
| "rewards/chosen": 0.0015916824340820312, |
| "rewards/margins": 0.0015668869018554688, |
| "rewards/rejected": 2.9325485229492188e-05, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01408, |
| "grad_norm": 7.525848702124104, |
| "learning_rate": 9.746835443037974e-08, |
| "logits/chosen": -1.1044921875, |
| "logits/rejected": -1.09130859375, |
| "logps/chosen": -341.25, |
| "logps/rejected": -323.5, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.2578125, |
| "rewards/chosen": -0.0008592605590820312, |
| "rewards/margins": -0.0011625289916992188, |
| "rewards/rejected": 0.00030422210693359375, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.01536, |
| "grad_norm": 7.187235726257762, |
| "learning_rate": 1.0632911392405063e-07, |
| "logits/chosen": -1.09619140625, |
| "logits/rejected": -1.15283203125, |
| "logps/chosen": -327.25, |
| "logps/rejected": -326.25, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": -0.0012707710266113281, |
| "rewards/margins": -0.00025081634521484375, |
| "rewards/rejected": -0.001018524169921875, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.01664, |
| "grad_norm": 7.376759243506061, |
| "learning_rate": 1.151898734177215e-07, |
| "logits/chosen": -1.130859375, |
| "logits/rejected": -1.1220703125, |
| "logps/chosen": -317.75, |
| "logps/rejected": -320.0, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.3203125, |
| "rewards/chosen": 5.7220458984375e-05, |
| "rewards/margins": 0.0014476776123046875, |
| "rewards/rejected": -0.0013861656188964844, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.01792, |
| "grad_norm": 7.321841205322695, |
| "learning_rate": 1.240506329113924e-07, |
| "logits/chosen": -1.11669921875, |
| "logits/rejected": -1.146484375, |
| "logps/chosen": -298.5, |
| "logps/rejected": -291.875, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.3203125, |
| "rewards/chosen": 0.0023250579833984375, |
| "rewards/margins": 0.00077056884765625, |
| "rewards/rejected": 0.0015516281127929688, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0192, |
| "grad_norm": 7.513669747884291, |
| "learning_rate": 1.329113924050633e-07, |
| "logits/chosen": -1.02978515625, |
| "logits/rejected": -1.06396484375, |
| "logps/chosen": -348.5, |
| "logps/rejected": -348.0, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.2890625, |
| "rewards/chosen": 0.00205230712890625, |
| "rewards/margins": 0.001857757568359375, |
| "rewards/rejected": 0.00019502639770507812, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.02048, |
| "grad_norm": 7.56834980768175, |
| "learning_rate": 1.4177215189873417e-07, |
| "logits/chosen": -1.10009765625, |
| "logits/rejected": -1.123046875, |
| "logps/chosen": -331.125, |
| "logps/rejected": -330.25, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.34375, |
| "rewards/chosen": 0.0020537376403808594, |
| "rewards/margins": 0.0016803741455078125, |
| "rewards/rejected": 0.0003743171691894531, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02176, |
| "grad_norm": 7.361758673679262, |
| "learning_rate": 1.5063291139240505e-07, |
| "logits/chosen": -1.201171875, |
| "logits/rejected": -1.1767578125, |
| "logps/chosen": -337.375, |
| "logps/rejected": -321.5, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.3203125, |
| "rewards/chosen": -0.001008749008178711, |
| "rewards/margins": -0.001155853271484375, |
| "rewards/rejected": 0.00014638900756835938, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.02304, |
| "grad_norm": 7.632190356013242, |
| "learning_rate": 1.5949367088607593e-07, |
| "logits/chosen": -1.1279296875, |
| "logits/rejected": -1.18115234375, |
| "logps/chosen": -324.25, |
| "logps/rejected": -323.75, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.3671875, |
| "rewards/chosen": 0.002650022506713867, |
| "rewards/margins": 0.0017561912536621094, |
| "rewards/rejected": 0.0008993148803710938, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.02432, |
| "grad_norm": 7.567509047244517, |
| "learning_rate": 1.6835443037974684e-07, |
| "logits/chosen": -1.2041015625, |
| "logits/rejected": -1.126953125, |
| "logps/chosen": -334.125, |
| "logps/rejected": -280.0, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.3359375, |
| "rewards/chosen": 0.0007448196411132812, |
| "rewards/margins": 0.00107574462890625, |
| "rewards/rejected": -0.00033283233642578125, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0256, |
| "grad_norm": 7.324051502003322, |
| "learning_rate": 1.7721518987341772e-07, |
| "logits/chosen": -1.09814453125, |
| "logits/rejected": -1.10302734375, |
| "logps/chosen": -304.5, |
| "logps/rejected": -293.875, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.234375, |
| "rewards/chosen": -0.0005500316619873047, |
| "rewards/margins": -0.0008497238159179688, |
| "rewards/rejected": 0.00030422210693359375, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02688, |
| "grad_norm": 7.1047505475300525, |
| "learning_rate": 1.8607594936708857e-07, |
| "logits/chosen": -1.06591796875, |
| "logits/rejected": -1.103515625, |
| "logps/chosen": -343.75, |
| "logps/rejected": -317.625, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.3515625, |
| "rewards/chosen": 0.0008325576782226562, |
| "rewards/margins": -5.53131103515625e-05, |
| "rewards/rejected": 0.0008883476257324219, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.02816, |
| "grad_norm": 6.910170627811144, |
| "learning_rate": 1.9493670886075948e-07, |
| "logits/chosen": -1.08251953125, |
| "logits/rejected": -1.1064453125, |
| "logps/chosen": -296.5, |
| "logps/rejected": -283.375, |
| "loss": 0.6908, |
| "rewards/accuracies": 0.4140625, |
| "rewards/chosen": 0.0032749176025390625, |
| "rewards/margins": 0.003100872039794922, |
| "rewards/rejected": 0.00017690658569335938, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.02944, |
| "grad_norm": 7.159911065415018, |
| "learning_rate": 2.0379746835443036e-07, |
| "logits/chosen": -1.1181640625, |
| "logits/rejected": -1.17578125, |
| "logps/chosen": -322.5, |
| "logps/rejected": -323.5, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.3515625, |
| "rewards/chosen": 0.0003407001495361328, |
| "rewards/margins": 0.0001850128173828125, |
| "rewards/rejected": 0.0001583099365234375, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.03072, |
| "grad_norm": 7.159118745120893, |
| "learning_rate": 2.1265822784810127e-07, |
| "logits/chosen": -1.1064453125, |
| "logits/rejected": -1.12890625, |
| "logps/chosen": -317.375, |
| "logps/rejected": -315.75, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.3203125, |
| "rewards/chosen": 0.0022192001342773438, |
| "rewards/margins": 0.0004982948303222656, |
| "rewards/rejected": 0.0017242431640625, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 7.381100080259422, |
| "learning_rate": 2.2151898734177215e-07, |
| "logits/chosen": -1.0849609375, |
| "logits/rejected": -1.11376953125, |
| "logps/chosen": -320.625, |
| "logps/rejected": -313.5, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.3515625, |
| "rewards/chosen": 0.005059480667114258, |
| "rewards/margins": -0.00042819976806640625, |
| "rewards/rejected": 0.005497932434082031, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03328, |
| "grad_norm": 7.218282400256934, |
| "learning_rate": 2.30379746835443e-07, |
| "logits/chosen": -1.166015625, |
| "logits/rejected": -1.185546875, |
| "logps/chosen": -326.875, |
| "logps/rejected": -331.875, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": 0.004558563232421875, |
| "rewards/margins": 0.0010230541229248047, |
| "rewards/rejected": 0.0035305023193359375, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03456, |
| "grad_norm": 7.239082966066987, |
| "learning_rate": 2.392405063291139e-07, |
| "logits/chosen": -1.1005859375, |
| "logits/rejected": -1.15283203125, |
| "logps/chosen": -342.75, |
| "logps/rejected": -328.875, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": 0.004992961883544922, |
| "rewards/margins": -2.574920654296875e-05, |
| "rewards/rejected": 0.0050220489501953125, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.03584, |
| "grad_norm": 6.948014033726085, |
| "learning_rate": 2.481012658227848e-07, |
| "logits/chosen": -1.05908203125, |
| "logits/rejected": -1.0693359375, |
| "logps/chosen": -321.625, |
| "logps/rejected": -285.125, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.296875, |
| "rewards/chosen": 0.0019271373748779297, |
| "rewards/margins": -0.002063751220703125, |
| "rewards/rejected": 0.003989458084106445, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03712, |
| "grad_norm": 6.869465257961058, |
| "learning_rate": 2.5696202531645567e-07, |
| "logits/chosen": -1.162109375, |
| "logits/rejected": -1.15380859375, |
| "logps/chosen": -307.5, |
| "logps/rejected": -294.625, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.328125, |
| "rewards/chosen": 0.00728607177734375, |
| "rewards/margins": 5.91278076171875e-05, |
| "rewards/rejected": 0.00720977783203125, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0384, |
| "grad_norm": 7.337023896045187, |
| "learning_rate": 2.658227848101266e-07, |
| "logits/chosen": -1.017578125, |
| "logits/rejected": -1.07275390625, |
| "logps/chosen": -327.25, |
| "logps/rejected": -343.0, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.3671875, |
| "rewards/chosen": 0.0086822509765625, |
| "rewards/margins": 5.5789947509765625e-05, |
| "rewards/rejected": 0.008625030517578125, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03968, |
| "grad_norm": 7.347791617859354, |
| "learning_rate": 2.7468354430379743e-07, |
| "logits/chosen": -1.18359375, |
| "logits/rejected": -1.2197265625, |
| "logps/chosen": -311.5, |
| "logps/rejected": -307.0, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.359375, |
| "rewards/chosen": 0.01105499267578125, |
| "rewards/margins": 0.0011830329895019531, |
| "rewards/rejected": 0.009868621826171875, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.04096, |
| "grad_norm": 7.498901433929662, |
| "learning_rate": 2.8354430379746834e-07, |
| "logits/chosen": -1.14111328125, |
| "logits/rejected": -1.11865234375, |
| "logps/chosen": -334.5, |
| "logps/rejected": -310.75, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.359375, |
| "rewards/chosen": 0.009601593017578125, |
| "rewards/margins": 0.0008537769317626953, |
| "rewards/rejected": 0.008741378784179688, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04224, |
| "grad_norm": 7.169131545571104, |
| "learning_rate": 2.9240506329113925e-07, |
| "logits/chosen": -1.10888671875, |
| "logits/rejected": -1.11962890625, |
| "logps/chosen": -329.25, |
| "logps/rejected": -319.375, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.4140625, |
| "rewards/chosen": 0.013051986694335938, |
| "rewards/margins": 0.0033140182495117188, |
| "rewards/rejected": 0.00975799560546875, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.04352, |
| "grad_norm": 7.268573668937982, |
| "learning_rate": 3.012658227848101e-07, |
| "logits/chosen": -1.12109375, |
| "logits/rejected": -1.13037109375, |
| "logps/chosen": -322.25, |
| "logps/rejected": -304.0, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.3984375, |
| "rewards/chosen": 0.011199951171875, |
| "rewards/margins": 0.0003910064697265625, |
| "rewards/rejected": 0.0108184814453125, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0448, |
| "grad_norm": 7.598910436962618, |
| "learning_rate": 3.1012658227848096e-07, |
| "logits/chosen": -1.06591796875, |
| "logits/rejected": -1.1396484375, |
| "logps/chosen": -311.375, |
| "logps/rejected": -331.5, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.390625, |
| "rewards/chosen": 0.01214599609375, |
| "rewards/margins": -0.0003960132598876953, |
| "rewards/rejected": 0.012542724609375, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.04608, |
| "grad_norm": 7.108926141259724, |
| "learning_rate": 3.1898734177215186e-07, |
| "logits/chosen": -1.10205078125, |
| "logits/rejected": -1.1240234375, |
| "logps/chosen": -324.25, |
| "logps/rejected": -304.125, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.3671875, |
| "rewards/chosen": 0.0137176513671875, |
| "rewards/margins": 0.001232147216796875, |
| "rewards/rejected": 0.012485504150390625, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04736, |
| "grad_norm": 7.267204751248262, |
| "learning_rate": 3.2784810126582277e-07, |
| "logits/chosen": -1.04638671875, |
| "logits/rejected": -1.07470703125, |
| "logps/chosen": -304.0, |
| "logps/rejected": -331.25, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.3984375, |
| "rewards/chosen": 0.013946533203125, |
| "rewards/margins": 0.00335693359375, |
| "rewards/rejected": 0.010589599609375, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.04864, |
| "grad_norm": 7.22576003364743, |
| "learning_rate": 3.367088607594937e-07, |
| "logits/chosen": -1.1435546875, |
| "logits/rejected": -1.1552734375, |
| "logps/chosen": -338.0, |
| "logps/rejected": -320.125, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.390625, |
| "rewards/chosen": 0.0144500732421875, |
| "rewards/margins": 0.000946044921875, |
| "rewards/rejected": 0.01351165771484375, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04992, |
| "grad_norm": 6.91155869405154, |
| "learning_rate": 3.4556962025316453e-07, |
| "logits/chosen": -1.02978515625, |
| "logits/rejected": -1.1005859375, |
| "logps/chosen": -301.125, |
| "logps/rejected": -309.0, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.3359375, |
| "rewards/chosen": 0.013141632080078125, |
| "rewards/margins": -0.00186920166015625, |
| "rewards/rejected": 0.0149993896484375, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0512, |
| "grad_norm": 7.5651212389735845, |
| "learning_rate": 3.5443037974683544e-07, |
| "logits/chosen": -1.11279296875, |
| "logits/rejected": -1.18359375, |
| "logps/chosen": -301.375, |
| "logps/rejected": -328.0, |
| "loss": 0.693, |
| "rewards/accuracies": 0.359375, |
| "rewards/chosen": 0.013530731201171875, |
| "rewards/margins": -0.0004057884216308594, |
| "rewards/rejected": 0.0139312744140625, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05248, |
| "grad_norm": 7.055346161775772, |
| "learning_rate": 3.632911392405063e-07, |
| "logits/chosen": -1.1689453125, |
| "logits/rejected": -1.19482421875, |
| "logps/chosen": -325.75, |
| "logps/rejected": -307.5, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": 0.017852783203125, |
| "rewards/margins": 0.002711772918701172, |
| "rewards/rejected": 0.01515960693359375, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.05376, |
| "grad_norm": 7.034415420766986, |
| "learning_rate": 3.7215189873417715e-07, |
| "logits/chosen": -1.14501953125, |
| "logits/rejected": -1.1533203125, |
| "logps/chosen": -342.0, |
| "logps/rejected": -316.25, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": 0.019439697265625, |
| "rewards/margins": 0.0026378631591796875, |
| "rewards/rejected": 0.01682281494140625, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05504, |
| "grad_norm": 7.974604803897025, |
| "learning_rate": 3.810126582278481e-07, |
| "logits/chosen": -1.1513671875, |
| "logits/rejected": -1.1669921875, |
| "logps/chosen": -364.25, |
| "logps/rejected": -370.125, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": 0.01983642578125, |
| "rewards/margins": 0.0025014877319335938, |
| "rewards/rejected": 0.01732635498046875, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.05632, |
| "grad_norm": 7.177077737022184, |
| "learning_rate": 3.8987341772151896e-07, |
| "logits/chosen": -1.0830078125, |
| "logits/rejected": -1.11474609375, |
| "logps/chosen": -337.0, |
| "logps/rejected": -321.375, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.4296875, |
| "rewards/chosen": 0.0222625732421875, |
| "rewards/margins": 0.0036296844482421875, |
| "rewards/rejected": 0.01862335205078125, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0576, |
| "grad_norm": 7.092550989605114, |
| "learning_rate": 3.9873417721518987e-07, |
| "logits/chosen": -1.208984375, |
| "logits/rejected": -1.1796875, |
| "logps/chosen": -336.75, |
| "logps/rejected": -338.5, |
| "loss": 0.691, |
| "rewards/accuracies": 0.4453125, |
| "rewards/chosen": 0.0211334228515625, |
| "rewards/margins": 0.0034885406494140625, |
| "rewards/rejected": 0.01763916015625, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.05888, |
| "grad_norm": 7.466264419127936, |
| "learning_rate": 4.075949367088607e-07, |
| "logits/chosen": -1.0966796875, |
| "logits/rejected": -1.126953125, |
| "logps/chosen": -321.0, |
| "logps/rejected": -334.75, |
| "loss": 0.6891, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": 0.0274810791015625, |
| "rewards/margins": 0.0086822509765625, |
| "rewards/rejected": 0.0187835693359375, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.06016, |
| "grad_norm": 6.985552122485807, |
| "learning_rate": 4.164556962025316e-07, |
| "logits/chosen": -1.12109375, |
| "logits/rejected": -1.1279296875, |
| "logps/chosen": -303.0, |
| "logps/rejected": -297.875, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": 0.0264129638671875, |
| "rewards/margins": 0.003119945526123047, |
| "rewards/rejected": 0.02330780029296875, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.06144, |
| "grad_norm": 7.176358655196464, |
| "learning_rate": 4.2531645569620254e-07, |
| "logits/chosen": -1.1015625, |
| "logits/rejected": -1.11083984375, |
| "logps/chosen": -340.5, |
| "logps/rejected": -318.75, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.028717041015625, |
| "rewards/margins": 0.0058269500732421875, |
| "rewards/rejected": 0.0229034423828125, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.06272, |
| "grad_norm": 7.100421828155237, |
| "learning_rate": 4.341772151898734e-07, |
| "logits/chosen": -1.2080078125, |
| "logits/rejected": -1.2109375, |
| "logps/chosen": -339.75, |
| "logps/rejected": -332.25, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.3984375, |
| "rewards/chosen": 0.0296630859375, |
| "rewards/margins": 0.00295257568359375, |
| "rewards/rejected": 0.0267333984375, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 7.057468496585091, |
| "learning_rate": 4.430379746835443e-07, |
| "logits/chosen": -1.115234375, |
| "logits/rejected": -1.14599609375, |
| "logps/chosen": -306.75, |
| "logps/rejected": -277.0, |
| "loss": 0.6878, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": 0.03302001953125, |
| "rewards/margins": 0.00989532470703125, |
| "rewards/rejected": 0.02313232421875, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06528, |
| "grad_norm": 7.03326897520388, |
| "learning_rate": 4.5189873417721515e-07, |
| "logits/chosen": -1.1220703125, |
| "logits/rejected": -1.1650390625, |
| "logps/chosen": -292.5, |
| "logps/rejected": -301.625, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.4453125, |
| "rewards/chosen": 0.0316925048828125, |
| "rewards/margins": 0.0028066635131835938, |
| "rewards/rejected": 0.0289306640625, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.06656, |
| "grad_norm": 6.957761014625216, |
| "learning_rate": 4.60759493670886e-07, |
| "logits/chosen": -1.1015625, |
| "logits/rejected": -1.1357421875, |
| "logps/chosen": -309.75, |
| "logps/rejected": -317.75, |
| "loss": 0.6904, |
| "rewards/accuracies": 0.4296875, |
| "rewards/chosen": 0.034149169921875, |
| "rewards/margins": 0.004019737243652344, |
| "rewards/rejected": 0.0301361083984375, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.06784, |
| "grad_norm": 6.866772048798592, |
| "learning_rate": 4.6962025316455697e-07, |
| "logits/chosen": -1.2236328125, |
| "logits/rejected": -1.23828125, |
| "logps/chosen": -313.0, |
| "logps/rejected": -290.625, |
| "loss": 0.6903, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": 0.036773681640625, |
| "rewards/margins": 0.0050182342529296875, |
| "rewards/rejected": 0.0317535400390625, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.06912, |
| "grad_norm": 34.49051291265523, |
| "learning_rate": 4.784810126582278e-07, |
| "logits/chosen": -1.1630859375, |
| "logits/rejected": -1.1513671875, |
| "logps/chosen": -297.375, |
| "logps/rejected": -443.875, |
| "loss": 0.6864, |
| "rewards/accuracies": 0.4765625, |
| "rewards/chosen": 0.0362701416015625, |
| "rewards/margins": 0.02279949188232422, |
| "rewards/rejected": 0.013580322265625, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0704, |
| "grad_norm": 7.450083821669119, |
| "learning_rate": 4.873417721518987e-07, |
| "logits/chosen": -1.126953125, |
| "logits/rejected": -1.216796875, |
| "logps/chosen": -303.125, |
| "logps/rejected": -338.25, |
| "loss": 0.6942, |
| "rewards/accuracies": 0.3359375, |
| "rewards/chosen": 0.03314208984375, |
| "rewards/margins": -0.002353191375732422, |
| "rewards/rejected": 0.0355072021484375, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.07168, |
| "grad_norm": 7.328024111375669, |
| "learning_rate": 4.962025316455696e-07, |
| "logits/chosen": -1.1318359375, |
| "logits/rejected": -1.12451171875, |
| "logps/chosen": -327.75, |
| "logps/rejected": -300.75, |
| "loss": 0.6884, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": 0.0408172607421875, |
| "rewards/margins": 0.008729934692382812, |
| "rewards/rejected": 0.0321197509765625, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.07296, |
| "grad_norm": 6.641731690681104, |
| "learning_rate": 5.050632911392404e-07, |
| "logits/chosen": -1.19482421875, |
| "logits/rejected": -1.208984375, |
| "logps/chosen": -274.0, |
| "logps/rejected": -292.5, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0372772216796875, |
| "rewards/margins": 0.0062408447265625, |
| "rewards/rejected": 0.0310516357421875, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.07424, |
| "grad_norm": 7.328502114098263, |
| "learning_rate": 5.139240506329113e-07, |
| "logits/chosen": -1.2109375, |
| "logits/rejected": -1.220703125, |
| "logps/chosen": -337.25, |
| "logps/rejected": -324.25, |
| "loss": 0.6887, |
| "rewards/accuracies": 0.4453125, |
| "rewards/chosen": 0.0395965576171875, |
| "rewards/margins": 0.008788108825683594, |
| "rewards/rejected": 0.03082275390625, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.07552, |
| "grad_norm": 7.129804240589812, |
| "learning_rate": 5.227848101265822e-07, |
| "logits/chosen": -1.1884765625, |
| "logits/rejected": -1.2099609375, |
| "logps/chosen": -304.5, |
| "logps/rejected": -306.75, |
| "loss": 0.6885, |
| "rewards/accuracies": 0.5078125, |
| "rewards/chosen": 0.0443572998046875, |
| "rewards/margins": 0.008890151977539062, |
| "rewards/rejected": 0.035400390625, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.0768, |
| "grad_norm": 7.1937924938108875, |
| "learning_rate": 5.316455696202532e-07, |
| "logits/chosen": -1.07861328125, |
| "logits/rejected": -1.03759765625, |
| "logps/chosen": -322.25, |
| "logps/rejected": -299.375, |
| "loss": 0.6874, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": 0.046478271484375, |
| "rewards/margins": 0.011920928955078125, |
| "rewards/rejected": 0.0345611572265625, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07808, |
| "grad_norm": 7.129369707890816, |
| "learning_rate": 5.405063291139241e-07, |
| "logits/chosen": -1.171875, |
| "logits/rejected": -1.15380859375, |
| "logps/chosen": -317.25, |
| "logps/rejected": -275.125, |
| "loss": 0.685, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": 0.05120849609375, |
| "rewards/margins": 0.01607513427734375, |
| "rewards/rejected": 0.0351715087890625, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.07936, |
| "grad_norm": 7.302643657053502, |
| "learning_rate": 5.493670886075949e-07, |
| "logits/chosen": -1.189453125, |
| "logits/rejected": -1.2587890625, |
| "logps/chosen": -330.75, |
| "logps/rejected": -345.5, |
| "loss": 0.691, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": 0.0424652099609375, |
| "rewards/margins": 0.003490447998046875, |
| "rewards/rejected": 0.03900146484375, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.08064, |
| "grad_norm": 6.84968399915786, |
| "learning_rate": 5.582278481012658e-07, |
| "logits/chosen": -1.087890625, |
| "logits/rejected": -1.119140625, |
| "logps/chosen": -326.125, |
| "logps/rejected": -308.625, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": 0.0484619140625, |
| "rewards/margins": 0.009546279907226562, |
| "rewards/rejected": 0.038909912109375, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.08192, |
| "grad_norm": 7.274670055824019, |
| "learning_rate": 5.670886075949367e-07, |
| "logits/chosen": -1.1708984375, |
| "logits/rejected": -1.19140625, |
| "logps/chosen": -324.25, |
| "logps/rejected": -328.875, |
| "loss": 0.6859, |
| "rewards/accuracies": 0.5546875, |
| "rewards/chosen": 0.054931640625, |
| "rewards/margins": 0.013892173767089844, |
| "rewards/rejected": 0.04095458984375, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.0832, |
| "grad_norm": 7.043191769175182, |
| "learning_rate": 5.759493670886076e-07, |
| "logits/chosen": -1.1728515625, |
| "logits/rejected": -1.193359375, |
| "logps/chosen": -321.75, |
| "logps/rejected": -315.0, |
| "loss": 0.6874, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 0.050048828125, |
| "rewards/margins": 0.011362075805664062, |
| "rewards/rejected": 0.0386810302734375, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.08448, |
| "grad_norm": 7.1747985320738445, |
| "learning_rate": 5.848101265822785e-07, |
| "logits/chosen": -1.19921875, |
| "logits/rejected": -1.224609375, |
| "logps/chosen": -331.5, |
| "logps/rejected": -333.5, |
| "loss": 0.6893, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.05633544921875, |
| "rewards/margins": 0.007636070251464844, |
| "rewards/rejected": 0.04864501953125, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.08576, |
| "grad_norm": 6.706114883500065, |
| "learning_rate": 5.936708860759493e-07, |
| "logits/chosen": -1.1787109375, |
| "logits/rejected": -1.17626953125, |
| "logps/chosen": -327.0, |
| "logps/rejected": -296.875, |
| "loss": 0.6849, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.05401611328125, |
| "rewards/margins": 0.016617774963378906, |
| "rewards/rejected": 0.0373687744140625, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.08704, |
| "grad_norm": 6.815414755393516, |
| "learning_rate": 6.025316455696202e-07, |
| "logits/chosen": -1.1162109375, |
| "logits/rejected": -1.1396484375, |
| "logps/chosen": -315.75, |
| "logps/rejected": -303.5, |
| "loss": 0.689, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": 0.0550537109375, |
| "rewards/margins": 0.00931549072265625, |
| "rewards/rejected": 0.0457763671875, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08832, |
| "grad_norm": 7.17640719707697, |
| "learning_rate": 6.113924050632911e-07, |
| "logits/chosen": -1.169921875, |
| "logits/rejected": -1.22314453125, |
| "logps/chosen": -345.5, |
| "logps/rejected": -337.75, |
| "loss": 0.6857, |
| "rewards/accuracies": 0.53125, |
| "rewards/chosen": 0.05963134765625, |
| "rewards/margins": 0.015005111694335938, |
| "rewards/rejected": 0.0446624755859375, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0896, |
| "grad_norm": 7.148882260506765, |
| "learning_rate": 6.202531645569619e-07, |
| "logits/chosen": -1.193359375, |
| "logits/rejected": -1.171875, |
| "logps/chosen": -324.25, |
| "logps/rejected": -319.25, |
| "loss": 0.6857, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": 0.058624267578125, |
| "rewards/margins": 0.015472412109375, |
| "rewards/rejected": 0.043121337890625, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.09088, |
| "grad_norm": 6.965207472135811, |
| "learning_rate": 6.291139240506329e-07, |
| "logits/chosen": -1.1845703125, |
| "logits/rejected": -1.189453125, |
| "logps/chosen": -320.75, |
| "logps/rejected": -302.625, |
| "loss": 0.6872, |
| "rewards/accuracies": 0.5390625, |
| "rewards/chosen": 0.059600830078125, |
| "rewards/margins": 0.011350154876708984, |
| "rewards/rejected": 0.0481719970703125, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.09216, |
| "grad_norm": 6.690590284756765, |
| "learning_rate": 6.379746835443037e-07, |
| "logits/chosen": -1.2001953125, |
| "logits/rejected": -1.1767578125, |
| "logps/chosen": -311.125, |
| "logps/rejected": -314.25, |
| "loss": 0.6867, |
| "rewards/accuracies": 0.4921875, |
| "rewards/chosen": 0.058197021484375, |
| "rewards/margins": 0.012401580810546875, |
| "rewards/rejected": 0.045806884765625, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.09344, |
| "grad_norm": 6.995206172986862, |
| "learning_rate": 6.468354430379746e-07, |
| "logits/chosen": -1.185546875, |
| "logits/rejected": -1.2392578125, |
| "logps/chosen": -306.125, |
| "logps/rejected": -320.25, |
| "loss": 0.692, |
| "rewards/accuracies": 0.4609375, |
| "rewards/chosen": 0.054534912109375, |
| "rewards/margins": 0.002094268798828125, |
| "rewards/rejected": 0.052459716796875, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.09472, |
| "grad_norm": 7.197486018158177, |
| "learning_rate": 6.556962025316455e-07, |
| "logits/chosen": -1.189453125, |
| "logits/rejected": -1.18701171875, |
| "logps/chosen": -321.75, |
| "logps/rejected": -315.25, |
| "loss": 0.6816, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": 0.06298828125, |
| "rewards/margins": 0.02439117431640625, |
| "rewards/rejected": 0.038543701171875, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 7.062298755275947, |
| "learning_rate": 6.645569620253163e-07, |
| "logits/chosen": -1.17529296875, |
| "logits/rejected": -1.18359375, |
| "logps/chosen": -337.625, |
| "logps/rejected": -304.875, |
| "loss": 0.6865, |
| "rewards/accuracies": 0.5859375, |
| "rewards/chosen": 0.068359375, |
| "rewards/margins": 0.01427459716796875, |
| "rewards/rejected": 0.0540771484375, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.09728, |
| "grad_norm": 6.9747053464578155, |
| "learning_rate": 6.734177215189874e-07, |
| "logits/chosen": -1.17431640625, |
| "logits/rejected": -1.197265625, |
| "logps/chosen": -320.25, |
| "logps/rejected": -305.0, |
| "loss": 0.6862, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": 0.06768798828125, |
| "rewards/margins": 0.014251708984375, |
| "rewards/rejected": 0.053436279296875, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09856, |
| "grad_norm": 7.0914726140322015, |
| "learning_rate": 6.822784810126582e-07, |
| "logits/chosen": -1.25, |
| "logits/rejected": -1.28515625, |
| "logps/chosen": -302.625, |
| "logps/rejected": -311.5, |
| "loss": 0.6864, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.06793212890625, |
| "rewards/margins": 0.014862060546875, |
| "rewards/rejected": 0.0531005859375, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.09984, |
| "grad_norm": 6.924658831045798, |
| "learning_rate": 6.911392405063291e-07, |
| "logits/chosen": -1.1904296875, |
| "logits/rejected": -1.2080078125, |
| "logps/chosen": -312.375, |
| "logps/rejected": -312.75, |
| "loss": 0.6879, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": 0.064971923828125, |
| "rewards/margins": 0.010891914367675781, |
| "rewards/rejected": 0.054107666015625, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.10112, |
| "grad_norm": 6.617651432015416, |
| "learning_rate": 7e-07, |
| "logits/chosen": -1.220703125, |
| "logits/rejected": -1.2080078125, |
| "logps/chosen": -307.5, |
| "logps/rejected": -288.125, |
| "loss": 0.6858, |
| "rewards/accuracies": 0.5703125, |
| "rewards/chosen": 0.06640625, |
| "rewards/margins": 0.01545858383178711, |
| "rewards/rejected": 0.050933837890625, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.1024, |
| "grad_norm": 7.148212562285202, |
| "learning_rate": 6.999964952031891e-07, |
| "logits/chosen": -1.142578125, |
| "logits/rejected": -1.13818359375, |
| "logps/chosen": -340.5, |
| "logps/rejected": -325.375, |
| "loss": 0.6825, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.069793701171875, |
| "rewards/margins": 0.021467208862304688, |
| "rewards/rejected": 0.04840087890625, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10368, |
| "grad_norm": 6.912997090681548, |
| "learning_rate": 6.999859808829482e-07, |
| "logits/chosen": -1.212890625, |
| "logits/rejected": -1.1943359375, |
| "logps/chosen": -329.125, |
| "logps/rejected": -302.75, |
| "loss": 0.6813, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": 0.071319580078125, |
| "rewards/margins": 0.024139404296875, |
| "rewards/rejected": 0.047149658203125, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.10496, |
| "grad_norm": 6.642590263041201, |
| "learning_rate": 6.999684572498524e-07, |
| "logits/chosen": -1.251953125, |
| "logits/rejected": -1.2294921875, |
| "logps/chosen": -306.5, |
| "logps/rejected": -282.875, |
| "loss": 0.6837, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.070587158203125, |
| "rewards/margins": 0.020760536193847656, |
| "rewards/rejected": 0.0496978759765625, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.10624, |
| "grad_norm": 7.156833321052252, |
| "learning_rate": 6.99943924654854e-07, |
| "logits/chosen": -1.20947265625, |
| "logits/rejected": -1.240234375, |
| "logps/chosen": -299.5, |
| "logps/rejected": -319.5, |
| "loss": 0.6876, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": 0.07061767578125, |
| "rewards/margins": 0.011600494384765625, |
| "rewards/rejected": 0.058990478515625, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.10752, |
| "grad_norm": 7.161233013169767, |
| "learning_rate": 6.999123835892781e-07, |
| "logits/chosen": -1.2470703125, |
| "logits/rejected": -1.2265625, |
| "logps/chosen": -361.625, |
| "logps/rejected": -346.875, |
| "loss": 0.6774, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.078857421875, |
| "rewards/margins": 0.03279876708984375, |
| "rewards/rejected": 0.04608154296875, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.1088, |
| "grad_norm": 7.380079541175457, |
| "learning_rate": 6.998738346848098e-07, |
| "logits/chosen": -1.169921875, |
| "logits/rejected": -1.171875, |
| "logps/chosen": -321.75, |
| "logps/rejected": -313.0, |
| "loss": 0.6758, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": 0.076507568359375, |
| "rewards/margins": 0.0360107421875, |
| "rewards/rejected": 0.04058837890625, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.11008, |
| "grad_norm": 6.787264499218527, |
| "learning_rate": 6.998282787134845e-07, |
| "logits/chosen": -1.2353515625, |
| "logits/rejected": -1.2216796875, |
| "logps/chosen": -307.625, |
| "logps/rejected": -280.625, |
| "loss": 0.681, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": 0.07275390625, |
| "rewards/margins": 0.025938034057617188, |
| "rewards/rejected": 0.046783447265625, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.11136, |
| "grad_norm": 7.070691711467475, |
| "learning_rate": 6.997757165876698e-07, |
| "logits/chosen": -1.212890625, |
| "logits/rejected": -1.21484375, |
| "logps/chosen": -333.5, |
| "logps/rejected": -326.5, |
| "loss": 0.681, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.075775146484375, |
| "rewards/margins": 0.024099349975585938, |
| "rewards/rejected": 0.05169677734375, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.11264, |
| "grad_norm": 7.281268067802746, |
| "learning_rate": 6.997161493600493e-07, |
| "logits/chosen": -1.2333984375, |
| "logits/rejected": -1.20751953125, |
| "logps/chosen": -342.375, |
| "logps/rejected": -297.75, |
| "loss": 0.6748, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": 0.0849609375, |
| "rewards/margins": 0.038787841796875, |
| "rewards/rejected": 0.0461883544921875, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.11392, |
| "grad_norm": 6.721540641608089, |
| "learning_rate": 6.996495782236003e-07, |
| "logits/chosen": -1.1689453125, |
| "logits/rejected": -1.1826171875, |
| "logps/chosen": -284.5, |
| "logps/rejected": -302.75, |
| "loss": 0.6877, |
| "rewards/accuracies": 0.5234375, |
| "rewards/chosen": 0.063812255859375, |
| "rewards/margins": 0.011150360107421875, |
| "rewards/rejected": 0.052581787109375, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.1152, |
| "grad_norm": 6.658821074174305, |
| "learning_rate": 6.9957600451157e-07, |
| "logits/chosen": -1.2216796875, |
| "logits/rejected": -1.2529296875, |
| "logps/chosen": -289.0, |
| "logps/rejected": -299.5, |
| "loss": 0.6808, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": 0.0616455078125, |
| "rewards/margins": 0.025604248046875, |
| "rewards/rejected": 0.03614044189453125, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.11648, |
| "grad_norm": 6.960320737670953, |
| "learning_rate": 6.994954296974495e-07, |
| "logits/chosen": -1.23388671875, |
| "logits/rejected": -1.263671875, |
| "logps/chosen": -302.5, |
| "logps/rejected": -310.25, |
| "loss": 0.6793, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": 0.07379150390625, |
| "rewards/margins": 0.029428482055664062, |
| "rewards/rejected": 0.044342041015625, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.11776, |
| "grad_norm": 7.049238366581128, |
| "learning_rate": 6.994078553949439e-07, |
| "logits/chosen": -1.2294921875, |
| "logits/rejected": -1.267578125, |
| "logps/chosen": -313.625, |
| "logps/rejected": -294.0, |
| "loss": 0.6731, |
| "rewards/accuracies": 0.6328125, |
| "rewards/chosen": 0.0853271484375, |
| "rewards/margins": 0.0420074462890625, |
| "rewards/rejected": 0.04345703125, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.11904, |
| "grad_norm": 6.649581467509272, |
| "learning_rate": 6.993132833579392e-07, |
| "logits/chosen": -1.20703125, |
| "logits/rejected": -1.2236328125, |
| "logps/chosen": -287.5, |
| "logps/rejected": -288.25, |
| "loss": 0.6779, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.075164794921875, |
| "rewards/margins": 0.03218841552734375, |
| "rewards/rejected": 0.04302978515625, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.12032, |
| "grad_norm": 6.872841249887952, |
| "learning_rate": 6.992117154804688e-07, |
| "logits/chosen": -1.1748046875, |
| "logits/rejected": -1.224609375, |
| "logps/chosen": -314.25, |
| "logps/rejected": -305.0, |
| "loss": 0.6812, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.07373046875, |
| "rewards/margins": 0.026458740234375, |
| "rewards/rejected": 0.0472412109375, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.1216, |
| "grad_norm": 7.327496061016414, |
| "learning_rate": 6.99103153796674e-07, |
| "logits/chosen": -1.173828125, |
| "logits/rejected": -1.1796875, |
| "logps/chosen": -337.25, |
| "logps/rejected": -300.75, |
| "loss": 0.6748, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": 0.07318115234375, |
| "rewards/margins": 0.0394134521484375, |
| "rewards/rejected": 0.03388214111328125, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.12288, |
| "grad_norm": 7.333095642704951, |
| "learning_rate": 6.989876004807644e-07, |
| "logits/chosen": -1.2060546875, |
| "logits/rejected": -1.2041015625, |
| "logps/chosen": -344.75, |
| "logps/rejected": -315.0, |
| "loss": 0.6733, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.07806396484375, |
| "rewards/margins": 0.04097175598144531, |
| "rewards/rejected": 0.0370635986328125, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.12416, |
| "grad_norm": 7.054266672465839, |
| "learning_rate": 6.988650578469735e-07, |
| "logits/chosen": -1.2177734375, |
| "logits/rejected": -1.2255859375, |
| "logps/chosen": -326.25, |
| "logps/rejected": -359.0, |
| "loss": 0.6661, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": 0.071868896484375, |
| "rewards/margins": 0.05510711669921875, |
| "rewards/rejected": 0.016735076904296875, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.12544, |
| "grad_norm": 7.188205202679432, |
| "learning_rate": 6.98735528349513e-07, |
| "logits/chosen": -1.18212890625, |
| "logits/rejected": -1.2021484375, |
| "logps/chosen": -273.875, |
| "logps/rejected": -304.0, |
| "loss": 0.6746, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.059661865234375, |
| "rewards/margins": 0.03852081298828125, |
| "rewards/rejected": 0.02120208740234375, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.12672, |
| "grad_norm": 7.374652320682456, |
| "learning_rate": 6.985990145825232e-07, |
| "logits/chosen": -1.13818359375, |
| "logits/rejected": -1.1669921875, |
| "logps/chosen": -330.25, |
| "logps/rejected": -322.0, |
| "loss": 0.6701, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.074493408203125, |
| "rewards/margins": 0.04855918884277344, |
| "rewards/rejected": 0.02597808837890625, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 7.010945640873385, |
| "learning_rate": 6.984555192800215e-07, |
| "logits/chosen": -1.244140625, |
| "logits/rejected": -1.296875, |
| "logps/chosen": -314.75, |
| "logps/rejected": -322.5, |
| "loss": 0.6711, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.076141357421875, |
| "rewards/margins": 0.0458831787109375, |
| "rewards/rejected": 0.030277252197265625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.128, |
| "eval_logits/chosen": -1.169921875, |
| "eval_logits/rejected": -1.2216796875, |
| "eval_logps/chosen": -314.125, |
| "eval_logps/rejected": -299.0625, |
| "eval_loss": 0.6733124852180481, |
| "eval_rewards/accuracies": 0.649609386920929, |
| "eval_rewards/chosen": 0.0625, |
| "eval_rewards/margins": 0.04170989990234375, |
| "eval_rewards/rejected": 0.020813941955566406, |
| "eval_runtime": 27.7049, |
| "eval_samples_per_second": 18.047, |
| "eval_steps_per_second": 0.578, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12928, |
| "grad_norm": 6.562841834429094, |
| "learning_rate": 6.983050453158471e-07, |
| "logits/chosen": -1.18994140625, |
| "logits/rejected": -1.171875, |
| "logps/chosen": -294.0, |
| "logps/rejected": -260.25, |
| "loss": 0.6775, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.058380126953125, |
| "rewards/margins": 0.0331878662109375, |
| "rewards/rejected": 0.025234222412109375, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.13056, |
| "grad_norm": 7.311489748652196, |
| "learning_rate": 6.981475957036038e-07, |
| "logits/chosen": -1.18359375, |
| "logits/rejected": -1.1904296875, |
| "logps/chosen": -319.0, |
| "logps/rejected": -322.5, |
| "loss": 0.6697, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": 0.061248779296875, |
| "rewards/margins": 0.049468994140625, |
| "rewards/rejected": 0.011905670166015625, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.13184, |
| "grad_norm": 6.969731388418404, |
| "learning_rate": 6.979831735965997e-07, |
| "logits/chosen": -1.19921875, |
| "logits/rejected": -1.2529296875, |
| "logps/chosen": -312.5, |
| "logps/rejected": -329.0, |
| "loss": 0.6769, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": 0.057342529296875, |
| "rewards/margins": 0.03508758544921875, |
| "rewards/rejected": 0.02228546142578125, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.13312, |
| "grad_norm": 7.382805496910107, |
| "learning_rate": 6.978117822877838e-07, |
| "logits/chosen": -1.1396484375, |
| "logits/rejected": -1.162109375, |
| "logps/chosen": -346.0, |
| "logps/rejected": -328.25, |
| "loss": 0.6633, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": 0.0740966796875, |
| "rewards/margins": 0.0634307861328125, |
| "rewards/rejected": 0.010618209838867188, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1344, |
| "grad_norm": 7.529482042313781, |
| "learning_rate": 6.976334252096801e-07, |
| "logits/chosen": -1.2216796875, |
| "logits/rejected": -1.26123046875, |
| "logps/chosen": -304.875, |
| "logps/rejected": -337.625, |
| "loss": 0.6731, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": 0.0610198974609375, |
| "rewards/margins": 0.043567657470703125, |
| "rewards/rejected": 0.0174560546875, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.13568, |
| "grad_norm": 7.287196640480614, |
| "learning_rate": 6.974481059343188e-07, |
| "logits/chosen": -1.240234375, |
| "logits/rejected": -1.224609375, |
| "logps/chosen": -338.25, |
| "logps/rejected": -301.125, |
| "loss": 0.6694, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": 0.0517730712890625, |
| "rewards/margins": 0.0513458251953125, |
| "rewards/rejected": 0.000377655029296875, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.13696, |
| "grad_norm": 7.457935260315348, |
| "learning_rate": 6.972558281731654e-07, |
| "logits/chosen": -1.18359375, |
| "logits/rejected": -1.2529296875, |
| "logps/chosen": -308.125, |
| "logps/rejected": -343.875, |
| "loss": 0.6727, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": 0.03688812255859375, |
| "rewards/margins": 0.04468536376953125, |
| "rewards/rejected": -0.007733345031738281, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.13824, |
| "grad_norm": 7.110683172273849, |
| "learning_rate": 6.970565957770455e-07, |
| "logits/chosen": -1.2783203125, |
| "logits/rejected": -1.279296875, |
| "logps/chosen": -325.375, |
| "logps/rejected": -300.75, |
| "loss": 0.6628, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": 0.04693603515625, |
| "rewards/margins": 0.06512451171875, |
| "rewards/rejected": -0.01806640625, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.13952, |
| "grad_norm": 7.4393288593299935, |
| "learning_rate": 6.96850412736068e-07, |
| "logits/chosen": -1.1689453125, |
| "logits/rejected": -1.20166015625, |
| "logps/chosen": -293.75, |
| "logps/rejected": -323.25, |
| "loss": 0.6661, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": 0.03907012939453125, |
| "rewards/margins": 0.056720733642578125, |
| "rewards/rejected": -0.0177459716796875, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.1408, |
| "grad_norm": 7.027414643492787, |
| "learning_rate": 6.96637283179545e-07, |
| "logits/chosen": -1.1953125, |
| "logits/rejected": -1.2119140625, |
| "logps/chosen": -319.25, |
| "logps/rejected": -312.0, |
| "loss": 0.6673, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.024829864501953125, |
| "rewards/margins": 0.054996490478515625, |
| "rewards/rejected": -0.030157089233398438, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.14208, |
| "grad_norm": 7.3061024388552065, |
| "learning_rate": 6.9641721137591e-07, |
| "logits/chosen": -1.1865234375, |
| "logits/rejected": -1.1982421875, |
| "logps/chosen": -347.5, |
| "logps/rejected": -333.5, |
| "loss": 0.653, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.02840423583984375, |
| "rewards/margins": 0.085723876953125, |
| "rewards/rejected": -0.057373046875, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.14336, |
| "grad_norm": 7.218209771794371, |
| "learning_rate": 6.961902017326311e-07, |
| "logits/chosen": -1.14892578125, |
| "logits/rejected": -1.22119140625, |
| "logps/chosen": -290.5, |
| "logps/rejected": -310.375, |
| "loss": 0.6562, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.0299224853515625, |
| "rewards/margins": 0.07830810546875, |
| "rewards/rejected": -0.0483551025390625, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.14464, |
| "grad_norm": 7.635227167652353, |
| "learning_rate": 6.959562587961234e-07, |
| "logits/chosen": -1.14794921875, |
| "logits/rejected": -1.17919921875, |
| "logps/chosen": -305.5, |
| "logps/rejected": -329.25, |
| "loss": 0.658, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": 0.0113372802734375, |
| "rewards/margins": 0.076263427734375, |
| "rewards/rejected": -0.06497573852539062, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.14592, |
| "grad_norm": 7.086183935410638, |
| "learning_rate": 6.957153872516586e-07, |
| "logits/chosen": -1.1669921875, |
| "logits/rejected": -1.236328125, |
| "logps/chosen": -334.25, |
| "logps/rejected": -324.25, |
| "loss": 0.667, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": 0.0051097869873046875, |
| "rewards/margins": 0.05683135986328125, |
| "rewards/rejected": -0.05169677734375, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1472, |
| "grad_norm": 6.902888342176391, |
| "learning_rate": 6.954675919232694e-07, |
| "logits/chosen": -1.20703125, |
| "logits/rejected": -1.23828125, |
| "logps/chosen": -307.75, |
| "logps/rejected": -292.125, |
| "loss": 0.6636, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": 0.0019683837890625, |
| "rewards/margins": 0.064910888671875, |
| "rewards/rejected": -0.06283187866210938, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.14848, |
| "grad_norm": 6.9983913393596735, |
| "learning_rate": 6.95212877773655e-07, |
| "logits/chosen": -1.1494140625, |
| "logits/rejected": -1.16650390625, |
| "logps/chosen": -316.25, |
| "logps/rejected": -320.25, |
| "loss": 0.6636, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01812744140625, |
| "rewards/margins": 0.06414794921875, |
| "rewards/rejected": -0.082275390625, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.14976, |
| "grad_norm": 7.039439125767687, |
| "learning_rate": 6.949512499040799e-07, |
| "logits/chosen": -1.205078125, |
| "logits/rejected": -1.2041015625, |
| "logps/chosen": -314.0, |
| "logps/rejected": -309.75, |
| "loss": 0.6702, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.03998565673828125, |
| "rewards/margins": 0.051842689514160156, |
| "rewards/rejected": -0.091796875, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.15104, |
| "grad_norm": 7.433508336249601, |
| "learning_rate": 6.946827135542728e-07, |
| "logits/chosen": -1.09521484375, |
| "logits/rejected": -1.140625, |
| "logps/chosen": -310.25, |
| "logps/rejected": -334.625, |
| "loss": 0.6686, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.051239013671875, |
| "rewards/margins": 0.05425071716308594, |
| "rewards/rejected": -0.105438232421875, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.15232, |
| "grad_norm": 7.340968519858034, |
| "learning_rate": 6.944072741023215e-07, |
| "logits/chosen": -1.1787109375, |
| "logits/rejected": -1.2099609375, |
| "logps/chosen": -325.75, |
| "logps/rejected": -330.5, |
| "loss": 0.6539, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.0429840087890625, |
| "rewards/margins": 0.084930419921875, |
| "rewards/rejected": -0.1279296875, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.1536, |
| "grad_norm": 7.136653505773104, |
| "learning_rate": 6.941249370645649e-07, |
| "logits/chosen": -1.1865234375, |
| "logits/rejected": -1.171875, |
| "logps/chosen": -329.25, |
| "logps/rejected": -323.75, |
| "loss": 0.6649, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -0.05987548828125, |
| "rewards/margins": 0.06435394287109375, |
| "rewards/rejected": -0.12432861328125, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.15488, |
| "grad_norm": 7.355481505262627, |
| "learning_rate": 6.938357080954826e-07, |
| "logits/chosen": -1.05419921875, |
| "logits/rejected": -1.076171875, |
| "logps/chosen": -334.25, |
| "logps/rejected": -338.25, |
| "loss": 0.6473, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.03987884521484375, |
| "rewards/margins": 0.09857177734375, |
| "rewards/rejected": -0.1387939453125, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.15616, |
| "grad_norm": 7.263830648748775, |
| "learning_rate": 6.935395929875821e-07, |
| "logits/chosen": -1.17333984375, |
| "logits/rejected": -1.1708984375, |
| "logps/chosen": -335.75, |
| "logps/rejected": -332.0, |
| "loss": 0.6578, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0830535888671875, |
| "rewards/margins": 0.077911376953125, |
| "rewards/rejected": -0.16094970703125, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.15744, |
| "grad_norm": 8.404772457823935, |
| "learning_rate": 6.932365976712819e-07, |
| "logits/chosen": -1.09716796875, |
| "logits/rejected": -1.13525390625, |
| "logps/chosen": -315.875, |
| "logps/rejected": -359.0, |
| "loss": 0.6414, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.076934814453125, |
| "rewards/margins": 0.115936279296875, |
| "rewards/rejected": -0.19287109375, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.15872, |
| "grad_norm": 7.621326463499134, |
| "learning_rate": 6.929267282147936e-07, |
| "logits/chosen": -1.07373046875, |
| "logits/rejected": -1.11767578125, |
| "logps/chosen": -344.125, |
| "logps/rejected": -355.0, |
| "loss": 0.6457, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -0.08642578125, |
| "rewards/margins": 0.1038818359375, |
| "rewards/rejected": -0.1903076171875, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 7.788922626286128, |
| "learning_rate": 6.926099908240002e-07, |
| "logits/chosen": -1.142578125, |
| "logits/rejected": -1.1640625, |
| "logps/chosen": -328.5, |
| "logps/rejected": -356.75, |
| "loss": 0.6583, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.102294921875, |
| "rewards/margins": 0.0783233642578125, |
| "rewards/rejected": -0.18048095703125, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.16128, |
| "grad_norm": 7.473164766096368, |
| "learning_rate": 6.922863918423311e-07, |
| "logits/chosen": -1.12744140625, |
| "logits/rejected": -1.1103515625, |
| "logps/chosen": -343.25, |
| "logps/rejected": -337.75, |
| "loss": 0.6575, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.12322998046875, |
| "rewards/margins": 0.080108642578125, |
| "rewards/rejected": -0.2030029296875, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.16256, |
| "grad_norm": 7.64697192150379, |
| "learning_rate": 6.919559377506359e-07, |
| "logits/chosen": -1.177734375, |
| "logits/rejected": -1.21484375, |
| "logps/chosen": -333.5, |
| "logps/rejected": -354.75, |
| "loss": 0.6485, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.13275146484375, |
| "rewards/margins": 0.10205078125, |
| "rewards/rejected": -0.2347412109375, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.16384, |
| "grad_norm": 7.253778836602147, |
| "learning_rate": 6.916186351670546e-07, |
| "logits/chosen": -1.12353515625, |
| "logits/rejected": -1.16650390625, |
| "logps/chosen": -318.625, |
| "logps/rejected": -313.5, |
| "loss": 0.6458, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.16357421875, |
| "rewards/margins": 0.106414794921875, |
| "rewards/rejected": -0.2698974609375, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.16512, |
| "grad_norm": 7.447630373123696, |
| "learning_rate": 6.91274490846884e-07, |
| "logits/chosen": -1.12158203125, |
| "logits/rejected": -1.10791015625, |
| "logps/chosen": -333.75, |
| "logps/rejected": -306.625, |
| "loss": 0.6538, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.17535400390625, |
| "rewards/margins": 0.088592529296875, |
| "rewards/rejected": -0.263916015625, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.1664, |
| "grad_norm": 7.4679011809033815, |
| "learning_rate": 6.90923511682444e-07, |
| "logits/chosen": -1.1396484375, |
| "logits/rejected": -1.171875, |
| "logps/chosen": -331.75, |
| "logps/rejected": -361.5, |
| "loss": 0.6447, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.19451904296875, |
| "rewards/margins": 0.112060546875, |
| "rewards/rejected": -0.3065185546875, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.16768, |
| "grad_norm": 7.362515597925838, |
| "learning_rate": 6.905657047029384e-07, |
| "logits/chosen": -1.087890625, |
| "logits/rejected": -1.1240234375, |
| "logps/chosen": -324.75, |
| "logps/rejected": -308.875, |
| "loss": 0.6387, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.2166748046875, |
| "rewards/margins": 0.126708984375, |
| "rewards/rejected": -0.34326171875, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.16896, |
| "grad_norm": 7.770937607646877, |
| "learning_rate": 6.90201077074314e-07, |
| "logits/chosen": -1.1044921875, |
| "logits/rejected": -1.1640625, |
| "logps/chosen": -329.25, |
| "logps/rejected": -347.25, |
| "loss": 0.6436, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.2451171875, |
| "rewards/margins": 0.11639404296875, |
| "rewards/rejected": -0.36181640625, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.17024, |
| "grad_norm": 7.943995279771987, |
| "learning_rate": 6.898296360991182e-07, |
| "logits/chosen": -1.10205078125, |
| "logits/rejected": -1.13330078125, |
| "logps/chosen": -356.25, |
| "logps/rejected": -357.25, |
| "loss": 0.6337, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.2764892578125, |
| "rewards/margins": 0.13845062255859375, |
| "rewards/rejected": -0.414794921875, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.17152, |
| "grad_norm": 8.15643307389567, |
| "learning_rate": 6.894513892163518e-07, |
| "logits/chosen": -1.0361328125, |
| "logits/rejected": -1.05517578125, |
| "logps/chosen": -372.0, |
| "logps/rejected": -356.75, |
| "loss": 0.6553, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.291015625, |
| "rewards/margins": 0.0922698974609375, |
| "rewards/rejected": -0.38330078125, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.1728, |
| "grad_norm": 8.402149118568568, |
| "learning_rate": 6.890663440013204e-07, |
| "logits/chosen": -1.07666015625, |
| "logits/rejected": -1.1103515625, |
| "logps/chosen": -367.25, |
| "logps/rejected": -380.5, |
| "loss": 0.636, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.3759765625, |
| "rewards/margins": 0.14380645751953125, |
| "rewards/rejected": -0.519287109375, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.17408, |
| "grad_norm": 7.676163099722455, |
| "learning_rate": 6.886745081654823e-07, |
| "logits/chosen": -1.06005859375, |
| "logits/rejected": -1.076171875, |
| "logps/chosen": -365.25, |
| "logps/rejected": -364.125, |
| "loss": 0.6389, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.3699951171875, |
| "rewards/margins": 0.129241943359375, |
| "rewards/rejected": -0.498779296875, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.17536, |
| "grad_norm": 7.792721927404631, |
| "learning_rate": 6.882758895562949e-07, |
| "logits/chosen": -0.97998046875, |
| "logits/rejected": -1.05126953125, |
| "logps/chosen": -360.25, |
| "logps/rejected": -366.25, |
| "loss": 0.6345, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -0.42626953125, |
| "rewards/margins": 0.145538330078125, |
| "rewards/rejected": -0.572021484375, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.17664, |
| "grad_norm": 13.220416758370622, |
| "learning_rate": 6.878704961570564e-07, |
| "logits/chosen": -0.9208984375, |
| "logits/rejected": -0.9052734375, |
| "logps/chosen": -427.75, |
| "logps/rejected": -412.75, |
| "loss": 0.6566, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.642822265625, |
| "rewards/margins": 0.10161972045898438, |
| "rewards/rejected": -0.744140625, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.17792, |
| "grad_norm": 8.66931437809148, |
| "learning_rate": 6.874583360867468e-07, |
| "logits/chosen": -0.8505859375, |
| "logits/rejected": -0.88330078125, |
| "logps/chosen": -399.0, |
| "logps/rejected": -415.0, |
| "loss": 0.6271, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.6767578125, |
| "rewards/margins": 0.1732177734375, |
| "rewards/rejected": -0.849609375, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.1792, |
| "grad_norm": 8.526440571507557, |
| "learning_rate": 6.87039417599865e-07, |
| "logits/chosen": -0.875, |
| "logits/rejected": -0.91796875, |
| "logps/chosen": -376.25, |
| "logps/rejected": -390.25, |
| "loss": 0.6212, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -0.6337890625, |
| "rewards/margins": 0.18011474609375, |
| "rewards/rejected": -0.8134765625, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.18048, |
| "grad_norm": 9.392422913412458, |
| "learning_rate": 6.866137490862636e-07, |
| "logits/chosen": -0.78857421875, |
| "logits/rejected": -0.8232421875, |
| "logps/chosen": -392.25, |
| "logps/rejected": -399.25, |
| "loss": 0.6189, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.6796875, |
| "rewards/margins": 0.19610595703125, |
| "rewards/rejected": -0.87646484375, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.18176, |
| "grad_norm": 9.326150506259697, |
| "learning_rate": 6.861813390709803e-07, |
| "logits/chosen": -0.71630859375, |
| "logits/rejected": -0.78125, |
| "logps/chosen": -369.5, |
| "logps/rejected": -404.25, |
| "loss": 0.6118, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7138671875, |
| "rewards/margins": 0.21263885498046875, |
| "rewards/rejected": -0.92529296875, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.18304, |
| "grad_norm": 8.384403124201002, |
| "learning_rate": 6.857421962140681e-07, |
| "logits/chosen": -0.8466796875, |
| "logits/rejected": -0.87451171875, |
| "logps/chosen": -408.5, |
| "logps/rejected": -402.0, |
| "loss": 0.6247, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -0.72265625, |
| "rewards/margins": 0.19293212890625, |
| "rewards/rejected": -0.91552734375, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.18432, |
| "grad_norm": 9.388640196159205, |
| "learning_rate": 6.852963293104211e-07, |
| "logits/chosen": -0.94677734375, |
| "logits/rejected": -0.9345703125, |
| "logps/chosen": -389.25, |
| "logps/rejected": -378.0, |
| "loss": 0.621, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.6689453125, |
| "rewards/margins": 0.1993408203125, |
| "rewards/rejected": -0.86865234375, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.1856, |
| "grad_norm": 11.03326645275443, |
| "learning_rate": 6.848437472895988e-07, |
| "logits/chosen": -0.9150390625, |
| "logits/rejected": -1.00146484375, |
| "logps/chosen": -386.75, |
| "logps/rejected": -431.25, |
| "loss": 0.5984, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.642578125, |
| "rewards/margins": 0.234375, |
| "rewards/rejected": -0.876953125, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.18688, |
| "grad_norm": 61.60360857590808, |
| "learning_rate": 6.843844592156471e-07, |
| "logits/chosen": -0.83984375, |
| "logits/rejected": -0.96044921875, |
| "logps/chosen": -348.5, |
| "logps/rejected": -454.75, |
| "loss": 0.6328, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.7197265625, |
| "rewards/margins": 0.17962646484375, |
| "rewards/rejected": -0.89892578125, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.18816, |
| "grad_norm": 15.422965574908865, |
| "learning_rate": 6.839184742869166e-07, |
| "logits/chosen": -0.86328125, |
| "logits/rejected": -0.85986328125, |
| "logps/chosen": -401.25, |
| "logps/rejected": -429.5, |
| "loss": 0.5783, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -0.79638671875, |
| "rewards/margins": 0.29449462890625, |
| "rewards/rejected": -1.08984375, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.18944, |
| "grad_norm": 13.374494698966174, |
| "learning_rate": 6.834458018358787e-07, |
| "logits/chosen": -0.8583984375, |
| "logits/rejected": -0.83056640625, |
| "logps/chosen": -438.25, |
| "logps/rejected": -396.25, |
| "loss": 0.6247, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.9375, |
| "rewards/margins": 0.221160888671875, |
| "rewards/rejected": -1.15869140625, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.19072, |
| "grad_norm": 10.50579381133434, |
| "learning_rate": 6.829664513289386e-07, |
| "logits/chosen": -0.6806640625, |
| "logits/rejected": -0.696533203125, |
| "logps/chosen": -429.25, |
| "logps/rejected": -445.0, |
| "loss": 0.6204, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.02392578125, |
| "rewards/margins": 0.21826171875, |
| "rewards/rejected": -1.24072265625, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 8.912036503648771, |
| "learning_rate": 6.824804323662456e-07, |
| "logits/chosen": -0.833984375, |
| "logits/rejected": -0.85009765625, |
| "logps/chosen": -450.5, |
| "logps/rejected": -462.75, |
| "loss": 0.6157, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -1.15869140625, |
| "rewards/margins": 0.23162841796875, |
| "rewards/rejected": -1.3916015625, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19328, |
| "grad_norm": 8.776101733274905, |
| "learning_rate": 6.819877546815008e-07, |
| "logits/chosen": -0.8564453125, |
| "logits/rejected": -0.875, |
| "logps/chosen": -438.25, |
| "logps/rejected": -447.25, |
| "loss": 0.6202, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -1.07470703125, |
| "rewards/margins": 0.23724365234375, |
| "rewards/rejected": -1.3115234375, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.19456, |
| "grad_norm": 10.578778147443705, |
| "learning_rate": 6.814884281417626e-07, |
| "logits/chosen": -0.8427734375, |
| "logits/rejected": -0.8701171875, |
| "logps/chosen": -434.75, |
| "logps/rejected": -448.25, |
| "loss": 0.591, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -1.03515625, |
| "rewards/margins": 0.2916259765625, |
| "rewards/rejected": -1.3251953125, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.19584, |
| "grad_norm": 8.049518104374286, |
| "learning_rate": 6.809824627472483e-07, |
| "logits/chosen": -0.8037109375, |
| "logits/rejected": -0.8603515625, |
| "logps/chosen": -419.5, |
| "logps/rejected": -428.5, |
| "loss": 0.5812, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.02392578125, |
| "rewards/margins": 0.31982421875, |
| "rewards/rejected": -1.34375, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.19712, |
| "grad_norm": 8.793213481082436, |
| "learning_rate": 6.804698686311346e-07, |
| "logits/chosen": -0.87255859375, |
| "logits/rejected": -0.85888671875, |
| "logps/chosen": -437.75, |
| "logps/rejected": -433.5, |
| "loss": 0.6043, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -1.11376953125, |
| "rewards/margins": 0.26483154296875, |
| "rewards/rejected": -1.37939453125, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.1984, |
| "grad_norm": 17.16055737002578, |
| "learning_rate": 6.79950656059354e-07, |
| "logits/chosen": -0.888427734375, |
| "logits/rejected": -0.934326171875, |
| "logps/chosen": -470.75, |
| "logps/rejected": -460.25, |
| "loss": 0.5672, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.1201171875, |
| "rewards/margins": 0.353271484375, |
| "rewards/rejected": -1.474609375, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.19968, |
| "grad_norm": 8.561528637211719, |
| "learning_rate": 6.794248354303899e-07, |
| "logits/chosen": -0.80615234375, |
| "logits/rejected": -0.83203125, |
| "logps/chosen": -441.75, |
| "logps/rejected": -466.25, |
| "loss": 0.6081, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -1.259765625, |
| "rewards/margins": 0.260406494140625, |
| "rewards/rejected": -1.5205078125, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.20096, |
| "grad_norm": 8.69093146968424, |
| "learning_rate": 6.788924172750679e-07, |
| "logits/chosen": -0.87646484375, |
| "logits/rejected": -0.9169921875, |
| "logps/chosen": -437.5, |
| "logps/rejected": -462.25, |
| "loss": 0.5753, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -1.1572265625, |
| "rewards/margins": 0.3331298828125, |
| "rewards/rejected": -1.48828125, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.20224, |
| "grad_norm": 8.31733029290186, |
| "learning_rate": 6.783534122563447e-07, |
| "logits/chosen": -0.7666015625, |
| "logits/rejected": -0.8388671875, |
| "logps/chosen": -427.5, |
| "logps/rejected": -477.0, |
| "loss": 0.5649, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.2607421875, |
| "rewards/margins": 0.38958740234375, |
| "rewards/rejected": -1.6484375, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.20352, |
| "grad_norm": 11.171939635720042, |
| "learning_rate": 6.77807831169095e-07, |
| "logits/chosen": -0.802734375, |
| "logits/rejected": -0.86181640625, |
| "logps/chosen": -476.75, |
| "logps/rejected": -498.0, |
| "loss": 0.5918, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -1.4375, |
| "rewards/margins": 0.305908203125, |
| "rewards/rejected": -1.7431640625, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.2048, |
| "grad_norm": 8.281097141476181, |
| "learning_rate": 6.772556849398952e-07, |
| "logits/chosen": -0.843994140625, |
| "logits/rejected": -0.884765625, |
| "logps/chosen": -505.75, |
| "logps/rejected": -551.75, |
| "loss": 0.5537, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -1.4814453125, |
| "rewards/margins": 0.419677734375, |
| "rewards/rejected": -1.9033203125, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.20608, |
| "grad_norm": 10.239198232842558, |
| "learning_rate": 6.766969846268044e-07, |
| "logits/chosen": -0.7734375, |
| "logits/rejected": -0.830078125, |
| "logps/chosen": -459.5, |
| "logps/rejected": -488.75, |
| "loss": 0.6061, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -1.57373046875, |
| "rewards/margins": 0.25689697265625, |
| "rewards/rejected": -1.830078125, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.20736, |
| "grad_norm": 9.747342835599794, |
| "learning_rate": 6.761317414191428e-07, |
| "logits/chosen": -0.8076171875, |
| "logits/rejected": -0.8447265625, |
| "logps/chosen": -483.75, |
| "logps/rejected": -545.25, |
| "loss": 0.5981, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -1.779296875, |
| "rewards/margins": 0.2919921875, |
| "rewards/rejected": -2.072265625, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.20864, |
| "grad_norm": 9.525902367060457, |
| "learning_rate": 6.755599666372684e-07, |
| "logits/chosen": -0.80859375, |
| "logits/rejected": -0.8984375, |
| "logps/chosen": -454.5, |
| "logps/rejected": -502.25, |
| "loss": 0.582, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -1.671875, |
| "rewards/margins": 0.3372802734375, |
| "rewards/rejected": -2.0078125, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.20992, |
| "grad_norm": 10.81029592784359, |
| "learning_rate": 6.749816717323492e-07, |
| "logits/chosen": -0.7578125, |
| "logits/rejected": -0.8056640625, |
| "logps/chosen": -518.75, |
| "logps/rejected": -571.0, |
| "loss": 0.5573, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -1.8740234375, |
| "rewards/margins": 0.4305419921875, |
| "rewards/rejected": -2.3046875, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.2112, |
| "grad_norm": 8.72875920033335, |
| "learning_rate": 6.743968682861345e-07, |
| "logits/chosen": -0.75830078125, |
| "logits/rejected": -0.82470703125, |
| "logps/chosen": -456.5, |
| "logps/rejected": -518.25, |
| "loss": 0.5457, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -1.642578125, |
| "rewards/margins": 0.4796142578125, |
| "rewards/rejected": -2.123046875, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.21248, |
| "grad_norm": 12.52887713385305, |
| "learning_rate": 6.738055680107232e-07, |
| "logits/chosen": -0.8310546875, |
| "logits/rejected": -0.83740234375, |
| "logps/chosen": -502.75, |
| "logps/rejected": -525.25, |
| "loss": 0.6151, |
| "rewards/accuracies": 0.6015625, |
| "rewards/chosen": -1.794921875, |
| "rewards/margins": 0.2906494140625, |
| "rewards/rejected": -2.083984375, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.21376, |
| "grad_norm": 12.573683619458626, |
| "learning_rate": 6.732077827483283e-07, |
| "logits/chosen": -0.79052734375, |
| "logits/rejected": -0.7919921875, |
| "logps/chosen": -510.75, |
| "logps/rejected": -537.5, |
| "loss": 0.6154, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -1.8447265625, |
| "rewards/margins": 0.3406219482421875, |
| "rewards/rejected": -2.185546875, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.21504, |
| "grad_norm": 8.275606690031259, |
| "learning_rate": 6.726035244710405e-07, |
| "logits/chosen": -0.796142578125, |
| "logits/rejected": -0.814453125, |
| "logps/chosen": -465.5, |
| "logps/rejected": -497.0, |
| "loss": 0.5702, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -1.51953125, |
| "rewards/margins": 0.4149169921875, |
| "rewards/rejected": -1.93359375, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.21632, |
| "grad_norm": 10.111116876816661, |
| "learning_rate": 6.719928052805885e-07, |
| "logits/chosen": -0.83642578125, |
| "logits/rejected": -0.841796875, |
| "logps/chosen": -464.0, |
| "logps/rejected": -512.75, |
| "loss": 0.5484, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.36328125, |
| "rewards/margins": 0.4599609375, |
| "rewards/rejected": -1.82421875, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.2176, |
| "grad_norm": 14.001498683826862, |
| "learning_rate": 6.713756374080959e-07, |
| "logits/chosen": -0.8994140625, |
| "logits/rejected": -0.9580078125, |
| "logps/chosen": -458.75, |
| "logps/rejected": -498.5, |
| "loss": 0.5747, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -1.2998046875, |
| "rewards/margins": 0.36627197265625, |
| "rewards/rejected": -1.6669921875, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.21888, |
| "grad_norm": 13.154660100000475, |
| "learning_rate": 6.70752033213837e-07, |
| "logits/chosen": -0.88525390625, |
| "logits/rejected": -0.93603515625, |
| "logps/chosen": -467.0, |
| "logps/rejected": -471.5, |
| "loss": 0.5537, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -1.3125, |
| "rewards/margins": 0.42266845703125, |
| "rewards/rejected": -1.7353515625, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.22016, |
| "grad_norm": 11.432368493570536, |
| "learning_rate": 6.70122005186989e-07, |
| "logits/chosen": -0.79443359375, |
| "logits/rejected": -0.85107421875, |
| "logps/chosen": -481.5, |
| "logps/rejected": -537.0, |
| "loss": 0.5738, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -1.56640625, |
| "rewards/margins": 0.3822021484375, |
| "rewards/rejected": -1.9443359375, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.22144, |
| "grad_norm": 9.822098330205291, |
| "learning_rate": 6.694855659453818e-07, |
| "logits/chosen": -0.84765625, |
| "logits/rejected": -0.84912109375, |
| "logps/chosen": -511.75, |
| "logps/rejected": -511.5, |
| "loss": 0.5899, |
| "rewards/accuracies": 0.6640625, |
| "rewards/chosen": -1.70703125, |
| "rewards/margins": 0.35321044921875, |
| "rewards/rejected": -2.0595703125, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.22272, |
| "grad_norm": 15.790896268251576, |
| "learning_rate": 6.688427282352449e-07, |
| "logits/chosen": -0.755859375, |
| "logits/rejected": -0.80126953125, |
| "logps/chosen": -492.25, |
| "logps/rejected": -520.5, |
| "loss": 0.5308, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -1.7265625, |
| "rewards/margins": 0.4969482421875, |
| "rewards/rejected": -2.2255859375, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 10.091430688703294, |
| "learning_rate": 6.681935049309533e-07, |
| "logits/chosen": -0.601318359375, |
| "logits/rejected": -0.64306640625, |
| "logps/chosen": -560.0, |
| "logps/rejected": -612.5, |
| "loss": 0.5713, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.1953125, |
| "rewards/margins": 0.515625, |
| "rewards/rejected": -2.7109375, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.22528, |
| "grad_norm": 9.197272514290859, |
| "learning_rate": 6.675379090347682e-07, |
| "logits/chosen": -0.64501953125, |
| "logits/rejected": -0.674560546875, |
| "logps/chosen": -608.75, |
| "logps/rejected": -654.0, |
| "loss": 0.5305, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.6953125, |
| "rewards/margins": 0.660400390625, |
| "rewards/rejected": -3.35546875, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.22656, |
| "grad_norm": 9.460572041512634, |
| "learning_rate": 6.668759536765779e-07, |
| "logits/chosen": -0.6484375, |
| "logits/rejected": -0.671630859375, |
| "logps/chosen": -600.5, |
| "logps/rejected": -642.0, |
| "loss": 0.567, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -2.6953125, |
| "rewards/margins": 0.5928955078125, |
| "rewards/rejected": -3.291015625, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.22784, |
| "grad_norm": 40.385948070557, |
| "learning_rate": 6.662076521136337e-07, |
| "logits/chosen": -0.5048828125, |
| "logits/rejected": -0.554443359375, |
| "logps/chosen": -604.0, |
| "logps/rejected": -661.75, |
| "loss": 0.5893, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -2.9296875, |
| "rewards/margins": 0.66015625, |
| "rewards/rejected": -3.58984375, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.22912, |
| "grad_norm": 22.98261625779329, |
| "learning_rate": 6.655330177302857e-07, |
| "logits/chosen": -0.63525390625, |
| "logits/rejected": -0.71435546875, |
| "logps/chosen": -649.5, |
| "logps/rejected": -725.0, |
| "loss": 0.5748, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.234375, |
| "rewards/margins": 0.593994140625, |
| "rewards/rejected": -3.830078125, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.2304, |
| "grad_norm": 8.847574852157168, |
| "learning_rate": 6.64852064037713e-07, |
| "logits/chosen": -0.6103515625, |
| "logits/rejected": -0.654296875, |
| "logps/chosen": -607.0, |
| "logps/rejected": -687.0, |
| "loss": 0.5131, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.97265625, |
| "rewards/margins": 0.7249755859375, |
| "rewards/rejected": -3.6953125, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.23168, |
| "grad_norm": 9.669090706723097, |
| "learning_rate": 6.641648046736549e-07, |
| "logits/chosen": -0.62060546875, |
| "logits/rejected": -0.67236328125, |
| "logps/chosen": -644.5, |
| "logps/rejected": -689.5, |
| "loss": 0.5648, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -3.06640625, |
| "rewards/margins": 0.6990966796875, |
| "rewards/rejected": -3.765625, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.23296, |
| "grad_norm": 9.666414731608148, |
| "learning_rate": 6.634712534021367e-07, |
| "logits/chosen": -0.584716796875, |
| "logits/rejected": -0.6240234375, |
| "logps/chosen": -589.0, |
| "logps/rejected": -635.5, |
| "loss": 0.5225, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.626953125, |
| "rewards/margins": 0.66064453125, |
| "rewards/rejected": -3.291015625, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.23424, |
| "grad_norm": 9.206989504196308, |
| "learning_rate": 6.627714241131942e-07, |
| "logits/chosen": -0.568115234375, |
| "logits/rejected": -0.587158203125, |
| "logps/chosen": -609.25, |
| "logps/rejected": -633.5, |
| "loss": 0.5513, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -2.73828125, |
| "rewards/margins": 0.598876953125, |
| "rewards/rejected": -3.333984375, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.23552, |
| "grad_norm": 39.055918050847936, |
| "learning_rate": 6.620653308225959e-07, |
| "logits/chosen": -0.563232421875, |
| "logits/rejected": -0.642578125, |
| "logps/chosen": -568.5, |
| "logps/rejected": -614.5, |
| "loss": 0.6409, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -2.60546875, |
| "rewards/margins": 0.454833984375, |
| "rewards/rejected": -3.060546875, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.2368, |
| "grad_norm": 15.55460973147395, |
| "learning_rate": 6.613529876715619e-07, |
| "logits/chosen": -0.669189453125, |
| "logits/rejected": -0.71875, |
| "logps/chosen": -591.5, |
| "logps/rejected": -631.0, |
| "loss": 0.529, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -2.4326171875, |
| "rewards/margins": 0.650634765625, |
| "rewards/rejected": -3.080078125, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.23808, |
| "grad_norm": 8.09681757649616, |
| "learning_rate": 6.606344089264805e-07, |
| "logits/chosen": -0.70849609375, |
| "logits/rejected": -0.7412109375, |
| "logps/chosen": -568.25, |
| "logps/rejected": -573.5, |
| "loss": 0.5365, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -2.1474609375, |
| "rewards/margins": 0.56982421875, |
| "rewards/rejected": -2.716796875, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.23936, |
| "grad_norm": 8.312763295596206, |
| "learning_rate": 6.599096089786234e-07, |
| "logits/chosen": -0.8388671875, |
| "logits/rejected": -0.873046875, |
| "logps/chosen": -519.5, |
| "logps/rejected": -573.25, |
| "loss": 0.5779, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.048828125, |
| "rewards/margins": 0.4176025390625, |
| "rewards/rejected": -2.4677734375, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.24064, |
| "grad_norm": 8.587134768192069, |
| "learning_rate": 6.591786023438564e-07, |
| "logits/chosen": -0.638519287109375, |
| "logits/rejected": -0.7158203125, |
| "logps/chosen": -489.75, |
| "logps/rejected": -564.75, |
| "loss": 0.5731, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -1.8544921875, |
| "rewards/margins": 0.45361328125, |
| "rewards/rejected": -2.3056640625, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.24192, |
| "grad_norm": 13.116768076581428, |
| "learning_rate": 6.584414036623496e-07, |
| "logits/chosen": -0.89697265625, |
| "logits/rejected": -0.98388671875, |
| "logps/chosen": -494.0, |
| "logps/rejected": -540.0, |
| "loss": 0.5309, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.63671875, |
| "rewards/margins": 0.5660400390625, |
| "rewards/rejected": -2.203125, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.2432, |
| "grad_norm": 9.963354836403468, |
| "learning_rate": 6.576980276982832e-07, |
| "logits/chosen": -0.82666015625, |
| "logits/rejected": -0.91162109375, |
| "logps/chosen": -469.5, |
| "logps/rejected": -498.25, |
| "loss": 0.5733, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -1.67578125, |
| "rewards/margins": 0.40509033203125, |
| "rewards/rejected": -2.08203125, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.24448, |
| "grad_norm": 9.679252853242984, |
| "learning_rate": 6.569484893395527e-07, |
| "logits/chosen": -0.92138671875, |
| "logits/rejected": -0.974609375, |
| "logps/chosen": -504.75, |
| "logps/rejected": -556.5, |
| "loss": 0.559, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -1.8671875, |
| "rewards/margins": 0.467041015625, |
| "rewards/rejected": -2.333984375, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.24576, |
| "grad_norm": 11.180357368507915, |
| "learning_rate": 6.561928035974705e-07, |
| "logits/chosen": -0.875, |
| "logits/rejected": -0.89599609375, |
| "logps/chosen": -547.25, |
| "logps/rejected": -599.25, |
| "loss": 0.5339, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.015625, |
| "rewards/margins": 0.6204833984375, |
| "rewards/rejected": -2.634765625, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.24704, |
| "grad_norm": 11.453973983300482, |
| "learning_rate": 6.55430985606465e-07, |
| "logits/chosen": -0.84228515625, |
| "logits/rejected": -0.87890625, |
| "logps/chosen": -550.25, |
| "logps/rejected": -584.5, |
| "loss": 0.5265, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.18359375, |
| "rewards/margins": 0.672607421875, |
| "rewards/rejected": -2.857421875, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.24832, |
| "grad_norm": 19.413502192585955, |
| "learning_rate": 6.546630506237778e-07, |
| "logits/chosen": -0.7080078125, |
| "logits/rejected": -0.7685546875, |
| "logps/chosen": -565.0, |
| "logps/rejected": -626.75, |
| "loss": 0.5264, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.2333984375, |
| "rewards/margins": 0.642578125, |
| "rewards/rejected": -2.873046875, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.2496, |
| "grad_norm": 11.99148623186324, |
| "learning_rate": 6.538890140291578e-07, |
| "logits/chosen": -0.73046875, |
| "logits/rejected": -0.75048828125, |
| "logps/chosen": -602.25, |
| "logps/rejected": -628.75, |
| "loss": 0.5333, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.5947265625, |
| "rewards/margins": 0.683349609375, |
| "rewards/rejected": -3.27734375, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.25088, |
| "grad_norm": 10.31797612543643, |
| "learning_rate": 6.531088913245536e-07, |
| "logits/chosen": -0.64794921875, |
| "logits/rejected": -0.716796875, |
| "logps/chosen": -635.75, |
| "logps/rejected": -716.0, |
| "loss": 0.4923, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.15625, |
| "rewards/margins": 0.8427734375, |
| "rewards/rejected": -3.99609375, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.25216, |
| "grad_norm": 87.52079256871755, |
| "learning_rate": 6.523226981338026e-07, |
| "logits/chosen": -0.68359375, |
| "logits/rejected": -0.70458984375, |
| "logps/chosen": -738.0, |
| "logps/rejected": -774.0, |
| "loss": 0.6523, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -4.09765625, |
| "rewards/margins": 0.696044921875, |
| "rewards/rejected": -4.794921875, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.25344, |
| "grad_norm": 72.5293740432998, |
| "learning_rate": 6.515304502023185e-07, |
| "logits/chosen": -0.5673828125, |
| "logits/rejected": -0.601318359375, |
| "logps/chosen": -806.5, |
| "logps/rejected": -903.0, |
| "loss": 0.5506, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.96484375, |
| "rewards/margins": 1.0079345703125, |
| "rewards/rejected": -5.97265625, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.25472, |
| "grad_norm": 44.679991429690986, |
| "learning_rate": 6.507321633967758e-07, |
| "logits/chosen": -0.649658203125, |
| "logits/rejected": -0.6904296875, |
| "logps/chosen": -837.0, |
| "logps/rejected": -915.0, |
| "loss": 0.499, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -4.8671875, |
| "rewards/margins": 1.01953125, |
| "rewards/rejected": -5.88671875, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 59.397830971941254, |
| "learning_rate": 6.499278537047919e-07, |
| "logits/chosen": -0.69677734375, |
| "logits/rejected": -0.72998046875, |
| "logps/chosen": -831.0, |
| "logps/rejected": -920.0, |
| "loss": 0.5369, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -5.015625, |
| "rewards/margins": 0.87841796875, |
| "rewards/rejected": -5.8828125, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.256, |
| "eval_logits/chosen": -0.6611328125, |
| "eval_logits/rejected": -0.733642578125, |
| "eval_logps/chosen": -767.75, |
| "eval_logps/rejected": -825.0, |
| "eval_loss": 0.6021875143051147, |
| "eval_rewards/accuracies": 0.69921875, |
| "eval_rewards/chosen": -4.474609375, |
| "eval_rewards/margins": 0.75811767578125, |
| "eval_rewards/rejected": -5.23046875, |
| "eval_runtime": 27.6962, |
| "eval_samples_per_second": 18.053, |
| "eval_steps_per_second": 0.578, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25728, |
| "grad_norm": 53.24908848058674, |
| "learning_rate": 6.491175372346071e-07, |
| "logits/chosen": -0.671875, |
| "logits/rejected": -0.701171875, |
| "logps/chosen": -777.5, |
| "logps/rejected": -874.5, |
| "loss": 0.5742, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -4.5703125, |
| "rewards/margins": 0.83984375, |
| "rewards/rejected": -5.41015625, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.25856, |
| "grad_norm": 96.55947068044792, |
| "learning_rate": 6.483012302147617e-07, |
| "logits/chosen": -0.691162109375, |
| "logits/rejected": -0.73388671875, |
| "logps/chosen": -718.0, |
| "logps/rejected": -742.0, |
| "loss": 0.715, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -3.8984375, |
| "rewards/margins": 0.52960205078125, |
| "rewards/rejected": -4.42578125, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.25984, |
| "grad_norm": 11.651199302119567, |
| "learning_rate": 6.474789489937715e-07, |
| "logits/chosen": -0.7294921875, |
| "logits/rejected": -0.79931640625, |
| "logps/chosen": -654.0, |
| "logps/rejected": -739.0, |
| "loss": 0.4917, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.275390625, |
| "rewards/margins": 0.88525390625, |
| "rewards/rejected": -4.16015625, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.26112, |
| "grad_norm": 22.98647680803771, |
| "learning_rate": 6.466507100397998e-07, |
| "logits/chosen": -0.763671875, |
| "logits/rejected": -0.79931640625, |
| "logps/chosen": -605.0, |
| "logps/rejected": -657.5, |
| "loss": 0.5948, |
| "rewards/accuracies": 0.6484375, |
| "rewards/chosen": -2.662109375, |
| "rewards/margins": 0.5657958984375, |
| "rewards/rejected": -3.23046875, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2624, |
| "grad_norm": 8.334836762281965, |
| "learning_rate": 6.458165299403282e-07, |
| "logits/chosen": -0.702880859375, |
| "logits/rejected": -0.7275390625, |
| "logps/chosen": -576.0, |
| "logps/rejected": -620.5, |
| "loss": 0.5457, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.412109375, |
| "rewards/margins": 0.678466796875, |
| "rewards/rejected": -3.08984375, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.26368, |
| "grad_norm": 8.43068729699347, |
| "learning_rate": 6.449764254018236e-07, |
| "logits/chosen": -0.8203125, |
| "logits/rejected": -0.89306640625, |
| "logps/chosen": -557.5, |
| "logps/rejected": -619.5, |
| "loss": 0.5399, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -2.326171875, |
| "rewards/margins": 0.57373046875, |
| "rewards/rejected": -2.8984375, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.26496, |
| "grad_norm": 15.681934350374608, |
| "learning_rate": 6.441304132494045e-07, |
| "logits/chosen": -0.89111328125, |
| "logits/rejected": -0.939453125, |
| "logps/chosen": -506.5, |
| "logps/rejected": -534.75, |
| "loss": 0.527, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.857421875, |
| "rewards/margins": 0.568603515625, |
| "rewards/rejected": -2.427734375, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.26624, |
| "grad_norm": 9.604421155152727, |
| "learning_rate": 6.432785104265033e-07, |
| "logits/chosen": -0.9912109375, |
| "logits/rejected": -1.02783203125, |
| "logps/chosen": -475.25, |
| "logps/rejected": -503.25, |
| "loss": 0.5645, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.501953125, |
| "rewards/margins": 0.432373046875, |
| "rewards/rejected": -1.9345703125, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.26752, |
| "grad_norm": 14.360891651779323, |
| "learning_rate": 6.424207339945278e-07, |
| "logits/chosen": -0.982421875, |
| "logits/rejected": -1.02734375, |
| "logps/chosen": -465.75, |
| "logps/rejected": -502.25, |
| "loss": 0.5318, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -1.5576171875, |
| "rewards/margins": 0.544677734375, |
| "rewards/rejected": -2.1025390625, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.2688, |
| "grad_norm": 13.445955498385377, |
| "learning_rate": 6.41557101132518e-07, |
| "logits/chosen": -0.91162109375, |
| "logits/rejected": -0.9501953125, |
| "logps/chosen": -472.5, |
| "logps/rejected": -518.5, |
| "loss": 0.5033, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.5302734375, |
| "rewards/margins": 0.611328125, |
| "rewards/rejected": -2.142578125, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.27008, |
| "grad_norm": 12.723099981494066, |
| "learning_rate": 6.406876291368041e-07, |
| "logits/chosen": -0.9912109375, |
| "logits/rejected": -1.0634765625, |
| "logps/chosen": -497.0, |
| "logps/rejected": -556.5, |
| "loss": 0.4932, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -1.623046875, |
| "rewards/margins": 0.616455078125, |
| "rewards/rejected": -2.2373046875, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.27136, |
| "grad_norm": 9.020187932187099, |
| "learning_rate": 6.398123354206582e-07, |
| "logits/chosen": -0.91845703125, |
| "logits/rejected": -0.98974609375, |
| "logps/chosen": -500.75, |
| "logps/rejected": -560.0, |
| "loss": 0.4941, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -1.8056640625, |
| "rewards/margins": 0.711669921875, |
| "rewards/rejected": -2.517578125, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.27264, |
| "grad_norm": 8.556186928836153, |
| "learning_rate": 6.389312375139469e-07, |
| "logits/chosen": -0.9033203125, |
| "logits/rejected": -0.96142578125, |
| "logps/chosen": -594.25, |
| "logps/rejected": -617.5, |
| "loss": 0.555, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.0859375, |
| "rewards/margins": 0.615234375, |
| "rewards/rejected": -2.69921875, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.27392, |
| "grad_norm": 13.39244807934873, |
| "learning_rate": 6.380443530627797e-07, |
| "logits/chosen": -0.85986328125, |
| "logits/rejected": -0.91552734375, |
| "logps/chosen": -502.25, |
| "logps/rejected": -560.75, |
| "loss": 0.4989, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.9384765625, |
| "rewards/margins": 0.70263671875, |
| "rewards/rejected": -2.63671875, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.2752, |
| "grad_norm": 8.777635188116983, |
| "learning_rate": 6.371516998291552e-07, |
| "logits/chosen": -0.80126953125, |
| "logits/rejected": -0.912109375, |
| "logps/chosen": -518.5, |
| "logps/rejected": -607.5, |
| "loss": 0.4933, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -2.1640625, |
| "rewards/margins": 0.726806640625, |
| "rewards/rejected": -2.890625, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.27648, |
| "grad_norm": 16.262313588962954, |
| "learning_rate": 6.362532956906059e-07, |
| "logits/chosen": -0.7509765625, |
| "logits/rejected": -0.8251953125, |
| "logps/chosen": -543.0, |
| "logps/rejected": -592.0, |
| "loss": 0.4902, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -2.212890625, |
| "rewards/margins": 0.656982421875, |
| "rewards/rejected": -2.87109375, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.27776, |
| "grad_norm": 11.250156251667049, |
| "learning_rate": 6.353491586398404e-07, |
| "logits/chosen": -0.720703125, |
| "logits/rejected": -0.779296875, |
| "logps/chosen": -568.0, |
| "logps/rejected": -639.5, |
| "loss": 0.5265, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.626953125, |
| "rewards/margins": 0.7470703125, |
| "rewards/rejected": -3.373046875, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.27904, |
| "grad_norm": 9.749001446309522, |
| "learning_rate": 6.344393067843826e-07, |
| "logits/chosen": -0.6494140625, |
| "logits/rejected": -0.721923828125, |
| "logps/chosen": -622.25, |
| "logps/rejected": -703.5, |
| "loss": 0.5042, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.078125, |
| "rewards/margins": 0.8087158203125, |
| "rewards/rejected": -3.888671875, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.28032, |
| "grad_norm": 13.338732962845118, |
| "learning_rate": 6.335237583462083e-07, |
| "logits/chosen": -0.67822265625, |
| "logits/rejected": -0.67333984375, |
| "logps/chosen": -700.0, |
| "logps/rejected": -834.5, |
| "loss": 0.5743, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.390625, |
| "rewards/margins": 1.11883544921875, |
| "rewards/rejected": -4.517578125, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.2816, |
| "grad_norm": 23.59756440330084, |
| "learning_rate": 6.326025316613823e-07, |
| "logits/chosen": -0.515380859375, |
| "logits/rejected": -0.543701171875, |
| "logps/chosen": -666.0, |
| "logps/rejected": -739.5, |
| "loss": 0.5342, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -3.46484375, |
| "rewards/margins": 0.9072265625, |
| "rewards/rejected": -4.375, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.28288, |
| "grad_norm": 38.39849197226403, |
| "learning_rate": 6.316756451796894e-07, |
| "logits/chosen": -0.611572265625, |
| "logits/rejected": -0.69287109375, |
| "logps/chosen": -653.0, |
| "logps/rejected": -741.5, |
| "loss": 0.5483, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.568359375, |
| "rewards/margins": 0.7652587890625, |
| "rewards/rejected": -4.328125, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.28416, |
| "grad_norm": 15.339038520285099, |
| "learning_rate": 6.307431174642653e-07, |
| "logits/chosen": -0.654296875, |
| "logits/rejected": -0.69580078125, |
| "logps/chosen": -693.0, |
| "logps/rejected": -754.5, |
| "loss": 0.5019, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.5, |
| "rewards/margins": 0.99560546875, |
| "rewards/rejected": -4.494140625, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.28544, |
| "grad_norm": 8.374650607981456, |
| "learning_rate": 6.298049671912254e-07, |
| "logits/chosen": -0.57861328125, |
| "logits/rejected": -0.62353515625, |
| "logps/chosen": -627.0, |
| "logps/rejected": -704.5, |
| "loss": 0.4972, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.31640625, |
| "rewards/margins": 0.92919921875, |
| "rewards/rejected": -4.244140625, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.28672, |
| "grad_norm": 45.963116084804696, |
| "learning_rate": 6.2886121314929e-07, |
| "logits/chosen": -0.609619140625, |
| "logits/rejected": -0.6494140625, |
| "logps/chosen": -639.5, |
| "logps/rejected": -666.25, |
| "loss": 0.7107, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -3.390625, |
| "rewards/margins": 0.4444580078125, |
| "rewards/rejected": -3.8359375, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 7.958096573222809, |
| "learning_rate": 6.279118742394089e-07, |
| "logits/chosen": -0.588623046875, |
| "logits/rejected": -0.64306640625, |
| "logps/chosen": -624.5, |
| "logps/rejected": -695.5, |
| "loss": 0.4767, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -2.857421875, |
| "rewards/margins": 0.83056640625, |
| "rewards/rejected": -3.689453125, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.28928, |
| "grad_norm": 9.611100210392358, |
| "learning_rate": 6.269569694743816e-07, |
| "logits/chosen": -0.6396484375, |
| "logits/rejected": -0.68701171875, |
| "logps/chosen": -564.5, |
| "logps/rejected": -650.0, |
| "loss": 0.4786, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -2.638671875, |
| "rewards/margins": 0.86083984375, |
| "rewards/rejected": -3.49609375, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.29056, |
| "grad_norm": 9.097155318325184, |
| "learning_rate": 6.259965179784779e-07, |
| "logits/chosen": -0.71484375, |
| "logits/rejected": -0.76611328125, |
| "logps/chosen": -612.0, |
| "logps/rejected": -672.5, |
| "loss": 0.4834, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.845703125, |
| "rewards/margins": 0.808837890625, |
| "rewards/rejected": -3.658203125, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.29184, |
| "grad_norm": 11.493217517064283, |
| "learning_rate": 6.250305389870541e-07, |
| "logits/chosen": -0.7099609375, |
| "logits/rejected": -0.77001953125, |
| "logps/chosen": -602.5, |
| "logps/rejected": -667.5, |
| "loss": 0.5058, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.642578125, |
| "rewards/margins": 0.72119140625, |
| "rewards/rejected": -3.36328125, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.29312, |
| "grad_norm": 9.437777289578113, |
| "learning_rate": 6.240590518461678e-07, |
| "logits/chosen": -0.62939453125, |
| "logits/rejected": -0.7001953125, |
| "logps/chosen": -529.75, |
| "logps/rejected": -617.0, |
| "loss": 0.4882, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -2.3828125, |
| "rewards/margins": 0.7900390625, |
| "rewards/rejected": -3.171875, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.2944, |
| "grad_norm": 8.312134657078936, |
| "learning_rate": 6.230820760121904e-07, |
| "logits/chosen": -0.578369140625, |
| "logits/rejected": -0.627197265625, |
| "logps/chosen": -559.75, |
| "logps/rejected": -656.5, |
| "loss": 0.4768, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.5625, |
| "rewards/margins": 0.951416015625, |
| "rewards/rejected": -3.51171875, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.29568, |
| "grad_norm": 11.986917072887415, |
| "learning_rate": 6.220996310514181e-07, |
| "logits/chosen": -0.5537109375, |
| "logits/rejected": -0.60888671875, |
| "logps/chosen": -616.0, |
| "logps/rejected": -668.5, |
| "loss": 0.576, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -2.873046875, |
| "rewards/margins": 0.666015625, |
| "rewards/rejected": -3.537109375, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.29696, |
| "grad_norm": 12.172332354528226, |
| "learning_rate": 6.21111736639679e-07, |
| "logits/chosen": -0.610595703125, |
| "logits/rejected": -0.695068359375, |
| "logps/chosen": -535.0, |
| "logps/rejected": -651.0, |
| "loss": 0.4534, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.4560546875, |
| "rewards/margins": 0.952392578125, |
| "rewards/rejected": -3.408203125, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.29824, |
| "grad_norm": 14.200772198441506, |
| "learning_rate": 6.201184125619403e-07, |
| "logits/chosen": -0.520263671875, |
| "logits/rejected": -0.56201171875, |
| "logps/chosen": -595.0, |
| "logps/rejected": -654.0, |
| "loss": 0.4812, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.56640625, |
| "rewards/margins": 0.9599609375, |
| "rewards/rejected": -3.52734375, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.29952, |
| "grad_norm": 13.457335334304465, |
| "learning_rate": 6.191196787119104e-07, |
| "logits/chosen": -0.5048828125, |
| "logits/rejected": -0.588134765625, |
| "logps/chosen": -642.5, |
| "logps/rejected": -732.5, |
| "loss": 0.4973, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.150390625, |
| "rewards/margins": 0.856689453125, |
| "rewards/rejected": -4.005859375, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.3008, |
| "grad_norm": 10.321622368203395, |
| "learning_rate": 6.181155550916422e-07, |
| "logits/chosen": -0.4512939453125, |
| "logits/rejected": -0.501708984375, |
| "logps/chosen": -647.5, |
| "logps/rejected": -769.5, |
| "loss": 0.4746, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.408203125, |
| "rewards/margins": 0.9677734375, |
| "rewards/rejected": -4.37109375, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.30208, |
| "grad_norm": 8.858095075192827, |
| "learning_rate": 6.171060618111317e-07, |
| "logits/chosen": -0.44775390625, |
| "logits/rejected": -0.4814453125, |
| "logps/chosen": -666.0, |
| "logps/rejected": -779.0, |
| "loss": 0.4962, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.6328125, |
| "rewards/margins": 0.91748046875, |
| "rewards/rejected": -4.556640625, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.30336, |
| "grad_norm": 18.70168630039342, |
| "learning_rate": 6.160912190879145e-07, |
| "logits/chosen": -0.5400390625, |
| "logits/rejected": -0.544921875, |
| "logps/chosen": -651.0, |
| "logps/rejected": -721.5, |
| "loss": 0.5413, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.35546875, |
| "rewards/margins": 0.91790771484375, |
| "rewards/rejected": -4.275390625, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.30464, |
| "grad_norm": 9.749493306506885, |
| "learning_rate": 6.150710472466629e-07, |
| "logits/chosen": -0.45068359375, |
| "logits/rejected": -0.485595703125, |
| "logps/chosen": -638.0, |
| "logps/rejected": -719.5, |
| "loss": 0.4751, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.2265625, |
| "rewards/margins": 0.86181640625, |
| "rewards/rejected": -4.0859375, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.30592, |
| "grad_norm": 20.80698350471745, |
| "learning_rate": 6.140455667187765e-07, |
| "logits/chosen": -0.46551513671875, |
| "logits/rejected": -0.50634765625, |
| "logps/chosen": -761.0, |
| "logps/rejected": -825.0, |
| "loss": 0.5784, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.84375, |
| "rewards/margins": 0.728759765625, |
| "rewards/rejected": -4.578125, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.3072, |
| "grad_norm": 8.953639678478353, |
| "learning_rate": 6.13014798041975e-07, |
| "logits/chosen": -0.417236328125, |
| "logits/rejected": -0.4698486328125, |
| "logps/chosen": -658.0, |
| "logps/rejected": -751.0, |
| "loss": 0.5047, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.318359375, |
| "rewards/margins": 0.96484375, |
| "rewards/rejected": -4.283203125, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.30848, |
| "grad_norm": 10.845686942793368, |
| "learning_rate": 6.119787618598854e-07, |
| "logits/chosen": -0.4853668212890625, |
| "logits/rejected": -0.5223388671875, |
| "logps/chosen": -655.75, |
| "logps/rejected": -701.0, |
| "loss": 0.5384, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.103515625, |
| "rewards/margins": 0.777099609375, |
| "rewards/rejected": -3.880859375, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.30976, |
| "grad_norm": 9.846249269177688, |
| "learning_rate": 6.109374789216295e-07, |
| "logits/chosen": -0.4921875, |
| "logits/rejected": -0.587646484375, |
| "logps/chosen": -609.0, |
| "logps/rejected": -722.0, |
| "loss": 0.4894, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.994140625, |
| "rewards/margins": 0.99072265625, |
| "rewards/rejected": -3.9765625, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.31104, |
| "grad_norm": 8.39816655306362, |
| "learning_rate": 6.098909700814082e-07, |
| "logits/chosen": -0.564208984375, |
| "logits/rejected": -0.618408203125, |
| "logps/chosen": -498.25, |
| "logps/rejected": -594.25, |
| "loss": 0.523, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.3525390625, |
| "rewards/margins": 0.72216796875, |
| "rewards/rejected": -3.078125, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.31232, |
| "grad_norm": 15.315342312273753, |
| "learning_rate": 6.08839256298083e-07, |
| "logits/chosen": -0.510986328125, |
| "logits/rejected": -0.580322265625, |
| "logps/chosen": -574.75, |
| "logps/rejected": -668.0, |
| "loss": 0.4818, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.73046875, |
| "rewards/margins": 0.8583984375, |
| "rewards/rejected": -3.591796875, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.3136, |
| "grad_norm": 11.767718051696765, |
| "learning_rate": 6.077823586347579e-07, |
| "logits/chosen": -0.48681640625, |
| "logits/rejected": -0.5322265625, |
| "logps/chosen": -561.0, |
| "logps/rejected": -622.0, |
| "loss": 0.5154, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.603515625, |
| "rewards/margins": 0.7664794921875, |
| "rewards/rejected": -3.3671875, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.31488, |
| "grad_norm": 9.13068372680758, |
| "learning_rate": 6.067202982583559e-07, |
| "logits/chosen": -0.5447998046875, |
| "logits/rejected": -0.6015625, |
| "logps/chosen": -654.5, |
| "logps/rejected": -729.0, |
| "loss": 0.5201, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.890625, |
| "rewards/margins": 0.822998046875, |
| "rewards/rejected": -3.7109375, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.31616, |
| "grad_norm": 8.828236079236284, |
| "learning_rate": 6.056530964391961e-07, |
| "logits/chosen": -0.55419921875, |
| "logits/rejected": -0.6162109375, |
| "logps/chosen": -605.75, |
| "logps/rejected": -679.5, |
| "loss": 0.4972, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.8447265625, |
| "rewards/margins": 0.84423828125, |
| "rewards/rejected": -3.6875, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.31744, |
| "grad_norm": 9.350860751825014, |
| "learning_rate": 6.04580774550567e-07, |
| "logits/chosen": -0.501220703125, |
| "logits/rejected": -0.592529296875, |
| "logps/chosen": -632.5, |
| "logps/rejected": -708.0, |
| "loss": 0.4995, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -2.921875, |
| "rewards/margins": 0.94384765625, |
| "rewards/rejected": -3.8671875, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.31872, |
| "grad_norm": 9.838783187292746, |
| "learning_rate": 6.035033540682994e-07, |
| "logits/chosen": -0.5537109375, |
| "logits/rejected": -0.60009765625, |
| "logps/chosen": -606.0, |
| "logps/rejected": -660.5, |
| "loss": 0.5366, |
| "rewards/accuracies": 0.6796875, |
| "rewards/chosen": -2.8203125, |
| "rewards/margins": 0.7607421875, |
| "rewards/rejected": -3.580078125, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 10.098149943213269, |
| "learning_rate": 6.02420856570335e-07, |
| "logits/chosen": -0.512939453125, |
| "logits/rejected": -0.553955078125, |
| "logps/chosen": -584.0, |
| "logps/rejected": -677.5, |
| "loss": 0.4851, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.765625, |
| "rewards/margins": 0.787109375, |
| "rewards/rejected": -3.5546875, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.32128, |
| "grad_norm": 11.938073471759608, |
| "learning_rate": 6.013333037362958e-07, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.58447265625, |
| "logps/chosen": -637.0, |
| "logps/rejected": -707.0, |
| "loss": 0.5481, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -2.98046875, |
| "rewards/margins": 0.731689453125, |
| "rewards/rejected": -3.71484375, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.32256, |
| "grad_norm": 8.629915885629975, |
| "learning_rate": 6.002407173470485e-07, |
| "logits/chosen": -0.52685546875, |
| "logits/rejected": -0.5693359375, |
| "logps/chosen": -647.0, |
| "logps/rejected": -739.5, |
| "loss": 0.4979, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.0390625, |
| "rewards/margins": 0.815185546875, |
| "rewards/rejected": -3.85546875, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.32384, |
| "grad_norm": 15.164274031135337, |
| "learning_rate": 5.991431192842692e-07, |
| "logits/chosen": -0.47216796875, |
| "logits/rejected": -0.52001953125, |
| "logps/chosen": -640.5, |
| "logps/rejected": -726.0, |
| "loss": 0.455, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.05859375, |
| "rewards/margins": 0.89501953125, |
| "rewards/rejected": -3.951171875, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.32512, |
| "grad_norm": 13.467159822471947, |
| "learning_rate": 5.980405315300045e-07, |
| "logits/chosen": -0.395751953125, |
| "logits/rejected": -0.4453125, |
| "logps/chosen": -634.5, |
| "logps/rejected": -718.0, |
| "loss": 0.5203, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -3.119140625, |
| "rewards/margins": 0.7841796875, |
| "rewards/rejected": -3.90625, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.3264, |
| "grad_norm": 9.015056279995315, |
| "learning_rate": 5.969329761662318e-07, |
| "logits/chosen": -0.38385009765625, |
| "logits/rejected": -0.4581298828125, |
| "logps/chosen": -625.0, |
| "logps/rejected": -722.5, |
| "loss": 0.5035, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.091796875, |
| "rewards/margins": 0.9658203125, |
| "rewards/rejected": -4.05859375, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.32768, |
| "grad_norm": 9.520970646405912, |
| "learning_rate": 5.958204753744171e-07, |
| "logits/chosen": -0.4287109375, |
| "logits/rejected": -0.486572265625, |
| "logps/chosen": -615.5, |
| "logps/rejected": -700.0, |
| "loss": 0.5045, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.001953125, |
| "rewards/margins": 0.9034423828125, |
| "rewards/rejected": -3.90625, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.32896, |
| "grad_norm": 22.42105812552605, |
| "learning_rate": 5.9470305143507e-07, |
| "logits/chosen": -0.405029296875, |
| "logits/rejected": -0.456787109375, |
| "logps/chosen": -684.0, |
| "logps/rejected": -746.0, |
| "loss": 0.6086, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.5625, |
| "rewards/margins": 0.7578125, |
| "rewards/rejected": -4.3203125, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.33024, |
| "grad_norm": 20.00218984655251, |
| "learning_rate": 5.935807267272985e-07, |
| "logits/chosen": -0.4755859375, |
| "logits/rejected": -0.497802734375, |
| "logps/chosen": -668.0, |
| "logps/rejected": -725.5, |
| "loss": 0.5487, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.1875, |
| "rewards/margins": 0.792236328125, |
| "rewards/rejected": -3.978515625, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.33152, |
| "grad_norm": 10.403435140293421, |
| "learning_rate": 5.924535237283598e-07, |
| "logits/chosen": -0.47998046875, |
| "logits/rejected": -0.528076171875, |
| "logps/chosen": -617.5, |
| "logps/rejected": -688.0, |
| "loss": 0.4796, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.8671875, |
| "rewards/margins": 0.8173828125, |
| "rewards/rejected": -3.68359375, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.3328, |
| "grad_norm": 8.359013118624633, |
| "learning_rate": 5.913214650132112e-07, |
| "logits/chosen": -0.43115234375, |
| "logits/rejected": -0.514892578125, |
| "logps/chosen": -580.75, |
| "logps/rejected": -687.0, |
| "loss": 0.4962, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.859375, |
| "rewards/margins": 0.9130859375, |
| "rewards/rejected": -3.76953125, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.33408, |
| "grad_norm": 9.989053690781105, |
| "learning_rate": 5.901845732540568e-07, |
| "logits/chosen": -0.488037109375, |
| "logits/rejected": -0.51708984375, |
| "logps/chosen": -659.0, |
| "logps/rejected": -727.0, |
| "loss": 0.4965, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.03125, |
| "rewards/margins": 1.017578125, |
| "rewards/rejected": -4.044921875, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.33536, |
| "grad_norm": 9.229777464931137, |
| "learning_rate": 5.890428712198945e-07, |
| "logits/chosen": -0.4755859375, |
| "logits/rejected": -0.4970703125, |
| "logps/chosen": -673.5, |
| "logps/rejected": -766.0, |
| "loss": 0.4583, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.19921875, |
| "rewards/margins": 1.03955078125, |
| "rewards/rejected": -4.2421875, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.33664, |
| "grad_norm": 11.937311630757574, |
| "learning_rate": 5.878963817760597e-07, |
| "logits/chosen": -0.454833984375, |
| "logits/rejected": -0.519775390625, |
| "logps/chosen": -642.5, |
| "logps/rejected": -735.5, |
| "loss": 0.5139, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.205078125, |
| "rewards/margins": 0.8829345703125, |
| "rewards/rejected": -4.0859375, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.33792, |
| "grad_norm": 10.230639726258993, |
| "learning_rate": 5.867451278837666e-07, |
| "logits/chosen": -0.355499267578125, |
| "logits/rejected": -0.408599853515625, |
| "logps/chosen": -642.5, |
| "logps/rejected": -704.0, |
| "loss": 0.5486, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -3.228515625, |
| "rewards/margins": 0.77197265625, |
| "rewards/rejected": -3.998046875, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.3392, |
| "grad_norm": 13.416289127212499, |
| "learning_rate": 5.855891325996495e-07, |
| "logits/chosen": -0.41424560546875, |
| "logits/rejected": -0.42974853515625, |
| "logps/chosen": -658.5, |
| "logps/rejected": -730.0, |
| "loss": 0.5526, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.15625, |
| "rewards/margins": 0.790771484375, |
| "rewards/rejected": -3.943359375, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.34048, |
| "grad_norm": 12.108152563268916, |
| "learning_rate": 5.844284190753003e-07, |
| "logits/chosen": -0.4765625, |
| "logits/rejected": -0.50439453125, |
| "logps/chosen": -636.5, |
| "logps/rejected": -707.0, |
| "loss": 0.5176, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -2.982421875, |
| "rewards/margins": 0.932373046875, |
| "rewards/rejected": -3.916015625, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.34176, |
| "grad_norm": 9.58986611812255, |
| "learning_rate": 5.83263010556805e-07, |
| "logits/chosen": -0.508544921875, |
| "logits/rejected": -0.589599609375, |
| "logps/chosen": -634.0, |
| "logps/rejected": -738.5, |
| "loss": 0.5, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.123046875, |
| "rewards/margins": 0.942626953125, |
| "rewards/rejected": -4.068359375, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.34304, |
| "grad_norm": 10.292439732995158, |
| "learning_rate": 5.820929303842783e-07, |
| "logits/chosen": -0.5439453125, |
| "logits/rejected": -0.611572265625, |
| "logps/chosen": -568.25, |
| "logps/rejected": -657.5, |
| "loss": 0.4522, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -2.576171875, |
| "rewards/margins": 1.00390625, |
| "rewards/rejected": -3.58203125, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.34432, |
| "grad_norm": 8.040921361038157, |
| "learning_rate": 5.809182019913959e-07, |
| "logits/chosen": -0.57275390625, |
| "logits/rejected": -0.607666015625, |
| "logps/chosen": -569.75, |
| "logps/rejected": -623.0, |
| "loss": 0.5395, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -2.421875, |
| "rewards/margins": 0.765380859375, |
| "rewards/rejected": -3.1875, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.3456, |
| "grad_norm": 12.336782262534681, |
| "learning_rate": 5.797388489049254e-07, |
| "logits/chosen": -0.59423828125, |
| "logits/rejected": -0.62109375, |
| "logps/chosen": -616.0, |
| "logps/rejected": -659.5, |
| "loss": 0.5252, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.69140625, |
| "rewards/margins": 0.767822265625, |
| "rewards/rejected": -3.45703125, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.34688, |
| "grad_norm": 14.10037159945176, |
| "learning_rate": 5.785548947442547e-07, |
| "logits/chosen": -0.578125, |
| "logits/rejected": -0.60888671875, |
| "logps/chosen": -575.5, |
| "logps/rejected": -677.5, |
| "loss": 0.444, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.5703125, |
| "rewards/margins": 0.984375, |
| "rewards/rejected": -3.55859375, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.34816, |
| "grad_norm": 10.631299174291128, |
| "learning_rate": 5.773663632209201e-07, |
| "logits/chosen": -0.559814453125, |
| "logits/rejected": -0.641357421875, |
| "logps/chosen": -580.5, |
| "logps/rejected": -681.5, |
| "loss": 0.4758, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -2.64453125, |
| "rewards/margins": 0.9033203125, |
| "rewards/rejected": -3.546875, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.34944, |
| "grad_norm": 9.172576313947784, |
| "learning_rate": 5.7617327813813e-07, |
| "logits/chosen": -0.492431640625, |
| "logits/rejected": -0.562744140625, |
| "logps/chosen": -618.0, |
| "logps/rejected": -672.5, |
| "loss": 0.5197, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.921875, |
| "rewards/margins": 0.7423095703125, |
| "rewards/rejected": -3.666015625, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.35072, |
| "grad_norm": 10.958422585425877, |
| "learning_rate": 5.749756633902887e-07, |
| "logits/chosen": -0.531494140625, |
| "logits/rejected": -0.56201171875, |
| "logps/chosen": -585.75, |
| "logps/rejected": -664.5, |
| "loss": 0.4871, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.65625, |
| "rewards/margins": 0.888671875, |
| "rewards/rejected": -3.548828125, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 11.430707520592867, |
| "learning_rate": 5.737735429625186e-07, |
| "logits/chosen": -0.546142578125, |
| "logits/rejected": -0.597900390625, |
| "logps/chosen": -664.0, |
| "logps/rejected": -724.5, |
| "loss": 0.5227, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.134765625, |
| "rewards/margins": 0.956787109375, |
| "rewards/rejected": -4.08984375, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.35328, |
| "grad_norm": 16.483350734581908, |
| "learning_rate": 5.725669409301782e-07, |
| "logits/chosen": -0.44775390625, |
| "logits/rejected": -0.451416015625, |
| "logps/chosen": -642.0, |
| "logps/rejected": -748.5, |
| "loss": 0.5278, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -3.234375, |
| "rewards/margins": 0.9853515625, |
| "rewards/rejected": -4.224609375, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.35456, |
| "grad_norm": 9.95398312986462, |
| "learning_rate": 5.71355881458382e-07, |
| "logits/chosen": -0.474609375, |
| "logits/rejected": -0.506591796875, |
| "logps/chosen": -635.0, |
| "logps/rejected": -716.5, |
| "loss": 0.4857, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.126953125, |
| "rewards/margins": 0.912109375, |
| "rewards/rejected": -4.033203125, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.35584, |
| "grad_norm": 18.865132248923636, |
| "learning_rate": 5.701403888015149e-07, |
| "logits/chosen": -0.446044921875, |
| "logits/rejected": -0.47314453125, |
| "logps/chosen": -639.25, |
| "logps/rejected": -730.0, |
| "loss": 0.4663, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.19921875, |
| "rewards/margins": 0.96240234375, |
| "rewards/rejected": -4.16796875, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.35712, |
| "grad_norm": 9.213729341091112, |
| "learning_rate": 5.689204873027471e-07, |
| "logits/chosen": -0.38330078125, |
| "logits/rejected": -0.398681640625, |
| "logps/chosen": -657.0, |
| "logps/rejected": -725.0, |
| "loss": 0.502, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.232421875, |
| "rewards/margins": 0.822998046875, |
| "rewards/rejected": -4.0546875, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.3584, |
| "grad_norm": 19.70630286015966, |
| "learning_rate": 5.676962013935464e-07, |
| "logits/chosen": -0.4217529296875, |
| "logits/rejected": -0.4654541015625, |
| "logps/chosen": -679.5, |
| "logps/rejected": -734.5, |
| "loss": 0.5831, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.66015625, |
| "rewards/margins": 0.718505859375, |
| "rewards/rejected": -4.37890625, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.35968, |
| "grad_norm": 22.464253350390536, |
| "learning_rate": 5.664675555931892e-07, |
| "logits/chosen": -0.401611328125, |
| "logits/rejected": -0.404296875, |
| "logps/chosen": -713.0, |
| "logps/rejected": -825.0, |
| "loss": 0.572, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -4.025390625, |
| "rewards/margins": 0.773193359375, |
| "rewards/rejected": -4.796875, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.36096, |
| "grad_norm": 10.334427726270604, |
| "learning_rate": 5.652345745082692e-07, |
| "logits/chosen": -0.4014892578125, |
| "logits/rejected": -0.439208984375, |
| "logps/chosen": -702.0, |
| "logps/rejected": -764.0, |
| "loss": 0.4943, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.63671875, |
| "rewards/margins": 0.854736328125, |
| "rewards/rejected": -4.494140625, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.36224, |
| "grad_norm": 8.836351837837654, |
| "learning_rate": 5.639972828322043e-07, |
| "logits/chosen": -0.3765869140625, |
| "logits/rejected": -0.436767578125, |
| "logps/chosen": -676.5, |
| "logps/rejected": -752.0, |
| "loss": 0.4468, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.45703125, |
| "rewards/margins": 1.09326171875, |
| "rewards/rejected": -4.55078125, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.36352, |
| "grad_norm": 36.01906152382893, |
| "learning_rate": 5.627557053447426e-07, |
| "logits/chosen": -0.3876953125, |
| "logits/rejected": -0.413818359375, |
| "logps/chosen": -689.0, |
| "logps/rejected": -736.5, |
| "loss": 0.6353, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.5625, |
| "rewards/margins": 0.6876678466796875, |
| "rewards/rejected": -4.24609375, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.3648, |
| "grad_norm": 10.074359287488177, |
| "learning_rate": 5.615098669114664e-07, |
| "logits/chosen": -0.3740234375, |
| "logits/rejected": -0.421630859375, |
| "logps/chosen": -666.0, |
| "logps/rejected": -800.5, |
| "loss": 0.4379, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.51953125, |
| "rewards/margins": 1.00341796875, |
| "rewards/rejected": -4.52734375, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.36608, |
| "grad_norm": 8.877186370667038, |
| "learning_rate": 5.602597924832926e-07, |
| "logits/chosen": -0.508056640625, |
| "logits/rejected": -0.53857421875, |
| "logps/chosen": -638.0, |
| "logps/rejected": -740.5, |
| "loss": 0.4867, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.2109375, |
| "rewards/margins": 1.00146484375, |
| "rewards/rejected": -4.212890625, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.36736, |
| "grad_norm": 9.967314448363597, |
| "learning_rate": 5.590055070959751e-07, |
| "logits/chosen": -0.4375, |
| "logits/rejected": -0.4688720703125, |
| "logps/chosen": -672.0, |
| "logps/rejected": -734.0, |
| "loss": 0.4381, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.158203125, |
| "rewards/margins": 1.140625, |
| "rewards/rejected": -4.298828125, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.36864, |
| "grad_norm": 33.10004352946362, |
| "learning_rate": 5.577470358696021e-07, |
| "logits/chosen": -0.389404296875, |
| "logits/rejected": -0.446044921875, |
| "logps/chosen": -646.5, |
| "logps/rejected": -739.5, |
| "loss": 0.6066, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.439453125, |
| "rewards/margins": 0.7655029296875, |
| "rewards/rejected": -4.203125, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.36992, |
| "grad_norm": 10.915408242599547, |
| "learning_rate": 5.56484404008093e-07, |
| "logits/chosen": -0.506103515625, |
| "logits/rejected": -0.5328369140625, |
| "logps/chosen": -618.5, |
| "logps/rejected": -691.5, |
| "loss": 0.4948, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.96484375, |
| "rewards/margins": 1.02490234375, |
| "rewards/rejected": -3.990234375, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.3712, |
| "grad_norm": 10.911477941525613, |
| "learning_rate": 5.552176367986944e-07, |
| "logits/chosen": -0.579345703125, |
| "logits/rejected": -0.63330078125, |
| "logps/chosen": -630.5, |
| "logps/rejected": -765.5, |
| "loss": 0.501, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.904296875, |
| "rewards/margins": 0.831298828125, |
| "rewards/rejected": -3.734375, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.37248, |
| "grad_norm": 13.643638150351943, |
| "learning_rate": 5.539467596114729e-07, |
| "logits/chosen": -0.488525390625, |
| "logits/rejected": -0.54443359375, |
| "logps/chosen": -586.5, |
| "logps/rejected": -697.5, |
| "loss": 0.4345, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.744140625, |
| "rewards/margins": 1.025390625, |
| "rewards/rejected": -3.76953125, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.37376, |
| "grad_norm": 10.440042665143498, |
| "learning_rate": 5.526717978988076e-07, |
| "logits/chosen": -0.615234375, |
| "logits/rejected": -0.625, |
| "logps/chosen": -635.0, |
| "logps/rejected": -655.0, |
| "loss": 0.5673, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.779296875, |
| "rewards/margins": 0.68798828125, |
| "rewards/rejected": -3.470703125, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.37504, |
| "grad_norm": 13.718400637119657, |
| "learning_rate": 5.513927771948797e-07, |
| "logits/chosen": -0.5810546875, |
| "logits/rejected": -0.61767578125, |
| "logps/chosen": -562.5, |
| "logps/rejected": -622.25, |
| "loss": 0.5058, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.4091796875, |
| "rewards/margins": 0.7276611328125, |
| "rewards/rejected": -3.138671875, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.37632, |
| "grad_norm": 12.273740462313567, |
| "learning_rate": 5.501097231151619e-07, |
| "logits/chosen": -0.6591796875, |
| "logits/rejected": -0.68896484375, |
| "logps/chosen": -548.25, |
| "logps/rejected": -623.0, |
| "loss": 0.4711, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -2.212890625, |
| "rewards/margins": 0.90234375, |
| "rewards/rejected": -3.115234375, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.3776, |
| "grad_norm": 12.632600487373818, |
| "learning_rate": 5.488226613559045e-07, |
| "logits/chosen": -0.608154296875, |
| "logits/rejected": -0.649169921875, |
| "logps/chosen": -562.75, |
| "logps/rejected": -593.75, |
| "loss": 0.4987, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.2451171875, |
| "rewards/margins": 0.77099609375, |
| "rewards/rejected": -3.017578125, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.37888, |
| "grad_norm": 8.796089070402305, |
| "learning_rate": 5.475316176936217e-07, |
| "logits/chosen": -0.593505859375, |
| "logits/rejected": -0.647705078125, |
| "logps/chosen": -598.5, |
| "logps/rejected": -641.5, |
| "loss": 0.5126, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.521484375, |
| "rewards/margins": 0.88037109375, |
| "rewards/rejected": -3.40234375, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.38016, |
| "grad_norm": 8.538475094142157, |
| "learning_rate": 5.462366179845746e-07, |
| "logits/chosen": -0.6328125, |
| "logits/rejected": -0.70556640625, |
| "logps/chosen": -560.25, |
| "logps/rejected": -608.5, |
| "loss": 0.551, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -2.3828125, |
| "rewards/margins": 0.629150390625, |
| "rewards/rejected": -3.009765625, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.38144, |
| "grad_norm": 8.516105169564174, |
| "learning_rate": 5.449376881642538e-07, |
| "logits/chosen": -0.524658203125, |
| "logits/rejected": -0.607666015625, |
| "logps/chosen": -572.75, |
| "logps/rejected": -645.0, |
| "loss": 0.5534, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.763671875, |
| "rewards/margins": 0.6611328125, |
| "rewards/rejected": -3.423828125, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.38272, |
| "grad_norm": 8.520830137173526, |
| "learning_rate": 5.436348542468598e-07, |
| "logits/chosen": -0.6298828125, |
| "logits/rejected": -0.643310546875, |
| "logps/chosen": -637.0, |
| "logps/rejected": -700.5, |
| "loss": 0.5124, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -2.669921875, |
| "rewards/margins": 0.77044677734375, |
| "rewards/rejected": -3.4375, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 11.383934405773742, |
| "learning_rate": 5.423281423247821e-07, |
| "logits/chosen": -0.62939453125, |
| "logits/rejected": -0.669921875, |
| "logps/chosen": -626.5, |
| "logps/rejected": -694.0, |
| "loss": 0.4308, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -2.6640625, |
| "rewards/margins": 0.98193359375, |
| "rewards/rejected": -3.646484375, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.384, |
| "eval_logits/chosen": -0.5311279296875, |
| "eval_logits/rejected": -0.6102294921875, |
| "eval_logps/chosen": -589.25, |
| "eval_logps/rejected": -647.5, |
| "eval_loss": 0.5219140648841858, |
| "eval_rewards/accuracies": 0.7308593988418579, |
| "eval_rewards/chosen": -2.68359375, |
| "eval_rewards/margins": 0.7745361328125, |
| "eval_rewards/rejected": -3.458984375, |
| "eval_runtime": 27.4735, |
| "eval_samples_per_second": 18.199, |
| "eval_steps_per_second": 0.582, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.38528, |
| "grad_norm": 9.685684454302386, |
| "learning_rate": 5.410175785680765e-07, |
| "logits/chosen": -0.60009765625, |
| "logits/rejected": -0.627197265625, |
| "logps/chosen": -610.25, |
| "logps/rejected": -658.5, |
| "loss": 0.5635, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -2.712890625, |
| "rewards/margins": 0.729248046875, |
| "rewards/rejected": -3.44140625, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.38656, |
| "grad_norm": 15.00512977681773, |
| "learning_rate": 5.397031892239415e-07, |
| "logits/chosen": -0.61962890625, |
| "logits/rejected": -0.6162109375, |
| "logps/chosen": -609.5, |
| "logps/rejected": -658.0, |
| "loss": 0.4694, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.783203125, |
| "rewards/margins": 0.82666015625, |
| "rewards/rejected": -3.61328125, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.38784, |
| "grad_norm": 13.021657646201135, |
| "learning_rate": 5.383850006161913e-07, |
| "logits/chosen": -0.453125, |
| "logits/rejected": -0.511962890625, |
| "logps/chosen": -610.0, |
| "logps/rejected": -676.0, |
| "loss": 0.5271, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.849609375, |
| "rewards/margins": 0.75146484375, |
| "rewards/rejected": -3.60546875, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.38912, |
| "grad_norm": 13.16735841607135, |
| "learning_rate": 5.370630391447304e-07, |
| "logits/chosen": -0.53125, |
| "logits/rejected": -0.5673828125, |
| "logps/chosen": -589.25, |
| "logps/rejected": -693.0, |
| "loss": 0.4679, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -2.7890625, |
| "rewards/margins": 0.91796875, |
| "rewards/rejected": -3.712890625, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.3904, |
| "grad_norm": 8.650434954949796, |
| "learning_rate": 5.357373312850235e-07, |
| "logits/chosen": -0.49755859375, |
| "logits/rejected": -0.5546875, |
| "logps/chosen": -628.5, |
| "logps/rejected": -716.0, |
| "loss": 0.5205, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.193359375, |
| "rewards/margins": 0.80926513671875, |
| "rewards/rejected": -4.001953125, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.39168, |
| "grad_norm": 12.03283706604588, |
| "learning_rate": 5.344079035875661e-07, |
| "logits/chosen": -0.468505859375, |
| "logits/rejected": -0.535400390625, |
| "logps/chosen": -658.0, |
| "logps/rejected": -745.0, |
| "loss": 0.5055, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.0546875, |
| "rewards/margins": 0.9290771484375, |
| "rewards/rejected": -3.98828125, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.39296, |
| "grad_norm": 21.421330253934375, |
| "learning_rate": 5.330747826773522e-07, |
| "logits/chosen": -0.4796142578125, |
| "logits/rejected": -0.5126953125, |
| "logps/chosen": -613.5, |
| "logps/rejected": -671.0, |
| "loss": 0.5362, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.009765625, |
| "rewards/margins": 0.86328125, |
| "rewards/rejected": -3.873046875, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.39424, |
| "grad_norm": 11.41199587027057, |
| "learning_rate": 5.317379952533411e-07, |
| "logits/chosen": -0.56982421875, |
| "logits/rejected": -0.62548828125, |
| "logps/chosen": -628.0, |
| "logps/rejected": -727.5, |
| "loss": 0.4724, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -2.916015625, |
| "rewards/margins": 1.119140625, |
| "rewards/rejected": -4.033203125, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.39552, |
| "grad_norm": 13.469179182772214, |
| "learning_rate": 5.303975680879232e-07, |
| "logits/chosen": -0.52392578125, |
| "logits/rejected": -0.54833984375, |
| "logps/chosen": -615.5, |
| "logps/rejected": -669.0, |
| "loss": 0.5421, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -2.951171875, |
| "rewards/margins": 0.742431640625, |
| "rewards/rejected": -3.6953125, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.3968, |
| "grad_norm": 16.733814154590878, |
| "learning_rate": 5.290535280263835e-07, |
| "logits/chosen": -0.39404296875, |
| "logits/rejected": -0.3988037109375, |
| "logps/chosen": -630.0, |
| "logps/rejected": -687.5, |
| "loss": 0.4677, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.814453125, |
| "rewards/margins": 0.91748046875, |
| "rewards/rejected": -3.740234375, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.39808, |
| "grad_norm": 8.75301225998322, |
| "learning_rate": 5.277059019863637e-07, |
| "logits/chosen": -0.43408203125, |
| "logits/rejected": -0.5146484375, |
| "logps/chosen": -606.0, |
| "logps/rejected": -685.25, |
| "loss": 0.5019, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.65625, |
| "rewards/margins": 0.901123046875, |
| "rewards/rejected": -3.560546875, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.39936, |
| "grad_norm": 8.024297545260174, |
| "learning_rate": 5.263547169573235e-07, |
| "logits/chosen": -0.55029296875, |
| "logits/rejected": -0.591064453125, |
| "logps/chosen": -567.75, |
| "logps/rejected": -655.0, |
| "loss": 0.5004, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.51953125, |
| "rewards/margins": 0.76953125, |
| "rewards/rejected": -3.2890625, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.40064, |
| "grad_norm": 9.38829913769337, |
| "learning_rate": 5.25e-07, |
| "logits/chosen": -0.4615478515625, |
| "logits/rejected": -0.5108642578125, |
| "logps/chosen": -597.75, |
| "logps/rejected": -676.0, |
| "loss": 0.4893, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.59375, |
| "rewards/margins": 0.8583984375, |
| "rewards/rejected": -3.455078125, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.40192, |
| "grad_norm": 8.678985799279634, |
| "learning_rate": 5.236417782458656e-07, |
| "logits/chosen": -0.572021484375, |
| "logits/rejected": -0.590576171875, |
| "logps/chosen": -636.5, |
| "logps/rejected": -693.5, |
| "loss": 0.5174, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.84375, |
| "rewards/margins": 0.8515625, |
| "rewards/rejected": -3.6953125, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.4032, |
| "grad_norm": 12.718593158199928, |
| "learning_rate": 5.222800788965847e-07, |
| "logits/chosen": -0.526611328125, |
| "logits/rejected": -0.5595703125, |
| "logps/chosen": -596.0, |
| "logps/rejected": -672.5, |
| "loss": 0.4431, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -2.623046875, |
| "rewards/margins": 0.919677734375, |
| "rewards/rejected": -3.546875, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.40448, |
| "grad_norm": 7.7191674664296075, |
| "learning_rate": 5.209149292234689e-07, |
| "logits/chosen": -0.491455078125, |
| "logits/rejected": -0.586669921875, |
| "logps/chosen": -589.0, |
| "logps/rejected": -668.0, |
| "loss": 0.4413, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -2.6796875, |
| "rewards/margins": 0.98486328125, |
| "rewards/rejected": -3.662109375, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.40576, |
| "grad_norm": 8.070223960301346, |
| "learning_rate": 5.195463565669309e-07, |
| "logits/chosen": -0.482421875, |
| "logits/rejected": -0.54638671875, |
| "logps/chosen": -555.5, |
| "logps/rejected": -614.5, |
| "loss": 0.5058, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -2.587890625, |
| "rewards/margins": 0.83489990234375, |
| "rewards/rejected": -3.419921875, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.40704, |
| "grad_norm": 10.840771905557805, |
| "learning_rate": 5.18174388335937e-07, |
| "logits/chosen": -0.4683837890625, |
| "logits/rejected": -0.53369140625, |
| "logps/chosen": -602.5, |
| "logps/rejected": -684.5, |
| "loss": 0.4845, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.919921875, |
| "rewards/margins": 0.88427734375, |
| "rewards/rejected": -3.798828125, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.40832, |
| "grad_norm": 10.638532746515596, |
| "learning_rate": 5.167990520074577e-07, |
| "logits/chosen": -0.458251953125, |
| "logits/rejected": -0.510498046875, |
| "logps/chosen": -632.5, |
| "logps/rejected": -718.5, |
| "loss": 0.4626, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.06640625, |
| "rewards/margins": 1.0732421875, |
| "rewards/rejected": -4.142578125, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.4096, |
| "grad_norm": 19.063401240095594, |
| "learning_rate": 5.154203751259183e-07, |
| "logits/chosen": -0.400634765625, |
| "logits/rejected": -0.4285888671875, |
| "logps/chosen": -671.5, |
| "logps/rejected": -744.5, |
| "loss": 0.53, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.439453125, |
| "rewards/margins": 1.008056640625, |
| "rewards/rejected": -4.447265625, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.41088, |
| "grad_norm": 11.58200685748918, |
| "learning_rate": 5.140383853026462e-07, |
| "logits/chosen": -0.2918701171875, |
| "logits/rejected": -0.3492431640625, |
| "logps/chosen": -709.5, |
| "logps/rejected": -846.0, |
| "loss": 0.4238, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.49609375, |
| "rewards/margins": 1.197265625, |
| "rewards/rejected": -4.6953125, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.41216, |
| "grad_norm": 10.060507343077607, |
| "learning_rate": 5.12653110215319e-07, |
| "logits/chosen": -0.3533172607421875, |
| "logits/rejected": -0.39697265625, |
| "logps/chosen": -668.0, |
| "logps/rejected": -751.0, |
| "loss": 0.4708, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.43359375, |
| "rewards/margins": 0.975830078125, |
| "rewards/rejected": -4.40625, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.41344, |
| "grad_norm": 20.29118301920304, |
| "learning_rate": 5.112645776074089e-07, |
| "logits/chosen": -0.33740234375, |
| "logits/rejected": -0.3800048828125, |
| "logps/chosen": -679.5, |
| "logps/rejected": -731.0, |
| "loss": 0.529, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.548828125, |
| "rewards/margins": 0.9560546875, |
| "rewards/rejected": -4.50390625, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.41472, |
| "grad_norm": 10.344847321745984, |
| "learning_rate": 5.098728152876287e-07, |
| "logits/chosen": -0.3602294921875, |
| "logits/rejected": -0.4044189453125, |
| "logps/chosen": -676.5, |
| "logps/rejected": -783.5, |
| "loss": 0.4689, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.681640625, |
| "rewards/margins": 1.164306640625, |
| "rewards/rejected": -4.84765625, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 53.68636513984977, |
| "learning_rate": 5.084778511293732e-07, |
| "logits/chosen": -0.37158203125, |
| "logits/rejected": -0.422607421875, |
| "logps/chosen": -695.0, |
| "logps/rejected": -795.5, |
| "loss": 0.5701, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.08203125, |
| "rewards/margins": 0.8125, |
| "rewards/rejected": -4.8984375, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.41728, |
| "grad_norm": 8.17017463161669, |
| "learning_rate": 5.070797130701617e-07, |
| "logits/chosen": -0.4307861328125, |
| "logits/rejected": -0.479736328125, |
| "logps/chosen": -671.0, |
| "logps/rejected": -785.5, |
| "loss": 0.45, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.505859375, |
| "rewards/margins": 1.221435546875, |
| "rewards/rejected": -4.73046875, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.41856, |
| "grad_norm": 21.790110998965517, |
| "learning_rate": 5.056784291110795e-07, |
| "logits/chosen": -0.364990234375, |
| "logits/rejected": -0.401123046875, |
| "logps/chosen": -700.5, |
| "logps/rejected": -780.0, |
| "loss": 0.5295, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.9375, |
| "rewards/margins": 0.91845703125, |
| "rewards/rejected": -4.85546875, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.41984, |
| "grad_norm": 12.801819009552196, |
| "learning_rate": 5.04274027316215e-07, |
| "logits/chosen": -0.3697509765625, |
| "logits/rejected": -0.385498046875, |
| "logps/chosen": -688.5, |
| "logps/rejected": -746.5, |
| "loss": 0.573, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.572265625, |
| "rewards/margins": 0.7158203125, |
| "rewards/rejected": -4.29296875, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.42112, |
| "grad_norm": 9.406211305601078, |
| "learning_rate": 5.028665358120994e-07, |
| "logits/chosen": -0.3780517578125, |
| "logits/rejected": -0.391845703125, |
| "logps/chosen": -657.0, |
| "logps/rejected": -727.0, |
| "loss": 0.4427, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.337890625, |
| "rewards/margins": 0.92919921875, |
| "rewards/rejected": -4.259765625, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.4224, |
| "grad_norm": 10.033069082942374, |
| "learning_rate": 5.014559827871426e-07, |
| "logits/chosen": -0.425048828125, |
| "logits/rejected": -0.448486328125, |
| "logps/chosen": -656.0, |
| "logps/rejected": -715.0, |
| "loss": 0.5266, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.21484375, |
| "rewards/margins": 0.91552734375, |
| "rewards/rejected": -4.12890625, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.42368, |
| "grad_norm": 9.652663655572574, |
| "learning_rate": 5.00042396491069e-07, |
| "logits/chosen": -0.506103515625, |
| "logits/rejected": -0.539794921875, |
| "logps/chosen": -658.0, |
| "logps/rejected": -751.0, |
| "loss": 0.4558, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.150390625, |
| "rewards/margins": 1.1103515625, |
| "rewards/rejected": -4.26171875, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.42496, |
| "grad_norm": 12.260458134069255, |
| "learning_rate": 4.986258052343511e-07, |
| "logits/chosen": -0.4840087890625, |
| "logits/rejected": -0.509521484375, |
| "logps/chosen": -655.5, |
| "logps/rejected": -725.0, |
| "loss": 0.5506, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.10546875, |
| "rewards/margins": 0.929931640625, |
| "rewards/rejected": -4.033203125, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.42624, |
| "grad_norm": 8.767742475858029, |
| "learning_rate": 4.972062373876435e-07, |
| "logits/chosen": -0.4326171875, |
| "logits/rejected": -0.478515625, |
| "logps/chosen": -643.5, |
| "logps/rejected": -712.5, |
| "loss": 0.5134, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.208984375, |
| "rewards/margins": 0.804443359375, |
| "rewards/rejected": -4.01171875, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.42752, |
| "grad_norm": 8.183910291192875, |
| "learning_rate": 4.95783721381214e-07, |
| "logits/chosen": -0.564453125, |
| "logits/rejected": -0.620361328125, |
| "logps/chosen": -615.5, |
| "logps/rejected": -733.5, |
| "loss": 0.4354, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -2.94921875, |
| "rewards/margins": 1.19970703125, |
| "rewards/rejected": -4.150390625, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.4288, |
| "grad_norm": 12.662549180581198, |
| "learning_rate": 4.943582857043742e-07, |
| "logits/chosen": -0.531005859375, |
| "logits/rejected": -0.53173828125, |
| "logps/chosen": -628.0, |
| "logps/rejected": -703.0, |
| "loss": 0.4677, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -2.96484375, |
| "rewards/margins": 0.94482421875, |
| "rewards/rejected": -3.9140625, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.43008, |
| "grad_norm": 11.452856436676567, |
| "learning_rate": 4.929299589049095e-07, |
| "logits/chosen": -0.5340576171875, |
| "logits/rejected": -0.6025390625, |
| "logps/chosen": -600.0, |
| "logps/rejected": -719.0, |
| "loss": 0.4012, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -2.669921875, |
| "rewards/margins": 1.154296875, |
| "rewards/rejected": -3.826171875, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.43136, |
| "grad_norm": 8.22048523281164, |
| "learning_rate": 4.914987695885067e-07, |
| "logits/chosen": -0.60498046875, |
| "logits/rejected": -0.62744140625, |
| "logps/chosen": -667.0, |
| "logps/rejected": -755.0, |
| "loss": 0.483, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -2.97265625, |
| "rewards/margins": 1.0361328125, |
| "rewards/rejected": -4.005859375, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.43264, |
| "grad_norm": 12.225224739430297, |
| "learning_rate": 4.900647464181817e-07, |
| "logits/chosen": -0.485107421875, |
| "logits/rejected": -0.52783203125, |
| "logps/chosen": -680.5, |
| "logps/rejected": -767.5, |
| "loss": 0.5177, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.234375, |
| "rewards/margins": 1.04296875, |
| "rewards/rejected": -4.27734375, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.43392, |
| "grad_norm": 8.757850519576271, |
| "learning_rate": 4.886279181137049e-07, |
| "logits/chosen": -0.552490234375, |
| "logits/rejected": -0.578369140625, |
| "logps/chosen": -653.0, |
| "logps/rejected": -717.5, |
| "loss": 0.5448, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.142578125, |
| "rewards/margins": 0.8095703125, |
| "rewards/rejected": -3.955078125, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.4352, |
| "grad_norm": 9.695868477167725, |
| "learning_rate": 4.871883134510262e-07, |
| "logits/chosen": -0.494384765625, |
| "logits/rejected": -0.51611328125, |
| "logps/chosen": -625.0, |
| "logps/rejected": -719.5, |
| "loss": 0.4424, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.021484375, |
| "rewards/margins": 1.091796875, |
| "rewards/rejected": -4.111328125, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.43648, |
| "grad_norm": 8.243809920140835, |
| "learning_rate": 4.857459612616992e-07, |
| "logits/chosen": -0.506591796875, |
| "logits/rejected": -0.54443359375, |
| "logps/chosen": -581.0, |
| "logps/rejected": -647.0, |
| "loss": 0.5348, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -2.822265625, |
| "rewards/margins": 0.7535400390625, |
| "rewards/rejected": -3.580078125, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.43776, |
| "grad_norm": 18.55929104936941, |
| "learning_rate": 4.843008904323029e-07, |
| "logits/chosen": -0.501708984375, |
| "logits/rejected": -0.51806640625, |
| "logps/chosen": -652.5, |
| "logps/rejected": -742.0, |
| "loss": 0.4282, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.1953125, |
| "rewards/margins": 1.115234375, |
| "rewards/rejected": -4.31640625, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.43904, |
| "grad_norm": 10.539994924615243, |
| "learning_rate": 4.828531299038638e-07, |
| "logits/chosen": -0.484375, |
| "logits/rejected": -0.52587890625, |
| "logps/chosen": -627.0, |
| "logps/rejected": -710.5, |
| "loss": 0.5168, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.1171875, |
| "rewards/margins": 0.879150390625, |
| "rewards/rejected": -3.99609375, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.44032, |
| "grad_norm": 14.239765085999206, |
| "learning_rate": 4.81402708671276e-07, |
| "logits/chosen": -0.473876953125, |
| "logits/rejected": -0.54345703125, |
| "logps/chosen": -616.0, |
| "logps/rejected": -745.5, |
| "loss": 0.3896, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.083984375, |
| "rewards/margins": 1.267822265625, |
| "rewards/rejected": -4.3515625, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.4416, |
| "grad_norm": 11.762433092658819, |
| "learning_rate": 4.799496557827208e-07, |
| "logits/chosen": -0.507568359375, |
| "logits/rejected": -0.552001953125, |
| "logps/chosen": -673.0, |
| "logps/rejected": -818.5, |
| "loss": 0.4055, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.478515625, |
| "rewards/margins": 1.3271484375, |
| "rewards/rejected": -4.80859375, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.44288, |
| "grad_norm": 13.3714208302887, |
| "learning_rate": 4.784940003390846e-07, |
| "logits/chosen": -0.47705078125, |
| "logits/rejected": -0.4874267578125, |
| "logps/chosen": -651.0, |
| "logps/rejected": -722.5, |
| "loss": 0.484, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.33203125, |
| "rewards/margins": 0.9072265625, |
| "rewards/rejected": -4.244140625, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.44416, |
| "grad_norm": 8.419739675084086, |
| "learning_rate": 4.770357714933765e-07, |
| "logits/chosen": -0.3489990234375, |
| "logits/rejected": -0.399169921875, |
| "logps/chosen": -715.5, |
| "logps/rejected": -804.0, |
| "loss": 0.4466, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.701171875, |
| "rewards/margins": 1.22509765625, |
| "rewards/rejected": -4.9296875, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.44544, |
| "grad_norm": 28.282829493788125, |
| "learning_rate": 4.7557499845014363e-07, |
| "logits/chosen": -0.34809112548828125, |
| "logits/rejected": -0.3986968994140625, |
| "logps/chosen": -692.0, |
| "logps/rejected": -785.5, |
| "loss": 0.5644, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.8125, |
| "rewards/margins": 1.04931640625, |
| "rewards/rejected": -4.859375, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.44672, |
| "grad_norm": 11.217438269564665, |
| "learning_rate": 4.741117104648874e-07, |
| "logits/chosen": -0.368408203125, |
| "logits/rejected": -0.39208984375, |
| "logps/chosen": -736.5, |
| "logps/rejected": -841.0, |
| "loss": 0.4152, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.939453125, |
| "rewards/margins": 1.28564453125, |
| "rewards/rejected": -5.2265625, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 9.988120972029757, |
| "learning_rate": 4.726459368434768e-07, |
| "logits/chosen": -0.345458984375, |
| "logits/rejected": -0.3746337890625, |
| "logps/chosen": -782.5, |
| "logps/rejected": -898.5, |
| "loss": 0.4168, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -4.28125, |
| "rewards/margins": 1.29931640625, |
| "rewards/rejected": -5.578125, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.44928, |
| "grad_norm": 27.1774086371912, |
| "learning_rate": 4.7117770694156146e-07, |
| "logits/chosen": -0.35791015625, |
| "logits/rejected": -0.396728515625, |
| "logps/chosen": -795.0, |
| "logps/rejected": -910.0, |
| "loss": 0.5071, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.61328125, |
| "rewards/margins": 1.2353515625, |
| "rewards/rejected": -5.84765625, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.45056, |
| "grad_norm": 18.51059077469931, |
| "learning_rate": 4.697070501639841e-07, |
| "logits/chosen": -0.3016357421875, |
| "logits/rejected": -0.3563232421875, |
| "logps/chosen": -777.0, |
| "logps/rejected": -924.0, |
| "loss": 0.4595, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.75390625, |
| "rewards/margins": 1.1455078125, |
| "rewards/rejected": -5.90234375, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.45184, |
| "grad_norm": 33.97653228823091, |
| "learning_rate": 4.682339959641915e-07, |
| "logits/chosen": -0.3653564453125, |
| "logits/rejected": -0.433349609375, |
| "logps/chosen": -755.0, |
| "logps/rejected": -877.0, |
| "loss": 0.505, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -4.537109375, |
| "rewards/margins": 1.25146484375, |
| "rewards/rejected": -5.7890625, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.45312, |
| "grad_norm": 38.200517048662974, |
| "learning_rate": 4.6675857384364475e-07, |
| "logits/chosen": -0.3599853515625, |
| "logits/rejected": -0.429931640625, |
| "logps/chosen": -783.0, |
| "logps/rejected": -925.0, |
| "loss": 0.5449, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.583984375, |
| "rewards/margins": 1.157470703125, |
| "rewards/rejected": -5.7421875, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.4544, |
| "grad_norm": 24.734123344344813, |
| "learning_rate": 4.6528081335122786e-07, |
| "logits/chosen": -0.34062957763671875, |
| "logits/rejected": -0.3896484375, |
| "logps/chosen": -734.5, |
| "logps/rejected": -849.0, |
| "loss": 0.522, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.123046875, |
| "rewards/margins": 1.119140625, |
| "rewards/rejected": -5.2421875, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.45568, |
| "grad_norm": 8.65649758872943, |
| "learning_rate": 4.6380074408265677e-07, |
| "logits/chosen": -0.3984375, |
| "logits/rejected": -0.3931884765625, |
| "logps/chosen": -743.0, |
| "logps/rejected": -840.0, |
| "loss": 0.4604, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.982421875, |
| "rewards/margins": 1.145751953125, |
| "rewards/rejected": -5.12890625, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.45696, |
| "grad_norm": 13.642027050493772, |
| "learning_rate": 4.62318395679886e-07, |
| "logits/chosen": -0.3692626953125, |
| "logits/rejected": -0.432861328125, |
| "logps/chosen": -708.25, |
| "logps/rejected": -823.5, |
| "loss": 0.4635, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.76171875, |
| "rewards/margins": 1.205078125, |
| "rewards/rejected": -4.9609375, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.45824, |
| "grad_norm": 8.597706559419494, |
| "learning_rate": 4.608337978305154e-07, |
| "logits/chosen": -0.404541015625, |
| "logits/rejected": -0.471435546875, |
| "logps/chosen": -671.5, |
| "logps/rejected": -778.5, |
| "loss": 0.465, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.478515625, |
| "rewards/margins": 1.0322265625, |
| "rewards/rejected": -4.515625, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.45952, |
| "grad_norm": 16.144497938016837, |
| "learning_rate": 4.593469802671951e-07, |
| "logits/chosen": -0.454833984375, |
| "logits/rejected": -0.503173828125, |
| "logps/chosen": -623.0, |
| "logps/rejected": -733.5, |
| "loss": 0.427, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.236328125, |
| "rewards/margins": 1.34521484375, |
| "rewards/rejected": -4.58203125, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.4608, |
| "grad_norm": 15.765499984405258, |
| "learning_rate": 4.5785797276703074e-07, |
| "logits/chosen": -0.46826171875, |
| "logits/rejected": -0.511474609375, |
| "logps/chosen": -686.5, |
| "logps/rejected": -804.0, |
| "loss": 0.5152, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.60546875, |
| "rewards/margins": 1.046875, |
| "rewards/rejected": -4.65625, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.46208, |
| "grad_norm": 8.245124791845544, |
| "learning_rate": 4.563668051509864e-07, |
| "logits/chosen": -0.49560546875, |
| "logits/rejected": -0.5390625, |
| "logps/chosen": -630.5, |
| "logps/rejected": -747.5, |
| "loss": 0.4031, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.111328125, |
| "rewards/margins": 1.2119140625, |
| "rewards/rejected": -4.326171875, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.46336, |
| "grad_norm": 8.559688059984063, |
| "learning_rate": 4.5487350728328796e-07, |
| "logits/chosen": -0.4332275390625, |
| "logits/rejected": -0.4951171875, |
| "logps/chosen": -630.0, |
| "logps/rejected": -726.0, |
| "loss": 0.4651, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.236328125, |
| "rewards/margins": 1.109375, |
| "rewards/rejected": -4.34765625, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.46464, |
| "grad_norm": 10.160592154999383, |
| "learning_rate": 4.533781090708244e-07, |
| "logits/chosen": -0.454345703125, |
| "logits/rejected": -0.46337890625, |
| "logps/chosen": -671.5, |
| "logps/rejected": -767.0, |
| "loss": 0.495, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.326171875, |
| "rewards/margins": 0.933349609375, |
| "rewards/rejected": -4.259765625, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.46592, |
| "grad_norm": 8.364222921089874, |
| "learning_rate": 4.518806404625495e-07, |
| "logits/chosen": -0.494140625, |
| "logits/rejected": -0.537109375, |
| "logps/chosen": -641.0, |
| "logps/rejected": -734.5, |
| "loss": 0.4365, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.1640625, |
| "rewards/margins": 1.05615234375, |
| "rewards/rejected": -4.220703125, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.4672, |
| "grad_norm": 9.713400168567283, |
| "learning_rate": 4.503811314488816e-07, |
| "logits/chosen": -0.428955078125, |
| "logits/rejected": -0.471923828125, |
| "logps/chosen": -652.5, |
| "logps/rejected": -697.0, |
| "loss": 0.5314, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -3.28125, |
| "rewards/margins": 0.78515625, |
| "rewards/rejected": -4.06640625, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.46848, |
| "grad_norm": 8.9296942425344, |
| "learning_rate": 4.488796120611029e-07, |
| "logits/chosen": -0.44140625, |
| "logits/rejected": -0.49609375, |
| "logps/chosen": -610.25, |
| "logps/rejected": -724.5, |
| "loss": 0.5128, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.134765625, |
| "rewards/margins": 1.03662109375, |
| "rewards/rejected": -4.169921875, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.46976, |
| "grad_norm": 8.165959460812232, |
| "learning_rate": 4.4737611237075845e-07, |
| "logits/chosen": -0.47265625, |
| "logits/rejected": -0.504150390625, |
| "logps/chosen": -656.0, |
| "logps/rejected": -809.5, |
| "loss": 0.4489, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.306640625, |
| "rewards/margins": 1.1591796875, |
| "rewards/rejected": -4.46484375, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.47104, |
| "grad_norm": 15.893475743625158, |
| "learning_rate": 4.4587066248905335e-07, |
| "logits/chosen": -0.407470703125, |
| "logits/rejected": -0.41796875, |
| "logps/chosen": -636.0, |
| "logps/rejected": -754.5, |
| "loss": 0.4304, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.076171875, |
| "rewards/margins": 1.13623046875, |
| "rewards/rejected": -4.212890625, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.47232, |
| "grad_norm": 12.204585822720064, |
| "learning_rate": 4.443632925662504e-07, |
| "logits/chosen": -0.416015625, |
| "logits/rejected": -0.44873046875, |
| "logps/chosen": -596.5, |
| "logps/rejected": -670.0, |
| "loss": 0.491, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -2.927734375, |
| "rewards/margins": 0.92626953125, |
| "rewards/rejected": -3.8515625, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.4736, |
| "grad_norm": 11.003968270027663, |
| "learning_rate": 4.4285403279106523e-07, |
| "logits/chosen": -0.3580322265625, |
| "logits/rejected": -0.4219970703125, |
| "logps/chosen": -611.5, |
| "logps/rejected": -725.5, |
| "loss": 0.4313, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.126953125, |
| "rewards/margins": 1.04248046875, |
| "rewards/rejected": -4.162109375, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.47488, |
| "grad_norm": 199.6202974665465, |
| "learning_rate": 4.4134291339006305e-07, |
| "logits/chosen": -0.3883056640625, |
| "logits/rejected": -0.38299560546875, |
| "logps/chosen": -646.5, |
| "logps/rejected": -762.0, |
| "loss": 0.5204, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.2890625, |
| "rewards/margins": 1.00927734375, |
| "rewards/rejected": -4.296875, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.47616, |
| "grad_norm": 11.144489957125527, |
| "learning_rate": 4.3982996462705184e-07, |
| "logits/chosen": -0.398193359375, |
| "logits/rejected": -0.4439697265625, |
| "logps/chosen": -675.0, |
| "logps/rejected": -765.5, |
| "loss": 0.4755, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.494140625, |
| "rewards/margins": 1.0732421875, |
| "rewards/rejected": -4.5703125, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.47744, |
| "grad_norm": 12.09850803118018, |
| "learning_rate": 4.383152168024776e-07, |
| "logits/chosen": -0.3857421875, |
| "logits/rejected": -0.430419921875, |
| "logps/chosen": -658.0, |
| "logps/rejected": -753.0, |
| "loss": 0.5137, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.525390625, |
| "rewards/margins": 0.9542236328125, |
| "rewards/rejected": -4.482421875, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.47872, |
| "grad_norm": 10.440049833545846, |
| "learning_rate": 4.3679870025281645e-07, |
| "logits/chosen": -0.3140869140625, |
| "logits/rejected": -0.341064453125, |
| "logps/chosen": -693.0, |
| "logps/rejected": -782.0, |
| "loss": 0.5013, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.654296875, |
| "rewards/margins": 0.970458984375, |
| "rewards/rejected": -4.626953125, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 18.264633064403437, |
| "learning_rate": 4.3528044534996764e-07, |
| "logits/chosen": -0.296142578125, |
| "logits/rejected": -0.359619140625, |
| "logps/chosen": -638.0, |
| "logps/rejected": -704.5, |
| "loss": 0.5782, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.236328125, |
| "rewards/margins": 0.8702392578125, |
| "rewards/rejected": -4.109375, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.48128, |
| "grad_norm": 8.417915455120893, |
| "learning_rate": 4.337604825006452e-07, |
| "logits/chosen": -0.3731689453125, |
| "logits/rejected": -0.4154052734375, |
| "logps/chosen": -638.0, |
| "logps/rejected": -742.5, |
| "loss": 0.4726, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.23828125, |
| "rewards/margins": 1.09521484375, |
| "rewards/rejected": -4.33203125, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.48256, |
| "grad_norm": 8.947246273156576, |
| "learning_rate": 4.3223884214576875e-07, |
| "logits/chosen": -0.369049072265625, |
| "logits/rejected": -0.3988037109375, |
| "logps/chosen": -686.5, |
| "logps/rejected": -759.5, |
| "loss": 0.5215, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.53515625, |
| "rewards/margins": 0.819091796875, |
| "rewards/rejected": -4.35546875, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.48384, |
| "grad_norm": 9.885820986822363, |
| "learning_rate": 4.3071555475985404e-07, |
| "logits/chosen": -0.347412109375, |
| "logits/rejected": -0.433349609375, |
| "logps/chosen": -605.5, |
| "logps/rejected": -716.5, |
| "loss": 0.4847, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.021484375, |
| "rewards/margins": 1.01611328125, |
| "rewards/rejected": -4.0390625, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.48512, |
| "grad_norm": 8.772239493272059, |
| "learning_rate": 4.2919065085040284e-07, |
| "logits/chosen": -0.371337890625, |
| "logits/rejected": -0.4169921875, |
| "logps/chosen": -646.0, |
| "logps/rejected": -746.0, |
| "loss": 0.4578, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.228515625, |
| "rewards/margins": 1.074462890625, |
| "rewards/rejected": -4.298828125, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.4864, |
| "grad_norm": 10.720670863122297, |
| "learning_rate": 4.2766416095729113e-07, |
| "logits/chosen": -0.37646484375, |
| "logits/rejected": -0.43701171875, |
| "logps/chosen": -662.5, |
| "logps/rejected": -751.5, |
| "loss": 0.4435, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.1328125, |
| "rewards/margins": 1.14599609375, |
| "rewards/rejected": -4.275390625, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.48768, |
| "grad_norm": 11.03467333595442, |
| "learning_rate": 4.261361156521586e-07, |
| "logits/chosen": -0.458740234375, |
| "logits/rejected": -0.530517578125, |
| "logps/chosen": -665.5, |
| "logps/rejected": -731.0, |
| "loss": 0.4372, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.1015625, |
| "rewards/margins": 1.2353515625, |
| "rewards/rejected": -4.33984375, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.48896, |
| "grad_norm": 9.450090321989622, |
| "learning_rate": 4.2460654553779557e-07, |
| "logits/chosen": -0.43212890625, |
| "logits/rejected": -0.47119140625, |
| "logps/chosen": -619.75, |
| "logps/rejected": -714.0, |
| "loss": 0.5313, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.197265625, |
| "rewards/margins": 0.80224609375, |
| "rewards/rejected": -3.99609375, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.49024, |
| "grad_norm": 14.256426075381693, |
| "learning_rate": 4.230754812475305e-07, |
| "logits/chosen": -0.40234375, |
| "logits/rejected": -0.401611328125, |
| "logps/chosen": -587.25, |
| "logps/rejected": -672.0, |
| "loss": 0.4617, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -2.955078125, |
| "rewards/margins": 0.952392578125, |
| "rewards/rejected": -3.90234375, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.49152, |
| "grad_norm": 16.353106293708784, |
| "learning_rate": 4.2154295344461614e-07, |
| "logits/chosen": -0.456787109375, |
| "logits/rejected": -0.4775390625, |
| "logps/chosen": -661.0, |
| "logps/rejected": -743.5, |
| "loss": 0.4423, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.201171875, |
| "rewards/margins": 1.04833984375, |
| "rewards/rejected": -4.251953125, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4928, |
| "grad_norm": 11.021886637647597, |
| "learning_rate": 4.2000899282161556e-07, |
| "logits/chosen": -0.51171875, |
| "logits/rejected": -0.52880859375, |
| "logps/chosen": -652.5, |
| "logps/rejected": -727.5, |
| "loss": 0.4629, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.22265625, |
| "rewards/margins": 1.03076171875, |
| "rewards/rejected": -4.251953125, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.49408, |
| "grad_norm": 12.461963298467701, |
| "learning_rate": 4.1847363009978773e-07, |
| "logits/chosen": -0.3712158203125, |
| "logits/rejected": -0.412353515625, |
| "logps/chosen": -608.0, |
| "logps/rejected": -697.5, |
| "loss": 0.4655, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.07421875, |
| "rewards/margins": 1.073974609375, |
| "rewards/rejected": -4.1484375, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.49536, |
| "grad_norm": 11.664191303420534, |
| "learning_rate": 4.169368960284718e-07, |
| "logits/chosen": -0.2545166015625, |
| "logits/rejected": -0.374755859375, |
| "logps/chosen": -602.5, |
| "logps/rejected": -677.0, |
| "loss": 0.5172, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.953125, |
| "rewards/margins": 0.90625, |
| "rewards/rejected": -3.857421875, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.49664, |
| "grad_norm": 9.22000344583139, |
| "learning_rate": 4.1539882138447173e-07, |
| "logits/chosen": -0.439453125, |
| "logits/rejected": -0.486328125, |
| "logps/chosen": -675.0, |
| "logps/rejected": -789.5, |
| "loss": 0.437, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.564453125, |
| "rewards/margins": 1.2265625, |
| "rewards/rejected": -4.796875, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.49792, |
| "grad_norm": 10.388005400752823, |
| "learning_rate": 4.138594369714394e-07, |
| "logits/chosen": -0.369140625, |
| "logits/rejected": -0.390625, |
| "logps/chosen": -634.0, |
| "logps/rejected": -700.5, |
| "loss": 0.515, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.412109375, |
| "rewards/margins": 0.984130859375, |
| "rewards/rejected": -4.400390625, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.4992, |
| "grad_norm": 19.773927339862066, |
| "learning_rate": 4.1231877361925835e-07, |
| "logits/chosen": -0.27435302734375, |
| "logits/rejected": -0.29425048828125, |
| "logps/chosen": -664.0, |
| "logps/rejected": -725.5, |
| "loss": 0.5527, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.544921875, |
| "rewards/margins": 0.83642578125, |
| "rewards/rejected": -4.37890625, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.50048, |
| "grad_norm": 26.019664464089015, |
| "learning_rate": 4.1077686218342565e-07, |
| "logits/chosen": -0.300048828125, |
| "logits/rejected": -0.372314453125, |
| "logps/chosen": -690.5, |
| "logps/rejected": -777.0, |
| "loss": 0.6148, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.828125, |
| "rewards/margins": 0.8681640625, |
| "rewards/rejected": -4.69921875, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.50176, |
| "grad_norm": 9.566179596760582, |
| "learning_rate": 4.0923373354443425e-07, |
| "logits/chosen": -0.34130859375, |
| "logits/rejected": -0.38330078125, |
| "logps/chosen": -703.5, |
| "logps/rejected": -784.0, |
| "loss": 0.5242, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.611328125, |
| "rewards/margins": 0.9840087890625, |
| "rewards/rejected": -4.595703125, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.50304, |
| "grad_norm": 9.68452388640175, |
| "learning_rate": 4.076894186071548e-07, |
| "logits/chosen": -0.363037109375, |
| "logits/rejected": -0.41796875, |
| "logps/chosen": -660.0, |
| "logps/rejected": -764.0, |
| "loss": 0.4657, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.33984375, |
| "rewards/margins": 1.11669921875, |
| "rewards/rejected": -4.45703125, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.50432, |
| "grad_norm": 12.917842813325114, |
| "learning_rate": 4.0614394830021604e-07, |
| "logits/chosen": -0.3944091796875, |
| "logits/rejected": -0.46484375, |
| "logps/chosen": -638.5, |
| "logps/rejected": -742.0, |
| "loss": 0.4348, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.1484375, |
| "rewards/margins": 1.259033203125, |
| "rewards/rejected": -4.41015625, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.5056, |
| "grad_norm": 9.314495046466147, |
| "learning_rate": 4.0459735357538624e-07, |
| "logits/chosen": -0.392578125, |
| "logits/rejected": -0.4384765625, |
| "logps/chosen": -630.5, |
| "logps/rejected": -722.0, |
| "loss": 0.4576, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.1796875, |
| "rewards/margins": 1.0947265625, |
| "rewards/rejected": -4.26953125, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.50688, |
| "grad_norm": 10.547291120299455, |
| "learning_rate": 4.030496654069524e-07, |
| "logits/chosen": -0.4755859375, |
| "logits/rejected": -0.510009765625, |
| "logps/chosen": -657.0, |
| "logps/rejected": -736.5, |
| "loss": 0.4343, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.126953125, |
| "rewards/margins": 1.056640625, |
| "rewards/rejected": -4.181640625, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.50816, |
| "grad_norm": 8.647735781009173, |
| "learning_rate": 4.0150091479110063e-07, |
| "logits/chosen": -0.362060546875, |
| "logits/rejected": -0.4324951171875, |
| "logps/chosen": -639.0, |
| "logps/rejected": -751.5, |
| "loss": 0.4441, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.1640625, |
| "rewards/margins": 1.125, |
| "rewards/rejected": -4.291015625, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.50944, |
| "grad_norm": 8.598126228517861, |
| "learning_rate": 3.99951132745295e-07, |
| "logits/chosen": -0.4058837890625, |
| "logits/rejected": -0.457275390625, |
| "logps/chosen": -732.0, |
| "logps/rejected": -845.0, |
| "loss": 0.4575, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.447265625, |
| "rewards/margins": 1.125, |
| "rewards/rejected": -4.57421875, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.51072, |
| "grad_norm": 8.013248595167934, |
| "learning_rate": 3.984003503076566e-07, |
| "logits/chosen": -0.36474609375, |
| "logits/rejected": -0.416748046875, |
| "logps/chosen": -674.5, |
| "logps/rejected": -795.0, |
| "loss": 0.424, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.474609375, |
| "rewards/margins": 1.25830078125, |
| "rewards/rejected": -4.73046875, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 9.871007801580504, |
| "learning_rate": 3.968485985363416e-07, |
| "logits/chosen": -0.343994140625, |
| "logits/rejected": -0.3740234375, |
| "logps/chosen": -646.5, |
| "logps/rejected": -742.5, |
| "loss": 0.4432, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.3203125, |
| "rewards/margins": 1.1904296875, |
| "rewards/rejected": -4.5078125, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.512, |
| "eval_logits/chosen": -0.31884765625, |
| "eval_logits/rejected": -0.39837646484375, |
| "eval_logps/chosen": -663.0, |
| "eval_logps/rejected": -743.5, |
| "eval_loss": 0.49900001287460327, |
| "eval_rewards/accuracies": 0.741406261920929, |
| "eval_rewards/chosen": -3.4296875, |
| "eval_rewards/margins": 0.991943359375, |
| "eval_rewards/rejected": -4.4228515625, |
| "eval_runtime": 27.5022, |
| "eval_samples_per_second": 18.18, |
| "eval_steps_per_second": 0.582, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.51328, |
| "grad_norm": 9.918254110325114, |
| "learning_rate": 3.9529590850891934e-07, |
| "logits/chosen": -0.31048583984375, |
| "logits/rejected": -0.33154296875, |
| "logps/chosen": -675.0, |
| "logps/rejected": -770.0, |
| "loss": 0.4547, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.498046875, |
| "rewards/margins": 1.178955078125, |
| "rewards/rejected": -4.67578125, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.51456, |
| "grad_norm": 10.592955302658622, |
| "learning_rate": 3.9374231132175044e-07, |
| "logits/chosen": -0.3048095703125, |
| "logits/rejected": -0.3560791015625, |
| "logps/chosen": -670.0, |
| "logps/rejected": -794.5, |
| "loss": 0.4097, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.53515625, |
| "rewards/margins": 1.30126953125, |
| "rewards/rejected": -4.84375, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.51584, |
| "grad_norm": 9.608106146275887, |
| "learning_rate": 3.92187838089363e-07, |
| "logits/chosen": -0.361328125, |
| "logits/rejected": -0.39404296875, |
| "logps/chosen": -707.5, |
| "logps/rejected": -818.5, |
| "loss": 0.3828, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.662109375, |
| "rewards/margins": 1.3974609375, |
| "rewards/rejected": -5.0546875, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.51712, |
| "grad_norm": 14.34108279812411, |
| "learning_rate": 3.906325199438306e-07, |
| "logits/chosen": -0.335693359375, |
| "logits/rejected": -0.384765625, |
| "logps/chosen": -711.5, |
| "logps/rejected": -813.0, |
| "loss": 0.4382, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.736328125, |
| "rewards/margins": 1.1923828125, |
| "rewards/rejected": -4.93359375, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.5184, |
| "grad_norm": 11.139031446712131, |
| "learning_rate": 3.890763880341477e-07, |
| "logits/chosen": -0.3443603515625, |
| "logits/rejected": -0.3773193359375, |
| "logps/chosen": -662.75, |
| "logps/rejected": -766.0, |
| "loss": 0.5007, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.744140625, |
| "rewards/margins": 1.0185546875, |
| "rewards/rejected": -4.76953125, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.51968, |
| "grad_norm": 8.55091082093681, |
| "learning_rate": 3.875194735256067e-07, |
| "logits/chosen": -0.3232421875, |
| "logits/rejected": -0.3831787109375, |
| "logps/chosen": -675.0, |
| "logps/rejected": -830.5, |
| "loss": 0.4086, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.86328125, |
| "rewards/margins": 1.25634765625, |
| "rewards/rejected": -5.119140625, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.52096, |
| "grad_norm": 11.406620827960465, |
| "learning_rate": 3.859618075991735e-07, |
| "logits/chosen": -0.28350830078125, |
| "logits/rejected": -0.31494140625, |
| "logps/chosen": -730.0, |
| "logps/rejected": -811.0, |
| "loss": 0.5632, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.947265625, |
| "rewards/margins": 0.9580078125, |
| "rewards/rejected": -4.90625, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.52224, |
| "grad_norm": 11.55895260093868, |
| "learning_rate": 3.8440342145086245e-07, |
| "logits/chosen": -0.2462158203125, |
| "logits/rejected": -0.32208251953125, |
| "logps/chosen": -710.5, |
| "logps/rejected": -829.0, |
| "loss": 0.465, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.90234375, |
| "rewards/margins": 1.115234375, |
| "rewards/rejected": -5.01953125, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.52352, |
| "grad_norm": 9.334623486544036, |
| "learning_rate": 3.828443462911127e-07, |
| "logits/chosen": -0.235107421875, |
| "logits/rejected": -0.2991943359375, |
| "logps/chosen": -705.0, |
| "logps/rejected": -859.0, |
| "loss": 0.4231, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.93359375, |
| "rewards/margins": 1.3603515625, |
| "rewards/rejected": -5.2890625, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.5248, |
| "grad_norm": 9.304030036813096, |
| "learning_rate": 3.8128461334416223e-07, |
| "logits/chosen": -0.2989501953125, |
| "logits/rejected": -0.375732421875, |
| "logps/chosen": -671.0, |
| "logps/rejected": -773.0, |
| "loss": 0.4455, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.55078125, |
| "rewards/margins": 1.16357421875, |
| "rewards/rejected": -4.71484375, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.52608, |
| "grad_norm": 21.163435172332214, |
| "learning_rate": 3.7972425384742267e-07, |
| "logits/chosen": -0.275146484375, |
| "logits/rejected": -0.3277587890625, |
| "logps/chosen": -727.5, |
| "logps/rejected": -806.0, |
| "loss": 0.5246, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.919921875, |
| "rewards/margins": 0.976318359375, |
| "rewards/rejected": -4.89453125, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.52736, |
| "grad_norm": 15.093249035393466, |
| "learning_rate": 3.781632990508541e-07, |
| "logits/chosen": -0.327880859375, |
| "logits/rejected": -0.344482421875, |
| "logps/chosen": -738.5, |
| "logps/rejected": -821.5, |
| "loss": 0.5157, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.953125, |
| "rewards/margins": 1.029296875, |
| "rewards/rejected": -4.98046875, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.52864, |
| "grad_norm": 8.914765884163025, |
| "learning_rate": 3.766017802163386e-07, |
| "logits/chosen": -0.361083984375, |
| "logits/rejected": -0.388916015625, |
| "logps/chosen": -689.5, |
| "logps/rejected": -740.5, |
| "loss": 0.4922, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.609375, |
| "rewards/margins": 1.026611328125, |
| "rewards/rejected": -4.6328125, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.52992, |
| "grad_norm": 13.842987260589894, |
| "learning_rate": 3.750397286170548e-07, |
| "logits/chosen": -0.40478515625, |
| "logits/rejected": -0.459716796875, |
| "logps/chosen": -705.5, |
| "logps/rejected": -816.0, |
| "loss": 0.4627, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.615234375, |
| "rewards/margins": 1.1142578125, |
| "rewards/rejected": -4.734375, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.5312, |
| "grad_norm": 10.861145048772103, |
| "learning_rate": 3.734771755368508e-07, |
| "logits/chosen": -0.413818359375, |
| "logits/rejected": -0.449462890625, |
| "logps/chosen": -667.0, |
| "logps/rejected": -737.0, |
| "loss": 0.4652, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.353515625, |
| "rewards/margins": 0.997314453125, |
| "rewards/rejected": -4.3515625, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.53248, |
| "grad_norm": 10.048437842384944, |
| "learning_rate": 3.7191415226961866e-07, |
| "logits/chosen": -0.4249267578125, |
| "logits/rejected": -0.449951171875, |
| "logps/chosen": -663.5, |
| "logps/rejected": -741.0, |
| "loss": 0.5003, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.177734375, |
| "rewards/margins": 1.00439453125, |
| "rewards/rejected": -4.17578125, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.53376, |
| "grad_norm": 7.560247582517948, |
| "learning_rate": 3.703506901186665e-07, |
| "logits/chosen": -0.41876220703125, |
| "logits/rejected": -0.4765625, |
| "logps/chosen": -679.0, |
| "logps/rejected": -779.5, |
| "loss": 0.4335, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.23046875, |
| "rewards/margins": 1.267578125, |
| "rewards/rejected": -4.5, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.53504, |
| "grad_norm": 10.537078564076927, |
| "learning_rate": 3.687868203960925e-07, |
| "logits/chosen": -0.408935546875, |
| "logits/rejected": -0.434326171875, |
| "logps/chosen": -634.0, |
| "logps/rejected": -747.5, |
| "loss": 0.4401, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -2.978515625, |
| "rewards/margins": 1.08984375, |
| "rewards/rejected": -4.06640625, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.53632, |
| "grad_norm": 13.668617572855705, |
| "learning_rate": 3.6722257442215735e-07, |
| "logits/chosen": -0.4404296875, |
| "logits/rejected": -0.4970703125, |
| "logps/chosen": -678.0, |
| "logps/rejected": -792.5, |
| "loss": 0.4615, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.37890625, |
| "rewards/margins": 1.1044921875, |
| "rewards/rejected": -4.48828125, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.5376, |
| "grad_norm": 12.228990359857354, |
| "learning_rate": 3.6565798352465697e-07, |
| "logits/chosen": -0.485107421875, |
| "logits/rejected": -0.503173828125, |
| "logps/chosen": -628.0, |
| "logps/rejected": -704.5, |
| "loss": 0.4871, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.03125, |
| "rewards/margins": 0.918701171875, |
| "rewards/rejected": -3.947265625, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.53888, |
| "grad_norm": 18.18437340639748, |
| "learning_rate": 3.640930790382953e-07, |
| "logits/chosen": -0.3985595703125, |
| "logits/rejected": -0.4619140625, |
| "logps/chosen": -645.0, |
| "logps/rejected": -726.5, |
| "loss": 0.4444, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.162109375, |
| "rewards/margins": 0.968994140625, |
| "rewards/rejected": -4.130859375, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.54016, |
| "grad_norm": 11.370178937016053, |
| "learning_rate": 3.625278923040567e-07, |
| "logits/chosen": -0.40625, |
| "logits/rejected": -0.40673828125, |
| "logps/chosen": -689.5, |
| "logps/rejected": -748.0, |
| "loss": 0.519, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.4765625, |
| "rewards/margins": 0.92724609375, |
| "rewards/rejected": -4.40234375, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.54144, |
| "grad_norm": 14.61661184515439, |
| "learning_rate": 3.6096245466857807e-07, |
| "logits/chosen": -0.3848876953125, |
| "logits/rejected": -0.416748046875, |
| "logps/chosen": -681.0, |
| "logps/rejected": -737.5, |
| "loss": 0.4409, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.384765625, |
| "rewards/margins": 1.01220703125, |
| "rewards/rejected": -4.396484375, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.54272, |
| "grad_norm": 12.818023117655002, |
| "learning_rate": 3.5939679748352143e-07, |
| "logits/chosen": -0.3614501953125, |
| "logits/rejected": -0.43359375, |
| "logps/chosen": -664.0, |
| "logps/rejected": -786.5, |
| "loss": 0.4274, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.3046875, |
| "rewards/margins": 1.12353515625, |
| "rewards/rejected": -4.431640625, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 12.339337246571736, |
| "learning_rate": 3.578309521049456e-07, |
| "logits/chosen": -0.3673095703125, |
| "logits/rejected": -0.4122314453125, |
| "logps/chosen": -664.0, |
| "logps/rejected": -762.5, |
| "loss": 0.4744, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.564453125, |
| "rewards/margins": 1.170166015625, |
| "rewards/rejected": -4.736328125, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.54528, |
| "grad_norm": 8.658934278329045, |
| "learning_rate": 3.562649498926785e-07, |
| "logits/chosen": -0.2666015625, |
| "logits/rejected": -0.2794189453125, |
| "logps/chosen": -670.0, |
| "logps/rejected": -773.5, |
| "loss": 0.4346, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.388671875, |
| "rewards/margins": 1.10498046875, |
| "rewards/rejected": -4.490234375, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.54656, |
| "grad_norm": 16.97395118117586, |
| "learning_rate": 3.5469882220968913e-07, |
| "logits/chosen": -0.334716796875, |
| "logits/rejected": -0.3743896484375, |
| "logps/chosen": -627.5, |
| "logps/rejected": -758.0, |
| "loss": 0.5011, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.55078125, |
| "rewards/margins": 1.0, |
| "rewards/rejected": -4.5546875, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.54784, |
| "grad_norm": 10.000428326458785, |
| "learning_rate": 3.531326004214592e-07, |
| "logits/chosen": -0.234283447265625, |
| "logits/rejected": -0.29119873046875, |
| "logps/chosen": -709.0, |
| "logps/rejected": -835.5, |
| "loss": 0.4368, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.861328125, |
| "rewards/margins": 1.24755859375, |
| "rewards/rejected": -5.10546875, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.54912, |
| "grad_norm": 17.876551163341734, |
| "learning_rate": 3.5156631589535516e-07, |
| "logits/chosen": -0.305419921875, |
| "logits/rejected": -0.3280029296875, |
| "logps/chosen": -721.5, |
| "logps/rejected": -828.0, |
| "loss": 0.5295, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -4.10546875, |
| "rewards/margins": 1.10888671875, |
| "rewards/rejected": -5.21875, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.5504, |
| "grad_norm": 22.498342188721203, |
| "learning_rate": 3.5e-07, |
| "logits/chosen": -0.380859375, |
| "logits/rejected": -0.43505859375, |
| "logps/chosen": -739.0, |
| "logps/rejected": -841.0, |
| "loss": 0.533, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -4.107421875, |
| "rewards/margins": 0.91064453125, |
| "rewards/rejected": -5.015625, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.55168, |
| "grad_norm": 12.69840485691294, |
| "learning_rate": 3.484336841046448e-07, |
| "logits/chosen": -0.28363037109375, |
| "logits/rejected": -0.329345703125, |
| "logps/chosen": -718.5, |
| "logps/rejected": -844.0, |
| "loss": 0.4312, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.939453125, |
| "rewards/margins": 1.17919921875, |
| "rewards/rejected": -5.11328125, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.55296, |
| "grad_norm": 15.269189564746817, |
| "learning_rate": 3.468673995785409e-07, |
| "logits/chosen": -0.3050537109375, |
| "logits/rejected": -0.2933349609375, |
| "logps/chosen": -757.5, |
| "logps/rejected": -809.5, |
| "loss": 0.5201, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.078125, |
| "rewards/margins": 1.0419921875, |
| "rewards/rejected": -5.1171875, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.55424, |
| "grad_norm": 10.916565840639656, |
| "learning_rate": 3.4530117779031095e-07, |
| "logits/chosen": -0.28759765625, |
| "logits/rejected": -0.30859375, |
| "logps/chosen": -741.0, |
| "logps/rejected": -798.5, |
| "loss": 0.5074, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.970703125, |
| "rewards/margins": 0.98486328125, |
| "rewards/rejected": -4.95703125, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.55552, |
| "grad_norm": 8.956894754018172, |
| "learning_rate": 3.4373505010732153e-07, |
| "logits/chosen": -0.336181640625, |
| "logits/rejected": -0.366455078125, |
| "logps/chosen": -745.0, |
| "logps/rejected": -831.0, |
| "loss": 0.4109, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.966796875, |
| "rewards/margins": 1.26806640625, |
| "rewards/rejected": -5.23046875, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.5568, |
| "grad_norm": 10.251791767078773, |
| "learning_rate": 3.4216904789505446e-07, |
| "logits/chosen": -0.2631034851074219, |
| "logits/rejected": -0.291534423828125, |
| "logps/chosen": -701.5, |
| "logps/rejected": -824.0, |
| "loss": 0.4353, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.8203125, |
| "rewards/margins": 1.3251953125, |
| "rewards/rejected": -5.14453125, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.55808, |
| "grad_norm": 17.42176040192877, |
| "learning_rate": 3.4060320251647864e-07, |
| "logits/chosen": -0.309478759765625, |
| "logits/rejected": -0.3680419921875, |
| "logps/chosen": -712.5, |
| "logps/rejected": -840.5, |
| "loss": 0.4913, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.96484375, |
| "rewards/margins": 1.13525390625, |
| "rewards/rejected": -5.10546875, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.55936, |
| "grad_norm": 8.54035065196297, |
| "learning_rate": 3.3903754533142195e-07, |
| "logits/chosen": -0.256591796875, |
| "logits/rejected": -0.332275390625, |
| "logps/chosen": -719.0, |
| "logps/rejected": -851.0, |
| "loss": 0.4279, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.943359375, |
| "rewards/margins": 1.35791015625, |
| "rewards/rejected": -5.30078125, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.56064, |
| "grad_norm": 8.537378854753193, |
| "learning_rate": 3.3747210769594327e-07, |
| "logits/chosen": -0.25079345703125, |
| "logits/rejected": -0.331298828125, |
| "logps/chosen": -699.0, |
| "logps/rejected": -829.0, |
| "loss": 0.4387, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.8828125, |
| "rewards/margins": 1.21044921875, |
| "rewards/rejected": -5.08984375, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.56192, |
| "grad_norm": 9.144351590625533, |
| "learning_rate": 3.359069209617048e-07, |
| "logits/chosen": -0.314208984375, |
| "logits/rejected": -0.3914337158203125, |
| "logps/chosen": -697.0, |
| "logps/rejected": -799.0, |
| "loss": 0.4947, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.8203125, |
| "rewards/margins": 1.109375, |
| "rewards/rejected": -4.9296875, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.5632, |
| "grad_norm": 14.655918977438441, |
| "learning_rate": 3.3434201647534305e-07, |
| "logits/chosen": -0.3173828125, |
| "logits/rejected": -0.363525390625, |
| "logps/chosen": -686.0, |
| "logps/rejected": -800.5, |
| "loss": 0.4074, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.587890625, |
| "rewards/margins": 1.189453125, |
| "rewards/rejected": -4.76953125, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.56448, |
| "grad_norm": 9.37268098222402, |
| "learning_rate": 3.327774255778426e-07, |
| "logits/chosen": -0.35009765625, |
| "logits/rejected": -0.3543701171875, |
| "logps/chosen": -706.0, |
| "logps/rejected": -837.0, |
| "loss": 0.4304, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.71875, |
| "rewards/margins": 1.23095703125, |
| "rewards/rejected": -4.94921875, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.56576, |
| "grad_norm": 8.893240235048026, |
| "learning_rate": 3.312131796039074e-07, |
| "logits/chosen": -0.3359375, |
| "logits/rejected": -0.35595703125, |
| "logps/chosen": -720.0, |
| "logps/rejected": -820.5, |
| "loss": 0.4758, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.982421875, |
| "rewards/margins": 1.11669921875, |
| "rewards/rejected": -5.10546875, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.56704, |
| "grad_norm": 9.905678959597498, |
| "learning_rate": 3.2964930988133347e-07, |
| "logits/chosen": -0.31103515625, |
| "logits/rejected": -0.3660888671875, |
| "logps/chosen": -677.5, |
| "logps/rejected": -814.5, |
| "loss": 0.498, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.58984375, |
| "rewards/margins": 1.150390625, |
| "rewards/rejected": -4.73828125, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.56832, |
| "grad_norm": 9.08992784313469, |
| "learning_rate": 3.280858477303813e-07, |
| "logits/chosen": -0.39501953125, |
| "logits/rejected": -0.420166015625, |
| "logps/chosen": -693.0, |
| "logps/rejected": -835.5, |
| "loss": 0.4553, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.744140625, |
| "rewards/margins": 1.2099609375, |
| "rewards/rejected": -4.94921875, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.5696, |
| "grad_norm": 11.048886602570247, |
| "learning_rate": 3.265228244631491e-07, |
| "logits/chosen": -0.3572998046875, |
| "logits/rejected": -0.3760986328125, |
| "logps/chosen": -655.0, |
| "logps/rejected": -766.5, |
| "loss": 0.4808, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.58203125, |
| "rewards/margins": 0.9876708984375, |
| "rewards/rejected": -4.57421875, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.57088, |
| "grad_norm": 16.49283952772338, |
| "learning_rate": 3.2496027138294534e-07, |
| "logits/chosen": -0.3392333984375, |
| "logits/rejected": -0.38818359375, |
| "logps/chosen": -695.5, |
| "logps/rejected": -786.0, |
| "loss": 0.5221, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.708984375, |
| "rewards/margins": 1.129638671875, |
| "rewards/rejected": -4.84375, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.57216, |
| "grad_norm": 13.865402250549591, |
| "learning_rate": 3.2339821978366143e-07, |
| "logits/chosen": -0.3529052734375, |
| "logits/rejected": -0.415283203125, |
| "logps/chosen": -705.5, |
| "logps/rejected": -801.0, |
| "loss": 0.414, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.7890625, |
| "rewards/margins": 1.2333984375, |
| "rewards/rejected": -5.015625, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.57344, |
| "grad_norm": 9.732935361331597, |
| "learning_rate": 3.218367009491459e-07, |
| "logits/chosen": -0.30615234375, |
| "logits/rejected": -0.3333740234375, |
| "logps/chosen": -690.0, |
| "logps/rejected": -790.5, |
| "loss": 0.5015, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.7265625, |
| "rewards/margins": 1.05517578125, |
| "rewards/rejected": -4.78125, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.57472, |
| "grad_norm": 9.154715837042305, |
| "learning_rate": 3.2027574615257724e-07, |
| "logits/chosen": -0.3072509765625, |
| "logits/rejected": -0.3399658203125, |
| "logps/chosen": -722.5, |
| "logps/rejected": -808.0, |
| "loss": 0.4555, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.783203125, |
| "rewards/margins": 1.1298828125, |
| "rewards/rejected": -4.91015625, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 8.928902784266844, |
| "learning_rate": 3.1871538665583784e-07, |
| "logits/chosen": -0.33447265625, |
| "logits/rejected": -0.3792724609375, |
| "logps/chosen": -701.0, |
| "logps/rejected": -844.5, |
| "loss": 0.439, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.796875, |
| "rewards/margins": 1.2041015625, |
| "rewards/rejected": -5.00390625, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.57728, |
| "grad_norm": 12.00098250688527, |
| "learning_rate": 3.1715565370888724e-07, |
| "logits/chosen": -0.29534912109375, |
| "logits/rejected": -0.3519287109375, |
| "logps/chosen": -685.0, |
| "logps/rejected": -775.0, |
| "loss": 0.5097, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.541015625, |
| "rewards/margins": 1.02294921875, |
| "rewards/rejected": -4.5625, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.57856, |
| "grad_norm": 12.6273655314952, |
| "learning_rate": 3.155965785491375e-07, |
| "logits/chosen": -0.2778053283691406, |
| "logits/rejected": -0.3127593994140625, |
| "logps/chosen": -666.5, |
| "logps/rejected": -733.0, |
| "loss": 0.5567, |
| "rewards/accuracies": 0.6171875, |
| "rewards/chosen": -3.607421875, |
| "rewards/margins": 0.78857421875, |
| "rewards/rejected": -4.392578125, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.57984, |
| "grad_norm": 15.49253693464398, |
| "learning_rate": 3.140381924008266e-07, |
| "logits/chosen": -0.3681640625, |
| "logits/rejected": -0.415771484375, |
| "logps/chosen": -679.5, |
| "logps/rejected": -758.5, |
| "loss": 0.5302, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.544921875, |
| "rewards/margins": 1.06982421875, |
| "rewards/rejected": -4.61328125, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.58112, |
| "grad_norm": 14.072242757167011, |
| "learning_rate": 3.1248052647439325e-07, |
| "logits/chosen": -0.314361572265625, |
| "logits/rejected": -0.35498046875, |
| "logps/chosen": -683.0, |
| "logps/rejected": -766.5, |
| "loss": 0.4328, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.375, |
| "rewards/margins": 1.099609375, |
| "rewards/rejected": -4.47265625, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5824, |
| "grad_norm": 8.998262402466967, |
| "learning_rate": 3.109236119658523e-07, |
| "logits/chosen": -0.39013671875, |
| "logits/rejected": -0.46435546875, |
| "logps/chosen": -694.0, |
| "logps/rejected": -787.0, |
| "loss": 0.4334, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.5234375, |
| "rewards/margins": 1.29052734375, |
| "rewards/rejected": -4.81640625, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.58368, |
| "grad_norm": 9.654473976654984, |
| "learning_rate": 3.0936748005616934e-07, |
| "logits/chosen": -0.4014892578125, |
| "logits/rejected": -0.4482421875, |
| "logps/chosen": -652.5, |
| "logps/rejected": -773.0, |
| "loss": 0.4763, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -3.390625, |
| "rewards/margins": 1.03076171875, |
| "rewards/rejected": -4.41796875, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.58496, |
| "grad_norm": 9.304597500860096, |
| "learning_rate": 3.07812161910637e-07, |
| "logits/chosen": -0.38525390625, |
| "logits/rejected": -0.466064453125, |
| "logps/chosen": -677.0, |
| "logps/rejected": -786.5, |
| "loss": 0.4882, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.505859375, |
| "rewards/margins": 1.04443359375, |
| "rewards/rejected": -4.55078125, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.58624, |
| "grad_norm": 10.03299414048815, |
| "learning_rate": 3.062576886782496e-07, |
| "logits/chosen": -0.33111572265625, |
| "logits/rejected": -0.343994140625, |
| "logps/chosen": -669.0, |
| "logps/rejected": -749.0, |
| "loss": 0.4664, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.41796875, |
| "rewards/margins": 1.0966796875, |
| "rewards/rejected": -4.51953125, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.58752, |
| "grad_norm": 8.481068595485672, |
| "learning_rate": 3.0470409149108057e-07, |
| "logits/chosen": -0.376220703125, |
| "logits/rejected": -0.4453125, |
| "logps/chosen": -693.5, |
| "logps/rejected": -784.5, |
| "loss": 0.4547, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.419921875, |
| "rewards/margins": 1.06201171875, |
| "rewards/rejected": -4.478515625, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.5888, |
| "grad_norm": 12.321469175204998, |
| "learning_rate": 3.0315140146365854e-07, |
| "logits/chosen": -0.338134765625, |
| "logits/rejected": -0.415283203125, |
| "logps/chosen": -632.5, |
| "logps/rejected": -762.0, |
| "loss": 0.4179, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.32421875, |
| "rewards/margins": 1.26171875, |
| "rewards/rejected": -4.5859375, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.59008, |
| "grad_norm": 10.17822938822944, |
| "learning_rate": 3.0159964969234345e-07, |
| "logits/chosen": -0.31658935546875, |
| "logits/rejected": -0.399658203125, |
| "logps/chosen": -660.0, |
| "logps/rejected": -803.5, |
| "loss": 0.4285, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.5390625, |
| "rewards/margins": 1.218505859375, |
| "rewards/rejected": -4.75390625, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.59136, |
| "grad_norm": 7.7993467189482075, |
| "learning_rate": 3.00048867254705e-07, |
| "logits/chosen": -0.43994140625, |
| "logits/rejected": -0.50244140625, |
| "logps/chosen": -688.0, |
| "logps/rejected": -806.0, |
| "loss": 0.4446, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.525390625, |
| "rewards/margins": 1.19580078125, |
| "rewards/rejected": -4.71875, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.59264, |
| "grad_norm": 12.021053759681038, |
| "learning_rate": 2.9849908520889934e-07, |
| "logits/chosen": -0.35205078125, |
| "logits/rejected": -0.4013671875, |
| "logps/chosen": -703.5, |
| "logps/rejected": -813.0, |
| "loss": 0.4805, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.78515625, |
| "rewards/margins": 1.2294921875, |
| "rewards/rejected": -5.015625, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.59392, |
| "grad_norm": 9.606440908673006, |
| "learning_rate": 2.9695033459304765e-07, |
| "logits/chosen": -0.3104248046875, |
| "logits/rejected": -0.3370361328125, |
| "logps/chosen": -719.5, |
| "logps/rejected": -809.5, |
| "loss": 0.4665, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.779296875, |
| "rewards/margins": 1.021728515625, |
| "rewards/rejected": -4.80078125, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5952, |
| "grad_norm": 9.429466706631539, |
| "learning_rate": 2.954026464246138e-07, |
| "logits/chosen": -0.308349609375, |
| "logits/rejected": -0.3970947265625, |
| "logps/chosen": -685.0, |
| "logps/rejected": -803.5, |
| "loss": 0.4333, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.5859375, |
| "rewards/margins": 1.34521484375, |
| "rewards/rejected": -4.93359375, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.59648, |
| "grad_norm": 8.733380658733111, |
| "learning_rate": 2.938560516997839e-07, |
| "logits/chosen": -0.3392333984375, |
| "logits/rejected": -0.385986328125, |
| "logps/chosen": -679.0, |
| "logps/rejected": -788.0, |
| "loss": 0.4456, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.580078125, |
| "rewards/margins": 1.158203125, |
| "rewards/rejected": -4.732421875, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.59776, |
| "grad_norm": 11.051837804187693, |
| "learning_rate": 2.923105813928453e-07, |
| "logits/chosen": -0.35302734375, |
| "logits/rejected": -0.400390625, |
| "logps/chosen": -681.5, |
| "logps/rejected": -801.0, |
| "loss": 0.4298, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.771484375, |
| "rewards/margins": 1.23046875, |
| "rewards/rejected": -5.00390625, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.59904, |
| "grad_norm": 10.90589653843686, |
| "learning_rate": 2.907662664555658e-07, |
| "logits/chosen": -0.24847412109375, |
| "logits/rejected": -0.321044921875, |
| "logps/chosen": -716.5, |
| "logps/rejected": -895.5, |
| "loss": 0.4646, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.005859375, |
| "rewards/margins": 1.11767578125, |
| "rewards/rejected": -5.125, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.60032, |
| "grad_norm": 11.37286435597706, |
| "learning_rate": 2.8922313781657437e-07, |
| "logits/chosen": -0.258544921875, |
| "logits/rejected": -0.29833984375, |
| "logps/chosen": -707.0, |
| "logps/rejected": -853.0, |
| "loss": 0.4346, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.875, |
| "rewards/margins": 1.4248046875, |
| "rewards/rejected": -5.302734375, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.6016, |
| "grad_norm": 12.238759131932019, |
| "learning_rate": 2.876812263807417e-07, |
| "logits/chosen": -0.31640625, |
| "logits/rejected": -0.36279296875, |
| "logps/chosen": -757.0, |
| "logps/rejected": -893.5, |
| "loss": 0.3726, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -4.361328125, |
| "rewards/margins": 1.45849609375, |
| "rewards/rejected": -5.8125, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.60288, |
| "grad_norm": 14.41231269695093, |
| "learning_rate": 2.861405630285606e-07, |
| "logits/chosen": -0.203125, |
| "logits/rejected": -0.233642578125, |
| "logps/chosen": -747.0, |
| "logps/rejected": -901.5, |
| "loss": 0.3898, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.962890625, |
| "rewards/margins": 1.37841796875, |
| "rewards/rejected": -5.33984375, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.60416, |
| "grad_norm": 10.17467437176661, |
| "learning_rate": 2.8460117861552834e-07, |
| "logits/chosen": -0.1627197265625, |
| "logits/rejected": -0.205810546875, |
| "logps/chosen": -750.5, |
| "logps/rejected": -889.5, |
| "loss": 0.4308, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -4.59375, |
| "rewards/margins": 1.18017578125, |
| "rewards/rejected": -5.77734375, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.60544, |
| "grad_norm": 18.928934732354712, |
| "learning_rate": 2.8306310397152813e-07, |
| "logits/chosen": -0.1800537109375, |
| "logits/rejected": -0.21759796142578125, |
| "logps/chosen": -792.5, |
| "logps/rejected": -936.0, |
| "loss": 0.4427, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -4.73828125, |
| "rewards/margins": 1.591796875, |
| "rewards/rejected": -6.33203125, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.60672, |
| "grad_norm": 22.570954000361684, |
| "learning_rate": 2.815263699002124e-07, |
| "logits/chosen": -0.104888916015625, |
| "logits/rejected": -0.14886474609375, |
| "logps/chosen": -865.0, |
| "logps/rejected": -989.0, |
| "loss": 0.4815, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -5.34765625, |
| "rewards/margins": 1.2919921875, |
| "rewards/rejected": -6.640625, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 11.072003778958646, |
| "learning_rate": 2.799910071783845e-07, |
| "logits/chosen": -0.0941619873046875, |
| "logits/rejected": -0.12345123291015625, |
| "logps/chosen": -875.0, |
| "logps/rejected": -1017.5, |
| "loss": 0.4723, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -5.3984375, |
| "rewards/margins": 1.45703125, |
| "rewards/rejected": -6.8515625, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.60928, |
| "grad_norm": 22.291924161435414, |
| "learning_rate": 2.7845704655538383e-07, |
| "logits/chosen": -0.145660400390625, |
| "logits/rejected": -0.185394287109375, |
| "logps/chosen": -858.5, |
| "logps/rejected": -1001.5, |
| "loss": 0.5071, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -5.328125, |
| "rewards/margins": 1.337890625, |
| "rewards/rejected": -6.66796875, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.61056, |
| "grad_norm": 49.81448523508282, |
| "learning_rate": 2.7692451875246955e-07, |
| "logits/chosen": -0.1240692138671875, |
| "logits/rejected": -0.1556243896484375, |
| "logps/chosen": -825.5, |
| "logps/rejected": -936.0, |
| "loss": 0.637, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -5.234375, |
| "rewards/margins": 1.135498046875, |
| "rewards/rejected": -6.375, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.61184, |
| "grad_norm": 13.559544047897164, |
| "learning_rate": 2.753934544622044e-07, |
| "logits/chosen": -0.0382232666015625, |
| "logits/rejected": -0.0726776123046875, |
| "logps/chosen": -860.0, |
| "logps/rejected": -959.5, |
| "loss": 0.5071, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -5.5390625, |
| "rewards/margins": 1.2286376953125, |
| "rewards/rejected": -6.765625, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.61312, |
| "grad_norm": 44.498812483306665, |
| "learning_rate": 2.7386388434784144e-07, |
| "logits/chosen": -0.1600341796875, |
| "logits/rejected": -0.229248046875, |
| "logps/chosen": -800.0, |
| "logps/rejected": -891.0, |
| "loss": 0.541, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.859375, |
| "rewards/margins": 1.0419921875, |
| "rewards/rejected": -5.90234375, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.6144, |
| "grad_norm": 10.090939267052912, |
| "learning_rate": 2.723358390427089e-07, |
| "logits/chosen": -0.07647705078125, |
| "logits/rejected": -0.1278076171875, |
| "logps/chosen": -830.0, |
| "logps/rejected": -963.0, |
| "loss": 0.3957, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -5.05078125, |
| "rewards/margins": 1.451171875, |
| "rewards/rejected": -6.49609375, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.61568, |
| "grad_norm": 36.24985599924041, |
| "learning_rate": 2.708093491495973e-07, |
| "logits/chosen": -0.191162109375, |
| "logits/rejected": -0.2340087890625, |
| "logps/chosen": -795.0, |
| "logps/rejected": -943.5, |
| "loss": 0.5742, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -4.89453125, |
| "rewards/margins": 1.2020416259765625, |
| "rewards/rejected": -6.09375, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.61696, |
| "grad_norm": 9.559993471114256, |
| "learning_rate": 2.6928444524014593e-07, |
| "logits/chosen": -0.0987396240234375, |
| "logits/rejected": -0.13189697265625, |
| "logps/chosen": -821.5, |
| "logps/rejected": -962.0, |
| "loss": 0.4207, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.91015625, |
| "rewards/margins": 1.5185546875, |
| "rewards/rejected": -6.4296875, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.61824, |
| "grad_norm": 21.064286757896273, |
| "learning_rate": 2.677611578542312e-07, |
| "logits/chosen": -0.180572509765625, |
| "logits/rejected": -0.177886962890625, |
| "logps/chosen": -761.5, |
| "logps/rejected": -863.5, |
| "loss": 0.518, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.3125, |
| "rewards/margins": 1.28662109375, |
| "rewards/rejected": -5.59765625, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.61952, |
| "grad_norm": 16.47721480533661, |
| "learning_rate": 2.6623951749935486e-07, |
| "logits/chosen": -0.19525146484375, |
| "logits/rejected": -0.232696533203125, |
| "logps/chosen": -750.5, |
| "logps/rejected": -880.0, |
| "loss": 0.5357, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -4.375, |
| "rewards/margins": 1.01416015625, |
| "rewards/rejected": -5.39453125, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.6208, |
| "grad_norm": 14.729444247013049, |
| "learning_rate": 2.6471955465003233e-07, |
| "logits/chosen": -0.146209716796875, |
| "logits/rejected": -0.181396484375, |
| "logps/chosen": -727.0, |
| "logps/rejected": -855.5, |
| "loss": 0.4494, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.181640625, |
| "rewards/margins": 1.361328125, |
| "rewards/rejected": -5.54296875, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.62208, |
| "grad_norm": 17.822317812146366, |
| "learning_rate": 2.6320129974718357e-07, |
| "logits/chosen": -0.2357177734375, |
| "logits/rejected": -0.266845703125, |
| "logps/chosen": -755.5, |
| "logps/rejected": -898.5, |
| "loss": 0.3869, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -4.2265625, |
| "rewards/margins": 1.5546875, |
| "rewards/rejected": -5.78125, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.62336, |
| "grad_norm": 18.035398000203894, |
| "learning_rate": 2.6168478319752235e-07, |
| "logits/chosen": -0.25274658203125, |
| "logits/rejected": -0.2952880859375, |
| "logps/chosen": -715.5, |
| "logps/rejected": -808.5, |
| "loss": 0.53, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.82421875, |
| "rewards/margins": 0.89306640625, |
| "rewards/rejected": -4.7109375, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.62464, |
| "grad_norm": 11.523996728652335, |
| "learning_rate": 2.6017003537294813e-07, |
| "logits/chosen": -0.28271484375, |
| "logits/rejected": -0.321533203125, |
| "logps/chosen": -650.0, |
| "logps/rejected": -767.5, |
| "loss": 0.4265, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.513671875, |
| "rewards/margins": 1.33837890625, |
| "rewards/rejected": -4.85546875, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.62592, |
| "grad_norm": 9.3216241550967, |
| "learning_rate": 2.58657086609937e-07, |
| "logits/chosen": -0.29815673828125, |
| "logits/rejected": -0.323974609375, |
| "logps/chosen": -692.25, |
| "logps/rejected": -793.5, |
| "loss": 0.4857, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.697265625, |
| "rewards/margins": 0.97802734375, |
| "rewards/rejected": -4.673828125, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.6272, |
| "grad_norm": 9.12204855358185, |
| "learning_rate": 2.5714596720893474e-07, |
| "logits/chosen": -0.1910400390625, |
| "logits/rejected": -0.24755859375, |
| "logps/chosen": -638.0, |
| "logps/rejected": -753.0, |
| "loss": 0.49, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.484375, |
| "rewards/margins": 1.13525390625, |
| "rewards/rejected": -4.62109375, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.62848, |
| "grad_norm": 9.031432208381775, |
| "learning_rate": 2.5563670743374974e-07, |
| "logits/chosen": -0.311279296875, |
| "logits/rejected": -0.331787109375, |
| "logps/chosen": -661.0, |
| "logps/rejected": -733.5, |
| "loss": 0.4686, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.4296875, |
| "rewards/margins": 1.078125, |
| "rewards/rejected": -4.509765625, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.62976, |
| "grad_norm": 11.835986491275541, |
| "learning_rate": 2.541293375109466e-07, |
| "logits/chosen": -0.323974609375, |
| "logits/rejected": -0.363525390625, |
| "logps/chosen": -632.5, |
| "logps/rejected": -759.0, |
| "loss": 0.4363, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.412109375, |
| "rewards/margins": 1.064453125, |
| "rewards/rejected": -4.4765625, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.63104, |
| "grad_norm": 8.263522495741764, |
| "learning_rate": 2.5262388762924157e-07, |
| "logits/chosen": -0.341064453125, |
| "logits/rejected": -0.380859375, |
| "logps/chosen": -661.5, |
| "logps/rejected": -789.5, |
| "loss": 0.401, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.357421875, |
| "rewards/margins": 1.31689453125, |
| "rewards/rejected": -4.671875, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.63232, |
| "grad_norm": 16.717553631249775, |
| "learning_rate": 2.511203879388971e-07, |
| "logits/chosen": -0.28839111328125, |
| "logits/rejected": -0.35125732421875, |
| "logps/chosen": -655.5, |
| "logps/rejected": -778.0, |
| "loss": 0.399, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.291015625, |
| "rewards/margins": 1.16796875, |
| "rewards/rejected": -4.45703125, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.6336, |
| "grad_norm": 9.39340513518324, |
| "learning_rate": 2.496188685511185e-07, |
| "logits/chosen": -0.27813720703125, |
| "logits/rejected": -0.288360595703125, |
| "logps/chosen": -666.5, |
| "logps/rejected": -738.0, |
| "loss": 0.4937, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.365234375, |
| "rewards/margins": 0.90673828125, |
| "rewards/rejected": -4.26953125, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.63488, |
| "grad_norm": 8.21962622555432, |
| "learning_rate": 2.481193595374505e-07, |
| "logits/chosen": -0.22674560546875, |
| "logits/rejected": -0.27642822265625, |
| "logps/chosen": -603.75, |
| "logps/rejected": -724.0, |
| "loss": 0.478, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.115234375, |
| "rewards/margins": 1.0169677734375, |
| "rewards/rejected": -4.12890625, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.63616, |
| "grad_norm": 10.565279172163, |
| "learning_rate": 2.466218909291756e-07, |
| "logits/chosen": -0.32861328125, |
| "logits/rejected": -0.385009765625, |
| "logps/chosen": -659.5, |
| "logps/rejected": -752.5, |
| "loss": 0.494, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.30078125, |
| "rewards/margins": 1.0400390625, |
| "rewards/rejected": -4.337890625, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.63744, |
| "grad_norm": 14.800631609015028, |
| "learning_rate": 2.451264927167121e-07, |
| "logits/chosen": -0.378173828125, |
| "logits/rejected": -0.388671875, |
| "logps/chosen": -676.5, |
| "logps/rejected": -738.5, |
| "loss": 0.5109, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.45703125, |
| "rewards/margins": 0.9403076171875, |
| "rewards/rejected": -4.396484375, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.63872, |
| "grad_norm": 14.511150427574877, |
| "learning_rate": 2.436331948490136e-07, |
| "logits/chosen": -0.3680419921875, |
| "logits/rejected": -0.44677734375, |
| "logps/chosen": -638.5, |
| "logps/rejected": -751.0, |
| "loss": 0.5025, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.4140625, |
| "rewards/margins": 1.0439453125, |
| "rewards/rejected": -4.453125, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 9.963559206561316, |
| "learning_rate": 2.4214202723296923e-07, |
| "logits/chosen": -0.3682861328125, |
| "logits/rejected": -0.392578125, |
| "logps/chosen": -666.5, |
| "logps/rejected": -715.0, |
| "loss": 0.4913, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.439453125, |
| "rewards/margins": 0.968505859375, |
| "rewards/rejected": -4.41015625, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.64, |
| "eval_logits/chosen": -0.30975341796875, |
| "eval_logits/rejected": -0.39111328125, |
| "eval_logps/chosen": -649.75, |
| "eval_logps/rejected": -732.0, |
| "eval_loss": 0.4821406304836273, |
| "eval_rewards/accuracies": 0.737500011920929, |
| "eval_rewards/chosen": -3.2939453125, |
| "eval_rewards/margins": 1.017822265625, |
| "eval_rewards/rejected": -4.3095703125, |
| "eval_runtime": 27.6376, |
| "eval_samples_per_second": 18.091, |
| "eval_steps_per_second": 0.579, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.64128, |
| "grad_norm": 9.125881510784152, |
| "learning_rate": 2.4065301973280486e-07, |
| "logits/chosen": -0.3564453125, |
| "logits/rejected": -0.4180908203125, |
| "logps/chosen": -678.5, |
| "logps/rejected": -829.5, |
| "loss": 0.4451, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.44921875, |
| "rewards/margins": 1.13232421875, |
| "rewards/rejected": -4.58203125, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.64256, |
| "grad_norm": 17.366268825982257, |
| "learning_rate": 2.391662021694847e-07, |
| "logits/chosen": -0.3870849609375, |
| "logits/rejected": -0.4088134765625, |
| "logps/chosen": -630.5, |
| "logps/rejected": -714.0, |
| "loss": 0.5572, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.23828125, |
| "rewards/margins": 0.932861328125, |
| "rewards/rejected": -4.16796875, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.64384, |
| "grad_norm": 15.66439643538157, |
| "learning_rate": 2.3768160432011394e-07, |
| "logits/chosen": -0.332275390625, |
| "logits/rejected": -0.3670654296875, |
| "logps/chosen": -644.5, |
| "logps/rejected": -734.5, |
| "loss": 0.4228, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -2.994140625, |
| "rewards/margins": 1.0615234375, |
| "rewards/rejected": -4.0546875, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.64512, |
| "grad_norm": 13.777723328035924, |
| "learning_rate": 2.361992559173432e-07, |
| "logits/chosen": -0.399169921875, |
| "logits/rejected": -0.425537109375, |
| "logps/chosen": -669.5, |
| "logps/rejected": -773.5, |
| "loss": 0.4306, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.2421875, |
| "rewards/margins": 1.294921875, |
| "rewards/rejected": -4.53515625, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.6464, |
| "grad_norm": 9.719276284192894, |
| "learning_rate": 2.3471918664877214e-07, |
| "logits/chosen": -0.35498046875, |
| "logits/rejected": -0.3848876953125, |
| "logps/chosen": -668.5, |
| "logps/rejected": -758.5, |
| "loss": 0.4765, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.39453125, |
| "rewards/margins": 1.04248046875, |
| "rewards/rejected": -4.439453125, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.64768, |
| "grad_norm": 16.89908454302761, |
| "learning_rate": 2.3324142615635527e-07, |
| "logits/chosen": -0.355712890625, |
| "logits/rejected": -0.3896484375, |
| "logps/chosen": -676.0, |
| "logps/rejected": -796.0, |
| "loss": 0.3675, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.345703125, |
| "rewards/margins": 1.384765625, |
| "rewards/rejected": -4.73046875, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.64896, |
| "grad_norm": 9.037083694795681, |
| "learning_rate": 2.317660040358085e-07, |
| "logits/chosen": -0.2799072265625, |
| "logits/rejected": -0.35302734375, |
| "logps/chosen": -653.5, |
| "logps/rejected": -813.5, |
| "loss": 0.4096, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.427734375, |
| "rewards/margins": 1.4033203125, |
| "rewards/rejected": -4.83203125, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.65024, |
| "grad_norm": 13.711223830769715, |
| "learning_rate": 2.3029294983601597e-07, |
| "logits/chosen": -0.3631591796875, |
| "logits/rejected": -0.3857421875, |
| "logps/chosen": -639.0, |
| "logps/rejected": -715.0, |
| "loss": 0.5367, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -3.181640625, |
| "rewards/margins": 0.947265625, |
| "rewards/rejected": -4.130859375, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.65152, |
| "grad_norm": 15.850008639937514, |
| "learning_rate": 2.2882229305843867e-07, |
| "logits/chosen": -0.31689453125, |
| "logits/rejected": -0.3739013671875, |
| "logps/chosen": -603.0, |
| "logps/rejected": -715.0, |
| "loss": 0.4067, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -3.0390625, |
| "rewards/margins": 1.18798828125, |
| "rewards/rejected": -4.22265625, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.6528, |
| "grad_norm": 13.27611006512659, |
| "learning_rate": 2.2735406315652325e-07, |
| "logits/chosen": -0.3470458984375, |
| "logits/rejected": -0.40087890625, |
| "logps/chosen": -654.5, |
| "logps/rejected": -778.5, |
| "loss": 0.4238, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.30078125, |
| "rewards/margins": 1.177734375, |
| "rewards/rejected": -4.47265625, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.65408, |
| "grad_norm": 9.557828693226199, |
| "learning_rate": 2.2588828953511252e-07, |
| "logits/chosen": -0.250823974609375, |
| "logits/rejected": -0.336669921875, |
| "logps/chosen": -663.0, |
| "logps/rejected": -774.0, |
| "loss": 0.513, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.5625, |
| "rewards/margins": 1.05712890625, |
| "rewards/rejected": -4.625, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.65536, |
| "grad_norm": 15.537086503024735, |
| "learning_rate": 2.2442500154985642e-07, |
| "logits/chosen": -0.236083984375, |
| "logits/rejected": -0.26043701171875, |
| "logps/chosen": -640.5, |
| "logps/rejected": -765.5, |
| "loss": 0.5179, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.447265625, |
| "rewards/margins": 1.009765625, |
| "rewards/rejected": -4.458984375, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.65664, |
| "grad_norm": 8.84797363334171, |
| "learning_rate": 2.229642285066236e-07, |
| "logits/chosen": -0.33831787109375, |
| "logits/rejected": -0.3756103515625, |
| "logps/chosen": -658.0, |
| "logps/rejected": -813.0, |
| "loss": 0.3957, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.5234375, |
| "rewards/margins": 1.4072265625, |
| "rewards/rejected": -4.931640625, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.65792, |
| "grad_norm": 8.898032465683093, |
| "learning_rate": 2.2150599966091535e-07, |
| "logits/chosen": -0.23968505859375, |
| "logits/rejected": -0.240509033203125, |
| "logps/chosen": -670.0, |
| "logps/rejected": -768.0, |
| "loss": 0.4581, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.576171875, |
| "rewards/margins": 1.08349609375, |
| "rewards/rejected": -4.66015625, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.6592, |
| "grad_norm": 9.301895530550201, |
| "learning_rate": 2.200503442172792e-07, |
| "logits/chosen": -0.2496337890625, |
| "logits/rejected": -0.316650390625, |
| "logps/chosen": -694.5, |
| "logps/rejected": -817.0, |
| "loss": 0.4339, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.78515625, |
| "rewards/margins": 1.220703125, |
| "rewards/rejected": -5.0, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.66048, |
| "grad_norm": 9.315272518266339, |
| "learning_rate": 2.1859729132872407e-07, |
| "logits/chosen": -0.2623291015625, |
| "logits/rejected": -0.2822265625, |
| "logps/chosen": -610.25, |
| "logps/rejected": -698.0, |
| "loss": 0.5085, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.298828125, |
| "rewards/margins": 1.0263671875, |
| "rewards/rejected": -4.326171875, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.66176, |
| "grad_norm": 8.846499820634536, |
| "learning_rate": 2.171468700961363e-07, |
| "logits/chosen": -0.243621826171875, |
| "logits/rejected": -0.24200439453125, |
| "logps/chosen": -702.5, |
| "logps/rejected": -804.0, |
| "loss": 0.4638, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.7265625, |
| "rewards/margins": 1.20751953125, |
| "rewards/rejected": -4.931640625, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.66304, |
| "grad_norm": 19.450404147912273, |
| "learning_rate": 2.1569910956769707e-07, |
| "logits/chosen": -0.264404296875, |
| "logits/rejected": -0.3031005859375, |
| "logps/chosen": -731.0, |
| "logps/rejected": -857.0, |
| "loss": 0.3519, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.779296875, |
| "rewards/margins": 1.4775390625, |
| "rewards/rejected": -5.25390625, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.66432, |
| "grad_norm": 11.9383597079162, |
| "learning_rate": 2.1425403873830082e-07, |
| "logits/chosen": -0.267974853515625, |
| "logits/rejected": -0.300872802734375, |
| "logps/chosen": -758.5, |
| "logps/rejected": -859.5, |
| "loss": 0.4013, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.015625, |
| "rewards/margins": 1.35791015625, |
| "rewards/rejected": -5.37109375, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.6656, |
| "grad_norm": 9.054749925855036, |
| "learning_rate": 2.1281168654897377e-07, |
| "logits/chosen": -0.24322509765625, |
| "logits/rejected": -0.27294921875, |
| "logps/chosen": -732.0, |
| "logps/rejected": -846.0, |
| "loss": 0.4282, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.001953125, |
| "rewards/margins": 1.291015625, |
| "rewards/rejected": -5.29296875, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.66688, |
| "grad_norm": 17.341035886669818, |
| "learning_rate": 2.113720818862951e-07, |
| "logits/chosen": -0.206298828125, |
| "logits/rejected": -0.20355224609375, |
| "logps/chosen": -724.0, |
| "logps/rejected": -787.0, |
| "loss": 0.5664, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -3.859375, |
| "rewards/margins": 0.95849609375, |
| "rewards/rejected": -4.81640625, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.66816, |
| "grad_norm": 9.905351905849056, |
| "learning_rate": 2.0993525358181822e-07, |
| "logits/chosen": -0.275146484375, |
| "logits/rejected": -0.3148193359375, |
| "logps/chosen": -760.0, |
| "logps/rejected": -883.0, |
| "loss": 0.4874, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.158203125, |
| "rewards/margins": 1.284912109375, |
| "rewards/rejected": -5.453125, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.66944, |
| "grad_norm": 11.092239401478107, |
| "learning_rate": 2.085012304114933e-07, |
| "logits/chosen": -0.150665283203125, |
| "logits/rejected": -0.187652587890625, |
| "logps/chosen": -728.5, |
| "logps/rejected": -839.0, |
| "loss": 0.4484, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.9375, |
| "rewards/margins": 1.2138671875, |
| "rewards/rejected": -5.14453125, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.67072, |
| "grad_norm": 15.689220713255043, |
| "learning_rate": 2.0707004109509057e-07, |
| "logits/chosen": -0.125213623046875, |
| "logits/rejected": -0.13275146484375, |
| "logps/chosen": -734.5, |
| "logps/rejected": -794.0, |
| "loss": 0.5323, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -4.185546875, |
| "rewards/margins": 1.04541015625, |
| "rewards/rejected": -5.23828125, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 22.019953630911694, |
| "learning_rate": 2.0564171429562586e-07, |
| "logits/chosen": -0.18068695068359375, |
| "logits/rejected": -0.2034912109375, |
| "logps/chosen": -683.0, |
| "logps/rejected": -821.5, |
| "loss": 0.3464, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -3.689453125, |
| "rewards/margins": 1.6474609375, |
| "rewards/rejected": -5.328125, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.67328, |
| "grad_norm": 9.269392815729315, |
| "learning_rate": 2.042162786187862e-07, |
| "logits/chosen": -0.1710357666015625, |
| "logits/rejected": -0.20377349853515625, |
| "logps/chosen": -755.5, |
| "logps/rejected": -875.5, |
| "loss": 0.4172, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -4.275390625, |
| "rewards/margins": 1.36376953125, |
| "rewards/rejected": -5.63671875, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.67456, |
| "grad_norm": 18.30397649803818, |
| "learning_rate": 2.027937626123565e-07, |
| "logits/chosen": -0.2088623046875, |
| "logits/rejected": -0.22777557373046875, |
| "logps/chosen": -816.0, |
| "logps/rejected": -960.0, |
| "loss": 0.4332, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.57421875, |
| "rewards/margins": 1.33251953125, |
| "rewards/rejected": -5.91015625, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.67584, |
| "grad_norm": 10.323921457948874, |
| "learning_rate": 2.0137419476564897e-07, |
| "logits/chosen": -0.12468719482421875, |
| "logits/rejected": -0.16400146484375, |
| "logps/chosen": -781.0, |
| "logps/rejected": -894.0, |
| "loss": 0.4222, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.625, |
| "rewards/margins": 1.38525390625, |
| "rewards/rejected": -6.00390625, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.67712, |
| "grad_norm": 19.985890606151916, |
| "learning_rate": 1.9995760350893097e-07, |
| "logits/chosen": -0.15547943115234375, |
| "logits/rejected": -0.218109130859375, |
| "logps/chosen": -795.5, |
| "logps/rejected": -912.0, |
| "loss": 0.4694, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.646484375, |
| "rewards/margins": 1.234375, |
| "rewards/rejected": -5.88671875, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.6784, |
| "grad_norm": 20.81916495287454, |
| "learning_rate": 1.985440172128573e-07, |
| "logits/chosen": -0.15631103515625, |
| "logits/rejected": -0.201385498046875, |
| "logps/chosen": -786.0, |
| "logps/rejected": -920.5, |
| "loss": 0.3556, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -4.556640625, |
| "rewards/margins": 1.4970703125, |
| "rewards/rejected": -6.05859375, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.67968, |
| "grad_norm": 11.921218446157969, |
| "learning_rate": 1.9713346418790056e-07, |
| "logits/chosen": -0.126007080078125, |
| "logits/rejected": -0.188629150390625, |
| "logps/chosen": -756.5, |
| "logps/rejected": -899.0, |
| "loss": 0.4957, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.63671875, |
| "rewards/margins": 1.23779296875, |
| "rewards/rejected": -5.87109375, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.68096, |
| "grad_norm": 11.545371418939542, |
| "learning_rate": 1.957259726837849e-07, |
| "logits/chosen": -0.177947998046875, |
| "logits/rejected": -0.2209320068359375, |
| "logps/chosen": -775.0, |
| "logps/rejected": -927.0, |
| "loss": 0.4769, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.494140625, |
| "rewards/margins": 1.47119140625, |
| "rewards/rejected": -5.97265625, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.68224, |
| "grad_norm": 19.85526612585022, |
| "learning_rate": 1.9432157088892065e-07, |
| "logits/chosen": -0.1734619140625, |
| "logits/rejected": -0.236083984375, |
| "logps/chosen": -811.5, |
| "logps/rejected": -915.0, |
| "loss": 0.4809, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.828125, |
| "rewards/margins": 1.300048828125, |
| "rewards/rejected": -6.1328125, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.68352, |
| "grad_norm": 35.982005117679044, |
| "learning_rate": 1.9292028692983824e-07, |
| "logits/chosen": -0.162567138671875, |
| "logits/rejected": -0.21380615234375, |
| "logps/chosen": -779.0, |
| "logps/rejected": -879.5, |
| "loss": 0.5361, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.6953125, |
| "rewards/margins": 1.1611328125, |
| "rewards/rejected": -5.8515625, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6848, |
| "grad_norm": 9.401343460064473, |
| "learning_rate": 1.9152214887062702e-07, |
| "logits/chosen": -0.18548583984375, |
| "logits/rejected": -0.222076416015625, |
| "logps/chosen": -784.0, |
| "logps/rejected": -911.5, |
| "loss": 0.4402, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.724609375, |
| "rewards/margins": 1.4794921875, |
| "rewards/rejected": -6.19921875, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.68608, |
| "grad_norm": 19.636202317515714, |
| "learning_rate": 1.9012718471237144e-07, |
| "logits/chosen": -0.20050048828125, |
| "logits/rejected": -0.25152587890625, |
| "logps/chosen": -883.5, |
| "logps/rejected": -995.0, |
| "loss": 0.4611, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -5.19140625, |
| "rewards/margins": 1.3603515625, |
| "rewards/rejected": -6.55078125, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.68736, |
| "grad_norm": 9.609636299261897, |
| "learning_rate": 1.8873542239259109e-07, |
| "logits/chosen": -0.13946533203125, |
| "logits/rejected": -0.18389892578125, |
| "logps/chosen": -783.5, |
| "logps/rejected": -957.0, |
| "loss": 0.4048, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.6484375, |
| "rewards/margins": 1.47802734375, |
| "rewards/rejected": -6.125, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.68864, |
| "grad_norm": 28.14830330305055, |
| "learning_rate": 1.8734688978468098e-07, |
| "logits/chosen": -0.23590087890625, |
| "logits/rejected": -0.26971435546875, |
| "logps/chosen": -811.0, |
| "logps/rejected": -912.0, |
| "loss": 0.5124, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.46875, |
| "rewards/margins": 1.19091796875, |
| "rewards/rejected": -5.66015625, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.68992, |
| "grad_norm": 17.73425969488332, |
| "learning_rate": 1.8596161469735374e-07, |
| "logits/chosen": -0.2036590576171875, |
| "logits/rejected": -0.2774658203125, |
| "logps/chosen": -808.0, |
| "logps/rejected": -948.0, |
| "loss": 0.4463, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -4.76953125, |
| "rewards/margins": 1.41064453125, |
| "rewards/rejected": -6.17578125, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.6912, |
| "grad_norm": 25.111899072723798, |
| "learning_rate": 1.8457962487408174e-07, |
| "logits/chosen": -0.12348175048828125, |
| "logits/rejected": -0.16607093811035156, |
| "logps/chosen": -771.0, |
| "logps/rejected": -857.0, |
| "loss": 0.5007, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.49609375, |
| "rewards/margins": 1.124267578125, |
| "rewards/rejected": -5.6171875, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.69248, |
| "grad_norm": 12.189044823657042, |
| "learning_rate": 1.8320094799254222e-07, |
| "logits/chosen": -0.20697021484375, |
| "logits/rejected": -0.2476806640625, |
| "logps/chosen": -793.0, |
| "logps/rejected": -945.5, |
| "loss": 0.4448, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.625, |
| "rewards/margins": 1.31787109375, |
| "rewards/rejected": -5.9453125, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.69376, |
| "grad_norm": 19.576872251438612, |
| "learning_rate": 1.8182561166406308e-07, |
| "logits/chosen": -0.22357177734375, |
| "logits/rejected": -0.2642822265625, |
| "logps/chosen": -749.5, |
| "logps/rejected": -835.0, |
| "loss": 0.5487, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.265625, |
| "rewards/margins": 0.9267578125, |
| "rewards/rejected": -5.1953125, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.69504, |
| "grad_norm": 9.991255716215326, |
| "learning_rate": 1.8045364343306914e-07, |
| "logits/chosen": -0.1717987060546875, |
| "logits/rejected": -0.21246337890625, |
| "logps/chosen": -731.5, |
| "logps/rejected": -863.5, |
| "loss": 0.4698, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.220703125, |
| "rewards/margins": 1.2138671875, |
| "rewards/rejected": -5.4375, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.69632, |
| "grad_norm": 9.256546508375417, |
| "learning_rate": 1.7908507077653123e-07, |
| "logits/chosen": -0.220703125, |
| "logits/rejected": -0.2713623046875, |
| "logps/chosen": -718.5, |
| "logps/rejected": -876.0, |
| "loss": 0.412, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.1015625, |
| "rewards/margins": 1.4931640625, |
| "rewards/rejected": -5.59375, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6976, |
| "grad_norm": 10.085774446505868, |
| "learning_rate": 1.7771992110341532e-07, |
| "logits/chosen": -0.181396484375, |
| "logits/rejected": -0.2060546875, |
| "logps/chosen": -746.0, |
| "logps/rejected": -893.0, |
| "loss": 0.4511, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.97265625, |
| "rewards/margins": 1.49462890625, |
| "rewards/rejected": -5.46875, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.69888, |
| "grad_norm": 10.969992056433007, |
| "learning_rate": 1.7635822175413445e-07, |
| "logits/chosen": -0.284912109375, |
| "logits/rejected": -0.3399658203125, |
| "logps/chosen": -665.0, |
| "logps/rejected": -803.0, |
| "loss": 0.4535, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.654296875, |
| "rewards/margins": 1.290283203125, |
| "rewards/rejected": -4.94140625, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.70016, |
| "grad_norm": 18.256669397800174, |
| "learning_rate": 1.7500000000000007e-07, |
| "logits/chosen": -0.3035888671875, |
| "logits/rejected": -0.346923828125, |
| "logps/chosen": -667.0, |
| "logps/rejected": -792.0, |
| "loss": 0.3832, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.58984375, |
| "rewards/margins": 1.271484375, |
| "rewards/rejected": -4.86328125, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.70144, |
| "grad_norm": 12.455904821192838, |
| "learning_rate": 1.7364528304267644e-07, |
| "logits/chosen": -0.2991943359375, |
| "logits/rejected": -0.333740234375, |
| "logps/chosen": -641.5, |
| "logps/rejected": -753.0, |
| "loss": 0.437, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.29296875, |
| "rewards/margins": 1.140625, |
| "rewards/rejected": -4.435546875, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.70272, |
| "grad_norm": 9.000099856418279, |
| "learning_rate": 1.7229409801363634e-07, |
| "logits/chosen": -0.29541015625, |
| "logits/rejected": -0.36572265625, |
| "logps/chosen": -667.5, |
| "logps/rejected": -761.5, |
| "loss": 0.4534, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.552734375, |
| "rewards/margins": 1.12939453125, |
| "rewards/rejected": -4.68359375, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 9.631183638602456, |
| "learning_rate": 1.7094647197361656e-07, |
| "logits/chosen": -0.322052001953125, |
| "logits/rejected": -0.3463134765625, |
| "logps/chosen": -696.5, |
| "logps/rejected": -818.0, |
| "loss": 0.4881, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.470703125, |
| "rewards/margins": 1.14697265625, |
| "rewards/rejected": -4.615234375, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.70528, |
| "grad_norm": 10.011321742256536, |
| "learning_rate": 1.6960243191207686e-07, |
| "logits/chosen": -0.296875, |
| "logits/rejected": -0.35693359375, |
| "logps/chosen": -691.5, |
| "logps/rejected": -786.0, |
| "loss": 0.4621, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.609375, |
| "rewards/margins": 1.216796875, |
| "rewards/rejected": -4.8203125, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.70656, |
| "grad_norm": 13.798592755194116, |
| "learning_rate": 1.682620047466589e-07, |
| "logits/chosen": -0.35546875, |
| "logits/rejected": -0.391357421875, |
| "logps/chosen": -674.0, |
| "logps/rejected": -765.5, |
| "loss": 0.4362, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.546875, |
| "rewards/margins": 1.2548828125, |
| "rewards/rejected": -4.80078125, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.70784, |
| "grad_norm": 8.22483168916061, |
| "learning_rate": 1.6692521732264789e-07, |
| "logits/chosen": -0.32275390625, |
| "logits/rejected": -0.35107421875, |
| "logps/chosen": -708.0, |
| "logps/rejected": -820.0, |
| "loss": 0.4363, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.662109375, |
| "rewards/margins": 1.157958984375, |
| "rewards/rejected": -4.82421875, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.70912, |
| "grad_norm": 11.12423189134255, |
| "learning_rate": 1.655920964124339e-07, |
| "logits/chosen": -0.30712890625, |
| "logits/rejected": -0.342376708984375, |
| "logps/chosen": -666.5, |
| "logps/rejected": -764.5, |
| "loss": 0.4255, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.408203125, |
| "rewards/margins": 1.1845703125, |
| "rewards/rejected": -4.595703125, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.7104, |
| "grad_norm": 9.470624413639532, |
| "learning_rate": 1.642626687149765e-07, |
| "logits/chosen": -0.298095703125, |
| "logits/rejected": -0.3577880859375, |
| "logps/chosen": -672.0, |
| "logps/rejected": -760.0, |
| "loss": 0.4545, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.388671875, |
| "rewards/margins": 1.16259765625, |
| "rewards/rejected": -4.5546875, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.71168, |
| "grad_norm": 10.343750237066667, |
| "learning_rate": 1.629369608552696e-07, |
| "logits/chosen": -0.3017578125, |
| "logits/rejected": -0.359619140625, |
| "logps/chosen": -703.5, |
| "logps/rejected": -814.5, |
| "loss": 0.4353, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.642578125, |
| "rewards/margins": 1.1728515625, |
| "rewards/rejected": -4.81640625, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.71296, |
| "grad_norm": 10.683608221205487, |
| "learning_rate": 1.6161499938380874e-07, |
| "logits/chosen": -0.3658447265625, |
| "logits/rejected": -0.42822265625, |
| "logps/chosen": -701.5, |
| "logps/rejected": -829.0, |
| "loss": 0.4438, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.548828125, |
| "rewards/margins": 1.4580078125, |
| "rewards/rejected": -5.005859375, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.71424, |
| "grad_norm": 9.927387665144652, |
| "learning_rate": 1.6029681077605865e-07, |
| "logits/chosen": -0.347412109375, |
| "logits/rejected": -0.39990234375, |
| "logps/chosen": -686.5, |
| "logps/rejected": -856.5, |
| "loss": 0.4152, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.654296875, |
| "rewards/margins": 1.3212890625, |
| "rewards/rejected": -4.9765625, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.71552, |
| "grad_norm": 8.62349737887557, |
| "learning_rate": 1.5898242143192336e-07, |
| "logits/chosen": -0.307373046875, |
| "logits/rejected": -0.3692626953125, |
| "logps/chosen": -644.5, |
| "logps/rejected": -791.5, |
| "loss": 0.3838, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.294921875, |
| "rewards/margins": 1.3662109375, |
| "rewards/rejected": -4.66796875, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.7168, |
| "grad_norm": 20.08172331534478, |
| "learning_rate": 1.576718576752179e-07, |
| "logits/chosen": -0.25872802734375, |
| "logits/rejected": -0.3118896484375, |
| "logps/chosen": -621.0, |
| "logps/rejected": -742.5, |
| "loss": 0.4035, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.26171875, |
| "rewards/margins": 1.31884765625, |
| "rewards/rejected": -4.58203125, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.71808, |
| "grad_norm": 9.964788088459969, |
| "learning_rate": 1.5636514575314023e-07, |
| "logits/chosen": -0.3486328125, |
| "logits/rejected": -0.4261474609375, |
| "logps/chosen": -727.0, |
| "logps/rejected": -832.0, |
| "loss": 0.5178, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.638671875, |
| "rewards/margins": 1.04345703125, |
| "rewards/rejected": -4.6796875, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.71936, |
| "grad_norm": 8.246036203914949, |
| "learning_rate": 1.550623118357463e-07, |
| "logits/chosen": -0.261474609375, |
| "logits/rejected": -0.327880859375, |
| "logps/chosen": -699.0, |
| "logps/rejected": -844.0, |
| "loss": 0.4117, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.875, |
| "rewards/margins": 1.34619140625, |
| "rewards/rejected": -5.22265625, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.72064, |
| "grad_norm": 9.24571549890142, |
| "learning_rate": 1.5376338201542535e-07, |
| "logits/chosen": -0.3028564453125, |
| "logits/rejected": -0.3468017578125, |
| "logps/chosen": -667.0, |
| "logps/rejected": -782.0, |
| "loss": 0.4665, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -3.62109375, |
| "rewards/margins": 1.04150390625, |
| "rewards/rejected": -4.66015625, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.72192, |
| "grad_norm": 11.314423673740029, |
| "learning_rate": 1.524683823063783e-07, |
| "logits/chosen": -0.33697509765625, |
| "logits/rejected": -0.35467529296875, |
| "logps/chosen": -703.0, |
| "logps/rejected": -758.5, |
| "loss": 0.5141, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.708984375, |
| "rewards/margins": 1.0166015625, |
| "rewards/rejected": -4.73046875, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.7232, |
| "grad_norm": 8.185307525881214, |
| "learning_rate": 1.5117733864409549e-07, |
| "logits/chosen": -0.247314453125, |
| "logits/rejected": -0.314208984375, |
| "logps/chosen": -650.5, |
| "logps/rejected": -819.5, |
| "loss": 0.3748, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.607421875, |
| "rewards/margins": 1.447265625, |
| "rewards/rejected": -5.05859375, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.72448, |
| "grad_norm": 18.20769264970902, |
| "learning_rate": 1.4989027688483806e-07, |
| "logits/chosen": -0.311767578125, |
| "logits/rejected": -0.364501953125, |
| "logps/chosen": -695.5, |
| "logps/rejected": -808.0, |
| "loss": 0.4326, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.751953125, |
| "rewards/margins": 1.30908203125, |
| "rewards/rejected": -5.06640625, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.72576, |
| "grad_norm": 8.80192874409478, |
| "learning_rate": 1.4860722280512023e-07, |
| "logits/chosen": -0.2942657470703125, |
| "logits/rejected": -0.33642578125, |
| "logps/chosen": -649.5, |
| "logps/rejected": -767.0, |
| "loss": 0.4278, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.607421875, |
| "rewards/margins": 1.18017578125, |
| "rewards/rejected": -4.78515625, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.72704, |
| "grad_norm": 10.943134384510522, |
| "learning_rate": 1.4732820210119238e-07, |
| "logits/chosen": -0.3031005859375, |
| "logits/rejected": -0.3203125, |
| "logps/chosen": -734.0, |
| "logps/rejected": -838.0, |
| "loss": 0.3886, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.98828125, |
| "rewards/margins": 1.3251953125, |
| "rewards/rejected": -5.3125, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.72832, |
| "grad_norm": 11.85355005830973, |
| "learning_rate": 1.4605324038852707e-07, |
| "logits/chosen": -0.20941162109375, |
| "logits/rejected": -0.2779541015625, |
| "logps/chosen": -668.0, |
| "logps/rejected": -806.5, |
| "loss": 0.4069, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.740234375, |
| "rewards/margins": 1.44677734375, |
| "rewards/rejected": -5.1875, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.7296, |
| "grad_norm": 8.286237978866287, |
| "learning_rate": 1.4478236320130553e-07, |
| "logits/chosen": -0.217041015625, |
| "logits/rejected": -0.267822265625, |
| "logps/chosen": -680.0, |
| "logps/rejected": -779.0, |
| "loss": 0.4393, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.6484375, |
| "rewards/margins": 1.163330078125, |
| "rewards/rejected": -4.8125, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.73088, |
| "grad_norm": 13.68132204214939, |
| "learning_rate": 1.4351559599190707e-07, |
| "logits/chosen": -0.17962646484375, |
| "logits/rejected": -0.239990234375, |
| "logps/chosen": -720.0, |
| "logps/rejected": -853.0, |
| "loss": 0.4864, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.06640625, |
| "rewards/margins": 1.2568359375, |
| "rewards/rejected": -5.32421875, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.73216, |
| "grad_norm": 9.213667928480618, |
| "learning_rate": 1.4225296413039794e-07, |
| "logits/chosen": -0.20670700073242188, |
| "logits/rejected": -0.26763916015625, |
| "logps/chosen": -705.0, |
| "logps/rejected": -866.0, |
| "loss": 0.4484, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.021484375, |
| "rewards/margins": 1.3505859375, |
| "rewards/rejected": -5.37109375, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.73344, |
| "grad_norm": 9.155271393542527, |
| "learning_rate": 1.409944929040249e-07, |
| "logits/chosen": -0.188690185546875, |
| "logits/rejected": -0.23065185546875, |
| "logps/chosen": -703.5, |
| "logps/rejected": -821.5, |
| "loss": 0.4569, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.984375, |
| "rewards/margins": 1.224609375, |
| "rewards/rejected": -5.21484375, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.73472, |
| "grad_norm": 12.402920193014143, |
| "learning_rate": 1.3974020751670732e-07, |
| "logits/chosen": -0.237548828125, |
| "logits/rejected": -0.302734375, |
| "logps/chosen": -666.0, |
| "logps/rejected": -824.5, |
| "loss": 0.4361, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.7734375, |
| "rewards/margins": 1.27490234375, |
| "rewards/rejected": -5.048828125, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 13.672795096747066, |
| "learning_rate": 1.3849013308853368e-07, |
| "logits/chosen": -0.1479034423828125, |
| "logits/rejected": -0.21832275390625, |
| "logps/chosen": -779.0, |
| "logps/rejected": -881.5, |
| "loss": 0.4502, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.23828125, |
| "rewards/margins": 1.22607421875, |
| "rewards/rejected": -5.4609375, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.73728, |
| "grad_norm": 9.971405880324383, |
| "learning_rate": 1.3724429465525732e-07, |
| "logits/chosen": -0.1641998291015625, |
| "logits/rejected": -0.186248779296875, |
| "logps/chosen": -713.0, |
| "logps/rejected": -790.0, |
| "loss": 0.499, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.955078125, |
| "rewards/margins": 1.1162109375, |
| "rewards/rejected": -5.06640625, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.73856, |
| "grad_norm": 8.772600149464196, |
| "learning_rate": 1.360027171677957e-07, |
| "logits/chosen": -0.2245025634765625, |
| "logits/rejected": -0.2720947265625, |
| "logps/chosen": -705.0, |
| "logps/rejected": -841.0, |
| "loss": 0.4277, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.859375, |
| "rewards/margins": 1.580078125, |
| "rewards/rejected": -5.4375, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.73984, |
| "grad_norm": 18.81863742515878, |
| "learning_rate": 1.3476542549173096e-07, |
| "logits/chosen": -0.2587890625, |
| "logits/rejected": -0.3282470703125, |
| "logps/chosen": -762.5, |
| "logps/rejected": -856.0, |
| "loss": 0.5151, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.23828125, |
| "rewards/margins": 1.15771484375, |
| "rewards/rejected": -5.39453125, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.74112, |
| "grad_norm": 9.936151878622386, |
| "learning_rate": 1.335324444068108e-07, |
| "logits/chosen": -0.2752685546875, |
| "logits/rejected": -0.334228515625, |
| "logps/chosen": -753.0, |
| "logps/rejected": -913.0, |
| "loss": 0.3748, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -4.279296875, |
| "rewards/margins": 1.607421875, |
| "rewards/rejected": -5.890625, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.7424, |
| "grad_norm": 14.86753917739169, |
| "learning_rate": 1.3230379860645365e-07, |
| "logits/chosen": -0.25177001953125, |
| "logits/rejected": -0.300537109375, |
| "logps/chosen": -718.0, |
| "logps/rejected": -872.0, |
| "loss": 0.3886, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.125, |
| "rewards/margins": 1.49072265625, |
| "rewards/rejected": -5.625, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.74368, |
| "grad_norm": 14.0480837535775, |
| "learning_rate": 1.3107951269725286e-07, |
| "logits/chosen": -0.1736297607421875, |
| "logits/rejected": -0.22271728515625, |
| "logps/chosen": -788.5, |
| "logps/rejected": -911.5, |
| "loss": 0.4652, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.408203125, |
| "rewards/margins": 1.3837890625, |
| "rewards/rejected": -5.8046875, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.74496, |
| "grad_norm": 9.252489484832754, |
| "learning_rate": 1.2985961119848506e-07, |
| "logits/chosen": -0.15355682373046875, |
| "logits/rejected": -0.20587158203125, |
| "logps/chosen": -729.5, |
| "logps/rejected": -853.0, |
| "loss": 0.4643, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.099609375, |
| "rewards/margins": 1.3515625, |
| "rewards/rejected": -5.453125, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.74624, |
| "grad_norm": 18.79861359200076, |
| "learning_rate": 1.28644118541618e-07, |
| "logits/chosen": -0.179351806640625, |
| "logits/rejected": -0.22210693359375, |
| "logps/chosen": -792.0, |
| "logps/rejected": -912.0, |
| "loss": 0.5022, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.345703125, |
| "rewards/margins": 1.33837890625, |
| "rewards/rejected": -5.68359375, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.74752, |
| "grad_norm": 9.556413731103051, |
| "learning_rate": 1.2743305906982183e-07, |
| "logits/chosen": -0.142608642578125, |
| "logits/rejected": -0.1815185546875, |
| "logps/chosen": -772.0, |
| "logps/rejected": -916.0, |
| "loss": 0.4378, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.3984375, |
| "rewards/margins": 1.37841796875, |
| "rewards/rejected": -5.7734375, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.7488, |
| "grad_norm": 10.524837392255892, |
| "learning_rate": 1.2622645703748163e-07, |
| "logits/chosen": -0.0958251953125, |
| "logits/rejected": -0.1723480224609375, |
| "logps/chosen": -704.5, |
| "logps/rejected": -864.5, |
| "loss": 0.4003, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.115234375, |
| "rewards/margins": 1.41064453125, |
| "rewards/rejected": -5.5234375, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.75008, |
| "grad_norm": 13.312445253403741, |
| "learning_rate": 1.2502433660971123e-07, |
| "logits/chosen": -0.19342041015625, |
| "logits/rejected": -0.23333740234375, |
| "logps/chosen": -759.0, |
| "logps/rejected": -883.0, |
| "loss": 0.3936, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -4.291015625, |
| "rewards/margins": 1.40185546875, |
| "rewards/rejected": -5.69921875, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.75136, |
| "grad_norm": 8.960818660553267, |
| "learning_rate": 1.2382672186187003e-07, |
| "logits/chosen": -0.2032470703125, |
| "logits/rejected": -0.2041015625, |
| "logps/chosen": -725.5, |
| "logps/rejected": -849.0, |
| "loss": 0.3993, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.953125, |
| "rewards/margins": 1.3955078125, |
| "rewards/rejected": -5.3515625, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.75264, |
| "grad_norm": 10.646689306325051, |
| "learning_rate": 1.2263363677907974e-07, |
| "logits/chosen": -0.123687744140625, |
| "logits/rejected": -0.1717071533203125, |
| "logps/chosen": -723.0, |
| "logps/rejected": -856.5, |
| "loss": 0.4223, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.0390625, |
| "rewards/margins": 1.4228515625, |
| "rewards/rejected": -5.4609375, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.75392, |
| "grad_norm": 12.393571858690176, |
| "learning_rate": 1.214451052557453e-07, |
| "logits/chosen": -0.177520751953125, |
| "logits/rejected": -0.19673919677734375, |
| "logps/chosen": -767.0, |
| "logps/rejected": -846.5, |
| "loss": 0.5177, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.318359375, |
| "rewards/margins": 0.98583984375, |
| "rewards/rejected": -5.30078125, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.7552, |
| "grad_norm": 22.161788073197705, |
| "learning_rate": 1.202611510950747e-07, |
| "logits/chosen": -0.08966064453125, |
| "logits/rejected": -0.11224365234375, |
| "logps/chosen": -767.5, |
| "logps/rejected": -872.0, |
| "loss": 0.5003, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.390625, |
| "rewards/margins": 1.14306640625, |
| "rewards/rejected": -5.53515625, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.75648, |
| "grad_norm": 12.260254703657399, |
| "learning_rate": 1.1908179800860415e-07, |
| "logits/chosen": -0.0744476318359375, |
| "logits/rejected": -0.13702392578125, |
| "logps/chosen": -723.0, |
| "logps/rejected": -851.5, |
| "loss": 0.4506, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.1640625, |
| "rewards/margins": 1.28857421875, |
| "rewards/rejected": -5.4453125, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.75776, |
| "grad_norm": 9.937355029365806, |
| "learning_rate": 1.1790706961572176e-07, |
| "logits/chosen": -0.20186614990234375, |
| "logits/rejected": -0.202545166015625, |
| "logps/chosen": -788.0, |
| "logps/rejected": -897.5, |
| "loss": 0.4231, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.359375, |
| "rewards/margins": 1.36572265625, |
| "rewards/rejected": -5.71875, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.75904, |
| "grad_norm": 10.470389968279672, |
| "learning_rate": 1.1673698944319491e-07, |
| "logits/chosen": -0.11907958984375, |
| "logits/rejected": -0.17713165283203125, |
| "logps/chosen": -760.5, |
| "logps/rejected": -864.5, |
| "loss": 0.4464, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.271484375, |
| "rewards/margins": 1.3154296875, |
| "rewards/rejected": -5.58984375, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.76032, |
| "grad_norm": 25.904113311082135, |
| "learning_rate": 1.1557158092469967e-07, |
| "logits/chosen": -0.13934326171875, |
| "logits/rejected": -0.1776123046875, |
| "logps/chosen": -762.0, |
| "logps/rejected": -914.5, |
| "loss": 0.5208, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.568359375, |
| "rewards/margins": 1.5400390625, |
| "rewards/rejected": -6.109375, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.7616, |
| "grad_norm": 14.910481470992487, |
| "learning_rate": 1.1441086740035036e-07, |
| "logits/chosen": -0.197296142578125, |
| "logits/rejected": -0.231689453125, |
| "logps/chosen": -748.5, |
| "logps/rejected": -863.0, |
| "loss": 0.3905, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -4.0625, |
| "rewards/margins": 1.39453125, |
| "rewards/rejected": -5.4609375, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.76288, |
| "grad_norm": 10.59387700561826, |
| "learning_rate": 1.1325487211623342e-07, |
| "logits/chosen": -0.15478515625, |
| "logits/rejected": -0.188751220703125, |
| "logps/chosen": -709.0, |
| "logps/rejected": -851.0, |
| "loss": 0.4572, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.25390625, |
| "rewards/margins": 1.296875, |
| "rewards/rejected": -5.546875, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.76416, |
| "grad_norm": 11.067694544255176, |
| "learning_rate": 1.1210361822394029e-07, |
| "logits/chosen": -0.2096710205078125, |
| "logits/rejected": -0.2550048828125, |
| "logps/chosen": -758.0, |
| "logps/rejected": -878.5, |
| "loss": 0.4847, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.25, |
| "rewards/margins": 1.408203125, |
| "rewards/rejected": -5.65234375, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.76544, |
| "grad_norm": 45.69276578416623, |
| "learning_rate": 1.1095712878010541e-07, |
| "logits/chosen": -0.0606689453125, |
| "logits/rejected": -0.1527099609375, |
| "logps/chosen": -777.0, |
| "logps/rejected": -876.0, |
| "loss": 0.5514, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.46484375, |
| "rewards/margins": 1.1143798828125, |
| "rewards/rejected": -5.58203125, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.76672, |
| "grad_norm": 12.679098112978194, |
| "learning_rate": 1.0981542674594328e-07, |
| "logits/chosen": -0.1817779541015625, |
| "logits/rejected": -0.23337554931640625, |
| "logps/chosen": -700.0, |
| "logps/rejected": -842.5, |
| "loss": 0.4295, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.791015625, |
| "rewards/margins": 1.537109375, |
| "rewards/rejected": -5.33203125, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 10.800303505547818, |
| "learning_rate": 1.0867853498678901e-07, |
| "logits/chosen": -0.273681640625, |
| "logits/rejected": -0.293701171875, |
| "logps/chosen": -705.0, |
| "logps/rejected": -885.5, |
| "loss": 0.4958, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.986328125, |
| "rewards/margins": 1.387939453125, |
| "rewards/rejected": -5.375, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.768, |
| "eval_logits/chosen": -0.19268035888671875, |
| "eval_logits/rejected": -0.27252197265625, |
| "eval_logps/chosen": -706.25, |
| "eval_logps/rejected": -804.5, |
| "eval_loss": 0.47617968916893005, |
| "eval_rewards/accuracies": 0.740234375, |
| "eval_rewards/chosen": -3.86328125, |
| "eval_rewards/margins": 1.166748046875, |
| "eval_rewards/rejected": -5.025390625, |
| "eval_runtime": 27.9176, |
| "eval_samples_per_second": 17.91, |
| "eval_steps_per_second": 0.573, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.76928, |
| "grad_norm": 9.048271943790935, |
| "learning_rate": 1.0754647627164022e-07, |
| "logits/chosen": -0.184814453125, |
| "logits/rejected": -0.2506103515625, |
| "logps/chosen": -703.5, |
| "logps/rejected": -881.5, |
| "loss": 0.3442, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.921875, |
| "rewards/margins": 1.6376953125, |
| "rewards/rejected": -5.5625, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.77056, |
| "grad_norm": 26.0709495707422, |
| "learning_rate": 1.064192732727016e-07, |
| "logits/chosen": -0.19481658935546875, |
| "logits/rejected": -0.232147216796875, |
| "logps/chosen": -694.5, |
| "logps/rejected": -756.0, |
| "loss": 0.6118, |
| "rewards/accuracies": 0.6953125, |
| "rewards/chosen": -3.634765625, |
| "rewards/margins": 0.9560546875, |
| "rewards/rejected": -4.59375, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.77184, |
| "grad_norm": 9.88642766961704, |
| "learning_rate": 1.0529694856493002e-07, |
| "logits/chosen": -0.2410888671875, |
| "logits/rejected": -0.287353515625, |
| "logps/chosen": -764.0, |
| "logps/rejected": -886.5, |
| "loss": 0.4617, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.41015625, |
| "rewards/margins": 1.09234619140625, |
| "rewards/rejected": -5.50390625, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.77312, |
| "grad_norm": 9.433226751910178, |
| "learning_rate": 1.0417952462558286e-07, |
| "logits/chosen": -0.21282958984375, |
| "logits/rejected": -0.2587127685546875, |
| "logps/chosen": -719.0, |
| "logps/rejected": -844.5, |
| "loss": 0.4325, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.951171875, |
| "rewards/margins": 1.25732421875, |
| "rewards/rejected": -5.2109375, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.7744, |
| "grad_norm": 11.539221682126705, |
| "learning_rate": 1.0306702383376813e-07, |
| "logits/chosen": -0.20572662353515625, |
| "logits/rejected": -0.2346649169921875, |
| "logps/chosen": -729.0, |
| "logps/rejected": -841.5, |
| "loss": 0.4227, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.88671875, |
| "rewards/margins": 1.31982421875, |
| "rewards/rejected": -5.20703125, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.77568, |
| "grad_norm": 9.610274231941142, |
| "learning_rate": 1.0195946846999551e-07, |
| "logits/chosen": -0.274658203125, |
| "logits/rejected": -0.3345947265625, |
| "logps/chosen": -688.5, |
| "logps/rejected": -803.5, |
| "loss": 0.4396, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.70703125, |
| "rewards/margins": 1.340576171875, |
| "rewards/rejected": -5.04296875, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.77696, |
| "grad_norm": 9.567408589138397, |
| "learning_rate": 1.0085688071573085e-07, |
| "logits/chosen": -0.221038818359375, |
| "logits/rejected": -0.2277069091796875, |
| "logps/chosen": -713.0, |
| "logps/rejected": -840.0, |
| "loss": 0.4913, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.861328125, |
| "rewards/margins": 1.158935546875, |
| "rewards/rejected": -5.015625, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.77824, |
| "grad_norm": 12.50027732304155, |
| "learning_rate": 9.975928265295139e-08, |
| "logits/chosen": -0.26611328125, |
| "logits/rejected": -0.309814453125, |
| "logps/chosen": -691.5, |
| "logps/rejected": -806.5, |
| "loss": 0.3885, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.720703125, |
| "rewards/margins": 1.33642578125, |
| "rewards/rejected": -5.05859375, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.77952, |
| "grad_norm": 10.627712453917534, |
| "learning_rate": 9.866669626370412e-08, |
| "logits/chosen": -0.2711181640625, |
| "logits/rejected": -0.32177734375, |
| "logps/chosen": -739.5, |
| "logps/rejected": -880.0, |
| "loss": 0.4459, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.81640625, |
| "rewards/margins": 1.3115234375, |
| "rewards/rejected": -5.12109375, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.7808, |
| "grad_norm": 9.075340025404623, |
| "learning_rate": 9.757914342966495e-08, |
| "logits/chosen": -0.17635345458984375, |
| "logits/rejected": -0.23785400390625, |
| "logps/chosen": -698.5, |
| "logps/rejected": -799.5, |
| "loss": 0.4333, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.68359375, |
| "rewards/margins": 1.27197265625, |
| "rewards/rejected": -4.958984375, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.78208, |
| "grad_norm": 17.6370736058902, |
| "learning_rate": 9.64966459317006e-08, |
| "logits/chosen": -0.2579345703125, |
| "logits/rejected": -0.31915283203125, |
| "logps/chosen": -692.5, |
| "logps/rejected": -871.0, |
| "loss": 0.3659, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.849609375, |
| "rewards/margins": 1.578125, |
| "rewards/rejected": -5.421875, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.78336, |
| "grad_norm": 10.37299929145861, |
| "learning_rate": 9.541922544943294e-08, |
| "logits/chosen": -0.2517547607421875, |
| "logits/rejected": -0.315185546875, |
| "logps/chosen": -712.5, |
| "logps/rejected": -842.0, |
| "loss": 0.4197, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.94140625, |
| "rewards/margins": 1.3232421875, |
| "rewards/rejected": -5.265625, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.78464, |
| "grad_norm": 8.76113086501841, |
| "learning_rate": 9.434690356080393e-08, |
| "logits/chosen": -0.3062744140625, |
| "logits/rejected": -0.37060546875, |
| "logps/chosen": -683.0, |
| "logps/rejected": -821.0, |
| "loss": 0.4658, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.763671875, |
| "rewards/margins": 1.23486328125, |
| "rewards/rejected": -5.00390625, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.78592, |
| "grad_norm": 12.189626085161583, |
| "learning_rate": 9.327970174164408e-08, |
| "logits/chosen": -0.16046142578125, |
| "logits/rejected": -0.1937255859375, |
| "logps/chosen": -695.5, |
| "logps/rejected": -782.0, |
| "loss": 0.534, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.953125, |
| "rewards/margins": 0.9755859375, |
| "rewards/rejected": -4.92578125, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7872, |
| "grad_norm": 15.30898275082047, |
| "learning_rate": 9.221764136524202e-08, |
| "logits/chosen": -0.2682647705078125, |
| "logits/rejected": -0.337371826171875, |
| "logps/chosen": -689.5, |
| "logps/rejected": -798.0, |
| "loss": 0.417, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.775390625, |
| "rewards/margins": 1.229736328125, |
| "rewards/rejected": -5.0078125, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.78848, |
| "grad_norm": 9.836705918506746, |
| "learning_rate": 9.116074370191705e-08, |
| "logits/chosen": -0.2236480712890625, |
| "logits/rejected": -0.28759765625, |
| "logps/chosen": -670.0, |
| "logps/rejected": -761.0, |
| "loss": 0.4567, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.65234375, |
| "rewards/margins": 1.11376953125, |
| "rewards/rejected": -4.765625, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.78976, |
| "grad_norm": 9.891228631270444, |
| "learning_rate": 9.010902991859196e-08, |
| "logits/chosen": -0.19852447509765625, |
| "logits/rejected": -0.21734619140625, |
| "logps/chosen": -712.0, |
| "logps/rejected": -813.5, |
| "loss": 0.4786, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.890625, |
| "rewards/margins": 1.10205078125, |
| "rewards/rejected": -4.98828125, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.79104, |
| "grad_norm": 10.619085582647367, |
| "learning_rate": 8.906252107837054e-08, |
| "logits/chosen": -0.24609375, |
| "logits/rejected": -0.298095703125, |
| "logps/chosen": -682.5, |
| "logps/rejected": -806.0, |
| "loss": 0.4685, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.83203125, |
| "rewards/margins": 1.31005859375, |
| "rewards/rejected": -5.1328125, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.79232, |
| "grad_norm": 8.55320493819975, |
| "learning_rate": 8.802123814011458e-08, |
| "logits/chosen": -0.2899169921875, |
| "logits/rejected": -0.3363037109375, |
| "logps/chosen": -700.0, |
| "logps/rejected": -820.5, |
| "loss": 0.4403, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.98046875, |
| "rewards/margins": 1.17822265625, |
| "rewards/rejected": -5.15625, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.7936, |
| "grad_norm": 9.70388545104585, |
| "learning_rate": 8.698520195802499e-08, |
| "logits/chosen": -0.26741790771484375, |
| "logits/rejected": -0.2891845703125, |
| "logps/chosen": -744.5, |
| "logps/rejected": -860.0, |
| "loss": 0.4775, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.951171875, |
| "rewards/margins": 1.2177734375, |
| "rewards/rejected": -5.171875, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.79488, |
| "grad_norm": 12.896195486210319, |
| "learning_rate": 8.595443328122345e-08, |
| "logits/chosen": -0.253082275390625, |
| "logits/rejected": -0.288360595703125, |
| "logps/chosen": -699.0, |
| "logps/rejected": -840.0, |
| "loss": 0.4228, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.779296875, |
| "rewards/margins": 1.33056640625, |
| "rewards/rejected": -5.1171875, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.79616, |
| "grad_norm": 10.316721984106971, |
| "learning_rate": 8.492895275333704e-08, |
| "logits/chosen": -0.242431640625, |
| "logits/rejected": -0.29632568359375, |
| "logps/chosen": -647.0, |
| "logps/rejected": -806.5, |
| "loss": 0.4143, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.740234375, |
| "rewards/margins": 1.31005859375, |
| "rewards/rejected": -5.05078125, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.79744, |
| "grad_norm": 10.794420061393975, |
| "learning_rate": 8.390878091208543e-08, |
| "logits/chosen": -0.1590423583984375, |
| "logits/rejected": -0.21337890625, |
| "logps/chosen": -706.0, |
| "logps/rejected": -777.0, |
| "loss": 0.46, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.787109375, |
| "rewards/margins": 1.115234375, |
| "rewards/rejected": -4.90625, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.79872, |
| "grad_norm": 8.48224104747155, |
| "learning_rate": 8.289393818886838e-08, |
| "logits/chosen": -0.1787109375, |
| "logits/rejected": -0.205230712890625, |
| "logps/chosen": -709.5, |
| "logps/rejected": -812.0, |
| "loss": 0.4332, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.748046875, |
| "rewards/margins": 1.294921875, |
| "rewards/rejected": -5.0546875, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 23.137925812568916, |
| "learning_rate": 8.188444490835773e-08, |
| "logits/chosen": -0.23029327392578125, |
| "logits/rejected": -0.27685546875, |
| "logps/chosen": -721.5, |
| "logps/rejected": -848.0, |
| "loss": 0.508, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.85546875, |
| "rewards/margins": 1.2080078125, |
| "rewards/rejected": -5.0546875, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.80128, |
| "grad_norm": 9.512426613145827, |
| "learning_rate": 8.088032128808952e-08, |
| "logits/chosen": -0.16583251953125, |
| "logits/rejected": -0.2113037109375, |
| "logps/chosen": -700.0, |
| "logps/rejected": -822.5, |
| "loss": 0.4431, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.73046875, |
| "rewards/margins": 1.28515625, |
| "rewards/rejected": -5.021484375, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.80256, |
| "grad_norm": 9.523485315600638, |
| "learning_rate": 7.988158743805972e-08, |
| "logits/chosen": -0.28082275390625, |
| "logits/rejected": -0.3321533203125, |
| "logps/chosen": -685.5, |
| "logps/rejected": -781.0, |
| "loss": 0.4848, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.615234375, |
| "rewards/margins": 1.0703125, |
| "rewards/rejected": -4.6875, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.80384, |
| "grad_norm": 9.450418163489136, |
| "learning_rate": 7.888826336032093e-08, |
| "logits/chosen": -0.2333984375, |
| "logits/rejected": -0.3123779296875, |
| "logps/chosen": -714.5, |
| "logps/rejected": -828.0, |
| "loss": 0.4416, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.89453125, |
| "rewards/margins": 1.34619140625, |
| "rewards/rejected": -5.2421875, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.80512, |
| "grad_norm": 13.14269906470784, |
| "learning_rate": 7.790036894858197e-08, |
| "logits/chosen": -0.24493408203125, |
| "logits/rejected": -0.3167724609375, |
| "logps/chosen": -700.0, |
| "logps/rejected": -795.0, |
| "loss": 0.4833, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.8125, |
| "rewards/margins": 1.1923828125, |
| "rewards/rejected": -5.0, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.8064, |
| "grad_norm": 17.566771895062967, |
| "learning_rate": 7.691792398780962e-08, |
| "logits/chosen": -0.18544769287109375, |
| "logits/rejected": -0.23531341552734375, |
| "logps/chosen": -714.0, |
| "logps/rejected": -850.0, |
| "loss": 0.4277, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.693359375, |
| "rewards/margins": 1.42333984375, |
| "rewards/rejected": -5.1171875, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.80768, |
| "grad_norm": 10.9996007916988, |
| "learning_rate": 7.594094815383224e-08, |
| "logits/chosen": -0.1927490234375, |
| "logits/rejected": -0.25665283203125, |
| "logps/chosen": -712.0, |
| "logps/rejected": -824.5, |
| "loss": 0.4316, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.744140625, |
| "rewards/margins": 1.3701171875, |
| "rewards/rejected": -5.10546875, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.80896, |
| "grad_norm": 16.150017435469856, |
| "learning_rate": 7.496946101294586e-08, |
| "logits/chosen": -0.2930908203125, |
| "logits/rejected": -0.3349609375, |
| "logps/chosen": -733.5, |
| "logps/rejected": -856.0, |
| "loss": 0.423, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.951171875, |
| "rewards/margins": 1.19189453125, |
| "rewards/rejected": -5.140625, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.81024, |
| "grad_norm": 22.47987317628445, |
| "learning_rate": 7.400348202152192e-08, |
| "logits/chosen": -0.12548828125, |
| "logits/rejected": -0.17365264892578125, |
| "logps/chosen": -735.0, |
| "logps/rejected": -837.5, |
| "loss": 0.5789, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -4.09765625, |
| "rewards/margins": 0.86962890625, |
| "rewards/rejected": -4.958984375, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.81152, |
| "grad_norm": 15.769251930029661, |
| "learning_rate": 7.304303052561841e-08, |
| "logits/chosen": -0.16961669921875, |
| "logits/rejected": -0.24853515625, |
| "logps/chosen": -687.0, |
| "logps/rejected": -828.5, |
| "loss": 0.3706, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.740234375, |
| "rewards/margins": 1.4990234375, |
| "rewards/rejected": -5.23046875, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.8128, |
| "grad_norm": 14.6903117429968, |
| "learning_rate": 7.208812576059112e-08, |
| "logits/chosen": -0.35009765625, |
| "logits/rejected": -0.3636474609375, |
| "logps/chosen": -746.5, |
| "logps/rejected": -820.5, |
| "loss": 0.5197, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.94140625, |
| "rewards/margins": 1.1171875, |
| "rewards/rejected": -5.06640625, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.81408, |
| "grad_norm": 13.239985107487296, |
| "learning_rate": 7.113878685070993e-08, |
| "logits/chosen": -0.2686767578125, |
| "logits/rejected": -0.2921142578125, |
| "logps/chosen": -738.5, |
| "logps/rejected": -828.5, |
| "loss": 0.5338, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.21875, |
| "rewards/margins": 0.9364013671875, |
| "rewards/rejected": -5.1484375, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.81536, |
| "grad_norm": 18.26280480241238, |
| "learning_rate": 7.019503280877466e-08, |
| "logits/chosen": -0.207672119140625, |
| "logits/rejected": -0.2611083984375, |
| "logps/chosen": -721.0, |
| "logps/rejected": -863.5, |
| "loss": 0.3718, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -3.927734375, |
| "rewards/margins": 1.470703125, |
| "rewards/rejected": -5.40234375, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.81664, |
| "grad_norm": 11.402715846459442, |
| "learning_rate": 6.925688253573465e-08, |
| "logits/chosen": -0.165740966796875, |
| "logits/rejected": -0.204833984375, |
| "logps/chosen": -711.0, |
| "logps/rejected": -823.0, |
| "loss": 0.4429, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.888671875, |
| "rewards/margins": 1.33349609375, |
| "rewards/rejected": -5.22265625, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.81792, |
| "grad_norm": 9.972091848028276, |
| "learning_rate": 6.832435482031064e-08, |
| "logits/chosen": -0.244384765625, |
| "logits/rejected": -0.24786376953125, |
| "logps/chosen": -713.5, |
| "logps/rejected": -818.0, |
| "loss": 0.445, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.828125, |
| "rewards/margins": 1.15966796875, |
| "rewards/rejected": -4.986328125, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.8192, |
| "grad_norm": 15.756161036698968, |
| "learning_rate": 6.739746833861759e-08, |
| "logits/chosen": -0.227813720703125, |
| "logits/rejected": -0.30035400390625, |
| "logps/chosen": -700.5, |
| "logps/rejected": -846.5, |
| "loss": 0.4093, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.9375, |
| "rewards/margins": 1.30615234375, |
| "rewards/rejected": -5.25390625, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.82048, |
| "grad_norm": 12.006378355004856, |
| "learning_rate": 6.647624165379173e-08, |
| "logits/chosen": -0.203277587890625, |
| "logits/rejected": -0.27020263671875, |
| "logps/chosen": -650.0, |
| "logps/rejected": -774.0, |
| "loss": 0.4311, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.59765625, |
| "rewards/margins": 1.26904296875, |
| "rewards/rejected": -4.8671875, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.82176, |
| "grad_norm": 8.47930046439374, |
| "learning_rate": 6.55606932156175e-08, |
| "logits/chosen": -0.185546875, |
| "logits/rejected": -0.24249267578125, |
| "logps/chosen": -706.5, |
| "logps/rejected": -811.5, |
| "loss": 0.404, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.810546875, |
| "rewards/margins": 1.3720703125, |
| "rewards/rejected": -5.1796875, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.82304, |
| "grad_norm": 8.893390613363472, |
| "learning_rate": 6.46508413601595e-08, |
| "logits/chosen": -0.22678756713867188, |
| "logits/rejected": -0.26580810546875, |
| "logps/chosen": -698.0, |
| "logps/rejected": -811.5, |
| "loss": 0.4419, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.79296875, |
| "rewards/margins": 1.20361328125, |
| "rewards/rejected": -4.99609375, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.82432, |
| "grad_norm": 9.63687460143777, |
| "learning_rate": 6.374670430939404e-08, |
| "logits/chosen": -0.19427490234375, |
| "logits/rejected": -0.2384033203125, |
| "logps/chosen": -729.5, |
| "logps/rejected": -863.0, |
| "loss": 0.4036, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.90234375, |
| "rewards/margins": 1.35791015625, |
| "rewards/rejected": -5.2578125, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.8256, |
| "grad_norm": 16.684775851774532, |
| "learning_rate": 6.284830017084488e-08, |
| "logits/chosen": -0.223876953125, |
| "logits/rejected": -0.291748046875, |
| "logps/chosen": -698.5, |
| "logps/rejected": -857.0, |
| "loss": 0.4179, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -4.015625, |
| "rewards/margins": 1.4716796875, |
| "rewards/rejected": -5.4921875, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.82688, |
| "grad_norm": 20.24004777671895, |
| "learning_rate": 6.195564693722028e-08, |
| "logits/chosen": -0.24462890625, |
| "logits/rejected": -0.2711181640625, |
| "logps/chosen": -721.0, |
| "logps/rejected": -830.5, |
| "loss": 0.5167, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.87109375, |
| "rewards/margins": 1.27490234375, |
| "rewards/rejected": -5.140625, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.82816, |
| "grad_norm": 9.754642974447716, |
| "learning_rate": 6.1068762486053e-08, |
| "logits/chosen": -0.10089111328125, |
| "logits/rejected": -0.14161300659179688, |
| "logps/chosen": -691.5, |
| "logps/rejected": -848.5, |
| "loss": 0.449, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.041015625, |
| "rewards/margins": 1.12451171875, |
| "rewards/rejected": -5.17578125, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.82944, |
| "grad_norm": 8.380496897716654, |
| "learning_rate": 6.018766457934177e-08, |
| "logits/chosen": -0.157867431640625, |
| "logits/rejected": -0.177886962890625, |
| "logps/chosen": -722.0, |
| "logps/rejected": -858.5, |
| "loss": 0.4101, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -3.986328125, |
| "rewards/margins": 1.50341796875, |
| "rewards/rejected": -5.49609375, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.83072, |
| "grad_norm": 30.217667929527913, |
| "learning_rate": 5.931237086319592e-08, |
| "logits/chosen": -0.240966796875, |
| "logits/rejected": -0.3031005859375, |
| "logps/chosen": -713.0, |
| "logps/rejected": -784.0, |
| "loss": 0.6239, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -4.0, |
| "rewards/margins": 0.900146484375, |
| "rewards/rejected": -4.8984375, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 13.251227443569483, |
| "learning_rate": 5.844289886748196e-08, |
| "logits/chosen": -0.1904144287109375, |
| "logits/rejected": -0.260955810546875, |
| "logps/chosen": -729.0, |
| "logps/rejected": -828.0, |
| "loss": 0.4859, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.0078125, |
| "rewards/margins": 1.28173828125, |
| "rewards/rejected": -5.29296875, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.83328, |
| "grad_norm": 10.941982402370376, |
| "learning_rate": 5.7579266005472304e-08, |
| "logits/chosen": -0.22198486328125, |
| "logits/rejected": -0.2850189208984375, |
| "logps/chosen": -731.0, |
| "logps/rejected": -805.5, |
| "loss": 0.4837, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.943359375, |
| "rewards/margins": 1.12548828125, |
| "rewards/rejected": -5.072265625, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.83456, |
| "grad_norm": 11.451372721475789, |
| "learning_rate": 5.672148957349661e-08, |
| "logits/chosen": -0.2811279296875, |
| "logits/rejected": -0.3087158203125, |
| "logps/chosen": -737.5, |
| "logps/rejected": -837.5, |
| "loss": 0.5053, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.259765625, |
| "rewards/margins": 1.033203125, |
| "rewards/rejected": -5.29296875, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.83584, |
| "grad_norm": 13.98757298272134, |
| "learning_rate": 5.586958675059548e-08, |
| "logits/chosen": -0.2623291015625, |
| "logits/rejected": -0.28857421875, |
| "logps/chosen": -730.0, |
| "logps/rejected": -806.0, |
| "loss": 0.5169, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.154296875, |
| "rewards/margins": 0.978271484375, |
| "rewards/rejected": -5.13671875, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.83712, |
| "grad_norm": 12.725608737110653, |
| "learning_rate": 5.502357459817639e-08, |
| "logits/chosen": -0.3026123046875, |
| "logits/rejected": -0.3392333984375, |
| "logps/chosen": -785.5, |
| "logps/rejected": -902.5, |
| "loss": 0.4683, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.27734375, |
| "rewards/margins": 1.37109375, |
| "rewards/rejected": -5.6484375, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.8384, |
| "grad_norm": 18.59261713230035, |
| "learning_rate": 5.418347005967189e-08, |
| "logits/chosen": -0.22393798828125, |
| "logits/rejected": -0.2733154296875, |
| "logps/chosen": -670.5, |
| "logps/rejected": -854.0, |
| "loss": 0.3584, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -3.83203125, |
| "rewards/margins": 1.6171875, |
| "rewards/rejected": -5.453125, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.83968, |
| "grad_norm": 15.681430188596131, |
| "learning_rate": 5.334928996020012e-08, |
| "logits/chosen": -0.2601318359375, |
| "logits/rejected": -0.2978515625, |
| "logps/chosen": -706.0, |
| "logps/rejected": -789.0, |
| "loss": 0.4116, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.630859375, |
| "rewards/margins": 1.21728515625, |
| "rewards/rejected": -4.84375, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.84096, |
| "grad_norm": 14.42329884280154, |
| "learning_rate": 5.2521051006228475e-08, |
| "logits/chosen": -0.16900634765625, |
| "logits/rejected": -0.22943115234375, |
| "logps/chosen": -719.0, |
| "logps/rejected": -837.5, |
| "loss": 0.4993, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.09765625, |
| "rewards/margins": 1.08544921875, |
| "rewards/rejected": -5.18359375, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.84224, |
| "grad_norm": 8.370169088229156, |
| "learning_rate": 5.169876978523828e-08, |
| "logits/chosen": -0.2957763671875, |
| "logits/rejected": -0.3585205078125, |
| "logps/chosen": -735.0, |
| "logps/rejected": -874.0, |
| "loss": 0.4017, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -4.001953125, |
| "rewards/margins": 1.5009765625, |
| "rewards/rejected": -5.51171875, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.84352, |
| "grad_norm": 9.292854951761994, |
| "learning_rate": 5.088246276539292e-08, |
| "logits/chosen": -0.2945556640625, |
| "logits/rejected": -0.3333740234375, |
| "logps/chosen": -772.5, |
| "logps/rejected": -869.5, |
| "loss": 0.4474, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.111328125, |
| "rewards/margins": 1.32177734375, |
| "rewards/rejected": -5.4296875, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.8448, |
| "grad_norm": 12.841846778247005, |
| "learning_rate": 5.0072146295208e-08, |
| "logits/chosen": -0.3035888671875, |
| "logits/rejected": -0.3436279296875, |
| "logps/chosen": -690.0, |
| "logps/rejected": -835.0, |
| "loss": 0.3979, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.791015625, |
| "rewards/margins": 1.52734375, |
| "rewards/rejected": -5.31640625, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.84608, |
| "grad_norm": 12.621963464933918, |
| "learning_rate": 4.926783660322411e-08, |
| "logits/chosen": -0.275390625, |
| "logits/rejected": -0.326904296875, |
| "logps/chosen": -724.0, |
| "logps/rejected": -809.0, |
| "loss": 0.4929, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.876953125, |
| "rewards/margins": 1.166015625, |
| "rewards/rejected": -5.046875, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.84736, |
| "grad_norm": 9.6461916615264, |
| "learning_rate": 4.846954979768149e-08, |
| "logits/chosen": -0.308380126953125, |
| "logits/rejected": -0.3372802734375, |
| "logps/chosen": -733.0, |
| "logps/rejected": -825.0, |
| "loss": 0.4582, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.904296875, |
| "rewards/margins": 0.998046875, |
| "rewards/rejected": -4.90625, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.84864, |
| "grad_norm": 12.039674839827219, |
| "learning_rate": 4.7677301866197455e-08, |
| "logits/chosen": -0.2691650390625, |
| "logits/rejected": -0.311279296875, |
| "logps/chosen": -731.5, |
| "logps/rejected": -866.0, |
| "loss": 0.4554, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.056640625, |
| "rewards/margins": 1.3349609375, |
| "rewards/rejected": -5.38671875, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.84992, |
| "grad_norm": 8.987446887684213, |
| "learning_rate": 4.689110867544645e-08, |
| "logits/chosen": -0.169097900390625, |
| "logits/rejected": -0.20819091796875, |
| "logps/chosen": -667.5, |
| "logps/rejected": -767.0, |
| "loss": 0.4374, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.666015625, |
| "rewards/margins": 1.1484375, |
| "rewards/rejected": -4.8046875, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.8512, |
| "grad_norm": 14.644380809572876, |
| "learning_rate": 4.611098597084226e-08, |
| "logits/chosen": -0.1959228515625, |
| "logits/rejected": -0.249267578125, |
| "logps/chosen": -718.0, |
| "logps/rejected": -857.5, |
| "loss": 0.5053, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -4.177734375, |
| "rewards/margins": 1.05908203125, |
| "rewards/rejected": -5.23046875, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.85248, |
| "grad_norm": 8.230847446292731, |
| "learning_rate": 4.5336949376222274e-08, |
| "logits/chosen": -0.2403564453125, |
| "logits/rejected": -0.26849365234375, |
| "logps/chosen": -734.5, |
| "logps/rejected": -859.0, |
| "loss": 0.3859, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.775390625, |
| "rewards/margins": 1.490234375, |
| "rewards/rejected": -5.2734375, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.85376, |
| "grad_norm": 9.397546683815692, |
| "learning_rate": 4.4569014393534986e-08, |
| "logits/chosen": -0.22314453125, |
| "logits/rejected": -0.26226806640625, |
| "logps/chosen": -684.5, |
| "logps/rejected": -783.0, |
| "loss": 0.4507, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.8203125, |
| "rewards/margins": 1.06103515625, |
| "rewards/rejected": -4.87890625, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.85504, |
| "grad_norm": 9.127958781532016, |
| "learning_rate": 4.380719640252953e-08, |
| "logits/chosen": -0.2354736328125, |
| "logits/rejected": -0.2667236328125, |
| "logps/chosen": -749.5, |
| "logps/rejected": -839.5, |
| "loss": 0.456, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.07421875, |
| "rewards/margins": 1.228515625, |
| "rewards/rejected": -5.30078125, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.85632, |
| "grad_norm": 9.421323267573934, |
| "learning_rate": 4.3051510660447335e-08, |
| "logits/chosen": -0.295379638671875, |
| "logits/rejected": -0.35400390625, |
| "logps/chosen": -676.0, |
| "logps/rejected": -819.0, |
| "loss": 0.4437, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.71875, |
| "rewards/margins": 1.28564453125, |
| "rewards/rejected": -5.0078125, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.8576, |
| "grad_norm": 10.534790363492052, |
| "learning_rate": 4.230197230171693e-08, |
| "logits/chosen": -0.3251953125, |
| "logits/rejected": -0.375244140625, |
| "logps/chosen": -760.5, |
| "logps/rejected": -831.0, |
| "loss": 0.4472, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.83984375, |
| "rewards/margins": 1.1640625, |
| "rewards/rejected": -4.998046875, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.85888, |
| "grad_norm": 8.740923113820415, |
| "learning_rate": 4.155859633765044e-08, |
| "logits/chosen": -0.27716064453125, |
| "logits/rejected": -0.295166015625, |
| "logps/chosen": -698.0, |
| "logps/rejected": -825.0, |
| "loss": 0.459, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.890625, |
| "rewards/margins": 1.258544921875, |
| "rewards/rejected": -5.15234375, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.86016, |
| "grad_norm": 11.346727433404798, |
| "learning_rate": 4.08213976561435e-08, |
| "logits/chosen": -0.2637290954589844, |
| "logits/rejected": -0.3646240234375, |
| "logps/chosen": -735.5, |
| "logps/rejected": -866.5, |
| "loss": 0.4928, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.146484375, |
| "rewards/margins": 1.279296875, |
| "rewards/rejected": -5.42578125, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.86144, |
| "grad_norm": 10.398636636838543, |
| "learning_rate": 4.009039102137657e-08, |
| "logits/chosen": -0.2867431640625, |
| "logits/rejected": -0.31884765625, |
| "logps/chosen": -739.5, |
| "logps/rejected": -823.0, |
| "loss": 0.52, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.947265625, |
| "rewards/margins": 1.0927734375, |
| "rewards/rejected": -5.03515625, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.86272, |
| "grad_norm": 13.027404663461114, |
| "learning_rate": 3.936559107351939e-08, |
| "logits/chosen": -0.25640869140625, |
| "logits/rejected": -0.2908935546875, |
| "logps/chosen": -682.0, |
| "logps/rejected": -813.5, |
| "loss": 0.3722, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.75, |
| "rewards/margins": 1.4267578125, |
| "rewards/rejected": -5.17578125, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 9.010470752300636, |
| "learning_rate": 3.864701232843808e-08, |
| "logits/chosen": -0.335205078125, |
| "logits/rejected": -0.3759765625, |
| "logps/chosen": -703.5, |
| "logps/rejected": -835.0, |
| "loss": 0.4373, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.712890625, |
| "rewards/margins": 1.20849609375, |
| "rewards/rejected": -4.921875, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.86528, |
| "grad_norm": 9.78070477469183, |
| "learning_rate": 3.7934669177404015e-08, |
| "logits/chosen": -0.2587890625, |
| "logits/rejected": -0.32421875, |
| "logps/chosen": -728.5, |
| "logps/rejected": -826.0, |
| "loss": 0.4832, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.0625, |
| "rewards/margins": 1.38916015625, |
| "rewards/rejected": -5.453125, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.86656, |
| "grad_norm": 12.145508203259396, |
| "learning_rate": 3.722857588680574e-08, |
| "logits/chosen": -0.2547607421875, |
| "logits/rejected": -0.296630859375, |
| "logps/chosen": -678.0, |
| "logps/rejected": -843.5, |
| "loss": 0.3965, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.734375, |
| "rewards/margins": 1.4375, |
| "rewards/rejected": -5.171875, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.86784, |
| "grad_norm": 14.052967853348232, |
| "learning_rate": 3.652874659786328e-08, |
| "logits/chosen": -0.268218994140625, |
| "logits/rejected": -0.321044921875, |
| "logps/chosen": -726.0, |
| "logps/rejected": -888.5, |
| "loss": 0.366, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -3.98046875, |
| "rewards/margins": 1.5517578125, |
| "rewards/rejected": -5.53125, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.86912, |
| "grad_norm": 8.818365407249955, |
| "learning_rate": 3.583519532634516e-08, |
| "logits/chosen": -0.2730865478515625, |
| "logits/rejected": -0.3010711669921875, |
| "logps/chosen": -701.0, |
| "logps/rejected": -813.5, |
| "loss": 0.4284, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.82421875, |
| "rewards/margins": 1.16748046875, |
| "rewards/rejected": -4.98828125, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.8704, |
| "grad_norm": 12.067218675895933, |
| "learning_rate": 3.514793596228702e-08, |
| "logits/chosen": -0.2110137939453125, |
| "logits/rejected": -0.283447265625, |
| "logps/chosen": -705.0, |
| "logps/rejected": -851.0, |
| "loss": 0.3709, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.740234375, |
| "rewards/margins": 1.58203125, |
| "rewards/rejected": -5.32421875, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.87168, |
| "grad_norm": 15.163206625422314, |
| "learning_rate": 3.4466982269714396e-08, |
| "logits/chosen": -0.2342529296875, |
| "logits/rejected": -0.271240234375, |
| "logps/chosen": -675.0, |
| "logps/rejected": -841.5, |
| "loss": 0.4759, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.54296875, |
| "rewards/margins": 1.16650390625, |
| "rewards/rejected": -4.70703125, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.87296, |
| "grad_norm": 13.083926981225705, |
| "learning_rate": 3.379234788636626e-08, |
| "logits/chosen": -0.24346923828125, |
| "logits/rejected": -0.260040283203125, |
| "logps/chosen": -682.0, |
| "logps/rejected": -784.0, |
| "loss": 0.499, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.755859375, |
| "rewards/margins": 1.13623046875, |
| "rewards/rejected": -4.892578125, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.87424, |
| "grad_norm": 9.209267131224845, |
| "learning_rate": 3.31240463234221e-08, |
| "logits/chosen": -0.2317047119140625, |
| "logits/rejected": -0.2623291015625, |
| "logps/chosen": -707.5, |
| "logps/rejected": -845.5, |
| "loss": 0.3632, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.77734375, |
| "rewards/margins": 1.587890625, |
| "rewards/rejected": -5.3671875, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.87552, |
| "grad_norm": 10.57378130101519, |
| "learning_rate": 3.246209096523176e-08, |
| "logits/chosen": -0.233551025390625, |
| "logits/rejected": -0.2782135009765625, |
| "logps/chosen": -741.0, |
| "logps/rejected": -830.0, |
| "loss": 0.4497, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.083984375, |
| "rewards/margins": 1.17626953125, |
| "rewards/rejected": -5.26171875, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.8768, |
| "grad_norm": 12.296255284365948, |
| "learning_rate": 3.180649506904667e-08, |
| "logits/chosen": -0.2994384765625, |
| "logits/rejected": -0.3275146484375, |
| "logps/chosen": -699.5, |
| "logps/rejected": -800.0, |
| "loss": 0.4883, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.74609375, |
| "rewards/margins": 1.09814453125, |
| "rewards/rejected": -4.84765625, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.87808, |
| "grad_norm": 12.690499499739776, |
| "learning_rate": 3.115727176475508e-08, |
| "logits/chosen": -0.2972412109375, |
| "logits/rejected": -0.36767578125, |
| "logps/chosen": -686.0, |
| "logps/rejected": -812.0, |
| "loss": 0.427, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.5546875, |
| "rewards/margins": 1.294921875, |
| "rewards/rejected": -4.8515625, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.87936, |
| "grad_norm": 9.788929929625333, |
| "learning_rate": 3.051443405461822e-08, |
| "logits/chosen": -0.2645263671875, |
| "logits/rejected": -0.296142578125, |
| "logps/chosen": -722.0, |
| "logps/rejected": -802.0, |
| "loss": 0.4763, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.84765625, |
| "rewards/margins": 1.03076171875, |
| "rewards/rejected": -4.875, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.88064, |
| "grad_norm": 11.417012062202968, |
| "learning_rate": 2.987799481301091e-08, |
| "logits/chosen": -0.282470703125, |
| "logits/rejected": -0.306640625, |
| "logps/chosen": -679.5, |
| "logps/rejected": -821.0, |
| "loss": 0.4559, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.6875, |
| "rewards/margins": 1.14501953125, |
| "rewards/rejected": -4.83984375, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.88192, |
| "grad_norm": 11.033813131632254, |
| "learning_rate": 2.924796678616297e-08, |
| "logits/chosen": -0.2666015625, |
| "logits/rejected": -0.3145751953125, |
| "logps/chosen": -709.5, |
| "logps/rejected": -838.0, |
| "loss": 0.4804, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.859375, |
| "rewards/margins": 1.21484375, |
| "rewards/rejected": -5.07421875, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.8832, |
| "grad_norm": 9.639277971365505, |
| "learning_rate": 2.862436259190414e-08, |
| "logits/chosen": -0.24395751953125, |
| "logits/rejected": -0.2987060546875, |
| "logps/chosen": -702.0, |
| "logps/rejected": -878.5, |
| "loss": 0.471, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.1015625, |
| "rewards/margins": 1.365234375, |
| "rewards/rejected": -5.46484375, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.88448, |
| "grad_norm": 8.604549708205145, |
| "learning_rate": 2.800719471941152e-08, |
| "logits/chosen": -0.26934814453125, |
| "logits/rejected": -0.3045654296875, |
| "logps/chosen": -693.5, |
| "logps/rejected": -802.5, |
| "loss": 0.4883, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.83203125, |
| "rewards/margins": 1.09423828125, |
| "rewards/rejected": -4.9296875, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.88576, |
| "grad_norm": 8.224767246318196, |
| "learning_rate": 2.739647552895949e-08, |
| "logits/chosen": -0.24432373046875, |
| "logits/rejected": -0.268463134765625, |
| "logps/chosen": -714.5, |
| "logps/rejected": -807.0, |
| "loss": 0.413, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.919921875, |
| "rewards/margins": 1.251953125, |
| "rewards/rejected": -5.162109375, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.88704, |
| "grad_norm": 10.030626998099564, |
| "learning_rate": 2.6792217251671745e-08, |
| "logits/chosen": -0.239654541015625, |
| "logits/rejected": -0.248565673828125, |
| "logps/chosen": -754.5, |
| "logps/rejected": -814.5, |
| "loss": 0.435, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.072265625, |
| "rewards/margins": 1.16650390625, |
| "rewards/rejected": -5.23828125, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.88832, |
| "grad_norm": 11.888659481208316, |
| "learning_rate": 2.6194431989276773e-08, |
| "logits/chosen": -0.2733154296875, |
| "logits/rejected": -0.331298828125, |
| "logps/chosen": -675.5, |
| "logps/rejected": -841.5, |
| "loss": 0.47, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -3.80078125, |
| "rewards/margins": 1.370849609375, |
| "rewards/rejected": -5.171875, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.8896, |
| "grad_norm": 9.80399886350075, |
| "learning_rate": 2.5603131713865372e-08, |
| "logits/chosen": -0.26300048828125, |
| "logits/rejected": -0.3204345703125, |
| "logps/chosen": -745.0, |
| "logps/rejected": -841.0, |
| "loss": 0.4927, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.22265625, |
| "rewards/margins": 1.11181640625, |
| "rewards/rejected": -5.33984375, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.89088, |
| "grad_norm": 11.910638478728464, |
| "learning_rate": 2.5018328267650796e-08, |
| "logits/chosen": -0.264404296875, |
| "logits/rejected": -0.276702880859375, |
| "logps/chosen": -774.5, |
| "logps/rejected": -894.0, |
| "loss": 0.4589, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.185546875, |
| "rewards/margins": 1.25048828125, |
| "rewards/rejected": -5.43359375, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.89216, |
| "grad_norm": 9.464215441247289, |
| "learning_rate": 2.4440033362731626e-08, |
| "logits/chosen": -0.2911376953125, |
| "logits/rejected": -0.3267822265625, |
| "logps/chosen": -739.5, |
| "logps/rejected": -840.5, |
| "loss": 0.4642, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -4.09375, |
| "rewards/margins": 1.214111328125, |
| "rewards/rejected": -5.30859375, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.89344, |
| "grad_norm": 11.076879696440999, |
| "learning_rate": 2.3868258580857163e-08, |
| "logits/chosen": -0.2603759765625, |
| "logits/rejected": -0.3201904296875, |
| "logps/chosen": -723.0, |
| "logps/rejected": -831.5, |
| "loss": 0.4242, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.93359375, |
| "rewards/margins": 1.27734375, |
| "rewards/rejected": -5.2109375, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.89472, |
| "grad_norm": 9.93416968714434, |
| "learning_rate": 2.330301537319571e-08, |
| "logits/chosen": -0.3101806640625, |
| "logits/rejected": -0.36083984375, |
| "logps/chosen": -727.5, |
| "logps/rejected": -926.0, |
| "loss": 0.4007, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.072265625, |
| "rewards/margins": 1.513671875, |
| "rewards/rejected": -5.5859375, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 8.352284052025437, |
| "learning_rate": 2.2744315060104845e-08, |
| "logits/chosen": -0.173828125, |
| "logits/rejected": -0.23614501953125, |
| "logps/chosen": -693.0, |
| "logps/rejected": -882.5, |
| "loss": 0.3667, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.837890625, |
| "rewards/margins": 1.92333984375, |
| "rewards/rejected": -5.76171875, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.896, |
| "eval_logits/chosen": -0.2226715087890625, |
| "eval_logits/rejected": -0.30279541015625, |
| "eval_logps/chosen": -703.0, |
| "eval_logps/rejected": -800.5, |
| "eval_loss": 0.4712187647819519, |
| "eval_rewards/accuracies": 0.74609375, |
| "eval_rewards/chosen": -3.8193359375, |
| "eval_rewards/margins": 1.1708984375, |
| "eval_rewards/rejected": -4.990234375, |
| "eval_runtime": 27.3686, |
| "eval_samples_per_second": 18.269, |
| "eval_steps_per_second": 0.585, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.89728, |
| "grad_norm": 9.595567585238719, |
| "learning_rate": 2.2192168830904962e-08, |
| "logits/chosen": -0.2698974609375, |
| "logits/rejected": -0.294189453125, |
| "logps/chosen": -675.5, |
| "logps/rejected": -832.0, |
| "loss": 0.4001, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.697265625, |
| "rewards/margins": 1.458984375, |
| "rewards/rejected": -5.15625, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.89856, |
| "grad_norm": 12.777276327167845, |
| "learning_rate": 2.164658774365529e-08, |
| "logits/chosen": -0.28564453125, |
| "logits/rejected": -0.334320068359375, |
| "logps/chosen": -748.5, |
| "logps/rejected": -879.5, |
| "loss": 0.4901, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.021484375, |
| "rewards/margins": 1.2333984375, |
| "rewards/rejected": -5.25390625, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.89984, |
| "grad_norm": 9.224895376105101, |
| "learning_rate": 2.1107582724932088e-08, |
| "logits/chosen": -0.229522705078125, |
| "logits/rejected": -0.2841796875, |
| "logps/chosen": -706.0, |
| "logps/rejected": -798.5, |
| "loss": 0.483, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.982421875, |
| "rewards/margins": 1.16064453125, |
| "rewards/rejected": -5.14453125, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.90112, |
| "grad_norm": 9.774698117520298, |
| "learning_rate": 2.0575164569610016e-08, |
| "logits/chosen": -0.194671630859375, |
| "logits/rejected": -0.2525634765625, |
| "logps/chosen": -700.0, |
| "logps/rejected": -818.0, |
| "loss": 0.4548, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.830078125, |
| "rewards/margins": 1.3994140625, |
| "rewards/rejected": -5.23046875, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.9024, |
| "grad_norm": 9.309057650942739, |
| "learning_rate": 2.0049343940645937e-08, |
| "logits/chosen": -0.20440673828125, |
| "logits/rejected": -0.270263671875, |
| "logps/chosen": -697.0, |
| "logps/rejected": -826.5, |
| "loss": 0.4319, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.767578125, |
| "rewards/margins": 1.33984375, |
| "rewards/rejected": -5.109375, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.90368, |
| "grad_norm": 13.991840106741984, |
| "learning_rate": 1.953013136886541e-08, |
| "logits/chosen": -0.2295684814453125, |
| "logits/rejected": -0.2913818359375, |
| "logps/chosen": -684.5, |
| "logps/rejected": -847.0, |
| "loss": 0.3583, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.62890625, |
| "rewards/margins": 1.5439453125, |
| "rewards/rejected": -5.16796875, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.90496, |
| "grad_norm": 8.819520427002814, |
| "learning_rate": 1.901753725275166e-08, |
| "logits/chosen": -0.14373779296875, |
| "logits/rejected": -0.20538330078125, |
| "logps/chosen": -721.5, |
| "logps/rejected": -832.5, |
| "loss": 0.4801, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.9609375, |
| "rewards/margins": 1.1591796875, |
| "rewards/rejected": -5.12109375, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.90624, |
| "grad_norm": 13.348990809308994, |
| "learning_rate": 1.8511571858237356e-08, |
| "logits/chosen": -0.13494873046875, |
| "logits/rejected": -0.16680908203125, |
| "logps/chosen": -714.0, |
| "logps/rejected": -808.5, |
| "loss": 0.4172, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.765625, |
| "rewards/margins": 1.22314453125, |
| "rewards/rejected": -4.984375, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.90752, |
| "grad_norm": 14.870724504070937, |
| "learning_rate": 1.801224531849908e-08, |
| "logits/chosen": -0.171142578125, |
| "logits/rejected": -0.218017578125, |
| "logps/chosen": -668.0, |
| "logps/rejected": -773.0, |
| "loss": 0.4573, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.64453125, |
| "rewards/margins": 1.0498046875, |
| "rewards/rejected": -4.69140625, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.9088, |
| "grad_norm": 11.205083148950653, |
| "learning_rate": 1.751956763375435e-08, |
| "logits/chosen": -0.22489166259765625, |
| "logits/rejected": -0.2996826171875, |
| "logps/chosen": -690.5, |
| "logps/rejected": -798.0, |
| "loss": 0.4199, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -3.837890625, |
| "rewards/margins": 1.08837890625, |
| "rewards/rejected": -4.921875, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.91008, |
| "grad_norm": 9.544243168409615, |
| "learning_rate": 1.70335486710614e-08, |
| "logits/chosen": -0.2523193359375, |
| "logits/rejected": -0.2890625, |
| "logps/chosen": -686.5, |
| "logps/rejected": -831.0, |
| "loss": 0.4423, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.88671875, |
| "rewards/margins": 1.15478515625, |
| "rewards/rejected": -5.0390625, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.91136, |
| "grad_norm": 12.6996755573186, |
| "learning_rate": 1.6554198164121263e-08, |
| "logits/chosen": -0.2281494140625, |
| "logits/rejected": -0.2470703125, |
| "logps/chosen": -725.0, |
| "logps/rejected": -819.5, |
| "loss": 0.4172, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.76953125, |
| "rewards/margins": 1.28759765625, |
| "rewards/rejected": -5.0546875, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.91264, |
| "grad_norm": 10.207786072588835, |
| "learning_rate": 1.6081525713083427e-08, |
| "logits/chosen": -0.20416259765625, |
| "logits/rejected": -0.2568359375, |
| "logps/chosen": -692.0, |
| "logps/rejected": -842.0, |
| "loss": 0.4328, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.908203125, |
| "rewards/margins": 1.1376953125, |
| "rewards/rejected": -5.04296875, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.91392, |
| "grad_norm": 10.540319829243698, |
| "learning_rate": 1.561554078435296e-08, |
| "logits/chosen": -0.2947998046875, |
| "logits/rejected": -0.3328857421875, |
| "logps/chosen": -727.0, |
| "logps/rejected": -864.0, |
| "loss": 0.4035, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.021484375, |
| "rewards/margins": 1.36328125, |
| "rewards/rejected": -5.38671875, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.9152, |
| "grad_norm": 15.698890404120037, |
| "learning_rate": 1.5156252710401207e-08, |
| "logits/chosen": -0.192352294921875, |
| "logits/rejected": -0.27923583984375, |
| "logps/chosen": -698.0, |
| "logps/rejected": -886.0, |
| "loss": 0.3362, |
| "rewards/accuracies": 0.8671875, |
| "rewards/chosen": -3.923828125, |
| "rewards/margins": 1.6572265625, |
| "rewards/rejected": -5.578125, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.91648, |
| "grad_norm": 13.504679296790746, |
| "learning_rate": 1.4703670689578884e-08, |
| "logits/chosen": -0.232635498046875, |
| "logits/rejected": -0.3350830078125, |
| "logps/chosen": -711.0, |
| "logps/rejected": -889.5, |
| "loss": 0.3787, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.85546875, |
| "rewards/margins": 1.46630859375, |
| "rewards/rejected": -5.32421875, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.91776, |
| "grad_norm": 15.869638544760475, |
| "learning_rate": 1.4257803785931927e-08, |
| "logits/chosen": -0.24554443359375, |
| "logits/rejected": -0.268798828125, |
| "logps/chosen": -677.0, |
| "logps/rejected": -788.5, |
| "loss": 0.5024, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -3.693359375, |
| "rewards/margins": 1.0751953125, |
| "rewards/rejected": -4.767578125, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.91904, |
| "grad_norm": 9.319905258380075, |
| "learning_rate": 1.3818660929019715e-08, |
| "logits/chosen": -0.240478515625, |
| "logits/rejected": -0.2955322265625, |
| "logps/chosen": -679.5, |
| "logps/rejected": -822.0, |
| "loss": 0.4519, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -3.9453125, |
| "rewards/margins": 1.27001953125, |
| "rewards/rejected": -5.2109375, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.92032, |
| "grad_norm": 14.445515309974768, |
| "learning_rate": 1.3386250913736408e-08, |
| "logits/chosen": -0.259307861328125, |
| "logits/rejected": -0.30859375, |
| "logps/chosen": -763.5, |
| "logps/rejected": -907.0, |
| "loss": 0.4477, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.28515625, |
| "rewards/margins": 1.2734375, |
| "rewards/rejected": -5.55859375, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.9216, |
| "grad_norm": 13.180277849379051, |
| "learning_rate": 1.2960582400134912e-08, |
| "logits/chosen": -0.3209228515625, |
| "logits/rejected": -0.3797607421875, |
| "logps/chosen": -695.5, |
| "logps/rejected": -893.5, |
| "loss": 0.3956, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -3.771484375, |
| "rewards/margins": 1.61767578125, |
| "rewards/rejected": -5.390625, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.92288, |
| "grad_norm": 12.337718937891417, |
| "learning_rate": 1.2541663913253191e-08, |
| "logits/chosen": -0.19183349609375, |
| "logits/rejected": -0.258819580078125, |
| "logps/chosen": -740.5, |
| "logps/rejected": -852.5, |
| "loss": 0.5031, |
| "rewards/accuracies": 0.7265625, |
| "rewards/chosen": -4.0703125, |
| "rewards/margins": 1.23486328125, |
| "rewards/rejected": -5.30078125, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.92416, |
| "grad_norm": 9.489271375743723, |
| "learning_rate": 1.2129503842943645e-08, |
| "logits/chosen": -0.22479248046875, |
| "logits/rejected": -0.24298095703125, |
| "logps/chosen": -699.0, |
| "logps/rejected": -837.5, |
| "loss": 0.4083, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.0859375, |
| "rewards/margins": 1.3935546875, |
| "rewards/rejected": -5.48046875, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.92544, |
| "grad_norm": 10.82430093944136, |
| "learning_rate": 1.1724110443705115e-08, |
| "logits/chosen": -0.24761962890625, |
| "logits/rejected": -0.265869140625, |
| "logps/chosen": -704.0, |
| "logps/rejected": -775.0, |
| "loss": 0.5147, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.76953125, |
| "rewards/margins": 0.96337890625, |
| "rewards/rejected": -4.734375, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.92672, |
| "grad_norm": 10.645094500743772, |
| "learning_rate": 1.1325491834517675e-08, |
| "logits/chosen": -0.246551513671875, |
| "logits/rejected": -0.2755126953125, |
| "logps/chosen": -762.0, |
| "logps/rejected": -847.5, |
| "loss": 0.5002, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.234375, |
| "rewards/margins": 1.134033203125, |
| "rewards/rejected": -5.3671875, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 10.724174338748702, |
| "learning_rate": 1.0933655998679653e-08, |
| "logits/chosen": -0.199462890625, |
| "logits/rejected": -0.2744140625, |
| "logps/chosen": -688.0, |
| "logps/rejected": -800.5, |
| "loss": 0.4812, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.857421875, |
| "rewards/margins": 1.195068359375, |
| "rewards/rejected": -5.052734375, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.92928, |
| "grad_norm": 9.575511353587679, |
| "learning_rate": 1.0548610783648198e-08, |
| "logits/chosen": -0.1815185546875, |
| "logits/rejected": -0.19659423828125, |
| "logps/chosen": -692.5, |
| "logps/rejected": -795.5, |
| "loss": 0.4822, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.77734375, |
| "rewards/margins": 1.10498046875, |
| "rewards/rejected": -4.88671875, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.93056, |
| "grad_norm": 11.150367172489458, |
| "learning_rate": 1.0170363900881796e-08, |
| "logits/chosen": -0.236328125, |
| "logits/rejected": -0.3133544921875, |
| "logps/chosen": -714.0, |
| "logps/rejected": -836.5, |
| "loss": 0.4768, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.025390625, |
| "rewards/margins": 1.1396484375, |
| "rewards/rejected": -5.16015625, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.93184, |
| "grad_norm": 9.876466042629044, |
| "learning_rate": 9.798922925685992e-09, |
| "logits/chosen": -0.14385986328125, |
| "logits/rejected": -0.172760009765625, |
| "logps/chosen": -773.0, |
| "logps/rejected": -894.5, |
| "loss": 0.4802, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.216796875, |
| "rewards/margins": 1.3310546875, |
| "rewards/rejected": -5.5546875, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.93312, |
| "grad_norm": 13.034111105340324, |
| "learning_rate": 9.434295297061668e-09, |
| "logits/chosen": -0.2009124755859375, |
| "logits/rejected": -0.24066162109375, |
| "logps/chosen": -754.0, |
| "logps/rejected": -856.0, |
| "loss": 0.5133, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.19921875, |
| "rewards/margins": 1.14404296875, |
| "rewards/rejected": -5.34765625, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.9344, |
| "grad_norm": 8.571599786583675, |
| "learning_rate": 9.076488317555886e-09, |
| "logits/chosen": -0.32958984375, |
| "logits/rejected": -0.383056640625, |
| "logps/chosen": -708.5, |
| "logps/rejected": -856.5, |
| "loss": 0.3725, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.923828125, |
| "rewards/margins": 1.5029296875, |
| "rewards/rejected": -5.43359375, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.93568, |
| "grad_norm": 8.597215059260597, |
| "learning_rate": 8.725509153115918e-09, |
| "logits/chosen": -0.2611083984375, |
| "logits/rejected": -0.32562255859375, |
| "logps/chosen": -733.5, |
| "logps/rejected": -851.5, |
| "loss": 0.4326, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -4.072265625, |
| "rewards/margins": 1.204833984375, |
| "rewards/rejected": -5.275390625, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.93696, |
| "grad_norm": 12.286472292592713, |
| "learning_rate": 8.381364832945459e-09, |
| "logits/chosen": -0.28240966796875, |
| "logits/rejected": -0.30352783203125, |
| "logps/chosen": -752.0, |
| "logps/rejected": -879.5, |
| "loss": 0.4621, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.296875, |
| "rewards/margins": 1.1865234375, |
| "rewards/rejected": -5.48046875, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.93824, |
| "grad_norm": 11.941872693164612, |
| "learning_rate": 8.044062249364047e-09, |
| "logits/chosen": -0.18436813354492188, |
| "logits/rejected": -0.243377685546875, |
| "logps/chosen": -772.0, |
| "logps/rejected": -890.5, |
| "loss": 0.4532, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.326171875, |
| "rewards/margins": 1.33349609375, |
| "rewards/rejected": -5.65625, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.93952, |
| "grad_norm": 10.424030696964069, |
| "learning_rate": 7.713608157668921e-09, |
| "logits/chosen": -0.21624755859375, |
| "logits/rejected": -0.2452392578125, |
| "logps/chosen": -746.0, |
| "logps/rejected": -870.0, |
| "loss": 0.4378, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -4.1875, |
| "rewards/margins": 1.3876953125, |
| "rewards/rejected": -5.57421875, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.9408, |
| "grad_norm": 14.949460184750698, |
| "learning_rate": 7.390009175999834e-09, |
| "logits/chosen": -0.2874755859375, |
| "logits/rejected": -0.319091796875, |
| "logps/chosen": -735.0, |
| "logps/rejected": -890.5, |
| "loss": 0.3524, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -4.1015625, |
| "rewards/margins": 1.51220703125, |
| "rewards/rejected": -5.61328125, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.94208, |
| "grad_norm": 9.527587985419053, |
| "learning_rate": 7.073271785206314e-09, |
| "logits/chosen": -0.279052734375, |
| "logits/rejected": -0.291259765625, |
| "logps/chosen": -720.0, |
| "logps/rejected": -811.5, |
| "loss": 0.4741, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.892578125, |
| "rewards/margins": 1.220703125, |
| "rewards/rejected": -5.11328125, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.94336, |
| "grad_norm": 12.15877893601555, |
| "learning_rate": 6.763402328718115e-09, |
| "logits/chosen": -0.2161865234375, |
| "logits/rejected": -0.2706298828125, |
| "logps/chosen": -712.5, |
| "logps/rejected": -831.0, |
| "loss": 0.3963, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.826171875, |
| "rewards/margins": 1.43603515625, |
| "rewards/rejected": -5.26171875, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.94464, |
| "grad_norm": 8.913274393831173, |
| "learning_rate": 6.460407012417918e-09, |
| "logits/chosen": -0.214202880859375, |
| "logits/rejected": -0.218719482421875, |
| "logps/chosen": -779.0, |
| "logps/rejected": -907.5, |
| "loss": 0.458, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.17578125, |
| "rewards/margins": 1.3076171875, |
| "rewards/rejected": -5.4921875, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.94592, |
| "grad_norm": 13.019357743010502, |
| "learning_rate": 6.164291904517333e-09, |
| "logits/chosen": -0.22491455078125, |
| "logits/rejected": -0.2719573974609375, |
| "logps/chosen": -704.0, |
| "logps/rejected": -834.5, |
| "loss": 0.4764, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -3.91015625, |
| "rewards/margins": 1.169921875, |
| "rewards/rejected": -5.078125, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.9472, |
| "grad_norm": 14.066472266902457, |
| "learning_rate": 5.875062935435121e-09, |
| "logits/chosen": -0.2550048828125, |
| "logits/rejected": -0.27685546875, |
| "logps/chosen": -748.5, |
| "logps/rejected": -869.0, |
| "loss": 0.4442, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.072265625, |
| "rewards/margins": 1.40234375, |
| "rewards/rejected": -5.47265625, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.94848, |
| "grad_norm": 13.888471288481163, |
| "learning_rate": 5.592725897678446e-09, |
| "logits/chosen": -0.2496337890625, |
| "logits/rejected": -0.2772216796875, |
| "logps/chosen": -710.5, |
| "logps/rejected": -837.0, |
| "loss": 0.4921, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.01171875, |
| "rewards/margins": 1.125, |
| "rewards/rejected": -5.1328125, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.94976, |
| "grad_norm": 9.351078828106235, |
| "learning_rate": 5.317286445727193e-09, |
| "logits/chosen": -0.1865997314453125, |
| "logits/rejected": -0.22784423828125, |
| "logps/chosen": -721.5, |
| "logps/rejected": -857.0, |
| "loss": 0.4165, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -4.095703125, |
| "rewards/margins": 1.375, |
| "rewards/rejected": -5.47265625, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.95104, |
| "grad_norm": 8.64152279502156, |
| "learning_rate": 5.048750095920151e-09, |
| "logits/chosen": -0.2593994140625, |
| "logits/rejected": -0.294921875, |
| "logps/chosen": -705.0, |
| "logps/rejected": -815.5, |
| "loss": 0.4576, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -3.935546875, |
| "rewards/margins": 1.132080078125, |
| "rewards/rejected": -5.06640625, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.95232, |
| "grad_norm": 14.443814841229473, |
| "learning_rate": 4.787122226345014e-09, |
| "logits/chosen": -0.17755126953125, |
| "logits/rejected": -0.22381591796875, |
| "logps/chosen": -720.0, |
| "logps/rejected": -811.5, |
| "loss": 0.5046, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -3.841796875, |
| "rewards/margins": 1.265869140625, |
| "rewards/rejected": -5.11328125, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.9536, |
| "grad_norm": 10.100416966960784, |
| "learning_rate": 4.532408076730504e-09, |
| "logits/chosen": -0.1763916015625, |
| "logits/rejected": -0.219482421875, |
| "logps/chosen": -746.5, |
| "logps/rejected": -882.0, |
| "loss": 0.4617, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -4.29296875, |
| "rewards/margins": 1.4052734375, |
| "rewards/rejected": -5.6953125, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.95488, |
| "grad_norm": 16.507849057578323, |
| "learning_rate": 4.284612748341421e-09, |
| "logits/chosen": -0.1527099609375, |
| "logits/rejected": -0.189727783203125, |
| "logps/chosen": -747.5, |
| "logps/rejected": -908.5, |
| "loss": 0.3097, |
| "rewards/accuracies": 0.8828125, |
| "rewards/chosen": -3.98828125, |
| "rewards/margins": 1.7822265625, |
| "rewards/rejected": -5.76953125, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.95616, |
| "grad_norm": 14.09994700103146, |
| "learning_rate": 4.0437412038764826e-09, |
| "logits/chosen": -0.1898193359375, |
| "logits/rejected": -0.2166748046875, |
| "logps/chosen": -748.0, |
| "logps/rejected": -843.0, |
| "loss": 0.409, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.23046875, |
| "rewards/margins": 1.23681640625, |
| "rewards/rejected": -5.47265625, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.95744, |
| "grad_norm": 9.05904451158265, |
| "learning_rate": 3.80979826736893e-09, |
| "logits/chosen": -0.25396728515625, |
| "logits/rejected": -0.2950439453125, |
| "logps/chosen": -742.0, |
| "logps/rejected": -919.0, |
| "loss": 0.383, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.208984375, |
| "rewards/margins": 1.5869140625, |
| "rewards/rejected": -5.79296875, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.95872, |
| "grad_norm": 9.091126144194298, |
| "learning_rate": 3.5827886240899998e-09, |
| "logits/chosen": -0.2483062744140625, |
| "logits/rejected": -0.3052978515625, |
| "logps/chosen": -716.0, |
| "logps/rejected": -846.0, |
| "loss": 0.4343, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.90625, |
| "rewards/margins": 1.361572265625, |
| "rewards/rejected": -5.265625, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 10.017266905551178, |
| "learning_rate": 3.3627168204549304e-09, |
| "logits/chosen": -0.2337646484375, |
| "logits/rejected": -0.277587890625, |
| "logps/chosen": -712.5, |
| "logps/rejected": -828.5, |
| "loss": 0.4071, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.931640625, |
| "rewards/margins": 1.38330078125, |
| "rewards/rejected": -5.31640625, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.96128, |
| "grad_norm": 14.829233382165352, |
| "learning_rate": 3.149587263932035e-09, |
| "logits/chosen": -0.312744140625, |
| "logits/rejected": -0.321533203125, |
| "logps/chosen": -753.5, |
| "logps/rejected": -829.5, |
| "loss": 0.5308, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.072265625, |
| "rewards/margins": 1.01171875, |
| "rewards/rejected": -5.08984375, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.96256, |
| "grad_norm": 15.392472528414778, |
| "learning_rate": 2.9434042229544544e-09, |
| "logits/chosen": -0.29705810546875, |
| "logits/rejected": -0.330322265625, |
| "logps/chosen": -716.5, |
| "logps/rejected": -865.5, |
| "loss": 0.3916, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.953125, |
| "rewards/margins": 1.419921875, |
| "rewards/rejected": -5.37109375, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.96384, |
| "grad_norm": 9.893884489429142, |
| "learning_rate": 2.744171826834474e-09, |
| "logits/chosen": -0.2510986328125, |
| "logits/rejected": -0.26873779296875, |
| "logps/chosen": -753.0, |
| "logps/rejected": -834.0, |
| "loss": 0.4875, |
| "rewards/accuracies": 0.7109375, |
| "rewards/chosen": -3.814453125, |
| "rewards/margins": 1.1533203125, |
| "rewards/rejected": -4.9765625, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.96512, |
| "grad_norm": 14.07898237216757, |
| "learning_rate": 2.5518940656811094e-09, |
| "logits/chosen": -0.281005859375, |
| "logits/rejected": -0.3043212890625, |
| "logps/chosen": -718.5, |
| "logps/rejected": -836.0, |
| "loss": 0.5053, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.9921875, |
| "rewards/margins": 1.32275390625, |
| "rewards/rejected": -5.3125, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.9664, |
| "grad_norm": 15.140775410792148, |
| "learning_rate": 2.366574790319942e-09, |
| "logits/chosen": -0.1983642578125, |
| "logits/rejected": -0.257568359375, |
| "logps/chosen": -697.5, |
| "logps/rejected": -834.5, |
| "loss": 0.4643, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.939453125, |
| "rewards/margins": 1.399169921875, |
| "rewards/rejected": -5.33984375, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.96768, |
| "grad_norm": 20.58163084682585, |
| "learning_rate": 2.188217712216217e-09, |
| "logits/chosen": -0.274169921875, |
| "logits/rejected": -0.28857421875, |
| "logps/chosen": -736.0, |
| "logps/rejected": -826.0, |
| "loss": 0.3668, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -3.97265625, |
| "rewards/margins": 1.29736328125, |
| "rewards/rejected": -5.2734375, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.96896, |
| "grad_norm": 13.921962866937543, |
| "learning_rate": 2.01682640340024e-09, |
| "logits/chosen": -0.2674560546875, |
| "logits/rejected": -0.28857421875, |
| "logps/chosen": -742.5, |
| "logps/rejected": -847.5, |
| "loss": 0.4826, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -4.158203125, |
| "rewards/margins": 1.14892578125, |
| "rewards/rejected": -5.30859375, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.97024, |
| "grad_norm": 10.786302286042769, |
| "learning_rate": 1.8524042963961096e-09, |
| "logits/chosen": -0.2705078125, |
| "logits/rejected": -0.306640625, |
| "logps/chosen": -766.5, |
| "logps/rejected": -916.5, |
| "loss": 0.4616, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.330078125, |
| "rewards/margins": 1.359375, |
| "rewards/rejected": -5.6796875, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.97152, |
| "grad_norm": 8.73289653841681, |
| "learning_rate": 1.6949546841528607e-09, |
| "logits/chosen": -0.16156005859375, |
| "logits/rejected": -0.21087646484375, |
| "logps/chosen": -673.0, |
| "logps/rejected": -794.5, |
| "loss": 0.4352, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.763671875, |
| "rewards/margins": 1.30078125, |
| "rewards/rejected": -5.064453125, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.9728, |
| "grad_norm": 11.011261520088386, |
| "learning_rate": 1.5444807199784471e-09, |
| "logits/chosen": -0.217681884765625, |
| "logits/rejected": -0.2626953125, |
| "logps/chosen": -699.0, |
| "logps/rejected": -835.5, |
| "loss": 0.4936, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -3.9140625, |
| "rewards/margins": 1.07958984375, |
| "rewards/rejected": -4.9921875, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.97408, |
| "grad_norm": 20.320010738404367, |
| "learning_rate": 1.4009854174767521e-09, |
| "logits/chosen": -0.29150390625, |
| "logits/rejected": -0.3443603515625, |
| "logps/chosen": -717.5, |
| "logps/rejected": -860.5, |
| "loss": 0.5028, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.16796875, |
| "rewards/margins": 1.07177734375, |
| "rewards/rejected": -5.23828125, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.97536, |
| "grad_norm": 12.1440388856, |
| "learning_rate": 1.264471650487009e-09, |
| "logits/chosen": -0.253204345703125, |
| "logits/rejected": -0.306884765625, |
| "logps/chosen": -789.5, |
| "logps/rejected": -921.5, |
| "loss": 0.3703, |
| "rewards/accuracies": 0.8359375, |
| "rewards/chosen": -4.087890625, |
| "rewards/margins": 1.7041015625, |
| "rewards/rejected": -5.80078125, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.97664, |
| "grad_norm": 19.756277043960345, |
| "learning_rate": 1.1349421530265247e-09, |
| "logits/chosen": -0.25432395935058594, |
| "logits/rejected": -0.265899658203125, |
| "logps/chosen": -721.5, |
| "logps/rejected": -802.0, |
| "loss": 0.5194, |
| "rewards/accuracies": 0.7578125, |
| "rewards/chosen": -4.05859375, |
| "rewards/margins": 1.168212890625, |
| "rewards/rejected": -5.2265625, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.97792, |
| "grad_norm": 11.30781557188891, |
| "learning_rate": 1.0123995192356182e-09, |
| "logits/chosen": -0.27215576171875, |
| "logits/rejected": -0.3157958984375, |
| "logps/chosen": -729.5, |
| "logps/rejected": -847.5, |
| "loss": 0.4481, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -4.09375, |
| "rewards/margins": 1.2841796875, |
| "rewards/rejected": -5.37890625, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.9792, |
| "grad_norm": 11.090064368669557, |
| "learning_rate": 8.968462033259405e-10, |
| "logits/chosen": -0.2755126953125, |
| "logits/rejected": -0.30224609375, |
| "logps/chosen": -734.0, |
| "logps/rejected": -863.5, |
| "loss": 0.4139, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.12890625, |
| "rewards/margins": 1.259765625, |
| "rewards/rejected": -5.390625, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.98048, |
| "grad_norm": 8.905708808696662, |
| "learning_rate": 7.882845195312016e-10, |
| "logits/chosen": -0.2078857421875, |
| "logits/rejected": -0.2855224609375, |
| "logps/chosen": -683.25, |
| "logps/rejected": -863.5, |
| "loss": 0.3897, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -4.033203125, |
| "rewards/margins": 1.23291015625, |
| "rewards/rejected": -5.26953125, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.98176, |
| "grad_norm": 13.207336578714601, |
| "learning_rate": 6.867166420607362e-10, |
| "logits/chosen": -0.230712890625, |
| "logits/rejected": -0.24365234375, |
| "logps/chosen": -749.0, |
| "logps/rejected": -829.5, |
| "loss": 0.4359, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -4.16796875, |
| "rewards/margins": 1.08740234375, |
| "rewards/rejected": -5.2578125, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.98304, |
| "grad_norm": 15.119029537418763, |
| "learning_rate": 5.921446050561385e-10, |
| "logits/chosen": -0.179931640625, |
| "logits/rejected": -0.24912261962890625, |
| "logps/chosen": -678.5, |
| "logps/rejected": -854.0, |
| "loss": 0.3582, |
| "rewards/accuracies": 0.8515625, |
| "rewards/chosen": -3.90625, |
| "rewards/margins": 1.5869140625, |
| "rewards/rejected": -5.49609375, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.98432, |
| "grad_norm": 18.31926688721905, |
| "learning_rate": 5.045703025503834e-10, |
| "logits/chosen": -0.200286865234375, |
| "logits/rejected": -0.2401123046875, |
| "logps/chosen": -715.5, |
| "logps/rejected": -845.5, |
| "loss": 0.5413, |
| "rewards/accuracies": 0.7421875, |
| "rewards/chosen": -3.947265625, |
| "rewards/margins": 1.12841796875, |
| "rewards/rejected": -5.0703125, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.9856, |
| "grad_norm": 9.632067095301219, |
| "learning_rate": 4.2399548842994017e-10, |
| "logits/chosen": -0.2724609375, |
| "logits/rejected": -0.30517578125, |
| "logps/chosen": -731.5, |
| "logps/rejected": -849.5, |
| "loss": 0.4275, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -3.91796875, |
| "rewards/margins": 1.27099609375, |
| "rewards/rejected": -5.1875, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.98688, |
| "grad_norm": 20.57119149988017, |
| "learning_rate": 3.5042177639972304e-10, |
| "logits/chosen": -0.143798828125, |
| "logits/rejected": -0.193359375, |
| "logps/chosen": -734.0, |
| "logps/rejected": -852.0, |
| "loss": 0.5429, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.04296875, |
| "rewards/margins": 1.1286468505859375, |
| "rewards/rejected": -5.16796875, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.98816, |
| "grad_norm": 20.50073312922875, |
| "learning_rate": 2.8385063995064463e-10, |
| "logits/chosen": -0.21649169921875, |
| "logits/rejected": -0.2684326171875, |
| "logps/chosen": -775.0, |
| "logps/rejected": -883.5, |
| "loss": 0.4822, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.328125, |
| "rewards/margins": 1.306640625, |
| "rewards/rejected": -5.6328125, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.98944, |
| "grad_norm": 10.125073496731055, |
| "learning_rate": 2.2428341233012293e-10, |
| "logits/chosen": -0.2061920166015625, |
| "logits/rejected": -0.2589263916015625, |
| "logps/chosen": -753.0, |
| "logps/rejected": -847.0, |
| "loss": 0.4463, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -4.240234375, |
| "rewards/margins": 1.12890625, |
| "rewards/rejected": -5.3671875, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.99072, |
| "grad_norm": 8.724529533275442, |
| "learning_rate": 1.7172128651554151e-10, |
| "logits/chosen": -0.259307861328125, |
| "logits/rejected": -0.2984619140625, |
| "logps/chosen": -720.5, |
| "logps/rejected": -837.0, |
| "loss": 0.4245, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.974609375, |
| "rewards/margins": 1.40234375, |
| "rewards/rejected": -5.37890625, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 10.031507803839778, |
| "learning_rate": 1.2616531519011876e-10, |
| "logits/chosen": -0.2186279296875, |
| "logits/rejected": -0.2698974609375, |
| "logps/chosen": -761.5, |
| "logps/rejected": -877.0, |
| "loss": 0.4012, |
| "rewards/accuracies": 0.8203125, |
| "rewards/chosen": -4.119140625, |
| "rewards/margins": 1.47802734375, |
| "rewards/rejected": -5.58984375, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.99328, |
| "grad_norm": 11.25971739653657, |
| "learning_rate": 8.761641072196346e-11, |
| "logits/chosen": -0.23828125, |
| "logits/rejected": -0.3046875, |
| "logps/chosen": -719.0, |
| "logps/rejected": -844.5, |
| "loss": 0.451, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.099609375, |
| "rewards/margins": 1.158203125, |
| "rewards/rejected": -5.2578125, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.99456, |
| "grad_norm": 11.91792314235709, |
| "learning_rate": 5.607534514585066e-11, |
| "logits/chosen": -0.221282958984375, |
| "logits/rejected": -0.26171875, |
| "logps/chosen": -714.0, |
| "logps/rejected": -831.5, |
| "loss": 0.4559, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.826171875, |
| "rewards/margins": 1.36328125, |
| "rewards/rejected": -5.1875, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.99584, |
| "grad_norm": 9.429202223984744, |
| "learning_rate": 3.154275014763952e-11, |
| "logits/chosen": -0.1851806640625, |
| "logits/rejected": -0.23614501953125, |
| "logps/chosen": -694.0, |
| "logps/rejected": -843.0, |
| "loss": 0.3918, |
| "rewards/accuracies": 0.8046875, |
| "rewards/chosen": -3.8984375, |
| "rewards/margins": 1.54296875, |
| "rewards/rejected": -5.4453125, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.99712, |
| "grad_norm": 9.182804511873856, |
| "learning_rate": 1.4019117051683461e-11, |
| "logits/chosen": -0.185791015625, |
| "logits/rejected": -0.255859375, |
| "logps/chosen": -720.5, |
| "logps/rejected": -881.5, |
| "loss": 0.3743, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -3.85546875, |
| "rewards/margins": 1.4609375, |
| "rewards/rejected": -5.3203125, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.9984, |
| "grad_norm": 13.685193571545302, |
| "learning_rate": 3.504796810921418e-12, |
| "logits/chosen": -0.2198486328125, |
| "logits/rejected": -0.26318359375, |
| "logps/chosen": -672.0, |
| "logps/rejected": -777.0, |
| "loss": 0.4135, |
| "rewards/accuracies": 0.7890625, |
| "rewards/chosen": -3.607421875, |
| "rewards/margins": 1.242919921875, |
| "rewards/rejected": -4.845703125, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.99968, |
| "grad_norm": 9.030651964482605, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.1768798828125, |
| "logits/rejected": -0.21502685546875, |
| "logps/chosen": -735.5, |
| "logps/rejected": -832.5, |
| "loss": 0.4697, |
| "rewards/accuracies": 0.7734375, |
| "rewards/chosen": -4.044921875, |
| "rewards/margins": 1.16845703125, |
| "rewards/rejected": -5.21484375, |
| "step": 781 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 781, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|