| { | |
| "best_metric": 21.52120590209961, | |
| "best_model_checkpoint": "./output/checkpoints/2024-05-27_09-03-42/checkpoint-1000", | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 1271, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003933910306845004, | |
| "grad_norm": 26.56111717224121, | |
| "learning_rate": 4.6875000000000006e-07, | |
| "logits/chosen": -0.23300500214099884, | |
| "logits/rejected": -0.71368008852005, | |
| "logps/chosen": -1.009130835533142, | |
| "logps/rejected": -1.6767795085906982, | |
| "loss": 25.003, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -3.0723961117473664e-06, | |
| "rewards/margins": -2.978298653033562e-05, | |
| "rewards/rejected": 2.6710587917477824e-05, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.007867820613690008, | |
| "grad_norm": 12.107626914978027, | |
| "learning_rate": 1.25e-06, | |
| "logits/chosen": -0.3981935381889343, | |
| "logits/rejected": -0.7374774217605591, | |
| "logps/chosen": -0.8980743288993835, | |
| "logps/rejected": -1.1683518886566162, | |
| "loss": 24.9985, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -7.3963587965408806e-06, | |
| "rewards/margins": 1.5192717910395004e-05, | |
| "rewards/rejected": -2.2589078071177937e-05, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.011801730920535013, | |
| "grad_norm": 13.772482872009277, | |
| "learning_rate": 2.0312500000000002e-06, | |
| "logits/chosen": -0.35801252722740173, | |
| "logits/rejected": -0.6602836847305298, | |
| "logps/chosen": -0.8121687173843384, | |
| "logps/rejected": -1.0027592182159424, | |
| "loss": 24.995, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -6.642851803917438e-05, | |
| "rewards/margins": 5.0098860810976475e-05, | |
| "rewards/rejected": -0.00011652738612610847, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.015735641227380016, | |
| "grad_norm": 34.1744270324707, | |
| "learning_rate": 2.65625e-06, | |
| "logits/chosen": -0.3884078562259674, | |
| "logits/rejected": -0.7273606061935425, | |
| "logps/chosen": -1.128949522972107, | |
| "logps/rejected": -1.5428626537322998, | |
| "loss": 24.9691, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.00044426563545130193, | |
| "rewards/margins": 0.0003119274042546749, | |
| "rewards/rejected": -0.0007561930106021464, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01966955153422502, | |
| "grad_norm": 14.233505249023438, | |
| "learning_rate": 3.4375e-06, | |
| "logits/chosen": -0.24776780605316162, | |
| "logits/rejected": -0.6983073949813843, | |
| "logps/chosen": -1.1775879859924316, | |
| "logps/rejected": -1.319035291671753, | |
| "loss": 24.9842, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.0011746595846489072, | |
| "rewards/margins": 0.00015929507208056748, | |
| "rewards/rejected": -0.001333954744040966, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.023603461841070025, | |
| "grad_norm": 20.398685455322266, | |
| "learning_rate": 4.21875e-06, | |
| "logits/chosen": -0.31311506032943726, | |
| "logits/rejected": -0.6022701263427734, | |
| "logps/chosen": -0.8845601081848145, | |
| "logps/rejected": -1.1203999519348145, | |
| "loss": 24.9235, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.00107015878893435, | |
| "rewards/margins": 0.0007737897685728967, | |
| "rewards/rejected": -0.0018439484992995858, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02753737214791503, | |
| "grad_norm": 31.93773651123047, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": -0.4594844877719879, | |
| "logits/rejected": -0.7395190596580505, | |
| "logps/chosen": -1.1479393243789673, | |
| "logps/rejected": -1.334791898727417, | |
| "loss": 24.8865, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.0031395156402140856, | |
| "rewards/margins": 0.001148442504927516, | |
| "rewards/rejected": -0.004287957213819027, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.03147128245476003, | |
| "grad_norm": 28.67970848083496, | |
| "learning_rate": 5.781250000000001e-06, | |
| "logits/chosen": -0.3382038176059723, | |
| "logits/rejected": -0.6991093158721924, | |
| "logps/chosen": -0.9335897564888, | |
| "logps/rejected": -1.2500836849212646, | |
| "loss": 24.6453, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.003828689455986023, | |
| "rewards/margins": 0.0037640363443642855, | |
| "rewards/rejected": -0.007592725567519665, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03540519276160504, | |
| "grad_norm": 188.25912475585938, | |
| "learning_rate": 6.5625e-06, | |
| "logits/chosen": -0.665987491607666, | |
| "logits/rejected": -0.9978095293045044, | |
| "logps/chosen": -1.427006483078003, | |
| "logps/rejected": -1.8565871715545654, | |
| "loss": 24.6265, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.011942429468035698, | |
| "rewards/margins": 0.004286443814635277, | |
| "rewards/rejected": -0.016228875145316124, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.03933910306845004, | |
| "grad_norm": 25.781064987182617, | |
| "learning_rate": 7.343750000000001e-06, | |
| "logits/chosen": -0.24072375893592834, | |
| "logits/rejected": -0.3356158137321472, | |
| "logps/chosen": -0.8685480952262878, | |
| "logps/rejected": -1.291337013244629, | |
| "loss": 24.6485, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.005547626875340939, | |
| "rewards/margins": 0.0038076243363320827, | |
| "rewards/rejected": -0.009355251677334309, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.043273013375295044, | |
| "grad_norm": 33.6733512878418, | |
| "learning_rate": 8.125000000000001e-06, | |
| "logits/chosen": -0.5986557006835938, | |
| "logits/rejected": -0.8063839673995972, | |
| "logps/chosen": -1.3582450151443481, | |
| "logps/rejected": -1.497201681137085, | |
| "loss": 24.645, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.014818480238318443, | |
| "rewards/margins": 0.004309489857405424, | |
| "rewards/rejected": -0.019127970561385155, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.04720692368214005, | |
| "grad_norm": 42.20875549316406, | |
| "learning_rate": 8.906250000000001e-06, | |
| "logits/chosen": -0.35940369963645935, | |
| "logits/rejected": -0.8012642860412598, | |
| "logps/chosen": -1.276085615158081, | |
| "logps/rejected": -1.7626205682754517, | |
| "loss": 24.043, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.019142691045999527, | |
| "rewards/margins": 0.01269704382866621, | |
| "rewards/rejected": -0.03183973208069801, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.05114083398898505, | |
| "grad_norm": 93.29661560058594, | |
| "learning_rate": 9.6875e-06, | |
| "logits/chosen": -0.5578395128250122, | |
| "logits/rejected": -0.8026430010795593, | |
| "logps/chosen": -0.9669203758239746, | |
| "logps/rejected": -1.9894134998321533, | |
| "loss": 22.9578, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.016798650845885277, | |
| "rewards/margins": 0.03050239011645317, | |
| "rewards/rejected": -0.0473010428249836, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.05507474429583006, | |
| "grad_norm": 52.22159194946289, | |
| "learning_rate": 1.046875e-05, | |
| "logits/chosen": -0.5786597728729248, | |
| "logits/rejected": -0.8957271575927734, | |
| "logps/chosen": -1.2181226015090942, | |
| "logps/rejected": -1.7076442241668701, | |
| "loss": 23.1897, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.029290784150362015, | |
| "rewards/margins": 0.022643322125077248, | |
| "rewards/rejected": -0.05193411186337471, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.059008654602675056, | |
|         "grad_norm": null, | |
| "learning_rate": 1.0937500000000002e-05, | |
| "logits/chosen": -0.4328407347202301, | |
| "logits/rejected": -0.7798544764518738, | |
| "logps/chosen": -1.1851781606674194, | |
| "logps/rejected": -2.4100356101989746, | |
| "loss": 26.8402, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0339423306286335, | |
| "rewards/margins": 0.05872698873281479, | |
| "rewards/rejected": -0.09266932308673859, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.06294256490952006, | |
| "grad_norm": 51.69522476196289, | |
| "learning_rate": 1.171875e-05, | |
| "logits/chosen": -0.6042202711105347, | |
| "logits/rejected": -0.8020713925361633, | |
| "logps/chosen": -1.2562280893325806, | |
| "logps/rejected": -1.9946972131729126, | |
| "loss": 23.3327, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.03992198035120964, | |
| "rewards/margins": 0.04370134323835373, | |
| "rewards/rejected": -0.08362331986427307, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06687647521636507, | |
| "grad_norm": 49.019283294677734, | |
| "learning_rate": 1.25e-05, | |
| "logits/chosen": -0.7644809484481812, | |
| "logits/rejected": -1.0907962322235107, | |
| "logps/chosen": -1.3791667222976685, | |
| "logps/rejected": -1.8712146282196045, | |
| "loss": 23.2878, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.05529268458485603, | |
| "rewards/margins": 0.025984305888414383, | |
| "rewards/rejected": -0.08127699047327042, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.07081038552321008, | |
| "grad_norm": 69.54235076904297, | |
| "learning_rate": 1.3281250000000001e-05, | |
| "logits/chosen": -0.8694972991943359, | |
| "logits/rejected": -1.0780664682388306, | |
| "logps/chosen": -1.6866194009780884, | |
| "logps/rejected": -2.2792537212371826, | |
| "loss": 26.1959, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.07276991009712219, | |
| "rewards/margins": 0.03862577676773071, | |
| "rewards/rejected": -0.1113956943154335, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07474429583005507, | |
| "grad_norm": 79.65019989013672, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "logits/chosen": -0.9231563806533813, | |
| "logits/rejected": -1.215816855430603, | |
| "logps/chosen": -1.5471779108047485, | |
| "logps/rejected": -2.4878039360046387, | |
| "loss": 23.5309, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.06874530017375946, | |
| "rewards/margins": 0.04303915798664093, | |
| "rewards/rejected": -0.11178445816040039, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.07867820613690008, | |
| "grad_norm": 194.3590850830078, | |
| "learning_rate": 1.4843750000000002e-05, | |
| "logits/chosen": -1.0412330627441406, | |
| "logits/rejected": -1.2349543571472168, | |
| "logps/chosen": -1.5710781812667847, | |
| "logps/rejected": -2.36897611618042, | |
| "loss": 24.1227, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.06695514917373657, | |
| "rewards/margins": 0.03971674293279648, | |
| "rewards/rejected": -0.10667190700769424, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07867820613690008, | |
| "eval_logits/chosen": -1.1061171293258667, | |
| "eval_logits/rejected": -1.3813872337341309, | |
| "eval_logps/chosen": -1.8335192203521729, | |
| "eval_logps/rejected": -2.4041695594787598, | |
| "eval_loss": 23.714366912841797, | |
| "eval_rewards/accuracies": 0.6312500238418579, | |
| "eval_rewards/chosen": -0.08083178102970123, | |
| "eval_rewards/margins": 0.03503800183534622, | |
| "eval_rewards/rejected": -0.11586978286504745, | |
| "eval_runtime": 247.2, | |
| "eval_samples_per_second": 2.589, | |
| "eval_steps_per_second": 0.162, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.08261211644374508, | |
| "grad_norm": 80.87075805664062, | |
| "learning_rate": 1.546875e-05, | |
| "logits/chosen": -0.9996848106384277, | |
| "logits/rejected": -1.1654517650604248, | |
| "logps/chosen": -1.577480673789978, | |
| "logps/rejected": -2.5612683296203613, | |
| "loss": 28.6426, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.06727541238069534, | |
| "rewards/margins": 0.05553777888417244, | |
| "rewards/rejected": -0.12281318753957748, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.08654602675059009, | |
| "grad_norm": 73.16770935058594, | |
| "learning_rate": 1.6250000000000002e-05, | |
| "logits/chosen": -1.1857019662857056, | |
| "logits/rejected": -1.5013740062713623, | |
| "logps/chosen": -1.474392294883728, | |
| "logps/rejected": -2.593820095062256, | |
| "loss": 21.5001, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.06618812680244446, | |
| "rewards/margins": 0.05912921577692032, | |
| "rewards/rejected": -0.12531733512878418, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0904799370574351, | |
| "grad_norm": 104.88764190673828, | |
| "learning_rate": 1.703125e-05, | |
| "logits/chosen": -1.3142554759979248, | |
| "logits/rejected": -1.3756258487701416, | |
| "logps/chosen": -2.3472671508789062, | |
| "logps/rejected": -3.370978832244873, | |
| "loss": 23.5687, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.11941848695278168, | |
| "rewards/margins": 0.0546293742954731, | |
| "rewards/rejected": -0.17404787242412567, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.0944138473642801, | |
| "grad_norm": 72.0654525756836, | |
| "learning_rate": 1.7812500000000003e-05, | |
| "logits/chosen": -1.2925318479537964, | |
| "logits/rejected": -1.3378468751907349, | |
| "logps/chosen": -1.7563148736953735, | |
| "logps/rejected": -2.4153127670288086, | |
| "loss": 22.8458, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.08634118735790253, | |
| "rewards/margins": 0.04032357782125473, | |
| "rewards/rejected": -0.12666477262973785, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0983477576711251, | |
| "grad_norm": 109.86412048339844, | |
| "learning_rate": 1.859375e-05, | |
| "logits/chosen": -1.1322760581970215, | |
| "logits/rejected": -1.2811723947525024, | |
| "logps/chosen": -2.012166738510132, | |
| "logps/rejected": -2.9437954425811768, | |
| "loss": 24.6766, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.11020763218402863, | |
| "rewards/margins": 0.04710468277335167, | |
| "rewards/rejected": -0.1573123037815094, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.1022816679779701, | |
| "grad_norm": 70.12146759033203, | |
| "learning_rate": 1.9375e-05, | |
| "logits/chosen": -1.121302604675293, | |
| "logits/rejected": -1.4168516397476196, | |
| "logps/chosen": -2.2604711055755615, | |
| "logps/rejected": -2.7976536750793457, | |
| "loss": 22.5659, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.12430934607982635, | |
| "rewards/margins": 0.029838770627975464, | |
| "rewards/rejected": -0.154148131608963, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10621557828481511, | |
| "grad_norm": 67.6087875366211, | |
| "learning_rate": 1.999996222738818e-05, | |
| "logits/chosen": -1.164771318435669, | |
| "logits/rejected": -1.427054762840271, | |
| "logps/chosen": -2.148642063140869, | |
| "logps/rejected": -2.688873291015625, | |
| "loss": 22.3161, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.1076754778623581, | |
| "rewards/margins": 0.03560269996523857, | |
| "rewards/rejected": -0.14327818155288696, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.11014948859166011, | |
| "grad_norm": 619.0317993164062, | |
| "learning_rate": 1.999864021593644e-05, | |
| "logits/chosen": -1.1507548093795776, | |
| "logits/rejected": -1.3616728782653809, | |
| "logps/chosen": -2.8358840942382812, | |
| "logps/rejected": -4.126664161682129, | |
| "loss": 25.3403, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.16398155689239502, | |
| "rewards/margins": 0.07606328278779984, | |
| "rewards/rejected": -0.24004480242729187, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.11408339889850512, | |
| "grad_norm": 148.94737243652344, | |
| "learning_rate": 1.9995429859238083e-05, | |
| "logits/chosen": -1.1917606592178345, | |
| "logits/rejected": -1.3496078252792358, | |
| "logps/chosen": -2.2078733444213867, | |
| "logps/rejected": -3.3514275550842285, | |
| "loss": 20.3823, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.14150744676589966, | |
| "rewards/margins": 0.08197928965091705, | |
| "rewards/rejected": -0.2234867364168167, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.11801730920535011, | |
| "grad_norm": 1704.982421875, | |
| "learning_rate": 1.999033176360174e-05, | |
| "logits/chosen": -1.2818559408187866, | |
| "logits/rejected": -1.3965641260147095, | |
| "logps/chosen": -4.222221374511719, | |
| "logps/rejected": -5.802225589752197, | |
| "loss": 37.61, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.29492706060409546, | |
| "rewards/margins": 0.1222861185669899, | |
| "rewards/rejected": -0.4172131419181824, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12195121951219512, | |
| "grad_norm": 269.0439453125, | |
| "learning_rate": 1.9983346891854798e-05, | |
| "logits/chosen": -1.515080213546753, | |
| "logits/rejected": -1.478830337524414, | |
| "logps/chosen": -3.518225908279419, | |
| "logps/rejected": -4.065728187561035, | |
| "loss": 31.8289, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.23775264620780945, | |
| "rewards/margins": 0.047318849712610245, | |
| "rewards/rejected": -0.285071462392807, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.12588512981904013, | |
| "grad_norm": 169.8654022216797, | |
| "learning_rate": 1.9974476563161572e-05, | |
| "logits/chosen": -1.4493688344955444, | |
| "logits/rejected": -1.6804521083831787, | |
| "logps/chosen": -2.9031851291656494, | |
| "logps/rejected": -3.8406753540039062, | |
| "loss": 21.2523, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.18268796801567078, | |
| "rewards/margins": 0.06636744737625122, | |
| "rewards/rejected": -0.2490553855895996, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12981904012588513, | |
| "grad_norm": 102.75287628173828, | |
| "learning_rate": 1.996372245277416e-05, | |
| "logits/chosen": -1.4163097143173218, | |
| "logits/rejected": -1.546154260635376, | |
| "logps/chosen": -2.2338547706604004, | |
| "logps/rejected": -2.8918473720550537, | |
| "loss": 22.9048, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.13199904561042786, | |
| "rewards/margins": 0.04669683054089546, | |
| "rewards/rejected": -0.17869587242603302, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.13375295043273014, | |
| "grad_norm": 175.13946533203125, | |
| "learning_rate": 1.995108659171607e-05, | |
| "logits/chosen": -1.425185203552246, | |
| "logits/rejected": -1.6292508840560913, | |
| "logps/chosen": -2.4529311656951904, | |
| "logps/rejected": -3.5743954181671143, | |
| "loss": 21.3254, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.14629025757312775, | |
| "rewards/margins": 0.07296375930309296, | |
| "rewards/rejected": -0.2192540168762207, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13768686073957515, | |
| "grad_norm": 117.64968872070312, | |
| "learning_rate": 1.9936571366398605e-05, | |
| "logits/chosen": -1.3784323930740356, | |
| "logits/rejected": -1.521807074546814, | |
| "logps/chosen": -2.3742852210998535, | |
| "logps/rejected": -3.2318038940429688, | |
| "loss": 21.5947, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.15116852521896362, | |
| "rewards/margins": 0.06355693191289902, | |
| "rewards/rejected": -0.21472544968128204, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14162077104642015, | |
| "grad_norm": 139.72425842285156, | |
| "learning_rate": 1.9920179518170212e-05, | |
| "logits/chosen": -1.2019041776657104, | |
| "logits/rejected": -1.3150001764297485, | |
| "logps/chosen": -3.229523181915283, | |
| "logps/rejected": -3.6520283222198486, | |
| "loss": 26.1244, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.20531490445137024, | |
| "rewards/margins": 0.03322164714336395, | |
| "rewards/rejected": -0.2385365217924118, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.14555468135326516, | |
| "grad_norm": 190.1792755126953, | |
| "learning_rate": 1.9901914142798715e-05, | |
| "logits/chosen": -1.389827013015747, | |
| "logits/rejected": -1.6211198568344116, | |
| "logps/chosen": -3.402869462966919, | |
| "logps/rejected": -4.286434173583984, | |
| "loss": 22.2432, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.20572206377983093, | |
| "rewards/margins": 0.06867971271276474, | |
| "rewards/rejected": -0.27440178394317627, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.14948859166011014, | |
| "grad_norm": 151.8868865966797, | |
| "learning_rate": 1.9881778689886658e-05, | |
| "logits/chosen": -1.4556262493133545, | |
| "logits/rejected": -1.6839582920074463, | |
| "logps/chosen": -2.455641269683838, | |
| "logps/rejected": -3.4040608406066895, | |
| "loss": 20.236, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.16571606695652008, | |
| "rewards/margins": 0.07361607253551483, | |
| "rewards/rejected": -0.23933212459087372, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.15342250196695514, | |
| "grad_norm": 224.09608459472656, | |
| "learning_rate": 1.9859776962219826e-05, | |
| "logits/chosen": -1.5867639780044556, | |
| "logits/rejected": -1.6572364568710327, | |
| "logps/chosen": -3.317305088043213, | |
| "logps/rejected": -5.196422100067139, | |
| "loss": 21.2057, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.23907625675201416, | |
| "rewards/margins": 0.13949504494667053, | |
| "rewards/rejected": -0.3785712718963623, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.15735641227380015, | |
| "grad_norm": 130.1480712890625, | |
| "learning_rate": 1.9835913115049022e-05, | |
| "logits/chosen": -1.5566515922546387, | |
| "logits/rejected": -1.6651328802108765, | |
| "logps/chosen": -3.3987045288085938, | |
| "logps/rejected": -4.329782009124756, | |
| "loss": 22.9124, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.25078803300857544, | |
| "rewards/margins": 0.07485075294971466, | |
| "rewards/rejected": -0.3256387710571289, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.15735641227380015, | |
| "eval_logits/chosen": -2.0355148315429688, | |
| "eval_logits/rejected": -2.2883784770965576, | |
| "eval_logps/chosen": -3.306915283203125, | |
| "eval_logps/rejected": -4.350674629211426, | |
| "eval_loss": 22.99012565612793, | |
| "eval_rewards/accuracies": 0.6546875238418579, | |
| "eval_rewards/chosen": -0.22817137837409973, | |
| "eval_rewards/margins": 0.08234894275665283, | |
| "eval_rewards/rejected": -0.31052032113075256, | |
| "eval_runtime": 247.2935, | |
| "eval_samples_per_second": 2.588, | |
| "eval_steps_per_second": 0.162, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 133.68333435058594, | |
| "learning_rate": 1.9810191655305326e-05, | |
| "logits/chosen": -1.5825779438018799, | |
| "logits/rejected": -1.7512319087982178, | |
| "logps/chosen": -2.8017444610595703, | |
| "logps/rejected": -4.179649353027344, | |
| "loss": 23.0091, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.19492463767528534, | |
| "rewards/margins": 0.08605017513036728, | |
| "rewards/rejected": -0.280974805355072, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.16522423288749016, | |
| "grad_norm": 118.48152160644531, | |
| "learning_rate": 1.9782617440748918e-05, | |
| "logits/chosen": -1.689234972000122, | |
| "logits/rejected": -1.8324044942855835, | |
| "logps/chosen": -2.277280330657959, | |
| "logps/rejected": -2.580872058868408, | |
| "loss": 23.8451, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.14595453441143036, | |
| "rewards/margins": 0.022251242771744728, | |
| "rewards/rejected": -0.16820578277111053, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.16915814319433517, | |
| "grad_norm": 140.0244903564453, | |
| "learning_rate": 1.975319567905163e-05, | |
| "logits/chosen": -1.753382921218872, | |
| "logits/rejected": -1.891911506652832, | |
| "logps/chosen": -2.793912410736084, | |
| "logps/rejected": -3.64801287651062, | |
| "loss": 21.8372, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.15913638472557068, | |
| "rewards/margins": 0.0484389066696167, | |
| "rewards/rejected": -0.20757532119750977, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.17309205350118018, | |
| "grad_norm": 185.76527404785156, | |
| "learning_rate": 1.9721931926813415e-05, | |
| "logits/chosen": -1.8908586502075195, | |
| "logits/rejected": -1.9289289712905884, | |
| "logps/chosen": -3.3189690113067627, | |
| "logps/rejected": -3.903099775314331, | |
| "loss": 22.522, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.20980055630207062, | |
| "rewards/margins": 0.03744065761566162, | |
| "rewards/rejected": -0.24724121391773224, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.17702596380802518, | |
| "grad_norm": 134.59030151367188, | |
| "learning_rate": 1.9688832088512955e-05, | |
| "logits/chosen": -1.73268723487854, | |
| "logits/rejected": -1.8541278839111328, | |
| "logps/chosen": -3.7829718589782715, | |
| "logps/rejected": -4.3722686767578125, | |
| "loss": 25.3367, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.28310832381248474, | |
| "rewards/margins": 0.029837841168045998, | |
| "rewards/rejected": -0.3129461705684662, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.1809598741148702, | |
| "grad_norm": 126.69501495361328, | |
| "learning_rate": 1.9653902415392517e-05, | |
| "logits/chosen": -1.8219941854476929, | |
| "logits/rejected": -1.9303795099258423, | |
| "logps/chosen": -3.337585926055908, | |
| "logps/rejected": -4.7950544357299805, | |
| "loss": 19.7298, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.25193560123443604, | |
| "rewards/margins": 0.09537702798843384, | |
| "rewards/rejected": -0.3473126292228699, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1848937844217152, | |
| "grad_norm": 157.1126708984375, | |
| "learning_rate": 1.961714950427734e-05, | |
| "logits/chosen": -1.7077398300170898, | |
| "logits/rejected": -1.8231598138809204, | |
| "logps/chosen": -3.6711058616638184, | |
| "logps/rejected": -5.125331401824951, | |
| "loss": 21.3529, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2676393389701843, | |
| "rewards/margins": 0.08539289236068726, | |
| "rewards/rejected": -0.35303226113319397, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.1888276947285602, | |
| "grad_norm": 229.6774444580078, | |
| "learning_rate": 1.957858029632978e-05, | |
| "logits/chosen": -1.6331892013549805, | |
| "logits/rejected": -1.7666242122650146, | |
| "logps/chosen": -4.580256462097168, | |
| "logps/rejected": -5.432528972625732, | |
| "loss": 21.1146, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.32507577538490295, | |
| "rewards/margins": 0.0653512254357338, | |
| "rewards/rejected": -0.39042696356773376, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.19276160503540518, | |
| "grad_norm": 192.6251220703125, | |
| "learning_rate": 1.9538202075738373e-05, | |
| "logits/chosen": -1.5171202421188354, | |
| "logits/rejected": -1.6242902278900146, | |
| "logps/chosen": -3.6769096851348877, | |
| "logps/rejected": -4.305304527282715, | |
| "loss": 22.4899, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.2733447551727295, | |
| "rewards/margins": 0.047822751104831696, | |
| "rewards/rejected": -0.321167528629303, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.1966955153422502, | |
| "grad_norm": 154.02066040039062, | |
| "learning_rate": 1.9496022468342163e-05, | |
| "logits/chosen": -1.4627959728240967, | |
| "logits/rejected": -1.5201256275177002, | |
| "logps/chosen": -3.598710536956787, | |
| "logps/rejected": -4.086635112762451, | |
| "loss": 23.9305, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.2709196209907532, | |
| "rewards/margins": 0.03638899326324463, | |
| "rewards/rejected": -0.3073085844516754, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2006294256490952, | |
| "grad_norm": 103.46431732177734, | |
| "learning_rate": 1.9452049440190473e-05, | |
| "logits/chosen": -1.3153817653656006, | |
| "logits/rejected": -1.4681147336959839, | |
| "logps/chosen": -3.1210968494415283, | |
| "logps/rejected": -3.9421095848083496, | |
| "loss": 22.1329, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.2205667495727539, | |
| "rewards/margins": 0.06444404274225235, | |
| "rewards/rejected": -0.28501078486442566, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.2045633359559402, | |
| "grad_norm": 187.50987243652344, | |
| "learning_rate": 1.940629129603844e-05, | |
| "logits/chosen": -1.442635416984558, | |
| "logits/rejected": -1.5263605117797852, | |
| "logps/chosen": -3.5598156452178955, | |
| "logps/rejected": -3.8421435356140137, | |
| "loss": 26.5362, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.23259183764457703, | |
| "rewards/margins": 0.018016267567873, | |
| "rewards/rejected": -0.2506081163883209, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.2084972462627852, | |
| "grad_norm": 228.58592224121094, | |
| "learning_rate": 1.9358756677778578e-05, | |
| "logits/chosen": -1.4751585721969604, | |
| "logits/rejected": -1.6874910593032837, | |
| "logps/chosen": -2.8757126331329346, | |
| "logps/rejected": -3.79717755317688, | |
| "loss": 20.8412, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.19514627754688263, | |
| "rewards/margins": 0.06573396921157837, | |
| "rewards/rejected": -0.2608802616596222, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.21243115656963021, | |
| "grad_norm": 177.6842803955078, | |
| "learning_rate": 1.9309454562808656e-05, | |
| "logits/chosen": -1.4525483846664429, | |
| "logits/rejected": -1.6046775579452515, | |
| "logps/chosen": -2.6963038444519043, | |
| "logps/rejected": -3.8056578636169434, | |
| "loss": 21.8278, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.18190334737300873, | |
| "rewards/margins": 0.08625774085521698, | |
| "rewards/rejected": -0.2681610882282257, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.21636506687647522, | |
| "grad_norm": 133.5586395263672, | |
| "learning_rate": 1.925839426233625e-05, | |
| "logits/chosen": -1.4591630697250366, | |
| "logits/rejected": -1.4536678791046143, | |
| "logps/chosen": -2.8722312450408936, | |
| "logps/rejected": -3.7189888954162598, | |
| "loss": 21.1936, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.1960582137107849, | |
| "rewards/margins": 0.06288814544677734, | |
| "rewards/rejected": -0.25894635915756226, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.22029897718332023, | |
| "grad_norm": 151.77810668945312, | |
| "learning_rate": 1.9205585419620197e-05, | |
| "logits/chosen": -1.39272141456604, | |
| "logits/rejected": -1.4622434377670288, | |
| "logps/chosen": -2.742765188217163, | |
| "logps/rejected": -3.9621074199676514, | |
| "loss": 22.4231, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.18704722821712494, | |
| "rewards/margins": 0.08700847625732422, | |
| "rewards/rejected": -0.27405571937561035, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.22423288749016523, | |
| "grad_norm": 136.83651733398438, | |
| "learning_rate": 1.9151038008149393e-05, | |
| "logits/chosen": -1.4424396753311157, | |
| "logits/rejected": -1.517502784729004, | |
| "logps/chosen": -3.267077922821045, | |
| "logps/rejected": -4.230588912963867, | |
| "loss": 24.1373, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.20152482390403748, | |
| "rewards/margins": 0.06966569274663925, | |
| "rewards/rejected": -0.2711905241012573, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.22816679779701024, | |
| "grad_norm": 130.2345428466797, | |
| "learning_rate": 1.909476232975918e-05, | |
| "logits/chosen": -1.2587270736694336, | |
| "logits/rejected": -1.3539146184921265, | |
| "logps/chosen": -2.6924257278442383, | |
| "logps/rejected": -3.551044464111328, | |
| "loss": 22.9406, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.1798776388168335, | |
| "rewards/margins": 0.043981801718473434, | |
| "rewards/rejected": -0.22385945916175842, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.23210070810385522, | |
| "grad_norm": 103.1630630493164, | |
| "learning_rate": 1.903676901268575e-05, | |
| "logits/chosen": -1.5180273056030273, | |
| "logits/rejected": -1.5656169652938843, | |
| "logps/chosen": -2.8886916637420654, | |
| "logps/rejected": -3.570155620574951, | |
| "loss": 23.701, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.19855554401874542, | |
| "rewards/margins": 0.042535360902547836, | |
| "rewards/rejected": -0.24109089374542236, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.23603461841070023, | |
| "grad_norm": 160.47129821777344, | |
| "learning_rate": 1.8977069009558882e-05, | |
| "logits/chosen": -1.3101279735565186, | |
| "logits/rejected": -1.4326939582824707, | |
| "logps/chosen": -3.139376163482666, | |
| "logps/rejected": -3.806004285812378, | |
| "loss": 24.4116, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.22226376831531525, | |
| "rewards/margins": 0.04967343434691429, | |
| "rewards/rejected": -0.27193719148635864, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23603461841070023, | |
| "eval_logits/chosen": -2.216668128967285, | |
| "eval_logits/rejected": -2.479491710662842, | |
| "eval_logps/chosen": -3.180922508239746, | |
| "eval_logps/rejected": -4.010288238525391, | |
| "eval_loss": 23.414220809936523, | |
| "eval_rewards/accuracies": 0.6468750238418579, | |
| "eval_rewards/chosen": -0.21557214856147766, | |
| "eval_rewards/margins": 0.06090952828526497, | |
| "eval_rewards/rejected": -0.2764816880226135, | |
| "eval_runtime": 247.1728, | |
| "eval_samples_per_second": 2.589, | |
| "eval_steps_per_second": 0.162, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23996852871754523, | |
| "grad_norm": 281.60491943359375, | |
| "learning_rate": 1.8915673595333443e-05, | |
| "logits/chosen": -1.5050787925720215, | |
| "logits/rejected": -1.6323124170303345, | |
| "logps/chosen": -3.338160276412964, | |
| "logps/rejected": -4.359830379486084, | |
| "loss": 22.5138, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.2318885773420334, | |
| "rewards/margins": 0.06987401843070984, | |
| "rewards/rejected": -0.3017626106739044, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.24390243902439024, | |
| "grad_norm": 260.46575927734375, | |
| "learning_rate": 1.8852594365159986e-05, | |
| "logits/chosen": -1.451187252998352, | |
| "logits/rejected": -1.4611809253692627, | |
| "logps/chosen": -3.6160502433776855, | |
| "logps/rejected": -4.279057025909424, | |
| "loss": 24.5229, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2361462116241455, | |
| "rewards/margins": 0.034786950796842575, | |
| "rewards/rejected": -0.27093321084976196, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.24783634933123525, | |
| "grad_norm": 74.10368347167969, | |
| "learning_rate": 1.8787843232194886e-05, | |
| "logits/chosen": -1.482669472694397, | |
| "logits/rejected": -1.583233118057251, | |
| "logps/chosen": -3.6931068897247314, | |
| "logps/rejected": -4.4176764488220215, | |
| "loss": 21.9835, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2447434663772583, | |
| "rewards/margins": 0.047359541058540344, | |
| "rewards/rejected": -0.29210299253463745, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.25177025963808025, | |
| "grad_norm": 128.80845642089844, | |
| "learning_rate": 1.8721432425350426e-05, | |
| "logits/chosen": -1.5568349361419678, | |
| "logits/rejected": -1.525159478187561, | |
| "logps/chosen": -3.331374406814575, | |
| "logps/rejected": -4.187075138092041, | |
| "loss": 23.4193, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.24244609475135803, | |
| "rewards/margins": 0.057249516248703, | |
| "rewards/rejected": -0.29969558119773865, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.25570416994492523, | |
| "grad_norm": 1299.7669677734375, | |
| "learning_rate": 1.865337448698524e-05, | |
| "logits/chosen": -1.580106496810913, | |
| "logits/rejected": -1.6747982501983643, | |
| "logps/chosen": -3.402766466140747, | |
| "logps/rejected": -4.652956485748291, | |
| "loss": 22.8937, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2514630854129791, | |
| "rewards/margins": 0.08678792417049408, | |
| "rewards/rejected": -0.3382510244846344, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.25963808025177026, | |
| "grad_norm": 218.9337615966797, | |
| "learning_rate": 1.8583682270535558e-05, | |
| "logits/chosen": -1.5960299968719482, | |
| "logits/rejected": -1.6804955005645752, | |
| "logps/chosen": -3.6657605171203613, | |
| "logps/rejected": -4.17224645614624, | |
| "loss": 21.3752, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.25297340750694275, | |
| "rewards/margins": 0.05916937440633774, | |
| "rewards/rejected": -0.3121427893638611, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.26357199055861524, | |
| "grad_norm": 140.0518798828125, | |
| "learning_rate": 1.8512368938087703e-05, | |
| "logits/chosen": -1.6643073558807373, | |
| "logits/rejected": -1.7697950601577759, | |
| "logps/chosen": -4.020524501800537, | |
| "logps/rejected": -4.902769565582275, | |
| "loss": 21.1926, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3161371350288391, | |
| "rewards/margins": 0.0684891939163208, | |
| "rewards/rejected": -0.3846263289451599, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.2675059008654603, | |
| "grad_norm": 121.75955963134766, | |
| "learning_rate": 1.8439447957892308e-05, | |
| "logits/chosen": -1.6311817169189453, | |
| "logits/rejected": -1.6869274377822876, | |
| "logps/chosen": -4.317450523376465, | |
| "logps/rejected": -5.130967140197754, | |
| "loss": 22.8217, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.33960452675819397, | |
| "rewards/margins": 0.06364957988262177, | |
| "rewards/rejected": -0.40325412154197693, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.27143981117230526, | |
| "grad_norm": 117.1114730834961, | |
| "learning_rate": 1.8364933101820673e-05, | |
| "logits/chosen": -1.6428329944610596, | |
| "logits/rejected": -1.6879663467407227, | |
| "logps/chosen": -3.7996773719787598, | |
| "logps/rejected": -5.0632219314575195, | |
| "loss": 22.1705, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.293424516916275, | |
| "rewards/margins": 0.07091490924358368, | |
| "rewards/rejected": -0.3643394112586975, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.2753737214791503, | |
| "grad_norm": 298.380859375, | |
| "learning_rate": 1.8288838442763838e-05, | |
| "logits/chosen": -1.6302757263183594, | |
| "logits/rejected": -1.665606141090393, | |
| "logps/chosen": -4.570167541503906, | |
| "logps/rejected": -5.593611717224121, | |
| "loss": 21.4761, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3547164797782898, | |
| "rewards/margins": 0.08478643000125885, | |
| "rewards/rejected": -0.43950289487838745, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.27930763178599527, | |
| "grad_norm": 255.9145050048828, | |
| "learning_rate": 1.8211178351974762e-05, | |
| "logits/chosen": -1.6789066791534424, | |
| "logits/rejected": -1.717881202697754, | |
| "logps/chosen": -4.821520805358887, | |
| "logps/rejected": -6.301035404205322, | |
| "loss": 20.7457, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.38325121998786926, | |
| "rewards/margins": 0.12138603627681732, | |
| "rewards/rejected": -0.5046372413635254, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.2832415420928403, | |
| "grad_norm": 82.21104431152344, | |
| "learning_rate": 1.813196749635415e-05, | |
| "logits/chosen": -1.5261609554290771, | |
| "logits/rejected": -1.6307690143585205, | |
| "logps/chosen": -4.540532112121582, | |
| "logps/rejected": -5.993468761444092, | |
| "loss": 18.2253, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3627888858318329, | |
| "rewards/margins": 0.13603493571281433, | |
| "rewards/rejected": -0.4988238215446472, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2871754523996853, | |
| "grad_norm": 254.77951049804688, | |
| "learning_rate": 1.8051220835680475e-05, | |
| "logits/chosen": -1.5561740398406982, | |
| "logits/rejected": -1.6325286626815796, | |
| "logps/chosen": -4.805132865905762, | |
| "logps/rejected": -5.560373783111572, | |
| "loss": 27.0414, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3958406448364258, | |
| "rewards/margins": 0.03839923068881035, | |
| "rewards/rejected": -0.43423986434936523, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2911093627065303, | |
| "grad_norm": 261.9372253417969, | |
| "learning_rate": 1.7968953619784675e-05, | |
| "logits/chosen": -1.6089742183685303, | |
| "logits/rejected": -1.6071525812149048, | |
| "logps/chosen": -5.670124053955078, | |
| "logps/rejected": -5.927383899688721, | |
| "loss": 26.7228, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.45441731810569763, | |
| "rewards/margins": 0.028978880494832993, | |
| "rewards/rejected": -0.4833962321281433, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2950432730133753, | |
| "grad_norm": 192.900146484375, | |
| "learning_rate": 1.788518138567006e-05, | |
| "logits/chosen": -1.549302101135254, | |
| "logits/rejected": -1.6313527822494507, | |
| "logps/chosen": -4.84414529800415, | |
| "logps/rejected": -5.667928218841553, | |
| "loss": 22.6824, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.3907596170902252, | |
| "rewards/margins": 0.04788483679294586, | |
| "rewards/rejected": -0.4386444687843323, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2989771833202203, | |
| "grad_norm": 123.12667083740234, | |
| "learning_rate": 1.7799919954577977e-05, | |
| "logits/chosen": -1.4504163265228271, | |
| "logits/rejected": -1.5128448009490967, | |
| "logps/chosen": -4.6250810623168945, | |
| "logps/rejected": -5.694144248962402, | |
| "loss": 21.5658, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3649711012840271, | |
| "rewards/margins": 0.0703701302409172, | |
| "rewards/rejected": -0.4353412091732025, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3029110936270653, | |
| "grad_norm": 111.5103988647461, | |
| "learning_rate": 1.7713185428999834e-05, | |
| "logits/chosen": -1.5528513193130493, | |
| "logits/rejected": -1.5752861499786377, | |
| "logps/chosen": -4.1281046867370605, | |
| "logps/rejected": -5.281017780303955, | |
| "loss": 21.9272, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3216732442378998, | |
| "rewards/margins": 0.07142569869756699, | |
| "rewards/rejected": -0.39309895038604736, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3068450039339103, | |
| "grad_norm": 133.6621856689453, | |
| "learning_rate": 1.762499418963596e-05, | |
| "logits/chosen": -1.4560383558273315, | |
| "logits/rejected": -1.5339572429656982, | |
| "logps/chosen": -3.8447978496551514, | |
| "logps/rejected": -4.268062114715576, | |
| "loss": 23.9149, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.2921312749385834, | |
| "rewards/margins": 0.03141012042760849, | |
| "rewards/rejected": -0.32354140281677246, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3107789142407553, | |
| "grad_norm": 214.52981567382812, | |
| "learning_rate": 1.7535362892301953e-05, | |
| "logits/chosen": -1.4598350524902344, | |
| "logits/rejected": -1.5942457914352417, | |
| "logps/chosen": -3.9587693214416504, | |
| "logps/rejected": -4.580626487731934, | |
| "loss": 23.6596, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.2964509427547455, | |
| "rewards/margins": 0.04400955140590668, | |
| "rewards/rejected": -0.34046050906181335, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3147128245476003, | |
| "grad_norm": 114.73934173583984, | |
| "learning_rate": 1.744430846478306e-05, | |
| "logits/chosen": -1.3783127069473267, | |
| "logits/rejected": -1.354361653327942, | |
| "logps/chosen": -4.398768424987793, | |
| "logps/rejected": -5.0471062660217285, | |
| "loss": 21.7893, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.30568915605545044, | |
| "rewards/margins": 0.05113334208726883, | |
| "rewards/rejected": -0.35682249069213867, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3147128245476003, | |
| "eval_logits/chosen": -2.1845593452453613, | |
| "eval_logits/rejected": -2.3796591758728027, | |
| "eval_logps/chosen": -3.8176822662353516, | |
| "eval_logps/rejected": -4.645543575286865, | |
| "eval_loss": 22.590557098388672, | |
| "eval_rewards/accuracies": 0.6656249761581421, | |
| "eval_rewards/chosen": -0.27924811840057373, | |
| "eval_rewards/margins": 0.06075913459062576, | |
| "eval_rewards/rejected": -0.3400072157382965, | |
| "eval_runtime": 247.317, | |
| "eval_samples_per_second": 2.588, | |
| "eval_steps_per_second": 0.162, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.31864673485444533, | |
| "grad_norm": 303.71954345703125, | |
| "learning_rate": 1.7351848103637165e-05, | |
| "logits/chosen": -1.4067426919937134, | |
| "logits/rejected": -1.433226227760315, | |
| "logps/chosen": -4.79337215423584, | |
| "logps/rejected": -4.957041263580322, | |
| "loss": 28.1561, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3237573802471161, | |
| "rewards/margins": 0.016921238973736763, | |
| "rewards/rejected": -0.3406786024570465, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 166.90481567382812, | |
| "learning_rate": 1.7257999270947094e-05, | |
| "logits/chosen": -1.4020185470581055, | |
| "logits/rejected": -1.5051838159561157, | |
| "logps/chosen": -4.32220458984375, | |
| "logps/rejected": -5.045875549316406, | |
| "loss": 23.7082, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.31713593006134033, | |
| "rewards/margins": 0.057282157242298126, | |
| "rewards/rejected": -0.37441807985305786, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.32651455546813535, | |
| "grad_norm": 98.86615753173828, | |
| "learning_rate": 1.7162779691022672e-05, | |
| "logits/chosen": -1.537492036819458, | |
| "logits/rejected": -1.6176296472549438, | |
| "logps/chosen": -4.154641151428223, | |
| "logps/rejected": -4.576406478881836, | |
| "loss": 25.1505, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.3061120808124542, | |
| "rewards/margins": 0.028859728947281837, | |
| "rewards/rejected": -0.3349718153476715, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.3304484657749803, | |
| "grad_norm": 242.99798583984375, | |
| "learning_rate": 1.7066207347053344e-05, | |
| "logits/chosen": -1.4779250621795654, | |
| "logits/rejected": -1.6732213497161865, | |
| "logps/chosen": -3.8076605796813965, | |
| "logps/rejected": -4.531802177429199, | |
| "loss": 21.9169, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.27425679564476013, | |
| "rewards/margins": 0.05525635555386543, | |
| "rewards/rejected": -0.3295131325721741, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.33438237608182536, | |
| "grad_norm": 152.2194366455078, | |
| "learning_rate": 1.6968300477711842e-05, | |
| "logits/chosen": -1.5241143703460693, | |
| "logits/rejected": -1.555153250694275, | |
| "logps/chosen": -4.127291202545166, | |
| "logps/rejected": -4.750130653381348, | |
| "loss": 22.7341, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.30315932631492615, | |
| "rewards/margins": 0.04498932510614395, | |
| "rewards/rejected": -0.3481486737728119, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33831628638867034, | |
| "grad_norm": 165.51031494140625, | |
| "learning_rate": 1.686907757370964e-05, | |
| "logits/chosen": -1.5997859239578247, | |
| "logits/rejected": -1.6548550128936768, | |
| "logps/chosen": -4.201173782348633, | |
| "logps/rejected": -5.533048629760742, | |
| "loss": 21.7724, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3329020142555237, | |
| "rewards/margins": 0.06577399373054504, | |
| "rewards/rejected": -0.39867597818374634, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3422501966955153, | |
| "grad_norm": 184.28565979003906, | |
| "learning_rate": 1.6768557374304784e-05, | |
| "logits/chosen": -1.550006628036499, | |
| "logits/rejected": -1.5935580730438232, | |
| "logps/chosen": -4.805244445800781, | |
| "logps/rejected": -5.302831172943115, | |
| "loss": 22.3908, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3597099184989929, | |
| "rewards/margins": 0.04813358187675476, | |
| "rewards/rejected": -0.4078435003757477, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.34618410700236035, | |
| "grad_norm": 249.4634552001953, | |
| "learning_rate": 1.6666758863762796e-05, | |
| "logits/chosen": -1.5116612911224365, | |
| "logits/rejected": -1.6511796712875366, | |
| "logps/chosen": -5.2712297439575195, | |
| "logps/rejected": -6.735253810882568, | |
| "loss": 24.446, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3934195339679718, | |
| "rewards/margins": 0.05257143825292587, | |
| "rewards/rejected": -0.44599097967147827, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.35011801730920533, | |
| "grad_norm": 121.32429504394531, | |
| "learning_rate": 1.65637012677713e-05, | |
| "logits/chosen": -1.522199273109436, | |
| "logits/rejected": -1.5548865795135498, | |
| "logps/chosen": -5.2593817710876465, | |
| "logps/rejected": -6.0441389083862305, | |
| "loss": 23.4002, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.42989540100097656, | |
| "rewards/margins": 0.04936647415161133, | |
| "rewards/rejected": -0.4792618751525879, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.35405192761605037, | |
| "grad_norm": 130.83099365234375, | |
| "learning_rate": 1.6459404049809072e-05, | |
| "logits/chosen": -1.4077280759811401, | |
| "logits/rejected": -1.5028517246246338, | |
| "logps/chosen": -5.468560218811035, | |
| "logps/rejected": -6.542233467102051, | |
| "loss": 23.6891, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.43146586418151855, | |
| "rewards/margins": 0.06058318167924881, | |
| "rewards/rejected": -0.49204903841018677, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.35798583792289534, | |
| "grad_norm": 248.70077514648438, | |
| "learning_rate": 1.6353886907470125e-05, | |
| "logits/chosen": -1.3115510940551758, | |
| "logits/rejected": -1.51992928981781, | |
| "logps/chosen": -4.2276201248168945, | |
| "logps/rejected": -5.167584419250488, | |
| "loss": 23.5641, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3317582607269287, | |
| "rewards/margins": 0.07050777971744537, | |
| "rewards/rejected": -0.4022659659385681, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3619197482297404, | |
| "grad_norm": 85.6197280883789, | |
| "learning_rate": 1.6247169768743658e-05, | |
| "logits/chosen": -1.3957135677337646, | |
| "logits/rejected": -1.4850984811782837, | |
| "logps/chosen": -4.224881172180176, | |
| "logps/rejected": -4.811681270599365, | |
| "loss": 21.6945, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3067195415496826, | |
| "rewards/margins": 0.06704317033290863, | |
| "rewards/rejected": -0.37376269698143005, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 176.3291473388672, | |
| "learning_rate": 1.613927278825043e-05, | |
| "logits/chosen": -1.434696912765503, | |
| "logits/rejected": -1.4473021030426025, | |
| "logps/chosen": -3.8976681232452393, | |
| "logps/rejected": -4.508739948272705, | |
| "loss": 24.959, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.29903483390808105, | |
| "rewards/margins": 0.042590878903865814, | |
| "rewards/rejected": -0.3416256904602051, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.3697875688434304, | |
| "grad_norm": 112.67879486083984, | |
| "learning_rate": 1.6030216343436354e-05, | |
| "logits/chosen": -1.4505010843276978, | |
| "logits/rejected": -1.6456613540649414, | |
| "logps/chosen": -3.5870590209960938, | |
| "logps/rejected": -4.018393039703369, | |
| "loss": 24.7101, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.2629242539405823, | |
| "rewards/margins": 0.03190271183848381, | |
| "rewards/rejected": -0.2948269546031952, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.37372147915027537, | |
| "grad_norm": 134.83592224121094, | |
| "learning_rate": 1.5920021030724017e-05, | |
| "logits/chosen": -1.5103158950805664, | |
| "logits/rejected": -1.604021430015564, | |
| "logps/chosen": -3.3592028617858887, | |
| "logps/rejected": -4.374294281005859, | |
| "loss": 21.0119, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.22468845546245575, | |
| "rewards/margins": 0.05931607633829117, | |
| "rewards/rejected": -0.2840045094490051, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3776553894571204, | |
| "grad_norm": 95.25556945800781, | |
| "learning_rate": 1.5808707661622835e-05, | |
| "logits/chosen": -1.6079397201538086, | |
| "logits/rejected": -1.5737743377685547, | |
| "logps/chosen": -3.741440534591675, | |
| "logps/rejected": -4.678065299987793, | |
| "loss": 20.2573, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.2804642617702484, | |
| "rewards/margins": 0.069583460688591, | |
| "rewards/rejected": -0.3500477373600006, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.3815892997639654, | |
| "grad_norm": 104.40715789794922, | |
| "learning_rate": 1.5696297258798573e-05, | |
| "logits/chosen": -1.583701491355896, | |
| "logits/rejected": -1.562860131263733, | |
| "logps/chosen": -4.199146270751953, | |
| "logps/rejected": -4.646213054656982, | |
| "loss": 24.1568, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.3161547780036926, | |
| "rewards/margins": 0.03264855220913887, | |
| "rewards/rejected": -0.3488033413887024, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.38552321007081036, | |
| "grad_norm": 184.5244903564453, | |
| "learning_rate": 1.558281105210302e-05, | |
| "logits/chosen": -1.5556423664093018, | |
| "logits/rejected": -1.6839488744735718, | |
| "logps/chosen": -4.8659186363220215, | |
| "logps/rejected": -6.025291442871094, | |
| "loss": 21.4117, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3903878629207611, | |
| "rewards/margins": 0.07987485826015472, | |
| "rewards/rejected": -0.47026270627975464, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3894571203776554, | |
| "grad_norm": 130.12147521972656, | |
| "learning_rate": 1.5468270474564503e-05, | |
| "logits/chosen": -1.4907524585723877, | |
| "logits/rejected": -1.594044804573059, | |
| "logps/chosen": -4.60575008392334, | |
| "logps/rejected": -5.835744857788086, | |
| "loss": 20.1121, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3732062578201294, | |
| "rewards/margins": 0.09308288991451263, | |
| "rewards/rejected": -0.4662891924381256, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.3933910306845004, | |
| "grad_norm": 154.03810119628906, | |
| "learning_rate": 1.535269715834004e-05, | |
| "logits/chosen": -1.5770604610443115, | |
| "logits/rejected": -1.6915347576141357, | |
| "logps/chosen": -4.623286247253418, | |
| "logps/rejected": -5.872547149658203, | |
| "loss": 20.841, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.3804221749305725, | |
| "rewards/margins": 0.0961974710226059, | |
| "rewards/rejected": -0.4766196310520172, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3933910306845004, | |
| "eval_logits/chosen": -2.051055669784546, | |
| "eval_logits/rejected": -2.1928887367248535, | |
| "eval_logps/chosen": -4.980111122131348, | |
| "eval_logps/rejected": -5.963034152984619, | |
| "eval_loss": 22.19961166381836, | |
| "eval_rewards/accuracies": 0.6781250238418579, | |
| "eval_rewards/chosen": -0.3954910337924957, | |
| "eval_rewards/margins": 0.07626526802778244, | |
| "eval_rewards/rejected": -0.4717562794685364, | |
| "eval_runtime": 247.0571, | |
| "eval_samples_per_second": 2.59, | |
| "eval_steps_per_second": 0.162, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3973249409913454, | |
| "grad_norm": 117.43452453613281, | |
| "learning_rate": 1.5236112930629896e-05, | |
| "logits/chosen": -1.59442138671875, | |
| "logits/rejected": -1.608278512954712, | |
| "logps/chosen": -4.784956932067871, | |
| "logps/rejected": -6.083520889282227, | |
| "loss": 21.3228, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.39433416724205017, | |
| "rewards/margins": 0.0851038470864296, | |
| "rewards/rejected": -0.47943800687789917, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.4012588512981904, | |
| "grad_norm": 131.9105987548828, | |
| "learning_rate": 1.511853980955528e-05, | |
| "logits/chosen": -1.5525928735733032, | |
| "logits/rejected": -1.6741384267807007, | |
| "logps/chosen": -4.761956691741943, | |
| "logps/rejected": -6.223165035247803, | |
| "loss": 18.7233, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.3881562054157257, | |
| "rewards/margins": 0.11896620690822601, | |
| "rewards/rejected": -0.5071223974227905, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4051927616050354, | |
| "grad_norm": 219.87973022460938, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "logits/chosen": -1.4453318119049072, | |
| "logits/rejected": -1.5039575099945068, | |
| "logps/chosen": -5.150791645050049, | |
| "logps/rejected": -5.92265510559082, | |
| "loss": 22.5345, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.42183756828308105, | |
| "rewards/margins": 0.06285471469163895, | |
| "rewards/rejected": -0.4846922755241394, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4091266719118804, | |
| "grad_norm": 165.92495727539062, | |
| "learning_rate": 1.488051588941687e-05, | |
| "logits/chosen": -1.5345083475112915, | |
| "logits/rejected": -1.5863834619522095, | |
| "logps/chosen": -4.62529182434082, | |
| "logps/rejected": -5.6540374755859375, | |
| "loss": 22.3543, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3801875710487366, | |
| "rewards/margins": 0.08038468658924103, | |
| "rewards/rejected": -0.4605723023414612, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.41306058221872544, | |
| "grad_norm": 90.85760498046875, | |
| "learning_rate": 1.4760110043599571e-05, | |
| "logits/chosen": -1.4365699291229248, | |
| "logits/rejected": -1.5294816493988037, | |
| "logps/chosen": -5.155371189117432, | |
| "logps/rejected": -5.7571702003479, | |
| "loss": 23.5323, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.40600451827049255, | |
| "rewards/margins": 0.053865253925323486, | |
| "rewards/rejected": -0.45986977219581604, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.4169944925255704, | |
| "grad_norm": 133.31532287597656, | |
| "learning_rate": 1.4638805202420896e-05, | |
| "logits/chosen": -1.5367436408996582, | |
| "logits/rejected": -1.6193835735321045, | |
| "logps/chosen": -4.300149440765381, | |
| "logps/rejected": -5.198160648345947, | |
| "loss": 20.7757, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.34353598952293396, | |
| "rewards/margins": 0.07413734495639801, | |
| "rewards/rejected": -0.41767334938049316, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.4209284028324154, | |
| "grad_norm": 72.27132415771484, | |
| "learning_rate": 1.4516624275538085e-05, | |
| "logits/chosen": -1.5670139789581299, | |
| "logits/rejected": -1.572481632232666, | |
| "logps/chosen": -4.886091709136963, | |
| "logps/rejected": -5.762571811676025, | |
| "loss": 22.0826, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.37716084718704224, | |
| "rewards/margins": 0.06687341630458832, | |
| "rewards/rejected": -0.4440341889858246, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.42486231313926043, | |
| "grad_norm": 125.95482635498047, | |
| "learning_rate": 1.4393590338066111e-05, | |
| "logits/chosen": -1.4181983470916748, | |
| "logits/rejected": -1.5720667839050293, | |
| "logps/chosen": -4.6530890464782715, | |
| "logps/rejected": -5.8773040771484375, | |
| "loss": 21.225, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3725031316280365, | |
| "rewards/margins": 0.09332089871168137, | |
| "rewards/rejected": -0.46582403779029846, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.4287962234461054, | |
| "grad_norm": 125.4962158203125, | |
| "learning_rate": 1.4269726626219694e-05, | |
| "logits/chosen": -1.4898996353149414, | |
| "logits/rejected": -1.588863492012024, | |
| "logps/chosen": -4.545314311981201, | |
| "logps/rejected": -5.7105536460876465, | |
| "loss": 20.5982, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3671793043613434, | |
| "rewards/margins": 0.08724094182252884, | |
| "rewards/rejected": -0.4544202387332916, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.43273013375295044, | |
| "grad_norm": 136.95050048828125, | |
| "learning_rate": 1.4145056532924915e-05, | |
| "logits/chosen": -1.4961071014404297, | |
| "logits/rejected": -1.5412185192108154, | |
| "logps/chosen": -4.766295433044434, | |
| "logps/rejected": -5.421988487243652, | |
| "loss": 23.7911, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3876652121543884, | |
| "rewards/margins": 0.0486636683344841, | |
| "rewards/rejected": -0.4363288879394531, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4366640440597954, | |
| "grad_norm": 111.8359375, | |
| "learning_rate": 1.4019603603401222e-05, | |
| "logits/chosen": -1.5303133726119995, | |
| "logits/rejected": -1.531965970993042, | |
| "logps/chosen": -5.281257152557373, | |
| "logps/rejected": -6.489635467529297, | |
| "loss": 20.8782, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.43693438172340393, | |
| "rewards/margins": 0.0869705006480217, | |
| "rewards/rejected": -0.5239048600196838, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.44059795436664045, | |
| "grad_norm": 108.2451400756836, | |
| "learning_rate": 1.389339153071468e-05, | |
| "logits/chosen": -1.500227928161621, | |
| "logits/rejected": -1.5907671451568604, | |
| "logps/chosen": -4.559876441955566, | |
| "logps/rejected": -5.24734354019165, | |
| "loss": 22.9528, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.36998695135116577, | |
| "rewards/margins": 0.06032613664865494, | |
| "rewards/rejected": -0.4303130507469177, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.44453186467348543, | |
| "grad_norm": 111.49331665039062, | |
| "learning_rate": 1.3766444151303276e-05, | |
| "logits/chosen": -1.500124216079712, | |
| "logits/rejected": -1.5592477321624756, | |
| "logps/chosen": -4.5935750007629395, | |
| "logps/rejected": -5.141480922698975, | |
| "loss": 23.8155, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.3515246510505676, | |
| "rewards/margins": 0.04220908135175705, | |
| "rewards/rejected": -0.3937337100505829, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.44846577498033047, | |
| "grad_norm": 169.8197021484375, | |
| "learning_rate": 1.3638785440475186e-05, | |
| "logits/chosen": -1.4828747510910034, | |
| "logits/rejected": -1.6195532083511353, | |
| "logps/chosen": -4.379355430603027, | |
| "logps/rejected": -5.253105163574219, | |
| "loss": 22.077, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.33442041277885437, | |
| "rewards/margins": 0.06863003969192505, | |
| "rewards/rejected": -0.40305042266845703, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.45239968528717545, | |
| "grad_norm": 99.62928009033203, | |
| "learning_rate": 1.3510439507880778e-05, | |
| "logits/chosen": -1.4429081678390503, | |
| "logits/rejected": -1.5855743885040283, | |
| "logps/chosen": -4.258365631103516, | |
| "logps/rejected": -5.172659873962402, | |
| "loss": 20.7018, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.32063132524490356, | |
| "rewards/margins": 0.07806047052145004, | |
| "rewards/rejected": -0.3986917734146118, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.4563335955940205, | |
| "grad_norm": 331.7801818847656, | |
| "learning_rate": 1.3381430592959266e-05, | |
| "logits/chosen": -1.533367395401001, | |
| "logits/rejected": -1.674047827720642, | |
| "logps/chosen": -4.476586818695068, | |
| "logps/rejected": -5.315201759338379, | |
| "loss": 22.2494, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.33964449167251587, | |
| "rewards/margins": 0.07077815383672714, | |
| "rewards/rejected": -0.4104226529598236, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.46026750590086546, | |
| "grad_norm": 83.21395111083984, | |
| "learning_rate": 1.3251783060360843e-05, | |
| "logits/chosen": -1.5329582691192627, | |
| "logits/rejected": -1.5836857557296753, | |
| "logps/chosen": -4.263331413269043, | |
| "logps/rejected": -5.0875420570373535, | |
| "loss": 20.9799, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.34249481558799744, | |
| "rewards/margins": 0.06859047710895538, | |
| "rewards/rejected": -0.411085307598114, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.46420141620771044, | |
| "grad_norm": 115.21592712402344, | |
| "learning_rate": 1.3121521395345166e-05, | |
| "logits/chosen": -1.6251742839813232, | |
| "logits/rejected": -1.630035400390625, | |
| "logps/chosen": -4.937412738800049, | |
| "logps/rejected": -5.566755771636963, | |
| "loss": 23.4551, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.4054068922996521, | |
| "rewards/margins": 0.03334500640630722, | |
| "rewards/rejected": -0.43875187635421753, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.46813532651455547, | |
| "grad_norm": 91.47386932373047, | |
| "learning_rate": 1.2990670199157074e-05, | |
| "logits/chosen": -1.5872230529785156, | |
| "logits/rejected": -1.6779727935791016, | |
| "logps/chosen": -4.554791450500488, | |
| "logps/rejected": -5.94228458404541, | |
| "loss": 19.8409, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.37332695722579956, | |
| "rewards/margins": 0.1068461686372757, | |
| "rewards/rejected": -0.48017311096191406, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.47206923682140045, | |
| "grad_norm": 84.26165771484375, | |
| "learning_rate": 1.285925418438037e-05, | |
| "logits/chosen": -1.6029882431030273, | |
| "logits/rejected": -1.5973970890045166, | |
| "logps/chosen": -4.753634452819824, | |
| "logps/rejected": -6.724286079406738, | |
| "loss": 20.5538, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3792814016342163, | |
| "rewards/margins": 0.10161124169826508, | |
| "rewards/rejected": -0.4808926582336426, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.47206923682140045, | |
| "eval_logits/chosen": -1.8687881231307983, | |
| "eval_logits/rejected": -2.063835620880127, | |
| "eval_logps/chosen": -4.705691337585449, | |
| "eval_logps/rejected": -5.671477317810059, | |
| "eval_loss": 22.097375869750977, | |
| "eval_rewards/accuracies": 0.6640625, | |
| "eval_rewards/chosen": -0.3680490553379059, | |
| "eval_rewards/margins": 0.07455149292945862, | |
| "eval_rewards/rejected": -0.4426005482673645, | |
| "eval_runtime": 247.0565, | |
| "eval_samples_per_second": 2.591, | |
| "eval_steps_per_second": 0.162, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4760031471282455, | |
| "grad_norm": 436.2966003417969, | |
| "learning_rate": 1.2727298170270623e-05, | |
| "logits/chosen": -1.4476096630096436, | |
| "logits/rejected": -1.7463172674179077, | |
| "logps/chosen": -4.9814863204956055, | |
| "logps/rejected": -6.1390061378479, | |
| "loss": 22.5387, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.39945507049560547, | |
| "rewards/margins": 0.06587421894073486, | |
| "rewards/rejected": -0.46532925963401794, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.47993705743509046, | |
| "grad_norm": 176.86717224121094, | |
| "learning_rate": 1.2594827078067788e-05, | |
| "logits/chosen": -1.4699801206588745, | |
| "logits/rejected": -1.6459217071533203, | |
| "logps/chosen": -4.43405818939209, | |
| "logps/rejected": -5.642327308654785, | |
| "loss": 21.4092, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.33053499460220337, | |
| "rewards/margins": 0.08509950339794159, | |
| "rewards/rejected": -0.4156344532966614, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 89.04745483398438, | |
| "learning_rate": 1.2461865926289565e-05, | |
| "logits/chosen": -1.5937103033065796, | |
| "logits/rejected": -1.6736860275268555, | |
| "logps/chosen": -4.249716758728027, | |
| "logps/rejected": -5.313623905181885, | |
| "loss": 20.1482, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3353779911994934, | |
| "rewards/margins": 0.08377678692340851, | |
| "rewards/rejected": -0.41915470361709595, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.4878048780487805, | |
| "grad_norm": 142.5464630126953, | |
| "learning_rate": 1.2328439826006415e-05, | |
| "logits/chosen": -1.8125406503677368, | |
| "logits/rejected": -1.82503342628479, | |
| "logps/chosen": -4.829066276550293, | |
| "logps/rejected": -6.025434494018555, | |
| "loss": 21.8627, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.38841956853866577, | |
| "rewards/margins": 0.09203478693962097, | |
| "rewards/rejected": -0.48045435547828674, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4917387883556255, | |
| "grad_norm": 92.0487060546875, | |
| "learning_rate": 1.2194573976099065e-05, | |
| "logits/chosen": -1.7436134815216064, | |
| "logits/rejected": -1.7932506799697876, | |
| "logps/chosen": -4.684365272521973, | |
| "logps/rejected": -6.019822597503662, | |
| "loss": 19.5295, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.3807242512702942, | |
| "rewards/margins": 0.08837229013442993, | |
| "rewards/rejected": -0.46909651160240173, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4956726986624705, | |
| "grad_norm": 147.95338439941406, | |
| "learning_rate": 1.206029365849945e-05, | |
| "logits/chosen": -1.6783860921859741, | |
| "logits/rejected": -1.9319006204605103, | |
| "logps/chosen": -5.285382270812988, | |
| "logps/rejected": -6.642569541931152, | |
| "loss": 21.2155, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.43607670068740845, | |
| "rewards/margins": 0.10433085262775421, | |
| "rewards/rejected": -0.5404075384140015, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.4996066089693155, | |
| "grad_norm": 130.0589141845703, | |
| "learning_rate": 1.1925624233415953e-05, | |
| "logits/chosen": -1.6536309719085693, | |
| "logits/rejected": -1.7914777994155884, | |
| "logps/chosen": -6.562493801116943, | |
| "logps/rejected": -7.187270164489746, | |
| "loss": 24.9862, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.5217987298965454, | |
| "rewards/margins": 0.049688175320625305, | |
| "rewards/rejected": -0.5714868903160095, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.5035405192761605, | |
| "grad_norm": 105.65159606933594, | |
| "learning_rate": 1.1790591134543878e-05, | |
| "logits/chosen": -1.687402367591858, | |
| "logits/rejected": -1.7607853412628174, | |
| "logps/chosen": -5.57774019241333, | |
| "logps/rejected": -7.084832668304443, | |
| "loss": 20.6912, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4535636901855469, | |
| "rewards/margins": 0.11514492332935333, | |
| "rewards/rejected": -0.568708598613739, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.5074744295830055, | |
| "grad_norm": 92.58214569091797, | |
| "learning_rate": 1.165521986426204e-05, | |
| "logits/chosen": -1.7766138315200806, | |
| "logits/rejected": -1.8096840381622314, | |
| "logps/chosen": -5.609960556030273, | |
| "logps/rejected": -6.572140693664551, | |
| "loss": 20.9903, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.45290178060531616, | |
| "rewards/margins": 0.07533474266529083, | |
| "rewards/rejected": -0.5282365083694458, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.5114083398898505, | |
| "grad_norm": 149.65013122558594, | |
| "learning_rate": 1.1519535988816376e-05, | |
| "logits/chosen": -1.5931161642074585, | |
| "logits/rejected": -1.6935539245605469, | |
| "logps/chosen": -5.396282196044922, | |
| "logps/rejected": -6.744470119476318, | |
| "loss": 20.1648, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.40467891097068787, | |
| "rewards/margins": 0.09381942451000214, | |
| "rewards/rejected": -0.4984983503818512, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.5153422501966956, | |
| "grad_norm": 200.92955017089844, | |
| "learning_rate": 1.1383565133491486e-05, | |
| "logits/chosen": -1.795839548110962, | |
| "logits/rejected": -1.819392442703247, | |
| "logps/chosen": -5.257883548736572, | |
| "logps/rejected": -7.0443830490112305, | |
| "loss": 22.4534, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.43912070989608765, | |
| "rewards/margins": 0.07490712404251099, | |
| "rewards/rejected": -0.5140278339385986, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.5192761605035405, | |
| "grad_norm": 125.60820770263672, | |
| "learning_rate": 1.1247332977771064e-05, | |
| "logits/chosen": -1.6573776006698608, | |
| "logits/rejected": -1.8588536977767944, | |
| "logps/chosen": -4.678572177886963, | |
| "logps/rejected": -5.781623840332031, | |
| "loss": 20.5053, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3861141800880432, | |
| "rewards/margins": 0.08240343630313873, | |
| "rewards/rejected": -0.46851760149002075, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.5232100708103855, | |
| "grad_norm": 157.74685668945312, | |
| "learning_rate": 1.1110865250488047e-05, | |
| "logits/chosen": -1.6955581903457642, | |
| "logits/rejected": -1.7286555767059326, | |
| "logps/chosen": -5.368699073791504, | |
| "logps/rejected": -6.521600246429443, | |
| "loss": 22.1845, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.4410589635372162, | |
| "rewards/margins": 0.06638728827238083, | |
| "rewards/rejected": -0.5074462294578552, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.5271439811172305, | |
| "grad_norm": 117.55720520019531, | |
| "learning_rate": 1.0974187724965459e-05, | |
| "logits/chosen": -1.569481611251831, | |
| "logits/rejected": -1.7076787948608398, | |
| "logps/chosen": -5.216125965118408, | |
| "logps/rejected": -6.768074035644531, | |
| "loss": 20.6526, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.41673794388771057, | |
| "rewards/margins": 0.10740216076374054, | |
| "rewards/rejected": -0.5241400599479675, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.5310778914240756, | |
| "grad_norm": 97.75656127929688, | |
| "learning_rate": 1.083732621414887e-05, | |
| "logits/chosen": -1.5581352710723877, | |
| "logits/rejected": -1.7143176794052124, | |
| "logps/chosen": -4.81557559967041, | |
| "logps/rejected": -6.235506534576416, | |
| "loss": 20.7019, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.39709532260894775, | |
| "rewards/margins": 0.07989685237407684, | |
| "rewards/rejected": -0.4769921898841858, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5350118017309206, | |
| "grad_norm": 177.1349639892578, | |
| "learning_rate": 1.0700306565731348e-05, | |
| "logits/chosen": -1.6809425354003906, | |
| "logits/rejected": -1.775265097618103, | |
| "logps/chosen": -5.231817245483398, | |
| "logps/rejected": -6.009397983551025, | |
| "loss": 24.3801, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.4042357802391052, | |
| "rewards/margins": 0.0449453704059124, | |
| "rewards/rejected": -0.4491811692714691, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.5389457120377655, | |
| "grad_norm": 90.295166015625, | |
| "learning_rate": 1.0563154657271856e-05, | |
| "logits/chosen": -1.5968455076217651, | |
| "logits/rejected": -1.7083053588867188, | |
| "logps/chosen": -4.497256278991699, | |
| "logps/rejected": -5.899313926696777, | |
| "loss": 19.5135, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3596218526363373, | |
| "rewards/margins": 0.11312826722860336, | |
| "rewards/rejected": -0.47275009751319885, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.5428796223446105, | |
| "grad_norm": 124.57329559326172, | |
| "learning_rate": 1.0425896391308027e-05, | |
| "logits/chosen": -1.76220703125, | |
| "logits/rejected": -1.80035400390625, | |
| "logps/chosen": -4.3184638023376465, | |
| "logps/rejected": -5.549654960632324, | |
| "loss": 22.3509, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3421816825866699, | |
| "rewards/margins": 0.07712335884571075, | |
| "rewards/rejected": -0.4193050265312195, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.5468135326514555, | |
| "grad_norm": 95.38504791259766, | |
| "learning_rate": 1.0288557690464183e-05, | |
| "logits/chosen": -1.7643308639526367, | |
| "logits/rejected": -1.8514759540557861, | |
| "logps/chosen": -4.870394706726074, | |
| "logps/rejected": -6.192694664001465, | |
| "loss": 19.6511, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3783365786075592, | |
| "rewards/margins": 0.10224858671426773, | |
| "rewards/rejected": -0.48058515787124634, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.5507474429583006, | |
| "grad_norm": 110.24113464355469, | |
| "learning_rate": 1.0151164492555598e-05, | |
| "logits/chosen": -1.7323192358016968, | |
| "logits/rejected": -1.7812278270721436, | |
| "logps/chosen": -4.445910453796387, | |
| "logps/rejected": -5.475697994232178, | |
| "loss": 20.5733, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3584834635257721, | |
| "rewards/margins": 0.08415325731039047, | |
| "rewards/rejected": -0.44263672828674316, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5507474429583006, | |
| "eval_logits/chosen": -1.9791358709335327, | |
| "eval_logits/rejected": -2.168715715408325, | |
| "eval_logps/chosen": -4.671382904052734, | |
| "eval_logps/rejected": -5.675909996032715, | |
| "eval_loss": 21.87997055053711, | |
| "eval_rewards/accuracies": 0.6890624761581421, | |
| "eval_rewards/chosen": -0.3646181523799896, | |
| "eval_rewards/margins": 0.07842567563056946, | |
| "eval_rewards/rejected": -0.4430437982082367, | |
| "eval_runtime": 247.0548, | |
| "eval_samples_per_second": 2.591, | |
| "eval_steps_per_second": 0.162, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5546813532651456, | |
| "grad_norm": 93.88453674316406, | |
| "learning_rate": 1.0013742745689873e-05, | |
| "logits/chosen": -1.6371465921401978, | |
| "logits/rejected": -1.6118228435516357, | |
| "logps/chosen": -4.992671966552734, | |
| "logps/rejected": -6.3686113357543945, | |
| "loss": 22.5795, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.40251269936561584, | |
| "rewards/margins": 0.07059164345264435, | |
| "rewards/rejected": -0.4731043875217438, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.5586152635719905, | |
| "grad_norm": 84.3992919921875, | |
| "learning_rate": 9.876318403366371e-06, | |
| "logits/chosen": -1.6351890563964844, | |
| "logits/rejected": -1.742375373840332, | |
| "logps/chosen": -4.1463117599487305, | |
| "logps/rejected": -5.639761924743652, | |
| "loss": 18.9145, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3305463492870331, | |
| "rewards/margins": 0.09927482903003693, | |
| "rewards/rejected": -0.4298211932182312, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.5625491738788355, | |
| "grad_norm": 80.05113220214844, | |
| "learning_rate": 9.738917419574639e-06, | |
| "logits/chosen": -1.6415116786956787, | |
| "logits/rejected": -1.809754729270935, | |
| "logps/chosen": -4.596456050872803, | |
| "logps/rejected": -6.007749080657959, | |
| "loss": 18.0583, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.3703993856906891, | |
| "rewards/margins": 0.11888410896062851, | |
| "rewards/rejected": -0.4892834722995758, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.5664830841856806, | |
| "grad_norm": 113.48812866210938, | |
| "learning_rate": 9.601565743892714e-06, | |
| "logits/chosen": -1.5977214574813843, | |
| "logits/rejected": -1.6212304830551147, | |
| "logps/chosen": -4.664346218109131, | |
| "logps/rejected": -5.388762474060059, | |
| "loss": 22.5597, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.38028937578201294, | |
| "rewards/margins": 0.059345345944166183, | |
| "rewards/rejected": -0.4396347403526306, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5704169944925256, | |
| "grad_norm": 70.57742309570312, | |
| "learning_rate": 9.464289316586303e-06, | |
| "logits/chosen": -1.5158501863479614, | |
| "logits/rejected": -1.676015853881836, | |
| "logps/chosen": -5.285845756530762, | |
| "logps/rejected": -6.9133148193359375, | |
| "loss": 18.5202, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3834846615791321, | |
| "rewards/margins": 0.11843238770961761, | |
| "rewards/rejected": -0.5019170641899109, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5743509047993706, | |
| "grad_norm": 103.70975494384766, | |
| "learning_rate": 9.327114063709687e-06, | |
| "logits/chosen": -1.518786072731018, | |
| "logits/rejected": -1.5470190048217773, | |
| "logps/chosen": -4.85964822769165, | |
| "logps/rejected": -5.655099391937256, | |
| "loss": 21.9101, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.39920276403427124, | |
| "rewards/margins": 0.06735944002866745, | |
| "rewards/rejected": -0.4665622115135193, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5782848151062155, | |
| "grad_norm": 122.11375427246094, | |
| "learning_rate": 9.19006589220933e-06, | |
| "logits/chosen": -1.6511976718902588, | |
| "logits/rejected": -1.7952783107757568, | |
| "logps/chosen": -5.165816783905029, | |
| "logps/rejected": -6.50637149810791, | |
| "loss": 19.2205, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.4252663552761078, | |
| "rewards/margins": 0.11230112612247467, | |
| "rewards/rejected": -0.5375674962997437, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5822187254130606, | |
| "grad_norm": 98.64827728271484, | |
| "learning_rate": 9.053170685031065e-06, | |
| "logits/chosen": -1.476125717163086, | |
| "logits/rejected": -1.5079152584075928, | |
| "logps/chosen": -4.6666107177734375, | |
| "logps/rejected": -5.756585597991943, | |
| "loss": 21.5261, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.37994053959846497, | |
| "rewards/margins": 0.08982710540294647, | |
| "rewards/rejected": -0.46976765990257263, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.5861526357199056, | |
| "grad_norm": 163.19903564453125, | |
| "learning_rate": 8.916454296231866e-06, | |
| "logits/chosen": -1.5693914890289307, | |
| "logits/rejected": -1.6560795307159424, | |
| "logps/chosen": -5.214982032775879, | |
| "logps/rejected": -6.230101585388184, | |
| "loss": 22.3663, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.42850279808044434, | |
| "rewards/margins": 0.08659662306308746, | |
| "rewards/rejected": -0.5150994062423706, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5900865460267506, | |
| "grad_norm": 287.3004455566406, | |
| "learning_rate": 8.77994254609703e-06, | |
| "logits/chosen": -1.5711700916290283, | |
| "logits/rejected": -1.6349785327911377, | |
| "logps/chosen": -5.405780792236328, | |
| "logps/rejected": -6.518923759460449, | |
| "loss": 21.7013, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.42655763030052185, | |
| "rewards/margins": 0.10133453458547592, | |
| "rewards/rejected": -0.5278922319412231, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5940204563335956, | |
| "grad_norm": 87.57359313964844, | |
| "learning_rate": 8.643661216263744e-06, | |
| "logits/chosen": -1.7837469577789307, | |
| "logits/rejected": -1.9320176839828491, | |
| "logps/chosen": -4.869183540344238, | |
| "logps/rejected": -6.500770568847656, | |
| "loss": 21.0578, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3997957706451416, | |
| "rewards/margins": 0.11517678201198578, | |
| "rewards/rejected": -0.5149725079536438, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5979543666404405, | |
| "grad_norm": 140.55946350097656, | |
| "learning_rate": 8.507636044851978e-06, | |
| "logits/chosen": -1.824091911315918, | |
| "logits/rejected": -1.8229138851165771, | |
| "logps/chosen": -5.666548252105713, | |
| "logps/rejected": -6.8593854904174805, | |
| "loss": 20.3215, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.45906418561935425, | |
| "rewards/margins": 0.08720727264881134, | |
| "rewards/rejected": -0.5462714433670044, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.6018882769472856, | |
| "grad_norm": 112.21092987060547, | |
| "learning_rate": 8.371892721603572e-06, | |
| "logits/chosen": -1.6853454113006592, | |
| "logits/rejected": -1.8647477626800537, | |
| "logps/chosen": -4.651841163635254, | |
| "logps/rejected": -6.3494367599487305, | |
| "loss": 18.3569, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.3507687747478485, | |
| "rewards/margins": 0.14806708693504333, | |
| "rewards/rejected": -0.49883586168289185, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.6058221872541306, | |
| "grad_norm": 81.10078430175781, | |
| "learning_rate": 8.236456883030451e-06, | |
| "logits/chosen": -1.68671452999115, | |
| "logits/rejected": -1.7528860569000244, | |
| "logps/chosen": -4.768989562988281, | |
| "logps/rejected": -5.5941948890686035, | |
| "loss": 22.1186, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3809035122394562, | |
| "rewards/margins": 0.06425388157367706, | |
| "rewards/rejected": -0.44515734910964966, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.6097560975609756, | |
| "grad_norm": 105.58920288085938, | |
| "learning_rate": 8.101354107572936e-06, | |
| "logits/chosen": -1.727433204650879, | |
| "logits/rejected": -1.8371779918670654, | |
| "logps/chosen": -4.869941711425781, | |
| "logps/rejected": -6.077930450439453, | |
| "loss": 20.7747, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.39489075541496277, | |
| "rewards/margins": 0.09834662824869156, | |
| "rewards/rejected": -0.4932374060153961, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.6136900078678206, | |
| "grad_norm": 98.20643615722656, | |
| "learning_rate": 7.966609910768977e-06, | |
| "logits/chosen": -1.792258620262146, | |
| "logits/rejected": -1.8587232828140259, | |
| "logps/chosen": -5.296938419342041, | |
| "logps/rejected": -6.899237155914307, | |
| "loss": 21.6057, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.4271044135093689, | |
| "rewards/margins": 0.09831608831882477, | |
| "rewards/rejected": -0.5254205465316772, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.6176239181746657, | |
| "grad_norm": 81.22627258300781, | |
| "learning_rate": 7.83224974043527e-06, | |
| "logits/chosen": -1.9083926677703857, | |
| "logits/rejected": -1.8744350671768188, | |
| "logps/chosen": -5.533473014831543, | |
| "logps/rejected": -7.378387451171875, | |
| "loss": 21.26, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.44664111733436584, | |
| "rewards/margins": 0.0884137898683548, | |
| "rewards/rejected": -0.5350548624992371, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.6215578284815106, | |
| "grad_norm": 110.95684814453125, | |
| "learning_rate": 7.698298971861216e-06, | |
| "logits/chosen": -1.8310887813568115, | |
| "logits/rejected": -1.9082119464874268, | |
| "logps/chosen": -5.640771865844727, | |
| "logps/rejected": -6.7010650634765625, | |
| "loss": 22.755, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.4458495080471039, | |
| "rewards/margins": 0.06785107403993607, | |
| "rewards/rejected": -0.5137006044387817, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.6254917387883556, | |
| "grad_norm": 128.42193603515625, | |
| "learning_rate": 7.564782903016495e-06, | |
| "logits/chosen": -2.0378453731536865, | |
| "logits/rejected": -2.0055670738220215, | |
| "logps/chosen": -5.865384101867676, | |
| "logps/rejected": -7.270517826080322, | |
| "loss": 20.4632, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.4653663635253906, | |
| "rewards/margins": 0.10677427053451538, | |
| "rewards/rejected": -0.572140634059906, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.6294256490952006, | |
| "grad_norm": 99.83875274658203, | |
| "learning_rate": 7.431726749773322e-06, | |
| "logits/chosen": -1.9313580989837646, | |
| "logits/rejected": -2.0229592323303223, | |
| "logps/chosen": -5.312073707580566, | |
| "logps/rejected": -6.263284683227539, | |
| "loss": 21.2401, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.44135594367980957, | |
| "rewards/margins": 0.07802332937717438, | |
| "rewards/rejected": -0.5193793177604675, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6294256490952006, | |
| "eval_logits/chosen": -2.0016579627990723, | |
| "eval_logits/rejected": -2.1698896884918213, | |
| "eval_logps/chosen": -5.630452632904053, | |
| "eval_logps/rejected": -6.631816864013672, | |
| "eval_loss": 21.630550384521484, | |
| "eval_rewards/accuracies": 0.6703125238418579, | |
| "eval_rewards/chosen": -0.46052512526512146, | |
| "eval_rewards/margins": 0.07810942083597183, | |
| "eval_rewards/rejected": -0.5386345982551575, | |
| "eval_runtime": 247.0319, | |
| "eval_samples_per_second": 2.591, | |
| "eval_steps_per_second": 0.162, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.6333595594020456, | |
| "grad_norm": 118.10179138183594, | |
| "learning_rate": 7.299155641144151e-06, | |
| "logits/chosen": -1.8322381973266602, | |
| "logits/rejected": -1.933854341506958, | |
| "logps/chosen": -5.868415832519531, | |
| "logps/rejected": -6.592629909515381, | |
| "loss": 23.4385, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.4805046617984772, | |
| "rewards/margins": 0.05157938599586487, | |
| "rewards/rejected": -0.5320841073989868, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.6372934697088907, | |
| "grad_norm": 109.38636016845703, | |
| "learning_rate": 7.167094614535808e-06, | |
| "logits/chosen": -1.8569862842559814, | |
| "logits/rejected": -2.0059075355529785, | |
| "logps/chosen": -5.487217903137207, | |
| "logps/rejected": -6.167322635650635, | |
| "loss": 22.5869, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.449852854013443, | |
| "rewards/margins": 0.05447512865066528, | |
| "rewards/rejected": -0.5043280720710754, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.6412273800157356, | |
| "grad_norm": 103.17411041259766, | |
| "learning_rate": 7.0355686110209266e-06, | |
| "logits/chosen": -1.8461898565292358, | |
| "logits/rejected": -1.979327917098999, | |
| "logps/chosen": -5.215566158294678, | |
| "logps/rejected": -5.694491386413574, | |
| "loss": 25.0548, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.4297303259372711, | |
| "rewards/margins": 0.038676049560308456, | |
| "rewards/rejected": -0.4684063792228699, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 119.45822143554688, | |
| "learning_rate": 6.904602470627572e-06, | |
| "logits/chosen": -1.8498461246490479, | |
| "logits/rejected": -1.8748204708099365, | |
| "logps/chosen": -5.026296138763428, | |
| "logps/rejected": -6.262150764465332, | |
| "loss": 20.1373, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.41805368661880493, | |
| "rewards/margins": 0.09066227823495865, | |
| "rewards/rejected": -0.5087159276008606, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.6490952006294256, | |
| "grad_norm": 100.11577606201172, | |
| "learning_rate": 6.7742209276479375e-06, | |
| "logits/chosen": -1.756453514099121, | |
| "logits/rejected": -1.8702630996704102, | |
| "logps/chosen": -4.895664215087891, | |
| "logps/rejected": -6.6527557373046875, | |
| "loss": 19.6483, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.40346187353134155, | |
| "rewards/margins": 0.10056328773498535, | |
| "rewards/rejected": -0.5040251016616821, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6530291109362707, | |
| "grad_norm": 91.52615356445312, | |
| "learning_rate": 6.644448605967034e-06, | |
| "logits/chosen": -1.8409059047698975, | |
| "logits/rejected": -1.81235671043396, | |
| "logps/chosen": -5.046607971191406, | |
| "logps/rejected": -6.257250785827637, | |
| "loss": 20.7867, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.41690778732299805, | |
| "rewards/margins": 0.09308180958032608, | |
| "rewards/rejected": -0.5099896192550659, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.6569630212431157, | |
| "grad_norm": 88.48129272460938, | |
| "learning_rate": 6.515310014412213e-06, | |
| "logits/chosen": -1.750044584274292, | |
| "logits/rejected": -1.850754737854004, | |
| "logps/chosen": -5.071898460388184, | |
| "logps/rejected": -6.416234016418457, | |
| "loss": 20.4118, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.4191848337650299, | |
| "rewards/margins": 0.09486079961061478, | |
| "rewards/rejected": -0.5140455961227417, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.6608969315499607, | |
| "grad_norm": 115.54293060302734, | |
| "learning_rate": 6.3868295421244234e-06, | |
| "logits/chosen": -1.8720375299453735, | |
| "logits/rejected": -1.8789699077606201, | |
| "logps/chosen": -5.031576156616211, | |
| "logps/rejected": -6.325047969818115, | |
| "loss": 20.8292, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.41966381669044495, | |
| "rewards/margins": 0.08945666253566742, | |
| "rewards/rejected": -0.5091204643249512, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.6648308418568056, | |
| "grad_norm": 188.25338745117188, | |
| "learning_rate": 6.2590314539520695e-06, | |
| "logits/chosen": -1.8582954406738281, | |
| "logits/rejected": -1.8773494958877563, | |
| "logps/chosen": -4.929537296295166, | |
| "logps/rejected": -5.595026969909668, | |
| "loss": 23.416, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.4085397720336914, | |
| "rewards/margins": 0.0541224367916584, | |
| "rewards/rejected": -0.4626621603965759, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.6687647521636507, | |
| "grad_norm": 102.74374389648438, | |
| "learning_rate": 6.131939885868357e-06, | |
| "logits/chosen": -1.8416216373443604, | |
| "logits/rejected": -1.872199296951294, | |
| "logps/chosen": -5.063607692718506, | |
| "logps/rejected": -6.0397844314575195, | |
| "loss": 21.714, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.4123477339744568, | |
| "rewards/margins": 0.07956025004386902, | |
| "rewards/rejected": -0.4919079840183258, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6726986624704957, | |
| "grad_norm": 319.5467834472656, | |
| "learning_rate": 6.005578840412964e-06, | |
| "logits/chosen": -1.8419501781463623, | |
| "logits/rejected": -1.8877992630004883, | |
| "logps/chosen": -5.0865797996521, | |
| "logps/rejected": -6.0676093101501465, | |
| "loss": 23.5725, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.4163091778755188, | |
| "rewards/margins": 0.0651886910200119, | |
| "rewards/rejected": -0.4814978539943695, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.6766325727773407, | |
| "grad_norm": 99.07049560546875, | |
| "learning_rate": 5.8799721821589065e-06, | |
| "logits/chosen": -1.8766921758651733, | |
| "logits/rejected": -1.9340273141860962, | |
| "logps/chosen": -5.623085021972656, | |
| "logps/rejected": -6.5402045249938965, | |
| "loss": 21.6329, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.4452272057533264, | |
| "rewards/margins": 0.07060147821903229, | |
| "rewards/rejected": -0.5158286690711975, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.6805664830841857, | |
| "grad_norm": 97.50904083251953, | |
| "learning_rate": 5.755143633205485e-06, | |
| "logits/chosen": -1.8800722360610962, | |
| "logits/rejected": -1.9219684600830078, | |
| "logps/chosen": -5.201785087585449, | |
| "logps/rejected": -6.21309232711792, | |
| "loss": 23.0811, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.40404200553894043, | |
| "rewards/margins": 0.0643695518374443, | |
| "rewards/rejected": -0.46841153502464294, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6845003933910306, | |
| "grad_norm": 79.03717041015625, | |
| "learning_rate": 5.631116768698115e-06, | |
| "logits/chosen": -1.89470636844635, | |
| "logits/rejected": -1.9333388805389404, | |
| "logps/chosen": -4.1788506507873535, | |
| "logps/rejected": -4.974513053894043, | |
| "loss": 20.706, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3371396064758301, | |
| "rewards/margins": 0.06806127727031708, | |
| "rewards/rejected": -0.40520086884498596, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.6884343036978757, | |
| "grad_norm": 83.89424133300781, | |
| "learning_rate": 5.507915012375928e-06, | |
| "logits/chosen": -1.8191906213760376, | |
| "logits/rejected": -1.9314982891082764, | |
| "logps/chosen": -4.707968711853027, | |
| "logps/rejected": -5.8728346824646, | |
| "loss": 20.151, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.37588876485824585, | |
| "rewards/margins": 0.09074429422616959, | |
| "rewards/rejected": -0.4666330814361572, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6923682140047207, | |
| "grad_norm": 142.26219177246094, | |
| "learning_rate": 5.385561632147949e-06, | |
| "logits/chosen": -1.7470595836639404, | |
| "logits/rejected": -1.866180419921875, | |
| "logps/chosen": -4.5063371658325195, | |
| "logps/rejected": -5.85304594039917, | |
| "loss": 21.0088, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3654096722602844, | |
| "rewards/margins": 0.0996973067522049, | |
| "rewards/rejected": -0.4651069641113281, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6963021243115657, | |
| "grad_norm": 119.41082000732422, | |
| "learning_rate": 5.264079735698712e-06, | |
| "logits/chosen": -1.7012622356414795, | |
| "logits/rejected": -1.8376951217651367, | |
| "logps/chosen": -4.514288902282715, | |
| "logps/rejected": -5.9205121994018555, | |
| "loss": 19.2854, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3598877787590027, | |
| "rewards/margins": 0.11095305532217026, | |
| "rewards/rejected": -0.4708408713340759, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.7002360346184107, | |
| "grad_norm": 135.3374786376953, | |
| "learning_rate": 5.143492266124164e-06, | |
| "logits/chosen": -1.8720405101776123, | |
| "logits/rejected": -1.9114782810211182, | |
| "logps/chosen": -5.398524284362793, | |
| "logps/rejected": -6.263294219970703, | |
| "loss": 24.2036, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.43828243017196655, | |
| "rewards/margins": 0.04635043814778328, | |
| "rewards/rejected": -0.4846329092979431, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.7041699449252558, | |
| "grad_norm": 241.8609161376953, | |
| "learning_rate": 5.02382199759859e-06, | |
| "logits/chosen": -1.7537660598754883, | |
| "logits/rejected": -1.9083173274993896, | |
| "logps/chosen": -4.677842140197754, | |
| "logps/rejected": -6.321853160858154, | |
| "loss": 18.7965, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.3807620108127594, | |
| "rewards/margins": 0.12588202953338623, | |
| "rewards/rejected": -0.506644070148468, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.7081038552321007, | |
| "grad_norm": 146.8525390625, | |
| "learning_rate": 4.90509153107351e-06, | |
| "logits/chosen": -1.8356144428253174, | |
| "logits/rejected": -1.8589966297149658, | |
| "logps/chosen": -4.85366153717041, | |
| "logps/rejected": -5.896268367767334, | |
| "loss": 21.6632, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.3950599133968353, | |
| "rewards/margins": 0.0631428137421608, | |
| "rewards/rejected": -0.4582027494907379, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7081038552321007, | |
| "eval_logits/chosen": -1.7989633083343506, | |
| "eval_logits/rejected": -1.9845269918441772, | |
| "eval_logps/chosen": -4.957736968994141, | |
| "eval_logps/rejected": -5.975499629974365, | |
| "eval_loss": 21.701828002929688, | |
| "eval_rewards/accuracies": 0.676562488079071, | |
| "eval_rewards/chosen": -0.3932535946369171, | |
| "eval_rewards/margins": 0.07974924147129059, | |
| "eval_rewards/rejected": -0.4730028212070465, | |
| "eval_runtime": 247.1628, | |
| "eval_samples_per_second": 2.589, | |
| "eval_steps_per_second": 0.162, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.7120377655389457, | |
| "grad_norm": 93.06356811523438, | |
| "learning_rate": 4.7873232900092414e-06, | |
| "logits/chosen": -1.7392574548721313, | |
| "logits/rejected": -1.8717933893203735, | |
| "logps/chosen": -5.1849870681762695, | |
| "logps/rejected": -6.5442795753479, | |
| "loss": 21.6833, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.40751686692237854, | |
| "rewards/margins": 0.08898209035396576, | |
| "rewards/rejected": -0.4964989125728607, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.7159716758457907, | |
| "grad_norm": 132.93258666992188, | |
| "learning_rate": 4.6705395161399936e-06, | |
| "logits/chosen": -1.630149483680725, | |
| "logits/rejected": -1.7430260181427002, | |
| "logps/chosen": -4.627926826477051, | |
| "logps/rejected": -5.704072952270508, | |
| "loss": 21.0561, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.35822638869285583, | |
| "rewards/margins": 0.08486741781234741, | |
| "rewards/rejected": -0.44309377670288086, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.7199055861526357, | |
| "grad_norm": 154.4405059814453, | |
| "learning_rate": 4.554762265273311e-06, | |
| "logits/chosen": -1.6530017852783203, | |
| "logits/rejected": -1.8972609043121338, | |
| "logps/chosen": -4.7651166915893555, | |
| "logps/rejected": -6.288015365600586, | |
| "loss": 18.4294, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.38746368885040283, | |
| "rewards/margins": 0.11755411326885223, | |
| "rewards/rejected": -0.5050178170204163, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.7238394964594808, | |
| "grad_norm": 102.96554565429688, | |
| "learning_rate": 4.440013403124597e-06, | |
| "logits/chosen": -1.7715336084365845, | |
| "logits/rejected": -1.7801620960235596, | |
| "logps/chosen": -4.690044403076172, | |
| "logps/rejected": -5.505946159362793, | |
| "loss": 22.2085, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.38295048475265503, | |
| "rewards/margins": 0.0695444643497467, | |
| "rewards/rejected": -0.45249491930007935, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.7277734067663257, | |
| "grad_norm": 315.7669677734375, | |
| "learning_rate": 4.326314601187538e-06, | |
| "logits/chosen": -1.8609683513641357, | |
| "logits/rejected": -1.9618885517120361, | |
| "logps/chosen": -5.145749092102051, | |
| "logps/rejected": -6.3977789878845215, | |
| "loss": 22.8082, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.41296815872192383, | |
| "rewards/margins": 0.07956559956073761, | |
| "rewards/rejected": -0.49253368377685547, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 80.00617980957031, | |
| "learning_rate": 4.213687332641244e-06, | |
| "logits/chosen": -1.8031578063964844, | |
| "logits/rejected": -1.8300079107284546, | |
| "logps/chosen": -4.608497142791748, | |
| "logps/rejected": -5.627560615539551, | |
| "loss": 21.8399, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.37813931703567505, | |
| "rewards/margins": 0.06815028190612793, | |
| "rewards/rejected": -0.44628962874412537, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.7356412273800157, | |
| "grad_norm": 103.14537048339844, | |
| "learning_rate": 4.1021528682948064e-06, | |
| "logits/chosen": -1.935750961303711, | |
| "logits/rejected": -2.0146632194519043, | |
| "logps/chosen": -5.339895725250244, | |
| "logps/rejected": -6.566633701324463, | |
| "loss": 22.9976, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.4325796663761139, | |
| "rewards/margins": 0.09251324087381363, | |
| "rewards/rejected": -0.5250928997993469, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.7395751376868608, | |
| "grad_norm": 122.98945617675781, | |
| "learning_rate": 3.99173227257009e-06, | |
| "logits/chosen": -1.7238731384277344, | |
| "logits/rejected": -1.8724349737167358, | |
| "logps/chosen": -4.779284954071045, | |
| "logps/rejected": -5.879046440124512, | |
| "loss": 20.3834, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3796885907649994, | |
| "rewards/margins": 0.08734798431396484, | |
| "rewards/rejected": -0.46703657507896423, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.7435090479937058, | |
| "grad_norm": 93.698974609375, | |
| "learning_rate": 3.8824463995235025e-06, | |
| "logits/chosen": -1.8173907995224, | |
| "logits/rejected": -1.8792049884796143, | |
| "logps/chosen": -4.475302696228027, | |
| "logps/rejected": -5.640101909637451, | |
| "loss": 21.2437, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.36529648303985596, | |
| "rewards/margins": 0.088043212890625, | |
| "rewards/rejected": -0.45333972573280334, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.7474429583005507, | |
| "grad_norm": 117.70040130615234, | |
| "learning_rate": 3.774315888907498e-06, | |
| "logits/chosen": -1.7552030086517334, | |
| "logits/rejected": -1.952716588973999, | |
| "logps/chosen": -4.961709976196289, | |
| "logps/rejected": -6.055539131164551, | |
| "loss": 20.6516, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.40568727254867554, | |
| "rewards/margins": 0.08449403941631317, | |
| "rewards/rejected": -0.4901813566684723, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7513768686073957, | |
| "grad_norm": 78.84608459472656, | |
| "learning_rate": 3.6673611622725326e-06, | |
| "logits/chosen": -1.7900937795639038, | |
| "logits/rejected": -1.8674201965332031, | |
| "logps/chosen": -4.5862603187561035, | |
| "logps/rejected": -6.226560592651367, | |
| "loss": 21.1241, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3641781508922577, | |
| "rewards/margins": 0.0932440310716629, | |
| "rewards/rejected": -0.4574221670627594, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.7553107789142408, | |
| "grad_norm": 101.40766143798828, | |
| "learning_rate": 3.561602419110267e-06, | |
| "logits/chosen": -1.7657877206802368, | |
| "logits/rejected": -1.849156141281128, | |
| "logps/chosen": -4.796189308166504, | |
| "logps/rejected": -5.998377323150635, | |
| "loss": 20.5484, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.388437956571579, | |
| "rewards/margins": 0.09469521045684814, | |
| "rewards/rejected": -0.4831331670284271, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.7592446892210858, | |
| "grad_norm": 78.99065399169922, | |
| "learning_rate": 3.4570596330386775e-06, | |
| "logits/chosen": -1.844770073890686, | |
| "logits/rejected": -1.9092273712158203, | |
| "logps/chosen": -4.890164375305176, | |
| "logps/rejected": -6.159483432769775, | |
| "loss": 21.2832, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.40066736936569214, | |
| "rewards/margins": 0.08416299521923065, | |
| "rewards/rejected": -0.484830379486084, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.7631785995279308, | |
| "grad_norm": 109.26583099365234, | |
| "learning_rate": 3.3537525480298314e-06, | |
| "logits/chosen": -1.8256486654281616, | |
| "logits/rejected": -1.8702704906463623, | |
| "logps/chosen": -5.2911200523376465, | |
| "logps/rejected": -6.402520656585693, | |
| "loss": 21.1085, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.41231974959373474, | |
| "rewards/margins": 0.08990296721458435, | |
| "rewards/rejected": -0.5022227168083191, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.7671125098347757, | |
| "grad_norm": 199.02462768554688, | |
| "learning_rate": 3.251700674681042e-06, | |
| "logits/chosen": -1.8512033224105835, | |
| "logits/rejected": -1.9163382053375244, | |
| "logps/chosen": -4.809088230133057, | |
| "logps/rejected": -6.037604331970215, | |
| "loss": 22.0358, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3932774066925049, | |
| "rewards/margins": 0.07615714520215988, | |
| "rewards/rejected": -0.46943455934524536, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7710464201416207, | |
| "grad_norm": 95.2743911743164, | |
| "learning_rate": 3.1509232865300886e-06, | |
| "logits/chosen": -1.8119735717773438, | |
| "logits/rejected": -1.8144118785858154, | |
| "logps/chosen": -4.221276760101318, | |
| "logps/rejected": -5.142813205718994, | |
| "loss": 20.943, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.3364713788032532, | |
| "rewards/margins": 0.07149138301610947, | |
| "rewards/rejected": -0.40796273946762085, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.7749803304484658, | |
| "grad_norm": 123.05020904541016, | |
| "learning_rate": 3.051439416415216e-06, | |
| "logits/chosen": -1.8963220119476318, | |
| "logits/rejected": -1.8464494943618774, | |
| "logps/chosen": -4.875748634338379, | |
| "logps/rejected": -6.1705427169799805, | |
| "loss": 19.8297, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.37498602271080017, | |
| "rewards/margins": 0.09870316833257675, | |
| "rewards/rejected": -0.47368916869163513, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.7789142407553108, | |
| "grad_norm": 89.97714233398438, | |
| "learning_rate": 2.95326785288058e-06, | |
| "logits/chosen": -1.7886253595352173, | |
| "logits/rejected": -1.9196971654891968, | |
| "logps/chosen": -5.477912902832031, | |
| "logps/rejected": -5.896014213562012, | |
| "loss": 24.1286, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.41751259565353394, | |
| "rewards/margins": 0.04653157666325569, | |
| "rewards/rejected": -0.4640441834926605, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.7828481510621558, | |
| "grad_norm": 81.39910125732422, | |
| "learning_rate": 2.856427136627854e-06, | |
| "logits/chosen": -1.6037006378173828, | |
| "logits/rejected": -1.8636585474014282, | |
| "logps/chosen": -4.456266403198242, | |
| "logps/rejected": -6.3112382888793945, | |
| "loss": 17.7645, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.36230579018592834, | |
| "rewards/margins": 0.1193535327911377, | |
| "rewards/rejected": -0.48165932297706604, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.7867820613690008, | |
| "grad_norm": 113.1491928100586, | |
| "learning_rate": 2.7609355570146166e-06, | |
| "logits/chosen": -1.6726270914077759, | |
| "logits/rejected": -1.876440405845642, | |
| "logps/chosen": -4.736680030822754, | |
| "logps/rejected": -6.039677143096924, | |
| "loss": 20.4253, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3803684711456299, | |
| "rewards/margins": 0.10021813958883286, | |
| "rewards/rejected": -0.48058661818504333, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7867820613690008, | |
| "eval_logits/chosen": -1.784537672996521, | |
| "eval_logits/rejected": -1.951536774635315, | |
| "eval_logps/chosen": -5.034027099609375, | |
| "eval_logps/rejected": -6.096713066101074, | |
| "eval_loss": 21.52120590209961, | |
| "eval_rewards/accuracies": 0.676562488079071, | |
| "eval_rewards/chosen": -0.40088263154029846, | |
| "eval_rewards/margins": 0.08424156904220581, | |
| "eval_rewards/rejected": -0.4851241707801819, | |
| "eval_runtime": 247.1498, | |
| "eval_samples_per_second": 2.59, | |
| "eval_steps_per_second": 0.162, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7907159716758458, | |
| "grad_norm": 110.6238021850586, | |
| "learning_rate": 2.6668111486002247e-06, | |
| "logits/chosen": -1.8057550191879272, | |
| "logits/rejected": -1.8280699253082275, | |
| "logps/chosen": -5.131612777709961, | |
| "logps/rejected": -5.8884477615356445, | |
| "loss": 23.5826, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.41880735754966736, | |
| "rewards/margins": 0.061731867492198944, | |
| "rewards/rejected": -0.4805392324924469, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.7946498819826908, | |
| "grad_norm": 105.42929077148438, | |
| "learning_rate": 2.574071687739801e-06, | |
| "logits/chosen": -1.8108255863189697, | |
| "logits/rejected": -1.903489112854004, | |
| "logps/chosen": -5.22059440612793, | |
| "logps/rejected": -6.197423458099365, | |
| "loss": 20.6822, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.432489812374115, | |
| "rewards/margins": 0.06953287124633789, | |
| "rewards/rejected": -0.5020226836204529, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7985837922895358, | |
| "grad_norm": 106.72551727294922, | |
| "learning_rate": 2.48273468922699e-06, | |
| "logits/chosen": -1.8216731548309326, | |
| "logits/rejected": -1.9628276824951172, | |
| "logps/chosen": -5.797194480895996, | |
| "logps/rejected": -7.790940761566162, | |
| "loss": 17.9958, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.4596679210662842, | |
| "rewards/margins": 0.12584641575813293, | |
| "rewards/rejected": -0.5855143666267395, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.8025177025963808, | |
| "grad_norm": 107.29378509521484, | |
| "learning_rate": 2.3928174029861005e-06, | |
| "logits/chosen": -1.830509901046753, | |
| "logits/rejected": -2.0430874824523926, | |
| "logps/chosen": -5.2167768478393555, | |
| "logps/rejected": -6.807923793792725, | |
| "loss": 18.4616, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.43341606855392456, | |
| "rewards/margins": 0.11867929995059967, | |
| "rewards/rejected": -0.552095353603363, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 103.05904388427734, | |
| "learning_rate": 2.304336810814305e-06, | |
| "logits/chosen": -1.8188985586166382, | |
| "logits/rejected": -1.888267159461975, | |
| "logps/chosen": -5.591950416564941, | |
| "logps/rejected": -6.8574419021606445, | |
| "loss": 19.7411, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.4638952612876892, | |
| "rewards/margins": 0.09795578569173813, | |
| "rewards/rejected": -0.5618510842323303, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.8103855232100708, | |
| "grad_norm": 116.0064926147461, | |
| "learning_rate": 2.2173096231744217e-06, | |
| "logits/chosen": -1.7337671518325806, | |
| "logits/rejected": -1.889411211013794, | |
| "logps/chosen": -5.262411594390869, | |
| "logps/rejected": -6.726646423339844, | |
| "loss": 19.8161, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.4401858448982239, | |
| "rewards/margins": 0.11178751289844513, | |
| "rewards/rejected": -0.5519734025001526, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.8143194335169158, | |
| "grad_norm": 128.4368896484375, | |
| "learning_rate": 2.131752276039002e-06, | |
| "logits/chosen": -1.7105789184570312, | |
| "logits/rejected": -1.809747338294983, | |
| "logps/chosen": -4.921965599060059, | |
| "logps/rejected": -6.313961982727051, | |
| "loss": 21.3099, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4140931963920593, | |
| "rewards/margins": 0.08928213268518448, | |
| "rewards/rejected": -0.5033753514289856, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.8182533438237608, | |
| "grad_norm": 209.68450927734375, | |
| "learning_rate": 2.0476809277862154e-06, | |
| "logits/chosen": -1.803832769393921, | |
| "logits/rejected": -1.8204796314239502, | |
| "logps/chosen": -5.523232460021973, | |
| "logps/rejected": -6.527283668518066, | |
| "loss": 22.8854, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.46234196424484253, | |
| "rewards/margins": 0.08235760778188705, | |
| "rewards/rejected": -0.5446996688842773, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.8221872541306058, | |
| "grad_norm": 119.3587417602539, | |
| "learning_rate": 1.9651114561481733e-06, | |
| "logits/chosen": -1.7038593292236328, | |
| "logits/rejected": -1.8346683979034424, | |
| "logps/chosen": -6.007963180541992, | |
| "logps/rejected": -6.655022621154785, | |
| "loss": 25.867, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.4634365141391754, | |
| "rewards/margins": 0.06129822880029678, | |
| "rewards/rejected": -0.5247347354888916, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.8261211644374509, | |
| "grad_norm": 79.99335479736328, | |
| "learning_rate": 1.884059455212266e-06, | |
| "logits/chosen": -1.7199535369873047, | |
| "logits/rejected": -1.8485997915267944, | |
| "logps/chosen": -5.268481254577637, | |
| "logps/rejected": -6.340824604034424, | |
| "loss": 22.6915, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.4204502999782562, | |
| "rewards/margins": 0.08819383382797241, | |
| "rewards/rejected": -0.5086441040039062, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.8300550747442959, | |
| "grad_norm": 130.1524200439453, | |
| "learning_rate": 1.8045402324760519e-06, | |
| "logits/chosen": -1.7958511114120483, | |
| "logits/rejected": -1.8302634954452515, | |
| "logps/chosen": -5.048253059387207, | |
| "logps/rejected": -5.484943389892578, | |
| "loss": 25.9034, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.4145466685295105, | |
| "rewards/margins": 0.03546775132417679, | |
| "rewards/rejected": -0.4500144422054291, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.8339889850511408, | |
| "grad_norm": 127.26850128173828, | |
| "learning_rate": 1.7265688059562802e-06, | |
| "logits/chosen": -1.7270920276641846, | |
| "logits/rejected": -1.8327009677886963, | |
| "logps/chosen": -4.603668212890625, | |
| "logps/rejected": -5.5318803787231445, | |
| "loss": 22.0355, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.37435227632522583, | |
| "rewards/margins": 0.06871159374713898, | |
| "rewards/rejected": -0.4430638253688812, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.8379228953579858, | |
| "grad_norm": 218.64767456054688, | |
| "learning_rate": 1.6501599013525948e-06, | |
| "logits/chosen": -1.7791879177093506, | |
| "logits/rejected": -1.9213966131210327, | |
| "logps/chosen": -5.5008544921875, | |
| "logps/rejected": -6.275876522064209, | |
| "loss": 23.6332, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.4242435395717621, | |
| "rewards/margins": 0.06345196068286896, | |
| "rewards/rejected": -0.48769545555114746, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.8418568056648308, | |
| "grad_norm": 116.54981231689453, | |
| "learning_rate": 1.5753279492664264e-06, | |
| "logits/chosen": -1.5851417779922485, | |
| "logits/rejected": -1.6315984725952148, | |
| "logps/chosen": -5.29194450378418, | |
| "logps/rejected": -6.206376075744629, | |
| "loss": 21.8335, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.4099368155002594, | |
| "rewards/margins": 0.08156135678291321, | |
| "rewards/rejected": -0.4914981424808502, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.8457907159716759, | |
| "grad_norm": 119.92091369628906, | |
| "learning_rate": 1.5020870824756295e-06, | |
| "logits/chosen": -1.7545772790908813, | |
| "logits/rejected": -1.8091652393341064, | |
| "logps/chosen": -5.3978071212768555, | |
| "logps/rejected": -6.283621311187744, | |
| "loss": 24.1831, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.42009788751602173, | |
| "rewards/margins": 0.04570513963699341, | |
| "rewards/rejected": -0.46580299735069275, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8497246262785209, | |
| "grad_norm": 86.9328842163086, | |
| "learning_rate": 1.4304511332653614e-06, | |
| "logits/chosen": -1.7142693996429443, | |
| "logits/rejected": -1.7047803401947021, | |
| "logps/chosen": -4.968667507171631, | |
| "logps/rejected": -5.843184471130371, | |
| "loss": 22.647, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.40169453620910645, | |
| "rewards/margins": 0.06825835257768631, | |
| "rewards/rejected": -0.46995291113853455, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.8536585365853658, | |
| "grad_norm": 112.41460418701172, | |
| "learning_rate": 1.3604336308157172e-06, | |
| "logits/chosen": -1.7628469467163086, | |
| "logits/rejected": -1.8740917444229126, | |
| "logps/chosen": -5.511053562164307, | |
| "logps/rejected": -6.322039604187012, | |
| "loss": 23.8528, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.435097873210907, | |
| "rewards/margins": 0.0529196560382843, | |
| "rewards/rejected": -0.48801755905151367, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.8575924468922108, | |
| "grad_norm": 113.66337585449219, | |
| "learning_rate": 1.292047798646605e-06, | |
| "logits/chosen": -1.7777509689331055, | |
| "logits/rejected": -1.9056212902069092, | |
| "logps/chosen": -5.063532829284668, | |
| "logps/rejected": -6.671146392822266, | |
| "loss": 18.4221, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.41087451577186584, | |
| "rewards/margins": 0.11442618072032928, | |
| "rewards/rejected": -0.5253006815910339, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.8615263571990559, | |
| "grad_norm": 109.1603775024414, | |
| "learning_rate": 1.2253065521203422e-06, | |
| "logits/chosen": -1.7811886072158813, | |
| "logits/rejected": -1.8713340759277344, | |
| "logps/chosen": -5.659114360809326, | |
| "logps/rejected": -6.662581443786621, | |
| "loss": 21.13, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.4250081479549408, | |
| "rewards/margins": 0.07375886291265488, | |
| "rewards/rejected": -0.4987669885158539, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.8654602675059009, | |
| "grad_norm": 155.84959411621094, | |
| "learning_rate": 1.1602224960024689e-06, | |
| "logits/chosen": -1.821662187576294, | |
| "logits/rejected": -1.9437990188598633, | |
| "logps/chosen": -4.78905725479126, | |
| "logps/rejected": -6.0805206298828125, | |
| "loss": 22.2144, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.3794526159763336, | |
| "rewards/margins": 0.0814034566283226, | |
| "rewards/rejected": -0.46085605025291443, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8654602675059009, | |
| "eval_logits/chosen": -1.8295215368270874, | |
| "eval_logits/rejected": -2.0107364654541016, | |
| "eval_logps/chosen": -4.755988121032715, | |
| "eval_logps/rejected": -5.804187774658203, | |
| "eval_loss": 21.543960571289062, | |
| "eval_rewards/accuracies": 0.671875, | |
| "eval_rewards/chosen": -0.37307873368263245, | |
| "eval_rewards/margins": 0.08279295265674591, | |
| "eval_rewards/rejected": -0.45587167143821716, | |
| "eval_runtime": 247.0834, | |
| "eval_samples_per_second": 2.59, | |
| "eval_steps_per_second": 0.162, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8693941778127459, | |
| "grad_norm": 110.79974365234375, | |
| "learning_rate": 1.0968079220812056e-06, | |
| "logits/chosen": -1.742896318435669, | |
| "logits/rejected": -1.8495962619781494, | |
| "logps/chosen": -5.089158535003662, | |
| "logps/rejected": -5.888894081115723, | |
| "loss": 23.3241, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.4033307135105133, | |
| "rewards/margins": 0.059556812047958374, | |
| "rewards/rejected": -0.4628875255584717, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.8733280881195908, | |
| "grad_norm": 145.94041442871094, | |
| "learning_rate": 1.035074806846016e-06, | |
| "logits/chosen": -1.7783820629119873, | |
| "logits/rejected": -1.8108813762664795, | |
| "logps/chosen": -4.913872718811035, | |
| "logps/rejected": -5.677584171295166, | |
| "loss": 22.9987, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.39593344926834106, | |
| "rewards/margins": 0.060481537133455276, | |
| "rewards/rejected": -0.45641499757766724, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.8772619984264359, | |
| "grad_norm": 88.40230560302734, | |
| "learning_rate": 9.750348092257368e-07, | |
| "logits/chosen": -1.7553093433380127, | |
| "logits/rejected": -1.8539350032806396, | |
| "logps/chosen": -4.951904296875, | |
| "logps/rejected": -5.487706661224365, | |
| "loss": 23.8982, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.38161516189575195, | |
| "rewards/margins": 0.06274580210447311, | |
| "rewards/rejected": -0.44436097145080566, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.8811959087332809, | |
| "grad_norm": 170.2921600341797, | |
| "learning_rate": 9.166992683866604e-07, | |
| "logits/chosen": -1.7590631246566772, | |
| "logits/rejected": -1.7259708642959595, | |
| "logps/chosen": -4.942062854766846, | |
| "logps/rejected": -5.8028435707092285, | |
| "loss": 25.37, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.39719924330711365, | |
| "rewards/margins": 0.04227542132139206, | |
| "rewards/rejected": -0.4394746720790863, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.8851298190401259, | |
| "grad_norm": 93.46953582763672, | |
| "learning_rate": 8.600792015910209e-07, | |
| "logits/chosen": -1.576556921005249, | |
| "logits/rejected": -1.7663720846176147, | |
| "logps/chosen": -4.863258361816406, | |
| "logps/rejected": -6.207003593444824, | |
| "loss": 20.464, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3765638470649719, | |
| "rewards/margins": 0.1050010547041893, | |
| "rewards/rejected": -0.4815649092197418, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.8890637293469709, | |
| "grad_norm": 304.78228759765625, | |
| "learning_rate": 8.051853021162714e-07, | |
| "logits/chosen": -1.7251790761947632, | |
| "logits/rejected": -1.73681640625, | |
| "logps/chosen": -4.827609062194824, | |
| "logps/rejected": -5.7671942710876465, | |
| "loss": 22.8032, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.37261947989463806, | |
| "rewards/margins": 0.05082275718450546, | |
| "rewards/rejected": -0.4234422743320465, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.8929976396538158, | |
| "grad_norm": 127.6941909790039, | |
| "learning_rate": 7.520279372355543e-07, | |
| "logits/chosen": -1.8332078456878662, | |
| "logits/rejected": -1.9421097040176392, | |
| "logps/chosen": -4.690258026123047, | |
| "logps/rejected": -5.638973236083984, | |
| "loss": 22.6266, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.36157697439193726, | |
| "rewards/margins": 0.06764058768749237, | |
| "rewards/rejected": -0.4292175769805908, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.8969315499606609, | |
| "grad_norm": 114.48271179199219, | |
| "learning_rate": 7.107591077238474e-07, | |
| "logits/chosen": -1.7219377756118774, | |
| "logits/rejected": -1.7783973217010498, | |
| "logps/chosen": -4.36461877822876, | |
| "logps/rejected": -5.683481693267822, | |
| "loss": 19.6075, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.34781816601753235, | |
| "rewards/margins": 0.10409276187419891, | |
| "rewards/rejected": -0.45191091299057007, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.9008654602675059, | |
| "grad_norm": 93.94386291503906, | |
| "learning_rate": 6.607525825730854e-07, | |
| "logits/chosen": -1.7087438106536865, | |
| "logits/rejected": -1.7997496128082275, | |
| "logps/chosen": -4.1009931564331055, | |
| "logps/rejected": -5.165204048156738, | |
| "loss": 20.4201, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.33012428879737854, | |
| "rewards/margins": 0.08290891349315643, | |
| "rewards/rejected": -0.4130331575870514, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.9047993705743509, | |
| "grad_norm": 72.78524780273438, | |
| "learning_rate": 6.125098696094589e-07, | |
| "logits/chosen": -1.664223313331604, | |
| "logits/rejected": -1.7873681783676147, | |
| "logps/chosen": -4.817714214324951, | |
| "logps/rejected": -5.75089168548584, | |
| "loss": 22.5165, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.38428038358688354, | |
| "rewards/margins": 0.07553905248641968, | |
| "rewards/rejected": -0.45981940627098083, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.9087332808811959, | |
| "grad_norm": 153.98193359375, | |
| "learning_rate": 5.660400799616572e-07, | |
| "logits/chosen": -1.721644639968872, | |
| "logits/rejected": -1.6672241687774658, | |
| "logps/chosen": -4.418652534484863, | |
| "logps/rejected": -4.775282859802246, | |
| "loss": 26.2547, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.3543107211589813, | |
| "rewards/margins": 0.012788213789463043, | |
| "rewards/rejected": -0.36709895730018616, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.912667191188041, | |
| "grad_norm": 163.8769073486328, | |
| "learning_rate": 5.213519899237018e-07, | |
| "logits/chosen": -1.7301452159881592, | |
| "logits/rejected": -1.772040605545044, | |
| "logps/chosen": -4.367914199829102, | |
| "logps/rejected": -5.680964469909668, | |
| "loss": 22.0063, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.34692102670669556, | |
| "rewards/margins": 0.07109556347131729, | |
| "rewards/rejected": -0.41801658272743225, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.9166011014948859, | |
| "grad_norm": 102.21440124511719, | |
| "learning_rate": 4.784540392974668e-07, | |
| "logits/chosen": -1.6948988437652588, | |
| "logits/rejected": -1.7838863134384155, | |
| "logps/chosen": -4.535280704498291, | |
| "logps/rejected": -5.534449577331543, | |
| "loss": 20.1151, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.33810168504714966, | |
| "rewards/margins": 0.0977487862110138, | |
| "rewards/rejected": -0.43585047125816345, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.9205350118017309, | |
| "grad_norm": 85.85963439941406, | |
| "learning_rate": 4.3735432979872593e-07, | |
| "logits/chosen": -1.635202169418335, | |
| "logits/rejected": -1.7734687328338623, | |
| "logps/chosen": -3.72965669631958, | |
| "logps/rejected": -4.853572368621826, | |
| "loss": 20.3521, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.29524677991867065, | |
| "rewards/margins": 0.09069748222827911, | |
| "rewards/rejected": -0.38594430685043335, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.9244689221085759, | |
| "grad_norm": 82.03185272216797, | |
| "learning_rate": 3.980606235270623e-07, | |
| "logits/chosen": -1.671134352684021, | |
| "logits/rejected": -1.7984254360198975, | |
| "logps/chosen": -4.129214763641357, | |
| "logps/rejected": -5.1452956199646, | |
| "loss": 19.8155, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.32818371057510376, | |
| "rewards/margins": 0.08804444223642349, | |
| "rewards/rejected": -0.41622814536094666, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.9284028324154209, | |
| "grad_norm": 222.82241821289062, | |
| "learning_rate": 3.6058034149992206e-07, | |
| "logits/chosen": -1.7147480249404907, | |
| "logits/rejected": -1.7917505502700806, | |
| "logps/chosen": -4.810155868530273, | |
| "logps/rejected": -5.444459438323975, | |
| "loss": 22.2583, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3714321553707123, | |
| "rewards/margins": 0.06614001095294952, | |
| "rewards/rejected": -0.4375721514225006, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.932336742722266, | |
| "grad_norm": 76.60244750976562, | |
| "learning_rate": 3.2492056225107205e-07, | |
| "logits/chosen": -1.7337175607681274, | |
| "logits/rejected": -1.9063804149627686, | |
| "logps/chosen": -4.483534812927246, | |
| "logps/rejected": -5.9358086585998535, | |
| "loss": 17.4638, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.3575553297996521, | |
| "rewards/margins": 0.11146707832813263, | |
| "rewards/rejected": -0.4690224528312683, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.9362706530291109, | |
| "grad_norm": 89.62410736083984, | |
| "learning_rate": 2.910880204937527e-07, | |
| "logits/chosen": -1.6736266613006592, | |
| "logits/rejected": -1.7633039951324463, | |
| "logps/chosen": -4.219028949737549, | |
| "logps/rejected": -5.474398136138916, | |
| "loss": 21.6673, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.33258548378944397, | |
| "rewards/margins": 0.08430587500333786, | |
| "rewards/rejected": -0.4168913960456848, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.9402045633359559, | |
| "grad_norm": 100.01248931884766, | |
| "learning_rate": 2.5908910584875545e-07, | |
| "logits/chosen": -1.735568642616272, | |
| "logits/rejected": -1.7764047384262085, | |
| "logps/chosen": -5.126141548156738, | |
| "logps/rejected": -6.031923294067383, | |
| "loss": 21.8897, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.38696879148483276, | |
| "rewards/margins": 0.07426150143146515, | |
| "rewards/rejected": -0.4612302780151367, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.9441384736428009, | |
| "grad_norm": 91.11402893066406, | |
| "learning_rate": 2.2892986163767782e-07, | |
| "logits/chosen": -1.7912143468856812, | |
| "logits/rejected": -1.8166290521621704, | |
| "logps/chosen": -4.271235466003418, | |
| "logps/rejected": -5.315844535827637, | |
| "loss": 21.1156, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3341251313686371, | |
| "rewards/margins": 0.08204144984483719, | |
| "rewards/rejected": -0.41616660356521606, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.9441384736428009, | |
| "eval_logits/chosen": -1.7999874353408813, | |
| "eval_logits/rejected": -1.9898983240127563, | |
| "eval_logps/chosen": -4.466375350952148, | |
| "eval_logps/rejected": -5.501485347747803, | |
| "eval_loss": 21.576175689697266, | |
| "eval_rewards/accuracies": 0.671875, | |
| "eval_rewards/chosen": -0.3441174328327179, | |
| "eval_rewards/margins": 0.08148395270109177, | |
| "eval_rewards/rejected": -0.42560139298439026, | |
| "eval_runtime": 247.2098, | |
| "eval_samples_per_second": 2.589, | |
| "eval_steps_per_second": 0.162, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.948072383949646, | |
| "grad_norm": 155.07894897460938, | |
| "learning_rate": 2.006159837415822e-07, | |
| "logits/chosen": -1.6891205310821533, | |
| "logits/rejected": -1.793623685836792, | |
| "logps/chosen": -4.2115583419799805, | |
| "logps/rejected": -4.851222038269043, | |
| "loss": 23.1406, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.3286646902561188, | |
| "rewards/margins": 0.060015954077243805, | |
| "rewards/rejected": -0.388680636882782, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.952006294256491, | |
| "grad_norm": 95.4771499633789, | |
| "learning_rate": 1.741528195252673e-07, | |
| "logits/chosen": -1.603528380393982, | |
| "logits/rejected": -1.6641511917114258, | |
| "logps/chosen": -4.216994285583496, | |
| "logps/rejected": -5.5280866622924805, | |
| "loss": 21.9906, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.3291688561439514, | |
| "rewards/margins": 0.07073946297168732, | |
| "rewards/rejected": -0.39990830421447754, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.955940204563336, | |
| "grad_norm": 86.5512466430664, | |
| "learning_rate": 1.495453668273672e-07, | |
| "logits/chosen": -1.6800447702407837, | |
| "logits/rejected": -1.7727559804916382, | |
| "logps/chosen": -3.5090785026550293, | |
| "logps/rejected": -4.510993957519531, | |
| "loss": 20.7094, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.2760564386844635, | |
| "rewards/margins": 0.08404560387134552, | |
| "rewards/rejected": -0.3601020276546478, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.9598741148701809, | |
| "grad_norm": 141.68653869628906, | |
| "learning_rate": 1.267982730164574e-07, | |
| "logits/chosen": -1.693539023399353, | |
| "logits/rejected": -1.7461254596710205, | |
| "logps/chosen": -4.826737403869629, | |
| "logps/rejected": -6.116568565368652, | |
| "loss": 20.7372, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.3675940930843353, | |
| "rewards/margins": 0.09557177871465683, | |
| "rewards/rejected": -0.46316590905189514, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.963808025177026, | |
| "grad_norm": 114.91510772705078, | |
| "learning_rate": 1.059158341133526e-07, | |
| "logits/chosen": -1.6573117971420288, | |
| "logits/rejected": -1.8423898220062256, | |
| "logps/chosen": -4.616578578948975, | |
| "logps/rejected": -5.3418073654174805, | |
| "loss": 22.5625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.35180655121803284, | |
| "rewards/margins": 0.059971295297145844, | |
| "rewards/rejected": -0.4117778241634369, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 165.18972778320312, | |
| "learning_rate": 8.69019939797544e-08, | |
| "logits/chosen": -1.7003284692764282, | |
| "logits/rejected": -1.8333613872528076, | |
| "logps/chosen": -4.7574567794799805, | |
| "logps/rejected": -5.909626483917236, | |
| "loss": 23.7927, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.3618602454662323, | |
| "rewards/margins": 0.08184085041284561, | |
| "rewards/rejected": -0.4437011182308197, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.971675845790716, | |
| "grad_norm": 83.45700073242188, | |
| "learning_rate": 6.976034357342066e-08, | |
| "logits/chosen": -1.7630846500396729, | |
| "logits/rejected": -1.90488600730896, | |
| "logps/chosen": -5.025973320007324, | |
| "logps/rejected": -6.5638556480407715, | |
| "loss": 20.0806, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3844282627105713, | |
| "rewards/margins": 0.10775335878133774, | |
| "rewards/rejected": -0.4921816289424896, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.975609756097561, | |
| "grad_norm": 83.81243133544922, | |
| "learning_rate": 5.449412026997114e-08, | |
| "logits/chosen": -1.678797721862793, | |
| "logits/rejected": -1.7921825647354126, | |
| "logps/chosen": -4.162950038909912, | |
| "logps/rejected": -5.131404399871826, | |
| "loss": 21.4719, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.3323225975036621, | |
| "rewards/margins": 0.07925871759653091, | |
| "rewards/rejected": -0.4115813374519348, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.9795436664044059, | |
| "grad_norm": 102.78945922851562, | |
| "learning_rate": 4.1106207251475626e-08, | |
| "logits/chosen": -1.5444132089614868, | |
| "logits/rejected": -1.7323096990585327, | |
| "logps/chosen": -4.179232597351074, | |
| "logps/rejected": -4.8622236251831055, | |
| "loss": 23.0477, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.3256162106990814, | |
| "rewards/margins": 0.05740431696176529, | |
| "rewards/rejected": -0.3830205798149109, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.983477576711251, | |
| "grad_norm": 108.89738464355469, | |
| "learning_rate": 2.9599132961940502e-08, | |
| "logits/chosen": -1.7586724758148193, | |
| "logits/rejected": -1.8370046615600586, | |
| "logps/chosen": -4.328100681304932, | |
| "logps/rejected": -5.9137749671936035, | |
| "loss": 18.9963, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.34320682287216187, | |
| "rewards/margins": 0.11217638105154037, | |
| "rewards/rejected": -0.4553832411766052, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.987411487018096, | |
| "grad_norm": 109.5070571899414, | |
| "learning_rate": 1.997507062978077e-08, | |
| "logits/chosen": -1.718653678894043, | |
| "logits/rejected": -1.8099029064178467, | |
| "logps/chosen": -5.313704490661621, | |
| "logps/rejected": -6.312743186950684, | |
| "loss": 23.4344, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.38815608620643616, | |
| "rewards/margins": 0.08191094547510147, | |
| "rewards/rejected": -0.47006702423095703, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.991345397324941, | |
| "grad_norm": 151.8542938232422, | |
| "learning_rate": 1.2235837857387246e-08, | |
| "logits/chosen": -1.7727177143096924, | |
| "logits/rejected": -1.7255008220672607, | |
| "logps/chosen": -4.564472198486328, | |
| "logps/rejected": -6.008821487426758, | |
| "loss": 22.8326, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.367508202791214, | |
| "rewards/margins": 0.0941065102815628, | |
| "rewards/rejected": -0.4616147577762604, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.995279307631786, | |
| "grad_norm": 94.18794250488281, | |
| "learning_rate": 6.382896277852269e-09, | |
| "logits/chosen": -1.7885596752166748, | |
| "logits/rejected": -1.7931379079818726, | |
| "logps/chosen": -4.7318620681762695, | |
| "logps/rejected": -6.309884071350098, | |
| "loss": 21.1497, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.37726715207099915, | |
| "rewards/margins": 0.10229357331991196, | |
| "rewards/rejected": -0.4795607030391693, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.999213217938631, | |
| "grad_norm": 247.1566619873047, | |
| "learning_rate": 2.417351278926061e-09, | |
| "logits/chosen": -1.7155132293701172, | |
| "logits/rejected": -1.8437669277191162, | |
| "logps/chosen": -4.636131763458252, | |
| "logps/rejected": -5.921980857849121, | |
| "loss": 20.0677, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.36834850907325745, | |
| "rewards/margins": 0.08465222269296646, | |
| "rewards/rejected": -0.4530007243156433, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1271, | |
| "total_flos": 0.0, | |
| "train_loss": 22.261434271427518, | |
| "train_runtime": 20998.5435, | |
| "train_samples_per_second": 0.968, | |
| "train_steps_per_second": 0.061 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1271, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |