| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9984301412872841, | |
| "eval_steps": 100, | |
| "global_step": 477, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0020931449502878076, | |
| "grad_norm": 4.875121866371553, | |
| "learning_rate": 4.166666666666666e-09, | |
| "logits/chosen": -2.238138437271118, | |
| "logits/rejected": -2.554456949234009, | |
| "logps/chosen": -443.7523193359375, | |
| "logps/rejected": -491.8927001953125, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.020931449502878074, | |
| "grad_norm": 5.553929970393955, | |
| "learning_rate": 4.166666666666667e-08, | |
| "logits/chosen": -2.4126930236816406, | |
| "logits/rejected": -2.5005030632019043, | |
| "logps/chosen": -418.43328857421875, | |
| "logps/rejected": -405.0360107421875, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": 0.0017023859545588493, | |
| "rewards/margins": 0.00048581857117824256, | |
| "rewards/rejected": 0.0012165673542767763, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.04186289900575615, | |
| "grad_norm": 4.513029874801273, | |
| "learning_rate": 8.333333333333334e-08, | |
| "logits/chosen": -2.208683490753174, | |
| "logits/rejected": -2.485910415649414, | |
| "logps/chosen": -428.45208740234375, | |
| "logps/rejected": -408.13763427734375, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": 0.0008482746779918671, | |
| "rewards/margins": -0.00037219192017801106, | |
| "rewards/rejected": 0.0012204666854813695, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06279434850863422, | |
| "grad_norm": 4.637552468831084, | |
| "learning_rate": 1.25e-07, | |
| "logits/chosen": -2.224863290786743, | |
| "logits/rejected": -2.4407901763916016, | |
| "logps/chosen": -398.6038818359375, | |
| "logps/rejected": -367.05999755859375, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.0041518621146678925, | |
| "rewards/margins": 0.0011339159682393074, | |
| "rewards/rejected": -0.005285778548568487, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0837257980115123, | |
| "grad_norm": 4.657136939144448, | |
| "learning_rate": 1.6666666666666668e-07, | |
| "logits/chosen": -2.3235936164855957, | |
| "logits/rejected": -2.4915928840637207, | |
| "logps/chosen": -372.97442626953125, | |
| "logps/rejected": -390.05841064453125, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.019573217257857323, | |
| "rewards/margins": 0.007190874312072992, | |
| "rewards/rejected": -0.026764091104269028, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.10465724751439037, | |
| "grad_norm": 4.947790369246717, | |
| "learning_rate": 1.9998927475076103e-07, | |
| "logits/chosen": -2.1541531085968018, | |
| "logits/rejected": -2.355862855911255, | |
| "logps/chosen": -408.7329406738281, | |
| "logps/rejected": -406.50347900390625, | |
| "loss": 0.6855, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.04146841913461685, | |
| "rewards/margins": 0.02013658545911312, | |
| "rewards/rejected": -0.061604999005794525, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.12558869701726844, | |
| "grad_norm": 6.135445605235113, | |
| "learning_rate": 1.9961413253717213e-07, | |
| "logits/chosen": -2.120229482650757, | |
| "logits/rejected": -2.287370204925537, | |
| "logps/chosen": -376.740234375, | |
| "logps/rejected": -386.8778381347656, | |
| "loss": 0.678, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.08536554872989655, | |
| "rewards/margins": 0.03690432757139206, | |
| "rewards/rejected": -0.12226986885070801, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.14652014652014653, | |
| "grad_norm": 5.2300665585071835, | |
| "learning_rate": 1.9870502626379125e-07, | |
| "logits/chosen": -2.208547830581665, | |
| "logits/rejected": -2.316659927368164, | |
| "logps/chosen": -425.2916564941406, | |
| "logps/rejected": -429.31463623046875, | |
| "loss": 0.6673, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.14128030836582184, | |
| "rewards/margins": 0.05471445247530937, | |
| "rewards/rejected": -0.1959947645664215, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1674515960230246, | |
| "grad_norm": 6.361729619349137, | |
| "learning_rate": 1.9726682903510838e-07, | |
| "logits/chosen": -1.8886642456054688, | |
| "logits/rejected": -2.2390127182006836, | |
| "logps/chosen": -470.6441955566406, | |
| "logps/rejected": -419.4126892089844, | |
| "loss": 0.6583, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.2689892053604126, | |
| "rewards/margins": 0.07578183710575104, | |
| "rewards/rejected": -0.34477105736732483, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18838304552590268, | |
| "grad_norm": 7.250967252041406, | |
| "learning_rate": 1.9530725005474194e-07, | |
| "logits/chosen": -2.3355867862701416, | |
| "logits/rejected": -2.404792070388794, | |
| "logps/chosen": -411.76806640625, | |
| "logps/rejected": -441.7333068847656, | |
| "loss": 0.6355, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.42172950506210327, | |
| "rewards/margins": 0.12971071898937225, | |
| "rewards/rejected": -0.5514402985572815, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20931449502878074, | |
| "grad_norm": 7.1454110672964335, | |
| "learning_rate": 1.9283679330160724e-07, | |
| "logits/chosen": -2.2639448642730713, | |
| "logits/rejected": -2.5537800788879395, | |
| "logps/chosen": -477.0587463378906, | |
| "logps/rejected": -489.705810546875, | |
| "loss": 0.6351, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.6349204778671265, | |
| "rewards/margins": 0.18245458602905273, | |
| "rewards/rejected": -0.8173751831054688, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.20931449502878074, | |
| "eval_logits/chosen": -2.2922377586364746, | |
| "eval_logits/rejected": -2.4565351009368896, | |
| "eval_logps/chosen": -472.2982177734375, | |
| "eval_logps/rejected": -487.7696533203125, | |
| "eval_loss": 0.6359348893165588, | |
| "eval_rewards/accuracies": 0.6746031641960144, | |
| "eval_rewards/chosen": -0.675361156463623, | |
| "eval_rewards/margins": 0.2425757199525833, | |
| "eval_rewards/rejected": -0.9179368615150452, | |
| "eval_runtime": 88.9262, | |
| "eval_samples_per_second": 22.491, | |
| "eval_steps_per_second": 0.708, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.2302459445316588, | |
| "grad_norm": 9.360622478279684, | |
| "learning_rate": 1.898687012251826e-07, | |
| "logits/chosen": -2.217447280883789, | |
| "logits/rejected": -2.3863320350646973, | |
| "logps/chosen": -481.96990966796875, | |
| "logps/rejected": -499.48345947265625, | |
| "loss": 0.6311, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -0.7452836036682129, | |
| "rewards/margins": 0.209157794713974, | |
| "rewards/rejected": -0.9544414281845093, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.25117739403453687, | |
| "grad_norm": 7.953755036427896, | |
| "learning_rate": 1.8641888376168482e-07, | |
| "logits/chosen": -2.2092318534851074, | |
| "logits/rejected": -2.2929816246032715, | |
| "logps/chosen": -454.405517578125, | |
| "logps/rejected": -497.1351623535156, | |
| "loss": 0.6209, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.7448408007621765, | |
| "rewards/margins": 0.29463425278663635, | |
| "rewards/rejected": -1.0394752025604248, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.272108843537415, | |
| "grad_norm": 8.821105331401093, | |
| "learning_rate": 1.8250583305165094e-07, | |
| "logits/chosen": -2.2061495780944824, | |
| "logits/rejected": -2.3711869716644287, | |
| "logps/chosen": -472.7056579589844, | |
| "logps/rejected": -487.33880615234375, | |
| "loss": 0.6204, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -0.7287603616714478, | |
| "rewards/margins": 0.2083979845046997, | |
| "rewards/rejected": -0.9371584057807922, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.29304029304029305, | |
| "grad_norm": 9.167325969849378, | |
| "learning_rate": 1.78150524316067e-07, | |
| "logits/chosen": -2.2468433380126953, | |
| "logits/rejected": -2.466036319732666, | |
| "logps/chosen": -501.697021484375, | |
| "logps/rejected": -497.5772399902344, | |
| "loss": 0.6195, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.7497612237930298, | |
| "rewards/margins": 0.30543631315231323, | |
| "rewards/rejected": -1.0551974773406982, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3139717425431711, | |
| "grad_norm": 10.828055616866019, | |
| "learning_rate": 1.7337630342238038e-07, | |
| "logits/chosen": -2.163837432861328, | |
| "logits/rejected": -2.328864574432373, | |
| "logps/chosen": -474.3462829589844, | |
| "logps/rejected": -480.0904846191406, | |
| "loss": 0.621, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.7979869246482849, | |
| "rewards/margins": 0.22926858067512512, | |
| "rewards/rejected": -1.0272555351257324, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3349031920460492, | |
| "grad_norm": 9.907119624068729, | |
| "learning_rate": 1.682087617430782e-07, | |
| "logits/chosen": -2.1256282329559326, | |
| "logits/rejected": -2.4207208156585693, | |
| "logps/chosen": -476.00933837890625, | |
| "logps/rejected": -491.25799560546875, | |
| "loss": 0.6148, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.8471584320068359, | |
| "rewards/margins": 0.2906045913696289, | |
| "rewards/rejected": -1.1377630233764648, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.35583464154892724, | |
| "grad_norm": 10.130673374633192, | |
| "learning_rate": 1.6267559897763025e-07, | |
| "logits/chosen": -2.240748405456543, | |
| "logits/rejected": -2.3730461597442627, | |
| "logps/chosen": -466.5884704589844, | |
| "logps/rejected": -470.2240295410156, | |
| "loss": 0.6136, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.8948806524276733, | |
| "rewards/margins": 0.24292059242725372, | |
| "rewards/rejected": -1.137801170349121, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37676609105180536, | |
| "grad_norm": 12.664244024162585, | |
| "learning_rate": 1.5680647467311557e-07, | |
| "logits/chosen": -2.3886361122131348, | |
| "logits/rejected": -2.48551344871521, | |
| "logps/chosen": -466.68115234375, | |
| "logps/rejected": -481.260498046875, | |
| "loss": 0.589, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.8065212965011597, | |
| "rewards/margins": 0.28530603647232056, | |
| "rewards/rejected": -1.091827392578125, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3976975405546834, | |
| "grad_norm": 15.413041204374277, | |
| "learning_rate": 1.506328492394303e-07, | |
| "logits/chosen": -2.425926685333252, | |
| "logits/rejected": -2.436657190322876, | |
| "logps/chosen": -480.2686462402344, | |
| "logps/rejected": -514.1541137695312, | |
| "loss": 0.6247, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -1.0268957614898682, | |
| "rewards/margins": 0.26106053590774536, | |
| "rewards/rejected": -1.2879562377929688, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4186289900575615, | |
| "grad_norm": 16.30024056431674, | |
| "learning_rate": 1.4418781531128634e-07, | |
| "logits/chosen": -2.3286993503570557, | |
| "logits/rejected": -2.387202024459839, | |
| "logps/chosen": -454.547119140625, | |
| "logps/rejected": -511.773681640625, | |
| "loss": 0.6101, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": -0.8713696599006653, | |
| "rewards/margins": 0.2568342685699463, | |
| "rewards/rejected": -1.1282037496566772, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4186289900575615, | |
| "eval_logits/chosen": -2.293304443359375, | |
| "eval_logits/rejected": -2.447746753692627, | |
| "eval_logps/chosen": -484.72442626953125, | |
| "eval_logps/rejected": -515.6393432617188, | |
| "eval_loss": 0.5989560484886169, | |
| "eval_rewards/accuracies": 0.7142857313156128, | |
| "eval_rewards/chosen": -0.7996230125427246, | |
| "eval_rewards/margins": 0.39701077342033386, | |
| "eval_rewards/rejected": -1.1966338157653809, | |
| "eval_runtime": 88.7991, | |
| "eval_samples_per_second": 22.523, | |
| "eval_steps_per_second": 0.709, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43956043956043955, | |
| "grad_norm": 12.590959189684769, | |
| "learning_rate": 1.375059203609562e-07, | |
| "logits/chosen": -2.251105785369873, | |
| "logits/rejected": -2.49545955657959, | |
| "logps/chosen": -514.7989501953125, | |
| "logps/rejected": -508.8777770996094, | |
| "loss": 0.6036, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9383622407913208, | |
| "rewards/margins": 0.3089975416660309, | |
| "rewards/rejected": -1.2473597526550293, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4604918890633176, | |
| "grad_norm": 32.27211919256004, | |
| "learning_rate": 1.306229815126159e-07, | |
| "logits/chosen": -2.374002456665039, | |
| "logits/rejected": -2.5104002952575684, | |
| "logps/chosen": -453.17889404296875, | |
| "logps/rejected": -502.31829833984375, | |
| "loss": 0.5905, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -1.0016330480575562, | |
| "rewards/margins": 0.3531147539615631, | |
| "rewards/rejected": -1.3547478914260864, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.48142333856619574, | |
| "grad_norm": 11.074374701972996, | |
| "learning_rate": 1.2357589355094274e-07, | |
| "logits/chosen": -2.240893602371216, | |
| "logits/rejected": -2.4365756511688232, | |
| "logps/chosen": -464.9483947753906, | |
| "logps/rejected": -497.55950927734375, | |
| "loss": 0.6032, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.8673335909843445, | |
| "rewards/margins": 0.4288042187690735, | |
| "rewards/rejected": -1.2961379289627075, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5023547880690737, | |
| "grad_norm": 13.608161796310325, | |
| "learning_rate": 1.1640243115310217e-07, | |
| "logits/chosen": -2.263231039047241, | |
| "logits/rejected": -2.374429225921631, | |
| "logps/chosen": -483.5979919433594, | |
| "logps/rejected": -511.7247009277344, | |
| "loss": 0.5829, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.8133866190910339, | |
| "rewards/margins": 0.35704511404037476, | |
| "rewards/rejected": -1.1704318523406982, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5232862375719518, | |
| "grad_norm": 14.904992006409358, | |
| "learning_rate": 1.0914104640422679e-07, | |
| "logits/chosen": -2.312152862548828, | |
| "logits/rejected": -2.504575490951538, | |
| "logps/chosen": -487.4195861816406, | |
| "logps/rejected": -509.62213134765625, | |
| "loss": 0.5914, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.9289507865905762, | |
| "rewards/margins": 0.4651150703430176, | |
| "rewards/rejected": -1.3940656185150146, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.54421768707483, | |
| "grad_norm": 32.859126344847056, | |
| "learning_rate": 1.0183066268176774e-07, | |
| "logits/chosen": -2.452216863632202, | |
| "logits/rejected": -2.5787224769592285, | |
| "logps/chosen": -454.101318359375, | |
| "logps/rejected": -491.07708740234375, | |
| "loss": 0.5958, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.8231406211853027, | |
| "rewards/margins": 0.37211668491363525, | |
| "rewards/rejected": -1.1952574253082275, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.565149136577708, | |
| "grad_norm": 16.410575278967542, | |
| "learning_rate": 9.451046601356724e-08, | |
| "logits/chosen": -2.4211385250091553, | |
| "logits/rejected": -2.5718777179718018, | |
| "logps/chosen": -482.42889404296875, | |
| "logps/rejected": -517.08447265625, | |
| "loss": 0.5968, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.7412260174751282, | |
| "rewards/margins": 0.46059027314186096, | |
| "rewards/rejected": -1.201816439628601, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5860805860805861, | |
| "grad_norm": 14.64481409505789, | |
| "learning_rate": 8.721969502803953e-08, | |
| "logits/chosen": -2.414080858230591, | |
| "logits/rejected": -2.641306161880493, | |
| "logps/chosen": -471.8504943847656, | |
| "logps/rejected": -492.3824157714844, | |
| "loss": 0.6088, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.9498642086982727, | |
| "rewards/margins": 0.3709770143032074, | |
| "rewards/rejected": -1.3208411931991577, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6070120355834642, | |
| "grad_norm": 21.87484189841818, | |
| "learning_rate": 7.999743062239557e-08, | |
| "logits/chosen": -2.5216970443725586, | |
| "logits/rejected": -2.5266430377960205, | |
| "logps/chosen": -452.1351623535156, | |
| "logps/rejected": -507.50408935546875, | |
| "loss": 0.5975, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.9256707429885864, | |
| "rewards/margins": 0.38739797472953796, | |
| "rewards/rejected": -1.3130687475204468, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6279434850863422, | |
| "grad_norm": 13.23460942074812, | |
| "learning_rate": 7.28823864763583e-08, | |
| "logits/chosen": -2.3628604412078857, | |
| "logits/rejected": -2.5071964263916016, | |
| "logps/chosen": -530.2737426757812, | |
| "logps/rejected": -534.9356689453125, | |
| "loss": 0.5738, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.9033306241035461, | |
| "rewards/margins": 0.409872442483902, | |
| "rewards/rejected": -1.313202977180481, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6279434850863422, | |
| "eval_logits/chosen": -2.3505780696868896, | |
| "eval_logits/rejected": -2.500311851501465, | |
| "eval_logps/chosen": -511.9820861816406, | |
| "eval_logps/rejected": -562.04541015625, | |
| "eval_loss": 0.5819065570831299, | |
| "eval_rewards/accuracies": 0.7142857313156128, | |
| "eval_rewards/chosen": -1.0721999406814575, | |
| "eval_rewards/margins": 0.5884942412376404, | |
| "eval_rewards/rejected": -1.6606942415237427, | |
| "eval_runtime": 88.8035, | |
| "eval_samples_per_second": 22.522, | |
| "eval_steps_per_second": 0.709, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6488749345892203, | |
| "grad_norm": 23.240653261962176, | |
| "learning_rate": 6.591270153428288e-08, | |
| "logits/chosen": -2.3066353797912598, | |
| "logits/rejected": -2.4188685417175293, | |
| "logps/chosen": -530.1605224609375, | |
| "logps/rejected": -555.5882568359375, | |
| "loss": 0.5816, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -1.0851608514785767, | |
| "rewards/margins": 0.6294665932655334, | |
| "rewards/rejected": -1.7146275043487549, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6698063840920984, | |
| "grad_norm": 12.35925417664361, | |
| "learning_rate": 5.912573556804452e-08, | |
| "logits/chosen": -2.4511845111846924, | |
| "logits/rejected": -2.5960700511932373, | |
| "logps/chosen": -462.8910217285156, | |
| "logps/rejected": -492.77459716796875, | |
| "loss": 0.5721, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9141901135444641, | |
| "rewards/margins": 0.49542441964149475, | |
| "rewards/rejected": -1.4096145629882812, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6907378335949764, | |
| "grad_norm": 19.635922794228048, | |
| "learning_rate": 5.255786891654399e-08, | |
| "logits/chosen": -2.2881722450256348, | |
| "logits/rejected": -2.3245983123779297, | |
| "logps/chosen": -490.61956787109375, | |
| "logps/rejected": -528.5936279296875, | |
| "loss": 0.5831, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.0118191242218018, | |
| "rewards/margins": 0.3562072217464447, | |
| "rewards/rejected": -1.3680263757705688, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7116692830978545, | |
| "grad_norm": 34.0341920873177, | |
| "learning_rate": 4.624430747529102e-08, | |
| "logits/chosen": -2.2541534900665283, | |
| "logits/rejected": -2.3677923679351807, | |
| "logps/chosen": -520.711181640625, | |
| "logps/rejected": -555.8665771484375, | |
| "loss": 0.5771, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1455854177474976, | |
| "rewards/margins": 0.44834479689598083, | |
| "rewards/rejected": -1.5939301252365112, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7326007326007326, | |
| "grad_norm": 20.184086200131315, | |
| "learning_rate": 4.0218893981385925e-08, | |
| "logits/chosen": -2.336240291595459, | |
| "logits/rejected": -2.5228190422058105, | |
| "logps/chosen": -490.032470703125, | |
| "logps/rejected": -514.3966064453125, | |
| "loss": 0.5772, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -1.1221544742584229, | |
| "rewards/margins": 0.41546517610549927, | |
| "rewards/rejected": -1.5376195907592773, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7535321821036107, | |
| "grad_norm": 14.840705395348046, | |
| "learning_rate": 3.45139266054715e-08, | |
| "logits/chosen": -2.3588707447052, | |
| "logits/rejected": -2.5286855697631836, | |
| "logps/chosen": -525.8394775390625, | |
| "logps/rejected": -543.2139892578125, | |
| "loss": 0.5961, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.9700316190719604, | |
| "rewards/margins": 0.42892080545425415, | |
| "rewards/rejected": -1.3989523649215698, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7744636316064888, | |
| "grad_norm": 12.56992511385935, | |
| "learning_rate": 2.9159985823062993e-08, | |
| "logits/chosen": -2.4362387657165527, | |
| "logits/rejected": -2.588212251663208, | |
| "logps/chosen": -469.63018798828125, | |
| "logps/rejected": -491.34185791015625, | |
| "loss": 0.5787, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.9046362638473511, | |
| "rewards/margins": 0.42833614349365234, | |
| "rewards/rejected": -1.332972526550293, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7953950811093669, | |
| "grad_norm": 14.216122099186137, | |
| "learning_rate": 2.4185770493280577e-08, | |
| "logits/chosen": -2.4785826206207275, | |
| "logits/rejected": -2.5475876331329346, | |
| "logps/chosen": -463.3335876464844, | |
| "logps/rejected": -562.8516235351562, | |
| "loss": 0.5816, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.0568846464157104, | |
| "rewards/margins": 0.6403349041938782, | |
| "rewards/rejected": -1.6972196102142334, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8163265306122449, | |
| "grad_norm": 17.166403382209694, | |
| "learning_rate": 1.9617944023656108e-08, | |
| "logits/chosen": -2.3412299156188965, | |
| "logits/rejected": -2.431159257888794, | |
| "logps/chosen": -569.6896362304688, | |
| "logps/rejected": -604.4752197265625, | |
| "loss": 0.5647, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -1.135259985923767, | |
| "rewards/margins": 0.5612015724182129, | |
| "rewards/rejected": -1.6964616775512695, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.837257980115123, | |
| "grad_norm": 25.5326876410102, | |
| "learning_rate": 1.5480991445620538e-08, | |
| "logits/chosen": -2.438910961151123, | |
| "logits/rejected": -2.621582269668579, | |
| "logps/chosen": -477.71551513671875, | |
| "logps/rejected": -516.8345336914062, | |
| "loss": 0.5808, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.0198707580566406, | |
| "rewards/margins": 0.485908567905426, | |
| "rewards/rejected": -1.5057791471481323, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.837257980115123, | |
| "eval_logits/chosen": -2.4454309940338135, | |
| "eval_logits/rejected": -2.60603404045105, | |
| "eval_logps/chosen": -509.0269470214844, | |
| "eval_logps/rejected": -557.9309692382812, | |
| "eval_loss": 0.5776250958442688, | |
| "eval_rewards/accuracies": 0.7063491940498352, | |
| "eval_rewards/chosen": -1.042648196220398, | |
| "eval_rewards/margins": 0.5769018530845642, | |
| "eval_rewards/rejected": -1.619550108909607, | |
| "eval_runtime": 88.8844, | |
| "eval_samples_per_second": 22.501, | |
| "eval_steps_per_second": 0.709, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.858189429618001, | |
| "grad_norm": 12.2363803809367, | |
| "learning_rate": 1.1797088166794e-08, | |
| "logits/chosen": -2.327822208404541, | |
| "logits/rejected": -2.539658308029175, | |
| "logps/chosen": -523.35693359375, | |
| "logps/rejected": -556.1873168945312, | |
| "loss": 0.5837, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -1.0230482816696167, | |
| "rewards/margins": 0.5963117480278015, | |
| "rewards/rejected": -1.6193599700927734, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8791208791208791, | |
| "grad_norm": 17.1630701293683, | |
| "learning_rate": 8.585981103608341e-09, | |
| "logits/chosen": -2.3502843379974365, | |
| "logits/rejected": -2.5074477195739746, | |
| "logps/chosen": -481.4237365722656, | |
| "logps/rejected": -559.5806884765625, | |
| "loss": 0.567, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.0329768657684326, | |
| "rewards/margins": 0.5681900978088379, | |
| "rewards/rejected": -1.6011669635772705, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9000523286237572, | |
| "grad_norm": 16.184790708379772, | |
| "learning_rate": 5.864882831430273e-09, | |
| "logits/chosen": -2.352280378341675, | |
| "logits/rejected": -2.436026096343994, | |
| "logps/chosen": -513.5238647460938, | |
| "logps/rejected": -551.8958129882812, | |
| "loss": 0.5755, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.0582252740859985, | |
| "rewards/margins": 0.5332168340682983, | |
| "rewards/rejected": -1.5914418697357178, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9209837781266352, | |
| "grad_norm": 17.526839475687186, | |
| "learning_rate": 3.6483793195745682e-09, | |
| "logits/chosen": -2.3311455249786377, | |
| "logits/rejected": -2.440988063812256, | |
| "logps/chosen": -482.4281311035156, | |
| "logps/rejected": -498.60345458984375, | |
| "loss": 0.5787, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -1.0323375463485718, | |
| "rewards/margins": 0.4054194390773773, | |
| "rewards/rejected": -1.4377570152282715, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9419152276295133, | |
| "grad_norm": 14.705602904039639, | |
| "learning_rate": 1.9483517457776433e-09, | |
| "logits/chosen": -2.2350025177001953, | |
| "logits/rejected": -2.3830924034118652, | |
| "logps/chosen": -490.513427734375, | |
| "logps/rejected": -551.2727661132812, | |
| "loss": 0.579, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.0369895696640015, | |
| "rewards/margins": 0.5606644153594971, | |
| "rewards/rejected": -1.597654104232788, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9628466771323915, | |
| "grad_norm": 15.228089724513376, | |
| "learning_rate": 7.739128092312918e-10, | |
| "logits/chosen": -2.281054973602295, | |
| "logits/rejected": -2.4768524169921875, | |
| "logps/chosen": -496.84814453125, | |
| "logps/rejected": -510.46258544921875, | |
| "loss": 0.579, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -1.0984748601913452, | |
| "rewards/margins": 0.47915878891944885, | |
| "rewards/rejected": -1.5776336193084717, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9837781266352695, | |
| "grad_norm": 17.607957497609636, | |
| "learning_rate": 1.313578835593465e-10, | |
| "logits/chosen": -2.3311634063720703, | |
| "logits/rejected": -2.4415996074676514, | |
| "logps/chosen": -519.3492431640625, | |
| "logps/rejected": -541.9041137695312, | |
| "loss": 0.5694, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": -1.0364539623260498, | |
| "rewards/margins": 0.33034905791282654, | |
| "rewards/rejected": -1.3668031692504883, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9984301412872841, | |
| "step": 477, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6095632167232361, | |
| "train_runtime": 6900.3625, | |
| "train_samples_per_second": 8.86, | |
| "train_steps_per_second": 0.069 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 477, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |