{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9959925193694897,
  "eval_steps": 400,
  "global_step": 233,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "agreement_weights/mean": 0.9893633127212524,
      "agreement_weights/std": 0.0038108511362224817,
      "epoch": 0.004274646005877639,
      "eta/annotator_0": 0.9899773001670837,
      "grad_norm": 9.13903924052661,
      "learning_rate": 2.083333333333333e-08,
      "loss": 1.6004,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -0.68994140625,
      "rewards/margins": 0.0044460296630859375,
      "rewards/rejected": -0.69384765625,
      "step": 1
    },
    {
      "agreement_weights/mean": 0.9890749454498291,
      "agreement_weights/std": 0.00438307598233223,
      "epoch": 0.02137323002938819,
      "eta/annotator_0": 0.9897143840789795,
      "grad_norm": 7.102513518402141,
      "learning_rate": 1.0416666666666667e-07,
      "loss": 1.5937,
      "rewards/accuracies": 0.46484375,
      "rewards/chosen": -0.6795654296875,
      "rewards/margins": 0.01406717300415039,
      "rewards/rejected": -0.6934814453125,
      "step": 5
    },
    {
      "agreement_weights/mean": 0.989261269569397,
      "agreement_weights/std": 0.004110876005142927,
      "epoch": 0.04274646005877638,
      "eta/annotator_0": 0.9895066022872925,
      "grad_norm": 15.885381584580474,
      "learning_rate": 2.0833333333333333e-07,
      "loss": 1.5727,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.672436535358429,
      "rewards/margins": 0.042784880846738815,
      "rewards/rejected": -0.715039074420929,
      "step": 10
    },
    {
      "agreement_weights/mean": 0.9888086318969727,
      "agreement_weights/std": 0.004452961962670088,
      "epoch": 0.06411969008816458,
      "eta/annotator_0": 0.9891055822372437,
      "grad_norm": 12.714553836314233,
      "learning_rate": 3.1249999999999997e-07,
      "loss": 1.5824,
      "rewards/accuracies": 0.508593738079071,
      "rewards/chosen": -0.6779540777206421,
      "rewards/margins": 0.028568649664521217,
      "rewards/rejected": -0.7066894769668579,
      "step": 15
    },
    {
      "agreement_weights/mean": 0.9885651469230652,
      "agreement_weights/std": 0.004792415536940098,
      "epoch": 0.08549292011755276,
      "eta/annotator_0": 0.9884439706802368,
      "grad_norm": 7.472966386913895,
      "learning_rate": 4.1666666666666667e-07,
      "loss": 1.5768,
      "rewards/accuracies": 0.5257812738418579,
      "rewards/chosen": -0.667187511920929,
      "rewards/margins": 0.03665924072265625,
      "rewards/rejected": -0.7039550542831421,
      "step": 20
    },
    {
      "agreement_weights/mean": 0.9881379008293152,
      "agreement_weights/std": 0.004852100275456905,
      "epoch": 0.10686615014694095,
      "eta/annotator_0": 0.987867534160614,
      "grad_norm": 9.598415267317163,
      "learning_rate": 4.999717571181741e-07,
      "loss": 1.5823,
      "rewards/accuracies": 0.5140625238418579,
      "rewards/chosen": -0.681835949420929,
      "rewards/margins": 0.030136490240693092,
      "rewards/rejected": -0.711962878704071,
      "step": 25
    },
    {
      "agreement_weights/mean": 0.9881780743598938,
      "agreement_weights/std": 0.004625464789569378,
      "epoch": 0.12823938017632916,
      "eta/annotator_0": 0.9875534772872925,
      "grad_norm": 7.783063478858838,
      "learning_rate": 4.98983926127519e-07,
      "loss": 1.5677,
      "rewards/accuracies": 0.5257812738418579,
      "rewards/chosen": -0.6817871332168579,
      "rewards/margins": 0.051012419164180756,
      "rewards/rejected": -0.7325683832168579,
      "step": 30
    },
    {
      "agreement_weights/mean": 0.9877825975418091,
      "agreement_weights/std": 0.00548733863979578,
      "epoch": 0.14961261020571734,
      "eta/annotator_0": 0.9871212244033813,
      "grad_norm": 6.975340706147195,
      "learning_rate": 4.965903258506806e-07,
      "loss": 1.5649,
      "rewards/accuracies": 0.5132812261581421,
      "rewards/chosen": -0.7159668207168579,
      "rewards/margins": 0.058887481689453125,
      "rewards/rejected": -0.774951159954071,
      "step": 35
    },
    {
      "agreement_weights/mean": 0.9874189496040344,
      "agreement_weights/std": 0.006337934639304876,
      "epoch": 0.17098584023510552,
      "eta/annotator_0": 0.9867643117904663,
      "grad_norm": 9.52825785684312,
      "learning_rate": 4.928044706128802e-07,
      "loss": 1.5521,
      "rewards/accuracies": 0.5296875238418579,
      "rewards/chosen": -0.718823254108429,
      "rewards/margins": 0.08782501518726349,
      "rewards/rejected": -0.806445300579071,
      "step": 40
    },
    {
      "agreement_weights/mean": 0.9850943684577942,
      "agreement_weights/std": 0.011619331315159798,
      "epoch": 0.19235907026449373,
      "eta/annotator_0": 0.9836000204086304,
      "grad_norm": 8.176852848379246,
      "learning_rate": 4.876477354446189e-07,
      "loss": 1.5612,
      "rewards/accuracies": 0.5023437738418579,
      "rewards/chosen": -0.735595703125,
      "rewards/margins": 0.07326431572437286,
      "rewards/rejected": -0.80859375,
      "step": 45
    },
    {
      "agreement_weights/mean": 0.9840047955513,
      "agreement_weights/std": 0.010932808741927147,
      "epoch": 0.2137323002938819,
      "eta/annotator_0": 0.9824264645576477,
      "grad_norm": 12.12547075711657,
      "learning_rate": 4.811492353977365e-07,
      "loss": 1.5727,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.770312488079071,
      "rewards/margins": 0.052251435816287994,
      "rewards/rejected": -0.822558581829071,
      "step": 50
    },
    {
      "agreement_weights/mean": 0.9834893345832825,
      "agreement_weights/std": 0.010059957392513752,
      "epoch": 0.2351055303232701,
      "eta/annotator_0": 0.9817886352539062,
      "grad_norm": 8.645788658599258,
      "learning_rate": 4.7334566116112327e-07,
      "loss": 1.5544,
      "rewards/accuracies": 0.514843761920929,
      "rewards/chosen": -0.7423095703125,
      "rewards/margins": 0.09745025634765625,
      "rewards/rejected": -0.8397461175918579,
      "step": 55
    },
    {
      "agreement_weights/mean": 0.9825822710990906,
      "agreement_weights/std": 0.012372071854770184,
      "epoch": 0.2564787603526583,
      "eta/annotator_0": 0.9805394411087036,
      "grad_norm": 8.8650266511209,
      "learning_rate": 4.6428107190419983e-07,
      "loss": 1.5354,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.77099609375,
      "rewards/margins": 0.13408812880516052,
      "rewards/rejected": -0.905029296875,
      "step": 60
    },
    {
      "agreement_weights/mean": 0.9812002182006836,
      "agreement_weights/std": 0.014747394248843193,
      "epoch": 0.2778519903820465,
      "eta/annotator_0": 0.9782701730728149,
      "grad_norm": 9.151952067919474,
      "learning_rate": 4.540066465177783e-07,
      "loss": 1.5263,
      "rewards/accuracies": 0.535937488079071,
      "rewards/chosen": -0.785595715045929,
      "rewards/margins": 0.14189758896827698,
      "rewards/rejected": -0.927734375,
      "step": 65
    },
    {
      "agreement_weights/mean": 0.9765976071357727,
      "agreement_weights/std": 0.02648126147687435,
      "epoch": 0.2992252204114347,
      "eta/annotator_0": 0.9731000065803528,
      "grad_norm": 8.708292437307195,
      "learning_rate": 4.425803946568032e-07,
      "loss": 1.5369,
      "rewards/accuracies": 0.5570312738418579,
      "rewards/chosen": -0.8238281011581421,
      "rewards/margins": 0.10733337700366974,
      "rewards/rejected": -0.931347668170929,
      "step": 70
    },
    {
      "agreement_weights/mean": 0.9765526056289673,
      "agreement_weights/std": 0.020670022815465927,
      "epoch": 0.32059845044082286,
      "eta/annotator_0": 0.9643263816833496,
      "grad_norm": 9.587633854940714,
      "learning_rate": 4.300668292164329e-07,
      "loss": 1.5067,
      "rewards/accuracies": 0.559374988079071,
      "rewards/chosen": -0.80029296875,
      "rewards/margins": 0.2055046111345291,
      "rewards/rejected": -1.0055663585662842,
      "step": 75
    },
    {
      "agreement_weights/mean": 0.9720351099967957,
      "agreement_weights/std": 0.02838682010769844,
      "epoch": 0.34197168047021104,
      "eta/annotator_0": 0.958343505859375,
      "grad_norm": 9.873051661870614,
      "learning_rate": 4.165366020906683e-07,
      "loss": 1.5141,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.85693359375,
      "rewards/margins": 0.20672722160816193,
      "rewards/rejected": -1.063623070716858,
      "step": 80
    },
    {
      "agreement_weights/mean": 0.9665838479995728,
      "agreement_weights/std": 0.03906597942113876,
      "epoch": 0.36334491049959927,
      "eta/annotator_0": 0.9532757997512817,
      "grad_norm": 10.098422274405984,
      "learning_rate": 4.0206610527004607e-07,
      "loss": 1.4912,
      "rewards/accuracies": 0.5796874761581421,
      "rewards/chosen": -0.904833972454071,
      "rewards/margins": 0.2045547515153885,
      "rewards/rejected": -1.1090819835662842,
      "step": 85
    },
    {
      "agreement_weights/mean": 0.9634488224983215,
      "agreement_weights/std": 0.041894152760505676,
      "epoch": 0.38471814052898745,
      "eta/annotator_0": 0.9423317909240723,
      "grad_norm": 9.104208697747195,
      "learning_rate": 3.867370395306068e-07,
      "loss": 1.4817,
      "rewards/accuracies": 0.55859375,
      "rewards/chosen": -0.89501953125,
      "rewards/margins": 0.223399356007576,
      "rewards/rejected": -1.118554711341858,
      "step": 90
    },
    {
      "agreement_weights/mean": 0.9626390337944031,
      "agreement_weights/std": 0.03493572026491165,
      "epoch": 0.40609137055837563,
      "eta/annotator_0": 0.9396722912788391,
      "grad_norm": 7.126343696816854,
      "learning_rate": 3.7063595314933156e-07,
      "loss": 1.4836,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": -0.925732433795929,
      "rewards/margins": 0.2864089906215668,
      "rewards/rejected": -1.2123534679412842,
      "step": 95
    },
    {
      "agreement_weights/mean": 0.9595847129821777,
      "agreement_weights/std": 0.04063795506954193,
      "epoch": 0.4274646005877638,
      "eta/annotator_0": 0.9406528472900391,
      "grad_norm": 10.548775383749875,
      "learning_rate": 3.5385375325047163e-07,
      "loss": 1.4949,
      "rewards/accuracies": 0.5804687738418579,
      "rewards/chosen": -0.927539050579071,
      "rewards/margins": 0.225819393992424,
      "rewards/rejected": -1.153662085533142,
      "step": 100
    },
    {
      "agreement_weights/mean": 0.9436739087104797,
      "agreement_weights/std": 0.06930957734584808,
      "epoch": 0.448837830617152,
      "eta/annotator_0": 0.929207444190979,
      "grad_norm": 9.347532066530137,
      "learning_rate": 3.36485192541719e-07,
      "loss": 1.4894,
      "rewards/accuracies": 0.561718761920929,
      "rewards/chosen": -1.030908226966858,
      "rewards/margins": 0.16647644340991974,
      "rewards/rejected": -1.1977050304412842,
      "step": 105
    },
    {
      "agreement_weights/mean": 0.9477313756942749,
      "agreement_weights/std": 0.049039699137210846,
      "epoch": 0.4702110606465402,
      "eta/annotator_0": 0.9266969561576843,
      "grad_norm": 9.688059674033342,
      "learning_rate": 3.186283343381213e-07,
      "loss": 1.453,
      "rewards/accuracies": 0.6109374761581421,
      "rewards/chosen": -1.014257788658142,
      "rewards/margins": 0.3441413938999176,
      "rewards/rejected": -1.3579590320587158,
      "step": 110
    },
    {
      "agreement_weights/mean": 0.9420230984687805,
      "agreement_weights/std": 0.05487104505300522,
      "epoch": 0.4915842906759284,
      "eta/annotator_0": 0.9168604612350464,
      "grad_norm": 9.736883695942245,
      "learning_rate": 3.003839988942255e-07,
      "loss": 1.4668,
      "rewards/accuracies": 0.59765625,
      "rewards/chosen": -1.000146508216858,
      "rewards/margins": 0.23876723647117615,
      "rewards/rejected": -1.238916039466858,
      "step": 115
    },
    {
      "agreement_weights/mean": 0.9381793737411499,
      "agreement_weights/std": 0.06204790621995926,
      "epoch": 0.5129575207053166,
      "eta/annotator_0": 0.9129531979560852,
      "grad_norm": 9.73732055546487,
      "learning_rate": 2.8185519417047623e-07,
      "loss": 1.4459,
      "rewards/accuracies": 0.628125011920929,
      "rewards/chosen": -1.0601074695587158,
      "rewards/margins": 0.3048393130302429,
      "rewards/rejected": -1.365087866783142,
      "step": 120
    },
    {
      "agreement_weights/mean": 0.9374347925186157,
      "agreement_weights/std": 0.06295043975114822,
      "epoch": 0.5343307507347048,
      "eta/annotator_0": 0.9159477353096008,
      "grad_norm": 8.827071222761129,
      "learning_rate": 2.631465342477719e-07,
      "loss": 1.425,
      "rewards/accuracies": 0.629687488079071,
      "rewards/chosen": -1.0723145008087158,
      "rewards/margins": 0.35406264662742615,
      "rewards/rejected": -1.4267089366912842,
      "step": 125
    },
    {
      "agreement_weights/mean": 0.9301049113273621,
      "agreement_weights/std": 0.07405496388673782,
      "epoch": 0.555703980764093,
      "eta/annotator_0": 0.9193568229675293,
      "grad_norm": 10.235426456133796,
      "learning_rate": 2.44363648673827e-07,
      "loss": 1.4406,
      "rewards/accuracies": 0.633593738079071,
      "rewards/chosen": -1.1417968273162842,
      "rewards/margins": 0.286337286233902,
      "rewards/rejected": -1.427587866783142,
      "step": 130
    },
    {
      "agreement_weights/mean": 0.9224993586540222,
      "agreement_weights/std": 0.08192013949155807,
      "epoch": 0.5770772107934812,
      "eta/annotator_0": 0.9185341000556946,
      "grad_norm": 13.055371078114435,
      "learning_rate": 2.2561258607618294e-07,
      "loss": 1.4315,
      "rewards/accuracies": 0.628125011920929,
      "rewards/chosen": -1.1554687023162842,
      "rewards/margins": 0.29844361543655396,
      "rewards/rejected": -1.4543945789337158,
      "step": 135
    },
    {
      "agreement_weights/mean": 0.9188439249992371,
      "agreement_weights/std": 0.08069366961717606,
      "epoch": 0.5984504408228694,
      "eta/annotator_0": 0.9095417857170105,
      "grad_norm": 13.367593154679895,
      "learning_rate": 2.069992154090854e-07,
      "loss": 1.4244,
      "rewards/accuracies": 0.621874988079071,
      "rewards/chosen": -1.1845214366912842,
      "rewards/margins": 0.3037376403808594,
      "rewards/rejected": -1.4882323741912842,
      "step": 140
    },
    {
      "agreement_weights/mean": 0.9211521148681641,
      "agreement_weights/std": 0.07038389146327972,
      "epoch": 0.6198236708522575,
      "eta/annotator_0": 0.9060202836990356,
      "grad_norm": 9.782018935027837,
      "learning_rate": 1.886286282148002e-07,
      "loss": 1.4255,
      "rewards/accuracies": 0.64453125,
      "rewards/chosen": -1.1845703125,
      "rewards/margins": 0.3395950198173523,
      "rewards/rejected": -1.5246093273162842,
      "step": 145
    },
    {
      "agreement_weights/mean": 0.9100608825683594,
      "agreement_weights/std": 0.0978657454252243,
      "epoch": 0.6411969008816457,
      "eta/annotator_0": 0.9040514826774597,
      "grad_norm": 8.821271102986696,
      "learning_rate": 1.7060454527421686e-07,
      "loss": 1.3959,
      "rewards/accuracies": 0.6421874761581421,
      "rewards/chosen": -1.296484351158142,
      "rewards/margins": 0.3692916929721832,
      "rewards/rejected": -1.665771484375,
      "step": 150
    },
    {
      "agreement_weights/mean": 0.9100178480148315,
      "agreement_weights/std": 0.09003014117479324,
      "epoch": 0.6625701309110339,
      "eta/annotator_0": 0.8944258689880371,
      "grad_norm": 12.000776056381852,
      "learning_rate": 1.5302873099680374e-07,
      "loss": 1.3975,
      "rewards/accuracies": 0.6390625238418579,
      "rewards/chosen": -1.254052758216858,
      "rewards/margins": 0.3678039610385895,
      "rewards/rejected": -1.6216309070587158,
      "step": 155
    },
    {
      "agreement_weights/mean": 0.9026163220405579,
      "agreement_weights/std": 0.10090925544500351,
      "epoch": 0.6839433609404221,
      "eta/annotator_0": 0.875512957572937,
      "grad_norm": 13.811663032880974,
      "learning_rate": 1.360004188562841e-07,
      "loss": 1.4053,
      "rewards/accuracies": 0.6429687738418579,
      "rewards/chosen": -1.293554663658142,
      "rewards/margins": 0.36749571561813354,
      "rewards/rejected": -1.6610839366912842,
      "step": 160
    },
    {
      "agreement_weights/mean": 0.9087193608283997,
      "agreement_weights/std": 0.08467105031013489,
      "epoch": 0.7053165909698104,
      "eta/annotator_0": 0.8710571527481079,
      "grad_norm": 8.565292902683819,
      "learning_rate": 1.1961575111603586e-07,
      "loss": 1.3804,
      "rewards/accuracies": 0.660937488079071,
      "rewards/chosen": -1.29052734375,
      "rewards/margins": 0.430908203125,
      "rewards/rejected": -1.7209961414337158,
      "step": 165
    },
    {
      "agreement_weights/mean": 0.9024698138237,
      "agreement_weights/std": 0.09167172759771347,
      "epoch": 0.7266898209991985,
      "eta/annotator_0": 0.8774527311325073,
      "grad_norm": 12.636141779987188,
      "learning_rate": 1.0396723600754143e-07,
      "loss": 1.4046,
      "rewards/accuracies": 0.657031238079071,
      "rewards/chosen": -1.3505370616912842,
      "rewards/margins": 0.3924667239189148,
      "rewards/rejected": -1.7434570789337158,
      "step": 170
    },
    {
      "agreement_weights/mean": 0.9095037579536438,
      "agreement_weights/std": 0.08033014833927155,
      "epoch": 0.7480630510285867,
      "eta/annotator_0": 0.883401095867157,
      "grad_norm": 16.247214462681676,
      "learning_rate": 8.914322542666822e-08,
      "loss": 1.3835,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -1.332275390625,
      "rewards/margins": 0.44039231538772583,
      "rewards/rejected": -1.7732422351837158,
      "step": 175
    },
    {
      "agreement_weights/mean": 0.9053813815116882,
      "agreement_weights/std": 0.08672865480184555,
      "epoch": 0.7694362810579749,
      "eta/annotator_0": 0.8866379857063293,
      "grad_norm": 10.38117956645124,
      "learning_rate": 7.522741609672193e-08,
      "loss": 1.3894,
      "rewards/accuracies": 0.6695312261581421,
      "rewards/chosen": -1.363916039466858,
      "rewards/margins": 0.42420655488967896,
      "rewards/rejected": -1.787695288658142,
      "step": 180
    },
    {
      "agreement_weights/mean": 0.9022833108901978,
      "agreement_weights/std": 0.0886184424161911,
      "epoch": 0.7908095110873631,
      "eta/annotator_0": 0.8881160020828247,
      "grad_norm": 11.639888229803354,
      "learning_rate": 6.229837701471644e-08,
      "loss": 1.3881,
      "rewards/accuracies": 0.632031261920929,
      "rewards/chosen": -1.4340331554412842,
      "rewards/margins": 0.4479431211948395,
      "rewards/rejected": -1.8821289539337158,
      "step": 185
    },
    {
      "agreement_weights/mean": 0.9009215235710144,
      "agreement_weights/std": 0.1035546064376831,
      "epoch": 0.8121827411167513,
      "eta/annotator_0": 0.8845187425613403,
      "grad_norm": 10.334279908094931,
      "learning_rate": 5.0429105848910996e-08,
      "loss": 1.3478,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -1.462890625,
      "rewards/margins": 0.4393371641635895,
      "rewards/rejected": -1.902441382408142,
      "step": 190
    },
    {
      "agreement_weights/mean": 0.9054125547409058,
      "agreement_weights/std": 0.09317369014024734,
      "epoch": 0.8335559711461394,
      "eta/annotator_0": 0.8970395922660828,
      "grad_norm": 13.529308896096568,
      "learning_rate": 3.968661679220467e-08,
      "loss": 1.3466,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.447851538658142,
      "rewards/margins": 0.5635604858398438,
      "rewards/rejected": -2.010498046875,
      "step": 195
    },
    {
      "agreement_weights/mean": 0.909978985786438,
      "agreement_weights/std": 0.08680907636880875,
      "epoch": 0.8549292011755276,
      "eta/annotator_0": 0.9059289693832397,
      "grad_norm": 14.8750987464841,
      "learning_rate": 3.013156219837776e-08,
      "loss": 1.3394,
      "rewards/accuracies": 0.686718761920929,
      "rewards/chosen": -1.44580078125,
      "rewards/margins": 0.5717681646347046,
      "rewards/rejected": -2.017578125,
      "step": 200
    },
    {
      "agreement_weights/mean": 0.9009490013122559,
      "agreement_weights/std": 0.09950422495603561,
      "epoch": 0.8763024312049158,
      "eta/annotator_0": 0.9060670137405396,
      "grad_norm": 11.03690850563146,
      "learning_rate": 2.1817890137430932e-08,
      "loss": 1.373,
      "rewards/accuracies": 0.6585937738418579,
      "rewards/chosen": -1.499365210533142,
      "rewards/margins": 0.44548338651657104,
      "rewards/rejected": -1.9451172351837158,
      "step": 205
    },
    {
      "agreement_weights/mean": 0.8993496894836426,
      "agreement_weights/std": 0.10450420528650284,
      "epoch": 0.897675661234304,
      "eta/annotator_0": 0.9054271578788757,
      "grad_norm": 10.926435820427173,
      "learning_rate": 1.479253980347392e-08,
      "loss": 1.3494,
      "rewards/accuracies": 0.6703125238418579,
      "rewards/chosen": -1.4269530773162842,
      "rewards/margins": 0.4932968020439148,
      "rewards/rejected": -1.920312523841858,
      "step": 210
    },
    {
      "agreement_weights/mean": 0.8937209844589233,
      "agreement_weights/std": 0.10731947422027588,
      "epoch": 0.9190488912636923,
      "eta/annotator_0": 0.9135538339614868,
      "grad_norm": 13.805011448701395,
      "learning_rate": 9.095176494896661e-09,
      "loss": 1.3761,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -1.497216820716858,
      "rewards/margins": 0.4012207090854645,
      "rewards/rejected": -1.899999976158142,
      "step": 215
    },
    {
      "agreement_weights/mean": 0.898374080657959,
      "agreement_weights/std": 0.09499609470367432,
      "epoch": 0.9404221212930804,
      "eta/annotator_0": 0.9047737121582031,
      "grad_norm": 15.308557834289246,
      "learning_rate": 4.757967663132689e-09,
      "loss": 1.3681,
      "rewards/accuracies": 0.6656249761581421,
      "rewards/chosen": -1.476318359375,
      "rewards/margins": 0.48259276151657104,
      "rewards/rejected": -1.959130883216858,
      "step": 220
    },
    {
      "agreement_weights/mean": 0.8990669250488281,
      "agreement_weights/std": 0.09686337411403656,
      "epoch": 0.9617953513224686,
      "eta/annotator_0": 0.8999508619308472,
      "grad_norm": 10.411971079104832,
      "learning_rate": 1.8054012944479224e-09,
      "loss": 1.3562,
      "rewards/accuracies": 0.6859375238418579,
      "rewards/chosen": -1.455712914466858,
      "rewards/margins": 0.5232818722724915,
      "rewards/rejected": -1.979394555091858,
      "step": 225
    },
    {
      "agreement_weights/mean": 0.9076964259147644,
      "agreement_weights/std": 0.08395025134086609,
      "epoch": 0.9831685813518568,
      "eta/annotator_0": 0.9021452069282532,
      "grad_norm": 9.775521792655194,
      "learning_rate": 2.541476501764228e-10,
      "loss": 1.3527,
      "rewards/accuracies": 0.69140625,
      "rewards/chosen": -1.46875,
      "rewards/margins": 0.5552108883857727,
      "rewards/rejected": -2.024218797683716,
      "step": 230
    },
    {
      "epoch": 0.9959925193694897,
      "step": 233,
      "total_flos": 0.0,
      "train_loss": 1.4599583829421343,
      "train_runtime": 7117.4583,
      "train_samples_per_second": 8.413,
      "train_steps_per_second": 0.033
    }
  ],
  "logging_steps": 5,
  "max_steps": 233,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}