Instructions to use rovdetection/code-1b-aligned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use rovdetection/code-1b-aligned with Transformers:
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("rovdetection/code-1b-aligned", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Invalid JSON: Unexpected token 'N', ..."_chosen": NaN, "... is not valid JSON (the file below contains bare NaN values, which strict JSON does not permit)
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.3616146092302129, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007232292184604258, | |
| "grad_norm": 0.24258430302143097, | |
| "learning_rate": 4.977500000000001e-06, | |
| "log_odds_chosen": 0.024490734562277794, | |
| "log_odds_ratio": -0.8170725703239441, | |
| "logits/chosen": -1.157371997833252, | |
| "logits/rejected": -1.3074114322662354, | |
| "logps/chosen": -5.802087306976318, | |
| "logps/rejected": -5.824903964996338, | |
| "loss": 5.930429458618164, | |
| "nll_loss": 5.848721981048584, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.5802086591720581, | |
| "rewards/margins": 0.002281700726598501, | |
| "rewards/rejected": -0.5824903845787048, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.014464584369208515, | |
| "grad_norm": 0.2757987976074219, | |
| "learning_rate": 4.9525000000000004e-06, | |
| "log_odds_chosen": 0.07715226709842682, | |
| "log_odds_ratio": -0.824821949005127, | |
| "logits/chosen": -1.2339580059051514, | |
| "logits/rejected": -1.3473726511001587, | |
| "logps/chosen": -5.762179851531982, | |
| "logps/rejected": -5.835549354553223, | |
| "loss": 5.804796600341797, | |
| "nll_loss": 5.722315311431885, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.5762180089950562, | |
| "rewards/margins": 0.007336919195950031, | |
| "rewards/rejected": -0.5835549235343933, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.021696876553812774, | |
| "grad_norm": 0.335886150598526, | |
| "learning_rate": 4.927500000000001e-06, | |
| "log_odds_chosen": 0.1334013044834137, | |
| "log_odds_ratio": -0.7585476636886597, | |
| "logits/chosen": -1.2495059967041016, | |
| "logits/rejected": -1.377275824546814, | |
| "logps/chosen": -5.738375663757324, | |
| "logps/rejected": -5.869871616363525, | |
| "loss": 5.848030853271484, | |
| "nll_loss": 5.772176265716553, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.5738375782966614, | |
| "rewards/margins": 0.013149544596672058, | |
| "rewards/rejected": -0.5869871377944946, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02892916873841703, | |
| "grad_norm": 0.3094758987426758, | |
| "learning_rate": 4.902500000000001e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.2641137838363647, | |
| "logits/rejected": -1.3740476369857788, | |
| "logps/chosen": -5.545978546142578, | |
| "logps/rejected": NaN, | |
| "loss": 6.1603240966796875, | |
| "nll_loss": 5.585268974304199, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.5545979738235474, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.03616146092302129, | |
| "grad_norm": 0.32100710272789, | |
| "learning_rate": 4.8775e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.30973219871521, | |
| "logits/rejected": -1.4038114547729492, | |
| "logps/chosen": NaN, | |
| "logps/rejected": -5.679649829864502, | |
| "loss": 5.871247863769531, | |
| "nll_loss": 5.540013790130615, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": NaN, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": -0.567965030670166, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.04339375310762555, | |
| "grad_norm": 0.37317097187042236, | |
| "learning_rate": 4.8525000000000006e-06, | |
| "log_odds_chosen": 0.08813583850860596, | |
| "log_odds_ratio": -0.7889271974563599, | |
| "logits/chosen": -1.2617380619049072, | |
| "logits/rejected": -1.3692435026168823, | |
| "logps/chosen": -5.557856559753418, | |
| "logps/rejected": -5.643843173980713, | |
| "loss": 5.632916259765625, | |
| "nll_loss": 5.554023742675781, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.5557857155799866, | |
| "rewards/margins": 0.008598615415394306, | |
| "rewards/rejected": -0.5643843412399292, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.050626045292229804, | |
| "grad_norm": 0.2950762212276459, | |
| "learning_rate": 4.827500000000001e-06, | |
| "log_odds_chosen": 0.12244565784931183, | |
| "log_odds_ratio": -0.7549425959587097, | |
| "logits/chosen": -1.312510371208191, | |
| "logits/rejected": -1.41781485080719, | |
| "logps/chosen": -5.558300018310547, | |
| "logps/rejected": -5.677781581878662, | |
| "loss": 5.611359786987305, | |
| "nll_loss": 5.535863876342773, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": -0.5558300018310547, | |
| "rewards/margins": 0.01194816268980503, | |
| "rewards/rejected": -0.5677782297134399, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.05785833747683406, | |
| "grad_norm": 0.3010028600692749, | |
| "learning_rate": 4.8025e-06, | |
| "log_odds_chosen": 0.17995290458202362, | |
| "log_odds_ratio": -0.7352453470230103, | |
| "logits/chosen": -1.3281229734420776, | |
| "logits/rejected": -1.4346697330474854, | |
| "logps/chosen": -5.496776580810547, | |
| "logps/rejected": -5.675050735473633, | |
| "loss": 5.57300033569336, | |
| "nll_loss": 5.499476432800293, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.5496777296066284, | |
| "rewards/margins": 0.017827384173870087, | |
| "rewards/rejected": -0.5675050616264343, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.06509062966143832, | |
| "grad_norm": 0.2564737796783447, | |
| "learning_rate": 4.7775e-06, | |
| "log_odds_chosen": 0.08067800104618073, | |
| "log_odds_ratio": -0.757804274559021, | |
| "logits/chosen": -1.2539831399917603, | |
| "logits/rejected": -1.3801463842391968, | |
| "logps/chosen": -5.533335208892822, | |
| "logps/rejected": -5.611950874328613, | |
| "loss": 5.607465744018555, | |
| "nll_loss": 5.531683921813965, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.5533335208892822, | |
| "rewards/margins": 0.007861590944230556, | |
| "rewards/rejected": -0.5611951351165771, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.07232292184604258, | |
| "grad_norm": 0.2745928168296814, | |
| "learning_rate": 4.752500000000001e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.1503195762634277, | |
| "logits/rejected": -1.3092117309570312, | |
| "logps/chosen": -5.674561500549316, | |
| "logps/rejected": NaN, | |
| "loss": 5.882052612304688, | |
| "nll_loss": 5.633638381958008, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.5674561262130737, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07955521403064683, | |
| "grad_norm": 0.22206123173236847, | |
| "learning_rate": 4.7275e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.3089696168899536, | |
| "logits/rejected": -1.4873392581939697, | |
| "logps/chosen": -5.392054557800293, | |
| "logps/rejected": NaN, | |
| "loss": 5.727831649780273, | |
| "nll_loss": 5.414012908935547, | |
| "rewards/accuracies": 0.515625, | |
| "rewards/chosen": -0.5392054319381714, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0867875062152511, | |
| "grad_norm": 0.19849246740341187, | |
| "learning_rate": 4.7025e-06, | |
| "log_odds_chosen": 0.09243413805961609, | |
| "log_odds_ratio": -0.7682880759239197, | |
| "logits/chosen": -1.3715227842330933, | |
| "logits/rejected": -1.4874814748764038, | |
| "logps/chosen": -5.332463264465332, | |
| "logps/rejected": -5.421862602233887, | |
| "loss": 5.385763931274414, | |
| "nll_loss": 5.308935642242432, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5332463383674622, | |
| "rewards/margins": 0.00893993116915226, | |
| "rewards/rejected": -0.5421862602233887, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.09401979839985536, | |
| "grad_norm": 0.20989899337291718, | |
| "learning_rate": 4.6775000000000005e-06, | |
| "log_odds_chosen": 0.07261505722999573, | |
| "log_odds_ratio": -0.7669461965560913, | |
| "logits/chosen": -1.2831826210021973, | |
| "logits/rejected": -1.3855401277542114, | |
| "logps/chosen": -5.570550441741943, | |
| "logps/rejected": -5.639805793762207, | |
| "loss": 5.608541107177734, | |
| "nll_loss": 5.531846046447754, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": -0.5570551156997681, | |
| "rewards/margins": 0.006925526075065136, | |
| "rewards/rejected": -0.5639805793762207, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.10125209058445961, | |
| "grad_norm": 0.24246586859226227, | |
| "learning_rate": 4.652500000000001e-06, | |
| "log_odds_chosen": 0.13198330998420715, | |
| "log_odds_ratio": -0.7554014921188354, | |
| "logits/chosen": -1.3736729621887207, | |
| "logits/rejected": -1.4753162860870361, | |
| "logps/chosen": -5.387119770050049, | |
| "logps/rejected": -5.51568078994751, | |
| "loss": 5.4450115203857425, | |
| "nll_loss": 5.369471549987793, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.5387119650840759, | |
| "rewards/margins": 0.01285608857870102, | |
| "rewards/rejected": -0.5515680909156799, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.10848438276906387, | |
| "grad_norm": 0.22028212249279022, | |
| "learning_rate": 4.6275e-06, | |
| "log_odds_chosen": 0.1945369690656662, | |
| "log_odds_ratio": -0.7150241732597351, | |
| "logits/chosen": -1.222019076347351, | |
| "logits/rejected": -1.3116670846939087, | |
| "logps/chosen": -5.559757232666016, | |
| "logps/rejected": -5.74977970123291, | |
| "loss": 5.611997985839844, | |
| "nll_loss": 5.540493965148926, | |
| "rewards/accuracies": 0.559374988079071, | |
| "rewards/chosen": -0.5559757351875305, | |
| "rewards/margins": 0.01900230534374714, | |
| "rewards/rejected": -0.5749779939651489, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.11571667495366812, | |
| "grad_norm": 0.24762941896915436, | |
| "learning_rate": 4.6025e-06, | |
| "log_odds_chosen": 0.12068144977092743, | |
| "log_odds_ratio": -0.7480632066726685, | |
| "logits/chosen": -1.2661784887313843, | |
| "logits/rejected": -1.3999755382537842, | |
| "logps/chosen": -5.444934844970703, | |
| "logps/rejected": -5.563178062438965, | |
| "loss": 5.528120040893555, | |
| "nll_loss": 5.453313827514648, | |
| "rewards/accuracies": 0.5531250238418579, | |
| "rewards/chosen": -0.544493556022644, | |
| "rewards/margins": 0.011824256740510464, | |
| "rewards/rejected": -0.5563178062438965, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.12294896713827239, | |
| "grad_norm": 0.24410110712051392, | |
| "learning_rate": 4.577500000000001e-06, | |
| "log_odds_chosen": 0.07302852720022202, | |
| "log_odds_ratio": -0.7880190014839172, | |
| "logits/chosen": -1.2591315507888794, | |
| "logits/rejected": -1.379748821258545, | |
| "logps/chosen": -5.474093437194824, | |
| "logps/rejected": -5.544507026672363, | |
| "loss": 5.533624267578125, | |
| "nll_loss": 5.454824447631836, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.5474093556404114, | |
| "rewards/margins": 0.00704141054302454, | |
| "rewards/rejected": -0.5544507503509521, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.13018125932287664, | |
| "grad_norm": 0.238921120762825, | |
| "learning_rate": 4.5525e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.2266502380371094, | |
| "logits/rejected": -1.3530925512313843, | |
| "logps/chosen": -5.5178141593933105, | |
| "logps/rejected": NaN, | |
| "loss": 5.79667854309082, | |
| "nll_loss": 5.490872859954834, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.551781415939331, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.1374135515074809, | |
| "grad_norm": 0.24503076076507568, | |
| "learning_rate": 4.5275e-06, | |
| "log_odds_chosen": 0.03844783455133438, | |
| "log_odds_ratio": -0.7719030976295471, | |
| "logits/chosen": -1.2923024892807007, | |
| "logits/rejected": -1.4297457933425903, | |
| "logps/chosen": -5.426344871520996, | |
| "logps/rejected": -5.463438987731934, | |
| "loss": 5.509374618530273, | |
| "nll_loss": 5.432183265686035, | |
| "rewards/accuracies": 0.4468750059604645, | |
| "rewards/chosen": -0.5426343679428101, | |
| "rewards/margins": 0.003709450364112854, | |
| "rewards/rejected": -0.5463439226150513, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.14464584369208516, | |
| "grad_norm": 0.2474403828382492, | |
| "learning_rate": 4.5025000000000005e-06, | |
| "log_odds_chosen": 0.15352819859981537, | |
| "log_odds_ratio": -0.7516843676567078, | |
| "logits/chosen": -1.238797664642334, | |
| "logits/rejected": -1.3516855239868164, | |
| "logps/chosen": -5.482898235321045, | |
| "logps/rejected": -5.631514549255371, | |
| "loss": 5.482235336303711, | |
| "nll_loss": 5.407067775726318, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.5482897758483887, | |
| "rewards/margins": 0.01486161071807146, | |
| "rewards/rejected": -0.563151478767395, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.1518781358766894, | |
| "grad_norm": 0.24647395312786102, | |
| "learning_rate": 4.4775e-06, | |
| "log_odds_chosen": 0.06791242212057114, | |
| "log_odds_ratio": -0.7611157894134521, | |
| "logits/chosen": -1.2040866613388062, | |
| "logits/rejected": -1.325791835784912, | |
| "logps/chosen": -5.527557373046875, | |
| "logps/rejected": -5.593737602233887, | |
| "loss": 5.5501853942871096, | |
| "nll_loss": 5.4740729331970215, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.5527557134628296, | |
| "rewards/margins": 0.00661806296557188, | |
| "rewards/rejected": -0.5593737959861755, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.15911042806129366, | |
| "grad_norm": 0.3191209137439728, | |
| "learning_rate": 4.4525e-06, | |
| "log_odds_chosen": 0.11287051439285278, | |
| "log_odds_ratio": -0.744554877281189, | |
| "logits/chosen": -1.2233508825302124, | |
| "logits/rejected": -1.334123134613037, | |
| "logps/chosen": -5.541935920715332, | |
| "logps/rejected": -5.6515703201293945, | |
| "loss": 5.557551574707031, | |
| "nll_loss": 5.483095169067383, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.5541935563087463, | |
| "rewards/margins": 0.010963483713567257, | |
| "rewards/rejected": -0.5651570558547974, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.16634272024589794, | |
| "grad_norm": 0.3177518844604492, | |
| "learning_rate": 4.4275e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.2862989902496338, | |
| "logits/rejected": -1.396356225013733, | |
| "logps/chosen": -5.400467872619629, | |
| "logps/rejected": NaN, | |
| "loss": 5.670994567871094, | |
| "nll_loss": 5.330209255218506, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": -0.5400468111038208, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.1735750124305022, | |
| "grad_norm": 0.2698371410369873, | |
| "learning_rate": 4.4025e-06, | |
| "log_odds_chosen": 0.10866693407297134, | |
| "log_odds_ratio": -0.7732769250869751, | |
| "logits/chosen": -1.2277987003326416, | |
| "logits/rejected": -1.3508949279785156, | |
| "logps/chosen": -5.505708694458008, | |
| "logps/rejected": -5.6127214431762695, | |
| "loss": 5.515193176269531, | |
| "nll_loss": 5.437865257263184, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": -0.5505709052085876, | |
| "rewards/margins": 0.010701271705329418, | |
| "rewards/rejected": -0.5612722039222717, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.18080730461510644, | |
| "grad_norm": 0.2867446839809418, | |
| "learning_rate": 4.3775e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.2118570804595947, | |
| "logits/rejected": -1.3736122846603394, | |
| "logps/chosen": NaN, | |
| "logps/rejected": -5.480542182922363, | |
| "loss": 5.575833892822265, | |
| "nll_loss": 5.324864387512207, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": NaN, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": -0.5480541586875916, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.18803959679971072, | |
| "grad_norm": 0.31495073437690735, | |
| "learning_rate": 4.3525e-06, | |
| "log_odds_chosen": 0.008091190829873085, | |
| "log_odds_ratio": -0.8035072088241577, | |
| "logits/chosen": -1.214800477027893, | |
| "logits/rejected": -1.3568775653839111, | |
| "logps/chosen": -5.552915096282959, | |
| "logps/rejected": -5.560267925262451, | |
| "loss": 5.492018127441407, | |
| "nll_loss": 5.411666393280029, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5552915334701538, | |
| "rewards/margins": 0.00073523836908862, | |
| "rewards/rejected": -0.5560267567634583, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.19527188898431497, | |
| "grad_norm": 0.3158721327781677, | |
| "learning_rate": 4.3275000000000005e-06, | |
| "log_odds_chosen": 0.12867891788482666, | |
| "log_odds_ratio": -0.7282706499099731, | |
| "logits/chosen": -1.1992931365966797, | |
| "logits/rejected": -1.3094508647918701, | |
| "logps/chosen": -5.397171497344971, | |
| "logps/rejected": -5.522560119628906, | |
| "loss": 5.501744079589844, | |
| "nll_loss": 5.42891788482666, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.5397171378135681, | |
| "rewards/margins": 0.012538868002593517, | |
| "rewards/rejected": -0.5522559881210327, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.20250418116891922, | |
| "grad_norm": NaN, | |
| "learning_rate": 4.302500000000001e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.1986209154129028, | |
| "logits/rejected": -1.325928807258606, | |
| "logps/chosen": -5.435647964477539, | |
| "logps/rejected": NaN, | |
| "loss": 5.748141479492188, | |
| "nll_loss": 5.416440010070801, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.5435648560523987, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2097364733535235, | |
| "grad_norm": 0.3696766197681427, | |
| "learning_rate": 4.2775e-06, | |
| "log_odds_chosen": -0.056973300874233246, | |
| "log_odds_ratio": -0.8312661051750183, | |
| "logits/chosen": -1.1497722864151, | |
| "logits/rejected": -1.2519116401672363, | |
| "logps/chosen": -5.443070888519287, | |
| "logps/rejected": -5.3841233253479, | |
| "loss": 5.432368469238281, | |
| "nll_loss": 5.349241733551025, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.5443071126937866, | |
| "rewards/margins": -0.005894799251109362, | |
| "rewards/rejected": -0.5384122729301453, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.21696876553812774, | |
| "grad_norm": 0.4062056839466095, | |
| "learning_rate": 4.2525e-06, | |
| "log_odds_chosen": 0.06658594310283661, | |
| "log_odds_ratio": -0.7710601687431335, | |
| "logits/chosen": -1.152179479598999, | |
| "logits/rejected": -1.2460296154022217, | |
| "logps/chosen": -5.450706958770752, | |
| "logps/rejected": -5.514741897583008, | |
| "loss": 5.461360168457031, | |
| "nll_loss": 5.384252548217773, | |
| "rewards/accuracies": 0.484375, | |
| "rewards/chosen": -0.5450707077980042, | |
| "rewards/margins": 0.006403499282896519, | |
| "rewards/rejected": -0.5514742136001587, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.224201057722732, | |
| "grad_norm": 0.35252928733825684, | |
| "learning_rate": 4.227500000000001e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.11289381980896, | |
| "logits/rejected": -1.1899579763412476, | |
| "logps/chosen": NaN, | |
| "logps/rejected": -5.594590187072754, | |
| "loss": 5.755791473388672, | |
| "nll_loss": 5.4381585121154785, | |
| "rewards/accuracies": 0.503125011920929, | |
| "rewards/chosen": NaN, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": -0.5594589710235596, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.23143334990733624, | |
| "grad_norm": 0.32109105587005615, | |
| "learning_rate": 4.202500000000001e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.1014893054962158, | |
| "logits/rejected": -1.2668919563293457, | |
| "logps/chosen": NaN, | |
| "logps/rejected": -5.533962249755859, | |
| "loss": 5.670769500732422, | |
| "nll_loss": 5.3555803298950195, | |
| "rewards/accuracies": 0.5531250238418579, | |
| "rewards/chosen": NaN, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": -0.5533961653709412, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.23866564209194052, | |
| "grad_norm": 0.3987061083316803, | |
| "learning_rate": 4.1775e-06, | |
| "log_odds_chosen": 0.09470056742429733, | |
| "log_odds_ratio": -0.7665299773216248, | |
| "logits/chosen": -1.0869171619415283, | |
| "logits/rejected": -1.1906062364578247, | |
| "logps/chosen": -5.4382758140563965, | |
| "logps/rejected": -5.529966831207275, | |
| "loss": 5.484853363037109, | |
| "nll_loss": 5.408199787139893, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.5438276529312134, | |
| "rewards/margins": 0.009169066324830055, | |
| "rewards/rejected": -0.5529965758323669, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.24589793427654477, | |
| "grad_norm": 0.3762986958026886, | |
| "learning_rate": 4.1525000000000005e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.0889309644699097, | |
| "logits/rejected": -1.210901141166687, | |
| "logps/chosen": NaN, | |
| "logps/rejected": -5.512875556945801, | |
| "loss": 5.707857513427735, | |
| "nll_loss": 5.374106407165527, | |
| "rewards/accuracies": 0.5406249761581421, | |
| "rewards/chosen": NaN, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": -0.5512875318527222, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.253130226461149, | |
| "grad_norm": 0.3539881706237793, | |
| "learning_rate": 4.127500000000001e-06, | |
| "log_odds_chosen": 0.05990752577781677, | |
| "log_odds_ratio": -0.7520009279251099, | |
| "logits/chosen": -1.1000460386276245, | |
| "logits/rejected": -1.1998263597488403, | |
| "logps/chosen": -5.449780464172363, | |
| "logps/rejected": -5.5063323974609375, | |
| "loss": 5.4952552795410154, | |
| "nll_loss": 5.420053958892822, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.5449780225753784, | |
| "rewards/margins": 0.005655230488628149, | |
| "rewards/rejected": -0.5506333112716675, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.26036251864575327, | |
| "grad_norm": 0.4491257071495056, | |
| "learning_rate": 4.1025e-06, | |
| "log_odds_chosen": 0.0024302334059029818, | |
| "log_odds_ratio": -0.803369402885437, | |
| "logits/chosen": -1.1746547222137451, | |
| "logits/rejected": -1.2685177326202393, | |
| "logps/chosen": -5.358044624328613, | |
| "logps/rejected": -5.358481407165527, | |
| "loss": 5.37115707397461, | |
| "nll_loss": 5.290821075439453, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.5358044505119324, | |
| "rewards/margins": 4.368703957879916e-05, | |
| "rewards/rejected": -0.5358482003211975, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2675948108303575, | |
| "grad_norm": 0.4225010275840759, | |
| "learning_rate": 4.0775e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.1588351726531982, | |
| "logits/rejected": -1.2908666133880615, | |
| "logps/chosen": -5.33625602722168, | |
| "logps/rejected": NaN, | |
| "loss": 5.6588897705078125, | |
| "nll_loss": 5.325121879577637, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": -0.533625602722168, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.2748271030149618, | |
| "grad_norm": 0.39807629585266113, | |
| "learning_rate": 4.052500000000001e-06, | |
| "log_odds_chosen": 0.10123734176158905, | |
| "log_odds_ratio": -0.7437968850135803, | |
| "logits/chosen": -1.1793785095214844, | |
| "logits/rejected": -1.3256503343582153, | |
| "logps/chosen": -5.213901996612549, | |
| "logps/rejected": -5.312169551849365, | |
| "loss": 5.233790588378906, | |
| "nll_loss": 5.159411430358887, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.5213901996612549, | |
| "rewards/margins": 0.009826736524701118, | |
| "rewards/rejected": -0.5312169790267944, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2820593951995661, | |
| "grad_norm": 0.4834080934524536, | |
| "learning_rate": 4.0275e-06, | |
| "log_odds_chosen": 0.07742507755756378, | |
| "log_odds_ratio": -0.7695199847221375, | |
| "logits/chosen": -1.0857242345809937, | |
| "logits/rejected": -1.2077770233154297, | |
| "logps/chosen": -5.406026363372803, | |
| "logps/rejected": -5.479708671569824, | |
| "loss": 5.404788970947266, | |
| "nll_loss": 5.3278374671936035, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.5406026840209961, | |
| "rewards/margins": 0.007368179503828287, | |
| "rewards/rejected": -0.5479708313941956, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2892916873841703, | |
| "grad_norm": 0.4307977259159088, | |
| "learning_rate": 4.0025e-06, | |
| "log_odds_chosen": 0.13691337406635284, | |
| "log_odds_ratio": -0.7147995233535767, | |
| "logits/chosen": -1.1390448808670044, | |
| "logits/rejected": -1.2907403707504272, | |
| "logps/chosen": -5.265523433685303, | |
| "logps/rejected": -5.39796257019043, | |
| "loss": 5.341230010986328, | |
| "nll_loss": 5.269749641418457, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.5265523791313171, | |
| "rewards/margins": 0.013243894092738628, | |
| "rewards/rejected": -0.5397962331771851, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.2965239795687746, | |
| "grad_norm": 0.40860849618911743, | |
| "learning_rate": 3.9775000000000005e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.1594798564910889, | |
| "logits/rejected": -1.288938283920288, | |
| "logps/chosen": -5.337071895599365, | |
| "logps/rejected": NaN, | |
| "loss": 5.633375549316407, | |
| "nll_loss": 5.293572425842285, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.5337072014808655, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": NaN, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3037562717533788, | |
| "grad_norm": 0.5024765729904175, | |
| "learning_rate": 3.9525e-06, | |
| "log_odds_chosen": 0.14180947840213776, | |
| "log_odds_ratio": -0.716079831123352, | |
| "logits/chosen": -1.05372154712677, | |
| "logits/rejected": -1.1723723411560059, | |
| "logps/chosen": -5.377732753753662, | |
| "logps/rejected": -5.514806747436523, | |
| "loss": 5.390876770019531, | |
| "nll_loss": 5.319269180297852, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": -0.5377733707427979, | |
| "rewards/margins": 0.013707393780350685, | |
| "rewards/rejected": -0.5514807105064392, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.3109885639379831, | |
| "grad_norm": 0.43874257802963257, | |
| "learning_rate": 3.9275e-06, | |
| "log_odds_chosen": 0.041213370859622955, | |
| "log_odds_ratio": -0.7626296281814575, | |
| "logits/chosen": -1.0877110958099365, | |
| "logits/rejected": -1.2165416479110718, | |
| "logps/chosen": -5.347620010375977, | |
| "logps/rejected": -5.385829925537109, | |
| "loss": 5.355945587158203, | |
| "nll_loss": 5.279682636260986, | |
| "rewards/accuracies": 0.49687498807907104, | |
| "rewards/chosen": -0.5347620248794556, | |
| "rewards/margins": 0.0038209576159715652, | |
| "rewards/rejected": -0.538582980632782, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.3182208561225873, | |
| "grad_norm": 0.4173244535923004, | |
| "learning_rate": 3.9025e-06, | |
| "log_odds_chosen": 0.11863790452480316, | |
| "log_odds_ratio": -0.7459925413131714, | |
| "logits/chosen": -1.1519792079925537, | |
| "logits/rejected": -1.3020139932632446, | |
| "logps/chosen": -5.2671380043029785, | |
| "logps/rejected": -5.38327693939209, | |
| "loss": 5.298666381835938, | |
| "nll_loss": 5.224067211151123, | |
| "rewards/accuracies": 0.578125, | |
| "rewards/chosen": -0.5267137885093689, | |
| "rewards/margins": 0.011613896116614342, | |
| "rewards/rejected": -0.538327693939209, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.32545314830719163, | |
| "grad_norm": 0.4220745861530304, | |
| "learning_rate": 3.8775000000000006e-06, | |
| "log_odds_chosen": NaN, | |
| "log_odds_ratio": NaN, | |
| "logits/chosen": -1.0332152843475342, | |
| "logits/rejected": -1.1924374103546143, | |
| "logps/chosen": NaN, | |
| "logps/rejected": -5.533570766448975, | |
| "loss": 5.6558387756347654, | |
| "nll_loss": 5.354229927062988, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": NaN, | |
| "rewards/margins": NaN, | |
| "rewards/rejected": -0.5533571243286133, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3326854404917959, | |
| "grad_norm": 0.4184921085834503, | |
| "learning_rate": 3.8525e-06, | |
| "log_odds_chosen": 0.0849083662033081, | |
| "log_odds_ratio": -0.7574716210365295, | |
| "logits/chosen": -1.0905861854553223, | |
| "logits/rejected": -1.18590247631073, | |
| "logps/chosen": -5.311153888702393, | |
| "logps/rejected": -5.390518665313721, | |
| "loss": 5.285702133178711, | |
| "nll_loss": 5.209954261779785, | |
| "rewards/accuracies": 0.5218750238418579, | |
| "rewards/chosen": -0.5311154127120972, | |
| "rewards/margins": 0.007936512120068073, | |
| "rewards/rejected": -0.53905189037323, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.33991773267640013, | |
| "grad_norm": 0.4340634047985077, | |
| "learning_rate": 3.8275e-06, | |
| "log_odds_chosen": 0.12307295948266983, | |
| "log_odds_ratio": -0.7435885667800903, | |
| "logits/chosen": -1.1616504192352295, | |
| "logits/rejected": -1.275611162185669, | |
| "logps/chosen": -5.210690498352051, | |
| "logps/rejected": -5.329777717590332, | |
| "loss": 5.172726058959961, | |
| "nll_loss": 5.098366737365723, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.5210691094398499, | |
| "rewards/margins": 0.011908676475286484, | |
| "rewards/rejected": -0.5329777598381042, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3471500248610044, | |
| "grad_norm": 0.5348623394966125, | |
| "learning_rate": 3.8025e-06, | |
| "log_odds_chosen": 0.10718987882137299, | |
| "log_odds_ratio": -0.7553779482841492, | |
| "logits/chosen": -1.069937825202942, | |
| "logits/rejected": -1.2259876728057861, | |
| "logps/chosen": -5.3289594650268555, | |
| "logps/rejected": -5.430902004241943, | |
| "loss": 5.337881469726563, | |
| "nll_loss": 5.262343406677246, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.5328959822654724, | |
| "rewards/margins": 0.010194242931902409, | |
| "rewards/rejected": -0.5430902242660522, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.35438231704560863, | |
| "grad_norm": 0.5375458002090454, | |
| "learning_rate": 3.7775000000000003e-06, | |
| "log_odds_chosen": -0.0630793422460556, | |
| "log_odds_ratio": -0.8402652740478516, | |
| "logits/chosen": -1.0562111139297485, | |
| "logits/rejected": -1.1850430965423584, | |
| "logps/chosen": -5.353094100952148, | |
| "logps/rejected": -5.288957595825195, | |
| "loss": 5.370440673828125, | |
| "nll_loss": 5.286414623260498, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.5353094339370728, | |
| "rewards/margins": -0.006413729395717382, | |
| "rewards/rejected": -0.5288957357406616, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3616146092302129, | |
| "grad_norm": 0.5694707036018372, | |
| "learning_rate": 3.7525e-06, | |
| "log_odds_chosen": 0.1951710283756256, | |
| "log_odds_ratio": -0.7369016408920288, | |
| "logits/chosen": -1.1101913452148438, | |
| "logits/rejected": -1.2623517513275146, | |
| "logps/chosen": -5.364739418029785, | |
| "logps/rejected": -5.554258823394775, | |
| "loss": 5.355496597290039, | |
| "nll_loss": 5.281806945800781, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.5364739298820496, | |
| "rewards/margins": 0.018951958045363426, | |
| "rewards/rejected": -0.5554260015487671, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |