| { | |
| "best_metric": 1.4967381954193115, | |
| "best_model_checkpoint": "saves/Falcon-7B-Instruct/lora/orpo-salt-half/checkpoint-1500", | |
| "epoch": 2.9974597798475866, | |
| "eval_steps": 500, | |
| "global_step": 1770, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01693480101608806, | |
| "grad_norm": 0.6027132868766785, | |
| "learning_rate": 4.999614014035063e-06, | |
| "logits/chosen": -14.201833724975586, | |
| "logits/rejected": -14.270045280456543, | |
| "logps/chosen": -1.961771011352539, | |
| "logps/rejected": -2.1497561931610107, | |
| "loss": 2.0361, | |
| "odds_ratio_loss": 0.7429978251457214, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.19617711007595062, | |
| "rewards/margins": 0.01879853382706642, | |
| "rewards/rejected": -0.21497564017772675, | |
| "sft_loss": 1.961771011352539, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03386960203217612, | |
| "grad_norm": 0.4791746735572815, | |
| "learning_rate": 4.998440543386042e-06, | |
| "logits/chosen": -14.17326545715332, | |
| "logits/rejected": -14.03160572052002, | |
| "logps/chosen": -1.9260406494140625, | |
| "logps/rejected": -2.0053372383117676, | |
| "loss": 2.0019, | |
| "odds_ratio_loss": 0.7586489915847778, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.19260406494140625, | |
| "rewards/margins": 0.007929656654596329, | |
| "rewards/rejected": -0.20053371787071228, | |
| "sft_loss": 1.9260406494140625, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05080440304826418, | |
| "grad_norm": 0.3785243630409241, | |
| "learning_rate": 4.996479918381253e-06, | |
| "logits/chosen": -14.245376586914062, | |
| "logits/rejected": -14.222900390625, | |
| "logps/chosen": -1.8398857116699219, | |
| "logps/rejected": -1.8666032552719116, | |
| "loss": 1.9146, | |
| "odds_ratio_loss": 0.7475350499153137, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.18398860096931458, | |
| "rewards/margins": 0.00267172628082335, | |
| "rewards/rejected": -0.18666031956672668, | |
| "sft_loss": 1.8398857116699219, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06773920406435224, | |
| "grad_norm": 0.637917697429657, | |
| "learning_rate": 4.993732756731818e-06, | |
| "logits/chosen": -14.213427543640137, | |
| "logits/rejected": -14.385249137878418, | |
| "logps/chosen": -1.8162885904312134, | |
| "logps/rejected": -1.9234222173690796, | |
| "loss": 1.889, | |
| "odds_ratio_loss": 0.7271509766578674, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.18162885308265686, | |
| "rewards/margins": 0.010713383555412292, | |
| "rewards/rejected": -0.19234223663806915, | |
| "sft_loss": 1.8162885904312134, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0846740050804403, | |
| "grad_norm": 0.6790710091590881, | |
| "learning_rate": 4.9901999239537345e-06, | |
| "logits/chosen": -14.203392028808594, | |
| "logits/rejected": -14.118731498718262, | |
| "logps/chosen": -1.9451831579208374, | |
| "logps/rejected": -1.9480127096176147, | |
| "loss": 2.0255, | |
| "odds_ratio_loss": 0.8034948110580444, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.1945182979106903, | |
| "rewards/margins": 0.0002829456643667072, | |
| "rewards/rejected": -0.19480125606060028, | |
| "sft_loss": 1.9451831579208374, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.10160880609652836, | |
| "grad_norm": 0.38820621371269226, | |
| "learning_rate": 4.985882533095186e-06, | |
| "logits/chosen": -14.125239372253418, | |
| "logits/rejected": -14.241134643554688, | |
| "logps/chosen": -1.7669858932495117, | |
| "logps/rejected": -1.818566918373108, | |
| "loss": 1.8465, | |
| "odds_ratio_loss": 0.7950754761695862, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.17669859528541565, | |
| "rewards/margins": 0.005158091429620981, | |
| "rewards/rejected": -0.1818566769361496, | |
| "sft_loss": 1.7669858932495117, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11854360711261643, | |
| "grad_norm": 1.485378384590149, | |
| "learning_rate": 4.9807819443858705e-06, | |
| "logits/chosen": -14.16772174835205, | |
| "logits/rejected": -14.14952564239502, | |
| "logps/chosen": -1.7974742650985718, | |
| "logps/rejected": -1.8876419067382812, | |
| "loss": 1.8722, | |
| "odds_ratio_loss": 0.7475281953811646, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.17974743247032166, | |
| "rewards/margins": 0.009016749449074268, | |
| "rewards/rejected": -0.18876421451568604, | |
| "sft_loss": 1.7974742650985718, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1354784081287045, | |
| "grad_norm": 0.6158199310302734, | |
| "learning_rate": 4.9748997648084404e-06, | |
| "logits/chosen": -14.09917163848877, | |
| "logits/rejected": -14.224530220031738, | |
| "logps/chosen": -1.7899717092514038, | |
| "logps/rejected": -1.8508037328720093, | |
| "loss": 1.8688, | |
| "odds_ratio_loss": 0.7882196307182312, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.17899715900421143, | |
| "rewards/margins": 0.0060832141898572445, | |
| "rewards/rejected": -0.1850803941488266, | |
| "sft_loss": 1.7899717092514038, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15241320914479256, | |
| "grad_norm": 1.0399421453475952, | |
| "learning_rate": 4.96823784759222e-06, | |
| "logits/chosen": -14.11219596862793, | |
| "logits/rejected": -14.099919319152832, | |
| "logps/chosen": -1.7365163564682007, | |
| "logps/rejected": -1.7418320178985596, | |
| "loss": 1.8161, | |
| "odds_ratio_loss": 0.7956770658493042, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.17365165054798126, | |
| "rewards/margins": 0.0005315736052580178, | |
| "rewards/rejected": -0.174183189868927, | |
| "sft_loss": 1.7365163564682007, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1693480101608806, | |
| "grad_norm": 0.9482620358467102, | |
| "learning_rate": 4.960798291629323e-06, | |
| "logits/chosen": -14.198771476745605, | |
| "logits/rejected": -14.24067497253418, | |
| "logps/chosen": -1.8019222021102905, | |
| "logps/rejected": -1.7944272756576538, | |
| "loss": 1.8785, | |
| "odds_ratio_loss": 0.765292227268219, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.18019220232963562, | |
| "rewards/margins": -0.000749480735976249, | |
| "rewards/rejected": -0.17944273352622986, | |
| "sft_loss": 1.8019222021102905, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18628281117696868, | |
| "grad_norm": 1.496517539024353, | |
| "learning_rate": 4.952583440813383e-06, | |
| "logits/chosen": -14.270334243774414, | |
| "logits/rejected": -14.252988815307617, | |
| "logps/chosen": -1.8082859516143799, | |
| "logps/rejected": -1.8689155578613281, | |
| "loss": 1.885, | |
| "odds_ratio_loss": 0.7666890025138855, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.18082860112190247, | |
| "rewards/margins": 0.0060629709623754025, | |
| "rewards/rejected": -0.186891570687294, | |
| "sft_loss": 1.8082859516143799, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20321761219305673, | |
| "grad_norm": 0.8162474036216736, | |
| "learning_rate": 4.943595883301086e-06, | |
| "logits/chosen": -14.396245002746582, | |
| "logits/rejected": -14.407267570495605, | |
| "logps/chosen": -1.8202846050262451, | |
| "logps/rejected": -1.8238685131072998, | |
| "loss": 1.8966, | |
| "odds_ratio_loss": 0.7631626129150391, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.18202845752239227, | |
| "rewards/margins": 0.0003583906218409538, | |
| "rewards/rejected": -0.1823868602514267, | |
| "sft_loss": 1.8202846050262451, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2201524132091448, | |
| "grad_norm": 0.9815341234207153, | |
| "learning_rate": 4.933838450696757e-06, | |
| "logits/chosen": -14.14527702331543, | |
| "logits/rejected": -14.11426830291748, | |
| "logps/chosen": -1.6691076755523682, | |
| "logps/rejected": -1.7151718139648438, | |
| "loss": 1.7441, | |
| "odds_ratio_loss": 0.7502495050430298, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.16691075265407562, | |
| "rewards/margins": 0.004606431350111961, | |
| "rewards/rejected": -0.17151719331741333, | |
| "sft_loss": 1.6691076755523682, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.23708721422523285, | |
| "grad_norm": 1.7414650917053223, | |
| "learning_rate": 4.923314217160234e-06, | |
| "logits/chosen": -14.14660358428955, | |
| "logits/rejected": -14.196474075317383, | |
| "logps/chosen": -1.7544286251068115, | |
| "logps/rejected": -1.7217376232147217, | |
| "loss": 1.8341, | |
| "odds_ratio_loss": 0.7964597344398499, | |
| "rewards/accuracies": 0.39375001192092896, | |
| "rewards/chosen": -0.1754428595304489, | |
| "rewards/margins": -0.003269097302109003, | |
| "rewards/rejected": -0.17217376828193665, | |
| "sft_loss": 1.7544286251068115, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2540220152413209, | |
| "grad_norm": 0.6009025573730469, | |
| "learning_rate": 4.9120264984383285e-06, | |
| "logits/chosen": -14.155496597290039, | |
| "logits/rejected": -14.008768081665039, | |
| "logps/chosen": -1.5715187788009644, | |
| "logps/rejected": -1.608656644821167, | |
| "loss": 1.6472, | |
| "odds_ratio_loss": 0.7572886347770691, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.15715190768241882, | |
| "rewards/margins": 0.003713789861649275, | |
| "rewards/rejected": -0.1608656644821167, | |
| "sft_loss": 1.5715187788009644, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.270956816257409, | |
| "grad_norm": 0.7238659858703613, | |
| "learning_rate": 4.899978850820176e-06, | |
| "logits/chosen": -14.257448196411133, | |
| "logits/rejected": -14.187673568725586, | |
| "logps/chosen": -1.7162948846817017, | |
| "logps/rejected": -1.7536369562149048, | |
| "loss": 1.7925, | |
| "odds_ratio_loss": 0.7625432014465332, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.17162947356700897, | |
| "rewards/margins": 0.0037342351861298084, | |
| "rewards/rejected": -0.1753637045621872, | |
| "sft_loss": 1.7162948846817017, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.28789161727349705, | |
| "grad_norm": 0.9593597650527954, | |
| "learning_rate": 4.887175070016795e-06, | |
| "logits/chosen": -14.389033317565918, | |
| "logits/rejected": -14.29101276397705, | |
| "logps/chosen": -1.514937162399292, | |
| "logps/rejected": -1.5708329677581787, | |
| "loss": 1.5883, | |
| "odds_ratio_loss": 0.7331644296646118, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.15149369835853577, | |
| "rewards/margins": 0.005589589010924101, | |
| "rewards/rejected": -0.15708328783512115, | |
| "sft_loss": 1.514937162399292, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3048264182895851, | |
| "grad_norm": 1.0034801959991455, | |
| "learning_rate": 4.873619189965217e-06, | |
| "logits/chosen": -14.039607048034668, | |
| "logits/rejected": -14.147199630737305, | |
| "logps/chosen": -1.5949114561080933, | |
| "logps/rejected": -1.746072769165039, | |
| "loss": 1.6635, | |
| "odds_ratio_loss": 0.6863279938697815, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.1594911515712738, | |
| "rewards/margins": 0.015116140246391296, | |
| "rewards/rejected": -0.1746072769165039, | |
| "sft_loss": 1.5949114561080933, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.32176121930567314, | |
| "grad_norm": 0.89156574010849, | |
| "learning_rate": 4.859315481557563e-06, | |
| "logits/chosen": -14.219070434570312, | |
| "logits/rejected": -14.151147842407227, | |
| "logps/chosen": -1.5719926357269287, | |
| "logps/rejected": -1.6470130681991577, | |
| "loss": 1.6487, | |
| "odds_ratio_loss": 0.767541766166687, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.15719927847385406, | |
| "rewards/margins": 0.0075020515359938145, | |
| "rewards/rejected": -0.16470131278038025, | |
| "sft_loss": 1.5719926357269287, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3386960203217612, | |
| "grad_norm": 0.587933361530304, | |
| "learning_rate": 4.84426845129546e-06, | |
| "logits/chosen": -14.344035148620605, | |
| "logits/rejected": -14.321207046508789, | |
| "logps/chosen": -1.6490224599838257, | |
| "logps/rejected": -1.638528823852539, | |
| "loss": 1.7261, | |
| "odds_ratio_loss": 0.7703002095222473, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.1649022400379181, | |
| "rewards/margins": -0.0010493483860045671, | |
| "rewards/rejected": -0.16385288536548615, | |
| "sft_loss": 1.6490224599838257, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3556308213378493, | |
| "grad_norm": 2.0271973609924316, | |
| "learning_rate": 4.828482839870233e-06, | |
| "logits/chosen": -14.22668170928955, | |
| "logits/rejected": -14.1005220413208, | |
| "logps/chosen": -1.5818629264831543, | |
| "logps/rejected": -1.5753711462020874, | |
| "loss": 1.6618, | |
| "odds_ratio_loss": 0.7996558547019958, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.15818628668785095, | |
| "rewards/margins": -0.0006491712993010879, | |
| "rewards/rejected": -0.15753711760044098, | |
| "sft_loss": 1.5818629264831543, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.37256562235393736, | |
| "grad_norm": 0.809647262096405, | |
| "learning_rate": 4.811963620669314e-06, | |
| "logits/chosen": -14.262086868286133, | |
| "logits/rejected": -14.35071849822998, | |
| "logps/chosen": -1.5450419187545776, | |
| "logps/rejected": -1.599981665611267, | |
| "loss": 1.6187, | |
| "odds_ratio_loss": 0.7366654276847839, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.15450419485569, | |
| "rewards/margins": 0.005493967793881893, | |
| "rewards/rejected": -0.15999816358089447, | |
| "sft_loss": 1.5450419187545776, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3895004233700254, | |
| "grad_norm": 0.9206905961036682, | |
| "learning_rate": 4.794715998209328e-06, | |
| "logits/chosen": -14.026702880859375, | |
| "logits/rejected": -14.009126663208008, | |
| "logps/chosen": -1.5401651859283447, | |
| "logps/rejected": -1.6259161233901978, | |
| "loss": 1.6132, | |
| "odds_ratio_loss": 0.7308396100997925, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.15401650965213776, | |
| "rewards/margins": 0.008575108833611012, | |
| "rewards/rejected": -0.1625916212797165, | |
| "sft_loss": 1.5401651859283447, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.40643522438611346, | |
| "grad_norm": 1.0553600788116455, | |
| "learning_rate": 4.7767454064963724e-06, | |
| "logits/chosen": -14.294774055480957, | |
| "logits/rejected": -14.33879280090332, | |
| "logps/chosen": -1.571942925453186, | |
| "logps/rejected": -1.6219526529312134, | |
| "loss": 1.6455, | |
| "odds_ratio_loss": 0.7359451651573181, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.15719430148601532, | |
| "rewards/margins": 0.005000968463718891, | |
| "rewards/rejected": -0.16219528019428253, | |
| "sft_loss": 1.571942925453186, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.42337002540220153, | |
| "grad_norm": 1.133743166923523, | |
| "learning_rate": 4.758057507313987e-06, | |
| "logits/chosen": -14.3100004196167, | |
| "logits/rejected": -14.21064567565918, | |
| "logps/chosen": -1.4966617822647095, | |
| "logps/rejected": -1.5281431674957275, | |
| "loss": 1.5708, | |
| "odds_ratio_loss": 0.7413426041603088, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1496661901473999, | |
| "rewards/margins": 0.0031481466721743345, | |
| "rewards/rejected": -0.1528143286705017, | |
| "sft_loss": 1.4966617822647095, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4403048264182896, | |
| "grad_norm": 1.6055690050125122, | |
| "learning_rate": 4.73865818843936e-06, | |
| "logits/chosen": -14.18690299987793, | |
| "logits/rejected": -14.250242233276367, | |
| "logps/chosen": -1.5969842672348022, | |
| "logps/rejected": -1.7042526006698608, | |
| "loss": 1.6715, | |
| "odds_ratio_loss": 0.744690477848053, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1596984565258026, | |
| "rewards/margins": 0.010726812295615673, | |
| "rewards/rejected": -0.17042526602745056, | |
| "sft_loss": 1.5969842672348022, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4572396274343776, | |
| "grad_norm": 0.7864425778388977, | |
| "learning_rate": 4.718553561788339e-06, | |
| "logits/chosen": -14.111845016479492, | |
| "logits/rejected": -14.31633186340332, | |
| "logps/chosen": -1.487687110900879, | |
| "logps/rejected": -1.5424432754516602, | |
| "loss": 1.5596, | |
| "odds_ratio_loss": 0.7193279266357422, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.14876870810985565, | |
| "rewards/margins": 0.00547564122825861, | |
| "rewards/rejected": -0.1542443484067917, | |
| "sft_loss": 1.487687110900879, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4741744284504657, | |
| "grad_norm": 1.302501916885376, | |
| "learning_rate": 4.697749961489822e-06, | |
| "logits/chosen": -14.314417839050293, | |
| "logits/rejected": -14.266924858093262, | |
| "logps/chosen": -1.6229807138442993, | |
| "logps/rejected": -1.7468087673187256, | |
| "loss": 1.6957, | |
| "odds_ratio_loss": 0.7271685004234314, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1622980535030365, | |
| "rewards/margins": 0.012382803484797478, | |
| "rewards/rejected": -0.17468087375164032, | |
| "sft_loss": 1.6229807138442993, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4911092294665538, | |
| "grad_norm": 0.9335818886756897, | |
| "learning_rate": 4.67625394189013e-06, | |
| "logits/chosen": -14.308195114135742, | |
| "logits/rejected": -14.264862060546875, | |
| "logps/chosen": -1.47157883644104, | |
| "logps/rejected": -1.6349776983261108, | |
| "loss": 1.5387, | |
| "odds_ratio_loss": 0.6707261204719543, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.14715787768363953, | |
| "rewards/margins": 0.01633988879621029, | |
| "rewards/rejected": -0.16349777579307556, | |
| "sft_loss": 1.47157883644104, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5080440304826418, | |
| "grad_norm": 1.5830973386764526, | |
| "learning_rate": 4.654072275488016e-06, | |
| "logits/chosen": -14.484451293945312, | |
| "logits/rejected": -14.427891731262207, | |
| "logps/chosen": -1.4168641567230225, | |
| "logps/rejected": -1.4915310144424438, | |
| "loss": 1.4878, | |
| "odds_ratio_loss": 0.7094072103500366, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.14168642461299896, | |
| "rewards/margins": 0.0074666752479970455, | |
| "rewards/rejected": -0.14915308356285095, | |
| "sft_loss": 1.4168641567230225, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5249788314987299, | |
| "grad_norm": 1.3539669513702393, | |
| "learning_rate": 4.631211950800925e-06, | |
| "logits/chosen": -14.32929515838623, | |
| "logits/rejected": -14.424825668334961, | |
| "logps/chosen": -1.4027061462402344, | |
| "logps/rejected": -1.481377363204956, | |
| "loss": 1.4748, | |
| "odds_ratio_loss": 0.7213728427886963, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -0.14027062058448792, | |
| "rewards/margins": 0.007867120206356049, | |
| "rewards/rejected": -0.14813774824142456, | |
| "sft_loss": 1.4027061462402344, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.541913632514818, | |
| "grad_norm": 2.352029323577881, | |
| "learning_rate": 4.6076801701632095e-06, | |
| "logits/chosen": -14.217028617858887, | |
| "logits/rejected": -14.44648551940918, | |
| "logps/chosen": -1.513146162033081, | |
| "logps/rejected": -1.49079430103302, | |
| "loss": 1.5925, | |
| "odds_ratio_loss": 0.7936692833900452, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1513146311044693, | |
| "rewards/margins": -0.002235203282907605, | |
| "rewards/rejected": -0.14907941222190857, | |
| "sft_loss": 1.513146162033081, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.558848433530906, | |
| "grad_norm": 0.966873288154602, | |
| "learning_rate": 4.583484347456972e-06, | |
| "logits/chosen": -14.30597972869873, | |
| "logits/rejected": -14.244359016418457, | |
| "logps/chosen": -1.5698734521865845, | |
| "logps/rejected": -1.5634009838104248, | |
| "loss": 1.648, | |
| "odds_ratio_loss": 0.781231164932251, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.15698735415935516, | |
| "rewards/margins": -0.0006472375243902206, | |
| "rewards/rejected": -0.1563401073217392, | |
| "sft_loss": 1.5698734521865845, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5757832345469941, | |
| "grad_norm": 0.9054247140884399, | |
| "learning_rate": 4.55863210577626e-06, | |
| "logits/chosen": -14.461858749389648, | |
| "logits/rejected": -14.340890884399414, | |
| "logps/chosen": -1.5450735092163086, | |
| "logps/rejected": -1.656599760055542, | |
| "loss": 1.6172, | |
| "odds_ratio_loss": 0.7215217351913452, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.1545073539018631, | |
| "rewards/margins": 0.011152632534503937, | |
| "rewards/rejected": -0.16565999388694763, | |
| "sft_loss": 1.5450735092163086, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5927180355630821, | |
| "grad_norm": 0.9015621542930603, | |
| "learning_rate": 4.5331312750253465e-06, | |
| "logits/chosen": -14.178003311157227, | |
| "logits/rejected": -14.2726411819458, | |
| "logps/chosen": -1.487000584602356, | |
| "logps/rejected": -1.4908943176269531, | |
| "loss": 1.5652, | |
| "odds_ratio_loss": 0.7824643850326538, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.1487000733613968, | |
| "rewards/margins": 0.00038935727206990123, | |
| "rewards/rejected": -0.14908942580223083, | |
| "sft_loss": 1.487000584602356, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6096528365791702, | |
| "grad_norm": 2.001441717147827, | |
| "learning_rate": 4.506989889451858e-06, | |
| "logits/chosen": -14.397753715515137, | |
| "logits/rejected": -14.500781059265137, | |
| "logps/chosen": -1.4975332021713257, | |
| "logps/rejected": -1.5102782249450684, | |
| "loss": 1.5735, | |
| "odds_ratio_loss": 0.7592841982841492, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.14975331723690033, | |
| "rewards/margins": 0.0012745079584419727, | |
| "rewards/rejected": -0.1510278284549713, | |
| "sft_loss": 1.4975332021713257, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6265876375952583, | |
| "grad_norm": 1.57513427734375, | |
| "learning_rate": 4.480216185115512e-06, | |
| "logits/chosen": -14.3065767288208, | |
| "logits/rejected": -14.306581497192383, | |
| "logps/chosen": -1.4990990161895752, | |
| "logps/rejected": -1.6238371133804321, | |
| "loss": 1.5673, | |
| "odds_ratio_loss": 0.6823247671127319, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.14990989863872528, | |
| "rewards/margins": 0.0124738160520792, | |
| "rewards/rejected": -0.16238370537757874, | |
| "sft_loss": 1.4990990161895752, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6435224386113463, | |
| "grad_norm": 1.0783131122589111, | |
| "learning_rate": 4.4528185972932856e-06, | |
| "logits/chosen": -14.319122314453125, | |
| "logits/rejected": -14.488665580749512, | |
| "logps/chosen": -1.5176422595977783, | |
| "logps/rejected": -1.656542420387268, | |
| "loss": 1.5915, | |
| "odds_ratio_loss": 0.7389153242111206, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.15176422894001007, | |
| "rewards/margins": 0.013890010304749012, | |
| "rewards/rejected": -0.1656542271375656, | |
| "sft_loss": 1.5176422595977783, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6604572396274344, | |
| "grad_norm": 1.4694324731826782, | |
| "learning_rate": 4.424805757821803e-06, | |
| "logits/chosen": -14.226755142211914, | |
| "logits/rejected": -14.333894729614258, | |
| "logps/chosen": -1.574268102645874, | |
| "logps/rejected": -1.6511255502700806, | |
| "loss": 1.6513, | |
| "odds_ratio_loss": 0.7702363133430481, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.15742680430412292, | |
| "rewards/margins": 0.007685736753046513, | |
| "rewards/rejected": -0.16511255502700806, | |
| "sft_loss": 1.574268102645874, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6773920406435224, | |
| "grad_norm": 0.8252859711647034, | |
| "learning_rate": 4.396186492377812e-06, | |
| "logits/chosen": -14.237678527832031, | |
| "logits/rejected": -14.311739921569824, | |
| "logps/chosen": -1.508466124534607, | |
| "logps/rejected": -1.5852457284927368, | |
| "loss": 1.5797, | |
| "odds_ratio_loss": 0.7126177549362183, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.15084661543369293, | |
| "rewards/margins": 0.0076779513619840145, | |
| "rewards/rejected": -0.15852457284927368, | |
| "sft_loss": 1.508466124534607, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6943268416596104, | |
| "grad_norm": 1.2841962575912476, | |
| "learning_rate": 4.366969817697578e-06, | |
| "logits/chosen": -14.2535400390625, | |
| "logits/rejected": -14.371434211730957, | |
| "logps/chosen": -1.5005015134811401, | |
| "logps/rejected": -1.5292456150054932, | |
| "loss": 1.5766, | |
| "odds_ratio_loss": 0.7610150575637817, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.15005014836788177, | |
| "rewards/margins": 0.002874411642551422, | |
| "rewards/rejected": -0.1529245674610138, | |
| "sft_loss": 1.5005015134811401, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7112616426756986, | |
| "grad_norm": 1.2207895517349243, | |
| "learning_rate": 4.337164938736086e-06, | |
| "logits/chosen": -14.3642578125, | |
| "logits/rejected": -14.369051933288574, | |
| "logps/chosen": -1.5299899578094482, | |
| "logps/rejected": -1.4911963939666748, | |
| "loss": 1.6108, | |
| "odds_ratio_loss": 0.8085638284683228, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.15299901366233826, | |
| "rewards/margins": -0.0038793571293354034, | |
| "rewards/rejected": -0.14911964535713196, | |
| "sft_loss": 1.5299899578094482, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7281964436917866, | |
| "grad_norm": 0.8184213042259216, | |
| "learning_rate": 4.306781245766945e-06, | |
| "logits/chosen": -14.233909606933594, | |
| "logits/rejected": -14.245084762573242, | |
| "logps/chosen": -1.3620591163635254, | |
| "logps/rejected": -1.4749568700790405, | |
| "loss": 1.4336, | |
| "odds_ratio_loss": 0.7158304452896118, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13620591163635254, | |
| "rewards/margins": 0.011289774440228939, | |
| "rewards/rejected": -0.14749568700790405, | |
| "sft_loss": 1.3620591163635254, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7451312447078747, | |
| "grad_norm": 2.0060269832611084, | |
| "learning_rate": 4.275828311423903e-06, | |
| "logits/chosen": -14.381686210632324, | |
| "logits/rejected": -14.249435424804688, | |
| "logps/chosen": -1.6260135173797607, | |
| "logps/rejected": -1.5776515007019043, | |
| "loss": 1.709, | |
| "odds_ratio_loss": 0.829800009727478, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.16260136663913727, | |
| "rewards/margins": -0.004836211446672678, | |
| "rewards/rejected": -0.15776515007019043, | |
| "sft_loss": 1.6260135173797607, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7620660457239627, | |
| "grad_norm": 4.041975498199463, | |
| "learning_rate": 4.244315887684912e-06, | |
| "logits/chosen": -14.30778980255127, | |
| "logits/rejected": -14.218801498413086, | |
| "logps/chosen": -1.483784794807434, | |
| "logps/rejected": -1.5452721118927002, | |
| "loss": 1.5595, | |
| "odds_ratio_loss": 0.7568337917327881, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.14837847650051117, | |
| "rewards/margins": 0.006148716900497675, | |
| "rewards/rejected": -0.1545272022485733, | |
| "sft_loss": 1.483784794807434, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7790008467400508, | |
| "grad_norm": 0.7099826335906982, | |
| "learning_rate": 4.212253902799685e-06, | |
| "logits/chosen": -14.486287117004395, | |
| "logits/rejected": -14.316320419311523, | |
| "logps/chosen": -1.4297285079956055, | |
| "logps/rejected": -1.5128008127212524, | |
| "loss": 1.5023, | |
| "odds_ratio_loss": 0.7252711057662964, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.14297285676002502, | |
| "rewards/margins": 0.008307242766022682, | |
| "rewards/rejected": -0.15128009021282196, | |
| "sft_loss": 1.4297285079956055, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7959356477561389, | |
| "grad_norm": 1.2492146492004395, | |
| "learning_rate": 4.179652458161718e-06, | |
| "logits/chosen": -14.241589546203613, | |
| "logits/rejected": -14.272315979003906, | |
| "logps/chosen": -1.4517958164215088, | |
| "logps/rejected": -1.4656177759170532, | |
| "loss": 1.5259, | |
| "odds_ratio_loss": 0.7411133050918579, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.1451795995235443, | |
| "rewards/margins": 0.0013821950415149331, | |
| "rewards/rejected": -0.14656177163124084, | |
| "sft_loss": 1.4517958164215088, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8128704487722269, | |
| "grad_norm": 0.9384155869483948, | |
| "learning_rate": 4.146521825125765e-06, | |
| "logits/chosen": -14.420669555664062, | |
| "logits/rejected": -14.434637069702148, | |
| "logps/chosen": -1.4806429147720337, | |
| "logps/rejected": -1.5676599740982056, | |
| "loss": 1.5509, | |
| "odds_ratio_loss": 0.7023881673812866, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14806430041790009, | |
| "rewards/margins": 0.008701696991920471, | |
| "rewards/rejected": -0.15676598250865936, | |
| "sft_loss": 1.4806429147720337, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8298052497883149, | |
| "grad_norm": 1.070791244506836, | |
| "learning_rate": 4.11287244177176e-06, | |
| "logits/chosen": -14.464094161987305, | |
| "logits/rejected": -14.335436820983887, | |
| "logps/chosen": -1.455758810043335, | |
| "logps/rejected": -1.5940083265304565, | |
| "loss": 1.5245, | |
| "odds_ratio_loss": 0.6876194477081299, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1455758810043335, | |
| "rewards/margins": 0.01382494904100895, | |
| "rewards/rejected": -0.1594008356332779, | |
| "sft_loss": 1.455758810043335, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8467400508044031, | |
| "grad_norm": 2.7851524353027344, | |
| "learning_rate": 4.078714909616215e-06, | |
| "logits/chosen": -14.458696365356445, | |
| "logits/rejected": -14.464262008666992, | |
| "logps/chosen": -1.531051754951477, | |
| "logps/rejected": -1.6913106441497803, | |
| "loss": 1.5988, | |
| "odds_ratio_loss": 0.6771985292434692, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.15310516953468323, | |
| "rewards/margins": 0.01602589525282383, | |
| "rewards/rejected": -0.1691310703754425, | |
| "sft_loss": 1.531051754951477, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8467400508044031, | |
| "eval_logits/chosen": -14.385932922363281, | |
| "eval_logits/rejected": -14.353007316589355, | |
| "eval_logps/chosen": -1.491932988166809, | |
| "eval_logps/rejected": -1.5724329948425293, | |
| "eval_loss": 1.5655477046966553, | |
| "eval_odds_ratio_loss": 0.736146092414856, | |
| "eval_rewards/accuracies": 0.49619048833847046, | |
| "eval_rewards/chosen": -0.14919330179691315, | |
| "eval_rewards/margins": 0.008049987256526947, | |
| "eval_rewards/rejected": -0.1572432965040207, | |
| "eval_runtime": 207.7292, | |
| "eval_samples_per_second": 5.055, | |
| "eval_sft_loss": 1.491932988166809, | |
| "eval_steps_per_second": 2.527, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8636748518204911, | |
| "grad_norm": 2.8025050163269043, | |
| "learning_rate": 4.044059990272125e-06, | |
| "logits/chosen": -14.447216987609863, | |
| "logits/rejected": -14.498886108398438, | |
| "logps/chosen": -1.528641700744629, | |
| "logps/rejected": -1.6202799081802368, | |
| "loss": 1.6018, | |
| "odds_ratio_loss": 0.732014536857605, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.15286414325237274, | |
| "rewards/margins": 0.009163827635347843, | |
| "rewards/rejected": -0.1620279997587204, | |
| "sft_loss": 1.528641700744629, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8806096528365792, | |
| "grad_norm": 1.3604254722595215, | |
| "learning_rate": 4.0089186020584345e-06, | |
| "logits/chosen": -14.258474349975586, | |
| "logits/rejected": -14.413030624389648, | |
| "logps/chosen": -1.5629048347473145, | |
| "logps/rejected": -1.5826667547225952, | |
| "loss": 1.6364, | |
| "odds_ratio_loss": 0.7350566387176514, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.15629048645496368, | |
| "rewards/margins": 0.001976185943931341, | |
| "rewards/rejected": -0.15826669335365295, | |
| "sft_loss": 1.5629048347473145, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8975444538526672, | |
| "grad_norm": 2.011760711669922, | |
| "learning_rate": 3.973301816560124e-06, | |
| "logits/chosen": -14.397709846496582, | |
| "logits/rejected": -14.129496574401855, | |
| "logps/chosen": -1.4165706634521484, | |
| "logps/rejected": -1.5228968858718872, | |
| "loss": 1.4866, | |
| "odds_ratio_loss": 0.6998282670974731, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.14165706932544708, | |
| "rewards/margins": 0.010632617399096489, | |
| "rewards/rejected": -0.15228970348834991, | |
| "sft_loss": 1.4165706634521484, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9144792548687553, | |
| "grad_norm": 1.5524851083755493, | |
| "learning_rate": 3.937220855140021e-06, | |
| "logits/chosen": -14.287254333496094, | |
| "logits/rejected": -14.5077543258667, | |
| "logps/chosen": -1.445703148841858, | |
| "logps/rejected": -1.4684772491455078, | |
| "loss": 1.5204, | |
| "odds_ratio_loss": 0.7468188405036926, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.14457032084465027, | |
| "rewards/margins": 0.0022774008102715015, | |
| "rewards/rejected": -0.14684772491455078, | |
| "sft_loss": 1.445703148841858, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9314140558848434, | |
| "grad_norm": 1.5534979104995728, | |
| "learning_rate": 3.900687085403418e-06, | |
| "logits/chosen": -14.357900619506836, | |
| "logits/rejected": -14.454984664916992, | |
| "logps/chosen": -1.386063575744629, | |
| "logps/rejected": -1.3658872842788696, | |
| "loss": 1.4644, | |
| "odds_ratio_loss": 0.7831361293792725, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.13860636949539185, | |
| "rewards/margins": -0.0020176374819129705, | |
| "rewards/rejected": -0.13658872246742249, | |
| "sft_loss": 1.386063575744629, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9483488569009314, | |
| "grad_norm": 1.1890796422958374, | |
| "learning_rate": 3.863712017616614e-06, | |
| "logits/chosen": -14.284517288208008, | |
| "logits/rejected": -14.413591384887695, | |
| "logps/chosen": -1.4638760089874268, | |
| "logps/rejected": -1.5988643169403076, | |
| "loss": 1.5324, | |
| "odds_ratio_loss": 0.6851348876953125, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.14638759195804596, | |
| "rewards/margins": 0.013498829677700996, | |
| "rewards/rejected": -0.1598864495754242, | |
| "sft_loss": 1.4638760089874268, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9652836579170194, | |
| "grad_norm": 6.166572570800781, | |
| "learning_rate": 3.826307301080504e-06, | |
| "logits/chosen": -14.168184280395508, | |
| "logits/rejected": -14.155644416809082, | |
| "logps/chosen": -1.4714304208755493, | |
| "logps/rejected": -1.577530860900879, | |
| "loss": 1.5501, | |
| "odds_ratio_loss": 0.7865978479385376, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.14714303612709045, | |
| "rewards/margins": 0.010610053315758705, | |
| "rewards/rejected": -0.1577531099319458, | |
| "sft_loss": 1.4714304208755493, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9822184589331076, | |
| "grad_norm": 1.6688357591629028, | |
| "learning_rate": 3.7884847204603775e-06, | |
| "logits/chosen": -14.45263385772705, | |
| "logits/rejected": -14.489707946777344, | |
| "logps/chosen": -1.519616961479187, | |
| "logps/rejected": -1.4644415378570557, | |
| "loss": 1.5989, | |
| "odds_ratio_loss": 0.7931729555130005, | |
| "rewards/accuracies": 0.39375001192092896, | |
| "rewards/chosen": -0.15196169912815094, | |
| "rewards/margins": -0.0055175526067614555, | |
| "rewards/rejected": -0.1464441567659378, | |
| "sft_loss": 1.519616961479187, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9991532599491956, | |
| "grad_norm": 1.3263885974884033, | |
| "learning_rate": 3.750256192073058e-06, | |
| "logits/chosen": -14.519624710083008, | |
| "logits/rejected": -14.511543273925781, | |
| "logps/chosen": -1.6179249286651611, | |
| "logps/rejected": -1.6542644500732422, | |
| "loss": 1.6929, | |
| "odds_ratio_loss": 0.7493273019790649, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.16179248690605164, | |
| "rewards/margins": 0.0036339648067951202, | |
| "rewards/rejected": -0.16542646288871765, | |
| "sft_loss": 1.6179249286651611, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.0160880609652836, | |
| "grad_norm": 2.145953893661499, | |
| "learning_rate": 3.7116337601325715e-06, | |
| "logits/chosen": -14.438863754272461, | |
| "logits/rejected": -14.496429443359375, | |
| "logps/chosen": -1.4121149778366089, | |
| "logps/rejected": -1.4823601245880127, | |
| "loss": 1.4826, | |
| "odds_ratio_loss": 0.7051838636398315, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.14121152460575104, | |
| "rewards/margins": 0.007024504244327545, | |
| "rewards/rejected": -0.1482360064983368, | |
| "sft_loss": 1.4121149778366089, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.0330228619813717, | |
| "grad_norm": 1.4814651012420654, | |
| "learning_rate": 3.6726295929555154e-06, | |
| "logits/chosen": -14.25225830078125, | |
| "logits/rejected": -14.299070358276367, | |
| "logps/chosen": -1.333702802658081, | |
| "logps/rejected": -1.4111506938934326, | |
| "loss": 1.4074, | |
| "odds_ratio_loss": 0.7373310327529907, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.1333702653646469, | |
| "rewards/margins": 0.007744790520519018, | |
| "rewards/rejected": -0.14111506938934326, | |
| "sft_loss": 1.333702802658081, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0499576629974599, | |
| "grad_norm": 1.6669461727142334, | |
| "learning_rate": 3.6332559791273307e-06, | |
| "logits/chosen": -14.348184585571289, | |
| "logits/rejected": -14.468172073364258, | |
| "logps/chosen": -1.3673087358474731, | |
| "logps/rejected": -1.4689829349517822, | |
| "loss": 1.4376, | |
| "odds_ratio_loss": 0.703393280506134, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1367308795452118, | |
| "rewards/margins": 0.01016741432249546, | |
| "rewards/rejected": -0.1468982994556427, | |
| "sft_loss": 1.3673087358474731, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.0668924640135478, | |
| "grad_norm": 1.9912712574005127, | |
| "learning_rate": 3.593525323630681e-06, | |
| "logits/chosen": -14.204243659973145, | |
| "logits/rejected": -14.313570976257324, | |
| "logps/chosen": -1.4642140865325928, | |
| "logps/rejected": -1.5515140295028687, | |
| "loss": 1.5346, | |
| "odds_ratio_loss": 0.7034581899642944, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1464214026927948, | |
| "rewards/margins": 0.008729999884963036, | |
| "rewards/rejected": -0.15515141189098358, | |
| "sft_loss": 1.4642140865325928, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.083827265029636, | |
| "grad_norm": 1.084834098815918, | |
| "learning_rate": 3.5534501439371615e-06, | |
| "logits/chosen": -14.336616516113281, | |
| "logits/rejected": -14.360015869140625, | |
| "logps/chosen": -1.431004285812378, | |
| "logps/rejected": -1.5110365152359009, | |
| "loss": 1.5053, | |
| "odds_ratio_loss": 0.7428441047668457, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.14310042560100555, | |
| "rewards/margins": 0.008003225550055504, | |
| "rewards/rejected": -0.1511036604642868, | |
| "sft_loss": 1.431004285812378, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.100762066045724, | |
| "grad_norm": 7.101503372192383, | |
| "learning_rate": 3.5130430660635633e-06, | |
| "logits/chosen": -14.246923446655273, | |
| "logits/rejected": -14.310781478881836, | |
| "logps/chosen": -1.4178617000579834, | |
| "logps/rejected": -1.4921773672103882, | |
| "loss": 1.4902, | |
| "odds_ratio_loss": 0.7228954434394836, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.14178617298603058, | |
| "rewards/margins": 0.0074315681122243404, | |
| "rewards/rejected": -0.14921775460243225, | |
| "sft_loss": 1.4178617000579834, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.117696867061812, | |
| "grad_norm": 0.7868030071258545, | |
| "learning_rate": 3.4723168205939444e-06, | |
| "logits/chosen": -14.346036911010742, | |
| "logits/rejected": -14.401220321655273, | |
| "logps/chosen": -1.4435014724731445, | |
| "logps/rejected": -1.4272395372390747, | |
| "loss": 1.5198, | |
| "odds_ratio_loss": 0.7628483772277832, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14435014128684998, | |
| "rewards/margins": -0.0016262030694633722, | |
| "rewards/rejected": -0.142723947763443, | |
| "sft_loss": 1.4435014724731445, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.1346316680779, | |
| "grad_norm": 0.8476426601409912, | |
| "learning_rate": 3.431284238668754e-06, | |
| "logits/chosen": -14.173054695129395, | |
| "logits/rejected": -14.25976276397705, | |
| "logps/chosen": -1.5427913665771484, | |
| "logps/rejected": -1.51954984664917, | |
| "loss": 1.6222, | |
| "odds_ratio_loss": 0.7936299443244934, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.15427914261817932, | |
| "rewards/margins": -0.002324149012565613, | |
| "rewards/rejected": -0.15195497870445251, | |
| "sft_loss": 1.5427913665771484, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.1515664690939882, | |
| "grad_norm": 3.089587688446045, | |
| "learning_rate": 3.389958247942274e-06, | |
| "logits/chosen": -14.338518142700195, | |
| "logits/rejected": -14.398809432983398, | |
| "logps/chosen": -1.508374810218811, | |
| "logps/rejected": -1.6098997592926025, | |
| "loss": 1.5859, | |
| "odds_ratio_loss": 0.7754709720611572, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1508374661207199, | |
| "rewards/margins": 0.010152501054108143, | |
| "rewards/rejected": -0.16098996996879578, | |
| "sft_loss": 1.508374810218811, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.168501270110076, | |
| "grad_norm": 1.2698506116867065, | |
| "learning_rate": 3.3483518685096588e-06, | |
| "logits/chosen": -14.310267448425293, | |
| "logits/rejected": -14.27270221710205, | |
| "logps/chosen": -1.4493352174758911, | |
| "logps/rejected": -1.5172946453094482, | |
| "loss": 1.5243, | |
| "odds_ratio_loss": 0.7492562532424927, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1449335366487503, | |
| "rewards/margins": 0.006795944180339575, | |
| "rewards/rejected": -0.15172946453094482, | |
| "sft_loss": 1.4493352174758911, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1854360711261642, | |
| "grad_norm": 1.6422189474105835, | |
| "learning_rate": 3.306478208804839e-06, | |
| "logits/chosen": -14.337800979614258, | |
| "logits/rejected": -14.443319320678711, | |
| "logps/chosen": -1.3992269039154053, | |
| "logps/rejected": -1.4721871614456177, | |
| "loss": 1.4736, | |
| "odds_ratio_loss": 0.7440443634986877, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.13992270827293396, | |
| "rewards/margins": 0.007296019699424505, | |
| "rewards/rejected": -0.1472187042236328, | |
| "sft_loss": 1.3992269039154053, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.2023708721422524, | |
| "grad_norm": 1.635892391204834, | |
| "learning_rate": 3.264350461470608e-06, | |
| "logits/chosen": -14.11363410949707, | |
| "logits/rejected": -14.23077392578125, | |
| "logps/chosen": -1.4146158695220947, | |
| "logps/rejected": -1.5160566568374634, | |
| "loss": 1.4872, | |
| "odds_ratio_loss": 0.726182222366333, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1414615958929062, | |
| "rewards/margins": 0.010144074447453022, | |
| "rewards/rejected": -0.15160568058490753, | |
| "sft_loss": 1.4146158695220947, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.2193056731583405, | |
| "grad_norm": 2.807609796524048, | |
| "learning_rate": 3.2219818992021685e-06, | |
| "logits/chosen": -14.307601928710938, | |
| "logits/rejected": -14.457585334777832, | |
| "logps/chosen": -1.3360792398452759, | |
| "logps/rejected": -1.5054932832717896, | |
| "loss": 1.4058, | |
| "odds_ratio_loss": 0.6972737312316895, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13360795378684998, | |
| "rewards/margins": 0.016941383481025696, | |
| "rewards/rejected": -0.15054932236671448, | |
| "sft_loss": 1.3360792398452759, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.2362404741744284, | |
| "grad_norm": 4.885401248931885, | |
| "learning_rate": 3.1793858705654595e-06, | |
| "logits/chosen": -14.334493637084961, | |
| "logits/rejected": -14.283819198608398, | |
| "logps/chosen": -1.435250997543335, | |
| "logps/rejected": -1.4584420919418335, | |
| "loss": 1.5098, | |
| "odds_ratio_loss": 0.7454192638397217, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.14352509379386902, | |
| "rewards/margins": 0.0023191256914287806, | |
| "rewards/rejected": -0.1458442211151123, | |
| "sft_loss": 1.435250997543335, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.2531752751905165, | |
| "grad_norm": 2.119098424911499, | |
| "learning_rate": 3.1365757957915787e-06, | |
| "logits/chosen": -14.451696395874023, | |
| "logits/rejected": -14.478349685668945, | |
| "logps/chosen": -1.4766839742660522, | |
| "logps/rejected": -1.5273820161819458, | |
| "loss": 1.5488, | |
| "odds_ratio_loss": 0.7213960289955139, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14766840636730194, | |
| "rewards/margins": 0.005069802515208721, | |
| "rewards/rejected": -0.15273821353912354, | |
| "sft_loss": 1.4766839742660522, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.2701100762066047, | |
| "grad_norm": 2.60243821144104, | |
| "learning_rate": 3.093565162548633e-06, | |
| "logits/chosen": -14.26720905303955, | |
| "logits/rejected": -14.301678657531738, | |
| "logps/chosen": -1.4956939220428467, | |
| "logps/rejected": -1.5772297382354736, | |
| "loss": 1.5741, | |
| "odds_ratio_loss": 0.7844332456588745, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14956940710544586, | |
| "rewards/margins": 0.008153588511049747, | |
| "rewards/rejected": -0.15772297978401184, | |
| "sft_loss": 1.4956939220428467, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.2870448772226926, | |
| "grad_norm": 1.4909660816192627, | |
| "learning_rate": 3.0503675216923294e-06, | |
| "logits/chosen": -14.459734916687012, | |
| "logits/rejected": -14.364084243774414, | |
| "logps/chosen": -1.3072437047958374, | |
| "logps/rejected": -1.4731833934783936, | |
| "loss": 1.3741, | |
| "odds_ratio_loss": 0.6682445406913757, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13072435557842255, | |
| "rewards/margins": 0.01659397967159748, | |
| "rewards/rejected": -0.14731833338737488, | |
| "sft_loss": 1.3072437047958374, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.3039796782387807, | |
| "grad_norm": 1.1245403289794922, | |
| "learning_rate": 3.0069964829966748e-06, | |
| "logits/chosen": -14.397039413452148, | |
| "logits/rejected": -14.449551582336426, | |
| "logps/chosen": -1.3757708072662354, | |
| "logps/rejected": -1.4135478734970093, | |
| "loss": 1.4523, | |
| "odds_ratio_loss": 0.7652989029884338, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.137577086687088, | |
| "rewards/margins": 0.003777713282033801, | |
| "rewards/rejected": -0.1413547843694687, | |
| "sft_loss": 1.3757708072662354, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.3209144792548688, | |
| "grad_norm": 1.2307573556900024, | |
| "learning_rate": 2.963465710866094e-06, | |
| "logits/chosen": -14.386013984680176, | |
| "logits/rejected": -14.34870719909668, | |
| "logps/chosen": -1.4350049495697021, | |
| "logps/rejected": -1.5495213270187378, | |
| "loss": 1.5065, | |
| "odds_ratio_loss": 0.7147475481033325, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.14350050687789917, | |
| "rewards/margins": 0.01145164854824543, | |
| "rewards/rejected": -0.15495215356349945, | |
| "sft_loss": 1.4350049495697021, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.337849280270957, | |
| "grad_norm": 2.506805181503296, | |
| "learning_rate": 2.919788920030357e-06, | |
| "logits/chosen": -14.521794319152832, | |
| "logits/rejected": -14.562520027160645, | |
| "logps/chosen": -1.5004112720489502, | |
| "logps/rejected": -1.5385072231292725, | |
| "loss": 1.5749, | |
| "odds_ratio_loss": 0.7447755336761475, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1500411331653595, | |
| "rewards/margins": 0.003809594316408038, | |
| "rewards/rejected": -0.1538507342338562, | |
| "sft_loss": 1.5004112720489502, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.3547840812870449, | |
| "grad_norm": 2.221041440963745, | |
| "learning_rate": 2.8759798712236303e-06, | |
| "logits/chosen": -14.375375747680664, | |
| "logits/rejected": -14.200535774230957, | |
| "logps/chosen": -1.3673021793365479, | |
| "logps/rejected": -1.4980638027191162, | |
| "loss": 1.4391, | |
| "odds_ratio_loss": 0.7180894017219543, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.13673020899295807, | |
| "rewards/margins": 0.013076169416308403, | |
| "rewards/rejected": -0.14980638027191162, | |
| "sft_loss": 1.3673021793365479, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.371718882303133, | |
| "grad_norm": 1.1964547634124756, | |
| "learning_rate": 2.8320523668490507e-06, | |
| "logits/chosen": -14.326695442199707, | |
| "logits/rejected": -14.330057144165039, | |
| "logps/chosen": -1.4386107921600342, | |
| "logps/rejected": -1.4542288780212402, | |
| "loss": 1.516, | |
| "odds_ratio_loss": 0.7743045091629028, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1438610851764679, | |
| "rewards/margins": 0.00156181410420686, | |
| "rewards/rejected": -0.14542289078235626, | |
| "sft_loss": 1.4386107921600342, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.388653683319221, | |
| "grad_norm": 1.63833749294281, | |
| "learning_rate": 2.7880202466301597e-06, | |
| "logits/chosen": -14.170251846313477, | |
| "logits/rejected": -14.376757621765137, | |
| "logps/chosen": -1.4189726114273071, | |
| "logps/rejected": -1.4344730377197266, | |
| "loss": 1.4949, | |
| "odds_ratio_loss": 0.7592172026634216, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.1418972760438919, | |
| "rewards/margins": 0.00155004789121449, | |
| "rewards/rejected": -0.14344730973243713, | |
| "sft_loss": 1.4189726114273071, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.405588484335309, | |
| "grad_norm": 1.4605140686035156, | |
| "learning_rate": 2.7438973832505854e-06, | |
| "logits/chosen": -14.213847160339355, | |
| "logits/rejected": -14.075439453125, | |
| "logps/chosen": -1.394853115081787, | |
| "logps/rejected": -1.4763586521148682, | |
| "loss": 1.4703, | |
| "odds_ratio_loss": 0.7543301582336426, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13948531448841095, | |
| "rewards/margins": 0.008150560781359673, | |
| "rewards/rejected": -0.14763586223125458, | |
| "sft_loss": 1.394853115081787, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.4225232853513972, | |
| "grad_norm": 6.998382091522217, | |
| "learning_rate": 2.699697677983341e-06, | |
| "logits/chosen": -14.502642631530762, | |
| "logits/rejected": -14.471555709838867, | |
| "logps/chosen": -1.3794063329696655, | |
| "logps/rejected": -1.3286025524139404, | |
| "loss": 1.4577, | |
| "odds_ratio_loss": 0.7826226353645325, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.1379406601190567, | |
| "rewards/margins": -0.005080387927591801, | |
| "rewards/rejected": -0.13286025822162628, | |
| "sft_loss": 1.3794063329696655, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.4394580863674853, | |
| "grad_norm": 6.508487224578857, | |
| "learning_rate": 2.6554350563111115e-06, | |
| "logits/chosen": -14.415182113647461, | |
| "logits/rejected": -14.4021577835083, | |
| "logps/chosen": -1.4343197345733643, | |
| "logps/rejected": -1.389868140220642, | |
| "loss": 1.5147, | |
| "odds_ratio_loss": 0.803573489189148, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.14343199133872986, | |
| "rewards/margins": -0.00444516446441412, | |
| "rewards/rejected": -0.13898679614067078, | |
| "sft_loss": 1.4343197345733643, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.4563928873835732, | |
| "grad_norm": 3.286094903945923, | |
| "learning_rate": 2.611123463538913e-06, | |
| "logits/chosen": -14.409162521362305, | |
| "logits/rejected": -14.423065185546875, | |
| "logps/chosen": -1.3563302755355835, | |
| "logps/rejected": -1.470460295677185, | |
| "loss": 1.4284, | |
| "odds_ratio_loss": 0.7211607694625854, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.13563302159309387, | |
| "rewards/margins": 0.011413001455366611, | |
| "rewards/rejected": -0.1470460146665573, | |
| "sft_loss": 1.3563302755355835, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.4733276883996613, | |
| "grad_norm": 1.353800654411316, | |
| "learning_rate": 2.566776860400514e-06, | |
| "logits/chosen": -14.359599113464355, | |
| "logits/rejected": -14.388442993164062, | |
| "logps/chosen": -1.4657598733901978, | |
| "logps/rejected": -1.5304598808288574, | |
| "loss": 1.5387, | |
| "odds_ratio_loss": 0.7289360761642456, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.14657600224018097, | |
| "rewards/margins": 0.006469997111707926, | |
| "rewards/rejected": -0.15304598212242126, | |
| "sft_loss": 1.4657598733901978, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.4902624894157492, | |
| "grad_norm": 0.8999080657958984, | |
| "learning_rate": 2.522409218659989e-06, | |
| "logits/chosen": -14.522372245788574, | |
| "logits/rejected": -14.516871452331543, | |
| "logps/chosen": -1.5183885097503662, | |
| "logps/rejected": -1.5601129531860352, | |
| "loss": 1.5903, | |
| "odds_ratio_loss": 0.7187842130661011, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.15183886885643005, | |
| "rewards/margins": 0.004172446206212044, | |
| "rewards/rejected": -0.15601131319999695, | |
| "sft_loss": 1.5183885097503662, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.5071972904318374, | |
| "grad_norm": 1.7247016429901123, | |
| "learning_rate": 2.4780345167097976e-06, | |
| "logits/chosen": -14.4078369140625, | |
| "logits/rejected": -14.206354141235352, | |
| "logps/chosen": -1.422533392906189, | |
| "logps/rejected": -1.617108941078186, | |
| "loss": 1.4925, | |
| "odds_ratio_loss": 0.6991701126098633, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1422533541917801, | |
| "rewards/margins": 0.019457560032606125, | |
| "rewards/rejected": -0.16171090304851532, | |
| "sft_loss": 1.422533392906189, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.5241320914479255, | |
| "grad_norm": 1.1559327840805054, | |
| "learning_rate": 2.4336667351667747e-06, | |
| "logits/chosen": -14.479301452636719, | |
| "logits/rejected": -14.487524032592773, | |
| "logps/chosen": -1.5677707195281982, | |
| "logps/rejected": -1.654937982559204, | |
| "loss": 1.6407, | |
| "odds_ratio_loss": 0.7297645807266235, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1567770540714264, | |
| "rewards/margins": 0.00871671736240387, | |
| "rewards/rejected": -0.16549380123615265, | |
| "sft_loss": 1.5677707195281982, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.5410668924640136, | |
| "grad_norm": 2.899705171585083, | |
| "learning_rate": 2.3893198524674264e-06, | |
| "logits/chosen": -14.416735649108887, | |
| "logits/rejected": -14.323824882507324, | |
| "logps/chosen": -1.3870880603790283, | |
| "logps/rejected": -1.490903615951538, | |
| "loss": 1.4593, | |
| "odds_ratio_loss": 0.7216765284538269, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.13870880007743835, | |
| "rewards/margins": 0.010381558910012245, | |
| "rewards/rejected": -0.14909036457538605, | |
| "sft_loss": 1.3870880603790283, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.5580016934801018, | |
| "grad_norm": 1.2076252698898315, | |
| "learning_rate": 2.345007840463904e-06, | |
| "logits/chosen": -14.292505264282227, | |
| "logits/rejected": -14.244054794311523, | |
| "logps/chosen": -1.4259792566299438, | |
| "logps/rejected": -1.4341694116592407, | |
| "loss": 1.5022, | |
| "odds_ratio_loss": 0.7626054883003235, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14259792864322662, | |
| "rewards/margins": 0.0008190165390260518, | |
| "rewards/rejected": -0.14341694116592407, | |
| "sft_loss": 1.4259792566299438, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.5749364944961897, | |
| "grad_norm": 2.6530520915985107, | |
| "learning_rate": 2.3007446600220572e-06, | |
| "logits/chosen": -14.440101623535156, | |
| "logits/rejected": -14.175987243652344, | |
| "logps/chosen": -1.361826777458191, | |
| "logps/rejected": -1.4479329586029053, | |
| "loss": 1.4351, | |
| "odds_ratio_loss": 0.7332156300544739, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.13618269562721252, | |
| "rewards/margins": 0.008610614575445652, | |
| "rewards/rejected": -0.144793301820755, | |
| "sft_loss": 1.361826777458191, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.5918712955122776, | |
| "grad_norm": 3.269102096557617, | |
| "learning_rate": 2.2565442566229507e-06, | |
| "logits/chosen": -14.330474853515625, | |
| "logits/rejected": -14.3932466506958, | |
| "logps/chosen": -1.4583683013916016, | |
| "logps/rejected": -1.4522769451141357, | |
| "loss": 1.5392, | |
| "odds_ratio_loss": 0.8081096410751343, | |
| "rewards/accuracies": 0.3812499940395355, | |
| "rewards/chosen": -0.14583681523799896, | |
| "rewards/margins": -0.0006091395625844598, | |
| "rewards/rejected": -0.14522768557071686, | |
| "sft_loss": 1.4583683013916016, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.6088060965283657, | |
| "grad_norm": 1.2394914627075195, | |
| "learning_rate": 2.2124205559692195e-06, | |
| "logits/chosen": -14.25177001953125, | |
| "logits/rejected": -14.32116985321045, | |
| "logps/chosen": -1.4207613468170166, | |
| "logps/rejected": -1.5083825588226318, | |
| "loss": 1.4919, | |
| "odds_ratio_loss": 0.7114149928092957, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.14207611978054047, | |
| "rewards/margins": 0.008762138895690441, | |
| "rewards/rejected": -0.15083825588226318, | |
| "sft_loss": 1.4207613468170166, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.6257408975444538, | |
| "grad_norm": 1.6583099365234375, | |
| "learning_rate": 2.168387459597666e-06, | |
| "logits/chosen": -14.210861206054688, | |
| "logits/rejected": -14.444610595703125, | |
| "logps/chosen": -1.5090281963348389, | |
| "logps/rejected": -1.5863807201385498, | |
| "loss": 1.5813, | |
| "odds_ratio_loss": 0.7230504155158997, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.15090280771255493, | |
| "rewards/margins": 0.007735258433967829, | |
| "rewards/rejected": -0.15863807499408722, | |
| "sft_loss": 1.5090281963348389, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.642675698560542, | |
| "grad_norm": 1.3439754247665405, | |
| "learning_rate": 2.1244588404994648e-06, | |
| "logits/chosen": -14.237951278686523, | |
| "logits/rejected": -14.269018173217773, | |
| "logps/chosen": -1.376792073249817, | |
| "logps/rejected": -1.4212851524353027, | |
| "loss": 1.453, | |
| "odds_ratio_loss": 0.7622562646865845, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.13767921924591064, | |
| "rewards/margins": 0.0044493041932582855, | |
| "rewards/rejected": -0.14212851226329803, | |
| "sft_loss": 1.376792073249817, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.65961049957663, | |
| "grad_norm": 2.962531328201294, | |
| "learning_rate": 2.08064853874936e-06, | |
| "logits/chosen": -14.473817825317383, | |
| "logits/rejected": -14.631460189819336, | |
| "logps/chosen": -1.4066752195358276, | |
| "logps/rejected": -1.455766201019287, | |
| "loss": 1.4788, | |
| "odds_ratio_loss": 0.7210047245025635, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.14066752791404724, | |
| "rewards/margins": 0.004909100476652384, | |
| "rewards/rejected": -0.1455766260623932, | |
| "sft_loss": 1.4066752195358276, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.676545300592718, | |
| "grad_norm": 3.2846462726593018, | |
| "learning_rate": 2.0369703571452387e-06, | |
| "logits/chosen": -14.20033073425293, | |
| "logits/rejected": -14.109931945800781, | |
| "logps/chosen": -1.309378743171692, | |
| "logps/rejected": -1.4727327823638916, | |
| "loss": 1.3763, | |
| "odds_ratio_loss": 0.6690842509269714, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.13093788921833038, | |
| "rewards/margins": 0.01633540540933609, | |
| "rewards/rejected": -0.14727327227592468, | |
| "sft_loss": 1.309378743171692, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.6934801016088061, | |
| "grad_norm": 1.1083016395568848, | |
| "learning_rate": 1.993438056859441e-06, | |
| "logits/chosen": -14.497441291809082, | |
| "logits/rejected": -14.366804122924805, | |
| "logps/chosen": -1.353459119796753, | |
| "logps/rejected": -1.469897985458374, | |
| "loss": 1.4213, | |
| "odds_ratio_loss": 0.6788592338562012, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.13534590601921082, | |
| "rewards/margins": 0.011643897742033005, | |
| "rewards/rejected": -0.14698980748653412, | |
| "sft_loss": 1.353459119796753, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6934801016088061, | |
| "eval_logits/chosen": -14.432435989379883, | |
| "eval_logits/rejected": -14.399744987487793, | |
| "eval_logps/chosen": -1.4366357326507568, | |
| "eval_logps/rejected": -1.5239636898040771, | |
| "eval_loss": 1.509663701057434, | |
| "eval_odds_ratio_loss": 0.7302786707878113, | |
| "eval_rewards/accuracies": 0.5038095116615295, | |
| "eval_rewards/chosen": -0.14366357028484344, | |
| "eval_rewards/margins": 0.00873279757797718, | |
| "eval_rewards/rejected": -0.15239638090133667, | |
| "eval_runtime": 445.7589, | |
| "eval_samples_per_second": 2.356, | |
| "eval_sft_loss": 1.4366357326507568, | |
| "eval_steps_per_second": 1.178, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.710414902624894, | |
| "grad_norm": 1.8078409433364868, | |
| "learning_rate": 1.9500653531031917e-06, | |
| "logits/chosen": -14.443731307983398, | |
| "logits/rejected": -14.476076126098633, | |
| "logps/chosen": -1.361530065536499, | |
| "logps/rejected": -1.5223243236541748, | |
| "loss": 1.4302, | |
| "odds_ratio_loss": 0.6869481205940247, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.136152982711792, | |
| "rewards/margins": 0.016079427674412727, | |
| "rewards/rejected": -0.15223243832588196, | |
| "sft_loss": 1.361530065536499, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.7273497036409822, | |
| "grad_norm": 1.288388729095459, | |
| "learning_rate": 1.9068659108055117e-06, | |
| "logits/chosen": -14.475682258605957, | |
| "logits/rejected": -14.473660469055176, | |
| "logps/chosen": -1.4284050464630127, | |
| "logps/rejected": -1.4647681713104248, | |
| "loss": 1.5008, | |
| "odds_ratio_loss": 0.7240586280822754, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.14284051954746246, | |
| "rewards/margins": 0.003636319888755679, | |
| "rewards/rejected": -0.1464768350124359, | |
| "sft_loss": 1.4284050464630127, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.7442845046570703, | |
| "grad_norm": 1.2943964004516602, | |
| "learning_rate": 1.863853340307962e-06, | |
| "logits/chosen": -14.312501907348633, | |
| "logits/rejected": -14.362284660339355, | |
| "logps/chosen": -1.2968519926071167, | |
| "logps/rejected": -1.579993486404419, | |
| "loss": 1.3634, | |
| "odds_ratio_loss": 0.6657830476760864, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.1296851933002472, | |
| "rewards/margins": 0.0283141378313303, | |
| "rewards/rejected": -0.15799932181835175, | |
| "sft_loss": 1.2968519926071167, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.7612193056731584, | |
| "grad_norm": 1.1572942733764648, | |
| "learning_rate": 1.8210411930766019e-06, | |
| "logits/chosen": -14.294156074523926, | |
| "logits/rejected": -14.323614120483398, | |
| "logps/chosen": -1.479034662246704, | |
| "logps/rejected": -1.6268787384033203, | |
| "loss": 1.547, | |
| "odds_ratio_loss": 0.6801426410675049, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1479034721851349, | |
| "rewards/margins": 0.014784415252506733, | |
| "rewards/rejected": -0.16268786787986755, | |
| "sft_loss": 1.479034662246704, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.7781541066892466, | |
| "grad_norm": 1.574400782585144, | |
| "learning_rate": 1.7784429574324803e-06, | |
| "logits/chosen": -14.368769645690918, | |
| "logits/rejected": -14.502416610717773, | |
| "logps/chosen": -1.3905737400054932, | |
| "logps/rejected": -1.5777366161346436, | |
| "loss": 1.4567, | |
| "odds_ratio_loss": 0.6612822413444519, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.13905738294124603, | |
| "rewards/margins": 0.018716301769018173, | |
| "rewards/rejected": -0.1577736884355545, | |
| "sft_loss": 1.3905737400054932, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.7950889077053345, | |
| "grad_norm": 1.195115089416504, | |
| "learning_rate": 1.7360720543020327e-06, | |
| "logits/chosen": -14.439001083374023, | |
| "logits/rejected": -14.227216720581055, | |
| "logps/chosen": -1.3061621189117432, | |
| "logps/rejected": -1.3979461193084717, | |
| "loss": 1.3747, | |
| "odds_ratio_loss": 0.6853240728378296, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1306162178516388, | |
| "rewards/margins": 0.009178402833640575, | |
| "rewards/rejected": -0.13979461789131165, | |
| "sft_loss": 1.3061621189117432, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.8120237087214224, | |
| "grad_norm": 3.909592390060425, | |
| "learning_rate": 1.6939418329887042e-06, | |
| "logits/chosen": -14.45744514465332, | |
| "logits/rejected": -14.5038423538208, | |
| "logps/chosen": -1.4311087131500244, | |
| "logps/rejected": -1.4849843978881836, | |
| "loss": 1.5049, | |
| "odds_ratio_loss": 0.7381945848464966, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.14311087131500244, | |
| "rewards/margins": 0.005387583281844854, | |
| "rewards/rejected": -0.14849844574928284, | |
| "sft_loss": 1.4311087131500244, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.8289585097375105, | |
| "grad_norm": 1.7437409162521362, | |
| "learning_rate": 1.6520655669671467e-06, | |
| "logits/chosen": -14.631024360656738, | |
| "logits/rejected": -14.515978813171387, | |
| "logps/chosen": -1.4438676834106445, | |
| "logps/rejected": -1.4797852039337158, | |
| "loss": 1.519, | |
| "odds_ratio_loss": 0.7515386343002319, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.14438676834106445, | |
| "rewards/margins": 0.0035917561035603285, | |
| "rewards/rejected": -0.1479785144329071, | |
| "sft_loss": 1.4438676834106445, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.8458933107535986, | |
| "grad_norm": 3.1396241188049316, | |
| "learning_rate": 1.610456449701294e-06, | |
| "logits/chosen": -14.319239616394043, | |
| "logits/rejected": -14.346944808959961, | |
| "logps/chosen": -1.4771324396133423, | |
| "logps/rejected": -1.537941336631775, | |
| "loss": 1.5548, | |
| "odds_ratio_loss": 0.776719331741333, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.14771324396133423, | |
| "rewards/margins": 0.0060809021815657616, | |
| "rewards/rejected": -0.15379413962364197, | |
| "sft_loss": 1.4771324396133423, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.8628281117696868, | |
| "grad_norm": 1.4689712524414062, | |
| "learning_rate": 1.5691275904876545e-06, | |
| "logits/chosen": -14.461804389953613, | |
| "logits/rejected": -14.278103828430176, | |
| "logps/chosen": -1.407566785812378, | |
| "logps/rejected": -1.5848530530929565, | |
| "loss": 1.474, | |
| "odds_ratio_loss": 0.6638895869255066, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.14075669646263123, | |
| "rewards/margins": 0.017728609964251518, | |
| "rewards/rejected": -0.1584853082895279, | |
| "sft_loss": 1.407566785812378, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.879762912785775, | |
| "grad_norm": 0.9537128210067749, | |
| "learning_rate": 1.5280920103251235e-06, | |
| "logits/chosen": -14.299761772155762, | |
| "logits/rejected": -14.347249984741211, | |
| "logps/chosen": -1.3132389783859253, | |
| "logps/rejected": -1.451719880104065, | |
| "loss": 1.3829, | |
| "odds_ratio_loss": 0.6965407133102417, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.131323903799057, | |
| "rewards/margins": 0.013848078437149525, | |
| "rewards/rejected": -0.14517197012901306, | |
| "sft_loss": 1.3132389783859253, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.8966977138018628, | |
| "grad_norm": 1.3030270338058472, | |
| "learning_rate": 1.4873626378126015e-06, | |
| "logits/chosen": -14.38860034942627, | |
| "logits/rejected": -14.277740478515625, | |
| "logps/chosen": -1.3292900323867798, | |
| "logps/rejected": -1.5040452480316162, | |
| "loss": 1.3984, | |
| "odds_ratio_loss": 0.6911579966545105, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1329289972782135, | |
| "rewards/margins": 0.017475521191954613, | |
| "rewards/rejected": -0.15040451288223267, | |
| "sft_loss": 1.3292900323867798, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.913632514817951, | |
| "grad_norm": 2.765397071838379, | |
| "learning_rate": 1.446952305075738e-06, | |
| "logits/chosen": -14.399679183959961, | |
| "logits/rejected": -14.427862167358398, | |
| "logps/chosen": -1.3543717861175537, | |
| "logps/rejected": -1.3891161680221558, | |
| "loss": 1.4306, | |
| "odds_ratio_loss": 0.7619328498840332, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.13543717563152313, | |
| "rewards/margins": 0.003474441124126315, | |
| "rewards/rejected": -0.13891161978244781, | |
| "sft_loss": 1.3543717861175537, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.9305673158340388, | |
| "grad_norm": 1.730094075202942, | |
| "learning_rate": 1.406873743724065e-06, | |
| "logits/chosen": -14.437395095825195, | |
| "logits/rejected": -14.322535514831543, | |
| "logps/chosen": -1.4621553421020508, | |
| "logps/rejected": -1.6176691055297852, | |
| "loss": 1.5314, | |
| "odds_ratio_loss": 0.692920982837677, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1462155282497406, | |
| "rewards/margins": 0.015551361255347729, | |
| "rewards/rejected": -0.1617669016122818, | |
| "sft_loss": 1.4621553421020508, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.947502116850127, | |
| "grad_norm": 1.5328463315963745, | |
| "learning_rate": 1.3671395808397898e-06, | |
| "logits/chosen": -14.267127990722656, | |
| "logits/rejected": -14.463046073913574, | |
| "logps/chosen": -1.335663080215454, | |
| "logps/rejected": -1.3676198720932007, | |
| "loss": 1.4094, | |
| "odds_ratio_loss": 0.7378238439559937, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.13356631994247437, | |
| "rewards/margins": 0.0031956590246409178, | |
| "rewards/rejected": -0.13676197826862335, | |
| "sft_loss": 1.335663080215454, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.964436917866215, | |
| "grad_norm": 3.9082131385803223, | |
| "learning_rate": 1.3277623349995418e-06, | |
| "logits/chosen": -14.250445365905762, | |
| "logits/rejected": -14.258328437805176, | |
| "logps/chosen": -1.386776089668274, | |
| "logps/rejected": -1.3914397954940796, | |
| "loss": 1.4653, | |
| "odds_ratio_loss": 0.7851333618164062, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.13867759704589844, | |
| "rewards/margins": 0.00046637197374366224, | |
| "rewards/rejected": -0.13914397358894348, | |
| "sft_loss": 1.386776089668274, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.9813717188823032, | |
| "grad_norm": 3.576561450958252, | |
| "learning_rate": 1.2887544123302781e-06, | |
| "logits/chosen": -14.434526443481445, | |
| "logits/rejected": -14.393232345581055, | |
| "logps/chosen": -1.4019829034805298, | |
| "logps/rejected": -1.4435473680496216, | |
| "loss": 1.4772, | |
| "odds_ratio_loss": 0.752662181854248, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.14019827544689178, | |
| "rewards/margins": 0.004156465642154217, | |
| "rewards/rejected": -0.14435474574565887, | |
| "sft_loss": 1.4019829034805298, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.9983065198983911, | |
| "grad_norm": 1.4880342483520508, | |
| "learning_rate": 1.2501281026006393e-06, | |
| "logits/chosen": -14.47376823425293, | |
| "logits/rejected": -14.513628005981445, | |
| "logps/chosen": -1.420966386795044, | |
| "logps/rejected": -1.4258407354354858, | |
| "loss": 1.5002, | |
| "odds_ratio_loss": 0.7924087643623352, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.1420966386795044, | |
| "rewards/margins": 0.000487445795442909, | |
| "rewards/rejected": -0.14258407056331635, | |
| "sft_loss": 1.420966386795044, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.015241320914479, | |
| "grad_norm": 1.0734080076217651, | |
| "learning_rate": 1.2118955753489523e-06, | |
| "logits/chosen": -14.561826705932617, | |
| "logits/rejected": -14.332305908203125, | |
| "logps/chosen": -1.3783150911331177, | |
| "logps/rejected": -1.4396107196807861, | |
| "loss": 1.4511, | |
| "odds_ratio_loss": 0.7278788685798645, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.13783150911331177, | |
| "rewards/margins": 0.006129562854766846, | |
| "rewards/rejected": -0.1439610719680786, | |
| "sft_loss": 1.3783150911331177, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.032176121930567, | |
| "grad_norm": 1.3539475202560425, | |
| "learning_rate": 1.1740688760491189e-06, | |
| "logits/chosen": -14.37562370300293, | |
| "logits/rejected": -14.43455696105957, | |
| "logps/chosen": -1.3733515739440918, | |
| "logps/rejected": -1.4605834484100342, | |
| "loss": 1.4435, | |
| "odds_ratio_loss": 0.7019113302230835, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1373351514339447, | |
| "rewards/margins": 0.00872319657355547, | |
| "rewards/rejected": -0.1460583508014679, | |
| "sft_loss": 1.3733515739440918, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.0491109229466553, | |
| "grad_norm": 1.5765854120254517, | |
| "learning_rate": 1.1366599223155847e-06, | |
| "logits/chosen": -14.275134086608887, | |
| "logits/rejected": -14.2963228225708, | |
| "logps/chosen": -1.371392011642456, | |
| "logps/rejected": -1.4632259607315063, | |
| "loss": 1.4461, | |
| "odds_ratio_loss": 0.7467167377471924, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1371392160654068, | |
| "rewards/margins": 0.009183400310575962, | |
| "rewards/rejected": -0.1463226079940796, | |
| "sft_loss": 1.371392011642456, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.0660457239627434, | |
| "grad_norm": 1.6226162910461426, | |
| "learning_rate": 1.0996805001486067e-06, | |
| "logits/chosen": -14.387079238891602, | |
| "logits/rejected": -14.525866508483887, | |
| "logps/chosen": -1.3380024433135986, | |
| "logps/rejected": -1.4540449380874634, | |
| "loss": 1.4055, | |
| "odds_ratio_loss": 0.6752744913101196, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.13380023837089539, | |
| "rewards/margins": 0.011604254133999348, | |
| "rewards/rejected": -0.14540448784828186, | |
| "sft_loss": 1.3380024433135986, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.0829805249788316, | |
| "grad_norm": 2.682673454284668, | |
| "learning_rate": 1.0631422602209608e-06, | |
| "logits/chosen": -14.46452808380127, | |
| "logits/rejected": -14.45245361328125, | |
| "logps/chosen": -1.52396559715271, | |
| "logps/rejected": -1.5300567150115967, | |
| "loss": 1.6016, | |
| "odds_ratio_loss": 0.7762898802757263, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.15239658951759338, | |
| "rewards/margins": 0.0006091115646995604, | |
| "rewards/rejected": -0.1530056893825531, | |
| "sft_loss": 1.52396559715271, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.0999153259949197, | |
| "grad_norm": 0.9156871438026428, | |
| "learning_rate": 1.027056714207319e-06, | |
| "logits/chosen": -14.493863105773926, | |
| "logits/rejected": -14.539648056030273, | |
| "logps/chosen": -1.4326021671295166, | |
| "logps/rejected": -1.5681862831115723, | |
| "loss": 1.5034, | |
| "odds_ratio_loss": 0.7082626223564148, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14326021075248718, | |
| "rewards/margins": 0.0135584007948637, | |
| "rewards/rejected": -0.15681862831115723, | |
| "sft_loss": 1.4326021671295166, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.116850127011008, | |
| "grad_norm": 3.18613600730896, | |
| "learning_rate": 9.914352311573838e-07, | |
| "logits/chosen": -14.396720886230469, | |
| "logits/rejected": -14.398330688476562, | |
| "logps/chosen": -1.3194880485534668, | |
| "logps/rejected": -1.4313329458236694, | |
| "loss": 1.3887, | |
| "odds_ratio_loss": 0.6922628283500671, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1319488137960434, | |
| "rewards/margins": 0.011184502393007278, | |
| "rewards/rejected": -0.14313331246376038, | |
| "sft_loss": 1.3194880485534668, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.1337849280270955, | |
| "grad_norm": 1.0878351926803589, | |
| "learning_rate": 9.562890339139877e-07, | |
| "logits/chosen": -14.146682739257812, | |
| "logits/rejected": -14.353192329406738, | |
| "logps/chosen": -1.3349636793136597, | |
| "logps/rejected": -1.379267930984497, | |
| "loss": 1.4097, | |
| "odds_ratio_loss": 0.7469658255577087, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.13349637389183044, | |
| "rewards/margins": 0.004430420231074095, | |
| "rewards/rejected": -0.13792680203914642, | |
| "sft_loss": 1.3349636793136597, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.1507197290431836, | |
| "grad_norm": 1.177203893661499, | |
| "learning_rate": 9.216291955772374e-07, | |
| "logits/chosen": -14.328463554382324, | |
| "logits/rejected": -14.295025825500488, | |
| "logps/chosen": -1.3897377252578735, | |
| "logps/rejected": -1.4198486804962158, | |
| "loss": 1.4659, | |
| "odds_ratio_loss": 0.7619088292121887, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13897378742694855, | |
| "rewards/margins": 0.0030110946390777826, | |
| "rewards/rejected": -0.14198487997055054, | |
| "sft_loss": 1.3897377252578735, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.167654530059272, | |
| "grad_norm": 2.2964181900024414, | |
| "learning_rate": 8.874666360158457e-07, | |
| "logits/chosen": -14.346217155456543, | |
| "logits/rejected": -14.197412490844727, | |
| "logps/chosen": -1.3614085912704468, | |
| "logps/rejected": -1.4674574136734009, | |
| "loss": 1.4341, | |
| "odds_ratio_loss": 0.7273774147033691, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.1361408680677414, | |
| "rewards/margins": 0.010604878887534142, | |
| "rewards/rejected": -0.1467457413673401, | |
| "sft_loss": 1.3614085912704468, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.18458933107536, | |
| "grad_norm": 3.246114492416382, | |
| "learning_rate": 8.538121184267315e-07, | |
| "logits/chosen": -14.440536499023438, | |
| "logits/rejected": -14.329854965209961, | |
| "logps/chosen": -1.2875430583953857, | |
| "logps/rejected": -1.3775211572647095, | |
| "loss": 1.3574, | |
| "odds_ratio_loss": 0.6986570954322815, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.12875431776046753, | |
| "rewards/margins": 0.008997795172035694, | |
| "rewards/rejected": -0.13775211572647095, | |
| "sft_loss": 1.2875430583953857, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.201524132091448, | |
| "grad_norm": 1.6076223850250244, | |
| "learning_rate": 8.206762459439907e-07, | |
| "logits/chosen": -14.393684387207031, | |
| "logits/rejected": -14.419075012207031, | |
| "logps/chosen": -1.4106100797653198, | |
| "logps/rejected": -1.4857350587844849, | |
| "loss": 1.4865, | |
| "odds_ratio_loss": 0.758701741695404, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14106100797653198, | |
| "rewards/margins": 0.0075125014409422874, | |
| "rewards/rejected": -0.14857350289821625, | |
| "sft_loss": 1.4106100797653198, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.218458933107536, | |
| "grad_norm": 1.4635405540466309, | |
| "learning_rate": 7.880694582982898e-07, | |
| "logits/chosen": -14.465181350708008, | |
| "logits/rejected": -14.500001907348633, | |
| "logps/chosen": -1.4319560527801514, | |
| "logps/rejected": -1.5127556324005127, | |
| "loss": 1.506, | |
| "odds_ratio_loss": 0.7399921417236328, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.14319561421871185, | |
| "rewards/margins": 0.00807994045317173, | |
| "rewards/rejected": -0.15127556025981903, | |
| "sft_loss": 1.4319560527801514, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.235393734123624, | |
| "grad_norm": 3.1588046550750732, | |
| "learning_rate": 7.560020285277401e-07, | |
| "logits/chosen": -14.269197463989258, | |
| "logits/rejected": -14.49077320098877, | |
| "logps/chosen": -1.3981552124023438, | |
| "logps/rejected": -1.4313172101974487, | |
| "loss": 1.4741, | |
| "odds_ratio_loss": 0.7590950727462769, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.1398155391216278, | |
| "rewards/margins": 0.0033162026666104794, | |
| "rewards/rejected": -0.14313173294067383, | |
| "sft_loss": 1.3981552124023438, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.252328535139712, | |
| "grad_norm": 3.586276054382324, | |
| "learning_rate": 7.244840597412956e-07, | |
| "logits/chosen": -14.22734546661377, | |
| "logits/rejected": -14.291172981262207, | |
| "logps/chosen": -1.514716625213623, | |
| "logps/rejected": -1.4183883666992188, | |
| "loss": 1.5981, | |
| "odds_ratio_loss": 0.8342422246932983, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.15147167444229126, | |
| "rewards/margins": -0.009632834233343601, | |
| "rewards/rejected": -0.14183883368968964, | |
| "sft_loss": 1.514716625213623, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.2692633361558, | |
| "grad_norm": 2.3110530376434326, | |
| "learning_rate": 6.935254819356796e-07, | |
| "logits/chosen": -14.419351577758789, | |
| "logits/rejected": -14.297566413879395, | |
| "logps/chosen": -1.4030816555023193, | |
| "logps/rejected": -1.4476964473724365, | |
| "loss": 1.4773, | |
| "odds_ratio_loss": 0.7421059012413025, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1403081715106964, | |
| "rewards/margins": 0.0044614695943892, | |
| "rewards/rejected": -0.14476963877677917, | |
| "sft_loss": 1.4030816555023193, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.2861981371718882, | |
| "grad_norm": 1.1914503574371338, | |
| "learning_rate": 6.631360488668662e-07, | |
| "logits/chosen": -14.460253715515137, | |
| "logits/rejected": -14.41465950012207, | |
| "logps/chosen": -1.2984880208969116, | |
| "logps/rejected": -1.4945783615112305, | |
| "loss": 1.3662, | |
| "odds_ratio_loss": 0.6775275468826294, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.12984880805015564, | |
| "rewards/margins": 0.019609034061431885, | |
| "rewards/rejected": -0.14945784211158752, | |
| "sft_loss": 1.2984880208969116, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.3031329381879764, | |
| "grad_norm": 2.2295608520507812, | |
| "learning_rate": 6.333253349770672e-07, | |
| "logits/chosen": -14.249277114868164, | |
| "logits/rejected": -14.275445938110352, | |
| "logps/chosen": -1.4399076700210571, | |
| "logps/rejected": -1.4462318420410156, | |
| "loss": 1.5184, | |
| "odds_ratio_loss": 0.7848686575889587, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.14399076998233795, | |
| "rewards/margins": 0.0006324196001514792, | |
| "rewards/rejected": -0.14462319016456604, | |
| "sft_loss": 1.4399076700210571, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.3200677392040645, | |
| "grad_norm": 0.9200133681297302, | |
| "learning_rate": 6.041027323782364e-07, | |
| "logits/chosen": -14.550092697143555, | |
| "logits/rejected": -14.5205717086792, | |
| "logps/chosen": -1.3879852294921875, | |
| "logps/rejected": -1.5073843002319336, | |
| "loss": 1.4568, | |
| "odds_ratio_loss": 0.6877447366714478, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1387985199689865, | |
| "rewards/margins": 0.011939908377826214, | |
| "rewards/rejected": -0.1507384330034256, | |
| "sft_loss": 1.3879852294921875, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.337002540220152, | |
| "grad_norm": 1.757595181465149, | |
| "learning_rate": 5.754774478929969e-07, | |
| "logits/chosen": -14.518872261047363, | |
| "logits/rejected": -14.515436172485352, | |
| "logps/chosen": -1.4030746221542358, | |
| "logps/rejected": -1.525309443473816, | |
| "loss": 1.4726, | |
| "odds_ratio_loss": 0.6956244707107544, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1403074562549591, | |
| "rewards/margins": 0.012223480269312859, | |
| "rewards/rejected": -0.1525309532880783, | |
| "sft_loss": 1.4030746221542358, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.3539373412362403, | |
| "grad_norm": 1.9958380460739136, | |
| "learning_rate": 5.474585001539634e-07, | |
| "logits/chosen": -14.516281127929688, | |
| "logits/rejected": -14.449725151062012, | |
| "logps/chosen": -1.3020037412643433, | |
| "logps/rejected": -1.4323627948760986, | |
| "loss": 1.3692, | |
| "odds_ratio_loss": 0.671482503414154, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13020040094852448, | |
| "rewards/margins": 0.013035891577601433, | |
| "rewards/rejected": -0.14323627948760986, | |
| "sft_loss": 1.3020037412643433, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.3708721422523285, | |
| "grad_norm": 0.9711344242095947, | |
| "learning_rate": 5.200547167623424e-07, | |
| "logits/chosen": -14.532658576965332, | |
| "logits/rejected": -14.446354866027832, | |
| "logps/chosen": -1.4261430501937866, | |
| "logps/rejected": -1.6040065288543701, | |
| "loss": 1.4923, | |
| "odds_ratio_loss": 0.6615304946899414, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.14261427521705627, | |
| "rewards/margins": 0.01778637059032917, | |
| "rewards/rejected": -0.1604006588459015, | |
| "sft_loss": 1.4261430501937866, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.3878069432684166, | |
| "grad_norm": 0.9917483925819397, | |
| "learning_rate": 4.932747315067271e-07, | |
| "logits/chosen": -14.57470417022705, | |
| "logits/rejected": -14.438740730285645, | |
| "logps/chosen": -1.4024930000305176, | |
| "logps/rejected": -1.469939947128296, | |
| "loss": 1.4755, | |
| "odds_ratio_loss": 0.7300769090652466, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.14024929702281952, | |
| "rewards/margins": 0.006744695361703634, | |
| "rewards/rejected": -0.1469939947128296, | |
| "sft_loss": 1.4024930000305176, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.4047417442845047, | |
| "grad_norm": 2.87284779548645, | |
| "learning_rate": 4.6712698164294553e-07, | |
| "logits/chosen": -14.489944458007812, | |
| "logits/rejected": -14.394497871398926, | |
| "logps/chosen": -1.4407953023910522, | |
| "logps/rejected": -1.4559253454208374, | |
| "loss": 1.5183, | |
| "odds_ratio_loss": 0.7750439047813416, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.1440795361995697, | |
| "rewards/margins": 0.0015129944076761603, | |
| "rewards/rejected": -0.14559254050254822, | |
| "sft_loss": 1.4407953023910522, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.421676545300593, | |
| "grad_norm": 3.170734167098999, | |
| "learning_rate": 4.41619705235842e-07, | |
| "logits/chosen": -14.575798034667969, | |
| "logits/rejected": -14.610578536987305, | |
| "logps/chosen": -1.375421404838562, | |
| "logps/rejected": -1.5859653949737549, | |
| "loss": 1.4439, | |
| "odds_ratio_loss": 0.6848722696304321, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.13754215836524963, | |
| "rewards/margins": 0.02105441316962242, | |
| "rewards/rejected": -0.15859656035900116, | |
| "sft_loss": 1.375421404838562, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.438611346316681, | |
| "grad_norm": 0.8895889520645142, | |
| "learning_rate": 4.167609385637961e-07, | |
| "logits/chosen": -14.474627494812012, | |
| "logits/rejected": -14.27497386932373, | |
| "logps/chosen": -1.3773252964019775, | |
| "logps/rejected": -1.4834753274917603, | |
| "loss": 1.4478, | |
| "odds_ratio_loss": 0.7047079205513, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.13773252069950104, | |
| "rewards/margins": 0.010615019127726555, | |
| "rewards/rejected": -0.14834752678871155, | |
| "sft_loss": 1.3773252964019775, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.4555461473327687, | |
| "grad_norm": 1.5126135349273682, | |
| "learning_rate": 3.9255851358683567e-07, | |
| "logits/chosen": -14.236564636230469, | |
| "logits/rejected": -14.380549430847168, | |
| "logps/chosen": -1.3431507349014282, | |
| "logps/rejected": -1.4221420288085938, | |
| "loss": 1.4184, | |
| "odds_ratio_loss": 0.7521894574165344, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13431507349014282, | |
| "rewards/margins": 0.007899129763245583, | |
| "rewards/rejected": -0.14221420884132385, | |
| "sft_loss": 1.3431507349014282, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.472480948348857, | |
| "grad_norm": 2.2620511054992676, | |
| "learning_rate": 3.690200554791082e-07, | |
| "logits/chosen": -14.424779891967773, | |
| "logits/rejected": -14.354517936706543, | |
| "logps/chosen": -1.3713457584381104, | |
| "logps/rejected": -1.483659267425537, | |
| "loss": 1.44, | |
| "odds_ratio_loss": 0.6865109205245972, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1371345818042755, | |
| "rewards/margins": 0.01123136654496193, | |
| "rewards/rejected": -0.14836594462394714, | |
| "sft_loss": 1.3713457584381104, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.489415749364945, | |
| "grad_norm": 2.7279679775238037, | |
| "learning_rate": 3.461529802265079e-07, | |
| "logits/chosen": -14.534950256347656, | |
| "logits/rejected": -14.408660888671875, | |
| "logps/chosen": -1.3657411336898804, | |
| "logps/rejected": -1.4428269863128662, | |
| "loss": 1.4382, | |
| "odds_ratio_loss": 0.724717915058136, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13657411932945251, | |
| "rewards/margins": 0.007708588149398565, | |
| "rewards/rejected": -0.14428271353244781, | |
| "sft_loss": 1.3657411336898804, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.506350550381033, | |
| "grad_norm": 1.4955379962921143, | |
| "learning_rate": 3.2396449229020883e-07, | |
| "logits/chosen": -14.613665580749512, | |
| "logits/rejected": -14.357098579406738, | |
| "logps/chosen": -1.430061936378479, | |
| "logps/rejected": -1.4436513185501099, | |
| "loss": 1.5066, | |
| "odds_ratio_loss": 0.7651657462120056, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14300617575645447, | |
| "rewards/margins": 0.001358934328891337, | |
| "rewards/rejected": -0.1443651169538498, | |
| "sft_loss": 1.430061936378479, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.523285351397121, | |
| "grad_norm": 2.4484000205993652, | |
| "learning_rate": 3.024615823368371e-07, | |
| "logits/chosen": -14.309808731079102, | |
| "logits/rejected": -14.362199783325195, | |
| "logps/chosen": -1.368744134902954, | |
| "logps/rejected": -1.4789055585861206, | |
| "loss": 1.4407, | |
| "odds_ratio_loss": 0.7196033596992493, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.13687442243099213, | |
| "rewards/margins": 0.011016142554581165, | |
| "rewards/rejected": -0.14789055287837982, | |
| "sft_loss": 1.368744134902954, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.5402201524132093, | |
| "grad_norm": 1.3006510734558105, | |
| "learning_rate": 2.8165102503600716e-07, | |
| "logits/chosen": -14.335368156433105, | |
| "logits/rejected": -14.394729614257812, | |
| "logps/chosen": -1.3518388271331787, | |
| "logps/rejected": -1.5090402364730835, | |
| "loss": 1.4234, | |
| "odds_ratio_loss": 0.7160680890083313, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1351839005947113, | |
| "rewards/margins": 0.015720132738351822, | |
| "rewards/rejected": -0.15090402960777283, | |
| "sft_loss": 1.3518388271331787, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.5402201524132093, | |
| "eval_logits/chosen": -14.433335304260254, | |
| "eval_logits/rejected": -14.40054702758789, | |
| "eval_logps/chosen": -1.4238022565841675, | |
| "eval_logps/rejected": -1.5122665166854858, | |
| "eval_loss": 1.4967381954193115, | |
| "eval_odds_ratio_loss": 0.7293583154678345, | |
| "eval_rewards/accuracies": 0.5038095116615295, | |
| "eval_rewards/chosen": -0.1423802226781845, | |
| "eval_rewards/margins": 0.008846436627209187, | |
| "eval_rewards/rejected": -0.15122665464878082, | |
| "eval_runtime": 445.9302, | |
| "eval_samples_per_second": 2.355, | |
| "eval_sft_loss": 1.4238022565841675, | |
| "eval_steps_per_second": 1.177, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.557154953429297, | |
| "grad_norm": 1.7379106283187866, | |
| "learning_rate": 2.615393769259039e-07, | |
| "logits/chosen": -14.186014175415039, | |
| "logits/rejected": -14.296531677246094, | |
| "logps/chosen": -1.5615041255950928, | |
| "logps/rejected": -1.4595506191253662, | |
| "loss": 1.6479, | |
| "odds_ratio_loss": 0.8642258644104004, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.1561504304409027, | |
| "rewards/margins": -0.01019534282386303, | |
| "rewards/rejected": -0.14595508575439453, | |
| "sft_loss": 1.5615041255950928, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.574089754445385, | |
| "grad_norm": 1.4174609184265137, | |
| "learning_rate": 2.421329743475917e-07, | |
| "logits/chosen": -14.357484817504883, | |
| "logits/rejected": -14.365758895874023, | |
| "logps/chosen": -1.3432402610778809, | |
| "logps/rejected": -1.4273216724395752, | |
| "loss": 1.4165, | |
| "odds_ratio_loss": 0.7326976656913757, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.13432399928569794, | |
| "rewards/margins": 0.008408156223595142, | |
| "rewards/rejected": -0.142732173204422, | |
| "sft_loss": 1.3432402610778809, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.5910245554614733, | |
| "grad_norm": 2.1974966526031494, | |
| "learning_rate": 2.234379314486973e-07, | |
| "logits/chosen": -14.357455253601074, | |
| "logits/rejected": -14.430908203125, | |
| "logps/chosen": -1.3924636840820312, | |
| "logps/rejected": -1.438753366470337, | |
| "loss": 1.4659, | |
| "odds_ratio_loss": 0.734772801399231, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.1392463743686676, | |
| "rewards/margins": 0.004628963768482208, | |
| "rewards/rejected": -0.1438753306865692, | |
| "sft_loss": 1.3924636840820312, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.6079593564775614, | |
| "grad_norm": 1.687161922454834, | |
| "learning_rate": 2.0546013825709783e-07, | |
| "logits/chosen": -14.250285148620605, | |
| "logits/rejected": -14.199666976928711, | |
| "logps/chosen": -1.3859349489212036, | |
| "logps/rejected": -1.6351137161254883, | |
| "loss": 1.4537, | |
| "odds_ratio_loss": 0.6773584485054016, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.13859349489212036, | |
| "rewards/margins": 0.024917880073189735, | |
| "rewards/rejected": -0.16351138055324554, | |
| "sft_loss": 1.3859349489212036, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.6248941574936495, | |
| "grad_norm": 1.5129095315933228, | |
| "learning_rate": 1.88205258825217e-07, | |
| "logits/chosen": -14.429784774780273, | |
| "logits/rejected": -14.17693042755127, | |
| "logps/chosen": -1.2826873064041138, | |
| "logps/rejected": -1.4500634670257568, | |
| "loss": 1.35, | |
| "odds_ratio_loss": 0.6734637022018433, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.12826873362064362, | |
| "rewards/margins": 0.016737615689635277, | |
| "rewards/rejected": -0.14500637352466583, | |
| "sft_loss": 1.2826873064041138, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.6418289585097376, | |
| "grad_norm": 2.0735878944396973, | |
| "learning_rate": 1.7167872944552245e-07, | |
| "logits/chosen": -14.309649467468262, | |
| "logits/rejected": -14.5745849609375, | |
| "logps/chosen": -1.3819622993469238, | |
| "logps/rejected": -1.4382798671722412, | |
| "loss": 1.4545, | |
| "odds_ratio_loss": 0.7257741689682007, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.1381962150335312, | |
| "rewards/margins": 0.005631768610328436, | |
| "rewards/rejected": -0.14382800459861755, | |
| "sft_loss": 1.3819622993469238, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.6587637595258258, | |
| "grad_norm": 1.331615924835205, | |
| "learning_rate": 1.5588575693777142e-07, | |
| "logits/chosen": -14.269506454467773, | |
| "logits/rejected": -14.277575492858887, | |
| "logps/chosen": -1.3485890626907349, | |
| "logps/rejected": -1.391801118850708, | |
| "loss": 1.42, | |
| "odds_ratio_loss": 0.714430034160614, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.1348589062690735, | |
| "rewards/margins": 0.004321185871958733, | |
| "rewards/rejected": -0.13918009400367737, | |
| "sft_loss": 1.3485890626907349, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.675698560541914, | |
| "grad_norm": 1.4459912776947021, | |
| "learning_rate": 1.4083131700856428e-07, | |
| "logits/chosen": -14.257006645202637, | |
| "logits/rejected": -14.398195266723633, | |
| "logps/chosen": -1.4757592678070068, | |
| "logps/rejected": -1.4755744934082031, | |
| "loss": 1.553, | |
| "odds_ratio_loss": 0.7721089124679565, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.14757592976093292, | |
| "rewards/margins": -1.848684587457683e-05, | |
| "rewards/rejected": -0.14755743741989136, | |
| "sft_loss": 1.4757592678070068, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.6926333615580016, | |
| "grad_norm": 1.7114406824111938, | |
| "learning_rate": 1.2652015268370315e-07, | |
| "logits/chosen": -14.462023735046387, | |
| "logits/rejected": -14.4578218460083, | |
| "logps/chosen": -1.3610906600952148, | |
| "logps/rejected": -1.4776142835617065, | |
| "loss": 1.4335, | |
| "odds_ratio_loss": 0.7242997884750366, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.13610906898975372, | |
| "rewards/margins": 0.011652367189526558, | |
| "rewards/rejected": -0.14776143431663513, | |
| "sft_loss": 1.3610906600952148, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.7095681625740897, | |
| "grad_norm": 1.469370722770691, | |
| "learning_rate": 1.1295677281386502e-07, | |
| "logits/chosen": -14.559967041015625, | |
| "logits/rejected": -14.478399276733398, | |
| "logps/chosen": -1.4620916843414307, | |
| "logps/rejected": -1.5956697463989258, | |
| "loss": 1.5327, | |
| "odds_ratio_loss": 0.706096351146698, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.14620915055274963, | |
| "rewards/margins": 0.013357831165194511, | |
| "rewards/rejected": -0.1595669686794281, | |
| "sft_loss": 1.4620916843414307, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.726502963590178, | |
| "grad_norm": 3.563047409057617, | |
| "learning_rate": 1.0014545065404973e-07, | |
| "logits/chosen": -14.436056137084961, | |
| "logits/rejected": -14.507670402526855, | |
| "logps/chosen": -1.4244582653045654, | |
| "logps/rejected": -1.5525462627410889, | |
| "loss": 1.4981, | |
| "odds_ratio_loss": 0.7365024089813232, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.14244583249092102, | |
| "rewards/margins": 0.012808804400265217, | |
| "rewards/rejected": -0.15525463223457336, | |
| "sft_loss": 1.4244582653045654, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.743437764606266, | |
| "grad_norm": 1.1012893915176392, | |
| "learning_rate": 8.809022251725502e-08, | |
| "logits/chosen": -14.58587646484375, | |
| "logits/rejected": -14.311334609985352, | |
| "logps/chosen": -1.3465197086334229, | |
| "logps/rejected": -1.5383667945861816, | |
| "loss": 1.4143, | |
| "odds_ratio_loss": 0.6780072450637817, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.13465197384357452, | |
| "rewards/margins": 0.01918472908437252, | |
| "rewards/rejected": -0.1538366973400116, | |
| "sft_loss": 1.3465197086334229, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.7603725656223537, | |
| "grad_norm": 1.1277046203613281, | |
| "learning_rate": 7.679488650280509e-08, | |
| "logits/chosen": -14.479377746582031, | |
| "logits/rejected": -14.5874605178833, | |
| "logps/chosen": -1.3598499298095703, | |
| "logps/rejected": -1.5038646459579468, | |
| "loss": 1.4265, | |
| "odds_ratio_loss": 0.6669132113456726, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.13598500192165375, | |
| "rewards/margins": 0.014401474967598915, | |
| "rewards/rejected": -0.1503864824771881, | |
| "sft_loss": 1.3598499298095703, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.777307366638442, | |
| "grad_norm": 4.1279425621032715, | |
| "learning_rate": 6.626300129972563e-08, | |
| "logits/chosen": -14.374710083007812, | |
| "logits/rejected": -14.649663925170898, | |
| "logps/chosen": -1.337192177772522, | |
| "logps/rejected": -1.399910569190979, | |
| "loss": 1.4103, | |
| "odds_ratio_loss": 0.7308821678161621, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.13371922075748444, | |
| "rewards/margins": 0.0062718503177165985, | |
| "rewards/rejected": -0.13999105989933014, | |
| "sft_loss": 1.337192177772522, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.79424216765453, | |
| "grad_norm": 1.395706057548523, | |
| "learning_rate": 5.649788506555065e-08, | |
| "logits/chosen": -14.170741081237793, | |
| "logits/rejected": -14.524632453918457, | |
| "logps/chosen": -1.359508752822876, | |
| "logps/rejected": -1.4829118251800537, | |
| "loss": 1.4274, | |
| "odds_ratio_loss": 0.6786811351776123, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.13595086336135864, | |
| "rewards/margins": 0.012340312823653221, | |
| "rewards/rejected": -0.1482912003993988, | |
| "sft_loss": 1.359508752822876, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.811176968670618, | |
| "grad_norm": 1.766761302947998, | |
| "learning_rate": 4.7502614380908474e-08, | |
| "logits/chosen": -14.416241645812988, | |
| "logits/rejected": -14.220751762390137, | |
| "logps/chosen": -1.3510209321975708, | |
| "logps/rejected": -1.4324430227279663, | |
| "loss": 1.4234, | |
| "odds_ratio_loss": 0.7241480946540833, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.13510209321975708, | |
| "rewards/margins": 0.008142213337123394, | |
| "rewards/rejected": -0.14324429631233215, | |
| "sft_loss": 1.3510209321975708, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.828111769686706, | |
| "grad_norm": 1.6919310092926025, | |
| "learning_rate": 3.9280023280222066e-08, | |
| "logits/chosen": -14.29878044128418, | |
| "logits/rejected": -14.355636596679688, | |
| "logps/chosen": -1.3545790910720825, | |
| "logps/rejected": -1.4631725549697876, | |
| "loss": 1.4267, | |
| "odds_ratio_loss": 0.7212874293327332, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.13545790314674377, | |
| "rewards/margins": 0.010859351605176926, | |
| "rewards/rejected": -0.146317258477211, | |
| "sft_loss": 1.3545790910720825, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.8450465707027943, | |
| "grad_norm": 1.2037099599838257, | |
| "learning_rate": 3.1832702358818855e-08, | |
| "logits/chosen": -14.370442390441895, | |
| "logits/rejected": -14.418550491333008, | |
| "logps/chosen": -1.509386658668518, | |
| "logps/rejected": -1.5371757745742798, | |
| "loss": 1.5849, | |
| "odds_ratio_loss": 0.7552896738052368, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.15093867480754852, | |
| "rewards/margins": 0.0027789073064923286, | |
| "rewards/rejected": -0.15371759235858917, | |
| "sft_loss": 1.509386658668518, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.8619813717188824, | |
| "grad_norm": 1.7988624572753906, | |
| "learning_rate": 2.5162997956746647e-08, | |
| "logits/chosen": -14.56567096710205, | |
| "logits/rejected": -14.401374816894531, | |
| "logps/chosen": -1.374145746231079, | |
| "logps/rejected": -1.5657732486724854, | |
| "loss": 1.4407, | |
| "odds_ratio_loss": 0.6658385992050171, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1374145746231079, | |
| "rewards/margins": 0.019162729382514954, | |
| "rewards/rejected": -0.15657731890678406, | |
| "sft_loss": 1.374145746231079, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.8789161727349706, | |
| "grad_norm": 1.8519299030303955, | |
| "learning_rate": 1.9273011419536914e-08, | |
| "logits/chosen": -14.358851432800293, | |
| "logits/rejected": -14.361642837524414, | |
| "logps/chosen": -1.3464272022247314, | |
| "logps/rejected": -1.403352975845337, | |
| "loss": 1.4203, | |
| "odds_ratio_loss": 0.7389532327651978, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13464272022247314, | |
| "rewards/margins": 0.005692584905773401, | |
| "rewards/rejected": -0.1403352916240692, | |
| "sft_loss": 1.3464272022247314, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.8958509737510583, | |
| "grad_norm": 1.5958627462387085, | |
| "learning_rate": 1.4164598436159083e-08, | |
| "logits/chosen": -14.45777416229248, | |
| "logits/rejected": -14.55150318145752, | |
| "logps/chosen": -1.3691927194595337, | |
| "logps/rejected": -1.3762314319610596, | |
| "loss": 1.4457, | |
| "odds_ratio_loss": 0.7649668455123901, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.13691926002502441, | |
| "rewards/margins": 0.0007038834737613797, | |
| "rewards/rejected": -0.1376231610774994, | |
| "sft_loss": 1.3691927194595337, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.9127857747671464, | |
| "grad_norm": 1.1447230577468872, | |
| "learning_rate": 9.839368454371556e-09, | |
| "logits/chosen": -14.424572944641113, | |
| "logits/rejected": -14.471136093139648, | |
| "logps/chosen": -1.3717620372772217, | |
| "logps/rejected": -1.5175390243530273, | |
| "loss": 1.4405, | |
| "odds_ratio_loss": 0.6872409582138062, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.13717620074748993, | |
| "rewards/margins": 0.014577709138393402, | |
| "rewards/rejected": -0.15175390243530273, | |
| "sft_loss": 1.3717620372772217, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.9297205757832345, | |
| "grad_norm": 1.2689136266708374, | |
| "learning_rate": 6.298684173650649e-09, | |
| "logits/chosen": -14.209467887878418, | |
| "logits/rejected": -14.251020431518555, | |
| "logps/chosen": -1.3433691263198853, | |
| "logps/rejected": -1.4693882465362549, | |
| "loss": 1.4164, | |
| "odds_ratio_loss": 0.7302767038345337, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.134336918592453, | |
| "rewards/margins": 0.012601924128830433, | |
| "rewards/rejected": -0.14693884551525116, | |
| "sft_loss": 1.3433691263198853, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.9466553767993227, | |
| "grad_norm": 1.0975892543792725, | |
| "learning_rate": 3.543661115860686e-09, | |
| "logits/chosen": -14.267629623413086, | |
| "logits/rejected": -14.19848918914795, | |
| "logps/chosen": -1.3776047229766846, | |
| "logps/rejected": -1.4311275482177734, | |
| "loss": 1.4519, | |
| "odds_ratio_loss": 0.7429286241531372, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1377604901790619, | |
| "rewards/margins": 0.0053522614762187, | |
| "rewards/rejected": -0.14311274886131287, | |
| "sft_loss": 1.3776047229766846, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.963590177815411, | |
| "grad_norm": 1.3392242193222046, | |
| "learning_rate": 1.575167273800693e-09, | |
| "logits/chosen": -14.299784660339355, | |
| "logits/rejected": -14.385360717773438, | |
| "logps/chosen": -1.3382477760314941, | |
| "logps/rejected": -1.3448528051376343, | |
| "loss": 1.4132, | |
| "odds_ratio_loss": 0.7496879696846008, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.13382478058338165, | |
| "rewards/margins": 0.0006605213275179267, | |
| "rewards/rejected": -0.13448528945446014, | |
| "sft_loss": 1.3382477760314941, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.9805249788314985, | |
| "grad_norm": 1.3686504364013672, | |
| "learning_rate": 3.9382283773564676e-10, | |
| "logits/chosen": -14.456472396850586, | |
| "logits/rejected": -14.480894088745117, | |
| "logps/chosen": -1.4318442344665527, | |
| "logps/rejected": -1.5163114070892334, | |
| "loss": 1.5085, | |
| "odds_ratio_loss": 0.7666203379631042, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.14318443834781647, | |
| "rewards/margins": 0.0084467101842165, | |
| "rewards/rejected": -0.15163113176822662, | |
| "sft_loss": 1.4318442344665527, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.9974597798475866, | |
| "grad_norm": 8.027430534362793, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -14.504228591918945, | |
| "logits/rejected": -14.523704528808594, | |
| "logps/chosen": -1.456779956817627, | |
| "logps/rejected": -1.5364240407943726, | |
| "loss": 1.5332, | |
| "odds_ratio_loss": 0.7639864683151245, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.14567799866199493, | |
| "rewards/margins": 0.007964405231177807, | |
| "rewards/rejected": -0.1536424160003662, | |
| "sft_loss": 1.456779956817627, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.9974597798475866, | |
| "step": 1770, | |
| "total_flos": 1.8624482718096753e+18, | |
| "train_loss": 1.5362868001905539, | |
| "train_runtime": 27766.9561, | |
| "train_samples_per_second": 1.021, | |
| "train_steps_per_second": 0.064 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1770, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.8624482718096753e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |