{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9943502824858759, "eval_steps": 100, "global_step": 220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completion_length": 193.8046875, "epoch": 0.00903954802259887, "grad_norm": 0.299434095621109, "kl": 0.0, "learning_rate": 0.0, "loss": 0.0276, "num_tokens": 503964.0, "reward": 4.863432988524437, "reward_std": 1.8696988988667727, "rewards/accuracy_reward": 0.345703125, "rewards/exec_out_all_reward": 0.7421875, "rewards/exec_out_step_reward": 0.9397887196391821, "rewards/format_reward": 0.642578125, "rewards/keywords_iou_reward": 0.3189462535083294, "rewards/sql_step_keywords_recall_reward": 0.5181736210361123, "step": 1 }, { "clip_ratio": 0.0, "completion_length": 188.369140625, "epoch": 0.01807909604519774, "grad_norm": 0.3049573004245758, "kl": 8.754432201385498e-08, "learning_rate": 1.3636363636363637e-07, "loss": 0.0311, "num_tokens": 1008385.0, "reward": 4.9951048865914345, "reward_std": 1.8348420038819313, "rewards/accuracy_reward": 0.34765625, "rewards/exec_out_all_reward": 0.787109375, "rewards/exec_out_step_reward": 0.9488211516290903, "rewards/format_reward": 0.642578125, "rewards/keywords_iou_reward": 0.35271753335837275, "rewards/sql_step_keywords_recall_reward": 0.5205361591652036, "step": 2 }, { "clip_ratio": 0.0, "completion_length": 190.701171875, "epoch": 0.02711864406779661, "grad_norm": 0.3177661597728729, "kl": 0.00011102110147476196, "learning_rate": 2.7272727272727274e-07, "loss": 0.0338, "num_tokens": 1511588.0, "reward": 4.939835079014301, "reward_std": 1.7858339007943869, "rewards/accuracy_reward": 0.322265625, "rewards/exec_out_all_reward": 0.80078125, "rewards/exec_out_step_reward": 0.9515764508396387, "rewards/format_reward": 0.65625, "rewards/keywords_iou_reward": 0.3603124172659591, "rewards/sql_step_keywords_recall_reward": 0.5215400578454137, "step": 3 }, { "clip_ratio": 0.0, "completion_length": 194.2421875, "epoch": 0.03615819209039548, "grad_norm": 0.29520705342292786, "kl": 0.00011537596583366394, "learning_rate": 4.0909090909090906e-07, "loss": 0.0418, "num_tokens": 2016412.0, "reward": 4.9831836223602295, "reward_std": 1.7916885651648045, "rewards/accuracy_reward": 0.37109375, "rewards/exec_out_all_reward": 0.751953125, "rewards/exec_out_step_reward": 0.9367489777505398, "rewards/format_reward": 0.630859375, "rewards/keywords_iou_reward": 0.32331358385272324, "rewards/sql_step_keywords_recall_reward": 0.5326199810951948, "step": 4 }, { "clip_ratio": 0.0, "completion_length": 192.185546875, "epoch": 0.04519774011299435, "grad_norm": 0.2962106168270111, "kl": 0.00011671334505081177, "learning_rate": 5.454545454545455e-07, "loss": 0.0337, "num_tokens": 2520743.0, "reward": 4.651097267866135, "reward_std": 1.9646679311990738, "rewards/accuracy_reward": 0.318359375, "rewards/exec_out_all_reward": 0.763671875, "rewards/exec_out_step_reward": 0.9430377371609211, "rewards/format_reward": 0.595703125, "rewards/keywords_iou_reward": 0.29655176820233464, "rewards/sql_step_keywords_recall_reward": 0.48214349802583456, "step": 5 }, { "clip_ratio": 0.0, "completion_length": 190.55078125, "epoch": 0.05423728813559322, "grad_norm": 0.3201525807380676, "kl": 0.00012993812561035156, "learning_rate": 6.818181818181818e-07, "loss": 0.0321, "num_tokens": 3023701.0, "reward": 4.995345205068588, "reward_std": 2.013074729591608, "rewards/accuracy_reward": 0.36328125, "rewards/exec_out_all_reward": 0.7734375, "rewards/exec_out_step_reward": 0.9453125055879354, "rewards/format_reward": 0.623046875, "rewards/keywords_iou_reward": 0.33588895108550787, "rewards/sql_step_keywords_recall_reward": 0.5286454004235566, "step": 6 }, { "clip_ratio": 0.0, "completion_length": 187.921875, "epoch": 0.06327683615819209, "grad_norm": 0.2839597165584564, "kl": 0.00017252564430236816, "learning_rate": 8.181818181818181e-07, "loss": 0.0222, "num_tokens": 3524501.0, "reward": 5.12370303273201, "reward_std": 1.872809598222375, "rewards/accuracy_reward": 0.38671875, "rewards/exec_out_all_reward": 0.771484375, "rewards/exec_out_step_reward": 0.9374604746699333, "rewards/format_reward": 0.662109375, "rewards/keywords_iou_reward": 0.3198722831439227, "rewards/sql_step_keywords_recall_reward": 0.5660292678512633, "step": 7 }, { "clip_ratio": 0.0, "completion_length": 194.861328125, "epoch": 0.07231638418079096, "grad_norm": 0.264257550239563, "kl": 0.0003757178783416748, "learning_rate": 9.545454545454546e-07, "loss": 0.0214, "num_tokens": 4033750.0, "reward": 4.592174172401428, "reward_std": 1.6819164399057627, "rewards/accuracy_reward": 0.287109375, "rewards/exec_out_all_reward": 0.72265625, "rewards/exec_out_step_reward": 0.9274584576487541, "rewards/format_reward": 0.662109375, "rewards/keywords_iou_reward": 0.31115873460657895, "rewards/sql_step_keywords_recall_reward": 0.5091950967907906, "step": 8 }, { "clip_ratio": 0.0, "completion_length": 190.73046875, "epoch": 0.08135593220338982, "grad_norm": 0.2734237313270569, "kl": 0.0005688667297363281, "learning_rate": 1.090909090909091e-06, "loss": 0.0149, "num_tokens": 4536888.0, "reward": 5.064344555139542, "reward_std": 1.7968224007636309, "rewards/accuracy_reward": 0.35546875, "rewards/exec_out_all_reward": 0.7109375, "rewards/exec_out_step_reward": 0.9287527892738581, "rewards/format_reward": 0.7265625, "rewards/keywords_iou_reward": 0.354521602508612, "rewards/sql_step_keywords_recall_reward": 0.5671735098585486, "step": 9 }, { "clip_ratio": 0.0, "completion_length": 177.17578125, "epoch": 0.0903954802259887, "grad_norm": 0.22836743295192719, "kl": 0.002085447311401367, "learning_rate": 1.2272727272727274e-06, "loss": 0.002, "num_tokens": 5032606.0, "reward": 5.785055458545685, "reward_std": 1.7521540587767959, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.759765625, "rewards/exec_out_step_reward": 0.9359297584742308, "rewards/format_reward": 0.826171875, "rewards/keywords_iou_reward": 0.38180301152169704, "rewards/sql_step_keywords_recall_reward": 0.6558322114869952, "step": 10 }, { "clip_ratio": 0.0, "completion_length": 177.865234375, "epoch": 0.09943502824858758, "grad_norm": 0.2427971363067627, "kl": 0.0032596588134765625, "learning_rate": 1.3636363636363636e-06, "loss": 0.0119, "num_tokens": 5529005.0, "reward": 5.771241188049316, "reward_std": 1.641195336356759, "rewards/accuracy_reward": 0.4375, "rewards/exec_out_all_reward": 0.783203125, "rewards/exec_out_step_reward": 0.9457356799393892, "rewards/format_reward": 0.869140625, "rewards/keywords_iou_reward": 0.39362214831635356, "rewards/sql_step_keywords_recall_reward": 0.6359174773097038, "step": 11 }, { "clip_ratio": 0.0, "completion_length": 173.3203125, "epoch": 0.10847457627118644, "grad_norm": 0.23091059923171997, "kl": 0.003955364227294922, "learning_rate": 1.5e-06, "loss": 0.0064, "num_tokens": 6024409.0, "reward": 6.000889599323273, "reward_std": 1.5886465199291706, "rewards/accuracy_reward": 0.46875, "rewards/exec_out_all_reward": 0.76953125, "rewards/exec_out_step_reward": 0.9391183033585548, "rewards/format_reward": 0.8984375, "rewards/keywords_iou_reward": 0.42962841456755996, "rewards/sql_step_keywords_recall_reward": 0.6595456739887595, "step": 12 }, { "clip_ratio": 0.0, "completion_length": 171.25, "epoch": 0.11751412429378531, "grad_norm": 0.2572859823703766, "kl": 0.007582187652587891, "learning_rate": 1.6363636363636363e-06, "loss": 0.0009, "num_tokens": 6520105.0, "reward": 5.847834274172783, "reward_std": 1.4467571768909693, "rewards/accuracy_reward": 0.46484375, "rewards/exec_out_all_reward": 0.744140625, "rewards/exec_out_step_reward": 0.9297774098813534, "rewards/format_reward": 0.904296875, "rewards/keywords_iou_reward": 0.3783310679718852, "rewards/sql_step_keywords_recall_reward": 0.6535822190344334, "step": 13 }, { "clip_ratio": 0.0, "completion_length": 160.04296875, "epoch": 0.12655367231638417, "grad_norm": 0.23153281211853027, "kl": 0.012262344360351562, "learning_rate": 1.7727272727272729e-06, "loss": -0.0025, "num_tokens": 7007539.0, "reward": 5.9820186495780945, "reward_std": 1.6872543934732676, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.82421875, "rewards/exec_out_step_reward": 0.9496279824525118, "rewards/format_reward": 0.90234375, "rewards/keywords_iou_reward": 0.44878690084442496, "rewards/sql_step_keywords_recall_reward": 0.6816918756812811, "step": 14 }, { "clip_ratio": 0.0, "completion_length": 159.3203125, "epoch": 0.13559322033898305, "grad_norm": 0.22347486019134521, "kl": 0.015293121337890625, "learning_rate": 1.909090909090909e-06, "loss": 0.001, "num_tokens": 7495787.0, "reward": 5.671351440250874, "reward_std": 1.3719452489167452, "rewards/accuracy_reward": 0.357421875, "rewards/exec_out_all_reward": 0.771484375, "rewards/exec_out_step_reward": 0.939460875466466, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.46561355609446764, "rewards/sql_step_keywords_recall_reward": 0.6658978424966335, "step": 15 }, { "clip_ratio": 0.0, "completion_length": 158.810546875, "epoch": 0.14463276836158193, "grad_norm": 0.2615886926651001, "kl": 0.021930694580078125, "learning_rate": 2.0454545454545453e-06, "loss": -0.003, "num_tokens": 7983430.0, "reward": 5.687411919236183, "reward_std": 1.400244857184589, "rewards/accuracy_reward": 0.38671875, "rewards/exec_out_all_reward": 0.791015625, "rewards/exec_out_step_reward": 0.9499209504574537, "rewards/format_reward": 0.88671875, "rewards/keywords_iou_reward": 0.4251784700900316, "rewards/sql_step_keywords_recall_reward": 0.6625246489420533, "step": 16 }, { "clip_ratio": 0.0, "completion_length": 155.96484375, "epoch": 0.1536723163841808, "grad_norm": 0.2243858426809311, "kl": 0.019573211669921875, "learning_rate": 2.181818181818182e-06, "loss": 0.0037, "num_tokens": 8467588.0, "reward": 6.101026564836502, "reward_std": 1.3565897848457098, "rewards/accuracy_reward": 0.447265625, "rewards/exec_out_all_reward": 0.826171875, "rewards/exec_out_step_reward": 0.9590797107666731, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.4580969992093742, "rewards/sql_step_keywords_recall_reward": 0.6769247055053711, "step": 17 }, { "clip_ratio": 0.0, "completion_length": 157.99609375, "epoch": 0.16271186440677965, "grad_norm": 0.23680506646633148, "kl": 0.0247344970703125, "learning_rate": 2.318181818181818e-06, "loss": 0.0027, "num_tokens": 8954138.0, "reward": 6.041056051850319, "reward_std": 1.2283777361735702, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.845703125, "rewards/exec_out_step_reward": 0.959887308999896, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.44093527970835567, "rewards/sql_step_keywords_recall_reward": 0.6895325118675828, "step": 18 }, { "clip_ratio": 0.0, "completion_length": 156.0703125, "epoch": 0.17175141242937852, "grad_norm": 0.2468118816614151, "kl": 0.0263519287109375, "learning_rate": 2.454545454545455e-06, "loss": 0.0033, "num_tokens": 9439242.0, "reward": 6.419172838330269, "reward_std": 1.4407691890373826, "rewards/accuracy_reward": 0.49609375, "rewards/exec_out_all_reward": 0.83984375, "rewards/exec_out_step_reward": 0.9605569522827864, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.4862754005007446, "rewards/sql_step_keywords_recall_reward": 0.7243463154882193, "step": 19 }, { "clip_ratio": 0.0, "completion_length": 158.8828125, "epoch": 0.1807909604519774, "grad_norm": 0.2642815411090851, "kl": 0.028415679931640625, "learning_rate": 2.590909090909091e-06, "loss": 0.0059, "num_tokens": 9927642.0, "reward": 5.962770789861679, "reward_std": 1.364680239930749, "rewards/accuracy_reward": 0.423828125, "rewards/exec_out_all_reward": 0.875, "rewards/exec_out_step_reward": 0.9687531031668186, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.4039350217208266, "rewards/sql_step_keywords_recall_reward": 0.678335040807724, "step": 20 }, { "clip_ratio": 0.0, "completion_length": 157.8203125, "epoch": 0.18983050847457628, "grad_norm": 0.24316003918647766, "kl": 0.032009124755859375, "learning_rate": 2.7272727272727272e-06, "loss": -0.0003, "num_tokens": 10415362.0, "reward": 6.179068893194199, "reward_std": 1.4617707338184118, "rewards/accuracy_reward": 0.462890625, "rewards/exec_out_all_reward": 0.845703125, "rewards/exec_out_step_reward": 0.9599469937384129, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.4459744766354561, "rewards/sql_step_keywords_recall_reward": 0.6924073351547122, "step": 21 }, { "clip_ratio": 0.0, "completion_length": 157.458984375, "epoch": 0.19887005649717515, "grad_norm": 0.24961793422698975, "kl": 0.03668212890625, "learning_rate": 2.863636363636364e-06, "loss": -0.004, "num_tokens": 10901521.0, "reward": 6.071441277861595, "reward_std": 1.1777024501934648, "rewards/accuracy_reward": 0.453125, "rewards/exec_out_all_reward": 0.830078125, "rewards/exec_out_step_reward": 0.9483445044606924, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.42953827418386936, "rewards/sql_step_keywords_recall_reward": 0.6683171540498734, "step": 22 }, { "clip_ratio": 0.0, "completion_length": 158.517578125, "epoch": 0.207909604519774, "grad_norm": 0.26023003458976746, "kl": 0.03612518310546875, "learning_rate": 3e-06, "loss": 0.0019, "num_tokens": 11389862.0, "reward": 6.313733980059624, "reward_std": 1.3131706872954965, "rewards/accuracy_reward": 0.47265625, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9664326030761003, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.46862596087157726, "rewards/sql_step_keywords_recall_reward": 0.6971588619053364, "step": 23 }, { "clip_ratio": 0.0, "completion_length": 162.736328125, "epoch": 0.21694915254237288, "grad_norm": 0.25574371218681335, "kl": 0.039890289306640625, "learning_rate": 2.9998111915108126e-06, "loss": -0.0018, "num_tokens": 11879287.0, "reward": 5.985842078924179, "reward_std": 1.2227760329842567, "rewards/accuracy_reward": 0.423828125, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9674719516187906, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4203591588884592, "rewards/sql_step_keywords_recall_reward": 0.6659330297261477, "step": 24 }, { "clip_ratio": 0.0, "completion_length": 162.716796875, "epoch": 0.22598870056497175, "grad_norm": 0.2357674390077591, "kl": 0.039127349853515625, "learning_rate": 2.9992448135747778e-06, "loss": -0.0065, "num_tokens": 12367230.0, "reward": 6.357031494379044, "reward_std": 1.352663902565837, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.89453125, "rewards/exec_out_step_reward": 0.9736126679927111, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.462111447006464, "rewards/sql_step_keywords_recall_reward": 0.6994303409010172, "step": 25 }, { "clip_ratio": 0.0, "completion_length": 163.759765625, "epoch": 0.23502824858757063, "grad_norm": 3.8160126209259033, "kl": 0.41811370849609375, "learning_rate": 2.998301008774512e-06, "loss": 0.0131, "num_tokens": 12855263.0, "reward": 6.066176131367683, "reward_std": 1.418117775581777, "rewards/accuracy_reward": 0.453125, "rewards/exec_out_all_reward": 0.865234375, "rewards/exec_out_step_reward": 0.9645538832992315, "rewards/format_reward": 0.9140625, "rewards/keywords_iou_reward": 0.4158592028543353, "rewards/sql_step_keywords_recall_reward": 0.678106939420104, "step": 26 }, { "clip_ratio": 0.0, "completion_length": 172.52734375, "epoch": 0.2440677966101695, "grad_norm": 0.23165632784366608, "kl": 0.03951263427734375, "learning_rate": 2.9969800147078265e-06, "loss": 0.0075, "num_tokens": 13348781.0, "reward": 6.2546906769275665, "reward_std": 1.2166710263118148, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.960680965334177, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4882043502293527, "rewards/sql_step_keywords_recall_reward": 0.7082259934395552, "step": 27 }, { "clip_ratio": 0.0, "completion_length": 174.0, "epoch": 0.25310734463276835, "grad_norm": 0.2445058375597, "kl": 0.04166412353515625, "learning_rate": 2.9952821639279137e-06, "loss": 0.0028, "num_tokens": 494680.0, "reward": 6.440436959266663, "reward_std": 1.2339025381952524, "rewards/accuracy_reward": 0.50390625, "rewards/exec_out_all_reward": 0.83984375, "rewards/exec_out_step_reward": 0.9568103682249784, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.48325524432584643, "rewards/sql_step_keywords_recall_reward": 0.7143817320466042, "step": 28 }, { "clip_ratio": 0.0, "completion_length": 178.755859375, "epoch": 0.2621468926553672, "grad_norm": 0.23020873963832855, "kl": 0.04241943359375, "learning_rate": 2.993207883859627e-06, "loss": -0.003, "num_tokens": 991863.0, "reward": 5.925758346915245, "reward_std": 1.3979150608647615, "rewards/accuracy_reward": 0.4140625, "rewards/exec_out_all_reward": 0.865234375, "rewards/exec_out_step_reward": 0.9639307502657175, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.41449853405356407, "rewards/sql_step_keywords_recall_reward": 0.6797055369243026, "step": 29 }, { "clip_ratio": 0.0, "completion_length": 182.48828125, "epoch": 0.2711864406779661, "grad_norm": 0.22643530368804932, "kl": 0.04361724853515625, "learning_rate": 2.990757696691881e-06, "loss": 0.0059, "num_tokens": 1490665.0, "reward": 6.013850957155228, "reward_std": 1.3883078750222921, "rewards/accuracy_reward": 0.43359375, "rewards/exec_out_all_reward": 0.833984375, "rewards/exec_out_step_reward": 0.9505758639425039, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.43903162656351924, "rewards/sql_step_keywords_recall_reward": 0.6773993754759431, "step": 30 }, { "clip_ratio": 0.0, "completion_length": 190.423828125, "epoch": 0.280225988700565, "grad_norm": 0.2063753306865692, "kl": 0.0457611083984375, "learning_rate": 2.987932219246193e-06, "loss": 0.0075, "num_tokens": 1993394.0, "reward": 5.88956793397665, "reward_std": 1.3650804716162384, "rewards/accuracy_reward": 0.40625, "rewards/exec_out_all_reward": 0.810546875, "rewards/exec_out_step_reward": 0.9385083485394716, "rewards/format_reward": 0.921875, "rewards/keywords_iou_reward": 0.4571849275380373, "rewards/sql_step_keywords_recall_reward": 0.679267879575491, "step": 31 }, { "clip_ratio": 0.0, "completion_length": 193.060546875, "epoch": 0.28926553672316385, "grad_norm": 0.22804522514343262, "kl": 0.0471343994140625, "learning_rate": 2.984732162821399e-06, "loss": 0.0114, "num_tokens": 2497401.0, "reward": 5.931887894868851, "reward_std": 1.4683727947995067, "rewards/accuracy_reward": 0.435546875, "rewards/exec_out_all_reward": 0.822265625, "rewards/exec_out_step_reward": 0.9442894347012043, "rewards/format_reward": 0.90625, "rewards/keywords_iou_reward": 0.4232069947756827, "rewards/sql_step_keywords_recall_reward": 0.670481245033443, "step": 32 }, { "clip_ratio": 0.0, "completion_length": 194.625, "epoch": 0.2983050847457627, "grad_norm": 0.20885376632213593, "kl": 0.04837799072265625, "learning_rate": 2.9811583330145917e-06, "loss": 0.0136, "num_tokens": 3002817.0, "reward": 6.591131284832954, "reward_std": 1.272476114332676, "rewards/accuracy_reward": 0.5390625, "rewards/exec_out_all_reward": 0.8359375, "rewards/exec_out_step_reward": 0.9475725479424, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4941097451373935, "rewards/sql_step_keywords_recall_reward": 0.7217455059289932, "step": 33 }, { "clip_ratio": 0.0, "completion_length": 201.3046875, "epoch": 0.3073446327683616, "grad_norm": 0.21211469173431396, "kl": 0.048919677734375, "learning_rate": 2.9772116295183124e-06, "loss": -0.001, "num_tokens": 3512913.0, "reward": 6.256010413169861, "reward_std": 1.339096024632454, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.853515625, "rewards/exec_out_step_reward": 0.9534575026482344, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.4364256302360445, "rewards/sql_step_keywords_recall_reward": 0.7129048258066177, "step": 34 }, { "clip_ratio": 0.0, "completion_length": 204.94140625, "epoch": 0.3163841807909605, "grad_norm": 0.22092854976654053, "kl": 0.0505828857421875, "learning_rate": 2.97289304589406e-06, "loss": 0.017, "num_tokens": 4024451.0, "reward": 5.7779867351055145, "reward_std": 1.4151953971013427, "rewards/accuracy_reward": 0.376953125, "rewards/exec_out_all_reward": 0.8359375, "rewards/exec_out_step_reward": 0.9576846230775118, "rewards/format_reward": 0.91015625, "rewards/keywords_iou_reward": 0.4494855832308531, "rewards/sql_step_keywords_recall_reward": 0.6674247067421675, "step": 35 }, { "clip_ratio": 0.0, "completion_length": 205.88671875, "epoch": 0.3254237288135593, "grad_norm": 0.20495107769966125, "kl": 0.0490264892578125, "learning_rate": 2.9682036693221684e-06, "loss": 0.0146, "num_tokens": 4537929.0, "reward": 6.179159179329872, "reward_std": 1.19661252386868, "rewards/accuracy_reward": 0.458984375, "rewards/exec_out_all_reward": 0.837890625, "rewards/exec_out_step_reward": 0.9454868901520967, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.48097023693844676, "rewards/sql_step_keywords_recall_reward": 0.6564974309876561, "step": 36 }, { "clip_ratio": 0.0, "completion_length": 205.6953125, "epoch": 0.3344632768361582, "grad_norm": 0.20743127167224884, "kl": 0.05022430419921875, "learning_rate": 2.963144680328111e-06, "loss": 0.0123, "num_tokens": 5048565.0, "reward": 6.279510959982872, "reward_std": 1.4437304949387908, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.861328125, "rewards/exec_out_step_reward": 0.95453792065382, "rewards/format_reward": 0.923828125, "rewards/keywords_iou_reward": 0.4697499736212194, "rewards/sql_step_keywords_recall_reward": 0.686254383996129, "step": 37 }, { "clip_ratio": 0.0, "completion_length": 208.59765625, "epoch": 0.34350282485875705, "grad_norm": 0.20503395795822144, "kl": 0.05016326904296875, "learning_rate": 2.9577173524853125e-06, "loss": -0.0049, "num_tokens": 5560463.0, "reward": 5.8292489647865295, "reward_std": 1.3312111617997289, "rewards/accuracy_reward": 0.3984375, "rewards/exec_out_all_reward": 0.837890625, "rewards/exec_out_step_reward": 0.9564809743314981, "rewards/format_reward": 0.91796875, "rewards/keywords_iou_reward": 0.430765890982002, "rewards/sql_step_keywords_recall_reward": 0.6616268502548337, "step": 38 }, { "clip_ratio": 0.0, "completion_length": 204.892578125, "epoch": 0.3525423728813559, "grad_norm": 0.19791673123836517, "kl": 0.04753875732421875, "learning_rate": 2.9519230520945346e-06, "loss": -0.0044, "num_tokens": 6072524.0, "reward": 6.163229390978813, "reward_std": 1.2822516057640314, "rewards/accuracy_reward": 0.453125, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.963240172713995, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4430888262577355, "rewards/sql_step_keywords_recall_reward": 0.696624081581831, "step": 39 }, { "clip_ratio": 0.0, "completion_length": 206.439453125, "epoch": 0.3615819209039548, "grad_norm": 0.1895550638437271, "kl": 0.048187255859375, "learning_rate": 2.9457632378399134e-06, "loss": 0.0102, "num_tokens": 6585445.0, "reward": 5.869170263409615, "reward_std": 1.2297673234716058, "rewards/accuracy_reward": 0.37890625, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.9623821955174208, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4554209231864661, "rewards/sql_step_keywords_recall_reward": 0.6834461260586977, "step": 40 }, { "clip_ratio": 0.0, "completion_length": 201.962890625, "epoch": 0.3706214689265537, "grad_norm": 0.20803290605545044, "kl": 0.0442962646484375, "learning_rate": 2.9392394604217463e-06, "loss": 0.0043, "num_tokens": 7094046.0, "reward": 6.21223983168602, "reward_std": 1.2842160500586033, "rewards/accuracy_reward": 0.4765625, "rewards/exec_out_all_reward": 0.875, "rewards/exec_out_step_reward": 0.9636959079653025, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.43379027443006635, "rewards/sql_step_keywords_recall_reward": 0.6680727442726493, "step": 41 }, { "clip_ratio": 0.0, "completion_length": 206.572265625, "epoch": 0.37966101694915255, "grad_norm": 0.19538187980651855, "kl": 0.044647216796875, "learning_rate": 2.932353362166111e-06, "loss": 0.0142, "num_tokens": 7608915.0, "reward": 6.033717706799507, "reward_std": 1.3723872043192387, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.857421875, "rewards/exec_out_step_reward": 0.9629634786397219, "rewards/format_reward": 0.95703125, "rewards/keywords_iou_reward": 0.429211582057178, "rewards/sql_step_keywords_recall_reward": 0.6713154595345259, "step": 42 }, { "clip_ratio": 0.0, "completion_length": 203.2421875, "epoch": 0.3887005649717514, "grad_norm": 0.20477567613124847, "kl": 0.04170989990234375, "learning_rate": 2.9251066766114183e-06, "loss": 0.0111, "num_tokens": 8120303.0, "reward": 5.484863147139549, "reward_std": 1.2587912240996957, "rewards/accuracy_reward": 0.3046875, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9700288362801075, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.42473567882552743, "rewards/sql_step_keywords_recall_reward": 0.641925479285419, "step": 43 }, { "clip_ratio": 0.0, "completion_length": 198.205078125, "epoch": 0.3977401129943503, "grad_norm": 0.19639819860458374, "kl": 0.042938232421875, "learning_rate": 2.9175012280720027e-06, "loss": -0.0058, "num_tokens": 8629068.0, "reward": 5.85739204287529, "reward_std": 1.3158389078453183, "rewards/accuracy_reward": 0.39453125, "rewards/exec_out_all_reward": 0.828125, "rewards/exec_out_step_reward": 0.9519577771425247, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.44756252504885197, "rewards/sql_step_keywords_recall_reward": 0.6665591625496745, "step": 44 }, { "clip_ratio": 0.0, "completion_length": 199.18359375, "epoch": 0.4067796610169492, "grad_norm": 0.19431763887405396, "kl": 0.041290283203125, "learning_rate": 2.9095389311788626e-06, "loss": 0.0103, "num_tokens": 9137718.0, "reward": 6.069079004228115, "reward_std": 1.3046986246481538, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.8359375, "rewards/exec_out_step_reward": 0.9571854863315821, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.4311121259815991, "rewards/sql_step_keywords_recall_reward": 0.6656848564743996, "step": 45 }, { "clip_ratio": 0.0, "completion_length": 197.578125, "epoch": 0.415819209039548, "grad_norm": 0.18736235797405243, "kl": 0.04170989990234375, "learning_rate": 2.9012217903976603e-06, "loss": 0.0009, "num_tokens": 9644030.0, "reward": 6.115010187029839, "reward_std": 1.2594214268028736, "rewards/accuracy_reward": 0.443359375, "rewards/exec_out_all_reward": 0.87890625, "rewards/exec_out_step_reward": 0.9649127330631018, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.42334456741809845, "rewards/sql_step_keywords_recall_reward": 0.7018458480015397, "step": 46 }, { "clip_ratio": 0.0, "completion_length": 194.72265625, "epoch": 0.4248587570621469, "grad_norm": 0.21365734934806824, "kl": 0.03936004638671875, "learning_rate": 2.892551899524109e-06, "loss": -0.0027, "num_tokens": 10148012.0, "reward": 6.157889060676098, "reward_std": 1.2168289944529533, "rewards/accuracy_reward": 0.447265625, "rewards/exec_out_all_reward": 0.90234375, "rewards/exec_out_step_reward": 0.9766919370740652, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.43009675364010036, "rewards/sql_step_keywords_recall_reward": 0.6862379219383001, "step": 47 }, { "clip_ratio": 0.0, "completion_length": 187.716796875, "epoch": 0.43389830508474575, "grad_norm": 0.1940770298242569, "kl": 0.0411834716796875, "learning_rate": 2.8835314411568722e-06, "loss": 0.0058, "num_tokens": 10649115.0, "reward": 6.114228963851929, "reward_std": 1.1688512060791254, "rewards/accuracy_reward": 0.423828125, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9754417818039656, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.4551887298002839, "rewards/sql_step_keywords_recall_reward": 0.7069253623485565, "step": 48 }, { "clip_ratio": 0.0, "completion_length": 188.994140625, "epoch": 0.4429378531073446, "grad_norm": 0.1919233798980713, "kl": 0.040985107421875, "learning_rate": 2.8741626861481045e-06, "loss": 0.0096, "num_tokens": 11150488.0, "reward": 6.672178938984871, "reward_std": 1.279738076031208, "rewards/accuracy_reward": 0.51171875, "rewards/exec_out_all_reward": 0.90234375, "rewards/exec_out_step_reward": 0.9763346407562494, "rewards/format_reward": 0.970703125, "rewards/keywords_iou_reward": 0.5284834480844438, "rewards/sql_step_keywords_recall_reward": 0.7189555410295725, "step": 49 }, { "clip_ratio": 0.0, "completion_length": 191.287109375, "epoch": 0.4519774011299435, "grad_norm": 0.20888246595859528, "kl": 0.041839599609375, "learning_rate": 2.8644479930317777e-06, "loss": 0.0116, "num_tokens": 11654247.0, "reward": 6.019737772643566, "reward_std": 1.2559357401914895, "rewards/accuracy_reward": 0.423828125, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9652940593659878, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.44982042722404003, "rewards/sql_step_keywords_recall_reward": 0.6469903746619821, "step": 50 }, { "clip_ratio": 0.0, "completion_length": 188.822265625, "epoch": 0.4610169491525424, "grad_norm": 0.20416894555091858, "kl": 0.04168701171875, "learning_rate": 2.854389807429932e-06, "loss": 0.0077, "num_tokens": 12156812.0, "reward": 5.93711394071579, "reward_std": 1.3493517027236521, "rewards/accuracy_reward": 0.419921875, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.9683663547039032, "rewards/format_reward": 0.95703125, "rewards/keywords_iou_reward": 0.4109305152669549, "rewards/sql_step_keywords_recall_reward": 0.6507927812635899, "step": 51 }, { "clip_ratio": 0.0, "completion_length": 186.201171875, "epoch": 0.47005649717514125, "grad_norm": 0.2009599655866623, "kl": 0.04203033447265625, "learning_rate": 2.843990661437004e-06, "loss": -0.0079, "num_tokens": 12656615.0, "reward": 6.319135099649429, "reward_std": 1.1828816812485456, "rewards/accuracy_reward": 0.490234375, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9687747992575169, "rewards/format_reward": 0.966796875, "rewards/keywords_iou_reward": 0.44148961594328284, "rewards/sql_step_keywords_recall_reward": 0.668552921153605, "step": 52 }, { "clip_ratio": 0.0, "completion_length": 184.240234375, "epoch": 0.47909604519774013, "grad_norm": 0.19552090764045715, "kl": 0.040004730224609375, "learning_rate": 2.8332531729823854e-06, "loss": 0.0091, "num_tokens": 13154062.0, "reward": 6.084091693162918, "reward_std": 1.3168746987357736, "rewards/accuracy_reward": 0.439453125, "rewards/exec_out_all_reward": 0.89453125, "rewards/exec_out_step_reward": 0.9761377759277821, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.41599453624803573, "rewards/sql_step_keywords_recall_reward": 0.6822148254141212, "step": 53 }, { "clip_ratio": 0.0, "completion_length": 186.546875, "epoch": 0.488135593220339, "grad_norm": 0.18975664675235748, "kl": 0.041107177734375, "learning_rate": 2.822180045171373e-06, "loss": 0.0031, "num_tokens": 13654138.0, "reward": 6.5647883862257, "reward_std": 1.142817527987063, "rewards/accuracy_reward": 0.5390625, "rewards/exec_out_all_reward": 0.890625, "rewards/exec_out_step_reward": 0.972067216411233, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.45550795644521713, "rewards/sql_step_keywords_recall_reward": 0.6836584862321615, "step": 54 }, { "clip_ratio": 0.0, "completion_length": 185.6171875, "epoch": 0.4971751412429379, "grad_norm": 0.19297046959400177, "kl": 0.04022216796875, "learning_rate": 2.8107740656046774e-06, "loss": 0.0018, "num_tokens": 14153762.0, "reward": 6.067966505885124, "reward_std": 1.339047422632575, "rewards/accuracy_reward": 0.419921875, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9681291859596968, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4687476740218699, "rewards/sql_step_keywords_recall_reward": 0.6701545566320419, "step": 55 }, { "clip_ratio": 0.0, "completion_length": 183.33203125, "epoch": 0.5062146892655367, "grad_norm": 0.19569052755832672, "kl": 0.03861236572265625, "learning_rate": 2.7990381056766585e-06, "loss": -0.0018, "num_tokens": 14653404.0, "reward": 6.230767786502838, "reward_std": 1.3125396608375013, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.9760687928646803, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.44265242759138346, "rewards/sql_step_keywords_recall_reward": 0.681894151493907, "step": 56 }, { "clip_ratio": 0.0, "completion_length": 191.53125, "epoch": 0.5152542372881356, "grad_norm": 0.19364245235919952, "kl": 0.0371551513671875, "learning_rate": 2.7869751198524656e-06, "loss": -0.0058, "num_tokens": 15157368.0, "reward": 6.011801972985268, "reward_std": 1.4972982537001371, "rewards/accuracy_reward": 0.435546875, "rewards/exec_out_all_reward": 0.841796875, "rewards/exec_out_step_reward": 0.9621403776109219, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.4371058586984873, "rewards/sql_step_keywords_recall_reward": 0.6559187090024352, "step": 57 }, { "clip_ratio": 0.0, "completion_length": 189.95703125, "epoch": 0.5242937853107345, "grad_norm": 0.1767347753047943, "kl": 0.037456512451171875, "learning_rate": 2.7745881449242716e-06, "loss": 0.0095, "num_tokens": 15662582.0, "reward": 6.229088187217712, "reward_std": 1.2932423749007285, "rewards/accuracy_reward": 0.4921875, "rewards/exec_out_all_reward": 0.853515625, "rewards/exec_out_step_reward": 0.9643019940704107, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4135230230167508, "rewards/sql_step_keywords_recall_reward": 0.6760213691741228, "step": 58 }, { "clip_ratio": 0.0, "completion_length": 191.84765625, "epoch": 0.5333333333333333, "grad_norm": 0.1982230544090271, "kl": 0.036502838134765625, "learning_rate": 2.761880299246772e-06, "loss": -0.0078, "num_tokens": 16165460.0, "reward": 6.063759118318558, "reward_std": 1.3337543765082955, "rewards/accuracy_reward": 0.439453125, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9730437770485878, "rewards/format_reward": 0.95703125, "rewards/keywords_iou_reward": 0.42756529804319143, "rewards/sql_step_keywords_recall_reward": 0.6379284737631679, "step": 59 }, { "clip_ratio": 0.0, "completion_length": 191.296875, "epoch": 0.5423728813559322, "grad_norm": 0.19018128514289856, "kl": 0.036468505859375, "learning_rate": 2.748854781952157e-06, "loss": -0.008, "num_tokens": 16671384.0, "reward": 6.285549536347389, "reward_std": 1.3978888802230358, "rewards/accuracy_reward": 0.490234375, "rewards/exec_out_all_reward": 0.861328125, "rewards/exec_out_step_reward": 0.9668580982834101, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4441379075869918, "rewards/sql_step_keywords_recall_reward": 0.6686968319118023, "step": 60 }, { "clip_ratio": 0.0, "completion_length": 203.625, "epoch": 0.5514124293785311, "grad_norm": 0.19416409730911255, "kl": 0.033016204833984375, "learning_rate": 2.735514872144749e-06, "loss": -0.008, "num_tokens": 17181944.0, "reward": 6.115775644779205, "reward_std": 1.6173988990485668, "rewards/accuracy_reward": 0.453125, "rewards/exec_out_all_reward": 0.873046875, "rewards/exec_out_step_reward": 0.9669309612363577, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.4201988475397229, "rewards/sql_step_keywords_recall_reward": 0.6697751097381115, "step": 61 }, { "clip_ratio": 0.0, "completion_length": 197.470703125, "epoch": 0.56045197740113, "grad_norm": 0.19720803201198578, "kl": 0.033603668212890625, "learning_rate": 2.721863928075504e-06, "loss": 0.0067, "num_tokens": 17690761.0, "reward": 6.248985543847084, "reward_std": 1.3238589530810714, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9681136887520552, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4740994907915592, "rewards/sql_step_keywords_recall_reward": 0.6705634454265237, "step": 62 }, { "clip_ratio": 0.0, "completion_length": 202.150390625, "epoch": 0.5694915254237288, "grad_norm": 0.19422629475593567, "kl": 0.032680511474609375, "learning_rate": 2.707905386296588e-06, "loss": -0.0065, "num_tokens": 18198586.0, "reward": 6.26141269505024, "reward_std": 1.2987114731222391, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.884765625, "rewards/exec_out_step_reward": 0.9684066604822874, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.4992506830021739, "rewards/sql_step_keywords_recall_reward": 0.6577859437093139, "step": 63 }, { "clip_ratio": 0.0, "completion_length": 206.173828125, "epoch": 0.5785310734463277, "grad_norm": 0.17673034965991974, "kl": 0.034145355224609375, "learning_rate": 2.6936427607962483e-06, "loss": 0.0066, "num_tokens": 18710943.0, "reward": 5.954583629965782, "reward_std": 1.3590196399018168, "rewards/accuracy_reward": 0.43359375, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.9674510210752487, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.3994289576075971, "rewards/sql_step_keywords_recall_reward": 0.6550715854391456, "step": 64 }, { "clip_ratio": 0.0, "completion_length": 205.908203125, "epoch": 0.5875706214689266, "grad_norm": 0.1863545924425125, "kl": 0.035305023193359375, "learning_rate": 2.6790796421141813e-06, "loss": 0.0025, "num_tokens": 19222232.0, "reward": 6.260747715830803, "reward_std": 1.2453271835111082, "rewards/accuracy_reward": 0.48046875, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9697412867099047, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4435248177032918, "rewards/sql_step_keywords_recall_reward": 0.6637223660945892, "step": 65 }, { "clip_ratio": 0.0, "completion_length": 208.349609375, "epoch": 0.5966101694915255, "grad_norm": 0.18152064085006714, "kl": 0.034008026123046875, "learning_rate": 2.6642196964376354e-06, "loss": 0.005, "num_tokens": 19736371.0, "reward": 6.178658068180084, "reward_std": 1.2107095727697015, "rewards/accuracy_reward": 0.439453125, "rewards/exec_out_all_reward": 0.875, "rewards/exec_out_step_reward": 0.9720323383808136, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.47833117935806513, "rewards/sql_step_keywords_recall_reward": 0.6698852656409144, "step": 66 }, { "clip_ratio": 0.0, "completion_length": 213.107421875, "epoch": 0.6056497175141243, "grad_norm": 0.28298619389533997, "kl": 0.06278610229492188, "learning_rate": 2.649066664678467e-06, "loss": -0.001, "num_tokens": 20249726.0, "reward": 6.54718804359436, "reward_std": 1.2923204032704234, "rewards/accuracy_reward": 0.5546875, "rewards/exec_out_all_reward": 0.888671875, "rewards/exec_out_step_reward": 0.9712689146399498, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.41723292297683656, "rewards/sql_step_keywords_recall_reward": 0.6906719226390123, "step": 67 }, { "clip_ratio": 0.0, "completion_length": 216.087890625, "epoch": 0.6146892655367232, "grad_norm": 0.1797921508550644, "kl": 0.037776947021484375, "learning_rate": 2.6336243615313876e-06, "loss": -0.0023, "num_tokens": 20765263.0, "reward": 6.382973074913025, "reward_std": 1.2423311527818441, "rewards/accuracy_reward": 0.486328125, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.9681625198572874, "rewards/format_reward": 0.9296875, "rewards/keywords_iou_reward": 0.48494360502809286, "rewards/sql_step_keywords_recall_reward": 0.677345173433423, "step": 68 }, { "clip_ratio": 0.0, "completion_length": 218.08203125, "epoch": 0.6237288135593221, "grad_norm": 0.171707421541214, "kl": 0.03691864013671875, "learning_rate": 2.6178966745136323e-06, "loss": -0.0042, "num_tokens": 21284597.0, "reward": 6.1321365386247635, "reward_std": 1.2671317560598254, "rewards/accuracy_reward": 0.45703125, "rewards/exec_out_all_reward": 0.849609375, "rewards/exec_out_step_reward": 0.9618140794336796, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.43983029294759035, "rewards/sql_step_keywords_recall_reward": 0.6695681791752577, "step": 69 }, { "clip_ratio": 0.0, "completion_length": 215.431640625, "epoch": 0.632768361581921, "grad_norm": 0.18202371895313263, "kl": 0.0379486083984375, "learning_rate": 2.6018875629862996e-06, "loss": -0.0007, "num_tokens": 21802886.0, "reward": 6.155725434422493, "reward_std": 1.3498500874266028, "rewards/accuracy_reward": 0.447265625, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9724082369357347, "rewards/format_reward": 0.927734375, "rewards/keywords_iou_reward": 0.45339376712217927, "rewards/sql_step_keywords_recall_reward": 0.673013923689723, "step": 70 }, { "clip_ratio": 0.0, "completion_length": 222.77734375, "epoch": 0.6418079096045197, "grad_norm": 0.1931043118238449, "kl": 0.04012298583984375, "learning_rate": 2.585601057157605e-06, "loss": -0.0014, "num_tokens": 22324500.0, "reward": 6.071022488176823, "reward_std": 1.363126328913495, "rewards/accuracy_reward": 0.458984375, "rewards/exec_out_all_reward": 0.845703125, "rewards/exec_out_step_reward": 0.9601764027029276, "rewards/format_reward": 0.91015625, "rewards/keywords_iou_reward": 0.42694294080138206, "rewards/sql_step_keywords_recall_reward": 0.6651632944121957, "step": 71 }, { "clip_ratio": 0.0, "completion_length": 216.185546875, "epoch": 0.6508474576271186, "grad_norm": 0.18399913609027863, "kl": 0.03882598876953125, "learning_rate": 2.5690412570682945e-06, "loss": -0.0003, "num_tokens": 22841407.0, "reward": 6.496973499655724, "reward_std": 1.1896542916074395, "rewards/accuracy_reward": 0.525390625, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9670332632958889, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.45264067919924855, "rewards/sql_step_keywords_recall_reward": 0.7105963062494993, "step": 72 }, { "clip_ratio": 0.0, "completion_length": 216.98828125, "epoch": 0.6598870056497175, "grad_norm": 0.17450737953186035, "kl": 0.037841796875, "learning_rate": 2.552212331559482e-06, "loss": -0.0076, "num_tokens": 23356665.0, "reward": 6.324795305728912, "reward_std": 1.2927344804629683, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.916015625, "rewards/exec_out_step_reward": 0.9763090629130602, "rewards/format_reward": 0.927734375, "rewards/keywords_iou_reward": 0.4405778916552663, "rewards/sql_step_keywords_recall_reward": 0.6938929669559002, "step": 73 }, { "clip_ratio": 0.0, "completion_length": 222.453125, "epoch": 0.6689265536723163, "grad_norm": 0.18439583480358124, "kl": 0.03949737548828125, "learning_rate": 2.535118517223168e-06, "loss": 0.0112, "num_tokens": 23875141.0, "reward": 6.2983558177948, "reward_std": 1.3202420324087143, "rewards/accuracy_reward": 0.494140625, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9673696402460337, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.41860854998230934, "rewards/sql_step_keywords_recall_reward": 0.7047065645456314, "step": 74 }, { "clip_ratio": 0.0, "completion_length": 217.3203125, "epoch": 0.6779661016949152, "grad_norm": 0.17797358334064484, "kl": 0.037334442138671875, "learning_rate": 2.5177641173356982e-06, "loss": 0.0013, "num_tokens": 24391073.0, "reward": 6.193948924541473, "reward_std": 1.4235245073214173, "rewards/accuracy_reward": 0.466796875, "rewards/exec_out_all_reward": 0.8984375, "rewards/exec_out_step_reward": 0.9762230291962624, "rewards/format_reward": 0.91796875, "rewards/keywords_iou_reward": 0.42891340190544724, "rewards/sql_step_keywords_recall_reward": 0.6763053219765425, "step": 75 }, { "clip_ratio": 0.0, "completion_length": 222.564453125, "epoch": 0.6870056497175141, "grad_norm": 0.17648960649967194, "kl": 0.03826904296875, "learning_rate": 2.5001535007744377e-06, "loss": 0.0017, "num_tokens": 24910594.0, "reward": 6.196133196353912, "reward_std": 1.3525635278783739, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.8671875, "rewards/exec_out_step_reward": 0.9637571293860674, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.4552514897659421, "rewards/sql_step_keywords_recall_reward": 0.677341865375638, "step": 76 }, { "clip_ratio": 0.0, "completion_length": 217.21875, "epoch": 0.696045197740113, "grad_norm": 0.1959611028432846, "kl": 0.042377471923828125, "learning_rate": 2.4822911009179277e-06, "loss": 0.0062, "num_tokens": 25428242.0, "reward": 6.28637857735157, "reward_std": 1.4062156137079, "rewards/accuracy_reward": 0.466796875, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9695289265364408, "rewards/format_reward": 0.91796875, "rewards/keywords_iou_reward": 0.4715513661503792, "rewards/sql_step_keywords_recall_reward": 0.7116375369951129, "step": 77 }, { "clip_ratio": 0.0, "completion_length": 218.865234375, "epoch": 0.7050847457627119, "grad_norm": 0.1893206089735031, "kl": 0.03656005859375, "learning_rate": 2.464181414529809e-06, "loss": 0.0047, "num_tokens": 25947605.0, "reward": 5.8394907265901566, "reward_std": 1.3573181126266718, "rewards/accuracy_reward": 0.390625, "rewards/exec_out_all_reward": 0.84375, "rewards/exec_out_step_reward": 0.9585681743919849, "rewards/format_reward": 0.92578125, "rewards/keywords_iou_reward": 0.4459369848482311, "rewards/sql_step_keywords_recall_reward": 0.6570173809304833, "step": 78 }, { "clip_ratio": 0.0, "completion_length": 212.64453125, "epoch": 0.7141242937853107, "grad_norm": 0.17911891639232635, "kl": 0.037021636962890625, "learning_rate": 2.4458290006267837e-06, "loss": -0.0001, "num_tokens": 26462715.0, "reward": 6.297634035348892, "reward_std": 1.2602604366838932, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.8671875, "rewards/exec_out_step_reward": 0.9640160016715527, "rewards/format_reward": 0.91796875, "rewards/keywords_iou_reward": 0.4979046704247594, "rewards/sql_step_keywords_recall_reward": 0.7089024959132075, "step": 79 }, { "clip_ratio": 0.0, "completion_length": 210.369140625, "epoch": 0.7231638418079096, "grad_norm": 0.18120358884334564, "kl": 0.035877227783203125, "learning_rate": 2.427238479330908e-06, "loss": 0.0027, "num_tokens": 26975792.0, "reward": 6.11888575553894, "reward_std": 1.5019584177061915, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.9738637823611498, "rewards/format_reward": 0.90234375, "rewards/keywords_iou_reward": 0.4661337183788419, "rewards/sql_step_keywords_recall_reward": 0.6912701558321714, "step": 80 }, { "clip_ratio": 0.0, "completion_length": 210.333984375, "epoch": 0.7322033898305085, "grad_norm": 0.18645890057086945, "kl": 0.03778076171875, "learning_rate": 2.4084145307065e-06, "loss": 0.0031, "num_tokens": 27488767.0, "reward": 6.057440027594566, "reward_std": 1.336686883121729, "rewards/accuracy_reward": 0.41796875, "rewards/exec_out_all_reward": 0.8671875, "rewards/exec_out_step_reward": 0.9673099610954523, "rewards/format_reward": 0.90625, "rewards/keywords_iou_reward": 0.49649542290717363, "rewards/sql_step_keywords_recall_reward": 0.6518266946077347, "step": 81 }, { "clip_ratio": 0.0, "completion_length": 206.6796875, "epoch": 0.7412429378531074, "grad_norm": 0.18546722829341888, "kl": 0.035541534423828125, "learning_rate": 2.389361893581961e-06, "loss": -0.0067, "num_tokens": 28001731.0, "reward": 6.299026131629944, "reward_std": 1.3064639195799828, "rewards/accuracy_reward": 0.490234375, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.9757549054920673, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.4211979394312948, "rewards/sql_step_keywords_recall_reward": 0.6957191359251738, "step": 82 }, { "clip_ratio": 0.0, "completion_length": 205.423828125, "epoch": 0.7502824858757062, "grad_norm": 0.18241924047470093, "kl": 0.03655242919921875, "learning_rate": 2.3700853643567976e-06, "loss": 0.0047, "num_tokens": 28512732.0, "reward": 6.236706480383873, "reward_std": 1.2026822408661246, "rewards/accuracy_reward": 0.49609375, "rewards/exec_out_all_reward": 0.861328125, "rewards/exec_out_step_reward": 0.9606026802212, "rewards/format_reward": 0.904296875, "rewards/keywords_iou_reward": 0.43541459972038865, "rewards/sql_step_keywords_recall_reward": 0.6552745532244444, "step": 83 }, { "clip_ratio": 0.0, "completion_length": 201.544921875, "epoch": 0.7593220338983051, "grad_norm": 0.19208958745002747, "kl": 0.0334625244140625, "learning_rate": 2.350589795794156e-06, "loss": -0.0085, "num_tokens": 29021143.0, "reward": 6.086180254817009, "reward_std": 1.245661067776382, "rewards/accuracy_reward": 0.439453125, "rewards/exec_out_all_reward": 0.869140625, "rewards/exec_out_step_reward": 0.9663248769938946, "rewards/format_reward": 0.900390625, "rewards/keywords_iou_reward": 0.45154744386672974, "rewards/sql_step_keywords_recall_reward": 0.6894168108701706, "step": 84 }, { "clip_ratio": 0.0, "completion_length": 201.060546875, "epoch": 0.768361581920904, "grad_norm": 0.1888093501329422, "kl": 0.03191375732421875, "learning_rate": 2.3308800957991657e-06, "loss": 0.0122, "num_tokens": 29529626.0, "reward": 6.194907002151012, "reward_std": 1.2625772105529904, "rewards/accuracy_reward": 0.470703125, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.9580496698617935, "rewards/format_reward": 0.9140625, "rewards/keywords_iou_reward": 0.45335739478468895, "rewards/sql_step_keywords_recall_reward": 0.6777987945824862, "step": 85 }, { "clip_ratio": 0.0, "completion_length": 195.92578125, "epoch": 0.7774011299435029, "grad_norm": 0.18627431988716125, "kl": 0.031524658203125, "learning_rate": 2.3109612261833968e-06, "loss": -0.0039, "num_tokens": 30036392.0, "reward": 6.356723390519619, "reward_std": 1.5176555626094341, "rewards/accuracy_reward": 0.474609375, "rewards/exec_out_all_reward": 0.869140625, "rewards/exec_out_step_reward": 0.9662566669285297, "rewards/format_reward": 0.919921875, "rewards/keywords_iou_reward": 0.4986234325915575, "rewards/sql_step_keywords_recall_reward": 0.7057198826223612, "step": 86 }, { "clip_ratio": 0.0, "completion_length": 191.9296875, "epoch": 0.7864406779661017, "grad_norm": 0.19546450674533844, "kl": 0.031040191650390625, "learning_rate": 2.2908382014157536e-06, "loss": 0.0014, "num_tokens": 30540172.0, "reward": 6.007154896855354, "reward_std": 1.4377120230346918, "rewards/accuracy_reward": 0.44140625, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.9677215088158846, "rewards/format_reward": 0.908203125, "rewards/keywords_iou_reward": 0.4238502769730985, "rewards/sql_step_keywords_recall_reward": 0.6624359153211117, "step": 87 }, { "clip_ratio": 0.0, "completion_length": 191.09765625, "epoch": 0.7954802259887006, "grad_norm": 0.19553017616271973, "kl": 0.0312347412109375, "learning_rate": 2.27051608736011e-06, "loss": -0.0024, "num_tokens": 31042954.0, "reward": 6.573052808642387, "reward_std": 1.3831378351897001, "rewards/accuracy_reward": 0.52734375, "rewards/exec_out_all_reward": 0.873046875, "rewards/exec_out_step_reward": 0.9695428721606731, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4803972856607288, "rewards/sql_step_keywords_recall_reward": 0.7208402901887894, "step": 88 }, { "clip_ratio": 0.0, "completion_length": 193.73828125, "epoch": 0.8045197740112995, "grad_norm": 0.19286681711673737, "kl": 0.031497955322265625, "learning_rate": 2.25e-06, "loss": 0.0024, "num_tokens": 31547488.0, "reward": 6.184370666742325, "reward_std": 1.2657314036041498, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9704876635223627, "rewards/format_reward": 0.9140625, "rewards/keywords_iou_reward": 0.4544413227122277, "rewards/sql_step_keywords_recall_reward": 0.7034378284588456, "step": 89 }, { "clip_ratio": 0.0, "completion_length": 187.453125, "epoch": 0.8135593220338984, "grad_norm": 0.19443422555923462, "kl": 0.030490875244140625, "learning_rate": 2.229295104150703e-06, "loss": -0.0012, "num_tokens": 32049256.0, "reward": 6.299180343747139, "reward_std": 1.3650079052895308, "rewards/accuracy_reward": 0.494140625, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.9578698594123125, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.4481339924968779, "rewards/sql_step_keywords_recall_reward": 0.6755113024264574, "step": 90 }, { "clip_ratio": 0.0, "completion_length": 186.705078125, "epoch": 0.8225988700564971, "grad_norm": 0.19431209564208984, "kl": 0.03226470947265625, "learning_rate": 2.2084066121590242e-06, "loss": 0.0028, "num_tokens": 32550041.0, "reward": 6.25195187330246, "reward_std": 1.1578238443471491, "rewards/accuracy_reward": 0.484375, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.958809994161129, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.4414861728437245, "rewards/sql_step_keywords_recall_reward": 0.6699351165443659, "step": 91 }, { "clip_ratio": 0.0, "completion_length": 183.091796875, "epoch": 0.831638418079096, "grad_norm": 0.1891166865825653, "kl": 0.031597137451171875, "learning_rate": 2.187339782591116e-06, "loss": -0.0113, "num_tokens": 33048284.0, "reward": 6.556719660758972, "reward_std": 1.3454436883330345, "rewards/accuracy_reward": 0.529296875, "rewards/exec_out_all_reward": 0.875, "rewards/exec_out_step_reward": 0.9694142211228609, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.4681888446211815, "rewards/sql_step_keywords_recall_reward": 0.7095215003937483, "step": 92 }, { "clip_ratio": 0.0, "completion_length": 189.263671875, "epoch": 0.8406779661016949, "grad_norm": 0.19213782250881195, "kl": 0.032009124755859375, "learning_rate": 2.166099918908661e-06, "loss": 0.0056, "num_tokens": 33552367.0, "reward": 6.123005196452141, "reward_std": 1.2814611946232617, "rewards/accuracy_reward": 0.439453125, "rewards/exec_out_all_reward": 0.849609375, "rewards/exec_out_step_reward": 0.9598136860877275, "rewards/format_reward": 0.955078125, "rewards/keywords_iou_reward": 0.4669042509049177, "rewards/sql_step_keywords_recall_reward": 0.6668829349800944, "step": 93 }, { "clip_ratio": 0.0, "completion_length": 187.416015625, "epoch": 0.8497175141242937, "grad_norm": 0.19189083576202393, "kl": 0.033023834228515625, "learning_rate": 2.1446923681337578e-06, "loss": 0.0026, "num_tokens": 34052664.0, "reward": 6.199526712298393, "reward_std": 1.2074398496188223, "rewards/accuracy_reward": 0.4375, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9721904434263706, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.4780406136997044, "rewards/sql_step_keywords_recall_reward": 0.6911769825965166, "step": 94 }, { "clip_ratio": 0.0, "completion_length": 179.875, "epoch": 0.8587570621468926, "grad_norm": 0.20052167773246765, "kl": 0.031097412109375, "learning_rate": 2.1231225195028298e-06, "loss": 0.0007, "num_tokens": 34549540.0, "reward": 6.093083538115025, "reward_std": 1.3363224570639431, "rewards/accuracy_reward": 0.421875, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.9643709696829319, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.45903572149109095, "rewards/sql_step_keywords_recall_reward": 0.7145474180579185, "step": 95 }, { "clip_ratio": 0.0, "completion_length": 178.525390625, "epoch": 0.8677966101694915, "grad_norm": 0.19684137403964996, "kl": 0.033634185791015625, "learning_rate": 2.1013958031099208e-06, "loss": 0.0089, "num_tokens": 35046073.0, "reward": 6.270583778619766, "reward_std": 1.2627136316150427, "rewards/accuracy_reward": 0.501953125, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9700528588145971, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.40578509494662285, "rewards/sql_step_keywords_recall_reward": 0.6627888614311814, "step": 96 }, { "clip_ratio": 0.0, "completion_length": 180.716796875, "epoch": 0.8768361581920904, "grad_norm": 0.19163627922534943, "kl": 0.0335235595703125, "learning_rate": 2.079517688539693e-06, "loss": 0.0145, "num_tokens": 35545980.0, "reward": 6.588066384196281, "reward_std": 1.1638533752411604, "rewards/accuracy_reward": 0.546875, "rewards/exec_out_all_reward": 0.888671875, "rewards/exec_out_step_reward": 0.9688662607222795, "rewards/format_reward": 0.958984375, "rewards/keywords_iou_reward": 0.4430620293132961, "rewards/sql_step_keywords_recall_reward": 0.6979197897017002, "step": 97 }, { "clip_ratio": 0.0, "completion_length": 184.3359375, "epoch": 0.8858757062146893, "grad_norm": 0.20076608657836914, "kl": 0.03387451171875, "learning_rate": 2.0574936834904912e-06, "loss": 0.0007, "num_tokens": 36044740.0, "reward": 6.348625332117081, "reward_std": 1.502235893625766, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.869140625, "rewards/exec_out_step_reward": 0.9674339685589075, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4792054174467921, "rewards/sql_step_keywords_recall_reward": 0.7001243568956852, "step": 98 }, { "clip_ratio": 0.0, "completion_length": 183.564453125, "epoch": 0.8949152542372881, "grad_norm": 0.1972309947013855, "kl": 0.036647796630859375, "learning_rate": 2.0353293323878076e-06, "loss": -0.001, "num_tokens": 36544293.0, "reward": 5.943858131766319, "reward_std": 1.3974212240427732, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.841796875, "rewards/exec_out_step_reward": 0.9585201255977154, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.39970124512910843, "rewards/sql_step_keywords_recall_reward": 0.6703105177730322, "step": 99 }, { "clip_ratio": 0.0, "completion_length": 179.31640625, "epoch": 0.903954802259887, "grad_norm": 0.1914178431034088, "kl": 0.035003662109375, "learning_rate": 2.0130302149885033e-06, "loss": 0.008, "num_tokens": 37040831.0, "reward": 6.4351161271333694, "reward_std": 1.3373866842593998, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.90234375, "rewards/exec_out_step_reward": 0.9746520053595304, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.481153879314661, "rewards/sql_step_keywords_recall_reward": 0.713000101968646, "step": 100 }, { "clip_ratio": 0.0, "completion_length": 182.529296875, "epoch": 0.9129943502824859, "grad_norm": 0.19319666922092438, "kl": 0.03475189208984375, "learning_rate": 1.990601944976133e-06, "loss": 0.0012, "num_tokens": 37538390.0, "reward": 6.169027402997017, "reward_std": 1.2931091291829944, "rewards/accuracy_reward": 0.423828125, "rewards/exec_out_all_reward": 0.916015625, "rewards/exec_out_step_reward": 0.980399776250124, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4757719023618847, "rewards/sql_step_keywords_recall_reward": 0.6804432403296232, "step": 101 }, { "clip_ratio": 0.0, "completion_length": 181.212890625, "epoch": 0.9220338983050848, "grad_norm": 0.1960325837135315, "kl": 0.03401947021484375, "learning_rate": 1.9680501685477304e-06, "loss": 0.0151, "num_tokens": 38036931.0, "reward": 6.41617426276207, "reward_std": 1.365414334461093, "rewards/accuracy_reward": 0.5, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.96528010815382, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.4880142016336322, "rewards/sql_step_keywords_recall_reward": 0.6760376645252109, "step": 102 }, { "clip_ratio": 0.0, "completion_length": 177.46484375, "epoch": 0.9310734463276836, "grad_norm": 0.1870131492614746, "kl": 0.035228729248046875, "learning_rate": 1.9453805629924126e-06, "loss": -0.0004, "num_tokens": 38533177.0, "reward": 6.086783587932587, "reward_std": 1.2497869406361133, "rewards/accuracy_reward": 0.44921875, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.968900365754962, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.4101551335770637, "rewards/sql_step_keywords_recall_reward": 0.6823387397453189, "step": 103 }, { "clip_ratio": 0.0, "completion_length": 178.97265625, "epoch": 0.9401129943502825, "grad_norm": 0.20031407475471497, "kl": 0.035129547119140625, "learning_rate": 1.9225988352621446e-06, "loss": -0.0078, "num_tokens": 39029707.0, "reward": 6.0163338631391525, "reward_std": 1.0426889704540372, "rewards/accuracy_reward": 0.439453125, "rewards/exec_out_all_reward": 0.83203125, "rewards/exec_out_step_reward": 0.959713701158762, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.4327788045629859, "rewards/sql_step_keywords_recall_reward": 0.6695781610906124, "step": 104 }, { "clip_ratio": 0.0, "completion_length": 180.3046875, "epoch": 0.9491525423728814, "grad_norm": 0.19868069887161255, "kl": 0.0337371826171875, "learning_rate": 1.8997107205350524e-06, "loss": 0.0245, "num_tokens": 39526947.0, "reward": 6.0550860315561295, "reward_std": 1.201542696915567, "rewards/accuracy_reward": 0.416015625, "rewards/exec_out_all_reward": 0.84765625, "rewards/exec_out_step_reward": 0.9624209459871054, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.48174334689974785, "rewards/sql_step_keywords_recall_reward": 0.6760533768683672, "step": 105 }, { "clip_ratio": 0.0, "completion_length": 175.94140625, "epoch": 0.9581920903954803, "grad_norm": 0.19075711071491241, "kl": 0.035160064697265625, "learning_rate": 1.8767219807716187e-06, "loss": 0.0152, "num_tokens": 40023281.0, "reward": 6.078598067164421, "reward_std": 1.1788357459008694, "rewards/accuracy_reward": 0.4140625, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9745574481785297, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.4620191561989486, "rewards/sql_step_keywords_recall_reward": 0.6917209886014462, "step": 106 }, { "clip_ratio": 0.0, "completion_length": 174.169921875, "epoch": 0.9672316384180791, "grad_norm": 0.19707335531711578, "kl": 0.036041259765625, "learning_rate": 1.853638403264141e-06, "loss": 0.0039, "num_tokens": 40516220.0, "reward": 6.2247384339571, "reward_std": 1.1692724945023656, "rewards/accuracy_reward": 0.46875, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9709844719618559, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.43461011815816164, "rewards/sql_step_keywords_recall_reward": 0.6970336530357599, "step": 107 }, { "clip_ratio": 0.0, "completion_length": 179.353515625, "epoch": 0.976271186440678, "grad_norm": 0.19845238327980042, "kl": 0.036739349365234375, "learning_rate": 1.8304657991798111e-06, "loss": 0.0253, "num_tokens": 41014509.0, "reward": 6.089982569217682, "reward_std": 1.1940758088603616, "rewards/accuracy_reward": 0.4140625, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9712944850325584, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.4622031897306442, "rewards/sql_step_keywords_recall_reward": 0.7060004426166415, "step": 108 }, { "clip_ratio": 0.0, "completion_length": 181.298828125, "epoch": 0.9853107344632769, "grad_norm": 0.20742465555667877, "kl": 0.03838348388671875, "learning_rate": 1.8072100020977862e-06, "loss": 0.0088, "num_tokens": 41514946.0, "reward": 5.96857476234436, "reward_std": 1.265411582775414, "rewards/accuracy_reward": 0.39453125, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.9691251274198294, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.47014313703402877, "rewards/sql_step_keywords_recall_reward": 0.6646321276202798, "step": 109 }, { "clip_ratio": 0.0, "completion_length": 174.021484375, "epoch": 0.9943502824858758, "grad_norm": 0.19735410809516907, "kl": 0.03719329833984375, "learning_rate": 1.7838768665406153e-06, "loss": -0.0014, "num_tokens": 42009789.0, "reward": 6.292138174176216, "reward_std": 1.134878752520308, "rewards/accuracy_reward": 0.474609375, "rewards/exec_out_all_reward": 0.890625, "rewards/exec_out_step_reward": 0.9741862006485462, "rewards/format_reward": 0.962890625, "rewards/keywords_iou_reward": 0.44052915135398507, "rewards/sql_step_keywords_recall_reward": 0.6849405262619257, "step": 110 }, { "clip_ratio": 0.0, "completion_length": 174.87109375, "epoch": 1.0090395480225989, "grad_norm": 0.20000207424163818, "kl": 0.04029083251953125, "learning_rate": 1.7604722665003958e-06, "loss": 0.0104, "num_tokens": 42504659.0, "reward": 6.249677374958992, "reward_std": 1.4170940481126308, "rewards/accuracy_reward": 0.4375, "rewards/exec_out_all_reward": 0.884765625, "rewards/exec_out_step_reward": 0.971117002889514, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.49388469848781824, "rewards/sql_step_keywords_recall_reward": 0.7165721878409386, "step": 111 }, { "clip_ratio": 0.0, "completion_length": 174.515625, "epoch": 1.0180790960451978, "grad_norm": 0.1991143524646759, "kl": 0.038921356201171875, "learning_rate": 1.737002093960025e-06, "loss": 0.0071, "num_tokens": 43000247.0, "reward": 6.2897831201553345, "reward_std": 1.209823683835566, "rewards/accuracy_reward": 0.44921875, "rewards/exec_out_all_reward": 0.900390625, "rewards/exec_out_step_reward": 0.9761315789073706, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.48860851069912314, "rewards/sql_step_keywords_recall_reward": 0.699715806171298, "step": 112 }, { "clip_ratio": 0.0, "completion_length": 176.083984375, "epoch": 1.0271186440677966, "grad_norm": 0.20260195434093475, "kl": 0.037258148193359375, "learning_rate": 1.713472257409928e-06, "loss": -0.0071, "num_tokens": 43496746.0, "reward": 6.175719887018204, "reward_std": 1.3017593873664737, "rewards/accuracy_reward": 0.453125, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.9692282117903233, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.4611970195546746, "rewards/sql_step_keywords_recall_reward": 0.6590976314619184, "step": 113 }, { "clip_ratio": 0.0, "completion_length": 171.19921875, "epoch": 1.0361581920903955, "grad_norm": 0.20994015038013458, "kl": 0.038059234619140625, "learning_rate": 1.689888680360624e-06, "loss": 0.0009, "num_tokens": 43989932.0, "reward": 6.245173625648022, "reward_std": 1.1906684855930507, "rewards/accuracy_reward": 0.447265625, "rewards/exec_out_all_reward": 0.904296875, "rewards/exec_out_step_reward": 0.9748775381594896, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4764430886134505, "rewards/sql_step_keywords_recall_reward": 0.6787380147725344, "step": 114 }, { "clip_ratio": 0.0, "completion_length": 177.447265625, "epoch": 1.0451977401129944, "grad_norm": 0.22471484541893005, "kl": 0.039340972900390625, "learning_rate": 1.6662572998515165e-06, "loss": 0.0046, "num_tokens": 44485501.0, "reward": 6.439530774950981, "reward_std": 1.2442573299631476, "rewards/accuracy_reward": 0.5, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9739366378635168, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.46715445443987846, "rewards/sql_step_keywords_recall_reward": 0.6933945845812559, "step": 115 }, { "clip_ratio": 0.0, "completion_length": 179.21484375, "epoch": 1.0542372881355933, "grad_norm": 0.20036683976650238, "kl": 0.03893280029296875, "learning_rate": 1.6425840649562737e-06, "loss": 0.0051, "num_tokens": 44984123.0, "reward": 6.33234478533268, "reward_std": 1.2393232183530927, "rewards/accuracy_reward": 0.48046875, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9720695428550243, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.45927969785407186, "rewards/sql_step_keywords_recall_reward": 0.6917158551514149, "step": 116 }, { "clip_ratio": 0.0, "completion_length": 181.154296875, "epoch": 1.0632768361581921, "grad_norm": 0.19345538318157196, "kl": 0.03975677490234375, "learning_rate": 1.6188749352851825e-06, "loss": 0.0073, "num_tokens": 45483218.0, "reward": 6.538501590490341, "reward_std": 1.1121491650119424, "rewards/accuracy_reward": 0.501953125, "rewards/exec_out_all_reward": 0.865234375, "rewards/exec_out_step_reward": 0.9685004372149706, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.5210402370430529, "rewards/sql_step_keywords_recall_reward": 0.7037018835544586, "step": 117 }, { "clip_ratio": 0.0, "completion_length": 180.533203125, "epoch": 1.072316384180791, "grad_norm": 0.20600463449954987, "kl": 0.036891937255859375, "learning_rate": 1.5951358794848467e-06, "loss": -0.002, "num_tokens": 45981975.0, "reward": 6.367153495550156, "reward_std": 1.4412866719067097, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9675254262983799, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.48966031754389405, "rewards/sql_step_keywords_recall_reward": 0.6722605032846332, "step": 118 }, { "clip_ratio": 0.0, "completion_length": 180.583984375, "epoch": 1.0813559322033899, "grad_norm": 0.1875942498445511, "kl": 0.036533355712890625, "learning_rate": 1.5713728737356139e-06, "loss": -0.013, "num_tokens": 46481262.0, "reward": 5.682912960648537, "reward_std": 1.2121786596253514, "rewards/accuracy_reward": 0.36328125, "rewards/exec_out_all_reward": 0.8359375, "rewards/exec_out_step_reward": 0.9655343275517225, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.41401433339342475, "rewards/sql_step_keywords_recall_reward": 0.6491155764088035, "step": 119 }, { "clip_ratio": 0.0, "completion_length": 184.494140625, "epoch": 1.0903954802259888, "grad_norm": 0.2242497056722641, "kl": 0.041225433349609375, "learning_rate": 1.5475919002471018e-06, "loss": 0.0018, "num_tokens": 46983563.0, "reward": 6.413750275969505, "reward_std": 1.4098568577319384, "rewards/accuracy_reward": 0.505859375, "rewards/exec_out_all_reward": 0.875, "rewards/exec_out_step_reward": 0.9675168935209513, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.45125941652804613, "rewards/sql_step_keywords_recall_reward": 0.7019176911562681, "step": 120 }, { "clip_ratio": 0.0, "completion_length": 181.625, "epoch": 1.0994350282485876, "grad_norm": 0.1926642209291458, "kl": 0.035884857177734375, "learning_rate": 1.523798945752212e-06, "loss": 0.0016, "num_tokens": 47480911.0, "reward": 6.660622417926788, "reward_std": 1.1989344246685505, "rewards/accuracy_reward": 0.52734375, "rewards/exec_out_all_reward": 0.927734375, "rewards/exec_out_step_reward": 0.9832945894449949, "rewards/format_reward": 0.958984375, "rewards/keywords_iou_reward": 0.480492593254894, "rewards/sql_step_keywords_recall_reward": 0.7202489655464888, "step": 121 }, { "clip_ratio": 0.0, "completion_length": 190.0234375, "epoch": 1.1084745762711865, "grad_norm": 0.18677794933319092, "kl": 0.03594970703125, "learning_rate": 1.5e-06, "loss": -0.0015, "num_tokens": 47983023.0, "reward": 6.145782947540283, "reward_std": 1.3262191619724035, "rewards/accuracy_reward": 0.447265625, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9680919889360666, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.4536509499885142, "rewards/sql_step_keywords_recall_reward": 0.6532015362754464, "step": 122 }, { "clip_ratio": 0.0, "completion_length": 190.458984375, "epoch": 1.1175141242937854, "grad_norm": 0.19311833381652832, "kl": 0.034397125244140625, "learning_rate": 1.476201054247788e-06, "loss": 0.0084, "num_tokens": 48485038.0, "reward": 6.124564379453659, "reward_std": 1.1108355158939958, "rewards/accuracy_reward": 0.44921875, "rewards/exec_out_all_reward": 0.857421875, "rewards/exec_out_step_reward": 0.9658536426723003, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4373150817118585, "rewards/sql_step_keywords_recall_reward": 0.6903305593878031, "step": 123 }, { "clip_ratio": 0.0, "completion_length": 194.169921875, "epoch": 1.1265536723163843, "grad_norm": 0.19611912965774536, "kl": 0.03643798828125, "learning_rate": 1.452408099752899e-06, "loss": -0.0002, "num_tokens": 48990821.0, "reward": 6.194984808564186, "reward_std": 1.3247142443433404, "rewards/accuracy_reward": 0.4375, "rewards/exec_out_all_reward": 0.8515625, "rewards/exec_out_step_reward": 0.9632626511156559, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.4958450337871909, "rewards/sql_step_keywords_recall_reward": 0.6951102269813418, "step": 124 }, { "clip_ratio": 0.0, "completion_length": 191.966796875, "epoch": 1.1355932203389831, "grad_norm": 0.1942368596792221, "kl": 0.034610748291015625, "learning_rate": 1.4286271262643866e-06, "loss": 0.011, "num_tokens": 49493892.0, "reward": 6.56321893632412, "reward_std": 1.2955252706306055, "rewards/accuracy_reward": 0.51171875, "rewards/exec_out_all_reward": 0.888671875, "rewards/exec_out_step_reward": 0.9716486856341362, "rewards/format_reward": 0.95703125, "rewards/keywords_iou_reward": 0.5003192345611751, "rewards/sql_step_keywords_recall_reward": 0.6983536276966333, "step": 125 }, { "clip_ratio": 0.0, "completion_length": 192.224609375, "epoch": 1.144632768361582, "grad_norm": 0.20695985853672028, "kl": 0.03424835205078125, "learning_rate": 1.4048641205151533e-06, "loss": -0.0047, "num_tokens": 49996803.0, "reward": 6.204301163554192, "reward_std": 1.1253142580389977, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.8984375, "rewards/exec_out_step_reward": 0.9757238961756229, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.4387336834333837, "rewards/sql_step_keywords_recall_reward": 0.7007192308083177, "step": 126 }, { "clip_ratio": 0.0, "completion_length": 194.66015625, "epoch": 1.1536723163841809, "grad_norm": 0.19700393080711365, "kl": 0.03594970703125, "learning_rate": 1.3811250647148171e-06, "loss": 0.0124, "num_tokens": 50504301.0, "reward": 6.500779703259468, "reward_std": 1.2046514563262463, "rewards/accuracy_reward": 0.517578125, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.9675091523677111, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.4829273517243564, "rewards/sql_step_keywords_recall_reward": 0.7021815236657858, "step": 127 }, { "clip_ratio": 0.0, "completion_length": 190.998046875, "epoch": 1.1627118644067798, "grad_norm": 0.19084982573986053, "kl": 0.034423828125, "learning_rate": 1.3574159350437264e-06, "loss": 0.003, "num_tokens": 51006412.0, "reward": 6.172577649354935, "reward_std": 1.3462738115340471, "rewards/accuracy_reward": 0.458984375, "rewards/exec_out_all_reward": 0.869140625, "rewards/exec_out_step_reward": 0.9645081553608179, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4471881305798888, "rewards/sql_step_keywords_recall_reward": 0.6691619791090488, "step": 128 }, { "clip_ratio": 0.0, "completion_length": 197.43359375, "epoch": 1.1717514124293786, "grad_norm": 0.1826663464307785, "kl": 0.035003662109375, "learning_rate": 1.3337427001484835e-06, "loss": 0.0024, "num_tokens": 51513734.0, "reward": 6.046234875917435, "reward_std": 1.4189167954027653, "rewards/accuracy_reward": 0.44140625, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.9655668716877699, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.42813110165297985, "rewards/sql_step_keywords_recall_reward": 0.656046318821609, "step": 129 }, { "clip_ratio": 0.0, "completion_length": 196.70703125, "epoch": 1.1807909604519775, "grad_norm": 0.1818644106388092, "kl": 0.033786773681640625, "learning_rate": 1.3101113196393759e-06, "loss": 0.0028, "num_tokens": 52020596.0, "reward": 5.911285370588303, "reward_std": 1.1000383193604648, "rewards/accuracy_reward": 0.408203125, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.9638741612434387, "rewards/format_reward": 0.95703125, "rewards/keywords_iou_reward": 0.4286212190054357, "rewards/sql_step_keywords_recall_reward": 0.6448562629520893, "step": 130 }, { "clip_ratio": 0.0, "completion_length": 192.52734375, "epoch": 1.1898305084745764, "grad_norm": 0.19093115627765656, "kl": 0.0337066650390625, "learning_rate": 1.2865277425900725e-06, "loss": 0.0096, "num_tokens": 52524122.0, "reward": 6.245767995715141, "reward_std": 1.3251913916319609, "rewards/accuracy_reward": 0.4921875, "rewards/exec_out_all_reward": 0.8671875, "rewards/exec_out_step_reward": 0.966382997110486, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4250256856903434, "rewards/sql_step_keywords_recall_reward": 0.6480836141854525, "step": 131 }, { "clip_ratio": 0.0, "completion_length": 190.916015625, "epoch": 1.1988700564971753, "grad_norm": 0.19361349940299988, "kl": 0.033050537109375, "learning_rate": 1.2629979060399751e-06, "loss": -0.0008, "num_tokens": 53027339.0, "reward": 6.7118589878082275, "reward_std": 1.4029255080968142, "rewards/accuracy_reward": 0.560546875, "rewards/exec_out_all_reward": 0.9140625, "rewards/exec_out_step_reward": 0.9767283629626036, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.477216730825603, "rewards/sql_step_keywords_recall_reward": 0.6849940754473209, "step": 132 }, { "clip_ratio": 0.0, "completion_length": 189.78125, "epoch": 1.207909604519774, "grad_norm": 0.19163502752780914, "kl": 0.033824920654296875, "learning_rate": 1.2395277334996047e-06, "loss": -0.002, "num_tokens": 53530615.0, "reward": 6.5633436143398285, "reward_std": 1.354396466165781, "rewards/accuracy_reward": 0.529296875, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.969381669536233, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.4778866241686046, "rewards/sql_step_keywords_recall_reward": 0.6909231022000313, "step": 133 }, { "clip_ratio": 0.0, "completion_length": 188.806640625, "epoch": 1.2169491525423728, "grad_norm": 0.19524553418159485, "kl": 0.0322418212890625, "learning_rate": 1.2161231334593852e-06, "loss": 0.0042, "num_tokens": 54033328.0, "reward": 6.4206047505140305, "reward_std": 1.3125502597540617, "rewards/accuracy_reward": 0.494140625, "rewards/exec_out_all_reward": 0.8984375, "rewards/exec_out_step_reward": 0.9737436473369598, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.47170518431812525, "rewards/sql_step_keywords_recall_reward": 0.6909506395459175, "step": 134 }, { "clip_ratio": 0.0, "completion_length": 189.267578125, "epoch": 1.2259887005649717, "grad_norm": 0.18766768276691437, "kl": 0.034023284912109375, "learning_rate": 1.1927899979022142e-06, "loss": -0.0056, "num_tokens": 54535337.0, "reward": 6.526236951351166, "reward_std": 1.2283624270930886, "rewards/accuracy_reward": 0.515625, "rewards/exec_out_all_reward": 0.865234375, "rewards/exec_out_step_reward": 0.9680036306381226, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.5038230996578932, "rewards/sql_step_keywords_recall_reward": 0.6794933304190636, "step": 135 }, { "clip_ratio": 0.0, "completion_length": 192.39453125, "epoch": 1.2350282485875705, "grad_norm": 0.19446401298046112, "kl": 0.0313568115234375, "learning_rate": 1.169534200820189e-06, "loss": 0.0045, "num_tokens": 55037983.0, "reward": 6.247622415423393, "reward_std": 1.3995716699864715, "rewards/accuracy_reward": 0.443359375, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.962557353079319, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.4969401224516332, "rewards/sql_step_keywords_recall_reward": 0.7130598044022918, "step": 136 }, { "clip_ratio": 0.0, "completion_length": 188.71484375, "epoch": 1.2440677966101694, "grad_norm": 0.1841951161623001, "kl": 0.03244781494140625, "learning_rate": 1.146361596735859e-06, "loss": 0.0073, "num_tokens": 55542381.0, "reward": 6.34030369669199, "reward_std": 1.3015205739066005, "rewards/accuracy_reward": 0.48046875, "rewards/exec_out_all_reward": 0.8984375, "rewards/exec_out_step_reward": 0.9747984893620014, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.46549498522654176, "rewards/sql_step_keywords_recall_reward": 0.6747496416792274, "step": 137 }, { "clip_ratio": 0.0, "completion_length": 190.142578125, "epoch": 1.2531073446327683, "grad_norm": 0.19216689467430115, "kl": 0.030513763427734375, "learning_rate": 1.1232780192283814e-06, "loss": 0.0089, "num_tokens": 56045178.0, "reward": 6.406587705016136, "reward_std": 1.128734229831025, "rewards/accuracy_reward": 0.486328125, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9704783633351326, "rewards/format_reward": 0.96484375, "rewards/keywords_iou_reward": 0.4696982908062637, "rewards/sql_step_keywords_recall_reward": 0.6998377349227667, "step": 138 }, { "clip_ratio": 0.0, "completion_length": 186.853515625, "epoch": 1.2621468926553672, "grad_norm": 0.19616030156612396, "kl": 0.032970428466796875, "learning_rate": 1.1002892794649477e-06, "loss": 0.0007, "num_tokens": 56547795.0, "reward": 6.060941353440285, "reward_std": 1.3276239773258567, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.853515625, "rewards/exec_out_step_reward": 0.9571653380990028, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.4333818582817912, "rewards/sql_step_keywords_recall_reward": 0.6471685189753771, "step": 139 }, { "clip_ratio": 0.0, "completion_length": 185.00390625, "epoch": 1.271186440677966, "grad_norm": 0.18865719437599182, "kl": 0.030315399169921875, "learning_rate": 1.0774011647378555e-06, "loss": 0.0, "num_tokens": 57049073.0, "reward": 6.1720483005046844, "reward_std": 1.3698342852294445, "rewards/accuracy_reward": 0.45703125, "rewards/exec_out_all_reward": 0.83984375, "rewards/exec_out_step_reward": 0.9541589226573706, "rewards/format_reward": 0.955078125, "rewards/keywords_iou_reward": 0.46421836549416184, "rewards/sql_step_keywords_recall_reward": 0.6664058230817318, "step": 140 }, { "clip_ratio": 0.0, "completion_length": 185.212890625, "epoch": 1.280225988700565, "grad_norm": 0.19604600965976715, "kl": 0.03199005126953125, "learning_rate": 1.0546194370075883e-06, "loss": -0.0021, "num_tokens": 57548486.0, "reward": 6.491792589426041, "reward_std": 1.2623751778155565, "rewards/accuracy_reward": 0.501953125, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9714409783482552, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.5014809351414442, "rewards/sql_step_keywords_recall_reward": 0.6736397361382842, "step": 141 }, { "clip_ratio": 0.0, "completion_length": 183.6796875, "epoch": 1.2892655367231638, "grad_norm": 0.1924704760313034, "kl": 0.03244781494140625, "learning_rate": 1.0319498314522695e-06, "loss": 0.0019, "num_tokens": 58047710.0, "reward": 6.139053791761398, "reward_std": 1.1230434579774737, "rewards/accuracy_reward": 0.4453125, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9705496653914452, "rewards/format_reward": 0.9296875, "rewards/keywords_iou_reward": 0.4448565673374105, "rewards/sql_step_keywords_recall_reward": 0.68504096288234, "step": 142 }, { "clip_ratio": 0.0, "completion_length": 188.365234375, "epoch": 1.2983050847457627, "grad_norm": 0.1963050365447998, "kl": 0.03145599365234375, "learning_rate": 1.0093980550238675e-06, "loss": 0.0036, "num_tokens": 58549501.0, "reward": 6.090264290571213, "reward_std": 1.4014679677784443, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.857421875, "rewards/exec_out_step_reward": 0.9621853325515985, "rewards/format_reward": 0.916015625, "rewards/keywords_iou_reward": 0.47523164842277765, "rewards/sql_step_keywords_recall_reward": 0.6776156453415751, "step": 143 }, { "clip_ratio": 0.0, "completion_length": 181.072265625, "epoch": 1.3073446327683615, "grad_norm": 0.2053123414516449, "kl": 0.031829833984375, "learning_rate": 9.86969785011497e-07, "loss": 0.0054, "num_tokens": 59046626.0, "reward": 6.3448584377765656, "reward_std": 1.176757472101599, "rewards/accuracy_reward": 0.458984375, "rewards/exec_out_all_reward": 0.884765625, "rewards/exec_out_step_reward": 0.9675664994865656, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.49913227930665016, "rewards/sql_step_keywords_recall_reward": 0.7110585309565067, "step": 144 }, { "clip_ratio": 0.0, "completion_length": 185.021484375, "epoch": 1.3163841807909604, "grad_norm": 0.19113275408744812, "kl": 0.031230926513671875, "learning_rate": 9.646706676121923e-07, "loss": -0.0098, "num_tokens": 59546101.0, "reward": 6.435375913977623, "reward_std": 1.407385234721005, "rewards/accuracy_reward": 0.4921875, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9711650554090738, "rewards/format_reward": 0.953125, "rewards/keywords_iou_reward": 0.48695238353684545, "rewards/sql_step_keywords_recall_reward": 0.6875717546790838, "step": 145 }, { "clip_ratio": 0.0, "completion_length": 182.16015625, "epoch": 1.3254237288135593, "grad_norm": 0.2129882425069809, "kl": 0.03119659423828125, "learning_rate": 9.425063165095089e-07, "loss": -0.0039, "num_tokens": 60046491.0, "reward": 6.0068028047680855, "reward_std": 1.2135074082762003, "rewards/accuracy_reward": 0.4140625, "rewards/exec_out_all_reward": 0.87890625, "rewards/exec_out_step_reward": 0.9671890567988157, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.44391668657772243, "rewards/sql_step_keywords_recall_reward": 0.6752177719026804, "step": 146 }, { "clip_ratio": 0.0, "completion_length": 183.91796875, "epoch": 1.3344632768361582, "grad_norm": 0.21050292253494263, "kl": 0.032924652099609375, "learning_rate": 9.204823114603069e-07, "loss": 0.0047, "num_tokens": 60546385.0, "reward": 6.227329030632973, "reward_std": 1.179283824749291, "rewards/accuracy_reward": 0.458984375, "rewards/exec_out_all_reward": 0.8515625, "rewards/exec_out_step_reward": 0.9637028854340315, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4683625341858715, "rewards/sql_step_keywords_recall_reward": 0.6999479737132788, "step": 147 }, { "clip_ratio": 0.0, "completion_length": 185.40234375, "epoch": 1.343502824858757, "grad_norm": 0.21124009788036346, "kl": 0.032260894775390625, "learning_rate": 8.986041968900797e-07, "loss": 0.0088, "num_tokens": 61049115.0, "reward": 6.444768786430359, "reward_std": 1.4197536138817668, "rewards/accuracy_reward": 0.50390625, "rewards/exec_out_all_reward": 0.884765625, "rewards/exec_out_step_reward": 0.9735870882868767, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.4634511903859675, "rewards/sql_step_keywords_recall_reward": 0.7102948874235153, "step": 148 }, { "clip_ratio": 0.0, "completion_length": 186.373046875, "epoch": 1.352542372881356, "grad_norm": 0.21280112862586975, "kl": 0.02997589111328125, "learning_rate": 8.768774804971705e-07, "loss": 0.0098, "num_tokens": 61551534.0, "reward": 6.14260359108448, "reward_std": 1.3231439045630395, "rewards/accuracy_reward": 0.443359375, "rewards/exec_out_all_reward": 0.849609375, "rewards/exec_out_step_reward": 0.9615505710244179, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.46615127846598625, "rewards/sql_step_keywords_recall_reward": 0.6784378979355097, "step": 149 }, { "clip_ratio": 0.0, "completion_length": 187.35546875, "epoch": 1.3615819209039548, "grad_norm": 0.20350147783756256, "kl": 0.031444549560546875, "learning_rate": 8.553076318662425e-07, "loss": 0.0024, "num_tokens": 62054616.0, "reward": 5.876114495098591, "reward_std": 1.1356665641069412, "rewards/accuracy_reward": 0.373046875, "rewards/exec_out_all_reward": 0.857421875, "rewards/exec_out_step_reward": 0.9639787971973419, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.4735513115301728, "rewards/sql_step_keywords_recall_reward": 0.6779237259179354, "step": 150 }, { "clip_ratio": 0.0, "completion_length": 182.556640625, "epoch": 1.3706214689265537, "grad_norm": 0.18335995078086853, "kl": 0.030361175537109375, "learning_rate": 8.339000810913388e-07, "loss": -0.0031, "num_tokens": 62553909.0, "reward": 6.188477337360382, "reward_std": 1.1688060224987566, "rewards/accuracy_reward": 0.466796875, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9724152106791735, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.421648170100525, "rewards/sql_step_keywords_recall_reward": 0.6715939035639167, "step": 151 }, { "clip_ratio": 0.0, "completion_length": 178.1953125, "epoch": 1.3796610169491526, "grad_norm": 0.18019497394561768, "kl": 0.032196044921875, "learning_rate": 8.126602174088844e-07, "loss": -0.0063, "num_tokens": 63051989.0, "reward": 6.597941100597382, "reward_std": 1.2893404318019748, "rewards/accuracy_reward": 0.533203125, "rewards/exec_out_all_reward": 0.890625, "rewards/exec_out_step_reward": 0.9766291547566652, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4863300649449229, "rewards/sql_step_keywords_recall_reward": 0.6838080808520317, "step": 152 }, { "clip_ratio": 0.0, "completion_length": 181.888671875, "epoch": 1.3887005649717514, "grad_norm": 0.20189358294010162, "kl": 0.031101226806640625, "learning_rate": 7.915933878409761e-07, "loss": -0.0082, "num_tokens": 63552568.0, "reward": 6.299270272254944, "reward_std": 1.1446837144903839, "rewards/accuracy_reward": 0.484375, "rewards/exec_out_all_reward": 0.873046875, "rewards/exec_out_step_reward": 0.9710131492465734, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.45233285753056407, "rewards/sql_step_keywords_recall_reward": 0.6716383351013064, "step": 153 }, { "clip_ratio": 0.0, "completion_length": 179.68359375, "epoch": 1.3977401129943503, "grad_norm": 0.1866413652896881, "kl": 0.03018951416015625, "learning_rate": 7.707048958492972e-07, "loss": 0.0052, "num_tokens": 64051946.0, "reward": 6.494629591703415, "reward_std": 1.1752266022376716, "rewards/accuracy_reward": 0.50390625, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9707899298518896, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.4983974387869239, "rewards/sql_step_keywords_recall_reward": 0.6989197302609682, "step": 154 }, { "clip_ratio": 0.0, "completion_length": 179.88671875, "epoch": 1.4067796610169492, "grad_norm": 0.18069399893283844, "kl": 0.031505584716796875, "learning_rate": 7.500000000000003e-07, "loss": 0.0009, "num_tokens": 64549212.0, "reward": 6.292890816926956, "reward_std": 1.408079206943512, "rewards/accuracy_reward": 0.4921875, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.9635618217289448, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.44063833844847977, "rewards/sql_step_keywords_recall_reward": 0.6863335473462939, "step": 155 }, { "clip_ratio": 0.0, "completion_length": 180.708984375, "epoch": 1.415819209039548, "grad_norm": 0.19954738020896912, "kl": 0.03003692626953125, "learning_rate": 7.294839126398909e-07, "loss": 0.0072, "num_tokens": 65046991.0, "reward": 6.40887725353241, "reward_std": 1.1249554408714175, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.90234375, "rewards/exec_out_step_reward": 0.9757773783057928, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.48421067791059613, "rewards/sql_step_keywords_recall_reward": 0.6892878729850054, "step": 156 }, { "clip_ratio": 0.0, "completion_length": 180.326171875, "epoch": 1.424858757062147, "grad_norm": 0.1969391107559204, "kl": 0.0305023193359375, "learning_rate": 7.091617985842463e-07, "loss": 0.0018, "num_tokens": 65544562.0, "reward": 6.413661152124405, "reward_std": 1.2689514786470681, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.890625, "rewards/exec_out_step_reward": 0.9725539479404688, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.49607263831421733, "rewards/sql_step_keywords_recall_reward": 0.693102465942502, "step": 157 }, { "clip_ratio": 0.0, "completion_length": 178.669921875, "epoch": 1.4338983050847458, "grad_norm": 0.19801102578639984, "kl": 0.03037261962890625, "learning_rate": 6.890387738166042e-07, "loss": -0.0004, "num_tokens": 66041689.0, "reward": 6.508177891373634, "reward_std": 1.3759017111733556, "rewards/accuracy_reward": 0.5234375, "rewards/exec_out_all_reward": 0.923828125, "rewards/exec_out_step_reward": 0.9815886970609426, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4463528748601675, "rewards/sql_step_keywords_recall_reward": 0.6709927897900343, "step": 158 }, { "clip_ratio": 0.0, "completion_length": 178.77734375, "epoch": 1.4429378531073447, "grad_norm": 0.19369132816791534, "kl": 0.0314788818359375, "learning_rate": 6.691199042008347e-07, "loss": 0.003, "num_tokens": 66538115.0, "reward": 6.160938322544098, "reward_std": 1.2979237555991858, "rewards/accuracy_reward": 0.46875, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9724400117993355, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.412393850274384, "rewards/sql_step_keywords_recall_reward": 0.6566793192178011, "step": 159 }, { "clip_ratio": 0.0, "completion_length": 176.439453125, "epoch": 1.4519774011299436, "grad_norm": 0.1850394755601883, "kl": 0.02997589111328125, "learning_rate": 6.494102042058441e-07, "loss": 0.002, "num_tokens": 67035676.0, "reward": 6.159577623009682, "reward_std": 1.2864303840324283, "rewards/accuracy_reward": 0.462890625, "rewards/exec_out_all_reward": 0.873046875, "rewards/exec_out_step_reward": 0.9678982235491276, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4201263478025794, "rewards/sql_step_keywords_recall_reward": 0.6854111216962337, "step": 160 }, { "clip_ratio": 0.0, "completion_length": 180.287109375, "epoch": 1.4610169491525424, "grad_norm": 0.20010262727737427, "kl": 0.030330657958984375, "learning_rate": 6.29914635643203e-07, "loss": -0.0048, "num_tokens": 67531747.0, "reward": 6.243592485785484, "reward_std": 1.3317115511745214, "rewards/accuracy_reward": 0.455078125, "rewards/exec_out_all_reward": 0.888671875, "rewards/exec_out_step_reward": 0.9729833193123341, "rewards/format_reward": 0.966796875, "rewards/keywords_iou_reward": 0.46371736377477646, "rewards/sql_step_keywords_recall_reward": 0.6673932354897261, "step": 161 }, { "clip_ratio": 0.0, "completion_length": 181.583984375, "epoch": 1.4700564971751413, "grad_norm": 0.19057103991508484, "kl": 0.032482147216796875, "learning_rate": 6.106381064180395e-07, "loss": 0.0051, "num_tokens": 68029614.0, "reward": 6.253777638077736, "reward_std": 1.371273732278496, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.87890625, "rewards/exec_out_step_reward": 0.9656637534499168, "rewards/format_reward": 0.916015625, "rewards/keywords_iou_reward": 0.450145754031837, "rewards/sql_step_keywords_recall_reward": 0.6788380099460483, "step": 162 }, { "clip_ratio": 0.0, "completion_length": 181.328125, "epoch": 1.4790960451977402, "grad_norm": 0.1943560391664505, "kl": 0.02936553955078125, "learning_rate": 5.915854692935003e-07, "loss": 0.0001, "num_tokens": 68527730.0, "reward": 6.239083915948868, "reward_std": 1.3497515600174665, "rewards/accuracy_reward": 0.48046875, "rewards/exec_out_all_reward": 0.84375, "rewards/exec_out_step_reward": 0.9633672833442688, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.45536057371646166, "rewards/sql_step_keywords_recall_reward": 0.6638235626742244, "step": 163 }, { "clip_ratio": 0.0, "completion_length": 177.58203125, "epoch": 1.488135593220339, "grad_norm": 0.19292642176151276, "kl": 0.029979705810546875, "learning_rate": 5.727615206690921e-07, "loss": 0.0093, "num_tokens": 69024424.0, "reward": 6.809370994567871, "reward_std": 1.231651745736599, "rewards/accuracy_reward": 0.5703125, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9722276534885168, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4978084210306406, "rewards/sql_step_keywords_recall_reward": 0.7380108721554279, "step": 164 }, { "clip_ratio": 0.0, "completion_length": 176.873046875, "epoch": 1.497175141242938, "grad_norm": 0.19874171912670135, "kl": 0.02878570556640625, "learning_rate": 5.541709993732168e-07, "loss": 0.0052, "num_tokens": 69519951.0, "reward": 6.364575162529945, "reward_std": 1.2380520347505808, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.912109375, "rewards/exec_out_step_reward": 0.9779986720532179, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.4500721860677004, "rewards/sql_step_keywords_recall_reward": 0.6973696993663907, "step": 165 }, { "clip_ratio": 0.0, "completion_length": 182.763671875, "epoch": 1.5062146892655366, "grad_norm": 0.19222836196422577, "kl": 0.031803131103515625, "learning_rate": 5.358185854701909e-07, "loss": 0.016, "num_tokens": 70020190.0, "reward": 5.9986598044633865, "reward_std": 1.2878384962677956, "rewards/accuracy_reward": 0.392578125, "rewards/exec_out_all_reward": 0.90234375, "rewards/exec_out_step_reward": 0.9768469464033842, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.46051234856713563, "rewards/sql_step_keywords_recall_reward": 0.6847725082188845, "step": 166 }, { "clip_ratio": 0.0, "completion_length": 179.427734375, "epoch": 1.5152542372881355, "grad_norm": 0.18806356191635132, "kl": 0.02924346923828125, "learning_rate": 5.177088990820725e-07, "loss": 0.0156, "num_tokens": 70519589.0, "reward": 6.586422994732857, "reward_std": 1.445882560685277, "rewards/accuracy_reward": 0.525390625, "rewards/exec_out_all_reward": 0.873046875, "rewards/exec_out_step_reward": 0.9663845468312502, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.48613627161830664, "rewards/sql_step_keywords_recall_reward": 0.7317502833902836, "step": 167 }, { "clip_ratio": 0.0, "completion_length": 182.669921875, "epoch": 1.5242937853107343, "grad_norm": 0.19503186643123627, "kl": 0.027843475341796875, "learning_rate": 4.998464992255627e-07, "loss": -0.0017, "num_tokens": 71019964.0, "reward": 5.967584699392319, "reward_std": 1.1278308150358498, "rewards/accuracy_reward": 0.404296875, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9704303070902824, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4382792445831001, "rewards/sql_step_keywords_recall_reward": 0.6870021214708686, "step": 168 }, { "clip_ratio": 0.0, "completion_length": 179.84375, "epoch": 1.5333333333333332, "grad_norm": 0.18861566483974457, "kl": 0.027347564697265625, "learning_rate": 4.82235882664302e-07, "loss": -0.0001, "num_tokens": 71518200.0, "reward": 6.40145568549633, "reward_std": 1.264804814942181, "rewards/accuracy_reward": 0.486328125, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9703760556876659, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.47830750420689583, "rewards/sql_step_keywords_recall_reward": 0.7127459226176143, "step": 169 }, { "clip_ratio": 0.0, "completion_length": 179.998046875, "epoch": 1.542372881355932, "grad_norm": 0.19780074059963226, "kl": 0.027751922607421875, "learning_rate": 4.648814827768323e-07, "loss": 0.0012, "num_tokens": 72016087.0, "reward": 6.078360304236412, "reward_std": 1.233950492925942, "rewards/accuracy_reward": 0.4296875, "rewards/exec_out_all_reward": 0.869140625, "rewards/exec_out_step_reward": 0.9696281347423792, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.45836541755124927, "rewards/sql_step_keywords_recall_reward": 0.6568450266495347, "step": 170 }, { "clip_ratio": 0.0, "completion_length": 184.81640625, "epoch": 1.551412429378531, "grad_norm": 0.20185421407222748, "kl": 0.028972625732421875, "learning_rate": 4.4778766844051793e-07, "loss": -0.0002, "num_tokens": 72515641.0, "reward": 6.029541537165642, "reward_std": 1.3070494611747563, "rewards/accuracy_reward": 0.416015625, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.9650747179985046, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.4725735238753259, "rewards/sql_step_keywords_recall_reward": 0.6486166473478079, "step": 171 }, { "clip_ratio": 0.0, "completion_length": 180.265625, "epoch": 1.5604519774011298, "grad_norm": 0.18522052466869354, "kl": 0.028156280517578125, "learning_rate": 4.309587429317061e-07, "loss": -0.0069, "num_tokens": 73014473.0, "reward": 6.007387965917587, "reward_std": 1.1997167933732271, "rewards/accuracy_reward": 0.4140625, "rewards/exec_out_all_reward": 0.857421875, "rewards/exec_out_step_reward": 0.9654715433716774, "rewards/format_reward": 0.955078125, "rewards/keywords_iou_reward": 0.441091364948079, "rewards/sql_step_keywords_recall_reward": 0.690983671694994, "step": 172 }, { "clip_ratio": 0.0, "completion_length": 186.296875, "epoch": 1.5694915254237287, "grad_norm": 0.18709787726402283, "kl": 0.02730560302734375, "learning_rate": 4.1439894284239473e-07, "loss": 0.0048, "num_tokens": 73516757.0, "reward": 5.854448825120926, "reward_std": 1.0955259250476956, "rewards/accuracy_reward": 0.396484375, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.9658908490091562, "rewards/format_reward": 0.919921875, "rewards/keywords_iou_reward": 0.419542781310156, "rewards/sql_step_keywords_recall_reward": 0.6803318522870541, "step": 173 }, { "clip_ratio": 0.0, "completion_length": 184.041015625, "epoch": 1.5785310734463276, "grad_norm": 0.18421481549739838, "kl": 0.02740478515625, "learning_rate": 3.981124370137002e-07, "loss": 0.0015, "num_tokens": 74016222.0, "reward": 6.491113051772118, "reward_std": 1.2589517189189792, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.87890625, "rewards/exec_out_step_reward": 0.969364620745182, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.5320240047294647, "rewards/sql_step_keywords_recall_reward": 0.7194192241877317, "step": 174 }, { "clip_ratio": 0.0, "completion_length": 185.123046875, "epoch": 1.5875706214689265, "grad_norm": 0.20407724380493164, "kl": 0.027408599853515625, "learning_rate": 3.82103325486368e-07, "loss": 0.0167, "num_tokens": 74517801.0, "reward": 6.3000208735466, "reward_std": 1.357117084786296, "rewards/accuracy_reward": 0.4765625, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9679687526077032, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.44746885914355516, "rewards/sql_step_keywords_recall_reward": 0.704692529514432, "step": 175 }, { "clip_ratio": 0.0, "completion_length": 180.05078125, "epoch": 1.5966101694915253, "grad_norm": 0.19040866196155548, "kl": 0.02652740478515625, "learning_rate": 3.6637563846861275e-07, "loss": -0.0012, "num_tokens": 75013259.0, "reward": 6.4401615858078, "reward_std": 1.168332906672731, "rewards/accuracy_reward": 0.505859375, "rewards/exec_out_all_reward": 0.890625, "rewards/exec_out_step_reward": 0.974679134786129, "rewards/format_reward": 0.9609375, "rewards/keywords_iou_reward": 0.43378148321062326, "rewards/sql_step_keywords_recall_reward": 0.7229194287210703, "step": 176 }, { "clip_ratio": 0.0, "completion_length": 183.84375, "epoch": 1.6056497175141242, "grad_norm": 0.19518616795539856, "kl": 0.027690887451171875, "learning_rate": 3.5093333532153313e-07, "loss": 0.0007, "num_tokens": 75511563.0, "reward": 6.2283158749341965, "reward_std": 1.2062615705654025, "rewards/accuracy_reward": 0.466796875, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9733979757875204, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.4363416051492095, "rewards/sql_step_keywords_recall_reward": 0.698640950024128, "step": 177 }, { "clip_ratio": 0.0, "completion_length": 180.58203125, "epoch": 1.614689265536723, "grad_norm": 0.19140039384365082, "kl": 0.03049468994140625, "learning_rate": 3.357803035623646e-07, "loss": 0.0114, "num_tokens": 76009449.0, "reward": 6.212793804705143, "reward_std": 1.3535035271197557, "rewards/accuracy_reward": 0.4296875, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9737645741552114, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.502739230170846, "rewards/sql_step_keywords_recall_reward": 0.6788633242249489, "step": 178 }, { "clip_ratio": 0.0, "completion_length": 184.478515625, "epoch": 1.623728813559322, "grad_norm": 0.20329172909259796, "kl": 0.02678680419921875, "learning_rate": 3.209203578858191e-07, "loss": 0.0015, "num_tokens": 76512090.0, "reward": 5.99811252951622, "reward_std": 1.3144248933531344, "rewards/accuracy_reward": 0.427734375, "rewards/exec_out_all_reward": 0.890625, "rewards/exec_out_step_reward": 0.9720734115689993, "rewards/format_reward": 0.90625, "rewards/keywords_iou_reward": 0.43393061752431095, "rewards/sql_step_keywords_recall_reward": 0.6503653433173895, "step": 179 }, { "clip_ratio": 0.0, "completion_length": 182.916015625, "epoch": 1.6327683615819208, "grad_norm": 0.20274612307548523, "kl": 0.027004241943359375, "learning_rate": 3.063572392037517e-07, "loss": -0.0061, "num_tokens": 77010299.0, "reward": 6.448321744799614, "reward_std": 1.2834087014198303, "rewards/accuracy_reward": 0.490234375, "rewards/exec_out_all_reward": 0.888671875, "rewards/exec_out_step_reward": 0.9712286107242107, "rewards/format_reward": 0.9609375, "rewards/keywords_iou_reward": 0.4910502852872014, "rewards/sql_step_keywords_recall_reward": 0.6844456251710653, "step": 180 }, { "clip_ratio": 0.0, "completion_length": 178.615234375, "epoch": 1.6418079096045197, "grad_norm": 0.20460090041160583, "kl": 0.02655029296875, "learning_rate": 2.920946137034121e-07, "loss": 0.0016, "num_tokens": 77506942.0, "reward": 6.275775626301765, "reward_std": 1.3149840263649821, "rewards/accuracy_reward": 0.458984375, "rewards/exec_out_all_reward": 0.89453125, "rewards/exec_out_step_reward": 0.9711751285940409, "rewards/format_reward": 0.92578125, "rewards/keywords_iou_reward": 0.4703782368451357, "rewards/sql_step_keywords_recall_reward": 0.7075940538197756, "step": 181 }, { "clip_ratio": 0.0, "completion_length": 181.2421875, "epoch": 1.6508474576271186, "grad_norm": 0.1951521635055542, "kl": 0.02664947509765625, "learning_rate": 2.781360719244964e-07, "loss": 0.0085, "num_tokens": 78006654.0, "reward": 6.269969627261162, "reward_std": 1.5092957746237516, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.8671875, "rewards/exec_out_step_reward": 0.9697536900639534, "rewards/format_reward": 0.923828125, "rewards/keywords_iou_reward": 0.43865267653018236, "rewards/sql_step_keywords_recall_reward": 0.7022074311971664, "step": 182 }, { "clip_ratio": 0.0, "completion_length": 181.6171875, "epoch": 1.6598870056497175, "grad_norm": 0.18546244502067566, "kl": 0.02535247802734375, "learning_rate": 2.64485127855251e-07, "loss": 0.0072, "num_tokens": 78504702.0, "reward": 6.320208579301834, "reward_std": 1.431410001590848, "rewards/accuracy_reward": 0.474609375, "rewards/exec_out_all_reward": 0.859375, "rewards/exec_out_step_reward": 0.9669557642191648, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.4761747941374779, "rewards/sql_step_keywords_recall_reward": 0.7055906923487782, "step": 183 }, { "clip_ratio": 0.0, "completion_length": 179.552734375, "epoch": 1.6689265536723163, "grad_norm": 0.20000162720680237, "kl": 0.026123046875, "learning_rate": 2.5114521804784305e-07, "loss": -0.0022, "num_tokens": 79001101.0, "reward": 6.44641749560833, "reward_std": 1.3242136964108795, "rewards/accuracy_reward": 0.498046875, "rewards/exec_out_all_reward": 0.904296875, "rewards/exec_out_step_reward": 0.9747899696230888, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.46967238979414105, "rewards/sql_step_keywords_recall_reward": 0.6924389712512493, "step": 184 }, { "clip_ratio": 0.0, "completion_length": 179.234375, "epoch": 1.6779661016949152, "grad_norm": 0.18595600128173828, "kl": 0.026111602783203125, "learning_rate": 2.3811970075322803e-07, "loss": 0.0056, "num_tokens": 79497289.0, "reward": 6.366844519972801, "reward_std": 1.3081835759803653, "rewards/accuracy_reward": 0.490234375, "rewards/exec_out_all_reward": 0.904296875, "rewards/exec_out_step_reward": 0.9748759996145964, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.43892133235931396, "rewards/sql_step_keywords_recall_reward": 0.7094383966177702, "step": 185 }, { "clip_ratio": 0.0, "completion_length": 182.5234375, "epoch": 1.687005649717514, "grad_norm": 0.20412081480026245, "kl": 0.02643585205078125, "learning_rate": 2.254118550757286e-07, "loss": 0.0128, "num_tokens": 79997557.0, "reward": 6.095862299203873, "reward_std": 1.4136508908122778, "rewards/accuracy_reward": 0.453125, "rewards/exec_out_all_reward": 0.837890625, "rewards/exec_out_step_reward": 0.9592912942171097, "rewards/format_reward": 0.9375, "rewards/keywords_iou_reward": 0.44104228960350156, "rewards/sql_step_keywords_recall_reward": 0.6665958110243082, "step": 186 }, { "clip_ratio": 0.0, "completion_length": 185.845703125, "epoch": 1.696045197740113, "grad_norm": 0.19426719844341278, "kl": 0.026729583740234375, "learning_rate": 2.130248801475344e-07, "loss": -0.0023, "num_tokens": 80499130.0, "reward": 6.401883035898209, "reward_std": 1.3080633180215955, "rewards/accuracy_reward": 0.501953125, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9698521215468645, "rewards/format_reward": 0.93359375, "rewards/keywords_iou_reward": 0.45063989935442805, "rewards/sql_step_keywords_recall_reward": 0.7065323041751981, "step": 187 }, { "clip_ratio": 0.0, "completion_length": 184.69921875, "epoch": 1.7050847457627119, "grad_norm": 0.18957343697547913, "kl": 0.027210235595703125, "learning_rate": 2.0096189432334195e-07, "loss": -0.0014, "num_tokens": 80999876.0, "reward": 6.019113600254059, "reward_std": 1.2530112564563751, "rewards/accuracy_reward": 0.423828125, "rewards/exec_out_all_reward": 0.884765625, "rewards/exec_out_step_reward": 0.9723625108599663, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.4438905091956258, "rewards/sql_step_keywords_recall_reward": 0.6433451026678085, "step": 188 }, { "clip_ratio": 0.0, "completion_length": 182.720703125, "epoch": 1.7141242937853107, "grad_norm": 0.19690978527069092, "kl": 0.026309967041015625, "learning_rate": 1.892259343953226e-07, "loss": 0.0181, "num_tokens": 81498693.0, "reward": 6.560971170663834, "reward_std": 1.3874189644120634, "rewards/accuracy_reward": 0.5078125, "rewards/exec_out_all_reward": 0.9296875, "rewards/exec_out_step_reward": 0.9844036791473627, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.47905752109363675, "rewards/sql_step_keywords_recall_reward": 0.7258742917329073, "step": 189 }, { "clip_ratio": 0.0, "completion_length": 182.2578125, "epoch": 1.7231638418079096, "grad_norm": 0.2019815295934677, "kl": 0.02889251708984375, "learning_rate": 1.7781995482862706e-07, "loss": -0.0001, "num_tokens": 81997317.0, "reward": 6.342395722866058, "reward_std": 1.1282042702659965, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.9375, "rewards/exec_out_step_reward": 0.985164001584053, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.46832023840397596, "rewards/sql_step_keywords_recall_reward": 0.6979350317269564, "step": 190 }, { "clip_ratio": 0.0, "completion_length": 182.46875, "epoch": 1.7322033898305085, "grad_norm": 0.19806276261806488, "kl": 0.027133941650390625, "learning_rate": 1.6674682701761496e-07, "loss": 0.0079, "num_tokens": 82498165.0, "reward": 6.360767655074596, "reward_std": 1.2937721209600568, "rewards/accuracy_reward": 0.49609375, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9698614254593849, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.44721768144518137, "rewards/sql_step_keywords_recall_reward": 0.6937365289777517, "step": 191 }, { "clip_ratio": 0.0, "completion_length": 186.044921875, "epoch": 1.7412429378531074, "grad_norm": 0.1942872703075409, "kl": 0.025554656982421875, "learning_rate": 1.5600933856299637e-07, "loss": 0.0038, "num_tokens": 83000064.0, "reward": 6.068183168768883, "reward_std": 1.4636581200174987, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.833984375, "rewards/exec_out_step_reward": 0.9575916156172752, "rewards/format_reward": 0.912109375, "rewards/keywords_iou_reward": 0.44754891796037555, "rewards/sql_step_keywords_recall_reward": 0.6647125380113721, "step": 192 }, { "clip_ratio": 0.0, "completion_length": 184.869140625, "epoch": 1.7502824858757062, "grad_norm": 0.19219300150871277, "kl": 0.0272674560546875, "learning_rate": 1.4561019257006842e-07, "loss": 0.0018, "num_tokens": 83500337.0, "reward": 6.50420406460762, "reward_std": 1.2782420022413135, "rewards/accuracy_reward": 0.53125, "rewards/exec_out_all_reward": 0.857421875, "rewards/exec_out_step_reward": 0.9676587302237749, "rewards/format_reward": 0.927734375, "rewards/keywords_iou_reward": 0.46198446361813694, "rewards/sql_step_keywords_recall_reward": 0.7024201266467571, "step": 193 }, { "clip_ratio": 0.0, "completion_length": 187.498046875, "epoch": 1.759322033898305, "grad_norm": 0.1927022784948349, "kl": 0.02826690673828125, "learning_rate": 1.3555200696822234e-07, "loss": 0.0006, "num_tokens": 84003200.0, "reward": 6.2347564697265625, "reward_std": 1.3537420043721795, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9668782595545053, "rewards/format_reward": 0.92578125, "rewards/keywords_iou_reward": 0.43135018879547715, "rewards/sql_step_keywords_recall_reward": 0.6786152720451355, "step": 194 }, { "clip_ratio": 0.0, "completion_length": 181.091796875, "epoch": 1.768361581920904, "grad_norm": 0.1913248747587204, "kl": 0.026073455810546875, "learning_rate": 1.2583731385189562e-07, "loss": -0.0044, "num_tokens": 84499811.0, "reward": 6.492121763527393, "reward_std": 1.319369402481243, "rewards/accuracy_reward": 0.509765625, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9670100193470716, "rewards/format_reward": 0.947265625, "rewards/keywords_iou_reward": 0.47826224053278565, "rewards/sql_step_keywords_recall_reward": 0.7053060494363308, "step": 195 }, { "clip_ratio": 0.0, "completion_length": 182.16015625, "epoch": 1.7774011299435029, "grad_norm": 0.18614481389522552, "kl": 0.02542877197265625, "learning_rate": 1.1646855884312813e-07, "loss": -0.0025, "num_tokens": 84998661.0, "reward": 6.048209026455879, "reward_std": 1.3966924250125885, "rewards/accuracy_reward": 0.44921875, "rewards/exec_out_all_reward": 0.833984375, "rewards/exec_out_step_reward": 0.9590921085327864, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.42350240517407656, "rewards/sql_step_keywords_recall_reward": 0.6717996271327138, "step": 196 }, { "clip_ratio": 0.0, "completion_length": 183.830078125, "epoch": 1.7864406779661017, "grad_norm": 0.20049569010734558, "kl": 0.02552032470703125, "learning_rate": 1.0744810047589116e-07, "loss": 0.0085, "num_tokens": 85499318.0, "reward": 6.379006251692772, "reward_std": 1.3671105708926916, "rewards/accuracy_reward": 0.482421875, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9740528911352158, "rewards/format_reward": 0.921875, "rewards/keywords_iou_reward": 0.48139098659157753, "rewards/sql_step_keywords_recall_reward": 0.7038900572806597, "step": 197 }, { "clip_ratio": 0.0, "completion_length": 186.349609375, "epoch": 1.7954802259887006, "grad_norm": 0.1779472380876541, "kl": 0.02698516845703125, "learning_rate": 9.877820960234002e-08, "loss": -0.0, "num_tokens": 85999137.0, "reward": 6.308243364095688, "reward_std": 1.220099939033389, "rewards/accuracy_reward": 0.46875, "rewards/exec_out_all_reward": 0.888671875, "rewards/exec_out_step_reward": 0.9750612266361713, "rewards/format_reward": 0.958984375, "rewards/keywords_iou_reward": 0.4597327196970582, "rewards/sql_step_keywords_recall_reward": 0.6910604787990451, "step": 198 }, { "clip_ratio": 0.0, "completion_length": 181.806640625, "epoch": 1.8045197740112995, "grad_norm": 0.1846829503774643, "kl": 0.025753021240234375, "learning_rate": 9.046106882113752e-08, "loss": 0.0034, "num_tokens": 86497530.0, "reward": 6.5371609181165695, "reward_std": 1.2568824323825538, "rewards/accuracy_reward": 0.537109375, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9686531201004982, "rewards/format_reward": 0.927734375, "rewards/keywords_iou_reward": 0.4614989855326712, "rewards/sql_step_keywords_recall_reward": 0.698244234547019, "step": 199 }, { "clip_ratio": 0.0, "completion_length": 181.83984375, "epoch": 1.8135593220338984, "grad_norm": 0.2010311335325241, "kl": 0.026294708251953125, "learning_rate": 8.249877192799731e-08, "loss": 0.0117, "num_tokens": 86996804.0, "reward": 6.099536940455437, "reward_std": 1.1783363316208124, "rewards/accuracy_reward": 0.42578125, "rewards/exec_out_all_reward": 0.869140625, "rewards/exec_out_step_reward": 0.9668596535921097, "rewards/format_reward": 0.919921875, "rewards/keywords_iou_reward": 0.4779109531082213, "rewards/sql_step_keywords_recall_reward": 0.6846678359434009, "step": 200 }, { "clip_ratio": 0.0, "completion_length": 176.578125, "epoch": 1.8225988700564972, "grad_norm": 0.19091665744781494, "kl": 0.0277862548828125, "learning_rate": 7.489332338858202e-08, "loss": 0.0051, "num_tokens": 87493076.0, "reward": 6.273200556635857, "reward_std": 1.2643384067341685, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.876953125, "rewards/exec_out_step_reward": 0.9696537107229233, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.474200002849102, "rewards/sql_step_keywords_recall_reward": 0.6930374698713422, "step": 201 }, { "clip_ratio": 0.0, "completion_length": 193.27734375, "epoch": 1.831638418079096, "grad_norm": 0.1949450820684433, "kl": 0.02576446533203125, "learning_rate": 6.76466378338892e-08, "loss": -0.003, "num_tokens": 88001494.0, "reward": 6.114458784461021, "reward_std": 1.2841259008273482, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.833984375, "rewards/exec_out_step_reward": 0.9544363897293806, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.45011676382273436, "rewards/sql_step_keywords_recall_reward": 0.6465076114982367, "step": 202 }, { "clip_ratio": 0.0, "completion_length": 183.59375, "epoch": 1.840677966101695, "grad_norm": 0.1869850605726242, "kl": 0.026782989501953125, "learning_rate": 6.076053957825411e-08, "loss": -0.0073, "num_tokens": 88501102.0, "reward": 6.228741064667702, "reward_std": 1.307523036841303, "rewards/accuracy_reward": 0.4609375, "rewards/exec_out_all_reward": 0.89453125, "rewards/exec_out_step_reward": 0.9735940638929605, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.4400957697071135, "rewards/sql_step_keywords_recall_reward": 0.685502259992063, "step": 203 }, { "clip_ratio": 0.0, "completion_length": 184.279296875, "epoch": 1.8497175141242939, "grad_norm": 0.19799074530601501, "kl": 0.026813507080078125, "learning_rate": 5.423676216008694e-08, "loss": 0.0047, "num_tokens": 89002113.0, "reward": 6.288302145898342, "reward_std": 1.3452563788741827, "rewards/accuracy_reward": 0.443359375, "rewards/exec_out_all_reward": 0.896484375, "rewards/exec_out_step_reward": 0.973502604290843, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.496027123183012, "rewards/sql_step_keywords_recall_reward": 0.7211827598512173, "step": 204 }, { "clip_ratio": 0.0, "completion_length": 181.740234375, "epoch": 1.8587570621468927, "grad_norm": 0.1976655274629593, "kl": 0.027782440185546875, "learning_rate": 4.807694790546563e-08, "loss": 0.0013, "num_tokens": 89500732.0, "reward": 6.4734716564416885, "reward_std": 1.2421362679451704, "rewards/accuracy_reward": 0.494140625, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9743815138936043, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.49230897752568126, "rewards/sql_step_keywords_recall_reward": 0.7156439917162061, "step": 205 }, { "clip_ratio": 0.0, "completion_length": 183.732421875, "epoch": 1.8677966101694916, "grad_norm": 0.21844159066677094, "kl": 0.026279449462890625, "learning_rate": 4.2282647514687525e-08, "loss": 0.0087, "num_tokens": 90000723.0, "reward": 6.1350885555148125, "reward_std": 1.201456573791802, "rewards/accuracy_reward": 0.427734375, "rewards/exec_out_all_reward": 0.87109375, "rewards/exec_out_step_reward": 0.9642880447208881, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.47726981807500124, "rewards/sql_step_keywords_recall_reward": 0.6889170501381159, "step": 206 }, { "clip_ratio": 0.0, "completion_length": 185.2109375, "epoch": 1.8768361581920905, "grad_norm": 0.19886131584644318, "kl": 0.026386260986328125, "learning_rate": 3.685531967188943e-08, "loss": 0.0084, "num_tokens": 90502179.0, "reward": 6.224872663617134, "reward_std": 1.0856527155265212, "rewards/accuracy_reward": 0.451171875, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.9733282178640366, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.4684679554775357, "rewards/sql_step_keywords_recall_reward": 0.6759366653859615, "step": 207 }, { "clip_ratio": 0.0, "completion_length": 181.986328125, "epoch": 1.8858757062146894, "grad_norm": 0.1973237842321396, "kl": 0.028614044189453125, "learning_rate": 3.1796330677832056e-08, "loss": 0.0041, "num_tokens": 91001508.0, "reward": 6.379835411906242, "reward_std": 1.199483459815383, "rewards/accuracy_reward": 0.478515625, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.973174761980772, "rewards/format_reward": 0.9453125, "rewards/keywords_iou_reward": 0.4832291747443378, "rewards/sql_step_keywords_recall_reward": 0.6882491651922464, "step": 208 }, { "clip_ratio": 0.0, "completion_length": 182.765625, "epoch": 1.8949152542372882, "grad_norm": 0.20336341857910156, "kl": 0.025714874267578125, "learning_rate": 2.710695410593994e-08, "loss": 0.0105, "num_tokens": 91500808.0, "reward": 6.037193328142166, "reward_std": 1.2909285621717572, "rewards/accuracy_reward": 0.431640625, "rewards/exec_out_all_reward": 0.8828125, "rewards/exec_out_step_reward": 0.9731863792985678, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.4269423745572567, "rewards/sql_step_keywords_recall_reward": 0.6573878172785044, "step": 209 }, { "clip_ratio": 0.0, "completion_length": 185.685546875, "epoch": 1.9039548022598871, "grad_norm": 0.19327940046787262, "kl": 0.02794647216796875, "learning_rate": 2.278837048168797e-08, "loss": -0.0007, "num_tokens": 92000983.0, "reward": 6.355218142271042, "reward_std": 1.3059450194705278, "rewards/accuracy_reward": 0.48046875, "rewards/exec_out_all_reward": 0.919921875, "rewards/exec_out_step_reward": 0.980823727324605, "rewards/format_reward": 0.939453125, "rewards/keywords_iou_reward": 0.4438144704326987, "rewards/sql_step_keywords_recall_reward": 0.7055154535919428, "step": 210 }, { "clip_ratio": 0.0, "completion_length": 187.62890625, "epoch": 1.912994350282486, "grad_norm": 0.19996102154254913, "kl": 0.02562713623046875, "learning_rate": 1.8841666985408568e-08, "loss": 0.0026, "num_tokens": 92501125.0, "reward": 6.144110098481178, "reward_std": 1.3673810623586178, "rewards/accuracy_reward": 0.419921875, "rewards/exec_out_all_reward": 0.85546875, "rewards/exec_out_step_reward": 0.9624224957078695, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.5000754236243665, "rewards/sql_step_keywords_recall_reward": 0.7108335876837373, "step": 211 }, { "clip_ratio": 0.0, "completion_length": 178.302734375, "epoch": 1.9220338983050849, "grad_norm": 0.19664518535137177, "kl": 0.025630950927734375, "learning_rate": 1.5267837178600972e-08, "loss": 0.0021, "num_tokens": 92998136.0, "reward": 6.30431304872036, "reward_std": 1.3761892821639776, "rewards/accuracy_reward": 0.474609375, "rewards/exec_out_all_reward": 0.89453125, "rewards/exec_out_step_reward": 0.9729538708925247, "rewards/format_reward": 0.943359375, "rewards/keywords_iou_reward": 0.454624411650002, "rewards/sql_step_keywords_recall_reward": 0.6857822621241212, "step": 212 }, { "clip_ratio": 0.0, "completion_length": 182.158203125, "epoch": 1.9310734463276837, "grad_norm": 0.18419690430164337, "kl": 0.02671051025390625, "learning_rate": 1.206778075380699e-08, "loss": 0.0046, "num_tokens": 93497689.0, "reward": 6.260894909501076, "reward_std": 1.3260251162573695, "rewards/accuracy_reward": 0.466796875, "rewards/exec_out_all_reward": 0.8984375, "rewards/exec_out_step_reward": 0.9784419946372509, "rewards/format_reward": 0.94140625, "rewards/keywords_iou_reward": 0.45361618138849735, "rewards/sql_step_keywords_recall_reward": 0.6681892573833466, "step": 213 }, { "clip_ratio": 0.0, "completion_length": 183.88671875, "epoch": 1.9401129943502826, "grad_norm": 0.2128608375787735, "kl": 0.027370452880859375, "learning_rate": 9.242303308118816e-09, "loss": -0.0028, "num_tokens": 93996519.0, "reward": 6.385763391852379, "reward_std": 1.3881093207746744, "rewards/accuracy_reward": 0.49609375, "rewards/exec_out_all_reward": 0.89453125, "rewards/exec_out_step_reward": 0.975878132507205, "rewards/format_reward": 0.923828125, "rewards/keywords_iou_reward": 0.44957458041608334, "rewards/sql_step_keywords_recall_reward": 0.7080017421394587, "step": 214 }, { "clip_ratio": 0.0, "completion_length": 183.0703125, "epoch": 1.9491525423728815, "grad_norm": 0.185265451669693, "kl": 0.026287078857421875, "learning_rate": 6.792116140373117e-09, "loss": -0.0056, "num_tokens": 94496719.0, "reward": 6.287876293063164, "reward_std": 1.1061802469193935, "rewards/accuracy_reward": 0.47265625, "rewards/exec_out_all_reward": 0.880859375, "rewards/exec_out_step_reward": 0.9741544220596552, "rewards/format_reward": 0.94921875, "rewards/keywords_iou_reward": 0.4433903433382511, "rewards/sql_step_keywords_recall_reward": 0.7062380816787481, "step": 215 }, { "clip_ratio": 0.0, "completion_length": 182.119140625, "epoch": 1.9581920903954804, "grad_norm": 0.19649042189121246, "kl": 0.02834320068359375, "learning_rate": 4.7178360720865895e-09, "loss": 0.0082, "num_tokens": 94996296.0, "reward": 6.1123000383377075, "reward_std": 1.1606702040880919, "rewards/accuracy_reward": 0.4375, "rewards/exec_out_all_reward": 0.90625, "rewards/exec_out_step_reward": 0.9780420735478401, "rewards/format_reward": 0.9296875, "rewards/keywords_iou_reward": 0.44118882389739156, "rewards/sql_step_keywords_recall_reward": 0.665942832827568, "step": 216 }, { "clip_ratio": 0.0, "completion_length": 180.314453125, "epoch": 1.9672316384180792, "grad_norm": 0.20041655004024506, "kl": 0.025310516357421875, "learning_rate": 3.0199852921735105e-09, "loss": -0.002, "num_tokens": 95493249.0, "reward": 6.628199502825737, "reward_std": 1.2294201632030308, "rewards/accuracy_reward": 0.521484375, "rewards/exec_out_all_reward": 0.88671875, "rewards/exec_out_step_reward": 0.9736746642738581, "rewards/format_reward": 0.951171875, "rewards/keywords_iou_reward": 0.5000922083854675, "rewards/sql_step_keywords_recall_reward": 0.730512335896492, "step": 217 }, { "clip_ratio": 0.0, "completion_length": 182.69140625, "epoch": 1.9762711864406781, "grad_norm": 0.2012374997138977, "kl": 0.0261688232421875, "learning_rate": 1.6989912254880557e-09, "loss": 0.0159, "num_tokens": 95991843.0, "reward": 6.3724522441625595, "reward_std": 1.4343089256435633, "rewards/accuracy_reward": 0.509765625, "rewards/exec_out_all_reward": 0.875, "rewards/exec_out_step_reward": 0.9689127672463655, "rewards/format_reward": 0.9140625, "rewards/keywords_iou_reward": 0.4413177212700248, "rewards/sql_step_keywords_recall_reward": 0.6927789896726608, "step": 218 }, { "clip_ratio": 0.0, "completion_length": 182.826171875, "epoch": 1.985310734463277, "grad_norm": 0.18999001383781433, "kl": 0.0272064208984375, "learning_rate": 7.551864252223761e-10, "loss": 0.0057, "num_tokens": 96489986.0, "reward": 6.2077417075634, "reward_std": 1.2748773116618395, "rewards/accuracy_reward": 0.4453125, "rewards/exec_out_all_reward": 0.892578125, "rewards/exec_out_step_reward": 0.9720354303717613, "rewards/format_reward": 0.935546875, "rewards/keywords_iou_reward": 0.4684856841340661, "rewards/sql_step_keywords_recall_reward": 0.6893599443137646, "step": 219 }, { "clip_ratio": 0.0, "completion_length": 183.3854250907898, "epoch": 1.9943502824858759, "grad_norm": 0.18637076020240784, "kl": 0.0267333984375, "learning_rate": 1.8880848918739758e-10, "loss": -0.0015, "num_tokens": 96990210.0, "reward": 6.196442812681198, "reward_std": 1.3213467076420784, "rewards/accuracy_reward": 0.4765625, "rewards/exec_out_all_reward": 0.86328125, "rewards/exec_out_step_reward": 0.969191774725914, "rewards/format_reward": 0.931640625, "rewards/keywords_iou_reward": 0.4246487212367356, "rewards/sql_step_keywords_recall_reward": 0.676781676709652, "step": 220 }, { "epoch": 1.9943502824858759, "step": 220, "total_flos": 0.0, "train_loss": 0.002633511937032877, "train_runtime": 233060.1474, "train_samples_per_second": 0.122, "train_steps_per_second": 0.001 } ], "logging_steps": 1, "max_steps": 220, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 27, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }