| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9943502824858759, | |
| "eval_steps": 100, | |
| "global_step": 220, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.8046875, | |
| "epoch": 0.00903954802259887, | |
| "grad_norm": 0.299434095621109, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0276, | |
| "num_tokens": 503964.0, | |
| "reward": 4.863432988524437, | |
| "reward_std": 1.8696988988667727, | |
| "rewards/accuracy_reward": 0.345703125, | |
| "rewards/exec_out_all_reward": 0.7421875, | |
| "rewards/exec_out_step_reward": 0.9397887196391821, | |
| "rewards/format_reward": 0.642578125, | |
| "rewards/keywords_iou_reward": 0.3189462535083294, | |
| "rewards/sql_step_keywords_recall_reward": 0.5181736210361123, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.369140625, | |
| "epoch": 0.01807909604519774, | |
| "grad_norm": 0.3049573004245758, | |
| "kl": 8.754432201385498e-08, | |
| "learning_rate": 1.3636363636363637e-07, | |
| "loss": 0.0311, | |
| "num_tokens": 1008385.0, | |
| "reward": 4.9951048865914345, | |
| "reward_std": 1.8348420038819313, | |
| "rewards/accuracy_reward": 0.34765625, | |
| "rewards/exec_out_all_reward": 0.787109375, | |
| "rewards/exec_out_step_reward": 0.9488211516290903, | |
| "rewards/format_reward": 0.642578125, | |
| "rewards/keywords_iou_reward": 0.35271753335837275, | |
| "rewards/sql_step_keywords_recall_reward": 0.5205361591652036, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.701171875, | |
| "epoch": 0.02711864406779661, | |
| "grad_norm": 0.3177661597728729, | |
| "kl": 0.00011102110147476196, | |
| "learning_rate": 2.7272727272727274e-07, | |
| "loss": 0.0338, | |
| "num_tokens": 1511588.0, | |
| "reward": 4.939835079014301, | |
| "reward_std": 1.7858339007943869, | |
| "rewards/accuracy_reward": 0.322265625, | |
| "rewards/exec_out_all_reward": 0.80078125, | |
| "rewards/exec_out_step_reward": 0.9515764508396387, | |
| "rewards/format_reward": 0.65625, | |
| "rewards/keywords_iou_reward": 0.3603124172659591, | |
| "rewards/sql_step_keywords_recall_reward": 0.5215400578454137, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.2421875, | |
| "epoch": 0.03615819209039548, | |
| "grad_norm": 0.29520705342292786, | |
| "kl": 0.00011537596583366394, | |
| "learning_rate": 4.0909090909090906e-07, | |
| "loss": 0.0418, | |
| "num_tokens": 2016412.0, | |
| "reward": 4.9831836223602295, | |
| "reward_std": 1.7916885651648045, | |
| "rewards/accuracy_reward": 0.37109375, | |
| "rewards/exec_out_all_reward": 0.751953125, | |
| "rewards/exec_out_step_reward": 0.9367489777505398, | |
| "rewards/format_reward": 0.630859375, | |
| "rewards/keywords_iou_reward": 0.32331358385272324, | |
| "rewards/sql_step_keywords_recall_reward": 0.5326199810951948, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 192.185546875, | |
| "epoch": 0.04519774011299435, | |
| "grad_norm": 0.2962106168270111, | |
| "kl": 0.00011671334505081177, | |
| "learning_rate": 5.454545454545455e-07, | |
| "loss": 0.0337, | |
| "num_tokens": 2520743.0, | |
| "reward": 4.651097267866135, | |
| "reward_std": 1.9646679311990738, | |
| "rewards/accuracy_reward": 0.318359375, | |
| "rewards/exec_out_all_reward": 0.763671875, | |
| "rewards/exec_out_step_reward": 0.9430377371609211, | |
| "rewards/format_reward": 0.595703125, | |
| "rewards/keywords_iou_reward": 0.29655176820233464, | |
| "rewards/sql_step_keywords_recall_reward": 0.48214349802583456, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.55078125, | |
| "epoch": 0.05423728813559322, | |
| "grad_norm": 0.3201525807380676, | |
| "kl": 0.00012993812561035156, | |
| "learning_rate": 6.818181818181818e-07, | |
| "loss": 0.0321, | |
| "num_tokens": 3023701.0, | |
| "reward": 4.995345205068588, | |
| "reward_std": 2.013074729591608, | |
| "rewards/accuracy_reward": 0.36328125, | |
| "rewards/exec_out_all_reward": 0.7734375, | |
| "rewards/exec_out_step_reward": 0.9453125055879354, | |
| "rewards/format_reward": 0.623046875, | |
| "rewards/keywords_iou_reward": 0.33588895108550787, | |
| "rewards/sql_step_keywords_recall_reward": 0.5286454004235566, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.921875, | |
| "epoch": 0.06327683615819209, | |
| "grad_norm": 0.2839597165584564, | |
| "kl": 0.00017252564430236816, | |
| "learning_rate": 8.181818181818181e-07, | |
| "loss": 0.0222, | |
| "num_tokens": 3524501.0, | |
| "reward": 5.12370303273201, | |
| "reward_std": 1.872809598222375, | |
| "rewards/accuracy_reward": 0.38671875, | |
| "rewards/exec_out_all_reward": 0.771484375, | |
| "rewards/exec_out_step_reward": 0.9374604746699333, | |
| "rewards/format_reward": 0.662109375, | |
| "rewards/keywords_iou_reward": 0.3198722831439227, | |
| "rewards/sql_step_keywords_recall_reward": 0.5660292678512633, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.861328125, | |
| "epoch": 0.07231638418079096, | |
| "grad_norm": 0.264257550239563, | |
| "kl": 0.0003757178783416748, | |
| "learning_rate": 9.545454545454546e-07, | |
| "loss": 0.0214, | |
| "num_tokens": 4033750.0, | |
| "reward": 4.592174172401428, | |
| "reward_std": 1.6819164399057627, | |
| "rewards/accuracy_reward": 0.287109375, | |
| "rewards/exec_out_all_reward": 0.72265625, | |
| "rewards/exec_out_step_reward": 0.9274584576487541, | |
| "rewards/format_reward": 0.662109375, | |
| "rewards/keywords_iou_reward": 0.31115873460657895, | |
| "rewards/sql_step_keywords_recall_reward": 0.5091950967907906, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.73046875, | |
| "epoch": 0.08135593220338982, | |
| "grad_norm": 0.2734237313270569, | |
| "kl": 0.0005688667297363281, | |
| "learning_rate": 1.090909090909091e-06, | |
| "loss": 0.0149, | |
| "num_tokens": 4536888.0, | |
| "reward": 5.064344555139542, | |
| "reward_std": 1.7968224007636309, | |
| "rewards/accuracy_reward": 0.35546875, | |
| "rewards/exec_out_all_reward": 0.7109375, | |
| "rewards/exec_out_step_reward": 0.9287527892738581, | |
| "rewards/format_reward": 0.7265625, | |
| "rewards/keywords_iou_reward": 0.354521602508612, | |
| "rewards/sql_step_keywords_recall_reward": 0.5671735098585486, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.17578125, | |
| "epoch": 0.0903954802259887, | |
| "grad_norm": 0.22836743295192719, | |
| "kl": 0.002085447311401367, | |
| "learning_rate": 1.2272727272727274e-06, | |
| "loss": 0.002, | |
| "num_tokens": 5032606.0, | |
| "reward": 5.785055458545685, | |
| "reward_std": 1.7521540587767959, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.759765625, | |
| "rewards/exec_out_step_reward": 0.9359297584742308, | |
| "rewards/format_reward": 0.826171875, | |
| "rewards/keywords_iou_reward": 0.38180301152169704, | |
| "rewards/sql_step_keywords_recall_reward": 0.6558322114869952, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.865234375, | |
| "epoch": 0.09943502824858758, | |
| "grad_norm": 0.2427971363067627, | |
| "kl": 0.0032596588134765625, | |
| "learning_rate": 1.3636363636363636e-06, | |
| "loss": 0.0119, | |
| "num_tokens": 5529005.0, | |
| "reward": 5.771241188049316, | |
| "reward_std": 1.641195336356759, | |
| "rewards/accuracy_reward": 0.4375, | |
| "rewards/exec_out_all_reward": 0.783203125, | |
| "rewards/exec_out_step_reward": 0.9457356799393892, | |
| "rewards/format_reward": 0.869140625, | |
| "rewards/keywords_iou_reward": 0.39362214831635356, | |
| "rewards/sql_step_keywords_recall_reward": 0.6359174773097038, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 173.3203125, | |
| "epoch": 0.10847457627118644, | |
| "grad_norm": 0.23091059923171997, | |
| "kl": 0.003955364227294922, | |
| "learning_rate": 1.5e-06, | |
| "loss": 0.0064, | |
| "num_tokens": 6024409.0, | |
| "reward": 6.000889599323273, | |
| "reward_std": 1.5886465199291706, | |
| "rewards/accuracy_reward": 0.46875, | |
| "rewards/exec_out_all_reward": 0.76953125, | |
| "rewards/exec_out_step_reward": 0.9391183033585548, | |
| "rewards/format_reward": 0.8984375, | |
| "rewards/keywords_iou_reward": 0.42962841456755996, | |
| "rewards/sql_step_keywords_recall_reward": 0.6595456739887595, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 171.25, | |
| "epoch": 0.11751412429378531, | |
| "grad_norm": 0.2572859823703766, | |
| "kl": 0.007582187652587891, | |
| "learning_rate": 1.6363636363636363e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 6520105.0, | |
| "reward": 5.847834274172783, | |
| "reward_std": 1.4467571768909693, | |
| "rewards/accuracy_reward": 0.46484375, | |
| "rewards/exec_out_all_reward": 0.744140625, | |
| "rewards/exec_out_step_reward": 0.9297774098813534, | |
| "rewards/format_reward": 0.904296875, | |
| "rewards/keywords_iou_reward": 0.3783310679718852, | |
| "rewards/sql_step_keywords_recall_reward": 0.6535822190344334, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 160.04296875, | |
| "epoch": 0.12655367231638417, | |
| "grad_norm": 0.23153281211853027, | |
| "kl": 0.012262344360351562, | |
| "learning_rate": 1.7727272727272729e-06, | |
| "loss": -0.0025, | |
| "num_tokens": 7007539.0, | |
| "reward": 5.9820186495780945, | |
| "reward_std": 1.6872543934732676, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.82421875, | |
| "rewards/exec_out_step_reward": 0.9496279824525118, | |
| "rewards/format_reward": 0.90234375, | |
| "rewards/keywords_iou_reward": 0.44878690084442496, | |
| "rewards/sql_step_keywords_recall_reward": 0.6816918756812811, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 159.3203125, | |
| "epoch": 0.13559322033898305, | |
| "grad_norm": 0.22347486019134521, | |
| "kl": 0.015293121337890625, | |
| "learning_rate": 1.909090909090909e-06, | |
| "loss": 0.001, | |
| "num_tokens": 7495787.0, | |
| "reward": 5.671351440250874, | |
| "reward_std": 1.3719452489167452, | |
| "rewards/accuracy_reward": 0.357421875, | |
| "rewards/exec_out_all_reward": 0.771484375, | |
| "rewards/exec_out_step_reward": 0.939460875466466, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.46561355609446764, | |
| "rewards/sql_step_keywords_recall_reward": 0.6658978424966335, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 158.810546875, | |
| "epoch": 0.14463276836158193, | |
| "grad_norm": 0.2615886926651001, | |
| "kl": 0.021930694580078125, | |
| "learning_rate": 2.0454545454545453e-06, | |
| "loss": -0.003, | |
| "num_tokens": 7983430.0, | |
| "reward": 5.687411919236183, | |
| "reward_std": 1.400244857184589, | |
| "rewards/accuracy_reward": 0.38671875, | |
| "rewards/exec_out_all_reward": 0.791015625, | |
| "rewards/exec_out_step_reward": 0.9499209504574537, | |
| "rewards/format_reward": 0.88671875, | |
| "rewards/keywords_iou_reward": 0.4251784700900316, | |
| "rewards/sql_step_keywords_recall_reward": 0.6625246489420533, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 155.96484375, | |
| "epoch": 0.1536723163841808, | |
| "grad_norm": 0.2243858426809311, | |
| "kl": 0.019573211669921875, | |
| "learning_rate": 2.181818181818182e-06, | |
| "loss": 0.0037, | |
| "num_tokens": 8467588.0, | |
| "reward": 6.101026564836502, | |
| "reward_std": 1.3565897848457098, | |
| "rewards/accuracy_reward": 0.447265625, | |
| "rewards/exec_out_all_reward": 0.826171875, | |
| "rewards/exec_out_step_reward": 0.9590797107666731, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.4580969992093742, | |
| "rewards/sql_step_keywords_recall_reward": 0.6769247055053711, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 157.99609375, | |
| "epoch": 0.16271186440677965, | |
| "grad_norm": 0.23680506646633148, | |
| "kl": 0.0247344970703125, | |
| "learning_rate": 2.318181818181818e-06, | |
| "loss": 0.0027, | |
| "num_tokens": 8954138.0, | |
| "reward": 6.041056051850319, | |
| "reward_std": 1.2283777361735702, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.845703125, | |
| "rewards/exec_out_step_reward": 0.959887308999896, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.44093527970835567, | |
| "rewards/sql_step_keywords_recall_reward": 0.6895325118675828, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 156.0703125, | |
| "epoch": 0.17175141242937852, | |
| "grad_norm": 0.2468118816614151, | |
| "kl": 0.0263519287109375, | |
| "learning_rate": 2.454545454545455e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 9439242.0, | |
| "reward": 6.419172838330269, | |
| "reward_std": 1.4407691890373826, | |
| "rewards/accuracy_reward": 0.49609375, | |
| "rewards/exec_out_all_reward": 0.83984375, | |
| "rewards/exec_out_step_reward": 0.9605569522827864, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.4862754005007446, | |
| "rewards/sql_step_keywords_recall_reward": 0.7243463154882193, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 158.8828125, | |
| "epoch": 0.1807909604519774, | |
| "grad_norm": 0.2642815411090851, | |
| "kl": 0.028415679931640625, | |
| "learning_rate": 2.590909090909091e-06, | |
| "loss": 0.0059, | |
| "num_tokens": 9927642.0, | |
| "reward": 5.962770789861679, | |
| "reward_std": 1.364680239930749, | |
| "rewards/accuracy_reward": 0.423828125, | |
| "rewards/exec_out_all_reward": 0.875, | |
| "rewards/exec_out_step_reward": 0.9687531031668186, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.4039350217208266, | |
| "rewards/sql_step_keywords_recall_reward": 0.678335040807724, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 157.8203125, | |
| "epoch": 0.18983050847457628, | |
| "grad_norm": 0.24316003918647766, | |
| "kl": 0.032009124755859375, | |
| "learning_rate": 2.7272727272727272e-06, | |
| "loss": -0.0003, | |
| "num_tokens": 10415362.0, | |
| "reward": 6.179068893194199, | |
| "reward_std": 1.4617707338184118, | |
| "rewards/accuracy_reward": 0.462890625, | |
| "rewards/exec_out_all_reward": 0.845703125, | |
| "rewards/exec_out_step_reward": 0.9599469937384129, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.4459744766354561, | |
| "rewards/sql_step_keywords_recall_reward": 0.6924073351547122, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 157.458984375, | |
| "epoch": 0.19887005649717515, | |
| "grad_norm": 0.24961793422698975, | |
| "kl": 0.03668212890625, | |
| "learning_rate": 2.863636363636364e-06, | |
| "loss": -0.004, | |
| "num_tokens": 10901521.0, | |
| "reward": 6.071441277861595, | |
| "reward_std": 1.1777024501934648, | |
| "rewards/accuracy_reward": 0.453125, | |
| "rewards/exec_out_all_reward": 0.830078125, | |
| "rewards/exec_out_step_reward": 0.9483445044606924, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.42953827418386936, | |
| "rewards/sql_step_keywords_recall_reward": 0.6683171540498734, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 158.517578125, | |
| "epoch": 0.207909604519774, | |
| "grad_norm": 0.26023003458976746, | |
| "kl": 0.03612518310546875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0019, | |
| "num_tokens": 11389862.0, | |
| "reward": 6.313733980059624, | |
| "reward_std": 1.3131706872954965, | |
| "rewards/accuracy_reward": 0.47265625, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9664326030761003, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.46862596087157726, | |
| "rewards/sql_step_keywords_recall_reward": 0.6971588619053364, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 162.736328125, | |
| "epoch": 0.21694915254237288, | |
| "grad_norm": 0.25574371218681335, | |
| "kl": 0.039890289306640625, | |
| "learning_rate": 2.9998111915108126e-06, | |
| "loss": -0.0018, | |
| "num_tokens": 11879287.0, | |
| "reward": 5.985842078924179, | |
| "reward_std": 1.2227760329842567, | |
| "rewards/accuracy_reward": 0.423828125, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9674719516187906, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4203591588884592, | |
| "rewards/sql_step_keywords_recall_reward": 0.6659330297261477, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 162.716796875, | |
| "epoch": 0.22598870056497175, | |
| "grad_norm": 0.2357674390077591, | |
| "kl": 0.039127349853515625, | |
| "learning_rate": 2.9992448135747778e-06, | |
| "loss": -0.0065, | |
| "num_tokens": 12367230.0, | |
| "reward": 6.357031494379044, | |
| "reward_std": 1.352663902565837, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.89453125, | |
| "rewards/exec_out_step_reward": 0.9736126679927111, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.462111447006464, | |
| "rewards/sql_step_keywords_recall_reward": 0.6994303409010172, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 163.759765625, | |
| "epoch": 0.23502824858757063, | |
| "grad_norm": 3.8160126209259033, | |
| "kl": 0.41811370849609375, | |
| "learning_rate": 2.998301008774512e-06, | |
| "loss": 0.0131, | |
| "num_tokens": 12855263.0, | |
| "reward": 6.066176131367683, | |
| "reward_std": 1.418117775581777, | |
| "rewards/accuracy_reward": 0.453125, | |
| "rewards/exec_out_all_reward": 0.865234375, | |
| "rewards/exec_out_step_reward": 0.9645538832992315, | |
| "rewards/format_reward": 0.9140625, | |
| "rewards/keywords_iou_reward": 0.4158592028543353, | |
| "rewards/sql_step_keywords_recall_reward": 0.678106939420104, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 172.52734375, | |
| "epoch": 0.2440677966101695, | |
| "grad_norm": 0.23165632784366608, | |
| "kl": 0.03951263427734375, | |
| "learning_rate": 2.9969800147078265e-06, | |
| "loss": 0.0075, | |
| "num_tokens": 13348781.0, | |
| "reward": 6.2546906769275665, | |
| "reward_std": 1.2166710263118148, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.960680965334177, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4882043502293527, | |
| "rewards/sql_step_keywords_recall_reward": 0.7082259934395552, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.0, | |
| "epoch": 0.25310734463276835, | |
| "grad_norm": 0.2445058375597, | |
| "kl": 0.04166412353515625, | |
| "learning_rate": 2.9952821639279137e-06, | |
| "loss": 0.0028, | |
| "num_tokens": 494680.0, | |
| "reward": 6.440436959266663, | |
| "reward_std": 1.2339025381952524, | |
| "rewards/accuracy_reward": 0.50390625, | |
| "rewards/exec_out_all_reward": 0.83984375, | |
| "rewards/exec_out_step_reward": 0.9568103682249784, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.48325524432584643, | |
| "rewards/sql_step_keywords_recall_reward": 0.7143817320466042, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.755859375, | |
| "epoch": 0.2621468926553672, | |
| "grad_norm": 0.23020873963832855, | |
| "kl": 0.04241943359375, | |
| "learning_rate": 2.993207883859627e-06, | |
| "loss": -0.003, | |
| "num_tokens": 991863.0, | |
| "reward": 5.925758346915245, | |
| "reward_std": 1.3979150608647615, | |
| "rewards/accuracy_reward": 0.4140625, | |
| "rewards/exec_out_all_reward": 0.865234375, | |
| "rewards/exec_out_step_reward": 0.9639307502657175, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.41449853405356407, | |
| "rewards/sql_step_keywords_recall_reward": 0.6797055369243026, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.48828125, | |
| "epoch": 0.2711864406779661, | |
| "grad_norm": 0.22643530368804932, | |
| "kl": 0.04361724853515625, | |
| "learning_rate": 2.990757696691881e-06, | |
| "loss": 0.0059, | |
| "num_tokens": 1490665.0, | |
| "reward": 6.013850957155228, | |
| "reward_std": 1.3883078750222921, | |
| "rewards/accuracy_reward": 0.43359375, | |
| "rewards/exec_out_all_reward": 0.833984375, | |
| "rewards/exec_out_step_reward": 0.9505758639425039, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.43903162656351924, | |
| "rewards/sql_step_keywords_recall_reward": 0.6773993754759431, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.423828125, | |
| "epoch": 0.280225988700565, | |
| "grad_norm": 0.2063753306865692, | |
| "kl": 0.0457611083984375, | |
| "learning_rate": 2.987932219246193e-06, | |
| "loss": 0.0075, | |
| "num_tokens": 1993394.0, | |
| "reward": 5.88956793397665, | |
| "reward_std": 1.3650804716162384, | |
| "rewards/accuracy_reward": 0.40625, | |
| "rewards/exec_out_all_reward": 0.810546875, | |
| "rewards/exec_out_step_reward": 0.9385083485394716, | |
| "rewards/format_reward": 0.921875, | |
| "rewards/keywords_iou_reward": 0.4571849275380373, | |
| "rewards/sql_step_keywords_recall_reward": 0.679267879575491, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.060546875, | |
| "epoch": 0.28926553672316385, | |
| "grad_norm": 0.22804522514343262, | |
| "kl": 0.0471343994140625, | |
| "learning_rate": 2.984732162821399e-06, | |
| "loss": 0.0114, | |
| "num_tokens": 2497401.0, | |
| "reward": 5.931887894868851, | |
| "reward_std": 1.4683727947995067, | |
| "rewards/accuracy_reward": 0.435546875, | |
| "rewards/exec_out_all_reward": 0.822265625, | |
| "rewards/exec_out_step_reward": 0.9442894347012043, | |
| "rewards/format_reward": 0.90625, | |
| "rewards/keywords_iou_reward": 0.4232069947756827, | |
| "rewards/sql_step_keywords_recall_reward": 0.670481245033443, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.625, | |
| "epoch": 0.2983050847457627, | |
| "grad_norm": 0.20885376632213593, | |
| "kl": 0.04837799072265625, | |
| "learning_rate": 2.9811583330145917e-06, | |
| "loss": 0.0136, | |
| "num_tokens": 3002817.0, | |
| "reward": 6.591131284832954, | |
| "reward_std": 1.272476114332676, | |
| "rewards/accuracy_reward": 0.5390625, | |
| "rewards/exec_out_all_reward": 0.8359375, | |
| "rewards/exec_out_step_reward": 0.9475725479424, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4941097451373935, | |
| "rewards/sql_step_keywords_recall_reward": 0.7217455059289932, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 201.3046875, | |
| "epoch": 0.3073446327683616, | |
| "grad_norm": 0.21211469173431396, | |
| "kl": 0.048919677734375, | |
| "learning_rate": 2.9772116295183124e-06, | |
| "loss": -0.001, | |
| "num_tokens": 3512913.0, | |
| "reward": 6.256010413169861, | |
| "reward_std": 1.339096024632454, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.853515625, | |
| "rewards/exec_out_step_reward": 0.9534575026482344, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.4364256302360445, | |
| "rewards/sql_step_keywords_recall_reward": 0.7129048258066177, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 204.94140625, | |
| "epoch": 0.3163841807909605, | |
| "grad_norm": 0.22092854976654053, | |
| "kl": 0.0505828857421875, | |
| "learning_rate": 2.97289304589406e-06, | |
| "loss": 0.017, | |
| "num_tokens": 4024451.0, | |
| "reward": 5.7779867351055145, | |
| "reward_std": 1.4151953971013427, | |
| "rewards/accuracy_reward": 0.376953125, | |
| "rewards/exec_out_all_reward": 0.8359375, | |
| "rewards/exec_out_step_reward": 0.9576846230775118, | |
| "rewards/format_reward": 0.91015625, | |
| "rewards/keywords_iou_reward": 0.4494855832308531, | |
| "rewards/sql_step_keywords_recall_reward": 0.6674247067421675, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.88671875, | |
| "epoch": 0.3254237288135593, | |
| "grad_norm": 0.20495107769966125, | |
| "kl": 0.0490264892578125, | |
| "learning_rate": 2.9682036693221684e-06, | |
| "loss": 0.0146, | |
| "num_tokens": 4537929.0, | |
| "reward": 6.179159179329872, | |
| "reward_std": 1.19661252386868, | |
| "rewards/accuracy_reward": 0.458984375, | |
| "rewards/exec_out_all_reward": 0.837890625, | |
| "rewards/exec_out_step_reward": 0.9454868901520967, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.48097023693844676, | |
| "rewards/sql_step_keywords_recall_reward": 0.6564974309876561, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.6953125, | |
| "epoch": 0.3344632768361582, | |
| "grad_norm": 0.20743127167224884, | |
| "kl": 0.05022430419921875, | |
| "learning_rate": 2.963144680328111e-06, | |
| "loss": 0.0123, | |
| "num_tokens": 5048565.0, | |
| "reward": 6.279510959982872, | |
| "reward_std": 1.4437304949387908, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.861328125, | |
| "rewards/exec_out_step_reward": 0.95453792065382, | |
| "rewards/format_reward": 0.923828125, | |
| "rewards/keywords_iou_reward": 0.4697499736212194, | |
| "rewards/sql_step_keywords_recall_reward": 0.686254383996129, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 208.59765625, | |
| "epoch": 0.34350282485875705, | |
| "grad_norm": 0.20503395795822144, | |
| "kl": 0.05016326904296875, | |
| "learning_rate": 2.9577173524853125e-06, | |
| "loss": -0.0049, | |
| "num_tokens": 5560463.0, | |
| "reward": 5.8292489647865295, | |
| "reward_std": 1.3312111617997289, | |
| "rewards/accuracy_reward": 0.3984375, | |
| "rewards/exec_out_all_reward": 0.837890625, | |
| "rewards/exec_out_step_reward": 0.9564809743314981, | |
| "rewards/format_reward": 0.91796875, | |
| "rewards/keywords_iou_reward": 0.430765890982002, | |
| "rewards/sql_step_keywords_recall_reward": 0.6616268502548337, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 204.892578125, | |
| "epoch": 0.3525423728813559, | |
| "grad_norm": 0.19791673123836517, | |
| "kl": 0.04753875732421875, | |
| "learning_rate": 2.9519230520945346e-06, | |
| "loss": -0.0044, | |
| "num_tokens": 6072524.0, | |
| "reward": 6.163229390978813, | |
| "reward_std": 1.2822516057640314, | |
| "rewards/accuracy_reward": 0.453125, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.963240172713995, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4430888262577355, | |
| "rewards/sql_step_keywords_recall_reward": 0.696624081581831, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 206.439453125, | |
| "epoch": 0.3615819209039548, | |
| "grad_norm": 0.1895550638437271, | |
| "kl": 0.048187255859375, | |
| "learning_rate": 2.9457632378399134e-06, | |
| "loss": 0.0102, | |
| "num_tokens": 6585445.0, | |
| "reward": 5.869170263409615, | |
| "reward_std": 1.2297673234716058, | |
| "rewards/accuracy_reward": 0.37890625, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.9623821955174208, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4554209231864661, | |
| "rewards/sql_step_keywords_recall_reward": 0.6834461260586977, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 201.962890625, | |
| "epoch": 0.3706214689265537, | |
| "grad_norm": 0.20803290605545044, | |
| "kl": 0.0442962646484375, | |
| "learning_rate": 2.9392394604217463e-06, | |
| "loss": 0.0043, | |
| "num_tokens": 7094046.0, | |
| "reward": 6.21223983168602, | |
| "reward_std": 1.2842160500586033, | |
| "rewards/accuracy_reward": 0.4765625, | |
| "rewards/exec_out_all_reward": 0.875, | |
| "rewards/exec_out_step_reward": 0.9636959079653025, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.43379027443006635, | |
| "rewards/sql_step_keywords_recall_reward": 0.6680727442726493, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 206.572265625, | |
| "epoch": 0.37966101694915255, | |
| "grad_norm": 0.19538187980651855, | |
| "kl": 0.044647216796875, | |
| "learning_rate": 2.932353362166111e-06, | |
| "loss": 0.0142, | |
| "num_tokens": 7608915.0, | |
| "reward": 6.033717706799507, | |
| "reward_std": 1.3723872043192387, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.857421875, | |
| "rewards/exec_out_step_reward": 0.9629634786397219, | |
| "rewards/format_reward": 0.95703125, | |
| "rewards/keywords_iou_reward": 0.429211582057178, | |
| "rewards/sql_step_keywords_recall_reward": 0.6713154595345259, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 203.2421875, | |
| "epoch": 0.3887005649717514, | |
| "grad_norm": 0.20477567613124847, | |
| "kl": 0.04170989990234375, | |
| "learning_rate": 2.9251066766114183e-06, | |
| "loss": 0.0111, | |
| "num_tokens": 8120303.0, | |
| "reward": 5.484863147139549, | |
| "reward_std": 1.2587912240996957, | |
| "rewards/accuracy_reward": 0.3046875, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9700288362801075, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.42473567882552743, | |
| "rewards/sql_step_keywords_recall_reward": 0.641925479285419, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 198.205078125, | |
| "epoch": 0.3977401129943503, | |
| "grad_norm": 0.19639819860458374, | |
| "kl": 0.042938232421875, | |
| "learning_rate": 2.9175012280720027e-06, | |
| "loss": -0.0058, | |
| "num_tokens": 8629068.0, | |
| "reward": 5.85739204287529, | |
| "reward_std": 1.3158389078453183, | |
| "rewards/accuracy_reward": 0.39453125, | |
| "rewards/exec_out_all_reward": 0.828125, | |
| "rewards/exec_out_step_reward": 0.9519577771425247, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.44756252504885197, | |
| "rewards/sql_step_keywords_recall_reward": 0.6665591625496745, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 199.18359375, | |
| "epoch": 0.4067796610169492, | |
| "grad_norm": 0.19431763887405396, | |
| "kl": 0.041290283203125, | |
| "learning_rate": 2.9095389311788626e-06, | |
| "loss": 0.0103, | |
| "num_tokens": 9137718.0, | |
| "reward": 6.069079004228115, | |
| "reward_std": 1.3046986246481538, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.8359375, | |
| "rewards/exec_out_step_reward": 0.9571854863315821, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.4311121259815991, | |
| "rewards/sql_step_keywords_recall_reward": 0.6656848564743996, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 197.578125, | |
| "epoch": 0.415819209039548, | |
| "grad_norm": 0.18736235797405243, | |
| "kl": 0.04170989990234375, | |
| "learning_rate": 2.9012217903976603e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 9644030.0, | |
| "reward": 6.115010187029839, | |
| "reward_std": 1.2594214268028736, | |
| "rewards/accuracy_reward": 0.443359375, | |
| "rewards/exec_out_all_reward": 0.87890625, | |
| "rewards/exec_out_step_reward": 0.9649127330631018, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.42334456741809845, | |
| "rewards/sql_step_keywords_recall_reward": 0.7018458480015397, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.72265625, | |
| "epoch": 0.4248587570621469, | |
| "grad_norm": 0.21365734934806824, | |
| "kl": 0.03936004638671875, | |
| "learning_rate": 2.892551899524109e-06, | |
| "loss": -0.0027, | |
| "num_tokens": 10148012.0, | |
| "reward": 6.157889060676098, | |
| "reward_std": 1.2168289944529533, | |
| "rewards/accuracy_reward": 0.447265625, | |
| "rewards/exec_out_all_reward": 0.90234375, | |
| "rewards/exec_out_step_reward": 0.9766919370740652, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.43009675364010036, | |
| "rewards/sql_step_keywords_recall_reward": 0.6862379219383001, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.716796875, | |
| "epoch": 0.43389830508474575, | |
| "grad_norm": 0.1940770298242569, | |
| "kl": 0.0411834716796875, | |
| "learning_rate": 2.8835314411568722e-06, | |
| "loss": 0.0058, | |
| "num_tokens": 10649115.0, | |
| "reward": 6.114228963851929, | |
| "reward_std": 1.1688512060791254, | |
| "rewards/accuracy_reward": 0.423828125, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9754417818039656, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.4551887298002839, | |
| "rewards/sql_step_keywords_recall_reward": 0.7069253623485565, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.994140625, | |
| "epoch": 0.4429378531073446, | |
| "grad_norm": 0.1919233798980713, | |
| "kl": 0.040985107421875, | |
| "learning_rate": 2.8741626861481045e-06, | |
| "loss": 0.0096, | |
| "num_tokens": 11150488.0, | |
| "reward": 6.672178938984871, | |
| "reward_std": 1.279738076031208, | |
| "rewards/accuracy_reward": 0.51171875, | |
| "rewards/exec_out_all_reward": 0.90234375, | |
| "rewards/exec_out_step_reward": 0.9763346407562494, | |
| "rewards/format_reward": 0.970703125, | |
| "rewards/keywords_iou_reward": 0.5284834480844438, | |
| "rewards/sql_step_keywords_recall_reward": 0.7189555410295725, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.287109375, | |
| "epoch": 0.4519774011299435, | |
| "grad_norm": 0.20888246595859528, | |
| "kl": 0.041839599609375, | |
| "learning_rate": 2.8644479930317777e-06, | |
| "loss": 0.0116, | |
| "num_tokens": 11654247.0, | |
| "reward": 6.019737772643566, | |
| "reward_std": 1.2559357401914895, | |
| "rewards/accuracy_reward": 0.423828125, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9652940593659878, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.44982042722404003, | |
| "rewards/sql_step_keywords_recall_reward": 0.6469903746619821, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.822265625, | |
| "epoch": 0.4610169491525424, | |
| "grad_norm": 0.20416894555091858, | |
| "kl": 0.04168701171875, | |
| "learning_rate": 2.854389807429932e-06, | |
| "loss": 0.0077, | |
| "num_tokens": 12156812.0, | |
| "reward": 5.93711394071579, | |
| "reward_std": 1.3493517027236521, | |
| "rewards/accuracy_reward": 0.419921875, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.9683663547039032, | |
| "rewards/format_reward": 0.95703125, | |
| "rewards/keywords_iou_reward": 0.4109305152669549, | |
| "rewards/sql_step_keywords_recall_reward": 0.6507927812635899, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.201171875, | |
| "epoch": 0.47005649717514125, | |
| "grad_norm": 0.2009599655866623, | |
| "kl": 0.04203033447265625, | |
| "learning_rate": 2.843990661437004e-06, | |
| "loss": -0.0079, | |
| "num_tokens": 12656615.0, | |
| "reward": 6.319135099649429, | |
| "reward_std": 1.1828816812485456, | |
| "rewards/accuracy_reward": 0.490234375, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9687747992575169, | |
| "rewards/format_reward": 0.966796875, | |
| "rewards/keywords_iou_reward": 0.44148961594328284, | |
| "rewards/sql_step_keywords_recall_reward": 0.668552921153605, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.240234375, | |
| "epoch": 0.47909604519774013, | |
| "grad_norm": 0.19552090764045715, | |
| "kl": 0.040004730224609375, | |
| "learning_rate": 2.8332531729823854e-06, | |
| "loss": 0.0091, | |
| "num_tokens": 13154062.0, | |
| "reward": 6.084091693162918, | |
| "reward_std": 1.3168746987357736, | |
| "rewards/accuracy_reward": 0.439453125, | |
| "rewards/exec_out_all_reward": 0.89453125, | |
| "rewards/exec_out_step_reward": 0.9761377759277821, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.41599453624803573, | |
| "rewards/sql_step_keywords_recall_reward": 0.6822148254141212, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.546875, | |
| "epoch": 0.488135593220339, | |
| "grad_norm": 0.18975664675235748, | |
| "kl": 0.041107177734375, | |
| "learning_rate": 2.822180045171373e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 13654138.0, | |
| "reward": 6.5647883862257, | |
| "reward_std": 1.142817527987063, | |
| "rewards/accuracy_reward": 0.5390625, | |
| "rewards/exec_out_all_reward": 0.890625, | |
| "rewards/exec_out_step_reward": 0.972067216411233, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.45550795644521713, | |
| "rewards/sql_step_keywords_recall_reward": 0.6836584862321615, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.6171875, | |
| "epoch": 0.4971751412429379, | |
| "grad_norm": 0.19297046959400177, | |
| "kl": 0.04022216796875, | |
| "learning_rate": 2.8107740656046774e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 14153762.0, | |
| "reward": 6.067966505885124, | |
| "reward_std": 1.339047422632575, | |
| "rewards/accuracy_reward": 0.419921875, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9681291859596968, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4687476740218699, | |
| "rewards/sql_step_keywords_recall_reward": 0.6701545566320419, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.33203125, | |
| "epoch": 0.5062146892655367, | |
| "grad_norm": 0.19569052755832672, | |
| "kl": 0.03861236572265625, | |
| "learning_rate": 2.7990381056766585e-06, | |
| "loss": -0.0018, | |
| "num_tokens": 14653404.0, | |
| "reward": 6.230767786502838, | |
| "reward_std": 1.3125396608375013, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.9760687928646803, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.44265242759138346, | |
| "rewards/sql_step_keywords_recall_reward": 0.681894151493907, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.53125, | |
| "epoch": 0.5152542372881356, | |
| "grad_norm": 0.19364245235919952, | |
| "kl": 0.0371551513671875, | |
| "learning_rate": 2.7869751198524656e-06, | |
| "loss": -0.0058, | |
| "num_tokens": 15157368.0, | |
| "reward": 6.011801972985268, | |
| "reward_std": 1.4972982537001371, | |
| "rewards/accuracy_reward": 0.435546875, | |
| "rewards/exec_out_all_reward": 0.841796875, | |
| "rewards/exec_out_step_reward": 0.9621403776109219, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.4371058586984873, | |
| "rewards/sql_step_keywords_recall_reward": 0.6559187090024352, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 189.95703125, | |
| "epoch": 0.5242937853107345, | |
| "grad_norm": 0.1767347753047943, | |
| "kl": 0.037456512451171875, | |
| "learning_rate": 2.7745881449242716e-06, | |
| "loss": 0.0095, | |
| "num_tokens": 15662582.0, | |
| "reward": 6.229088187217712, | |
| "reward_std": 1.2932423749007285, | |
| "rewards/accuracy_reward": 0.4921875, | |
| "rewards/exec_out_all_reward": 0.853515625, | |
| "rewards/exec_out_step_reward": 0.9643019940704107, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4135230230167508, | |
| "rewards/sql_step_keywords_recall_reward": 0.6760213691741228, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.84765625, | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 0.1982230544090271, | |
| "kl": 0.036502838134765625, | |
| "learning_rate": 2.761880299246772e-06, | |
| "loss": -0.0078, | |
| "num_tokens": 16165460.0, | |
| "reward": 6.063759118318558, | |
| "reward_std": 1.3337543765082955, | |
| "rewards/accuracy_reward": 0.439453125, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9730437770485878, | |
| "rewards/format_reward": 0.95703125, | |
| "rewards/keywords_iou_reward": 0.42756529804319143, | |
| "rewards/sql_step_keywords_recall_reward": 0.6379284737631679, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.296875, | |
| "epoch": 0.5423728813559322, | |
| "grad_norm": 0.19018128514289856, | |
| "kl": 0.036468505859375, | |
| "learning_rate": 2.748854781952157e-06, | |
| "loss": -0.008, | |
| "num_tokens": 16671384.0, | |
| "reward": 6.285549536347389, | |
| "reward_std": 1.3978888802230358, | |
| "rewards/accuracy_reward": 0.490234375, | |
| "rewards/exec_out_all_reward": 0.861328125, | |
| "rewards/exec_out_step_reward": 0.9668580982834101, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4441379075869918, | |
| "rewards/sql_step_keywords_recall_reward": 0.6686968319118023, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 203.625, | |
| "epoch": 0.5514124293785311, | |
| "grad_norm": 0.19416409730911255, | |
| "kl": 0.033016204833984375, | |
| "learning_rate": 2.735514872144749e-06, | |
| "loss": -0.008, | |
| "num_tokens": 17181944.0, | |
| "reward": 6.115775644779205, | |
| "reward_std": 1.6173988990485668, | |
| "rewards/accuracy_reward": 0.453125, | |
| "rewards/exec_out_all_reward": 0.873046875, | |
| "rewards/exec_out_step_reward": 0.9669309612363577, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.4201988475397229, | |
| "rewards/sql_step_keywords_recall_reward": 0.6697751097381115, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 197.470703125, | |
| "epoch": 0.56045197740113, | |
| "grad_norm": 0.19720803201198578, | |
| "kl": 0.033603668212890625, | |
| "learning_rate": 2.721863928075504e-06, | |
| "loss": 0.0067, | |
| "num_tokens": 17690761.0, | |
| "reward": 6.248985543847084, | |
| "reward_std": 1.3238589530810714, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9681136887520552, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4740994907915592, | |
| "rewards/sql_step_keywords_recall_reward": 0.6705634454265237, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 202.150390625, | |
| "epoch": 0.5694915254237288, | |
| "grad_norm": 0.19422629475593567, | |
| "kl": 0.032680511474609375, | |
| "learning_rate": 2.707905386296588e-06, | |
| "loss": -0.0065, | |
| "num_tokens": 18198586.0, | |
| "reward": 6.26141269505024, | |
| "reward_std": 1.2987114731222391, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.884765625, | |
| "rewards/exec_out_step_reward": 0.9684066604822874, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.4992506830021739, | |
| "rewards/sql_step_keywords_recall_reward": 0.6577859437093139, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 206.173828125, | |
| "epoch": 0.5785310734463277, | |
| "grad_norm": 0.17673034965991974, | |
| "kl": 0.034145355224609375, | |
| "learning_rate": 2.6936427607962483e-06, | |
| "loss": 0.0066, | |
| "num_tokens": 18710943.0, | |
| "reward": 5.954583629965782, | |
| "reward_std": 1.3590196399018168, | |
| "rewards/accuracy_reward": 0.43359375, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.9674510210752487, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.3994289576075971, | |
| "rewards/sql_step_keywords_recall_reward": 0.6550715854391456, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.908203125, | |
| "epoch": 0.5875706214689266, | |
| "grad_norm": 0.1863545924425125, | |
| "kl": 0.035305023193359375, | |
| "learning_rate": 2.6790796421141813e-06, | |
| "loss": 0.0025, | |
| "num_tokens": 19222232.0, | |
| "reward": 6.260747715830803, | |
| "reward_std": 1.2453271835111082, | |
| "rewards/accuracy_reward": 0.48046875, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9697412867099047, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4435248177032918, | |
| "rewards/sql_step_keywords_recall_reward": 0.6637223660945892, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 208.349609375, | |
| "epoch": 0.5966101694915255, | |
| "grad_norm": 0.18152064085006714, | |
| "kl": 0.034008026123046875, | |
| "learning_rate": 2.6642196964376354e-06, | |
| "loss": 0.005, | |
| "num_tokens": 19736371.0, | |
| "reward": 6.178658068180084, | |
| "reward_std": 1.2107095727697015, | |
| "rewards/accuracy_reward": 0.439453125, | |
| "rewards/exec_out_all_reward": 0.875, | |
| "rewards/exec_out_step_reward": 0.9720323383808136, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.47833117935806513, | |
| "rewards/sql_step_keywords_recall_reward": 0.6698852656409144, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 213.107421875, | |
| "epoch": 0.6056497175141243, | |
| "grad_norm": 0.28298619389533997, | |
| "kl": 0.06278610229492188, | |
| "learning_rate": 2.649066664678467e-06, | |
| "loss": -0.001, | |
| "num_tokens": 20249726.0, | |
| "reward": 6.54718804359436, | |
| "reward_std": 1.2923204032704234, | |
| "rewards/accuracy_reward": 0.5546875, | |
| "rewards/exec_out_all_reward": 0.888671875, | |
| "rewards/exec_out_step_reward": 0.9712689146399498, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.41723292297683656, | |
| "rewards/sql_step_keywords_recall_reward": 0.6906719226390123, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.087890625, | |
| "epoch": 0.6146892655367232, | |
| "grad_norm": 0.1797921508550644, | |
| "kl": 0.037776947021484375, | |
| "learning_rate": 2.6336243615313876e-06, | |
| "loss": -0.0023, | |
| "num_tokens": 20765263.0, | |
| "reward": 6.382973074913025, | |
| "reward_std": 1.2423311527818441, | |
| "rewards/accuracy_reward": 0.486328125, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.9681625198572874, | |
| "rewards/format_reward": 0.9296875, | |
| "rewards/keywords_iou_reward": 0.48494360502809286, | |
| "rewards/sql_step_keywords_recall_reward": 0.677345173433423, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 218.08203125, | |
| "epoch": 0.6237288135593221, | |
| "grad_norm": 0.171707421541214, | |
| "kl": 0.03691864013671875, | |
| "learning_rate": 2.6178966745136323e-06, | |
| "loss": -0.0042, | |
| "num_tokens": 21284597.0, | |
| "reward": 6.1321365386247635, | |
| "reward_std": 1.2671317560598254, | |
| "rewards/accuracy_reward": 0.45703125, | |
| "rewards/exec_out_all_reward": 0.849609375, | |
| "rewards/exec_out_step_reward": 0.9618140794336796, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.43983029294759035, | |
| "rewards/sql_step_keywords_recall_reward": 0.6695681791752577, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 215.431640625, | |
| "epoch": 0.632768361581921, | |
| "grad_norm": 0.18202371895313263, | |
| "kl": 0.0379486083984375, | |
| "learning_rate": 2.6018875629862996e-06, | |
| "loss": -0.0007, | |
| "num_tokens": 21802886.0, | |
| "reward": 6.155725434422493, | |
| "reward_std": 1.3498500874266028, | |
| "rewards/accuracy_reward": 0.447265625, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9724082369357347, | |
| "rewards/format_reward": 0.927734375, | |
| "rewards/keywords_iou_reward": 0.45339376712217927, | |
| "rewards/sql_step_keywords_recall_reward": 0.673013923689723, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.77734375, | |
| "epoch": 0.6418079096045197, | |
| "grad_norm": 0.1931043118238449, | |
| "kl": 0.04012298583984375, | |
| "learning_rate": 2.585601057157605e-06, | |
| "loss": -0.0014, | |
| "num_tokens": 22324500.0, | |
| "reward": 6.071022488176823, | |
| "reward_std": 1.363126328913495, | |
| "rewards/accuracy_reward": 0.458984375, | |
| "rewards/exec_out_all_reward": 0.845703125, | |
| "rewards/exec_out_step_reward": 0.9601764027029276, | |
| "rewards/format_reward": 0.91015625, | |
| "rewards/keywords_iou_reward": 0.42694294080138206, | |
| "rewards/sql_step_keywords_recall_reward": 0.6651632944121957, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.185546875, | |
| "epoch": 0.6508474576271186, | |
| "grad_norm": 0.18399913609027863, | |
| "kl": 0.03882598876953125, | |
| "learning_rate": 2.5690412570682945e-06, | |
| "loss": -0.0003, | |
| "num_tokens": 22841407.0, | |
| "reward": 6.496973499655724, | |
| "reward_std": 1.1896542916074395, | |
| "rewards/accuracy_reward": 0.525390625, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9670332632958889, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.45264067919924855, | |
| "rewards/sql_step_keywords_recall_reward": 0.7105963062494993, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.98828125, | |
| "epoch": 0.6598870056497175, | |
| "grad_norm": 0.17450737953186035, | |
| "kl": 0.037841796875, | |
| "learning_rate": 2.552212331559482e-06, | |
| "loss": -0.0076, | |
| "num_tokens": 23356665.0, | |
| "reward": 6.324795305728912, | |
| "reward_std": 1.2927344804629683, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.916015625, | |
| "rewards/exec_out_step_reward": 0.9763090629130602, | |
| "rewards/format_reward": 0.927734375, | |
| "rewards/keywords_iou_reward": 0.4405778916552663, | |
| "rewards/sql_step_keywords_recall_reward": 0.6938929669559002, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.453125, | |
| "epoch": 0.6689265536723163, | |
| "grad_norm": 0.18439583480358124, | |
| "kl": 0.03949737548828125, | |
| "learning_rate": 2.535118517223168e-06, | |
| "loss": 0.0112, | |
| "num_tokens": 23875141.0, | |
| "reward": 6.2983558177948, | |
| "reward_std": 1.3202420324087143, | |
| "rewards/accuracy_reward": 0.494140625, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9673696402460337, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.41860854998230934, | |
| "rewards/sql_step_keywords_recall_reward": 0.7047065645456314, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 217.3203125, | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 0.17797358334064484, | |
| "kl": 0.037334442138671875, | |
| "learning_rate": 2.5177641173356982e-06, | |
| "loss": 0.0013, | |
| "num_tokens": 24391073.0, | |
| "reward": 6.193948924541473, | |
| "reward_std": 1.4235245073214173, | |
| "rewards/accuracy_reward": 0.466796875, | |
| "rewards/exec_out_all_reward": 0.8984375, | |
| "rewards/exec_out_step_reward": 0.9762230291962624, | |
| "rewards/format_reward": 0.91796875, | |
| "rewards/keywords_iou_reward": 0.42891340190544724, | |
| "rewards/sql_step_keywords_recall_reward": 0.6763053219765425, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.564453125, | |
| "epoch": 0.6870056497175141, | |
| "grad_norm": 0.17648960649967194, | |
| "kl": 0.03826904296875, | |
| "learning_rate": 2.5001535007744377e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 24910594.0, | |
| "reward": 6.196133196353912, | |
| "reward_std": 1.3525635278783739, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.8671875, | |
| "rewards/exec_out_step_reward": 0.9637571293860674, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.4552514897659421, | |
| "rewards/sql_step_keywords_recall_reward": 0.677341865375638, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 217.21875, | |
| "epoch": 0.696045197740113, | |
| "grad_norm": 0.1959611028432846, | |
| "kl": 0.042377471923828125, | |
| "learning_rate": 2.4822911009179277e-06, | |
| "loss": 0.0062, | |
| "num_tokens": 25428242.0, | |
| "reward": 6.28637857735157, | |
| "reward_std": 1.4062156137079, | |
| "rewards/accuracy_reward": 0.466796875, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9695289265364408, | |
| "rewards/format_reward": 0.91796875, | |
| "rewards/keywords_iou_reward": 0.4715513661503792, | |
| "rewards/sql_step_keywords_recall_reward": 0.7116375369951129, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 218.865234375, | |
| "epoch": 0.7050847457627119, | |
| "grad_norm": 0.1893206089735031, | |
| "kl": 0.03656005859375, | |
| "learning_rate": 2.464181414529809e-06, | |
| "loss": 0.0047, | |
| "num_tokens": 25947605.0, | |
| "reward": 5.8394907265901566, | |
| "reward_std": 1.3573181126266718, | |
| "rewards/accuracy_reward": 0.390625, | |
| "rewards/exec_out_all_reward": 0.84375, | |
| "rewards/exec_out_step_reward": 0.9585681743919849, | |
| "rewards/format_reward": 0.92578125, | |
| "rewards/keywords_iou_reward": 0.4459369848482311, | |
| "rewards/sql_step_keywords_recall_reward": 0.6570173809304833, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 212.64453125, | |
| "epoch": 0.7141242937853107, | |
| "grad_norm": 0.17911891639232635, | |
| "kl": 0.037021636962890625, | |
| "learning_rate": 2.4458290006267837e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 26462715.0, | |
| "reward": 6.297634035348892, | |
| "reward_std": 1.2602604366838932, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.8671875, | |
| "rewards/exec_out_step_reward": 0.9640160016715527, | |
| "rewards/format_reward": 0.91796875, | |
| "rewards/keywords_iou_reward": 0.4979046704247594, | |
| "rewards/sql_step_keywords_recall_reward": 0.7089024959132075, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 210.369140625, | |
| "epoch": 0.7231638418079096, | |
| "grad_norm": 0.18120358884334564, | |
| "kl": 0.035877227783203125, | |
| "learning_rate": 2.427238479330908e-06, | |
| "loss": 0.0027, | |
| "num_tokens": 26975792.0, | |
| "reward": 6.11888575553894, | |
| "reward_std": 1.5019584177061915, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.9738637823611498, | |
| "rewards/format_reward": 0.90234375, | |
| "rewards/keywords_iou_reward": 0.4661337183788419, | |
| "rewards/sql_step_keywords_recall_reward": 0.6912701558321714, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 210.333984375, | |
| "epoch": 0.7322033898305085, | |
| "grad_norm": 0.18645890057086945, | |
| "kl": 0.03778076171875, | |
| "learning_rate": 2.4084145307065e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 27488767.0, | |
| "reward": 6.057440027594566, | |
| "reward_std": 1.336686883121729, | |
| "rewards/accuracy_reward": 0.41796875, | |
| "rewards/exec_out_all_reward": 0.8671875, | |
| "rewards/exec_out_step_reward": 0.9673099610954523, | |
| "rewards/format_reward": 0.90625, | |
| "rewards/keywords_iou_reward": 0.49649542290717363, | |
| "rewards/sql_step_keywords_recall_reward": 0.6518266946077347, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 206.6796875, | |
| "epoch": 0.7412429378531074, | |
| "grad_norm": 0.18546722829341888, | |
| "kl": 0.035541534423828125, | |
| "learning_rate": 2.389361893581961e-06, | |
| "loss": -0.0067, | |
| "num_tokens": 28001731.0, | |
| "reward": 6.299026131629944, | |
| "reward_std": 1.3064639195799828, | |
| "rewards/accuracy_reward": 0.490234375, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.9757549054920673, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.4211979394312948, | |
| "rewards/sql_step_keywords_recall_reward": 0.6957191359251738, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.423828125, | |
| "epoch": 0.7502824858757062, | |
| "grad_norm": 0.18241924047470093, | |
| "kl": 0.03655242919921875, | |
| "learning_rate": 2.3700853643567976e-06, | |
| "loss": 0.0047, | |
| "num_tokens": 28512732.0, | |
| "reward": 6.236706480383873, | |
| "reward_std": 1.2026822408661246, | |
| "rewards/accuracy_reward": 0.49609375, | |
| "rewards/exec_out_all_reward": 0.861328125, | |
| "rewards/exec_out_step_reward": 0.9606026802212, | |
| "rewards/format_reward": 0.904296875, | |
| "rewards/keywords_iou_reward": 0.43541459972038865, | |
| "rewards/sql_step_keywords_recall_reward": 0.6552745532244444, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 201.544921875, | |
| "epoch": 0.7593220338983051, | |
| "grad_norm": 0.19208958745002747, | |
| "kl": 0.0334625244140625, | |
| "learning_rate": 2.350589795794156e-06, | |
| "loss": -0.0085, | |
| "num_tokens": 29021143.0, | |
| "reward": 6.086180254817009, | |
| "reward_std": 1.245661067776382, | |
| "rewards/accuracy_reward": 0.439453125, | |
| "rewards/exec_out_all_reward": 0.869140625, | |
| "rewards/exec_out_step_reward": 0.9663248769938946, | |
| "rewards/format_reward": 0.900390625, | |
| "rewards/keywords_iou_reward": 0.45154744386672974, | |
| "rewards/sql_step_keywords_recall_reward": 0.6894168108701706, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 201.060546875, | |
| "epoch": 0.768361581920904, | |
| "grad_norm": 0.1888093501329422, | |
| "kl": 0.03191375732421875, | |
| "learning_rate": 2.3308800957991657e-06, | |
| "loss": 0.0122, | |
| "num_tokens": 29529626.0, | |
| "reward": 6.194907002151012, | |
| "reward_std": 1.2625772105529904, | |
| "rewards/accuracy_reward": 0.470703125, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.9580496698617935, | |
| "rewards/format_reward": 0.9140625, | |
| "rewards/keywords_iou_reward": 0.45335739478468895, | |
| "rewards/sql_step_keywords_recall_reward": 0.6777987945824862, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 195.92578125, | |
| "epoch": 0.7774011299435029, | |
| "grad_norm": 0.18627431988716125, | |
| "kl": 0.031524658203125, | |
| "learning_rate": 2.3109612261833968e-06, | |
| "loss": -0.0039, | |
| "num_tokens": 30036392.0, | |
| "reward": 6.356723390519619, | |
| "reward_std": 1.5176555626094341, | |
| "rewards/accuracy_reward": 0.474609375, | |
| "rewards/exec_out_all_reward": 0.869140625, | |
| "rewards/exec_out_step_reward": 0.9662566669285297, | |
| "rewards/format_reward": 0.919921875, | |
| "rewards/keywords_iou_reward": 0.4986234325915575, | |
| "rewards/sql_step_keywords_recall_reward": 0.7057198826223612, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.9296875, | |
| "epoch": 0.7864406779661017, | |
| "grad_norm": 0.19546450674533844, | |
| "kl": 0.031040191650390625, | |
| "learning_rate": 2.2908382014157536e-06, | |
| "loss": 0.0014, | |
| "num_tokens": 30540172.0, | |
| "reward": 6.007154896855354, | |
| "reward_std": 1.4377120230346918, | |
| "rewards/accuracy_reward": 0.44140625, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.9677215088158846, | |
| "rewards/format_reward": 0.908203125, | |
| "rewards/keywords_iou_reward": 0.4238502769730985, | |
| "rewards/sql_step_keywords_recall_reward": 0.6624359153211117, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.09765625, | |
| "epoch": 0.7954802259887006, | |
| "grad_norm": 0.19553017616271973, | |
| "kl": 0.0312347412109375, | |
| "learning_rate": 2.27051608736011e-06, | |
| "loss": -0.0024, | |
| "num_tokens": 31042954.0, | |
| "reward": 6.573052808642387, | |
| "reward_std": 1.3831378351897001, | |
| "rewards/accuracy_reward": 0.52734375, | |
| "rewards/exec_out_all_reward": 0.873046875, | |
| "rewards/exec_out_step_reward": 0.9695428721606731, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4803972856607288, | |
| "rewards/sql_step_keywords_recall_reward": 0.7208402901887894, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.73828125, | |
| "epoch": 0.8045197740112995, | |
| "grad_norm": 0.19286681711673737, | |
| "kl": 0.031497955322265625, | |
| "learning_rate": 2.25e-06, | |
| "loss": 0.0024, | |
| "num_tokens": 31547488.0, | |
| "reward": 6.184370666742325, | |
| "reward_std": 1.2657314036041498, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9704876635223627, | |
| "rewards/format_reward": 0.9140625, | |
| "rewards/keywords_iou_reward": 0.4544413227122277, | |
| "rewards/sql_step_keywords_recall_reward": 0.7034378284588456, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.453125, | |
| "epoch": 0.8135593220338984, | |
| "grad_norm": 0.19443422555923462, | |
| "kl": 0.030490875244140625, | |
| "learning_rate": 2.229295104150703e-06, | |
| "loss": -0.0012, | |
| "num_tokens": 32049256.0, | |
| "reward": 6.299180343747139, | |
| "reward_std": 1.3650079052895308, | |
| "rewards/accuracy_reward": 0.494140625, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.9578698594123125, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.4481339924968779, | |
| "rewards/sql_step_keywords_recall_reward": 0.6755113024264574, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.705078125, | |
| "epoch": 0.8225988700564971, | |
| "grad_norm": 0.19431209564208984, | |
| "kl": 0.03226470947265625, | |
| "learning_rate": 2.2084066121590242e-06, | |
| "loss": 0.0028, | |
| "num_tokens": 32550041.0, | |
| "reward": 6.25195187330246, | |
| "reward_std": 1.1578238443471491, | |
| "rewards/accuracy_reward": 0.484375, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.958809994161129, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.4414861728437245, | |
| "rewards/sql_step_keywords_recall_reward": 0.6699351165443659, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.091796875, | |
| "epoch": 0.831638418079096, | |
| "grad_norm": 0.1891166865825653, | |
| "kl": 0.031597137451171875, | |
| "learning_rate": 2.187339782591116e-06, | |
| "loss": -0.0113, | |
| "num_tokens": 33048284.0, | |
| "reward": 6.556719660758972, | |
| "reward_std": 1.3454436883330345, | |
| "rewards/accuracy_reward": 0.529296875, | |
| "rewards/exec_out_all_reward": 0.875, | |
| "rewards/exec_out_step_reward": 0.9694142211228609, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.4681888446211815, | |
| "rewards/sql_step_keywords_recall_reward": 0.7095215003937483, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 189.263671875, | |
| "epoch": 0.8406779661016949, | |
| "grad_norm": 0.19213782250881195, | |
| "kl": 0.032009124755859375, | |
| "learning_rate": 2.166099918908661e-06, | |
| "loss": 0.0056, | |
| "num_tokens": 33552367.0, | |
| "reward": 6.123005196452141, | |
| "reward_std": 1.2814611946232617, | |
| "rewards/accuracy_reward": 0.439453125, | |
| "rewards/exec_out_all_reward": 0.849609375, | |
| "rewards/exec_out_step_reward": 0.9598136860877275, | |
| "rewards/format_reward": 0.955078125, | |
| "rewards/keywords_iou_reward": 0.4669042509049177, | |
| "rewards/sql_step_keywords_recall_reward": 0.6668829349800944, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.416015625, | |
| "epoch": 0.8497175141242937, | |
| "grad_norm": 0.19189083576202393, | |
| "kl": 0.033023834228515625, | |
| "learning_rate": 2.1446923681337578e-06, | |
| "loss": 0.0026, | |
| "num_tokens": 34052664.0, | |
| "reward": 6.199526712298393, | |
| "reward_std": 1.2074398496188223, | |
| "rewards/accuracy_reward": 0.4375, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9721904434263706, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.4780406136997044, | |
| "rewards/sql_step_keywords_recall_reward": 0.6911769825965166, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.875, | |
| "epoch": 0.8587570621468926, | |
| "grad_norm": 0.20052167773246765, | |
| "kl": 0.031097412109375, | |
| "learning_rate": 2.1231225195028298e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 34549540.0, | |
| "reward": 6.093083538115025, | |
| "reward_std": 1.3363224570639431, | |
| "rewards/accuracy_reward": 0.421875, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.9643709696829319, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.45903572149109095, | |
| "rewards/sql_step_keywords_recall_reward": 0.7145474180579185, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.525390625, | |
| "epoch": 0.8677966101694915, | |
| "grad_norm": 0.19684137403964996, | |
| "kl": 0.033634185791015625, | |
| "learning_rate": 2.1013958031099208e-06, | |
| "loss": 0.0089, | |
| "num_tokens": 35046073.0, | |
| "reward": 6.270583778619766, | |
| "reward_std": 1.2627136316150427, | |
| "rewards/accuracy_reward": 0.501953125, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9700528588145971, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.40578509494662285, | |
| "rewards/sql_step_keywords_recall_reward": 0.6627888614311814, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.716796875, | |
| "epoch": 0.8768361581920904, | |
| "grad_norm": 0.19163627922534943, | |
| "kl": 0.0335235595703125, | |
| "learning_rate": 2.079517688539693e-06, | |
| "loss": 0.0145, | |
| "num_tokens": 35545980.0, | |
| "reward": 6.588066384196281, | |
| "reward_std": 1.1638533752411604, | |
| "rewards/accuracy_reward": 0.546875, | |
| "rewards/exec_out_all_reward": 0.888671875, | |
| "rewards/exec_out_step_reward": 0.9688662607222795, | |
| "rewards/format_reward": 0.958984375, | |
| "rewards/keywords_iou_reward": 0.4430620293132961, | |
| "rewards/sql_step_keywords_recall_reward": 0.6979197897017002, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.3359375, | |
| "epoch": 0.8858757062146893, | |
| "grad_norm": 0.20076608657836914, | |
| "kl": 0.03387451171875, | |
| "learning_rate": 2.0574936834904912e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 36044740.0, | |
| "reward": 6.348625332117081, | |
| "reward_std": 1.502235893625766, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.869140625, | |
| "rewards/exec_out_step_reward": 0.9674339685589075, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4792054174467921, | |
| "rewards/sql_step_keywords_recall_reward": 0.7001243568956852, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.564453125, | |
| "epoch": 0.8949152542372881, | |
| "grad_norm": 0.1972309947013855, | |
| "kl": 0.036647796630859375, | |
| "learning_rate": 2.0353293323878076e-06, | |
| "loss": -0.001, | |
| "num_tokens": 36544293.0, | |
| "reward": 5.943858131766319, | |
| "reward_std": 1.3974212240427732, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.841796875, | |
| "rewards/exec_out_step_reward": 0.9585201255977154, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.39970124512910843, | |
| "rewards/sql_step_keywords_recall_reward": 0.6703105177730322, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.31640625, | |
| "epoch": 0.903954802259887, | |
| "grad_norm": 0.1914178431034088, | |
| "kl": 0.035003662109375, | |
| "learning_rate": 2.0130302149885033e-06, | |
| "loss": 0.008, | |
| "num_tokens": 37040831.0, | |
| "reward": 6.4351161271333694, | |
| "reward_std": 1.3373866842593998, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.90234375, | |
| "rewards/exec_out_step_reward": 0.9746520053595304, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.481153879314661, | |
| "rewards/sql_step_keywords_recall_reward": 0.713000101968646, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.529296875, | |
| "epoch": 0.9129943502824859, | |
| "grad_norm": 0.19319666922092438, | |
| "kl": 0.03475189208984375, | |
| "learning_rate": 1.990601944976133e-06, | |
| "loss": 0.0012, | |
| "num_tokens": 37538390.0, | |
| "reward": 6.169027402997017, | |
| "reward_std": 1.2931091291829944, | |
| "rewards/accuracy_reward": 0.423828125, | |
| "rewards/exec_out_all_reward": 0.916015625, | |
| "rewards/exec_out_step_reward": 0.980399776250124, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4757719023618847, | |
| "rewards/sql_step_keywords_recall_reward": 0.6804432403296232, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.212890625, | |
| "epoch": 0.9220338983050848, | |
| "grad_norm": 0.1960325837135315, | |
| "kl": 0.03401947021484375, | |
| "learning_rate": 1.9680501685477304e-06, | |
| "loss": 0.0151, | |
| "num_tokens": 38036931.0, | |
| "reward": 6.41617426276207, | |
| "reward_std": 1.365414334461093, | |
| "rewards/accuracy_reward": 0.5, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.96528010815382, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.4880142016336322, | |
| "rewards/sql_step_keywords_recall_reward": 0.6760376645252109, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.46484375, | |
| "epoch": 0.9310734463276836, | |
| "grad_norm": 0.1870131492614746, | |
| "kl": 0.035228729248046875, | |
| "learning_rate": 1.9453805629924126e-06, | |
| "loss": -0.0004, | |
| "num_tokens": 38533177.0, | |
| "reward": 6.086783587932587, | |
| "reward_std": 1.2497869406361133, | |
| "rewards/accuracy_reward": 0.44921875, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.968900365754962, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.4101551335770637, | |
| "rewards/sql_step_keywords_recall_reward": 0.6823387397453189, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.97265625, | |
| "epoch": 0.9401129943502825, | |
| "grad_norm": 0.20031407475471497, | |
| "kl": 0.035129547119140625, | |
| "learning_rate": 1.9225988352621446e-06, | |
| "loss": -0.0078, | |
| "num_tokens": 39029707.0, | |
| "reward": 6.0163338631391525, | |
| "reward_std": 1.0426889704540372, | |
| "rewards/accuracy_reward": 0.439453125, | |
| "rewards/exec_out_all_reward": 0.83203125, | |
| "rewards/exec_out_step_reward": 0.959713701158762, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.4327788045629859, | |
| "rewards/sql_step_keywords_recall_reward": 0.6695781610906124, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.3046875, | |
| "epoch": 0.9491525423728814, | |
| "grad_norm": 0.19868069887161255, | |
| "kl": 0.0337371826171875, | |
| "learning_rate": 1.8997107205350524e-06, | |
| "loss": 0.0245, | |
| "num_tokens": 39526947.0, | |
| "reward": 6.0550860315561295, | |
| "reward_std": 1.201542696915567, | |
| "rewards/accuracy_reward": 0.416015625, | |
| "rewards/exec_out_all_reward": 0.84765625, | |
| "rewards/exec_out_step_reward": 0.9624209459871054, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.48174334689974785, | |
| "rewards/sql_step_keywords_recall_reward": 0.6760533768683672, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 175.94140625, | |
| "epoch": 0.9581920903954803, | |
| "grad_norm": 0.19075711071491241, | |
| "kl": 0.035160064697265625, | |
| "learning_rate": 1.8767219807716187e-06, | |
| "loss": 0.0152, | |
| "num_tokens": 40023281.0, | |
| "reward": 6.078598067164421, | |
| "reward_std": 1.1788357459008694, | |
| "rewards/accuracy_reward": 0.4140625, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9745574481785297, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.4620191561989486, | |
| "rewards/sql_step_keywords_recall_reward": 0.6917209886014462, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.169921875, | |
| "epoch": 0.9672316384180791, | |
| "grad_norm": 0.19707335531711578, | |
| "kl": 0.036041259765625, | |
| "learning_rate": 1.853638403264141e-06, | |
| "loss": 0.0039, | |
| "num_tokens": 40516220.0, | |
| "reward": 6.2247384339571, | |
| "reward_std": 1.1692724945023656, | |
| "rewards/accuracy_reward": 0.46875, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9709844719618559, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.43461011815816164, | |
| "rewards/sql_step_keywords_recall_reward": 0.6970336530357599, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.353515625, | |
| "epoch": 0.976271186440678, | |
| "grad_norm": 0.19845238327980042, | |
| "kl": 0.036739349365234375, | |
| "learning_rate": 1.8304657991798111e-06, | |
| "loss": 0.0253, | |
| "num_tokens": 41014509.0, | |
| "reward": 6.089982569217682, | |
| "reward_std": 1.1940758088603616, | |
| "rewards/accuracy_reward": 0.4140625, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9712944850325584, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.4622031897306442, | |
| "rewards/sql_step_keywords_recall_reward": 0.7060004426166415, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.298828125, | |
| "epoch": 0.9853107344632769, | |
| "grad_norm": 0.20742465555667877, | |
| "kl": 0.03838348388671875, | |
| "learning_rate": 1.8072100020977862e-06, | |
| "loss": 0.0088, | |
| "num_tokens": 41514946.0, | |
| "reward": 5.96857476234436, | |
| "reward_std": 1.265411582775414, | |
| "rewards/accuracy_reward": 0.39453125, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.9691251274198294, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.47014313703402877, | |
| "rewards/sql_step_keywords_recall_reward": 0.6646321276202798, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.021484375, | |
| "epoch": 0.9943502824858758, | |
| "grad_norm": 0.19735410809516907, | |
| "kl": 0.03719329833984375, | |
| "learning_rate": 1.7838768665406153e-06, | |
| "loss": -0.0014, | |
| "num_tokens": 42009789.0, | |
| "reward": 6.292138174176216, | |
| "reward_std": 1.134878752520308, | |
| "rewards/accuracy_reward": 0.474609375, | |
| "rewards/exec_out_all_reward": 0.890625, | |
| "rewards/exec_out_step_reward": 0.9741862006485462, | |
| "rewards/format_reward": 0.962890625, | |
| "rewards/keywords_iou_reward": 0.44052915135398507, | |
| "rewards/sql_step_keywords_recall_reward": 0.6849405262619257, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.87109375, | |
| "epoch": 1.0090395480225989, | |
| "grad_norm": 0.20000207424163818, | |
| "kl": 0.04029083251953125, | |
| "learning_rate": 1.7604722665003958e-06, | |
| "loss": 0.0104, | |
| "num_tokens": 42504659.0, | |
| "reward": 6.249677374958992, | |
| "reward_std": 1.4170940481126308, | |
| "rewards/accuracy_reward": 0.4375, | |
| "rewards/exec_out_all_reward": 0.884765625, | |
| "rewards/exec_out_step_reward": 0.971117002889514, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.49388469848781824, | |
| "rewards/sql_step_keywords_recall_reward": 0.7165721878409386, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.515625, | |
| "epoch": 1.0180790960451978, | |
| "grad_norm": 0.1991143524646759, | |
| "kl": 0.038921356201171875, | |
| "learning_rate": 1.737002093960025e-06, | |
| "loss": 0.0071, | |
| "num_tokens": 43000247.0, | |
| "reward": 6.2897831201553345, | |
| "reward_std": 1.209823683835566, | |
| "rewards/accuracy_reward": 0.44921875, | |
| "rewards/exec_out_all_reward": 0.900390625, | |
| "rewards/exec_out_step_reward": 0.9761315789073706, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.48860851069912314, | |
| "rewards/sql_step_keywords_recall_reward": 0.699715806171298, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 176.083984375, | |
| "epoch": 1.0271186440677966, | |
| "grad_norm": 0.20260195434093475, | |
| "kl": 0.037258148193359375, | |
| "learning_rate": 1.713472257409928e-06, | |
| "loss": -0.0071, | |
| "num_tokens": 43496746.0, | |
| "reward": 6.175719887018204, | |
| "reward_std": 1.3017593873664737, | |
| "rewards/accuracy_reward": 0.453125, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.9692282117903233, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.4611970195546746, | |
| "rewards/sql_step_keywords_recall_reward": 0.6590976314619184, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 171.19921875, | |
| "epoch": 1.0361581920903955, | |
| "grad_norm": 0.20994015038013458, | |
| "kl": 0.038059234619140625, | |
| "learning_rate": 1.689888680360624e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 43989932.0, | |
| "reward": 6.245173625648022, | |
| "reward_std": 1.1906684855930507, | |
| "rewards/accuracy_reward": 0.447265625, | |
| "rewards/exec_out_all_reward": 0.904296875, | |
| "rewards/exec_out_step_reward": 0.9748775381594896, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4764430886134505, | |
| "rewards/sql_step_keywords_recall_reward": 0.6787380147725344, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.447265625, | |
| "epoch": 1.0451977401129944, | |
| "grad_norm": 0.22471484541893005, | |
| "kl": 0.039340972900390625, | |
| "learning_rate": 1.6662572998515165e-06, | |
| "loss": 0.0046, | |
| "num_tokens": 44485501.0, | |
| "reward": 6.439530774950981, | |
| "reward_std": 1.2442573299631476, | |
| "rewards/accuracy_reward": 0.5, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9739366378635168, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.46715445443987846, | |
| "rewards/sql_step_keywords_recall_reward": 0.6933945845812559, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.21484375, | |
| "epoch": 1.0542372881355933, | |
| "grad_norm": 0.20036683976650238, | |
| "kl": 0.03893280029296875, | |
| "learning_rate": 1.6425840649562737e-06, | |
| "loss": 0.0051, | |
| "num_tokens": 44984123.0, | |
| "reward": 6.33234478533268, | |
| "reward_std": 1.2393232183530927, | |
| "rewards/accuracy_reward": 0.48046875, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9720695428550243, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.45927969785407186, | |
| "rewards/sql_step_keywords_recall_reward": 0.6917158551514149, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.154296875, | |
| "epoch": 1.0632768361581921, | |
| "grad_norm": 0.19345538318157196, | |
| "kl": 0.03975677490234375, | |
| "learning_rate": 1.6188749352851825e-06, | |
| "loss": 0.0073, | |
| "num_tokens": 45483218.0, | |
| "reward": 6.538501590490341, | |
| "reward_std": 1.1121491650119424, | |
| "rewards/accuracy_reward": 0.501953125, | |
| "rewards/exec_out_all_reward": 0.865234375, | |
| "rewards/exec_out_step_reward": 0.9685004372149706, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.5210402370430529, | |
| "rewards/sql_step_keywords_recall_reward": 0.7037018835544586, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.533203125, | |
| "epoch": 1.072316384180791, | |
| "grad_norm": 0.20600463449954987, | |
| "kl": 0.036891937255859375, | |
| "learning_rate": 1.5951358794848467e-06, | |
| "loss": -0.002, | |
| "num_tokens": 45981975.0, | |
| "reward": 6.367153495550156, | |
| "reward_std": 1.4412866719067097, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9675254262983799, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.48966031754389405, | |
| "rewards/sql_step_keywords_recall_reward": 0.6722605032846332, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.583984375, | |
| "epoch": 1.0813559322033899, | |
| "grad_norm": 0.1875942498445511, | |
| "kl": 0.036533355712890625, | |
| "learning_rate": 1.5713728737356139e-06, | |
| "loss": -0.013, | |
| "num_tokens": 46481262.0, | |
| "reward": 5.682912960648537, | |
| "reward_std": 1.2121786596253514, | |
| "rewards/accuracy_reward": 0.36328125, | |
| "rewards/exec_out_all_reward": 0.8359375, | |
| "rewards/exec_out_step_reward": 0.9655343275517225, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.41401433339342475, | |
| "rewards/sql_step_keywords_recall_reward": 0.6491155764088035, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.494140625, | |
| "epoch": 1.0903954802259888, | |
| "grad_norm": 0.2242497056722641, | |
| "kl": 0.041225433349609375, | |
| "learning_rate": 1.5475919002471018e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 46983563.0, | |
| "reward": 6.413750275969505, | |
| "reward_std": 1.4098568577319384, | |
| "rewards/accuracy_reward": 0.505859375, | |
| "rewards/exec_out_all_reward": 0.875, | |
| "rewards/exec_out_step_reward": 0.9675168935209513, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.45125941652804613, | |
| "rewards/sql_step_keywords_recall_reward": 0.7019176911562681, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.625, | |
| "epoch": 1.0994350282485876, | |
| "grad_norm": 0.1926642209291458, | |
| "kl": 0.035884857177734375, | |
| "learning_rate": 1.523798945752212e-06, | |
| "loss": 0.0016, | |
| "num_tokens": 47480911.0, | |
| "reward": 6.660622417926788, | |
| "reward_std": 1.1989344246685505, | |
| "rewards/accuracy_reward": 0.52734375, | |
| "rewards/exec_out_all_reward": 0.927734375, | |
| "rewards/exec_out_step_reward": 0.9832945894449949, | |
| "rewards/format_reward": 0.958984375, | |
| "rewards/keywords_iou_reward": 0.480492593254894, | |
| "rewards/sql_step_keywords_recall_reward": 0.7202489655464888, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.0234375, | |
| "epoch": 1.1084745762711865, | |
| "grad_norm": 0.18677794933319092, | |
| "kl": 0.03594970703125, | |
| "learning_rate": 1.5e-06, | |
| "loss": -0.0015, | |
| "num_tokens": 47983023.0, | |
| "reward": 6.145782947540283, | |
| "reward_std": 1.3262191619724035, | |
| "rewards/accuracy_reward": 0.447265625, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9680919889360666, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.4536509499885142, | |
| "rewards/sql_step_keywords_recall_reward": 0.6532015362754464, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.458984375, | |
| "epoch": 1.1175141242937854, | |
| "grad_norm": 0.19311833381652832, | |
| "kl": 0.034397125244140625, | |
| "learning_rate": 1.476201054247788e-06, | |
| "loss": 0.0084, | |
| "num_tokens": 48485038.0, | |
| "reward": 6.124564379453659, | |
| "reward_std": 1.1108355158939958, | |
| "rewards/accuracy_reward": 0.44921875, | |
| "rewards/exec_out_all_reward": 0.857421875, | |
| "rewards/exec_out_step_reward": 0.9658536426723003, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4373150817118585, | |
| "rewards/sql_step_keywords_recall_reward": 0.6903305593878031, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.169921875, | |
| "epoch": 1.1265536723163843, | |
| "grad_norm": 0.19611912965774536, | |
| "kl": 0.03643798828125, | |
| "learning_rate": 1.452408099752899e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 48990821.0, | |
| "reward": 6.194984808564186, | |
| "reward_std": 1.3247142443433404, | |
| "rewards/accuracy_reward": 0.4375, | |
| "rewards/exec_out_all_reward": 0.8515625, | |
| "rewards/exec_out_step_reward": 0.9632626511156559, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.4958450337871909, | |
| "rewards/sql_step_keywords_recall_reward": 0.6951102269813418, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 191.966796875, | |
| "epoch": 1.1355932203389831, | |
| "grad_norm": 0.1942368596792221, | |
| "kl": 0.034610748291015625, | |
| "learning_rate": 1.4286271262643866e-06, | |
| "loss": 0.011, | |
| "num_tokens": 49493892.0, | |
| "reward": 6.56321893632412, | |
| "reward_std": 1.2955252706306055, | |
| "rewards/accuracy_reward": 0.51171875, | |
| "rewards/exec_out_all_reward": 0.888671875, | |
| "rewards/exec_out_step_reward": 0.9716486856341362, | |
| "rewards/format_reward": 0.95703125, | |
| "rewards/keywords_iou_reward": 0.5003192345611751, | |
| "rewards/sql_step_keywords_recall_reward": 0.6983536276966333, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 192.224609375, | |
| "epoch": 1.144632768361582, | |
| "grad_norm": 0.20695985853672028, | |
| "kl": 0.03424835205078125, | |
| "learning_rate": 1.4048641205151533e-06, | |
| "loss": -0.0047, | |
| "num_tokens": 49996803.0, | |
| "reward": 6.204301163554192, | |
| "reward_std": 1.1253142580389977, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.8984375, | |
| "rewards/exec_out_step_reward": 0.9757238961756229, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.4387336834333837, | |
| "rewards/sql_step_keywords_recall_reward": 0.7007192308083177, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.66015625, | |
| "epoch": 1.1536723163841809, | |
| "grad_norm": 0.19700393080711365, | |
| "kl": 0.03594970703125, | |
| "learning_rate": 1.3811250647148171e-06, | |
| "loss": 0.0124, | |
| "num_tokens": 50504301.0, | |
| "reward": 6.500779703259468, | |
| "reward_std": 1.2046514563262463, | |
| "rewards/accuracy_reward": 0.517578125, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.9675091523677111, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.4829273517243564, | |
| "rewards/sql_step_keywords_recall_reward": 0.7021815236657858, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.998046875, | |
| "epoch": 1.1627118644067798, | |
| "grad_norm": 0.19084982573986053, | |
| "kl": 0.034423828125, | |
| "learning_rate": 1.3574159350437264e-06, | |
| "loss": 0.003, | |
| "num_tokens": 51006412.0, | |
| "reward": 6.172577649354935, | |
| "reward_std": 1.3462738115340471, | |
| "rewards/accuracy_reward": 0.458984375, | |
| "rewards/exec_out_all_reward": 0.869140625, | |
| "rewards/exec_out_step_reward": 0.9645081553608179, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4471881305798888, | |
| "rewards/sql_step_keywords_recall_reward": 0.6691619791090488, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 197.43359375, | |
| "epoch": 1.1717514124293786, | |
| "grad_norm": 0.1826663464307785, | |
| "kl": 0.035003662109375, | |
| "learning_rate": 1.3337427001484835e-06, | |
| "loss": 0.0024, | |
| "num_tokens": 51513734.0, | |
| "reward": 6.046234875917435, | |
| "reward_std": 1.4189167954027653, | |
| "rewards/accuracy_reward": 0.44140625, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.9655668716877699, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.42813110165297985, | |
| "rewards/sql_step_keywords_recall_reward": 0.656046318821609, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 196.70703125, | |
| "epoch": 1.1807909604519775, | |
| "grad_norm": 0.1818644106388092, | |
| "kl": 0.033786773681640625, | |
| "learning_rate": 1.3101113196393759e-06, | |
| "loss": 0.0028, | |
| "num_tokens": 52020596.0, | |
| "reward": 5.911285370588303, | |
| "reward_std": 1.1000383193604648, | |
| "rewards/accuracy_reward": 0.408203125, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.9638741612434387, | |
| "rewards/format_reward": 0.95703125, | |
| "rewards/keywords_iou_reward": 0.4286212190054357, | |
| "rewards/sql_step_keywords_recall_reward": 0.6448562629520893, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 192.52734375, | |
| "epoch": 1.1898305084745764, | |
| "grad_norm": 0.19093115627765656, | |
| "kl": 0.0337066650390625, | |
| "learning_rate": 1.2865277425900725e-06, | |
| "loss": 0.0096, | |
| "num_tokens": 52524122.0, | |
| "reward": 6.245767995715141, | |
| "reward_std": 1.3251913916319609, | |
| "rewards/accuracy_reward": 0.4921875, | |
| "rewards/exec_out_all_reward": 0.8671875, | |
| "rewards/exec_out_step_reward": 0.966382997110486, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4250256856903434, | |
| "rewards/sql_step_keywords_recall_reward": 0.6480836141854525, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.916015625, | |
| "epoch": 1.1988700564971753, | |
| "grad_norm": 0.19361349940299988, | |
| "kl": 0.033050537109375, | |
| "learning_rate": 1.2629979060399751e-06, | |
| "loss": -0.0008, | |
| "num_tokens": 53027339.0, | |
| "reward": 6.7118589878082275, | |
| "reward_std": 1.4029255080968142, | |
| "rewards/accuracy_reward": 0.560546875, | |
| "rewards/exec_out_all_reward": 0.9140625, | |
| "rewards/exec_out_step_reward": 0.9767283629626036, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.477216730825603, | |
| "rewards/sql_step_keywords_recall_reward": 0.6849940754473209, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 189.78125, | |
| "epoch": 1.207909604519774, | |
| "grad_norm": 0.19163502752780914, | |
| "kl": 0.033824920654296875, | |
| "learning_rate": 1.2395277334996047e-06, | |
| "loss": -0.002, | |
| "num_tokens": 53530615.0, | |
| "reward": 6.5633436143398285, | |
| "reward_std": 1.354396466165781, | |
| "rewards/accuracy_reward": 0.529296875, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.969381669536233, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.4778866241686046, | |
| "rewards/sql_step_keywords_recall_reward": 0.6909231022000313, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.806640625, | |
| "epoch": 1.2169491525423728, | |
| "grad_norm": 0.19524553418159485, | |
| "kl": 0.0322418212890625, | |
| "learning_rate": 1.2161231334593852e-06, | |
| "loss": 0.0042, | |
| "num_tokens": 54033328.0, | |
| "reward": 6.4206047505140305, | |
| "reward_std": 1.3125502597540617, | |
| "rewards/accuracy_reward": 0.494140625, | |
| "rewards/exec_out_all_reward": 0.8984375, | |
| "rewards/exec_out_step_reward": 0.9737436473369598, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.47170518431812525, | |
| "rewards/sql_step_keywords_recall_reward": 0.6909506395459175, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 189.267578125, | |
| "epoch": 1.2259887005649717, | |
| "grad_norm": 0.18766768276691437, | |
| "kl": 0.034023284912109375, | |
| "learning_rate": 1.1927899979022142e-06, | |
| "loss": -0.0056, | |
| "num_tokens": 54535337.0, | |
| "reward": 6.526236951351166, | |
| "reward_std": 1.2283624270930886, | |
| "rewards/accuracy_reward": 0.515625, | |
| "rewards/exec_out_all_reward": 0.865234375, | |
| "rewards/exec_out_step_reward": 0.9680036306381226, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.5038230996578932, | |
| "rewards/sql_step_keywords_recall_reward": 0.6794933304190636, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 192.39453125, | |
| "epoch": 1.2350282485875705, | |
| "grad_norm": 0.19446401298046112, | |
| "kl": 0.0313568115234375, | |
| "learning_rate": 1.169534200820189e-06, | |
| "loss": 0.0045, | |
| "num_tokens": 55037983.0, | |
| "reward": 6.247622415423393, | |
| "reward_std": 1.3995716699864715, | |
| "rewards/accuracy_reward": 0.443359375, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.962557353079319, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.4969401224516332, | |
| "rewards/sql_step_keywords_recall_reward": 0.7130598044022918, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.71484375, | |
| "epoch": 1.2440677966101694, | |
| "grad_norm": 0.1841951161623001, | |
| "kl": 0.03244781494140625, | |
| "learning_rate": 1.146361596735859e-06, | |
| "loss": 0.0073, | |
| "num_tokens": 55542381.0, | |
| "reward": 6.34030369669199, | |
| "reward_std": 1.3015205739066005, | |
| "rewards/accuracy_reward": 0.48046875, | |
| "rewards/exec_out_all_reward": 0.8984375, | |
| "rewards/exec_out_step_reward": 0.9747984893620014, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.46549498522654176, | |
| "rewards/sql_step_keywords_recall_reward": 0.6747496416792274, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 190.142578125, | |
| "epoch": 1.2531073446327683, | |
| "grad_norm": 0.19216689467430115, | |
| "kl": 0.030513763427734375, | |
| "learning_rate": 1.1232780192283814e-06, | |
| "loss": 0.0089, | |
| "num_tokens": 56045178.0, | |
| "reward": 6.406587705016136, | |
| "reward_std": 1.128734229831025, | |
| "rewards/accuracy_reward": 0.486328125, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9704783633351326, | |
| "rewards/format_reward": 0.96484375, | |
| "rewards/keywords_iou_reward": 0.4696982908062637, | |
| "rewards/sql_step_keywords_recall_reward": 0.6998377349227667, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.853515625, | |
| "epoch": 1.2621468926553672, | |
| "grad_norm": 0.19616030156612396, | |
| "kl": 0.032970428466796875, | |
| "learning_rate": 1.1002892794649477e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 56547795.0, | |
| "reward": 6.060941353440285, | |
| "reward_std": 1.3276239773258567, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.853515625, | |
| "rewards/exec_out_step_reward": 0.9571653380990028, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.4333818582817912, | |
| "rewards/sql_step_keywords_recall_reward": 0.6471685189753771, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.00390625, | |
| "epoch": 1.271186440677966, | |
| "grad_norm": 0.18865719437599182, | |
| "kl": 0.030315399169921875, | |
| "learning_rate": 1.0774011647378555e-06, | |
| "loss": 0.0, | |
| "num_tokens": 57049073.0, | |
| "reward": 6.1720483005046844, | |
| "reward_std": 1.3698342852294445, | |
| "rewards/accuracy_reward": 0.45703125, | |
| "rewards/exec_out_all_reward": 0.83984375, | |
| "rewards/exec_out_step_reward": 0.9541589226573706, | |
| "rewards/format_reward": 0.955078125, | |
| "rewards/keywords_iou_reward": 0.46421836549416184, | |
| "rewards/sql_step_keywords_recall_reward": 0.6664058230817318, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.212890625, | |
| "epoch": 1.280225988700565, | |
| "grad_norm": 0.19604600965976715, | |
| "kl": 0.03199005126953125, | |
| "learning_rate": 1.0546194370075883e-06, | |
| "loss": -0.0021, | |
| "num_tokens": 57548486.0, | |
| "reward": 6.491792589426041, | |
| "reward_std": 1.2623751778155565, | |
| "rewards/accuracy_reward": 0.501953125, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9714409783482552, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.5014809351414442, | |
| "rewards/sql_step_keywords_recall_reward": 0.6736397361382842, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.6796875, | |
| "epoch": 1.2892655367231638, | |
| "grad_norm": 0.1924704760313034, | |
| "kl": 0.03244781494140625, | |
| "learning_rate": 1.0319498314522695e-06, | |
| "loss": 0.0019, | |
| "num_tokens": 58047710.0, | |
| "reward": 6.139053791761398, | |
| "reward_std": 1.1230434579774737, | |
| "rewards/accuracy_reward": 0.4453125, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9705496653914452, | |
| "rewards/format_reward": 0.9296875, | |
| "rewards/keywords_iou_reward": 0.4448565673374105, | |
| "rewards/sql_step_keywords_recall_reward": 0.68504096288234, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.365234375, | |
| "epoch": 1.2983050847457627, | |
| "grad_norm": 0.1963050365447998, | |
| "kl": 0.03145599365234375, | |
| "learning_rate": 1.0093980550238675e-06, | |
| "loss": 0.0036, | |
| "num_tokens": 58549501.0, | |
| "reward": 6.090264290571213, | |
| "reward_std": 1.4014679677784443, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.857421875, | |
| "rewards/exec_out_step_reward": 0.9621853325515985, | |
| "rewards/format_reward": 0.916015625, | |
| "rewards/keywords_iou_reward": 0.47523164842277765, | |
| "rewards/sql_step_keywords_recall_reward": 0.6776156453415751, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.072265625, | |
| "epoch": 1.3073446327683615, | |
| "grad_norm": 0.2053123414516449, | |
| "kl": 0.031829833984375, | |
| "learning_rate": 9.86969785011497e-07, | |
| "loss": 0.0054, | |
| "num_tokens": 59046626.0, | |
| "reward": 6.3448584377765656, | |
| "reward_std": 1.176757472101599, | |
| "rewards/accuracy_reward": 0.458984375, | |
| "rewards/exec_out_all_reward": 0.884765625, | |
| "rewards/exec_out_step_reward": 0.9675664994865656, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.49913227930665016, | |
| "rewards/sql_step_keywords_recall_reward": 0.7110585309565067, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.021484375, | |
| "epoch": 1.3163841807909604, | |
| "grad_norm": 0.19113275408744812, | |
| "kl": 0.031230926513671875, | |
| "learning_rate": 9.646706676121923e-07, | |
| "loss": -0.0098, | |
| "num_tokens": 59546101.0, | |
| "reward": 6.435375913977623, | |
| "reward_std": 1.407385234721005, | |
| "rewards/accuracy_reward": 0.4921875, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9711650554090738, | |
| "rewards/format_reward": 0.953125, | |
| "rewards/keywords_iou_reward": 0.48695238353684545, | |
| "rewards/sql_step_keywords_recall_reward": 0.6875717546790838, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.16015625, | |
| "epoch": 1.3254237288135593, | |
| "grad_norm": 0.2129882425069809, | |
| "kl": 0.03119659423828125, | |
| "learning_rate": 9.425063165095089e-07, | |
| "loss": -0.0039, | |
| "num_tokens": 60046491.0, | |
| "reward": 6.0068028047680855, | |
| "reward_std": 1.2135074082762003, | |
| "rewards/accuracy_reward": 0.4140625, | |
| "rewards/exec_out_all_reward": 0.87890625, | |
| "rewards/exec_out_step_reward": 0.9671890567988157, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.44391668657772243, | |
| "rewards/sql_step_keywords_recall_reward": 0.6752177719026804, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.91796875, | |
| "epoch": 1.3344632768361582, | |
| "grad_norm": 0.21050292253494263, | |
| "kl": 0.032924652099609375, | |
| "learning_rate": 9.204823114603069e-07, | |
| "loss": 0.0047, | |
| "num_tokens": 60546385.0, | |
| "reward": 6.227329030632973, | |
| "reward_std": 1.179283824749291, | |
| "rewards/accuracy_reward": 0.458984375, | |
| "rewards/exec_out_all_reward": 0.8515625, | |
| "rewards/exec_out_step_reward": 0.9637028854340315, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4683625341858715, | |
| "rewards/sql_step_keywords_recall_reward": 0.6999479737132788, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.40234375, | |
| "epoch": 1.343502824858757, | |
| "grad_norm": 0.21124009788036346, | |
| "kl": 0.032260894775390625, | |
| "learning_rate": 8.986041968900797e-07, | |
| "loss": 0.0088, | |
| "num_tokens": 61049115.0, | |
| "reward": 6.444768786430359, | |
| "reward_std": 1.4197536138817668, | |
| "rewards/accuracy_reward": 0.50390625, | |
| "rewards/exec_out_all_reward": 0.884765625, | |
| "rewards/exec_out_step_reward": 0.9735870882868767, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.4634511903859675, | |
| "rewards/sql_step_keywords_recall_reward": 0.7102948874235153, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.373046875, | |
| "epoch": 1.352542372881356, | |
| "grad_norm": 0.21280112862586975, | |
| "kl": 0.02997589111328125, | |
| "learning_rate": 8.768774804971705e-07, | |
| "loss": 0.0098, | |
| "num_tokens": 61551534.0, | |
| "reward": 6.14260359108448, | |
| "reward_std": 1.3231439045630395, | |
| "rewards/accuracy_reward": 0.443359375, | |
| "rewards/exec_out_all_reward": 0.849609375, | |
| "rewards/exec_out_step_reward": 0.9615505710244179, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.46615127846598625, | |
| "rewards/sql_step_keywords_recall_reward": 0.6784378979355097, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.35546875, | |
| "epoch": 1.3615819209039548, | |
| "grad_norm": 0.20350147783756256, | |
| "kl": 0.031444549560546875, | |
| "learning_rate": 8.553076318662425e-07, | |
| "loss": 0.0024, | |
| "num_tokens": 62054616.0, | |
| "reward": 5.876114495098591, | |
| "reward_std": 1.1356665641069412, | |
| "rewards/accuracy_reward": 0.373046875, | |
| "rewards/exec_out_all_reward": 0.857421875, | |
| "rewards/exec_out_step_reward": 0.9639787971973419, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.4735513115301728, | |
| "rewards/sql_step_keywords_recall_reward": 0.6779237259179354, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.556640625, | |
| "epoch": 1.3706214689265537, | |
| "grad_norm": 0.18335995078086853, | |
| "kl": 0.030361175537109375, | |
| "learning_rate": 8.339000810913388e-07, | |
| "loss": -0.0031, | |
| "num_tokens": 62553909.0, | |
| "reward": 6.188477337360382, | |
| "reward_std": 1.1688060224987566, | |
| "rewards/accuracy_reward": 0.466796875, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9724152106791735, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.421648170100525, | |
| "rewards/sql_step_keywords_recall_reward": 0.6715939035639167, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.1953125, | |
| "epoch": 1.3796610169491526, | |
| "grad_norm": 0.18019497394561768, | |
| "kl": 0.032196044921875, | |
| "learning_rate": 8.126602174088844e-07, | |
| "loss": -0.0063, | |
| "num_tokens": 63051989.0, | |
| "reward": 6.597941100597382, | |
| "reward_std": 1.2893404318019748, | |
| "rewards/accuracy_reward": 0.533203125, | |
| "rewards/exec_out_all_reward": 0.890625, | |
| "rewards/exec_out_step_reward": 0.9766291547566652, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4863300649449229, | |
| "rewards/sql_step_keywords_recall_reward": 0.6838080808520317, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.888671875, | |
| "epoch": 1.3887005649717514, | |
| "grad_norm": 0.20189358294010162, | |
| "kl": 0.031101226806640625, | |
| "learning_rate": 7.915933878409761e-07, | |
| "loss": -0.0082, | |
| "num_tokens": 63552568.0, | |
| "reward": 6.299270272254944, | |
| "reward_std": 1.1446837144903839, | |
| "rewards/accuracy_reward": 0.484375, | |
| "rewards/exec_out_all_reward": 0.873046875, | |
| "rewards/exec_out_step_reward": 0.9710131492465734, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.45233285753056407, | |
| "rewards/sql_step_keywords_recall_reward": 0.6716383351013064, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.68359375, | |
| "epoch": 1.3977401129943503, | |
| "grad_norm": 0.1866413652896881, | |
| "kl": 0.03018951416015625, | |
| "learning_rate": 7.707048958492972e-07, | |
| "loss": 0.0052, | |
| "num_tokens": 64051946.0, | |
| "reward": 6.494629591703415, | |
| "reward_std": 1.1752266022376716, | |
| "rewards/accuracy_reward": 0.50390625, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9707899298518896, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.4983974387869239, | |
| "rewards/sql_step_keywords_recall_reward": 0.6989197302609682, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.88671875, | |
| "epoch": 1.4067796610169492, | |
| "grad_norm": 0.18069399893283844, | |
| "kl": 0.031505584716796875, | |
| "learning_rate": 7.500000000000003e-07, | |
| "loss": 0.0009, | |
| "num_tokens": 64549212.0, | |
| "reward": 6.292890816926956, | |
| "reward_std": 1.408079206943512, | |
| "rewards/accuracy_reward": 0.4921875, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.9635618217289448, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.44063833844847977, | |
| "rewards/sql_step_keywords_recall_reward": 0.6863335473462939, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.708984375, | |
| "epoch": 1.415819209039548, | |
| "grad_norm": 0.19954738020896912, | |
| "kl": 0.03003692626953125, | |
| "learning_rate": 7.294839126398909e-07, | |
| "loss": 0.0072, | |
| "num_tokens": 65046991.0, | |
| "reward": 6.40887725353241, | |
| "reward_std": 1.1249554408714175, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.90234375, | |
| "rewards/exec_out_step_reward": 0.9757773783057928, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.48421067791059613, | |
| "rewards/sql_step_keywords_recall_reward": 0.6892878729850054, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.326171875, | |
| "epoch": 1.424858757062147, | |
| "grad_norm": 0.1969391107559204, | |
| "kl": 0.0305023193359375, | |
| "learning_rate": 7.091617985842463e-07, | |
| "loss": 0.0018, | |
| "num_tokens": 65544562.0, | |
| "reward": 6.413661152124405, | |
| "reward_std": 1.2689514786470681, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.890625, | |
| "rewards/exec_out_step_reward": 0.9725539479404688, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.49607263831421733, | |
| "rewards/sql_step_keywords_recall_reward": 0.693102465942502, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.669921875, | |
| "epoch": 1.4338983050847458, | |
| "grad_norm": 0.19801102578639984, | |
| "kl": 0.03037261962890625, | |
| "learning_rate": 6.890387738166042e-07, | |
| "loss": -0.0004, | |
| "num_tokens": 66041689.0, | |
| "reward": 6.508177891373634, | |
| "reward_std": 1.3759017111733556, | |
| "rewards/accuracy_reward": 0.5234375, | |
| "rewards/exec_out_all_reward": 0.923828125, | |
| "rewards/exec_out_step_reward": 0.9815886970609426, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4463528748601675, | |
| "rewards/sql_step_keywords_recall_reward": 0.6709927897900343, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.77734375, | |
| "epoch": 1.4429378531073447, | |
| "grad_norm": 0.19369132816791534, | |
| "kl": 0.0314788818359375, | |
| "learning_rate": 6.691199042008347e-07, | |
| "loss": 0.003, | |
| "num_tokens": 66538115.0, | |
| "reward": 6.160938322544098, | |
| "reward_std": 1.2979237555991858, | |
| "rewards/accuracy_reward": 0.46875, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9724400117993355, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.412393850274384, | |
| "rewards/sql_step_keywords_recall_reward": 0.6566793192178011, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 176.439453125, | |
| "epoch": 1.4519774011299436, | |
| "grad_norm": 0.1850394755601883, | |
| "kl": 0.02997589111328125, | |
| "learning_rate": 6.494102042058441e-07, | |
| "loss": 0.002, | |
| "num_tokens": 67035676.0, | |
| "reward": 6.159577623009682, | |
| "reward_std": 1.2864303840324283, | |
| "rewards/accuracy_reward": 0.462890625, | |
| "rewards/exec_out_all_reward": 0.873046875, | |
| "rewards/exec_out_step_reward": 0.9678982235491276, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4201263478025794, | |
| "rewards/sql_step_keywords_recall_reward": 0.6854111216962337, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.287109375, | |
| "epoch": 1.4610169491525424, | |
| "grad_norm": 0.20010262727737427, | |
| "kl": 0.030330657958984375, | |
| "learning_rate": 6.29914635643203e-07, | |
| "loss": -0.0048, | |
| "num_tokens": 67531747.0, | |
| "reward": 6.243592485785484, | |
| "reward_std": 1.3317115511745214, | |
| "rewards/accuracy_reward": 0.455078125, | |
| "rewards/exec_out_all_reward": 0.888671875, | |
| "rewards/exec_out_step_reward": 0.9729833193123341, | |
| "rewards/format_reward": 0.966796875, | |
| "rewards/keywords_iou_reward": 0.46371736377477646, | |
| "rewards/sql_step_keywords_recall_reward": 0.6673932354897261, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.583984375, | |
| "epoch": 1.4700564971751413, | |
| "grad_norm": 0.19057103991508484, | |
| "kl": 0.032482147216796875, | |
| "learning_rate": 6.106381064180395e-07, | |
| "loss": 0.0051, | |
| "num_tokens": 68029614.0, | |
| "reward": 6.253777638077736, | |
| "reward_std": 1.371273732278496, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.87890625, | |
| "rewards/exec_out_step_reward": 0.9656637534499168, | |
| "rewards/format_reward": 0.916015625, | |
| "rewards/keywords_iou_reward": 0.450145754031837, | |
| "rewards/sql_step_keywords_recall_reward": 0.6788380099460483, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.328125, | |
| "epoch": 1.4790960451977402, | |
| "grad_norm": 0.1943560391664505, | |
| "kl": 0.02936553955078125, | |
| "learning_rate": 5.915854692935003e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 68527730.0, | |
| "reward": 6.239083915948868, | |
| "reward_std": 1.3497515600174665, | |
| "rewards/accuracy_reward": 0.48046875, | |
| "rewards/exec_out_all_reward": 0.84375, | |
| "rewards/exec_out_step_reward": 0.9633672833442688, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.45536057371646166, | |
| "rewards/sql_step_keywords_recall_reward": 0.6638235626742244, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 177.58203125, | |
| "epoch": 1.488135593220339, | |
| "grad_norm": 0.19292642176151276, | |
| "kl": 0.029979705810546875, | |
| "learning_rate": 5.727615206690921e-07, | |
| "loss": 0.0093, | |
| "num_tokens": 69024424.0, | |
| "reward": 6.809370994567871, | |
| "reward_std": 1.231651745736599, | |
| "rewards/accuracy_reward": 0.5703125, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9722276534885168, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4978084210306406, | |
| "rewards/sql_step_keywords_recall_reward": 0.7380108721554279, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 176.873046875, | |
| "epoch": 1.497175141242938, | |
| "grad_norm": 0.19874171912670135, | |
| "kl": 0.02878570556640625, | |
| "learning_rate": 5.541709993732168e-07, | |
| "loss": 0.0052, | |
| "num_tokens": 69519951.0, | |
| "reward": 6.364575162529945, | |
| "reward_std": 1.2380520347505808, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.912109375, | |
| "rewards/exec_out_step_reward": 0.9779986720532179, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.4500721860677004, | |
| "rewards/sql_step_keywords_recall_reward": 0.6973696993663907, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.763671875, | |
| "epoch": 1.5062146892655366, | |
| "grad_norm": 0.19222836196422577, | |
| "kl": 0.031803131103515625, | |
| "learning_rate": 5.358185854701909e-07, | |
| "loss": 0.016, | |
| "num_tokens": 70020190.0, | |
| "reward": 5.9986598044633865, | |
| "reward_std": 1.2878384962677956, | |
| "rewards/accuracy_reward": 0.392578125, | |
| "rewards/exec_out_all_reward": 0.90234375, | |
| "rewards/exec_out_step_reward": 0.9768469464033842, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.46051234856713563, | |
| "rewards/sql_step_keywords_recall_reward": 0.6847725082188845, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.427734375, | |
| "epoch": 1.5152542372881355, | |
| "grad_norm": 0.18806356191635132, | |
| "kl": 0.02924346923828125, | |
| "learning_rate": 5.177088990820725e-07, | |
| "loss": 0.0156, | |
| "num_tokens": 70519589.0, | |
| "reward": 6.586422994732857, | |
| "reward_std": 1.445882560685277, | |
| "rewards/accuracy_reward": 0.525390625, | |
| "rewards/exec_out_all_reward": 0.873046875, | |
| "rewards/exec_out_step_reward": 0.9663845468312502, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.48613627161830664, | |
| "rewards/sql_step_keywords_recall_reward": 0.7317502833902836, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.669921875, | |
| "epoch": 1.5242937853107343, | |
| "grad_norm": 0.19503186643123627, | |
| "kl": 0.027843475341796875, | |
| "learning_rate": 4.998464992255627e-07, | |
| "loss": -0.0017, | |
| "num_tokens": 71019964.0, | |
| "reward": 5.967584699392319, | |
| "reward_std": 1.1278308150358498, | |
| "rewards/accuracy_reward": 0.404296875, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9704303070902824, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4382792445831001, | |
| "rewards/sql_step_keywords_recall_reward": 0.6870021214708686, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.84375, | |
| "epoch": 1.5333333333333332, | |
| "grad_norm": 0.18861566483974457, | |
| "kl": 0.027347564697265625, | |
| "learning_rate": 4.82235882664302e-07, | |
| "loss": -0.0001, | |
| "num_tokens": 71518200.0, | |
| "reward": 6.40145568549633, | |
| "reward_std": 1.264804814942181, | |
| "rewards/accuracy_reward": 0.486328125, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9703760556876659, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.47830750420689583, | |
| "rewards/sql_step_keywords_recall_reward": 0.7127459226176143, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.998046875, | |
| "epoch": 1.542372881355932, | |
| "grad_norm": 0.19780074059963226, | |
| "kl": 0.027751922607421875, | |
| "learning_rate": 4.648814827768323e-07, | |
| "loss": 0.0012, | |
| "num_tokens": 72016087.0, | |
| "reward": 6.078360304236412, | |
| "reward_std": 1.233950492925942, | |
| "rewards/accuracy_reward": 0.4296875, | |
| "rewards/exec_out_all_reward": 0.869140625, | |
| "rewards/exec_out_step_reward": 0.9696281347423792, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.45836541755124927, | |
| "rewards/sql_step_keywords_recall_reward": 0.6568450266495347, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.81640625, | |
| "epoch": 1.551412429378531, | |
| "grad_norm": 0.20185421407222748, | |
| "kl": 0.028972625732421875, | |
| "learning_rate": 4.4778766844051793e-07, | |
| "loss": -0.0002, | |
| "num_tokens": 72515641.0, | |
| "reward": 6.029541537165642, | |
| "reward_std": 1.3070494611747563, | |
| "rewards/accuracy_reward": 0.416015625, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.9650747179985046, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.4725735238753259, | |
| "rewards/sql_step_keywords_recall_reward": 0.6486166473478079, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.265625, | |
| "epoch": 1.5604519774011298, | |
| "grad_norm": 0.18522052466869354, | |
| "kl": 0.028156280517578125, | |
| "learning_rate": 4.309587429317061e-07, | |
| "loss": -0.0069, | |
| "num_tokens": 73014473.0, | |
| "reward": 6.007387965917587, | |
| "reward_std": 1.1997167933732271, | |
| "rewards/accuracy_reward": 0.4140625, | |
| "rewards/exec_out_all_reward": 0.857421875, | |
| "rewards/exec_out_step_reward": 0.9654715433716774, | |
| "rewards/format_reward": 0.955078125, | |
| "rewards/keywords_iou_reward": 0.441091364948079, | |
| "rewards/sql_step_keywords_recall_reward": 0.690983671694994, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.296875, | |
| "epoch": 1.5694915254237287, | |
| "grad_norm": 0.18709787726402283, | |
| "kl": 0.02730560302734375, | |
| "learning_rate": 4.1439894284239473e-07, | |
| "loss": 0.0048, | |
| "num_tokens": 73516757.0, | |
| "reward": 5.854448825120926, | |
| "reward_std": 1.0955259250476956, | |
| "rewards/accuracy_reward": 0.396484375, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.9658908490091562, | |
| "rewards/format_reward": 0.919921875, | |
| "rewards/keywords_iou_reward": 0.419542781310156, | |
| "rewards/sql_step_keywords_recall_reward": 0.6803318522870541, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.041015625, | |
| "epoch": 1.5785310734463276, | |
| "grad_norm": 0.18421481549739838, | |
| "kl": 0.02740478515625, | |
| "learning_rate": 3.981124370137002e-07, | |
| "loss": 0.0015, | |
| "num_tokens": 74016222.0, | |
| "reward": 6.491113051772118, | |
| "reward_std": 1.2589517189189792, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.87890625, | |
| "rewards/exec_out_step_reward": 0.969364620745182, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.5320240047294647, | |
| "rewards/sql_step_keywords_recall_reward": 0.7194192241877317, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.123046875, | |
| "epoch": 1.5875706214689265, | |
| "grad_norm": 0.20407724380493164, | |
| "kl": 0.027408599853515625, | |
| "learning_rate": 3.82103325486368e-07, | |
| "loss": 0.0167, | |
| "num_tokens": 74517801.0, | |
| "reward": 6.3000208735466, | |
| "reward_std": 1.357117084786296, | |
| "rewards/accuracy_reward": 0.4765625, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9679687526077032, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.44746885914355516, | |
| "rewards/sql_step_keywords_recall_reward": 0.704692529514432, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.05078125, | |
| "epoch": 1.5966101694915253, | |
| "grad_norm": 0.19040866196155548, | |
| "kl": 0.02652740478515625, | |
| "learning_rate": 3.6637563846861275e-07, | |
| "loss": -0.0012, | |
| "num_tokens": 75013259.0, | |
| "reward": 6.4401615858078, | |
| "reward_std": 1.168332906672731, | |
| "rewards/accuracy_reward": 0.505859375, | |
| "rewards/exec_out_all_reward": 0.890625, | |
| "rewards/exec_out_step_reward": 0.974679134786129, | |
| "rewards/format_reward": 0.9609375, | |
| "rewards/keywords_iou_reward": 0.43378148321062326, | |
| "rewards/sql_step_keywords_recall_reward": 0.7229194287210703, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.84375, | |
| "epoch": 1.6056497175141242, | |
| "grad_norm": 0.19518616795539856, | |
| "kl": 0.027690887451171875, | |
| "learning_rate": 3.5093333532153313e-07, | |
| "loss": 0.0007, | |
| "num_tokens": 75511563.0, | |
| "reward": 6.2283158749341965, | |
| "reward_std": 1.2062615705654025, | |
| "rewards/accuracy_reward": 0.466796875, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9733979757875204, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.4363416051492095, | |
| "rewards/sql_step_keywords_recall_reward": 0.698640950024128, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.58203125, | |
| "epoch": 1.614689265536723, | |
| "grad_norm": 0.19140039384365082, | |
| "kl": 0.03049468994140625, | |
| "learning_rate": 3.357803035623646e-07, | |
| "loss": 0.0114, | |
| "num_tokens": 76009449.0, | |
| "reward": 6.212793804705143, | |
| "reward_std": 1.3535035271197557, | |
| "rewards/accuracy_reward": 0.4296875, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9737645741552114, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.502739230170846, | |
| "rewards/sql_step_keywords_recall_reward": 0.6788633242249489, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.478515625, | |
| "epoch": 1.623728813559322, | |
| "grad_norm": 0.20329172909259796, | |
| "kl": 0.02678680419921875, | |
| "learning_rate": 3.209203578858191e-07, | |
| "loss": 0.0015, | |
| "num_tokens": 76512090.0, | |
| "reward": 5.99811252951622, | |
| "reward_std": 1.3144248933531344, | |
| "rewards/accuracy_reward": 0.427734375, | |
| "rewards/exec_out_all_reward": 0.890625, | |
| "rewards/exec_out_step_reward": 0.9720734115689993, | |
| "rewards/format_reward": 0.90625, | |
| "rewards/keywords_iou_reward": 0.43393061752431095, | |
| "rewards/sql_step_keywords_recall_reward": 0.6503653433173895, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.916015625, | |
| "epoch": 1.6327683615819208, | |
| "grad_norm": 0.20274612307548523, | |
| "kl": 0.027004241943359375, | |
| "learning_rate": 3.063572392037517e-07, | |
| "loss": -0.0061, | |
| "num_tokens": 77010299.0, | |
| "reward": 6.448321744799614, | |
| "reward_std": 1.2834087014198303, | |
| "rewards/accuracy_reward": 0.490234375, | |
| "rewards/exec_out_all_reward": 0.888671875, | |
| "rewards/exec_out_step_reward": 0.9712286107242107, | |
| "rewards/format_reward": 0.9609375, | |
| "rewards/keywords_iou_reward": 0.4910502852872014, | |
| "rewards/sql_step_keywords_recall_reward": 0.6844456251710653, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.615234375, | |
| "epoch": 1.6418079096045197, | |
| "grad_norm": 0.20460090041160583, | |
| "kl": 0.02655029296875, | |
| "learning_rate": 2.920946137034121e-07, | |
| "loss": 0.0016, | |
| "num_tokens": 77506942.0, | |
| "reward": 6.275775626301765, | |
| "reward_std": 1.3149840263649821, | |
| "rewards/accuracy_reward": 0.458984375, | |
| "rewards/exec_out_all_reward": 0.89453125, | |
| "rewards/exec_out_step_reward": 0.9711751285940409, | |
| "rewards/format_reward": 0.92578125, | |
| "rewards/keywords_iou_reward": 0.4703782368451357, | |
| "rewards/sql_step_keywords_recall_reward": 0.7075940538197756, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.2421875, | |
| "epoch": 1.6508474576271186, | |
| "grad_norm": 0.1951521635055542, | |
| "kl": 0.02664947509765625, | |
| "learning_rate": 2.781360719244964e-07, | |
| "loss": 0.0085, | |
| "num_tokens": 78006654.0, | |
| "reward": 6.269969627261162, | |
| "reward_std": 1.5092957746237516, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.8671875, | |
| "rewards/exec_out_step_reward": 0.9697536900639534, | |
| "rewards/format_reward": 0.923828125, | |
| "rewards/keywords_iou_reward": 0.43865267653018236, | |
| "rewards/sql_step_keywords_recall_reward": 0.7022074311971664, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.6171875, | |
| "epoch": 1.6598870056497175, | |
| "grad_norm": 0.18546244502067566, | |
| "kl": 0.02535247802734375, | |
| "learning_rate": 2.64485127855251e-07, | |
| "loss": 0.0072, | |
| "num_tokens": 78504702.0, | |
| "reward": 6.320208579301834, | |
| "reward_std": 1.431410001590848, | |
| "rewards/accuracy_reward": 0.474609375, | |
| "rewards/exec_out_all_reward": 0.859375, | |
| "rewards/exec_out_step_reward": 0.9669557642191648, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.4761747941374779, | |
| "rewards/sql_step_keywords_recall_reward": 0.7055906923487782, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.552734375, | |
| "epoch": 1.6689265536723163, | |
| "grad_norm": 0.20000162720680237, | |
| "kl": 0.026123046875, | |
| "learning_rate": 2.5114521804784305e-07, | |
| "loss": -0.0022, | |
| "num_tokens": 79001101.0, | |
| "reward": 6.44641749560833, | |
| "reward_std": 1.3242136964108795, | |
| "rewards/accuracy_reward": 0.498046875, | |
| "rewards/exec_out_all_reward": 0.904296875, | |
| "rewards/exec_out_step_reward": 0.9747899696230888, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.46967238979414105, | |
| "rewards/sql_step_keywords_recall_reward": 0.6924389712512493, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 179.234375, | |
| "epoch": 1.6779661016949152, | |
| "grad_norm": 0.18595600128173828, | |
| "kl": 0.026111602783203125, | |
| "learning_rate": 2.3811970075322803e-07, | |
| "loss": 0.0056, | |
| "num_tokens": 79497289.0, | |
| "reward": 6.366844519972801, | |
| "reward_std": 1.3081835759803653, | |
| "rewards/accuracy_reward": 0.490234375, | |
| "rewards/exec_out_all_reward": 0.904296875, | |
| "rewards/exec_out_step_reward": 0.9748759996145964, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.43892133235931396, | |
| "rewards/sql_step_keywords_recall_reward": 0.7094383966177702, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.5234375, | |
| "epoch": 1.687005649717514, | |
| "grad_norm": 0.20412081480026245, | |
| "kl": 0.02643585205078125, | |
| "learning_rate": 2.254118550757286e-07, | |
| "loss": 0.0128, | |
| "num_tokens": 79997557.0, | |
| "reward": 6.095862299203873, | |
| "reward_std": 1.4136508908122778, | |
| "rewards/accuracy_reward": 0.453125, | |
| "rewards/exec_out_all_reward": 0.837890625, | |
| "rewards/exec_out_step_reward": 0.9592912942171097, | |
| "rewards/format_reward": 0.9375, | |
| "rewards/keywords_iou_reward": 0.44104228960350156, | |
| "rewards/sql_step_keywords_recall_reward": 0.6665958110243082, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.845703125, | |
| "epoch": 1.696045197740113, | |
| "grad_norm": 0.19426719844341278, | |
| "kl": 0.026729583740234375, | |
| "learning_rate": 2.130248801475344e-07, | |
| "loss": -0.0023, | |
| "num_tokens": 80499130.0, | |
| "reward": 6.401883035898209, | |
| "reward_std": 1.3080633180215955, | |
| "rewards/accuracy_reward": 0.501953125, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9698521215468645, | |
| "rewards/format_reward": 0.93359375, | |
| "rewards/keywords_iou_reward": 0.45063989935442805, | |
| "rewards/sql_step_keywords_recall_reward": 0.7065323041751981, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.69921875, | |
| "epoch": 1.7050847457627119, | |
| "grad_norm": 0.18957343697547913, | |
| "kl": 0.027210235595703125, | |
| "learning_rate": 2.0096189432334195e-07, | |
| "loss": -0.0014, | |
| "num_tokens": 80999876.0, | |
| "reward": 6.019113600254059, | |
| "reward_std": 1.2530112564563751, | |
| "rewards/accuracy_reward": 0.423828125, | |
| "rewards/exec_out_all_reward": 0.884765625, | |
| "rewards/exec_out_step_reward": 0.9723625108599663, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.4438905091956258, | |
| "rewards/sql_step_keywords_recall_reward": 0.6433451026678085, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.720703125, | |
| "epoch": 1.7141242937853107, | |
| "grad_norm": 0.19690978527069092, | |
| "kl": 0.026309967041015625, | |
| "learning_rate": 1.892259343953226e-07, | |
| "loss": 0.0181, | |
| "num_tokens": 81498693.0, | |
| "reward": 6.560971170663834, | |
| "reward_std": 1.3874189644120634, | |
| "rewards/accuracy_reward": 0.5078125, | |
| "rewards/exec_out_all_reward": 0.9296875, | |
| "rewards/exec_out_step_reward": 0.9844036791473627, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.47905752109363675, | |
| "rewards/sql_step_keywords_recall_reward": 0.7258742917329073, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.2578125, | |
| "epoch": 1.7231638418079096, | |
| "grad_norm": 0.2019815295934677, | |
| "kl": 0.02889251708984375, | |
| "learning_rate": 1.7781995482862706e-07, | |
| "loss": -0.0001, | |
| "num_tokens": 81997317.0, | |
| "reward": 6.342395722866058, | |
| "reward_std": 1.1282042702659965, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.9375, | |
| "rewards/exec_out_step_reward": 0.985164001584053, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.46832023840397596, | |
| "rewards/sql_step_keywords_recall_reward": 0.6979350317269564, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.46875, | |
| "epoch": 1.7322033898305085, | |
| "grad_norm": 0.19806276261806488, | |
| "kl": 0.027133941650390625, | |
| "learning_rate": 1.6674682701761496e-07, | |
| "loss": 0.0079, | |
| "num_tokens": 82498165.0, | |
| "reward": 6.360767655074596, | |
| "reward_std": 1.2937721209600568, | |
| "rewards/accuracy_reward": 0.49609375, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9698614254593849, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.44721768144518137, | |
| "rewards/sql_step_keywords_recall_reward": 0.6937365289777517, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.044921875, | |
| "epoch": 1.7412429378531074, | |
| "grad_norm": 0.1942872703075409, | |
| "kl": 0.025554656982421875, | |
| "learning_rate": 1.5600933856299637e-07, | |
| "loss": 0.0038, | |
| "num_tokens": 83000064.0, | |
| "reward": 6.068183168768883, | |
| "reward_std": 1.4636581200174987, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.833984375, | |
| "rewards/exec_out_step_reward": 0.9575916156172752, | |
| "rewards/format_reward": 0.912109375, | |
| "rewards/keywords_iou_reward": 0.44754891796037555, | |
| "rewards/sql_step_keywords_recall_reward": 0.6647125380113721, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.869140625, | |
| "epoch": 1.7502824858757062, | |
| "grad_norm": 0.19219300150871277, | |
| "kl": 0.0272674560546875, | |
| "learning_rate": 1.4561019257006842e-07, | |
| "loss": 0.0018, | |
| "num_tokens": 83500337.0, | |
| "reward": 6.50420406460762, | |
| "reward_std": 1.2782420022413135, | |
| "rewards/accuracy_reward": 0.53125, | |
| "rewards/exec_out_all_reward": 0.857421875, | |
| "rewards/exec_out_step_reward": 0.9676587302237749, | |
| "rewards/format_reward": 0.927734375, | |
| "rewards/keywords_iou_reward": 0.46198446361813694, | |
| "rewards/sql_step_keywords_recall_reward": 0.7024201266467571, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.498046875, | |
| "epoch": 1.759322033898305, | |
| "grad_norm": 0.1927022784948349, | |
| "kl": 0.02826690673828125, | |
| "learning_rate": 1.3555200696822234e-07, | |
| "loss": 0.0006, | |
| "num_tokens": 84003200.0, | |
| "reward": 6.2347564697265625, | |
| "reward_std": 1.3537420043721795, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9668782595545053, | |
| "rewards/format_reward": 0.92578125, | |
| "rewards/keywords_iou_reward": 0.43135018879547715, | |
| "rewards/sql_step_keywords_recall_reward": 0.6786152720451355, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.091796875, | |
| "epoch": 1.768361581920904, | |
| "grad_norm": 0.1913248747587204, | |
| "kl": 0.026073455810546875, | |
| "learning_rate": 1.2583731385189562e-07, | |
| "loss": -0.0044, | |
| "num_tokens": 84499811.0, | |
| "reward": 6.492121763527393, | |
| "reward_std": 1.319369402481243, | |
| "rewards/accuracy_reward": 0.509765625, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9670100193470716, | |
| "rewards/format_reward": 0.947265625, | |
| "rewards/keywords_iou_reward": 0.47826224053278565, | |
| "rewards/sql_step_keywords_recall_reward": 0.7053060494363308, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.16015625, | |
| "epoch": 1.7774011299435029, | |
| "grad_norm": 0.18614481389522552, | |
| "kl": 0.02542877197265625, | |
| "learning_rate": 1.1646855884312813e-07, | |
| "loss": -0.0025, | |
| "num_tokens": 84998661.0, | |
| "reward": 6.048209026455879, | |
| "reward_std": 1.3966924250125885, | |
| "rewards/accuracy_reward": 0.44921875, | |
| "rewards/exec_out_all_reward": 0.833984375, | |
| "rewards/exec_out_step_reward": 0.9590921085327864, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.42350240517407656, | |
| "rewards/sql_step_keywords_recall_reward": 0.6717996271327138, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.830078125, | |
| "epoch": 1.7864406779661017, | |
| "grad_norm": 0.20049569010734558, | |
| "kl": 0.02552032470703125, | |
| "learning_rate": 1.0744810047589116e-07, | |
| "loss": 0.0085, | |
| "num_tokens": 85499318.0, | |
| "reward": 6.379006251692772, | |
| "reward_std": 1.3671105708926916, | |
| "rewards/accuracy_reward": 0.482421875, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9740528911352158, | |
| "rewards/format_reward": 0.921875, | |
| "rewards/keywords_iou_reward": 0.48139098659157753, | |
| "rewards/sql_step_keywords_recall_reward": 0.7038900572806597, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 186.349609375, | |
| "epoch": 1.7954802259887006, | |
| "grad_norm": 0.1779472380876541, | |
| "kl": 0.02698516845703125, | |
| "learning_rate": 9.877820960234002e-08, | |
| "loss": -0.0, | |
| "num_tokens": 85999137.0, | |
| "reward": 6.308243364095688, | |
| "reward_std": 1.220099939033389, | |
| "rewards/accuracy_reward": 0.46875, | |
| "rewards/exec_out_all_reward": 0.888671875, | |
| "rewards/exec_out_step_reward": 0.9750612266361713, | |
| "rewards/format_reward": 0.958984375, | |
| "rewards/keywords_iou_reward": 0.4597327196970582, | |
| "rewards/sql_step_keywords_recall_reward": 0.6910604787990451, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.806640625, | |
| "epoch": 1.8045197740112995, | |
| "grad_norm": 0.1846829503774643, | |
| "kl": 0.025753021240234375, | |
| "learning_rate": 9.046106882113752e-08, | |
| "loss": 0.0034, | |
| "num_tokens": 86497530.0, | |
| "reward": 6.5371609181165695, | |
| "reward_std": 1.2568824323825538, | |
| "rewards/accuracy_reward": 0.537109375, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9686531201004982, | |
| "rewards/format_reward": 0.927734375, | |
| "rewards/keywords_iou_reward": 0.4614989855326712, | |
| "rewards/sql_step_keywords_recall_reward": 0.698244234547019, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.83984375, | |
| "epoch": 1.8135593220338984, | |
| "grad_norm": 0.2010311335325241, | |
| "kl": 0.026294708251953125, | |
| "learning_rate": 8.249877192799731e-08, | |
| "loss": 0.0117, | |
| "num_tokens": 86996804.0, | |
| "reward": 6.099536940455437, | |
| "reward_std": 1.1783363316208124, | |
| "rewards/accuracy_reward": 0.42578125, | |
| "rewards/exec_out_all_reward": 0.869140625, | |
| "rewards/exec_out_step_reward": 0.9668596535921097, | |
| "rewards/format_reward": 0.919921875, | |
| "rewards/keywords_iou_reward": 0.4779109531082213, | |
| "rewards/sql_step_keywords_recall_reward": 0.6846678359434009, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 176.578125, | |
| "epoch": 1.8225988700564972, | |
| "grad_norm": 0.19091665744781494, | |
| "kl": 0.0277862548828125, | |
| "learning_rate": 7.489332338858202e-08, | |
| "loss": 0.0051, | |
| "num_tokens": 87493076.0, | |
| "reward": 6.273200556635857, | |
| "reward_std": 1.2643384067341685, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.876953125, | |
| "rewards/exec_out_step_reward": 0.9696537107229233, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.474200002849102, | |
| "rewards/sql_step_keywords_recall_reward": 0.6930374698713422, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 193.27734375, | |
| "epoch": 1.831638418079096, | |
| "grad_norm": 0.1949450820684433, | |
| "kl": 0.02576446533203125, | |
| "learning_rate": 6.76466378338892e-08, | |
| "loss": -0.003, | |
| "num_tokens": 88001494.0, | |
| "reward": 6.114458784461021, | |
| "reward_std": 1.2841259008273482, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.833984375, | |
| "rewards/exec_out_step_reward": 0.9544363897293806, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.45011676382273436, | |
| "rewards/sql_step_keywords_recall_reward": 0.6465076114982367, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.59375, | |
| "epoch": 1.840677966101695, | |
| "grad_norm": 0.1869850605726242, | |
| "kl": 0.026782989501953125, | |
| "learning_rate": 6.076053957825411e-08, | |
| "loss": -0.0073, | |
| "num_tokens": 88501102.0, | |
| "reward": 6.228741064667702, | |
| "reward_std": 1.307523036841303, | |
| "rewards/accuracy_reward": 0.4609375, | |
| "rewards/exec_out_all_reward": 0.89453125, | |
| "rewards/exec_out_step_reward": 0.9735940638929605, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.4400957697071135, | |
| "rewards/sql_step_keywords_recall_reward": 0.685502259992063, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.279296875, | |
| "epoch": 1.8497175141242939, | |
| "grad_norm": 0.19799074530601501, | |
| "kl": 0.026813507080078125, | |
| "learning_rate": 5.423676216008694e-08, | |
| "loss": 0.0047, | |
| "num_tokens": 89002113.0, | |
| "reward": 6.288302145898342, | |
| "reward_std": 1.3452563788741827, | |
| "rewards/accuracy_reward": 0.443359375, | |
| "rewards/exec_out_all_reward": 0.896484375, | |
| "rewards/exec_out_step_reward": 0.973502604290843, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.496027123183012, | |
| "rewards/sql_step_keywords_recall_reward": 0.7211827598512173, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.740234375, | |
| "epoch": 1.8587570621468927, | |
| "grad_norm": 0.1976655274629593, | |
| "kl": 0.027782440185546875, | |
| "learning_rate": 4.807694790546563e-08, | |
| "loss": 0.0013, | |
| "num_tokens": 89500732.0, | |
| "reward": 6.4734716564416885, | |
| "reward_std": 1.2421362679451704, | |
| "rewards/accuracy_reward": 0.494140625, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9743815138936043, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.49230897752568126, | |
| "rewards/sql_step_keywords_recall_reward": 0.7156439917162061, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.732421875, | |
| "epoch": 1.8677966101694916, | |
| "grad_norm": 0.21844159066677094, | |
| "kl": 0.026279449462890625, | |
| "learning_rate": 4.2282647514687525e-08, | |
| "loss": 0.0087, | |
| "num_tokens": 90000723.0, | |
| "reward": 6.1350885555148125, | |
| "reward_std": 1.201456573791802, | |
| "rewards/accuracy_reward": 0.427734375, | |
| "rewards/exec_out_all_reward": 0.87109375, | |
| "rewards/exec_out_step_reward": 0.9642880447208881, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.47726981807500124, | |
| "rewards/sql_step_keywords_recall_reward": 0.6889170501381159, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.2109375, | |
| "epoch": 1.8768361581920905, | |
| "grad_norm": 0.19886131584644318, | |
| "kl": 0.026386260986328125, | |
| "learning_rate": 3.685531967188943e-08, | |
| "loss": 0.0084, | |
| "num_tokens": 90502179.0, | |
| "reward": 6.224872663617134, | |
| "reward_std": 1.0856527155265212, | |
| "rewards/accuracy_reward": 0.451171875, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.9733282178640366, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.4684679554775357, | |
| "rewards/sql_step_keywords_recall_reward": 0.6759366653859615, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.986328125, | |
| "epoch": 1.8858757062146894, | |
| "grad_norm": 0.1973237842321396, | |
| "kl": 0.028614044189453125, | |
| "learning_rate": 3.1796330677832056e-08, | |
| "loss": 0.0041, | |
| "num_tokens": 91001508.0, | |
| "reward": 6.379835411906242, | |
| "reward_std": 1.199483459815383, | |
| "rewards/accuracy_reward": 0.478515625, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.973174761980772, | |
| "rewards/format_reward": 0.9453125, | |
| "rewards/keywords_iou_reward": 0.4832291747443378, | |
| "rewards/sql_step_keywords_recall_reward": 0.6882491651922464, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.765625, | |
| "epoch": 1.8949152542372882, | |
| "grad_norm": 0.20336341857910156, | |
| "kl": 0.025714874267578125, | |
| "learning_rate": 2.710695410593994e-08, | |
| "loss": 0.0105, | |
| "num_tokens": 91500808.0, | |
| "reward": 6.037193328142166, | |
| "reward_std": 1.2909285621717572, | |
| "rewards/accuracy_reward": 0.431640625, | |
| "rewards/exec_out_all_reward": 0.8828125, | |
| "rewards/exec_out_step_reward": 0.9731863792985678, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.4269423745572567, | |
| "rewards/sql_step_keywords_recall_reward": 0.6573878172785044, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.685546875, | |
| "epoch": 1.9039548022598871, | |
| "grad_norm": 0.19327940046787262, | |
| "kl": 0.02794647216796875, | |
| "learning_rate": 2.278837048168797e-08, | |
| "loss": -0.0007, | |
| "num_tokens": 92000983.0, | |
| "reward": 6.355218142271042, | |
| "reward_std": 1.3059450194705278, | |
| "rewards/accuracy_reward": 0.48046875, | |
| "rewards/exec_out_all_reward": 0.919921875, | |
| "rewards/exec_out_step_reward": 0.980823727324605, | |
| "rewards/format_reward": 0.939453125, | |
| "rewards/keywords_iou_reward": 0.4438144704326987, | |
| "rewards/sql_step_keywords_recall_reward": 0.7055154535919428, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 187.62890625, | |
| "epoch": 1.912994350282486, | |
| "grad_norm": 0.19996102154254913, | |
| "kl": 0.02562713623046875, | |
| "learning_rate": 1.8841666985408568e-08, | |
| "loss": 0.0026, | |
| "num_tokens": 92501125.0, | |
| "reward": 6.144110098481178, | |
| "reward_std": 1.3673810623586178, | |
| "rewards/accuracy_reward": 0.419921875, | |
| "rewards/exec_out_all_reward": 0.85546875, | |
| "rewards/exec_out_step_reward": 0.9624224957078695, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.5000754236243665, | |
| "rewards/sql_step_keywords_recall_reward": 0.7108335876837373, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 178.302734375, | |
| "epoch": 1.9220338983050849, | |
| "grad_norm": 0.19664518535137177, | |
| "kl": 0.025630950927734375, | |
| "learning_rate": 1.5267837178600972e-08, | |
| "loss": 0.0021, | |
| "num_tokens": 92998136.0, | |
| "reward": 6.30431304872036, | |
| "reward_std": 1.3761892821639776, | |
| "rewards/accuracy_reward": 0.474609375, | |
| "rewards/exec_out_all_reward": 0.89453125, | |
| "rewards/exec_out_step_reward": 0.9729538708925247, | |
| "rewards/format_reward": 0.943359375, | |
| "rewards/keywords_iou_reward": 0.454624411650002, | |
| "rewards/sql_step_keywords_recall_reward": 0.6857822621241212, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.158203125, | |
| "epoch": 1.9310734463276837, | |
| "grad_norm": 0.18419690430164337, | |
| "kl": 0.02671051025390625, | |
| "learning_rate": 1.206778075380699e-08, | |
| "loss": 0.0046, | |
| "num_tokens": 93497689.0, | |
| "reward": 6.260894909501076, | |
| "reward_std": 1.3260251162573695, | |
| "rewards/accuracy_reward": 0.466796875, | |
| "rewards/exec_out_all_reward": 0.8984375, | |
| "rewards/exec_out_step_reward": 0.9784419946372509, | |
| "rewards/format_reward": 0.94140625, | |
| "rewards/keywords_iou_reward": 0.45361618138849735, | |
| "rewards/sql_step_keywords_recall_reward": 0.6681892573833466, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.88671875, | |
| "epoch": 1.9401129943502826, | |
| "grad_norm": 0.2128608375787735, | |
| "kl": 0.027370452880859375, | |
| "learning_rate": 9.242303308118816e-09, | |
| "loss": -0.0028, | |
| "num_tokens": 93996519.0, | |
| "reward": 6.385763391852379, | |
| "reward_std": 1.3881093207746744, | |
| "rewards/accuracy_reward": 0.49609375, | |
| "rewards/exec_out_all_reward": 0.89453125, | |
| "rewards/exec_out_step_reward": 0.975878132507205, | |
| "rewards/format_reward": 0.923828125, | |
| "rewards/keywords_iou_reward": 0.44957458041608334, | |
| "rewards/sql_step_keywords_recall_reward": 0.7080017421394587, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.0703125, | |
| "epoch": 1.9491525423728815, | |
| "grad_norm": 0.185265451669693, | |
| "kl": 0.026287078857421875, | |
| "learning_rate": 6.792116140373117e-09, | |
| "loss": -0.0056, | |
| "num_tokens": 94496719.0, | |
| "reward": 6.287876293063164, | |
| "reward_std": 1.1061802469193935, | |
| "rewards/accuracy_reward": 0.47265625, | |
| "rewards/exec_out_all_reward": 0.880859375, | |
| "rewards/exec_out_step_reward": 0.9741544220596552, | |
| "rewards/format_reward": 0.94921875, | |
| "rewards/keywords_iou_reward": 0.4433903433382511, | |
| "rewards/sql_step_keywords_recall_reward": 0.7062380816787481, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.119140625, | |
| "epoch": 1.9581920903954804, | |
| "grad_norm": 0.19649042189121246, | |
| "kl": 0.02834320068359375, | |
| "learning_rate": 4.7178360720865895e-09, | |
| "loss": 0.0082, | |
| "num_tokens": 94996296.0, | |
| "reward": 6.1123000383377075, | |
| "reward_std": 1.1606702040880919, | |
| "rewards/accuracy_reward": 0.4375, | |
| "rewards/exec_out_all_reward": 0.90625, | |
| "rewards/exec_out_step_reward": 0.9780420735478401, | |
| "rewards/format_reward": 0.9296875, | |
| "rewards/keywords_iou_reward": 0.44118882389739156, | |
| "rewards/sql_step_keywords_recall_reward": 0.665942832827568, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.314453125, | |
| "epoch": 1.9672316384180792, | |
| "grad_norm": 0.20041655004024506, | |
| "kl": 0.025310516357421875, | |
| "learning_rate": 3.0199852921735105e-09, | |
| "loss": -0.002, | |
| "num_tokens": 95493249.0, | |
| "reward": 6.628199502825737, | |
| "reward_std": 1.2294201632030308, | |
| "rewards/accuracy_reward": 0.521484375, | |
| "rewards/exec_out_all_reward": 0.88671875, | |
| "rewards/exec_out_step_reward": 0.9736746642738581, | |
| "rewards/format_reward": 0.951171875, | |
| "rewards/keywords_iou_reward": 0.5000922083854675, | |
| "rewards/sql_step_keywords_recall_reward": 0.730512335896492, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.69140625, | |
| "epoch": 1.9762711864406781, | |
| "grad_norm": 0.2012374997138977, | |
| "kl": 0.0261688232421875, | |
| "learning_rate": 1.6989912254880557e-09, | |
| "loss": 0.0159, | |
| "num_tokens": 95991843.0, | |
| "reward": 6.3724522441625595, | |
| "reward_std": 1.4343089256435633, | |
| "rewards/accuracy_reward": 0.509765625, | |
| "rewards/exec_out_all_reward": 0.875, | |
| "rewards/exec_out_step_reward": 0.9689127672463655, | |
| "rewards/format_reward": 0.9140625, | |
| "rewards/keywords_iou_reward": 0.4413177212700248, | |
| "rewards/sql_step_keywords_recall_reward": 0.6927789896726608, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 182.826171875, | |
| "epoch": 1.985310734463277, | |
| "grad_norm": 0.18999001383781433, | |
| "kl": 0.0272064208984375, | |
| "learning_rate": 7.551864252223761e-10, | |
| "loss": 0.0057, | |
| "num_tokens": 96489986.0, | |
| "reward": 6.2077417075634, | |
| "reward_std": 1.2748773116618395, | |
| "rewards/accuracy_reward": 0.4453125, | |
| "rewards/exec_out_all_reward": 0.892578125, | |
| "rewards/exec_out_step_reward": 0.9720354303717613, | |
| "rewards/format_reward": 0.935546875, | |
| "rewards/keywords_iou_reward": 0.4684856841340661, | |
| "rewards/sql_step_keywords_recall_reward": 0.6893599443137646, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 183.3854250907898, | |
| "epoch": 1.9943502824858759, | |
| "grad_norm": 0.18637076020240784, | |
| "kl": 0.0267333984375, | |
| "learning_rate": 1.8880848918739758e-10, | |
| "loss": -0.0015, | |
| "num_tokens": 96990210.0, | |
| "reward": 6.196442812681198, | |
| "reward_std": 1.3213467076420784, | |
| "rewards/accuracy_reward": 0.4765625, | |
| "rewards/exec_out_all_reward": 0.86328125, | |
| "rewards/exec_out_step_reward": 0.969191774725914, | |
| "rewards/format_reward": 0.931640625, | |
| "rewards/keywords_iou_reward": 0.4246487212367356, | |
| "rewards/sql_step_keywords_recall_reward": 0.676781676709652, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.9943502824858759, | |
| "step": 220, | |
| "total_flos": 0.0, | |
| "train_loss": 0.002633511937032877, | |
| "train_runtime": 233060.1474, | |
| "train_samples_per_second": 0.122, | |
| "train_steps_per_second": 0.001 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 220, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 27, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |