| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 411, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 3070.0, |
| "completions/max_terminated_length": 2896.7, |
| "completions/mean_length": 1017.178125, |
| "completions/mean_terminated_length": 988.4677978515625, |
| "completions/min_length": 239.7, |
| "completions/min_terminated_length": 239.7, |
| "entropy": 0.3605123937129974, |
| "epoch": 0.072992700729927, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.6796875, |
| "learning_rate": 9.78102189781022e-06, |
| "loss": -0.0091, |
| "num_tokens": 1212849.0, |
| "reward": 0.434375, |
| "reward_std": 0.15717875137925147, |
| "rewards/qwen_accuracy_reward/mean": 0.434375, |
| "rewards/qwen_accuracy_reward/std": 0.46030205190181733, |
| "step": 10, |
| "step_time": 106.30040930798278 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.071875, |
| "completions/max_length": 3382.3, |
| "completions/max_terminated_length": 3000.2, |
| "completions/mean_length": 1388.7375, |
| "completions/mean_terminated_length": 1186.2124328613281, |
| "completions/min_length": 297.7, |
| "completions/min_terminated_length": 297.7, |
| "entropy": 0.4167850613594055, |
| "epoch": 0.145985401459854, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 1.046875, |
| "learning_rate": 9.537712895377129e-06, |
| "loss": 0.0608, |
| "num_tokens": 2697493.0, |
| "reward": 0.478125, |
| "reward_std": 0.27870663031935694, |
| "rewards/qwen_accuracy_reward/mean": 0.478125, |
| "rewards/qwen_accuracy_reward/std": 0.4272594749927521, |
| "step": 20, |
| "step_time": 116.88615104537458 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.053125, |
| "completions/max_length": 3308.6, |
| "completions/max_terminated_length": 3243.7, |
| "completions/mean_length": 1304.284375, |
| "completions/mean_terminated_length": 1165.7695190429688, |
| "completions/min_length": 270.6, |
| "completions/min_terminated_length": 270.6, |
| "entropy": 0.4009901225566864, |
| "epoch": 0.21897810218978103, |
| "frac_reward_zero_std": 0.55, |
| "grad_norm": 1.875, |
| "learning_rate": 9.294403892944039e-06, |
| "loss": 0.0254, |
| "num_tokens": 4025352.0, |
| "reward": 0.471875, |
| "reward_std": 0.19138479307293893, |
| "rewards/qwen_accuracy_reward/mean": 0.471875, |
| "rewards/qwen_accuracy_reward/std": 0.48829147815704343, |
| "step": 30, |
| "step_time": 106.55014775730669 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.040625, |
| "completions/max_length": 3444.4, |
| "completions/max_terminated_length": 2867.6, |
| "completions/mean_length": 1196.390625, |
| "completions/mean_terminated_length": 1088.1075134277344, |
| "completions/min_length": 288.2, |
| "completions/min_terminated_length": 288.2, |
| "entropy": 0.4015911310911179, |
| "epoch": 0.291970802919708, |
| "frac_reward_zero_std": 0.6, |
| "grad_norm": 0.53515625, |
| "learning_rate": 9.05109489051095e-06, |
| "loss": 0.0434, |
| "num_tokens": 5191093.0, |
| "reward": 0.521875, |
| "reward_std": 0.18105824217200278, |
| "rewards/qwen_accuracy_reward/mean": 0.521875, |
| "rewards/qwen_accuracy_reward/std": 0.48740494847297666, |
| "step": 40, |
| "step_time": 116.13580646244809 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.025, |
| "completions/max_length": 3239.4, |
| "completions/max_terminated_length": 3025.9, |
| "completions/mean_length": 1166.659375, |
| "completions/mean_terminated_length": 1098.1408386230469, |
| "completions/min_length": 246.9, |
| "completions/min_terminated_length": 246.9, |
| "entropy": 0.4154060840606689, |
| "epoch": 0.36496350364963503, |
| "frac_reward_zero_std": 0.75, |
| "grad_norm": 0.71484375, |
| "learning_rate": 8.80778588807786e-06, |
| "loss": 0.0252, |
| "num_tokens": 6289576.0, |
| "reward": 0.56875, |
| "reward_std": 0.11836256608366966, |
| "rewards/qwen_accuracy_reward/mean": 0.56875, |
| "rewards/qwen_accuracy_reward/std": 0.47113593220710753, |
| "step": 50, |
| "step_time": 102.3454062897712 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 2896.0, |
| "completions/max_terminated_length": 2697.2, |
| "completions/mean_length": 1067.15, |
| "completions/mean_terminated_length": 1040.172314453125, |
| "completions/min_length": 278.2, |
| "completions/min_terminated_length": 278.2, |
| "entropy": 0.4182083398103714, |
| "epoch": 0.43795620437956206, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.0, |
| "learning_rate": 8.56447688564477e-06, |
| "loss": -0.0045, |
| "num_tokens": 7485720.0, |
| "reward": 0.46875, |
| "reward_std": 0.14403236508369446, |
| "rewards/qwen_accuracy_reward/mean": 0.46875, |
| "rewards/qwen_accuracy_reward/std": 0.4563746154308319, |
| "step": 60, |
| "step_time": 103.93981777941808 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 2650.0, |
| "completions/max_terminated_length": 2549.8, |
| "completions/mean_length": 948.559375, |
| "completions/mean_terminated_length": 924.6193481445313, |
| "completions/min_length": 209.7, |
| "completions/min_terminated_length": 209.7, |
| "entropy": 0.34950864464044573, |
| "epoch": 0.5109489051094891, |
| "frac_reward_zero_std": 0.65, |
| "grad_norm": 0.0, |
| "learning_rate": 8.32116788321168e-06, |
| "loss": 0.016, |
| "num_tokens": 8730603.0, |
| "reward": 0.528125, |
| "reward_std": 0.1587614081799984, |
| "rewards/qwen_accuracy_reward/mean": 0.528125, |
| "rewards/qwen_accuracy_reward/std": 0.4472527623176575, |
| "step": 70, |
| "step_time": 90.1208279568702 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.003125, |
| "completions/max_length": 2975.0, |
| "completions/max_terminated_length": 2827.8, |
| "completions/mean_length": 988.715625, |
| "completions/mean_terminated_length": 980.0193481445312, |
| "completions/min_length": 225.9, |
| "completions/min_terminated_length": 225.9, |
| "entropy": 0.37495362758636475, |
| "epoch": 0.583941605839416, |
| "frac_reward_zero_std": 0.6, |
| "grad_norm": 1.234375, |
| "learning_rate": 8.07785888077859e-06, |
| "loss": -0.021, |
| "num_tokens": 9949824.0, |
| "reward": 0.521875, |
| "reward_std": 0.18053897097706795, |
| "rewards/qwen_accuracy_reward/mean": 0.521875, |
| "rewards/qwen_accuracy_reward/std": 0.4544884204864502, |
| "step": 80, |
| "step_time": 97.22130602933467 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.015625, |
| "completions/max_length": 2843.5, |
| "completions/max_terminated_length": 2426.7, |
| "completions/mean_length": 921.684375, |
| "completions/mean_terminated_length": 873.637158203125, |
| "completions/min_length": 278.9, |
| "completions/min_terminated_length": 278.9, |
| "entropy": 0.37491864860057833, |
| "epoch": 0.656934306569343, |
| "frac_reward_zero_std": 0.625, |
| "grad_norm": 1.546875, |
| "learning_rate": 7.8345498783455e-06, |
| "loss": 0.0557, |
| "num_tokens": 11177307.0, |
| "reward": 0.5625, |
| "reward_std": 0.1712738409638405, |
| "rewards/qwen_accuracy_reward/mean": 0.5625, |
| "rewards/qwen_accuracy_reward/std": 0.46112917065620423, |
| "step": 90, |
| "step_time": 95.75766938729211 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.015625, |
| "completions/max_length": 2333.1, |
| "completions/max_terminated_length": 2311.9, |
| "completions/mean_length": 859.08125, |
| "completions/mean_terminated_length": 811.47939453125, |
| "completions/min_length": 218.5, |
| "completions/min_terminated_length": 218.5, |
| "entropy": 0.36555847227573396, |
| "epoch": 0.7299270072992701, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.96875, |
| "learning_rate": 7.591240875912409e-06, |
| "loss": -0.002, |
| "num_tokens": 12404869.0, |
| "reward": 0.440625, |
| "reward_std": 0.14087215512990953, |
| "rewards/qwen_accuracy_reward/mean": 0.440625, |
| "rewards/qwen_accuracy_reward/std": 0.4636432766914368, |
| "step": 100, |
| "step_time": 82.00954903159291 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 3052.7, |
| "completions/max_terminated_length": 2857.8, |
| "completions/mean_length": 1005.96875, |
| "completions/mean_terminated_length": 977.4821411132813, |
| "completions/min_length": 276.6, |
| "completions/min_terminated_length": 276.6, |
| "entropy": 0.36328954696655275, |
| "epoch": 0.8029197080291971, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 1.0546875, |
| "learning_rate": 7.347931873479319e-06, |
| "loss": 0.0546, |
| "num_tokens": 13642787.0, |
| "reward": 0.66875, |
| "reward_std": 0.1395061768591404, |
| "rewards/qwen_accuracy_reward/mean": 0.66875, |
| "rewards/qwen_accuracy_reward/std": 0.40047125071287154, |
| "step": 110, |
| "step_time": 102.26997727015987 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0125, |
| "completions/max_length": 3162.9, |
| "completions/max_terminated_length": 2987.6, |
| "completions/mean_length": 1102.9875, |
| "completions/mean_terminated_length": 1069.5874267578124, |
| "completions/min_length": 304.1, |
| "completions/min_terminated_length": 304.1, |
| "entropy": 0.35163818299770355, |
| "epoch": 0.8759124087591241, |
| "frac_reward_zero_std": 0.575, |
| "grad_norm": 0.84375, |
| "learning_rate": 7.1046228710462296e-06, |
| "loss": 0.0128, |
| "num_tokens": 14774175.0, |
| "reward": 0.534375, |
| "reward_std": 0.18348564356565475, |
| "rewards/qwen_accuracy_reward/mean": 0.534375, |
| "rewards/qwen_accuracy_reward/std": 0.4590408980846405, |
| "step": 120, |
| "step_time": 97.75387887172401 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 2885.8, |
| "completions/max_terminated_length": 2841.5, |
| "completions/mean_length": 1009.309375, |
| "completions/mean_terminated_length": 981.8988159179687, |
| "completions/min_length": 288.1, |
| "completions/min_terminated_length": 288.1, |
| "entropy": 0.34102891981601713, |
| "epoch": 0.948905109489051, |
| "frac_reward_zero_std": 0.7, |
| "grad_norm": 0.82421875, |
| "learning_rate": 6.86131386861314e-06, |
| "loss": -0.0044, |
| "num_tokens": 15966530.0, |
| "reward": 0.70625, |
| "reward_std": 0.14433300495147705, |
| "rewards/qwen_accuracy_reward/mean": 0.70625, |
| "rewards/qwen_accuracy_reward/std": 0.411708801984787, |
| "step": 130, |
| "step_time": 103.35281996680423 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.04375, |
| "completions/max_length": 2664.1, |
| "completions/max_terminated_length": 2568.1, |
| "completions/mean_length": 1134.346875, |
| "completions/mean_terminated_length": 1014.4972045898437, |
| "completions/min_length": 318.0, |
| "completions/min_terminated_length": 318.0, |
| "entropy": 0.33926538228988645, |
| "epoch": 1.0218978102189782, |
| "frac_reward_zero_std": 0.625, |
| "grad_norm": 0.0, |
| "learning_rate": 6.618004866180049e-06, |
| "loss": 0.0148, |
| "num_tokens": 17214129.0, |
| "reward": 0.540625, |
| "reward_std": 0.1648663252592087, |
| "rewards/qwen_accuracy_reward/mean": 0.540625, |
| "rewards/qwen_accuracy_reward/std": 0.45352436900138854, |
| "step": 140, |
| "step_time": 91.66583738289773 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00625, |
| "completions/max_length": 2898.0, |
| "completions/max_terminated_length": 2755.6, |
| "completions/mean_length": 1108.35625, |
| "completions/mean_terminated_length": 1091.26259765625, |
| "completions/min_length": 349.7, |
| "completions/min_terminated_length": 349.7, |
| "entropy": 0.33356466293334963, |
| "epoch": 1.094890510948905, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.734375, |
| "learning_rate": 6.3746958637469595e-06, |
| "loss": 0.0161, |
| "num_tokens": 18739043.0, |
| "reward": 0.65, |
| "reward_std": 0.14624504819512368, |
| "rewards/qwen_accuracy_reward/mean": 0.65, |
| "rewards/qwen_accuracy_reward/std": 0.4041076198220253, |
| "step": 150, |
| "step_time": 111.4213294208981 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.05, |
| "completions/max_length": 2954.3, |
| "completions/max_terminated_length": 2814.9, |
| "completions/mean_length": 1294.83125, |
| "completions/mean_terminated_length": 1166.3035034179688, |
| "completions/min_length": 401.2, |
| "completions/min_terminated_length": 401.2, |
| "entropy": 0.36098510324954985, |
| "epoch": 1.167883211678832, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.0, |
| "learning_rate": 6.13138686131387e-06, |
| "loss": 0.0291, |
| "num_tokens": 20131237.0, |
| "reward": 0.71875, |
| "reward_std": 0.14718669205904006, |
| "rewards/qwen_accuracy_reward/mean": 0.71875, |
| "rewards/qwen_accuracy_reward/std": 0.3977723315358162, |
| "step": 160, |
| "step_time": 100.20992619153112 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00625, |
| "completions/max_length": 2856.4, |
| "completions/max_terminated_length": 2700.9, |
| "completions/mean_length": 1204.96875, |
| "completions/mean_terminated_length": 1187.974169921875, |
| "completions/min_length": 393.8, |
| "completions/min_terminated_length": 393.8, |
| "entropy": 0.3756037563085556, |
| "epoch": 1.2408759124087592, |
| "frac_reward_zero_std": 0.55, |
| "grad_norm": 0.671875, |
| "learning_rate": 5.888077858880778e-06, |
| "loss": 0.0116, |
| "num_tokens": 21383299.0, |
| "reward": 0.596875, |
| "reward_std": 0.20872601345181466, |
| "rewards/qwen_accuracy_reward/mean": 0.596875, |
| "rewards/qwen_accuracy_reward/std": 0.46048612892627716, |
| "step": 170, |
| "step_time": 99.4391059097834 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00625, |
| "completions/max_length": 3201.4, |
| "completions/max_terminated_length": 3001.8, |
| "completions/mean_length": 1198.003125, |
| "completions/mean_terminated_length": 1179.4652099609375, |
| "completions/min_length": 440.1, |
| "completions/min_terminated_length": 440.1, |
| "entropy": 0.38104947507381437, |
| "epoch": 1.313868613138686, |
| "frac_reward_zero_std": 0.75, |
| "grad_norm": 0.5625, |
| "learning_rate": 5.6447688564476885e-06, |
| "loss": 0.0187, |
| "num_tokens": 22501172.0, |
| "reward": 0.7625, |
| "reward_std": 0.10478792265057564, |
| "rewards/qwen_accuracy_reward/mean": 0.7625, |
| "rewards/qwen_accuracy_reward/std": 0.3862288236618042, |
| "step": 180, |
| "step_time": 101.38317952565849 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.01875, |
| "completions/max_length": 2751.6, |
| "completions/max_terminated_length": 2576.8, |
| "completions/mean_length": 1141.85625, |
| "completions/mean_terminated_length": 1095.5864868164062, |
| "completions/min_length": 378.7, |
| "completions/min_terminated_length": 378.7, |
| "entropy": 0.37404528707265855, |
| "epoch": 1.3868613138686132, |
| "frac_reward_zero_std": 0.575, |
| "grad_norm": 0.0, |
| "learning_rate": 5.401459854014599e-06, |
| "loss": 0.0105, |
| "num_tokens": 23793222.0, |
| "reward": 0.521875, |
| "reward_std": 0.18928286358714103, |
| "rewards/qwen_accuracy_reward/mean": 0.521875, |
| "rewards/qwen_accuracy_reward/std": 0.43783398270606994, |
| "step": 190, |
| "step_time": 91.93207672638819 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0375, |
| "completions/max_length": 3231.8, |
| "completions/max_terminated_length": 3000.5, |
| "completions/mean_length": 1305.665625, |
| "completions/mean_terminated_length": 1198.9109619140625, |
| "completions/min_length": 365.7, |
| "completions/min_terminated_length": 365.7, |
| "entropy": 0.3786713719367981, |
| "epoch": 1.4598540145985401, |
| "frac_reward_zero_std": 0.725, |
| "grad_norm": 0.80078125, |
| "learning_rate": 5.158150851581509e-06, |
| "loss": 0.0082, |
| "num_tokens": 25037259.0, |
| "reward": 0.603125, |
| "reward_std": 0.12046253234148026, |
| "rewards/qwen_accuracy_reward/mean": 0.603125, |
| "rewards/qwen_accuracy_reward/std": 0.42881221920251844, |
| "step": 200, |
| "step_time": 103.0258747473359 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.015625, |
| "completions/max_length": 3153.1, |
| "completions/max_terminated_length": 3017.2, |
| "completions/mean_length": 1360.325, |
| "completions/mean_terminated_length": 1316.6628784179688, |
| "completions/min_length": 432.6, |
| "completions/min_terminated_length": 432.6, |
| "entropy": 0.3885278135538101, |
| "epoch": 1.5328467153284673, |
| "frac_reward_zero_std": 0.6, |
| "grad_norm": 0.50390625, |
| "learning_rate": 4.914841849148419e-06, |
| "loss": 0.034, |
| "num_tokens": 26258443.0, |
| "reward": 0.66875, |
| "reward_std": 0.18044402971863746, |
| "rewards/qwen_accuracy_reward/mean": 0.66875, |
| "rewards/qwen_accuracy_reward/std": 0.4504780650138855, |
| "step": 210, |
| "step_time": 96.96674608923495 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2816.0, |
| "completions/max_terminated_length": 2816.0, |
| "completions/mean_length": 1161.459375, |
| "completions/mean_terminated_length": 1161.459375, |
| "completions/min_length": 397.7, |
| "completions/min_terminated_length": 397.7, |
| "entropy": 0.38007940649986266, |
| "epoch": 1.6058394160583942, |
| "frac_reward_zero_std": 0.75, |
| "grad_norm": 0.7421875, |
| "learning_rate": 4.671532846715329e-06, |
| "loss": 0.0182, |
| "num_tokens": 27530662.0, |
| "reward": 0.7, |
| "reward_std": 0.11110442206263542, |
| "rewards/qwen_accuracy_reward/mean": 0.7, |
| "rewards/qwen_accuracy_reward/std": 0.36664991080760956, |
| "step": 220, |
| "step_time": 93.51679303245619 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.034375, |
| "completions/max_length": 2982.4, |
| "completions/max_terminated_length": 2932.6, |
| "completions/mean_length": 1282.75, |
| "completions/mean_terminated_length": 1197.8360961914063, |
| "completions/min_length": 432.0, |
| "completions/min_terminated_length": 432.0, |
| "entropy": 0.36942420303821566, |
| "epoch": 1.6788321167883211, |
| "frac_reward_zero_std": 0.55, |
| "grad_norm": 0.68359375, |
| "learning_rate": 4.428223844282239e-06, |
| "loss": 0.0049, |
| "num_tokens": 28913198.0, |
| "reward": 0.559375, |
| "reward_std": 0.1864362359046936, |
| "rewards/qwen_accuracy_reward/mean": 0.559375, |
| "rewards/qwen_accuracy_reward/std": 0.4745823562145233, |
| "step": 230, |
| "step_time": 106.11833359738812 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.028125, |
| "completions/max_length": 3358.5, |
| "completions/max_terminated_length": 3078.0, |
| "completions/mean_length": 1389.68125, |
| "completions/mean_terminated_length": 1318.075048828125, |
| "completions/min_length": 451.0, |
| "completions/min_terminated_length": 451.0, |
| "entropy": 0.4076398193836212, |
| "epoch": 1.7518248175182483, |
| "frac_reward_zero_std": 0.625, |
| "grad_norm": 0.640625, |
| "learning_rate": 4.184914841849148e-06, |
| "loss": 0.0356, |
| "num_tokens": 30106480.0, |
| "reward": 0.640625, |
| "reward_std": 0.16717590987682343, |
| "rewards/qwen_accuracy_reward/mean": 0.640625, |
| "rewards/qwen_accuracy_reward/std": 0.4686185359954834, |
| "step": 240, |
| "step_time": 115.86419467311353 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.015625, |
| "completions/max_length": 3010.5, |
| "completions/max_terminated_length": 2811.1, |
| "completions/mean_length": 1186.053125, |
| "completions/mean_terminated_length": 1145.055078125, |
| "completions/min_length": 383.4, |
| "completions/min_terminated_length": 383.4, |
| "entropy": 0.3604145884513855, |
| "epoch": 1.8248175182481752, |
| "frac_reward_zero_std": 0.625, |
| "grad_norm": 0.8984375, |
| "learning_rate": 3.9416058394160585e-06, |
| "loss": 0.0403, |
| "num_tokens": 31363833.0, |
| "reward": 0.59375, |
| "reward_std": 0.16990982741117477, |
| "rewards/qwen_accuracy_reward/mean": 0.59375, |
| "rewards/qwen_accuracy_reward/std": 0.43118971437215803, |
| "step": 250, |
| "step_time": 99.8452183963731 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2370.0, |
| "completions/max_terminated_length": 2370.0, |
| "completions/mean_length": 1080.86875, |
| "completions/mean_terminated_length": 1080.86875, |
| "completions/min_length": 409.6, |
| "completions/min_terminated_length": 409.6, |
| "entropy": 0.37349976003170016, |
| "epoch": 1.897810218978102, |
| "frac_reward_zero_std": 0.825, |
| "grad_norm": 0.53515625, |
| "learning_rate": 3.698296836982969e-06, |
| "loss": -0.0252, |
| "num_tokens": 32702599.0, |
| "reward": 0.65, |
| "reward_std": 0.07775410786271095, |
| "rewards/qwen_accuracy_reward/mean": 0.65, |
| "rewards/qwen_accuracy_reward/std": 0.3688706248998642, |
| "step": 260, |
| "step_time": 86.65903639029712 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.025, |
| "completions/max_length": 2481.9, |
| "completions/max_terminated_length": 2415.4, |
| "completions/mean_length": 1075.425, |
| "completions/mean_terminated_length": 1002.4191040039062, |
| "completions/min_length": 349.1, |
| "completions/min_terminated_length": 349.1, |
| "entropy": 0.34225144386291506, |
| "epoch": 1.9708029197080292, |
| "frac_reward_zero_std": 0.7, |
| "grad_norm": 0.96484375, |
| "learning_rate": 3.454987834549879e-06, |
| "loss": 0.0167, |
| "num_tokens": 33787199.0, |
| "reward": 0.578125, |
| "reward_std": 0.12972569838166237, |
| "rewards/qwen_accuracy_reward/mean": 0.578125, |
| "rewards/qwen_accuracy_reward/std": 0.4668997347354889, |
| "step": 270, |
| "step_time": 80.8301064182073 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 2574.6, |
| "completions/max_terminated_length": 2453.7, |
| "completions/mean_length": 1054.425, |
| "completions/mean_terminated_length": 1026.784130859375, |
| "completions/min_length": 392.3, |
| "completions/min_terminated_length": 392.3, |
| "entropy": 0.3572549015283585, |
| "epoch": 2.0437956204379564, |
| "frac_reward_zero_std": 0.85, |
| "grad_norm": 0.8046875, |
| "learning_rate": 3.2116788321167884e-06, |
| "loss": 0.0306, |
| "num_tokens": 34995479.0, |
| "reward": 0.821875, |
| "reward_std": 0.06165712922811508, |
| "rewards/qwen_accuracy_reward/mean": 0.821875, |
| "rewards/qwen_accuracy_reward/std": 0.2794704169034958, |
| "step": 280, |
| "step_time": 85.92972797648981 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2578.2, |
| "completions/max_terminated_length": 2578.2, |
| "completions/mean_length": 1058.575, |
| "completions/mean_terminated_length": 1058.575, |
| "completions/min_length": 371.0, |
| "completions/min_terminated_length": 371.0, |
| "entropy": 0.3524599611759186, |
| "epoch": 2.116788321167883, |
| "frac_reward_zero_std": 0.775, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9683698296836987e-06, |
| "loss": 0.0015, |
| "num_tokens": 36338735.0, |
| "reward": 0.621875, |
| "reward_std": 0.10636548325419426, |
| "rewards/qwen_accuracy_reward/mean": 0.621875, |
| "rewards/qwen_accuracy_reward/std": 0.4201431304216385, |
| "step": 290, |
| "step_time": 93.4068580438383 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.009375, |
| "completions/max_length": 3337.3, |
| "completions/max_terminated_length": 3052.6, |
| "completions/mean_length": 1304.146875, |
| "completions/mean_terminated_length": 1277.986474609375, |
| "completions/min_length": 366.6, |
| "completions/min_terminated_length": 366.6, |
| "entropy": 0.37322444319725034, |
| "epoch": 2.18978102189781, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.90625, |
| "learning_rate": 2.7250608272506085e-06, |
| "loss": 0.0091, |
| "num_tokens": 37781158.0, |
| "reward": 0.684375, |
| "reward_std": 0.13267236873507499, |
| "rewards/qwen_accuracy_reward/mean": 0.684375, |
| "rewards/qwen_accuracy_reward/std": 0.36027481555938723, |
| "step": 300, |
| "step_time": 124.43963868878782 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.025, |
| "completions/max_length": 3103.4, |
| "completions/max_terminated_length": 3048.6, |
| "completions/mean_length": 1280.71875, |
| "completions/mean_terminated_length": 1224.5757568359375, |
| "completions/min_length": 414.7, |
| "completions/min_terminated_length": 414.7, |
| "entropy": 0.36646572649478915, |
| "epoch": 2.2627737226277373, |
| "frac_reward_zero_std": 0.575, |
| "grad_norm": 0.75390625, |
| "learning_rate": 2.4817518248175183e-06, |
| "loss": -0.0218, |
| "num_tokens": 39053748.0, |
| "reward": 0.60625, |
| "reward_std": 0.19305532947182655, |
| "rewards/qwen_accuracy_reward/mean": 0.60625, |
| "rewards/qwen_accuracy_reward/std": 0.45746631026268003, |
| "step": 310, |
| "step_time": 96.06518278419972 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.025, |
| "completions/max_length": 3248.1, |
| "completions/max_terminated_length": 3058.9, |
| "completions/mean_length": 1360.24375, |
| "completions/mean_terminated_length": 1295.442919921875, |
| "completions/min_length": 362.0, |
| "completions/min_terminated_length": 362.0, |
| "entropy": 0.3854114145040512, |
| "epoch": 2.335766423357664, |
| "frac_reward_zero_std": 0.625, |
| "grad_norm": 1.078125, |
| "learning_rate": 2.2384428223844286e-06, |
| "loss": 0.0638, |
| "num_tokens": 40304938.0, |
| "reward": 0.70625, |
| "reward_std": 0.16034209728240967, |
| "rewards/qwen_accuracy_reward/mean": 0.70625, |
| "rewards/qwen_accuracy_reward/std": 0.3804707407951355, |
| "step": 320, |
| "step_time": 109.24540220741183 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.025, |
| "completions/max_length": 2834.8, |
| "completions/max_terminated_length": 2617.5, |
| "completions/mean_length": 1194.184375, |
| "completions/mean_terminated_length": 1109.6020874023438, |
| "completions/min_length": 388.7, |
| "completions/min_terminated_length": 388.7, |
| "entropy": 0.36874857246875764, |
| "epoch": 2.408759124087591, |
| "frac_reward_zero_std": 0.7, |
| "grad_norm": 0.0, |
| "learning_rate": 1.9951338199513384e-06, |
| "loss": -0.005, |
| "num_tokens": 41547669.0, |
| "reward": 0.7, |
| "reward_std": 0.13057240098714828, |
| "rewards/qwen_accuracy_reward/mean": 0.7, |
| "rewards/qwen_accuracy_reward/std": 0.3882273375988007, |
| "step": 330, |
| "step_time": 94.99811747204512 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2663.3, |
| "completions/max_terminated_length": 2663.3, |
| "completions/mean_length": 1023.784375, |
| "completions/mean_terminated_length": 1023.784375, |
| "completions/min_length": 363.6, |
| "completions/min_terminated_length": 363.6, |
| "entropy": 0.3397214740514755, |
| "epoch": 2.4817518248175183, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.88671875, |
| "learning_rate": 1.7518248175182485e-06, |
| "loss": 0.0084, |
| "num_tokens": 42817864.0, |
| "reward": 0.740625, |
| "reward_std": 0.14487907364964486, |
| "rewards/qwen_accuracy_reward/mean": 0.740625, |
| "rewards/qwen_accuracy_reward/std": 0.35987740010023117, |
| "step": 340, |
| "step_time": 88.57682326808572 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2727.0, |
| "completions/max_terminated_length": 2727.0, |
| "completions/mean_length": 1162.9625, |
| "completions/mean_terminated_length": 1162.9625, |
| "completions/min_length": 442.9, |
| "completions/min_terminated_length": 442.9, |
| "entropy": 0.37642553746700286, |
| "epoch": 2.554744525547445, |
| "frac_reward_zero_std": 0.825, |
| "grad_norm": 0.0, |
| "learning_rate": 1.5085158150851583e-06, |
| "loss": -0.011, |
| "num_tokens": 44131612.0, |
| "reward": 0.80625, |
| "reward_std": 0.07280554845929146, |
| "rewards/qwen_accuracy_reward/mean": 0.80625, |
| "rewards/qwen_accuracy_reward/std": 0.2779258817434311, |
| "step": 350, |
| "step_time": 94.38652676101773 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00625, |
| "completions/max_length": 3228.0, |
| "completions/max_terminated_length": 2900.1, |
| "completions/mean_length": 1259.84375, |
| "completions/mean_terminated_length": 1240.6628967285155, |
| "completions/min_length": 373.4, |
| "completions/min_terminated_length": 373.4, |
| "entropy": 0.36303475201129914, |
| "epoch": 2.627737226277372, |
| "frac_reward_zero_std": 0.675, |
| "grad_norm": 0.69140625, |
| "learning_rate": 1.2652068126520683e-06, |
| "loss": 0.0103, |
| "num_tokens": 45384666.0, |
| "reward": 0.58125, |
| "reward_std": 0.14351309314370156, |
| "rewards/qwen_accuracy_reward/mean": 0.58125, |
| "rewards/qwen_accuracy_reward/std": 0.4772630840539932, |
| "step": 360, |
| "step_time": 100.1825181835331 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.05, |
| "completions/max_length": 3610.3, |
| "completions/max_terminated_length": 3382.4, |
| "completions/mean_length": 1473.825, |
| "completions/mean_terminated_length": 1346.1535278320312, |
| "completions/min_length": 388.9, |
| "completions/min_terminated_length": 388.9, |
| "entropy": 0.3781063288450241, |
| "epoch": 2.7007299270072993, |
| "frac_reward_zero_std": 0.575, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.0218978102189781e-06, |
| "loss": 0.0401, |
| "num_tokens": 46681234.0, |
| "reward": 0.55625, |
| "reward_std": 0.19663594886660576, |
| "rewards/qwen_accuracy_reward/mean": 0.55625, |
| "rewards/qwen_accuracy_reward/std": 0.4555644616484642, |
| "step": 370, |
| "step_time": 121.74294393500313 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.00625, |
| "completions/max_length": 2807.6, |
| "completions/max_terminated_length": 2798.0, |
| "completions/mean_length": 1129.05, |
| "completions/mean_terminated_length": 1113.3502197265625, |
| "completions/min_length": 362.2, |
| "completions/min_terminated_length": 362.2, |
| "entropy": 0.3525690257549286, |
| "epoch": 2.7737226277372264, |
| "frac_reward_zero_std": 0.725, |
| "grad_norm": 0.66015625, |
| "learning_rate": 7.785888077858882e-07, |
| "loss": 0.024, |
| "num_tokens": 47890746.0, |
| "reward": 0.609375, |
| "reward_std": 0.12319448739290237, |
| "rewards/qwen_accuracy_reward/mean": 0.609375, |
| "rewards/qwen_accuracy_reward/std": 0.42726452350616456, |
| "step": 380, |
| "step_time": 91.42815532507375 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 3030.0, |
| "completions/max_terminated_length": 2951.3, |
| "completions/mean_length": 1326.25, |
| "completions/mean_terminated_length": 1238.2517333984374, |
| "completions/min_length": 360.5, |
| "completions/min_terminated_length": 360.5, |
| "entropy": 0.3662068575620651, |
| "epoch": 2.846715328467153, |
| "frac_reward_zero_std": 0.575, |
| "grad_norm": 0.640625, |
| "learning_rate": 5.352798053527981e-07, |
| "loss": 0.0044, |
| "num_tokens": 49127866.0, |
| "reward": 0.60625, |
| "reward_std": 0.18800986632704736, |
| "rewards/qwen_accuracy_reward/mean": 0.60625, |
| "rewards/qwen_accuracy_reward/std": 0.4078336015343666, |
| "step": 390, |
| "step_time": 111.37025026166812 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2633.6, |
| "completions/max_terminated_length": 2633.6, |
| "completions/mean_length": 1038.35625, |
| "completions/mean_terminated_length": 1038.35625, |
| "completions/min_length": 389.5, |
| "completions/min_terminated_length": 389.5, |
| "entropy": 0.3475939750671387, |
| "epoch": 2.9197080291970803, |
| "frac_reward_zero_std": 0.775, |
| "grad_norm": 0.859375, |
| "learning_rate": 2.9197080291970804e-07, |
| "loss": 0.0046, |
| "num_tokens": 50219684.0, |
| "reward": 0.703125, |
| "reward_std": 0.09816569313406945, |
| "rewards/qwen_accuracy_reward/mean": 0.703125, |
| "rewards/qwen_accuracy_reward/std": 0.3836729422211647, |
| "step": 400, |
| "step_time": 87.25570530630648 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.040625, |
| "completions/max_length": 3304.6, |
| "completions/max_terminated_length": 3197.0, |
| "completions/mean_length": 1342.228125, |
| "completions/mean_terminated_length": 1226.4643310546876, |
| "completions/min_length": 403.7, |
| "completions/min_terminated_length": 403.7, |
| "entropy": 0.36126827299594877, |
| "epoch": 2.9927007299270074, |
| "frac_reward_zero_std": 0.6, |
| "grad_norm": 0.0, |
| "learning_rate": 4.866180048661801e-08, |
| "loss": 0.0057, |
| "num_tokens": 51585277.0, |
| "reward": 0.634375, |
| "reward_std": 0.18369721844792367, |
| "rewards/qwen_accuracy_reward/mean": 0.634375, |
| "rewards/qwen_accuracy_reward/std": 0.4095410585403442, |
| "step": 410, |
| "step_time": 121.25470138275996 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 411, |
| "num_input_tokens_seen": 51708911, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|