| {"loss": 0.07838999, "grad_norm": 0.00430106, "learning_rate": 9.011e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.002176, "completion_length": 6686.59765625, "response_clip_ratio": 0.18359375, "rewards/CosineReward": 0.44159877, "reward": 0.44159877, "reward_std": 0.09190416, "kl": 0.01794434, "clip_ratio": 6.064e-05, "epoch": 1.39215686, "global_step/max_steps": "17/60", "percentage": "28.33%", "elapsed_time": "2h 10m 5s", "remaining_time": "5h 29m 3s"} | |
| {"loss": 0.07815618, "grad_norm": 0.00447908, "learning_rate": 8.83e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.002225, "epoch": 1.47058824, "global_step/max_steps": "18/60", "percentage": "30.00%", "elapsed_time": "2h 14m 43s", "remaining_time": "5h 14m 21s"} | |
| {"eval_loss": 0.00091588, "eval_completion_length": 6907.125, "eval_response_clip_ratio": 0.1875, "eval_rewards/CosineReward": 0.47237487, "eval_reward": 0.47237487, "eval_reward_std": 0.3155394, "eval_kl": 0.02203369, "eval_clip_ratio": 9.808e-05, "eval_runtime": 1457.6003, "eval_samples_per_second": 0.005, "eval_steps_per_second": 0.001, "epoch": 1.47058824, "global_step/max_steps": "18/60", "percentage": "30.00%", "elapsed_time": "2h 39m 1s", "remaining_time": "6h 11m 2s"} | |
| {"loss": 0.09426031, "grad_norm": 0.00592188, "learning_rate": 8.637e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.001092, "kl": 0.02223206, "clip_ratio": 0.00023432, "completion_length": 7038.578125, "response_clip_ratio": 0.19921875, "rewards/CosineReward": 0.42305946, "reward": 0.42305946, "reward_std": 0.17355207, "epoch": 1.54901961, "global_step/max_steps": "19/60", "percentage": "31.67%", "elapsed_time": "4h 50m 0s", "remaining_time": "10h 25m 48s"} | |
| {"loss": 0.09388024, "grad_norm": 0.00615149, "learning_rate": 8.431e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.001131, "epoch": 1.62745098, "global_step/max_steps": "20/60", "percentage": "33.33%", "elapsed_time": "4h 54m 42s", "remaining_time": "9h 49m 24s"} | |
| {"eval_loss": 0.0012895, "eval_completion_length": 7039.0625, "eval_response_clip_ratio": 0.1875, "eval_rewards/CosineReward": 0.46842009, "eval_reward": 0.46842009, "eval_reward_std": 0.3149526, "eval_kl": 0.02911377, "eval_clip_ratio": 6.594e-05, "eval_runtime": 1460.114, "eval_samples_per_second": 0.005, "eval_steps_per_second": 0.001, "epoch": 1.62745098, "global_step/max_steps": "20/60", "percentage": "33.33%", "elapsed_time": "5h 19m 2s", "remaining_time": "10h 38m 4s"} | |
| {"loss": 0.07547289, "grad_norm": 0.00666696, "learning_rate": 8.214e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.000775, "kl": 0.02796936, "clip_ratio": 0.00028659, "completion_length": 7469.6796875, "response_clip_ratio": 0.29296875, "rewards/CosineReward": 0.337832, "reward": 0.337832, "reward_std": 0.10592943, "epoch": 1.70588235, "global_step/max_steps": "21/60", "percentage": "35.00%", "elapsed_time": "7h 31m 44s", "remaining_time": "13h 58m 57s"} | |
| {"loss": 0.0750519, "grad_norm": 0.00698173, "learning_rate": 7.986e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.000803, "epoch": 1.78431373, "global_step/max_steps": "22/60", "percentage": "36.67%", "elapsed_time": "7h 36m 26s", "remaining_time": "13h 8m 24s"} | |
| {"eval_loss": 0.00155675, "eval_completion_length": 7378.4375, "eval_response_clip_ratio": 0.1875, "eval_rewards/CosineReward": 0.44724135, "eval_reward": 0.44724135, "eval_reward_std": 0.31175205, "eval_kl": 0.03723145, "eval_clip_ratio": 3.566e-05, "eval_runtime": 1469.5869, "eval_samples_per_second": 0.005, "eval_steps_per_second": 0.001, "epoch": 1.78431373, "global_step/max_steps": "22/60", "percentage": "36.67%", "elapsed_time": "8h 0m 56s", "remaining_time": "13h 50m 43s"} | |
| {"loss": 0.0703681, "grad_norm": 0.00867982, "learning_rate": 7.748e-05, "memory(GiB)": 184.28, "train_speed(iter/s)": 0.000623, "kl": 0.0357666, "clip_ratio": 0.00025828, "completion_length": 8084.92578125, "response_clip_ratio": 0.28125, "rewards/CosineReward": 0.32297049, "reward": 0.32297049, "reward_std": 0.15503303, "epoch": 1.8627451, "global_step/max_steps": "23/60", "percentage": "38.33%", "elapsed_time": "10h 14m 47s", "remaining_time": "16h 29m 1s"} | |