Instructions to use usr256864/ee_gol with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use usr256864/ee_gol with PEFT:
from peft import PeftModel from transformers import AutoModelForCausalLM base_model = AutoModelForCausalLM.from_pretrained("HiTZ/GoLLIE-7B") model = PeftModel.from_pretrained(base_model, "usr256864/ee_gol") - Transformers
How to use usr256864/ee_gol with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("text-generation", model="usr256864/ee_gol")# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("usr256864/ee_gol", dtype="auto") - Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use usr256864/ee_gol with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "usr256864/ee_gol" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "usr256864/ee_gol", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'Use Docker
docker model run hf.co/usr256864/ee_gol
- SGLang
How to use usr256864/ee_gol with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "usr256864/ee_gol" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "usr256864/ee_gol", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "usr256864/ee_gol" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "usr256864/ee_gol", "prompt": "Once upon a time,", "max_tokens": 512, "temperature": 0.5 }' - Docker Model Runner
How to use usr256864/ee_gol with Docker Model Runner:
docker model run hf.co/usr256864/ee_gol
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.344995140913508, | |
| "eval_steps": 500, | |
| "global_step": 5500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0322265625, | |
| "completions/max_length": 244.3125, | |
| "completions/max_terminated_length": 204.375, | |
| "completions/mean_length": 64.1787109375, | |
| "completions/mean_terminated_length": 57.63671565055847, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.15918307239189744, | |
| "epoch": 0.015549076773566569, | |
| "frac_reward_zero_std": 0.453125, | |
| "grad_norm": 0.7093124389648438, | |
| "learning_rate": 5e-05, | |
| "loss": -0.4337, | |
| "num_tokens": 1189183.0, | |
| "reward": 3.017539083957672, | |
| "reward_std": 1.1567719243466854, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.3280859384685755, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.38037889264523983, | |
| "rewards/event_reward_fn/mean": 2.2734375, | |
| "rewards/event_reward_fn/std": 3.2615081816911697, | |
| "rewards/format_reward_fn/mean": 0.4160156287252903, | |
| "rewards/format_reward_fn/std": 0.33771974220871925, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0478515625, | |
| "completions/max_length": 240.6875, | |
| "completions/max_terminated_length": 213.0, | |
| "completions/mean_length": 96.623046875, | |
| "completions/mean_terminated_length": 88.71417284011841, | |
| "completions/min_length": 7.3125, | |
| "completions/min_terminated_length": 7.3125, | |
| "entropy": 0.0870705652050674, | |
| "epoch": 0.031098153547133137, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.11729823052883148, | |
| "learning_rate": 5e-05, | |
| "loss": -0.1999, | |
| "num_tokens": 2439389.0, | |
| "reward": 6.020513415336609, | |
| "reward_std": 1.318240948021412, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.636932659894228, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.4330139197409153, | |
| "rewards/event_reward_fn/mean": 4.67578125, | |
| "rewards/event_reward_fn/std": 4.073968470096588, | |
| "rewards/format_reward_fn/mean": 0.7077994756400585, | |
| "rewards/format_reward_fn/std": 0.37246643379330635, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0498046875, | |
| "completions/max_length": 249.75, | |
| "completions/max_terminated_length": 224.0, | |
| "completions/mean_length": 108.2373046875, | |
| "completions/mean_terminated_length": 100.33520174026489, | |
| "completions/min_length": 21.9375, | |
| "completions/min_terminated_length": 21.9375, | |
| "entropy": 0.06846319953911006, | |
| "epoch": 0.04664723032069971, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.11189325153827667, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0794, | |
| "num_tokens": 3697760.0, | |
| "reward": 6.618032068014145, | |
| "reward_std": 1.1964115016162395, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8193489573895931, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3472439646720886, | |
| "rewards/event_reward_fn/mean": 4.94921875, | |
| "rewards/event_reward_fn/std": 4.1042004227638245, | |
| "rewards/format_reward_fn/mean": 0.8494642823934555, | |
| "rewards/format_reward_fn/std": 0.30338616110384464, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.080078125, | |
| "completions/max_length": 251.625, | |
| "completions/max_terminated_length": 232.75, | |
| "completions/mean_length": 114.8154296875, | |
| "completions/mean_terminated_length": 102.74591159820557, | |
| "completions/min_length": 36.3125, | |
| "completions/min_terminated_length": 36.3125, | |
| "entropy": 0.0698625217191875, | |
| "epoch": 0.062196307094266275, | |
| "frac_reward_zero_std": 0.4140625, | |
| "grad_norm": 0.1286889612674713, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0521, | |
| "num_tokens": 4908167.0, | |
| "reward": 7.294231742620468, | |
| "reward_std": 1.1466168127954006, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.833867184817791, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3500053770840168, | |
| "rewards/event_reward_fn/mean": 5.6142578125, | |
| "rewards/event_reward_fn/std": 4.4990804344415665, | |
| "rewards/format_reward_fn/mean": 0.8461067788302898, | |
| "rewards/format_reward_fn/std": 0.3374804314225912, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0712890625, | |
| "completions/max_length": 246.625, | |
| "completions/max_terminated_length": 219.25, | |
| "completions/mean_length": 113.826171875, | |
| "completions/mean_terminated_length": 103.01660490036011, | |
| "completions/min_length": 38.6875, | |
| "completions/min_terminated_length": 38.6875, | |
| "entropy": 0.0676732650026679, | |
| "epoch": 0.07774538386783285, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.08458422869443893, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0159, | |
| "num_tokens": 6128157.0, | |
| "reward": 7.6867459416389465, | |
| "reward_std": 0.9968220815062523, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8819040954113007, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.29652632866054773, | |
| "rewards/event_reward_fn/mean": 5.904296875, | |
| "rewards/event_reward_fn/std": 4.028907224535942, | |
| "rewards/format_reward_fn/mean": 0.9005450159311295, | |
| "rewards/format_reward_fn/std": 0.2789665600284934, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0361328125, | |
| "completions/max_length": 247.125, | |
| "completions/max_terminated_length": 236.1875, | |
| "completions/mean_length": 112.671875, | |
| "completions/mean_terminated_length": 107.51665830612183, | |
| "completions/min_length": 37.375, | |
| "completions/min_terminated_length": 37.375, | |
| "entropy": 0.06292958417907357, | |
| "epoch": 0.09329446064139942, | |
| "frac_reward_zero_std": 0.453125, | |
| "grad_norm": 0.09709884226322174, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0353, | |
| "num_tokens": 7275181.0, | |
| "reward": 7.98236358165741, | |
| "reward_std": 1.0672973282635212, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9419283382594585, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18250679067568853, | |
| "rewards/event_reward_fn/mean": 6.091796875, | |
| "rewards/event_reward_fn/std": 4.740213438868523, | |
| "rewards/format_reward_fn/mean": 0.9486383907496929, | |
| "rewards/format_reward_fn/std": 0.16436113324016333, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1083984375, | |
| "completions/max_length": 255.1875, | |
| "completions/max_terminated_length": 238.3125, | |
| "completions/mean_length": 129.126953125, | |
| "completions/mean_terminated_length": 113.86429929733276, | |
| "completions/min_length": 41.4375, | |
| "completions/min_terminated_length": 41.4375, | |
| "entropy": 0.06285874638706446, | |
| "epoch": 0.10884353741496598, | |
| "frac_reward_zero_std": 0.46484375, | |
| "grad_norm": 0.17054593563079834, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0124, | |
| "num_tokens": 8523631.0, | |
| "reward": 7.8958849012851715, | |
| "reward_std": 1.0757801569998264, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8616694211959839, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.31423775386065245, | |
| "rewards/event_reward_fn/mean": 6.1513671875, | |
| "rewards/event_reward_fn/std": 4.978878691792488, | |
| "rewards/format_reward_fn/mean": 0.8828483074903488, | |
| "rewards/format_reward_fn/std": 0.30156402476131916, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0849609375, | |
| "completions/max_length": 251.375, | |
| "completions/max_terminated_length": 228.4375, | |
| "completions/mean_length": 127.2109375, | |
| "completions/mean_terminated_length": 115.21368026733398, | |
| "completions/min_length": 41.0, | |
| "completions/min_terminated_length": 41.0, | |
| "entropy": 0.0637968888040632, | |
| "epoch": 0.12439261418853255, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.1357981413602829, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0186, | |
| "num_tokens": 9707755.0, | |
| "reward": 8.13655748963356, | |
| "reward_std": 1.0848342552781105, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8800471648573875, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.29066222277469933, | |
| "rewards/event_reward_fn/mean": 6.36328125, | |
| "rewards/event_reward_fn/std": 4.869352951645851, | |
| "rewards/format_reward_fn/mean": 0.8932291641831398, | |
| "rewards/format_reward_fn/std": 0.28600863087922335, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1123046875, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 237.5, | |
| "completions/mean_length": 140.646484375, | |
| "completions/mean_terminated_length": 126.51056718826294, | |
| "completions/min_length": 45.5, | |
| "completions/min_terminated_length": 45.5, | |
| "entropy": 0.06207763450220227, | |
| "epoch": 0.13994169096209913, | |
| "frac_reward_zero_std": 0.43359375, | |
| "grad_norm": 0.18493860960006714, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0196, | |
| "num_tokens": 10967133.0, | |
| "reward": 8.799611210823059, | |
| "reward_std": 0.9760072641074657, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8509132824838161, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3210932519286871, | |
| "rewards/event_reward_fn/mean": 7.080078125, | |
| "rewards/event_reward_fn/std": 4.870284929871559, | |
| "rewards/format_reward_fn/mean": 0.868619792163372, | |
| "rewards/format_reward_fn/std": 0.3133174767717719, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.158203125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 243.6875, | |
| "completions/mean_length": 147.4453125, | |
| "completions/mean_terminated_length": 126.75604343414307, | |
| "completions/min_length": 45.1875, | |
| "completions/min_terminated_length": 45.1875, | |
| "entropy": 0.07122921152040362, | |
| "epoch": 0.1554907677356657, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.2770453989505768, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0029, | |
| "num_tokens": 12272289.0, | |
| "reward": 8.378659665584564, | |
| "reward_std": 1.1576978042721748, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.814004722982645, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3608710467815399, | |
| "rewards/event_reward_fn/mean": 6.7333984375, | |
| "rewards/event_reward_fn/std": 4.9148435443639755, | |
| "rewards/format_reward_fn/mean": 0.8312565125524998, | |
| "rewards/format_reward_fn/std": 0.3619283623993397, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.166015625, | |
| "completions/max_length": 254.8125, | |
| "completions/max_terminated_length": 245.5625, | |
| "completions/mean_length": 147.28125, | |
| "completions/mean_terminated_length": 126.25686740875244, | |
| "completions/min_length": 47.875, | |
| "completions/min_terminated_length": 47.875, | |
| "entropy": 0.07063461863435805, | |
| "epoch": 0.17103984450923226, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.20308320224285126, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0226, | |
| "num_tokens": 13517901.0, | |
| "reward": 8.586736917495728, | |
| "reward_std": 1.180733297020197, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7950377985835075, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.35726089123636484, | |
| "rewards/event_reward_fn/mean": 6.9697265625, | |
| "rewards/event_reward_fn/std": 4.731213182210922, | |
| "rewards/format_reward_fn/mean": 0.8219726607203484, | |
| "rewards/format_reward_fn/std": 0.35164750274270773, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1572265625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 238.375, | |
| "completions/mean_length": 139.818359375, | |
| "completions/mean_terminated_length": 118.0954179763794, | |
| "completions/min_length": 40.8125, | |
| "completions/min_terminated_length": 40.8125, | |
| "entropy": 0.06718740961514413, | |
| "epoch": 0.18658892128279883, | |
| "frac_reward_zero_std": 0.41015625, | |
| "grad_norm": 0.10902810841798782, | |
| "learning_rate": 5e-05, | |
| "loss": -0.014, | |
| "num_tokens": 14756091.0, | |
| "reward": 8.501474261283875, | |
| "reward_std": 1.0462469272315502, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7894043922424316, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.36320002656430006, | |
| "rewards/event_reward_fn/mean": 6.900390625, | |
| "rewards/event_reward_fn/std": 4.841355547308922, | |
| "rewards/format_reward_fn/mean": 0.8116793744266033, | |
| "rewards/format_reward_fn/std": 0.36351621337234974, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1650390625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 240.75, | |
| "completions/mean_length": 150.31640625, | |
| "completions/mean_terminated_length": 129.93299293518066, | |
| "completions/min_length": 47.3125, | |
| "completions/min_terminated_length": 47.3125, | |
| "entropy": 0.06464909669011831, | |
| "epoch": 0.2021379980563654, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.14535187184810638, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0053, | |
| "num_tokens": 16038167.0, | |
| "reward": 9.072274684906006, | |
| "reward_std": 1.216166764497757, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8119766861200333, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3669638652354479, | |
| "rewards/event_reward_fn/mean": 7.439453125, | |
| "rewards/event_reward_fn/std": 5.443397417664528, | |
| "rewards/format_reward_fn/mean": 0.8208449557423592, | |
| "rewards/format_reward_fn/std": 0.3688422851264477, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.123046875, | |
| "completions/max_length": 253.125, | |
| "completions/max_terminated_length": 236.25, | |
| "completions/mean_length": 134.2294921875, | |
| "completions/mean_terminated_length": 117.79497241973877, | |
| "completions/min_length": 37.875, | |
| "completions/min_terminated_length": 37.875, | |
| "entropy": 0.06634449982084334, | |
| "epoch": 0.21768707482993196, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.0848744735121727, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0096, | |
| "num_tokens": 17312310.0, | |
| "reward": 8.67900961637497, | |
| "reward_std": 1.2484335452318192, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8359074406325817, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33306772634387016, | |
| "rewards/event_reward_fn/mean": 6.998046875, | |
| "rewards/event_reward_fn/std": 5.2749055325984955, | |
| "rewards/format_reward_fn/mean": 0.8450553454458714, | |
| "rewards/format_reward_fn/std": 0.3290289109572768, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1416015625, | |
| "completions/max_length": 251.875, | |
| "completions/max_terminated_length": 233.375, | |
| "completions/mean_length": 138.087890625, | |
| "completions/mean_terminated_length": 119.15170526504517, | |
| "completions/min_length": 43.5, | |
| "completions/min_terminated_length": 43.5, | |
| "entropy": 0.06127542559988797, | |
| "epoch": 0.23323615160349853, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.11555243283510208, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0131, | |
| "num_tokens": 18542780.0, | |
| "reward": 8.50430566072464, | |
| "reward_std": 0.9973306134343147, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8396431356668472, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3250976144336164, | |
| "rewards/event_reward_fn/mean": 6.8232421875, | |
| "rewards/event_reward_fn/std": 4.391354620456696, | |
| "rewards/format_reward_fn/mean": 0.8414202034473419, | |
| "rewards/format_reward_fn/std": 0.32731985161080956, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.150390625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 238.3125, | |
| "completions/mean_length": 145.9013671875, | |
| "completions/mean_terminated_length": 126.70299863815308, | |
| "completions/min_length": 42.625, | |
| "completions/min_terminated_length": 42.625, | |
| "entropy": 0.06281583779491484, | |
| "epoch": 0.2487852283770651, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.18120716512203217, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0064, | |
| "num_tokens": 19826255.0, | |
| "reward": 8.296767592430115, | |
| "reward_std": 1.2354702651500702, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8322994858026505, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.347878853790462, | |
| "rewards/event_reward_fn/mean": 6.6259765625, | |
| "rewards/event_reward_fn/std": 4.630821079015732, | |
| "rewards/format_reward_fn/mean": 0.8384914398193359, | |
| "rewards/format_reward_fn/std": 0.3501485912129283, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.193359375, | |
| "completions/max_length": 255.875, | |
| "completions/max_terminated_length": 238.25, | |
| "completions/mean_length": 149.7919921875, | |
| "completions/mean_terminated_length": 124.16296577453613, | |
| "completions/min_length": 45.875, | |
| "completions/min_terminated_length": 45.875, | |
| "entropy": 0.06578910606913269, | |
| "epoch": 0.26433430515063167, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.1267288774251938, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0136, | |
| "num_tokens": 21125750.0, | |
| "reward": 8.781003445386887, | |
| "reward_std": 1.1383938118815422, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7845699526369572, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.37547132885083556, | |
| "rewards/event_reward_fn/mean": 7.2001953125, | |
| "rewards/event_reward_fn/std": 5.128496631979942, | |
| "rewards/format_reward_fn/mean": 0.7962380684912205, | |
| "rewards/format_reward_fn/std": 0.3762207794934511, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1591796875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 240.5625, | |
| "completions/mean_length": 153.1923828125, | |
| "completions/mean_terminated_length": 134.3278865814209, | |
| "completions/min_length": 47.875, | |
| "completions/min_terminated_length": 47.875, | |
| "entropy": 0.06297733471728861, | |
| "epoch": 0.27988338192419826, | |
| "frac_reward_zero_std": 0.40625, | |
| "grad_norm": 0.1375938057899475, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 22340679.0, | |
| "reward": 8.529892146587372, | |
| "reward_std": 0.9637267738580704, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8117021955549717, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.36230491753667593, | |
| "rewards/event_reward_fn/mean": 6.8916015625, | |
| "rewards/event_reward_fn/std": 4.614271923899651, | |
| "rewards/format_reward_fn/mean": 0.8265885375440121, | |
| "rewards/format_reward_fn/std": 0.3658856125548482, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15234375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 241.3125, | |
| "completions/mean_length": 154.25, | |
| "completions/mean_terminated_length": 135.76823568344116, | |
| "completions/min_length": 48.5625, | |
| "completions/min_terminated_length": 48.5625, | |
| "entropy": 0.06781743955798447, | |
| "epoch": 0.2954324586977648, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.1462751030921936, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0151, | |
| "num_tokens": 23587243.0, | |
| "reward": 8.23677259683609, | |
| "reward_std": 1.0035298839211464, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8233126699924469, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3517280900850892, | |
| "rewards/event_reward_fn/mean": 6.5712890625, | |
| "rewards/event_reward_fn/std": 4.795703008770943, | |
| "rewards/format_reward_fn/mean": 0.8421707637608051, | |
| "rewards/format_reward_fn/std": 0.35116075072437525, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1806640625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 249.75, | |
| "completions/mean_length": 166.966796875, | |
| "completions/mean_terminated_length": 147.62712383270264, | |
| "completions/min_length": 61.375, | |
| "completions/min_terminated_length": 61.375, | |
| "entropy": 0.06373983481898904, | |
| "epoch": 0.3109815354713314, | |
| "frac_reward_zero_std": 0.3984375, | |
| "grad_norm": 0.11124490946531296, | |
| "learning_rate": 5e-05, | |
| "loss": 0.003, | |
| "num_tokens": 24903841.0, | |
| "reward": 9.053372412919998, | |
| "reward_std": 0.9619283508509398, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7753774374723434, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.390325166285038, | |
| "rewards/event_reward_fn/mean": 7.4970703125, | |
| "rewards/event_reward_fn/std": 4.754469409584999, | |
| "rewards/format_reward_fn/mean": 0.7809244766831398, | |
| "rewards/format_reward_fn/std": 0.39305115677416325, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2158203125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 242.0625, | |
| "completions/mean_length": 168.8544921875, | |
| "completions/mean_terminated_length": 144.7502179145813, | |
| "completions/min_length": 54.75, | |
| "completions/min_terminated_length": 54.75, | |
| "entropy": 0.06674353126436472, | |
| "epoch": 0.32653061224489793, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.11352943629026413, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0032, | |
| "num_tokens": 26176048.0, | |
| "reward": 8.903641551733017, | |
| "reward_std": 1.0895367171615362, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7484599277377129, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.39963601250201464, | |
| "rewards/event_reward_fn/mean": 7.390625, | |
| "rewards/event_reward_fn/std": 5.010941222310066, | |
| "rewards/format_reward_fn/mean": 0.7645566947758198, | |
| "rewards/format_reward_fn/std": 0.40272433403879404, | |
| "step": 336 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.201171875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 239.0, | |
| "completions/mean_length": 169.0126953125, | |
| "completions/mean_terminated_length": 147.5876121520996, | |
| "completions/min_length": 54.375, | |
| "completions/min_terminated_length": 54.375, | |
| "entropy": 0.06429841788485646, | |
| "epoch": 0.34207968901846453, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.15351000428199768, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0046, | |
| "num_tokens": 27486521.0, | |
| "reward": 9.137612909078598, | |
| "reward_std": 1.0497351847589016, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7691521309316158, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3831571042537689, | |
| "rewards/event_reward_fn/mean": 7.5888671875, | |
| "rewards/event_reward_fn/std": 5.132935270667076, | |
| "rewards/format_reward_fn/mean": 0.7795935608446598, | |
| "rewards/format_reward_fn/std": 0.3872489295899868, | |
| "step": 352 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.18359375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 237.75, | |
| "completions/mean_length": 163.7431640625, | |
| "completions/mean_terminated_length": 143.10296440124512, | |
| "completions/min_length": 51.8125, | |
| "completions/min_terminated_length": 51.8125, | |
| "entropy": 0.06813837168738246, | |
| "epoch": 0.3576287657920311, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.17290791869163513, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0092, | |
| "num_tokens": 28746262.0, | |
| "reward": 9.347127586603165, | |
| "reward_std": 1.1120197921991348, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7918755821883678, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3813342722132802, | |
| "rewards/event_reward_fn/mean": 7.75390625, | |
| "rewards/event_reward_fn/std": 5.131016373634338, | |
| "rewards/format_reward_fn/mean": 0.8013457953929901, | |
| "rewards/format_reward_fn/std": 0.3841324523091316, | |
| "step": 368 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.169921875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 244.625, | |
| "completions/mean_length": 163.630859375, | |
| "completions/mean_terminated_length": 145.52464532852173, | |
| "completions/min_length": 53.8125, | |
| "completions/min_terminated_length": 53.8125, | |
| "entropy": 0.06338186049833894, | |
| "epoch": 0.37317784256559766, | |
| "frac_reward_zero_std": 0.42578125, | |
| "grad_norm": 0.15385830402374268, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0024, | |
| "num_tokens": 29971032.0, | |
| "reward": 8.972355782985687, | |
| "reward_std": 0.9110043197870255, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8088140487670898, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.35705708153545856, | |
| "rewards/event_reward_fn/mean": 7.345703125, | |
| "rewards/event_reward_fn/std": 4.8114437609910965, | |
| "rewards/format_reward_fn/mean": 0.817838542163372, | |
| "rewards/format_reward_fn/std": 0.35746027156710625, | |
| "step": 384 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1533203125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 239.5, | |
| "completions/mean_length": 163.6767578125, | |
| "completions/mean_terminated_length": 147.22436618804932, | |
| "completions/min_length": 49.375, | |
| "completions/min_terminated_length": 49.375, | |
| "entropy": 0.06737338448874652, | |
| "epoch": 0.38872691933916426, | |
| "frac_reward_zero_std": 0.3671875, | |
| "grad_norm": 0.11516160517930984, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 31274657.0, | |
| "reward": 9.454054236412048, | |
| "reward_std": 1.1389728896319866, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8227716907858849, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.35767858382314444, | |
| "rewards/event_reward_fn/mean": 7.80078125, | |
| "rewards/event_reward_fn/std": 4.638232260942459, | |
| "rewards/format_reward_fn/mean": 0.8305013030767441, | |
| "rewards/format_reward_fn/std": 0.3585221981629729, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.212890625, | |
| "completions/max_length": 255.9375, | |
| "completions/max_terminated_length": 249.6875, | |
| "completions/mean_length": 176.8935546875, | |
| "completions/mean_terminated_length": 156.1867184638977, | |
| "completions/min_length": 67.1875, | |
| "completions/min_terminated_length": 67.1875, | |
| "entropy": 0.07263953145593405, | |
| "epoch": 0.4042759961127308, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.12655992805957794, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 32597096.0, | |
| "reward": 9.407644420862198, | |
| "reward_std": 1.1765358839184046, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7457954213023186, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.4088666429743171, | |
| "rewards/event_reward_fn/mean": 7.8974609375, | |
| "rewards/event_reward_fn/std": 5.147656410932541, | |
| "rewards/format_reward_fn/mean": 0.764388021081686, | |
| "rewards/format_reward_fn/std": 0.39705412182956934, | |
| "step": 416 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2021484375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 245.125, | |
| "completions/mean_length": 175.6865234375, | |
| "completions/mean_terminated_length": 155.94458389282227, | |
| "completions/min_length": 69.0625, | |
| "completions/min_terminated_length": 69.0625, | |
| "entropy": 0.0728312199935317, | |
| "epoch": 0.4198250728862974, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.14607630670070648, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0018, | |
| "num_tokens": 33869635.0, | |
| "reward": 8.863501250743866, | |
| "reward_std": 1.1473261304199696, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7608970887959003, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.39875176921486855, | |
| "rewards/event_reward_fn/mean": 7.32421875, | |
| "rewards/event_reward_fn/std": 4.745256543159485, | |
| "rewards/format_reward_fn/mean": 0.7783854156732559, | |
| "rewards/format_reward_fn/std": 0.4040640462189913, | |
| "step": 432 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2216796875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 249.0, | |
| "completions/mean_length": 184.185546875, | |
| "completions/mean_terminated_length": 164.68915843963623, | |
| "completions/min_length": 79.1875, | |
| "completions/min_terminated_length": 79.1875, | |
| "entropy": 0.07330505712889135, | |
| "epoch": 0.43537414965986393, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.11223267763853073, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0046, | |
| "num_tokens": 35198117.0, | |
| "reward": 9.225192874670029, | |
| "reward_std": 1.0972841531038284, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.756638091057539, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.39518506824970245, | |
| "rewards/event_reward_fn/mean": 7.6982421875, | |
| "rewards/event_reward_fn/std": 4.850593596696854, | |
| "rewards/format_reward_fn/mean": 0.7703125029802322, | |
| "rewards/format_reward_fn/std": 0.39637050684541464, | |
| "step": 448 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.181640625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 243.4375, | |
| "completions/mean_length": 167.8017578125, | |
| "completions/mean_terminated_length": 149.08932733535767, | |
| "completions/min_length": 67.0625, | |
| "completions/min_terminated_length": 67.0625, | |
| "entropy": 0.06937285792082548, | |
| "epoch": 0.4509232264334305, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.277972549200058, | |
| "learning_rate": 5e-05, | |
| "loss": 0.005, | |
| "num_tokens": 36479498.0, | |
| "reward": 9.970476865768433, | |
| "reward_std": 1.2180952616035938, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7747513987123966, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3933687787503004, | |
| "rewards/event_reward_fn/mean": 8.412109375, | |
| "rewards/event_reward_fn/std": 5.536467835307121, | |
| "rewards/format_reward_fn/mean": 0.7836160659790039, | |
| "rewards/format_reward_fn/std": 0.3935097064822912, | |
| "step": 464 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1650390625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 241.75, | |
| "completions/mean_length": 165.2724609375, | |
| "completions/mean_terminated_length": 147.48859310150146, | |
| "completions/min_length": 61.3125, | |
| "completions/min_terminated_length": 61.3125, | |
| "entropy": 0.062284021405503154, | |
| "epoch": 0.46647230320699706, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.19548486173152924, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0031, | |
| "num_tokens": 37738681.0, | |
| "reward": 9.709998965263367, | |
| "reward_std": 1.080780379474163, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7859884761273861, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3875921927392483, | |
| "rewards/event_reward_fn/mean": 8.123046875, | |
| "rewards/event_reward_fn/std": 5.239727973937988, | |
| "rewards/format_reward_fn/mean": 0.8009635433554649, | |
| "rewards/format_reward_fn/std": 0.3870681691914797, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.177734375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 243.4375, | |
| "completions/mean_length": 175.16015625, | |
| "completions/mean_terminated_length": 158.66053676605225, | |
| "completions/min_length": 70.4375, | |
| "completions/min_terminated_length": 70.4375, | |
| "entropy": 0.06954935006797314, | |
| "epoch": 0.48202137998056366, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.20231647789478302, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0035, | |
| "num_tokens": 39016317.0, | |
| "reward": 9.523818492889404, | |
| "reward_std": 1.1278588809072971, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7954656668007374, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.380647461861372, | |
| "rewards/event_reward_fn/mean": 7.9248046875, | |
| "rewards/event_reward_fn/std": 5.266398847103119, | |
| "rewards/format_reward_fn/mean": 0.8035481758415699, | |
| "rewards/format_reward_fn/std": 0.3839748175814748, | |
| "step": 496 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2158203125, | |
| "completions/max_length": 255.375, | |
| "completions/max_terminated_length": 247.625, | |
| "completions/mean_length": 189.3232421875, | |
| "completions/mean_terminated_length": 170.41810989379883, | |
| "completions/min_length": 91.3125, | |
| "completions/min_terminated_length": 91.3125, | |
| "entropy": 0.07108506350778043, | |
| "epoch": 0.4975704567541302, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.05343855917453766, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0085, | |
| "num_tokens": 40322552.0, | |
| "reward": 9.111008793115616, | |
| "reward_std": 1.051011398434639, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7504944987595081, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3889648839831352, | |
| "rewards/event_reward_fn/mean": 7.58984375, | |
| "rewards/event_reward_fn/std": 4.890510141849518, | |
| "rewards/format_reward_fn/mean": 0.7706705778837204, | |
| "rewards/format_reward_fn/std": 0.39065420906990767, | |
| "step": 512 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.185546875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 244.25, | |
| "completions/mean_length": 181.51953125, | |
| "completions/mean_terminated_length": 164.5301055908203, | |
| "completions/min_length": 85.625, | |
| "completions/min_terminated_length": 85.625, | |
| "entropy": 0.07393265794962645, | |
| "epoch": 0.5131195335276968, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.13203799724578857, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0024, | |
| "num_tokens": 41624024.0, | |
| "reward": 9.416305720806122, | |
| "reward_std": 1.0736836642026901, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.749378640204668, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.39308065082877874, | |
| "rewards/event_reward_fn/mean": 7.89453125, | |
| "rewards/event_reward_fn/std": 5.149897053837776, | |
| "rewards/format_reward_fn/mean": 0.7723958268761635, | |
| "rewards/format_reward_fn/std": 0.3941022912040353, | |
| "step": 528 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1455078125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 248.3125, | |
| "completions/mean_length": 183.716796875, | |
| "completions/mean_terminated_length": 171.55659580230713, | |
| "completions/min_length": 93.8125, | |
| "completions/min_terminated_length": 93.8125, | |
| "entropy": 0.07057009753771126, | |
| "epoch": 0.5286686103012633, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.14103946089744568, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0125, | |
| "num_tokens": 42874182.0, | |
| "reward": 9.582858800888062, | |
| "reward_std": 0.9491388313472271, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8273874409496784, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33826882019639015, | |
| "rewards/event_reward_fn/mean": 7.9111328125, | |
| "rewards/event_reward_fn/std": 4.9791994243860245, | |
| "rewards/format_reward_fn/mean": 0.844338733702898, | |
| "rewards/format_reward_fn/std": 0.3412060188129544, | |
| "step": 544 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1806640625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 241.75, | |
| "completions/mean_length": 182.1201171875, | |
| "completions/mean_terminated_length": 165.9890251159668, | |
| "completions/min_length": 90.0, | |
| "completions/min_terminated_length": 90.0, | |
| "entropy": 0.07039901474490762, | |
| "epoch": 0.54421768707483, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.1732136756181717, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0085, | |
| "num_tokens": 44221753.0, | |
| "reward": 9.929603159427643, | |
| "reward_std": 1.1261513829231262, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7931484319269657, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3795010205358267, | |
| "rewards/event_reward_fn/mean": 8.328125, | |
| "rewards/event_reward_fn/std": 4.941879317164421, | |
| "rewards/format_reward_fn/mean": 0.8083296120166779, | |
| "rewards/format_reward_fn/std": 0.3848690167069435, | |
| "step": 560 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.130859375, | |
| "completions/max_length": 255.75, | |
| "completions/max_terminated_length": 248.5625, | |
| "completions/mean_length": 183.498046875, | |
| "completions/mean_terminated_length": 172.7189416885376, | |
| "completions/min_length": 89.5, | |
| "completions/min_terminated_length": 89.5, | |
| "entropy": 0.06706819240935147, | |
| "epoch": 0.5597667638483965, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.18046796321868896, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0046, | |
| "num_tokens": 45493775.0, | |
| "reward": 9.964149117469788, | |
| "reward_std": 1.0226014591753483, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8349823988974094, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3266658801585436, | |
| "rewards/event_reward_fn/mean": 8.2734375, | |
| "rewards/event_reward_fn/std": 5.3706135004758835, | |
| "rewards/format_reward_fn/mean": 0.855729166418314, | |
| "rewards/format_reward_fn/std": 0.33050147350877523, | |
| "step": 576 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.123046875, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 240.3125, | |
| "completions/mean_length": 157.6474609375, | |
| "completions/mean_terminated_length": 144.27598762512207, | |
| "completions/min_length": 70.4375, | |
| "completions/min_terminated_length": 70.4375, | |
| "entropy": 0.06692534498870373, | |
| "epoch": 0.5753158406219631, | |
| "frac_reward_zero_std": 0.41015625, | |
| "grad_norm": 0.18845033645629883, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 46845374.0, | |
| "reward": 9.60788244009018, | |
| "reward_std": 1.077907931059599, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8490281663835049, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3180042654275894, | |
| "rewards/event_reward_fn/mean": 7.8984375, | |
| "rewards/event_reward_fn/std": 5.3387322425842285, | |
| "rewards/format_reward_fn/mean": 0.8604166693985462, | |
| "rewards/format_reward_fn/std": 0.3179410183802247, | |
| "step": 592 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 253.375, | |
| "completions/max_terminated_length": 237.9375, | |
| "completions/mean_length": 141.9638671875, | |
| "completions/mean_terminated_length": 132.3677864074707, | |
| "completions/min_length": 62.875, | |
| "completions/min_terminated_length": 62.875, | |
| "entropy": 0.06903915433213115, | |
| "epoch": 0.5908649173955296, | |
| "frac_reward_zero_std": 0.3984375, | |
| "grad_norm": 0.5333549380302429, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0115, | |
| "num_tokens": 48101641.0, | |
| "reward": 9.540560752153397, | |
| "reward_std": 0.925620548427105, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8527389727532864, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3208633568137884, | |
| "rewards/event_reward_fn/mean": 7.8310546875, | |
| "rewards/event_reward_fn/std": 4.8098659962415695, | |
| "rewards/format_reward_fn/mean": 0.8567671179771423, | |
| "rewards/format_reward_fn/std": 0.3000659542158246, | |
| "step": 608 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09765625, | |
| "completions/max_length": 255.0625, | |
| "completions/max_terminated_length": 246.0, | |
| "completions/mean_length": 166.4150390625, | |
| "completions/mean_terminated_length": 156.72006034851074, | |
| "completions/min_length": 85.125, | |
| "completions/min_terminated_length": 85.125, | |
| "entropy": 0.0729338163509965, | |
| "epoch": 0.6064139941690962, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.24336110055446625, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0083, | |
| "num_tokens": 49369398.0, | |
| "reward": 8.873536258935928, | |
| "reward_std": 1.0537522435188293, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.5898543912917376, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.43156279996037483, | |
| "rewards/event_reward_fn/mean": 7.6513671875, | |
| "rewards/event_reward_fn/std": 4.6099734753370285, | |
| "rewards/format_reward_fn/mean": 0.6323146112263203, | |
| "rewards/format_reward_fn/std": 0.3545870538800955, | |
| "step": 624 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1171875, | |
| "completions/max_length": 255.875, | |
| "completions/max_terminated_length": 247.125, | |
| "completions/mean_length": 177.6005859375, | |
| "completions/mean_terminated_length": 167.23325157165527, | |
| "completions/min_length": 87.3125, | |
| "completions/min_terminated_length": 87.3125, | |
| "entropy": 0.07631410518661141, | |
| "epoch": 0.6219630709426628, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.20491930842399597, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0025, | |
| "num_tokens": 50684041.0, | |
| "reward": 10.097908169031143, | |
| "reward_std": 1.099338386207819, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8222074285149574, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3467689296230674, | |
| "rewards/event_reward_fn/mean": 8.4619140625, | |
| "rewards/event_reward_fn/std": 5.3020381182432175, | |
| "rewards/format_reward_fn/mean": 0.8137868903577328, | |
| "rewards/format_reward_fn/std": 0.34406947437673807, | |
| "step": 640 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08203125, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 246.4375, | |
| "completions/mean_length": 175.4794921875, | |
| "completions/mean_terminated_length": 168.33812427520752, | |
| "completions/min_length": 90.8125, | |
| "completions/min_terminated_length": 90.8125, | |
| "entropy": 0.0735539214219898, | |
| "epoch": 0.6375121477162293, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.09572162479162216, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 52012688.0, | |
| "reward": 10.082605361938477, | |
| "reward_std": 0.8642270974814892, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8720975369215012, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.265175896929577, | |
| "rewards/event_reward_fn/mean": 8.3173828125, | |
| "rewards/event_reward_fn/std": 5.005625352263451, | |
| "rewards/format_reward_fn/mean": 0.8931249976158142, | |
| "rewards/format_reward_fn/std": 0.26579738268628716, | |
| "step": 656 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1337890625, | |
| "completions/max_length": 254.5, | |
| "completions/max_terminated_length": 248.5, | |
| "completions/mean_length": 188.3662109375, | |
| "completions/mean_terminated_length": 178.33765697479248, | |
| "completions/min_length": 97.0625, | |
| "completions/min_terminated_length": 97.0625, | |
| "entropy": 0.07762870891019702, | |
| "epoch": 0.6530612244897959, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.12062438577413559, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0038, | |
| "num_tokens": 53302267.0, | |
| "reward": 10.330396890640259, | |
| "reward_std": 0.9276157356798649, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8132092356681824, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33317599166184664, | |
| "rewards/event_reward_fn/mean": 8.6669921875, | |
| "rewards/event_reward_fn/std": 5.405571684241295, | |
| "rewards/format_reward_fn/mean": 0.8501953110098839, | |
| "rewards/format_reward_fn/std": 0.3391735916957259, | |
| "step": 672 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1806640625, | |
| "completions/max_length": 255.5, | |
| "completions/max_terminated_length": 252.0625, | |
| "completions/mean_length": 195.8662109375, | |
| "completions/mean_terminated_length": 182.70546627044678, | |
| "completions/min_length": 118.8125, | |
| "completions/min_terminated_length": 118.8125, | |
| "entropy": 0.07736558141186833, | |
| "epoch": 0.6686103012633625, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.1318187564611435, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0018, | |
| "num_tokens": 54548578.0, | |
| "reward": 10.327336311340332, | |
| "reward_std": 1.0552778337150812, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7777924984693527, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.36456546862609684, | |
| "rewards/event_reward_fn/mean": 8.7490234375, | |
| "rewards/event_reward_fn/std": 5.162845477461815, | |
| "rewards/format_reward_fn/mean": 0.8005203679203987, | |
| "rewards/format_reward_fn/std": 0.36596682760864496, | |
| "step": 688 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.169921875, | |
| "completions/max_length": 254.9375, | |
| "completions/max_terminated_length": 246.0, | |
| "completions/mean_length": 192.677734375, | |
| "completions/mean_terminated_length": 180.396879196167, | |
| "completions/min_length": 110.875, | |
| "completions/min_terminated_length": 110.875, | |
| "entropy": 0.07653255970217288, | |
| "epoch": 0.6841593780369291, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.11105561256408691, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0111, | |
| "num_tokens": 55838164.0, | |
| "reward": 10.28305697441101, | |
| "reward_std": 0.9923089742660522, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8106463178992271, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33590539428405464, | |
| "rewards/event_reward_fn/mean": 8.654296875, | |
| "rewards/event_reward_fn/std": 5.304804667830467, | |
| "rewards/format_reward_fn/mean": 0.8181138336658478, | |
| "rewards/format_reward_fn/std": 0.33803721610456705, | |
| "step": 704 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.091796875, | |
| "completions/max_length": 250.6875, | |
| "completions/max_terminated_length": 234.9375, | |
| "completions/mean_length": 174.64453125, | |
| "completions/mean_terminated_length": 166.46386337280273, | |
| "completions/min_length": 96.4375, | |
| "completions/min_terminated_length": 96.4375, | |
| "entropy": 0.0745530491694808, | |
| "epoch": 0.6997084548104956, | |
| "frac_reward_zero_std": 0.33984375, | |
| "grad_norm": 0.23312747478485107, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0036, | |
| "num_tokens": 57159160.0, | |
| "reward": 10.450001329183578, | |
| "reward_std": 0.9695746805518866, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8385304771363735, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.32711231615394354, | |
| "rewards/event_reward_fn/mean": 8.76953125, | |
| "rewards/event_reward_fn/std": 5.160630002617836, | |
| "rewards/format_reward_fn/mean": 0.841939639300108, | |
| "rewards/format_reward_fn/std": 0.3311331504955888, | |
| "step": 720 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1201171875, | |
| "completions/max_length": 252.625, | |
| "completions/max_terminated_length": 244.75, | |
| "completions/mean_length": 186.2763671875, | |
| "completions/mean_terminated_length": 177.73645687103271, | |
| "completions/min_length": 108.3125, | |
| "completions/min_terminated_length": 108.3125, | |
| "entropy": 0.06967416848056018, | |
| "epoch": 0.7152575315840622, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.18518145382404327, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0029, | |
| "num_tokens": 58498911.0, | |
| "reward": 10.422120094299316, | |
| "reward_std": 0.8677894007414579, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8313977345824242, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3058948842808604, | |
| "rewards/event_reward_fn/mean": 8.7490234375, | |
| "rewards/event_reward_fn/std": 5.314541980624199, | |
| "rewards/format_reward_fn/mean": 0.8416987583041191, | |
| "rewards/format_reward_fn/std": 0.30255721998400986, | |
| "step": 736 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0556640625, | |
| "completions/max_length": 250.3125, | |
| "completions/max_terminated_length": 239.4375, | |
| "completions/mean_length": 165.4521484375, | |
| "completions/mean_terminated_length": 160.36469841003418, | |
| "completions/min_length": 98.875, | |
| "completions/min_terminated_length": 98.875, | |
| "entropy": 0.07365260319784284, | |
| "epoch": 0.7308066083576288, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.1640687733888626, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0024, | |
| "num_tokens": 59787238.0, | |
| "reward": 9.981308668851852, | |
| "reward_std": 0.8546336572617292, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8880095556378365, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.26380802411586046, | |
| "rewards/event_reward_fn/mean": 8.1982421875, | |
| "rewards/event_reward_fn/std": 5.018857464194298, | |
| "rewards/format_reward_fn/mean": 0.8950570411980152, | |
| "rewards/format_reward_fn/std": 0.2686548628844321, | |
| "step": 752 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05078125, | |
| "completions/max_length": 237.9375, | |
| "completions/max_terminated_length": 218.5, | |
| "completions/mean_length": 147.5947265625, | |
| "completions/mean_terminated_length": 141.85034561157227, | |
| "completions/min_length": 84.9375, | |
| "completions/min_terminated_length": 84.9375, | |
| "entropy": 0.0634845974855125, | |
| "epoch": 0.7463556851311953, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.20938096940517426, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0135, | |
| "num_tokens": 61026539.0, | |
| "reward": 10.265896439552307, | |
| "reward_std": 0.9515191409736872, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9135676696896553, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.21515763795468956, | |
| "rewards/event_reward_fn/mean": 8.4326171875, | |
| "rewards/event_reward_fn/std": 5.086499974131584, | |
| "rewards/format_reward_fn/mean": 0.9197116829454899, | |
| "rewards/format_reward_fn/std": 0.20483782514929771, | |
| "step": 768 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1162109375, | |
| "completions/max_length": 253.375, | |
| "completions/max_terminated_length": 245.1875, | |
| "completions/mean_length": 187.802734375, | |
| "completions/mean_terminated_length": 179.58960628509521, | |
| "completions/min_length": 114.1875, | |
| "completions/min_terminated_length": 114.1875, | |
| "entropy": 0.0702137725893408, | |
| "epoch": 0.7619047619047619, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.18974582850933075, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0031, | |
| "num_tokens": 62313941.0, | |
| "reward": 10.483202993869781, | |
| "reward_std": 0.943267323076725, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.84468699619174, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3197319367900491, | |
| "rewards/event_reward_fn/mean": 8.7783203125, | |
| "rewards/event_reward_fn/std": 5.225345551967621, | |
| "rewards/format_reward_fn/mean": 0.8601957745850086, | |
| "rewards/format_reward_fn/std": 0.32167423889040947, | |
| "step": 784 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1279296875, | |
| "completions/max_length": 252.5625, | |
| "completions/max_terminated_length": 243.5625, | |
| "completions/mean_length": 186.220703125, | |
| "completions/mean_terminated_length": 176.8216428756714, | |
| "completions/min_length": 120.125, | |
| "completions/min_terminated_length": 120.125, | |
| "entropy": 0.07592986570671201, | |
| "epoch": 0.7774538386783285, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.1492370367050171, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 63617551.0, | |
| "reward": 9.786191403865814, | |
| "reward_std": 0.9563806988298893, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8338383696973324, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.31574585498310626, | |
| "rewards/event_reward_fn/mean": 8.1015625, | |
| "rewards/event_reward_fn/std": 4.919276848435402, | |
| "rewards/format_reward_fn/mean": 0.850790549069643, | |
| "rewards/format_reward_fn/std": 0.31896755122579634, | |
| "step": 800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0654296875, | |
| "completions/max_length": 249.75, | |
| "completions/max_terminated_length": 233.875, | |
| "completions/mean_length": 160.158203125, | |
| "completions/mean_terminated_length": 153.3260145187378, | |
| "completions/min_length": 89.625, | |
| "completions/min_terminated_length": 89.625, | |
| "entropy": 0.06539753102697432, | |
| "epoch": 0.793002915451895, | |
| "frac_reward_zero_std": 0.33984375, | |
| "grad_norm": 0.20876899361610413, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0054, | |
| "num_tokens": 64856573.0, | |
| "reward": 9.658368825912476, | |
| "reward_std": 0.9620554894208908, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.895107377320528, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25602476752828807, | |
| "rewards/event_reward_fn/mean": 7.857421875, | |
| "rewards/event_reward_fn/std": 4.493844509124756, | |
| "rewards/format_reward_fn/mean": 0.9058398455381393, | |
| "rewards/format_reward_fn/std": 0.2541873576119542, | |
| "step": 816 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0712890625, | |
| "completions/max_length": 249.5, | |
| "completions/max_terminated_length": 236.25, | |
| "completions/mean_length": 169.5673828125, | |
| "completions/mean_terminated_length": 163.07803535461426, | |
| "completions/min_length": 100.75, | |
| "completions/min_terminated_length": 100.75, | |
| "entropy": 0.06818107352592051, | |
| "epoch": 0.8085519922254616, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.20962977409362793, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0018, | |
| "num_tokens": 66158990.0, | |
| "reward": 10.353489756584167, | |
| "reward_std": 0.9985193219035864, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8550090603530407, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3100271187722683, | |
| "rewards/event_reward_fn/mean": 8.62890625, | |
| "rewards/event_reward_fn/std": 4.978297606110573, | |
| "rewards/format_reward_fn/mean": 0.8695743456482887, | |
| "rewards/format_reward_fn/std": 0.31247875466942787, | |
| "step": 832 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.130859375, | |
| "completions/max_length": 255.625, | |
| "completions/max_terminated_length": 245.0625, | |
| "completions/mean_length": 183.2822265625, | |
| "completions/mean_terminated_length": 172.68112754821777, | |
| "completions/min_length": 108.0625, | |
| "completions/min_terminated_length": 108.0625, | |
| "entropy": 0.07087993528693914, | |
| "epoch": 0.8241010689990281, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.17762945592403412, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0049, | |
| "num_tokens": 67501687.0, | |
| "reward": 10.45748645067215, | |
| "reward_std": 0.9843454174697399, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8369721993803978, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33558181021362543, | |
| "rewards/event_reward_fn/mean": 8.7734375, | |
| "rewards/event_reward_fn/std": 5.215954706072807, | |
| "rewards/format_reward_fn/mean": 0.8470768220722675, | |
| "rewards/format_reward_fn/std": 0.33892421517521143, | |
| "step": 848 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.169921875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 250.8125, | |
| "completions/mean_length": 204.1787109375, | |
| "completions/mean_terminated_length": 193.6075620651245, | |
| "completions/min_length": 138.125, | |
| "completions/min_terminated_length": 138.125, | |
| "entropy": 0.07261033216491342, | |
| "epoch": 0.8396501457725948, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.18910834193229675, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0076, | |
| "num_tokens": 68769010.0, | |
| "reward": 10.237849026918411, | |
| "reward_std": 0.9591232761740685, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7683875225484371, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.39661576971411705, | |
| "rewards/event_reward_fn/mean": 8.6796875, | |
| "rewards/event_reward_fn/std": 5.074413627386093, | |
| "rewards/format_reward_fn/mean": 0.7897739969193935, | |
| "rewards/format_reward_fn/std": 0.4046425260603428, | |
| "step": 864 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.10546875, | |
| "completions/max_length": 255.75, | |
| "completions/max_terminated_length": 243.25, | |
| "completions/mean_length": 185.24609375, | |
| "completions/mean_terminated_length": 176.98966312408447, | |
| "completions/min_length": 115.1875, | |
| "completions/min_terminated_length": 115.1875, | |
| "entropy": 0.07335718860849738, | |
| "epoch": 0.8551992225461613, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.19497302174568176, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0045, | |
| "num_tokens": 70072946.0, | |
| "reward": 9.902502715587616, | |
| "reward_std": 1.0277547165751457, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8267017714679241, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33686008118093014, | |
| "rewards/event_reward_fn/mean": 8.2275390625, | |
| "rewards/event_reward_fn/std": 4.601325109601021, | |
| "rewards/format_reward_fn/mean": 0.848261721432209, | |
| "rewards/format_reward_fn/std": 0.33925584983080626, | |
| "step": 880 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0771484375, | |
| "completions/max_length": 253.25, | |
| "completions/max_terminated_length": 235.625, | |
| "completions/mean_length": 168.2685546875, | |
| "completions/mean_terminated_length": 160.67493724822998, | |
| "completions/min_length": 91.75, | |
| "completions/min_terminated_length": 91.75, | |
| "entropy": 0.07022251281887293, | |
| "epoch": 0.8707482993197279, | |
| "frac_reward_zero_std": 0.31640625, | |
| "grad_norm": 0.22896689176559448, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 71408625.0, | |
| "reward": 10.46012270450592, | |
| "reward_std": 0.9469396620988846, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8612882420420647, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.27462146105244756, | |
| "rewards/event_reward_fn/mean": 8.716796875, | |
| "rewards/event_reward_fn/std": 5.030902713537216, | |
| "rewards/format_reward_fn/mean": 0.8820377588272095, | |
| "rewards/format_reward_fn/std": 0.2647479181177914, | |
| "step": 896 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05078125, | |
| "completions/max_length": 242.9375, | |
| "completions/max_terminated_length": 230.4375, | |
| "completions/mean_length": 145.1064453125, | |
| "completions/mean_terminated_length": 139.08607959747314, | |
| "completions/min_length": 71.0, | |
| "completions/min_terminated_length": 71.0, | |
| "entropy": 0.06589728174731135, | |
| "epoch": 0.8862973760932945, | |
| "frac_reward_zero_std": 0.3671875, | |
| "grad_norm": 0.3958277702331543, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0044, | |
| "num_tokens": 72662642.0, | |
| "reward": 10.200001657009125, | |
| "reward_std": 0.9421045333147049, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9011733010411263, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2283891054103151, | |
| "rewards/event_reward_fn/mean": 8.373046875, | |
| "rewards/event_reward_fn/std": 4.713611409068108, | |
| "rewards/format_reward_fn/mean": 0.92578125, | |
| "rewards/format_reward_fn/std": 0.21754403738304973, | |
| "step": 912 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0283203125, | |
| "completions/max_length": 238.5, | |
| "completions/max_terminated_length": 225.9375, | |
| "completions/mean_length": 132.8330078125, | |
| "completions/mean_terminated_length": 129.05783081054688, | |
| "completions/min_length": 52.0, | |
| "completions/min_terminated_length": 52.0, | |
| "entropy": 0.060941929230466485, | |
| "epoch": 0.901846452866861, | |
| "frac_reward_zero_std": 0.4296875, | |
| "grad_norm": 0.17806340754032135, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0042, | |
| "num_tokens": 73875967.0, | |
| "reward": 9.45022863149643, | |
| "reward_std": 0.7097359485924244, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.935970850288868, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16215045971330255, | |
| "rewards/event_reward_fn/mean": 7.5576171875, | |
| "rewards/event_reward_fn/std": 4.745793879032135, | |
| "rewards/format_reward_fn/mean": 0.9566406235098839, | |
| "rewards/format_reward_fn/std": 0.13776301313191652, | |
| "step": 928 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 251.5, | |
| "completions/max_terminated_length": 235.5, | |
| "completions/mean_length": 162.619140625, | |
| "completions/mean_terminated_length": 155.03940200805664, | |
| "completions/min_length": 95.9375, | |
| "completions/min_terminated_length": 95.9375, | |
| "entropy": 0.06535043194890022, | |
| "epoch": 0.9173955296404276, | |
| "frac_reward_zero_std": 0.3671875, | |
| "grad_norm": 0.3161742687225342, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 75159981.0, | |
| "reward": 10.09964656829834, | |
| "reward_std": 0.951118241995573, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8730840981006622, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2608068126719445, | |
| "rewards/event_reward_fn/mean": 8.326171875, | |
| "rewards/event_reward_fn/std": 5.628681242465973, | |
| "rewards/format_reward_fn/mean": 0.900390625, | |
| "rewards/format_reward_fn/std": 0.25107863638550043, | |
| "step": 944 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1083984375, | |
| "completions/max_length": 252.875, | |
| "completions/max_terminated_length": 241.9375, | |
| "completions/mean_length": 188.71875, | |
| "completions/mean_terminated_length": 181.2633514404297, | |
| "completions/min_length": 117.625, | |
| "completions/min_terminated_length": 117.625, | |
| "entropy": 0.07137463777326047, | |
| "epoch": 0.9329446064139941, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.2805193066596985, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0023, | |
| "num_tokens": 76415209.0, | |
| "reward": 10.079432845115662, | |
| "reward_std": 0.7802535220980644, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8216202445328236, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3154827356338501, | |
| "rewards/event_reward_fn/mean": 8.40625, | |
| "rewards/event_reward_fn/std": 5.284300252795219, | |
| "rewards/format_reward_fn/mean": 0.8515625, | |
| "rewards/format_reward_fn/std": 0.3159356191754341, | |
| "step": 960 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.162109375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 246.875, | |
| "completions/mean_length": 205.09375, | |
| "completions/mean_terminated_length": 195.3371343612671, | |
| "completions/min_length": 141.0, | |
| "completions/min_terminated_length": 141.0, | |
| "entropy": 0.06873999303206801, | |
| "epoch": 0.9484936831875608, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.10008546710014343, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0041, | |
| "num_tokens": 77770037.0, | |
| "reward": 10.352019369602203, | |
| "reward_std": 0.7665594182908535, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7921560294926167, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3632864858955145, | |
| "rewards/event_reward_fn/mean": 8.732421875, | |
| "rewards/event_reward_fn/std": 5.339399605989456, | |
| "rewards/format_reward_fn/mean": 0.8274414055049419, | |
| "rewards/format_reward_fn/std": 0.37096375692635775, | |
| "step": 976 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0654296875, | |
| "completions/max_length": 254.0, | |
| "completions/max_terminated_length": 241.5, | |
| "completions/mean_length": 184.2421875, | |
| "completions/mean_terminated_length": 179.25956344604492, | |
| "completions/min_length": 118.875, | |
| "completions/min_terminated_length": 118.875, | |
| "entropy": 0.07302290247753263, | |
| "epoch": 0.9640427599611273, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.11569506675004959, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0, | |
| "num_tokens": 79058385.0, | |
| "reward": 10.270190715789795, | |
| "reward_std": 0.9035040959715843, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8707767426967621, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2817615191452205, | |
| "rewards/event_reward_fn/mean": 8.4990234375, | |
| "rewards/event_reward_fn/std": 5.17444010078907, | |
| "rewards/format_reward_fn/mean": 0.900390625, | |
| "rewards/format_reward_fn/std": 0.27954914048314095, | |
| "step": 992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.083984375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 241.625, | |
| "completions/mean_length": 177.517578125, | |
| "completions/mean_terminated_length": 170.22113609313965, | |
| "completions/min_length": 109.5625, | |
| "completions/min_terminated_length": 109.5625, | |
| "entropy": 0.07096637412905693, | |
| "epoch": 0.9795918367346939, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.24779611825942993, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 80333979.0, | |
| "reward": 10.588190495967865, | |
| "reward_std": 0.8662494085729122, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8662735223770142, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.29072041157633066, | |
| "rewards/event_reward_fn/mean": 8.833984375, | |
| "rewards/event_reward_fn/std": 5.076077088713646, | |
| "rewards/format_reward_fn/mean": 0.8879324793815613, | |
| "rewards/format_reward_fn/std": 0.28930215165019035, | |
| "step": 1008 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0673828125, | |
| "completions/max_length": 252.1875, | |
| "completions/max_terminated_length": 235.3125, | |
| "completions/mean_length": 171.123046875, | |
| "completions/mean_terminated_length": 165.06201934814453, | |
| "completions/min_length": 99.5625, | |
| "completions/min_terminated_length": 99.5625, | |
| "entropy": 0.06698882719501853, | |
| "epoch": 0.9951409135082604, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.19038081169128418, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 81595449.0, | |
| "reward": 10.268950939178467, | |
| "reward_std": 0.8857842069119215, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8906435556709766, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25779614597558975, | |
| "rewards/event_reward_fn/mean": 8.462890625, | |
| "rewards/event_reward_fn/std": 5.074081584811211, | |
| "rewards/format_reward_fn/mean": 0.915416669100523, | |
| "rewards/format_reward_fn/std": 0.25232047867029905, | |
| "step": 1024 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.08806818181818182, | |
| "completions/max_length": 254.45454545454547, | |
| "completions/max_terminated_length": 244.9090909090909, | |
| "completions/mean_length": 178.65482954545453, | |
| "completions/mean_terminated_length": 171.0937597101385, | |
| "completions/min_length": 112.27272727272727, | |
| "completions/min_terminated_length": 112.27272727272727, | |
| "entropy": 0.07033889300443909, | |
| "epoch": 1.010689990281827, | |
| "frac_reward_zero_std": 0.3465909090909091, | |
| "grad_norm": 0.13537470996379852, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0016, | |
| "num_tokens": 82875743.0, | |
| "reward": 10.102936571294611, | |
| "reward_std": 0.9548169591210105, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8734858144413341, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2784238914874467, | |
| "rewards/event_reward_fn/mean": 8.342329545454545, | |
| "rewards/event_reward_fn/std": 4.7735207947817715, | |
| "rewards/format_reward_fn/mean": 0.8871212113987316, | |
| "rewards/format_reward_fn/std": 0.27951826561581006, | |
| "step": 1040 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.123046875, | |
| "completions/max_length": 255.5625, | |
| "completions/max_terminated_length": 248.1875, | |
| "completions/mean_length": 187.521484375, | |
| "completions/mean_terminated_length": 177.78209781646729, | |
| "completions/min_length": 120.125, | |
| "completions/min_terminated_length": 120.125, | |
| "entropy": 0.06871294020675123, | |
| "epoch": 1.0262390670553936, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.14529068768024445, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0064, | |
| "num_tokens": 84193629.0, | |
| "reward": 10.711235225200653, | |
| "reward_std": 0.9264990799129009, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8358808867633343, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3423158023506403, | |
| "rewards/event_reward_fn/mean": 9.029296875, | |
| "rewards/event_reward_fn/std": 5.407557427883148, | |
| "rewards/format_reward_fn/mean": 0.8460574820637703, | |
| "rewards/format_reward_fn/std": 0.34603168070316315, | |
| "step": 1056 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.095703125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 245.625, | |
| "completions/mean_length": 188.896484375, | |
| "completions/mean_terminated_length": 181.97695064544678, | |
| "completions/min_length": 120.8125, | |
| "completions/min_terminated_length": 120.8125, | |
| "entropy": 0.06455810344778001, | |
| "epoch": 1.0417881438289602, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.17880740761756897, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0014, | |
| "num_tokens": 85537059.0, | |
| "reward": 10.491520524024963, | |
| "reward_std": 0.9325292967259884, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.841754749417305, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3239448321983218, | |
| "rewards/event_reward_fn/mean": 8.7900390625, | |
| "rewards/event_reward_fn/std": 5.293820217251778, | |
| "rewards/format_reward_fn/mean": 0.8597265593707561, | |
| "rewards/format_reward_fn/std": 0.3177201831713319, | |
| "step": 1072 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15625, | |
| "completions/max_length": 255.5, | |
| "completions/max_terminated_length": 244.5, | |
| "completions/mean_length": 200.1123046875, | |
| "completions/mean_terminated_length": 190.06258392333984, | |
| "completions/min_length": 137.1875, | |
| "completions/min_terminated_length": 137.1875, | |
| "entropy": 0.06795511720702052, | |
| "epoch": 1.0573372206025267, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.08574163913726807, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0069, | |
| "num_tokens": 86860678.0, | |
| "reward": 10.812386631965637, | |
| "reward_std": 1.0998864620923996, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7638319730758667, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3960200799629092, | |
| "rewards/event_reward_fn/mean": 9.2724609375, | |
| "rewards/event_reward_fn/std": 6.0772674679756165, | |
| "rewards/format_reward_fn/mean": 0.7760937549173832, | |
| "rewards/format_reward_fn/std": 0.4006781214848161, | |
| "step": 1088 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2978515625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 252.0, | |
| "completions/mean_length": 220.5654296875, | |
| "completions/mean_terminated_length": 206.34245109558105, | |
| "completions/min_length": 159.6875, | |
| "completions/min_terminated_length": 159.6875, | |
| "entropy": 0.07582874409854412, | |
| "epoch": 1.0728862973760933, | |
| "frac_reward_zero_std": 0.234375, | |
| "grad_norm": 0.20225036144256592, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0128, | |
| "num_tokens": 88190949.0, | |
| "reward": 10.548758864402771, | |
| "reward_std": 0.9557082541286945, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.6565842125564814, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.42724930588155985, | |
| "rewards/event_reward_fn/mean": 9.2119140625, | |
| "rewards/event_reward_fn/std": 5.52141310274601, | |
| "rewards/format_reward_fn/mean": 0.6802604161202908, | |
| "rewards/format_reward_fn/std": 0.4410219779238105, | |
| "step": 1104 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.068359375, | |
| "completions/max_length": 254.9375, | |
| "completions/max_terminated_length": 243.1875, | |
| "completions/mean_length": 181.650390625, | |
| "completions/mean_terminated_length": 176.39045429229736, | |
| "completions/min_length": 119.75, | |
| "completions/min_terminated_length": 119.75, | |
| "entropy": 0.07010088441893458, | |
| "epoch": 1.08843537414966, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.1840677410364151, | |
| "learning_rate": 5e-05, | |
| "loss": 0.002, | |
| "num_tokens": 89433463.0, | |
| "reward": 10.5318962931633, | |
| "reward_std": 1.0016295239329338, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8906202651560307, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2701933770440519, | |
| "rewards/event_reward_fn/mean": 8.732421875, | |
| "rewards/event_reward_fn/std": 5.466498285531998, | |
| "rewards/format_reward_fn/mean": 0.9088541679084301, | |
| "rewards/format_reward_fn/std": 0.26082120556384325, | |
| "step": 1120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.056640625, | |
| "completions/max_length": 252.0, | |
| "completions/max_terminated_length": 241.0625, | |
| "completions/mean_length": 172.640625, | |
| "completions/mean_terminated_length": 167.69061183929443, | |
| "completions/min_length": 109.125, | |
| "completions/min_terminated_length": 109.125, | |
| "entropy": 0.06799150491133332, | |
| "epoch": 1.1039844509232264, | |
| "frac_reward_zero_std": 0.4140625, | |
| "grad_norm": 0.13713772594928741, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 90709603.0, | |
| "reward": 10.70878279209137, | |
| "reward_std": 0.8687677383422852, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.901556234806776, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25905836455058306, | |
| "rewards/event_reward_fn/mean": 8.890625, | |
| "rewards/event_reward_fn/std": 5.81499570608139, | |
| "rewards/format_reward_fn/mean": 0.9166015610098839, | |
| "rewards/format_reward_fn/std": 0.2588641280308366, | |
| "step": 1136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0537109375, | |
| "completions/max_length": 245.375, | |
| "completions/max_terminated_length": 232.6875, | |
| "completions/mean_length": 173.802734375, | |
| "completions/mean_terminated_length": 169.22315788269043, | |
| "completions/min_length": 108.6875, | |
| "completions/min_terminated_length": 108.6875, | |
| "entropy": 0.06564864912070334, | |
| "epoch": 1.119533527696793, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.12585203349590302, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0042, | |
| "num_tokens": 91936041.0, | |
| "reward": 10.107302486896515, | |
| "reward_std": 0.8662888705730438, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8905055709183216, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.27319654333405197, | |
| "rewards/event_reward_fn/mean": 8.3056640625, | |
| "rewards/event_reward_fn/std": 5.399076372385025, | |
| "rewards/format_reward_fn/mean": 0.9111328125, | |
| "rewards/format_reward_fn/std": 0.2696619238704443, | |
| "step": 1152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.07421875, | |
| "completions/max_length": 251.875, | |
| "completions/max_terminated_length": 239.5, | |
| "completions/mean_length": 181.62109375, | |
| "completions/mean_terminated_length": 175.5115909576416, | |
| "completions/min_length": 116.1875, | |
| "completions/min_terminated_length": 116.1875, | |
| "entropy": 0.06781496806070209, | |
| "epoch": 1.1350826044703597, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.20404520630836487, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0056, | |
| "num_tokens": 93201105.0, | |
| "reward": 10.648929178714752, | |
| "reward_std": 0.9610726498067379, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8819760829210281, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.27036565099842846, | |
| "rewards/event_reward_fn/mean": 8.8681640625, | |
| "rewards/event_reward_fn/std": 5.41285502910614, | |
| "rewards/format_reward_fn/mean": 0.8987890630960464, | |
| "rewards/format_reward_fn/std": 0.2619143519550562, | |
| "step": 1168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1376953125, | |
| "completions/max_length": 254.375, | |
| "completions/max_terminated_length": 246.9375, | |
| "completions/mean_length": 201.556640625, | |
| "completions/mean_terminated_length": 193.48375415802002, | |
| "completions/min_length": 136.3125, | |
| "completions/min_terminated_length": 136.3125, | |
| "entropy": 0.06725385342724621, | |
| "epoch": 1.1506316812439261, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.1181834414601326, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0024, | |
| "num_tokens": 94576891.0, | |
| "reward": 11.315544486045837, | |
| "reward_std": 0.8351979665458202, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8150821626186371, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.34588195278774947, | |
| "rewards/event_reward_fn/mean": 9.6708984375, | |
| "rewards/event_reward_fn/std": 5.589598774909973, | |
| "rewards/format_reward_fn/mean": 0.829563807696104, | |
| "rewards/format_reward_fn/std": 0.34839474968612194, | |
| "step": 1184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.154296875, | |
| "completions/max_length": 253.375, | |
| "completions/max_terminated_length": 247.1875, | |
| "completions/mean_length": 203.18359375, | |
| "completions/mean_terminated_length": 194.42267608642578, | |
| "completions/min_length": 138.5625, | |
| "completions/min_terminated_length": 138.5625, | |
| "entropy": 0.07394770160317421, | |
| "epoch": 1.1661807580174928, | |
| "frac_reward_zero_std": 0.26171875, | |
| "grad_norm": 0.18629314005374908, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0049, | |
| "num_tokens": 95881507.0, | |
| "reward": 10.813474893569946, | |
| "reward_std": 1.0365745667368174, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8142888676375151, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3391092037782073, | |
| "rewards/event_reward_fn/mean": 9.1708984375, | |
| "rewards/event_reward_fn/std": 5.419242635369301, | |
| "rewards/format_reward_fn/mean": 0.8282877653837204, | |
| "rewards/format_reward_fn/std": 0.34210248570889235, | |
| "step": 1200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0810546875, | |
| "completions/max_length": 251.3125, | |
| "completions/max_terminated_length": 242.875, | |
| "completions/mean_length": 186.673828125, | |
| "completions/mean_terminated_length": 180.55548667907715, | |
| "completions/min_length": 126.1875, | |
| "completions/min_terminated_length": 126.1875, | |
| "entropy": 0.06798666249960661, | |
| "epoch": 1.1817298347910592, | |
| "frac_reward_zero_std": 0.40234375, | |
| "grad_norm": 0.17748276889324188, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0026, | |
| "num_tokens": 97176789.0, | |
| "reward": 10.461668372154236, | |
| "reward_std": 0.7730772253125906, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8607958517968655, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.31750916969031096, | |
| "rewards/event_reward_fn/mean": 8.73046875, | |
| "rewards/event_reward_fn/std": 4.8714660704135895, | |
| "rewards/format_reward_fn/mean": 0.8704036474227905, | |
| "rewards/format_reward_fn/std": 0.3206114452332258, | |
| "step": 1216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1318359375, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 246.6875, | |
| "completions/mean_length": 200.603515625, | |
| "completions/mean_terminated_length": 192.25616931915283, | |
| "completions/min_length": 141.375, | |
| "completions/min_terminated_length": 141.375, | |
| "entropy": 0.07248709676787257, | |
| "epoch": 1.1972789115646258, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.15709719061851501, | |
| "learning_rate": 5e-05, | |
| "loss": -0.002, | |
| "num_tokens": 98527511.0, | |
| "reward": 10.778121054172516, | |
| "reward_std": 0.9276621714234352, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.848999809473753, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.31297336355783045, | |
| "rewards/event_reward_fn/mean": 9.0654296875, | |
| "rewards/event_reward_fn/std": 5.5686564445495605, | |
| "rewards/format_reward_fn/mean": 0.863691408187151, | |
| "rewards/format_reward_fn/std": 0.31423071026802063, | |
| "step": 1232 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.12109375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 246.25, | |
| "completions/mean_length": 201.720703125, | |
| "completions/mean_terminated_length": 194.1233615875244, | |
| "completions/min_length": 144.375, | |
| "completions/min_terminated_length": 144.375, | |
| "entropy": 0.07165544992312789, | |
| "epoch": 1.2128279883381925, | |
| "frac_reward_zero_std": 0.26953125, | |
| "grad_norm": 0.14202959835529327, | |
| "learning_rate": 5e-05, | |
| "loss": 0.001, | |
| "num_tokens": 99860557.0, | |
| "reward": 11.258443832397461, | |
| "reward_std": 0.9464571885764599, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8287562467157841, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3439189847558737, | |
| "rewards/event_reward_fn/mean": 9.5859375, | |
| "rewards/event_reward_fn/std": 5.814349502325058, | |
| "rewards/format_reward_fn/mean": 0.84375, | |
| "rewards/format_reward_fn/std": 0.3489691922441125, | |
| "step": 1248 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.091796875, | |
| "completions/max_length": 254.5, | |
| "completions/max_terminated_length": 245.8125, | |
| "completions/mean_length": 195.880859375, | |
| "completions/mean_terminated_length": 189.886492729187, | |
| "completions/min_length": 131.9375, | |
| "completions/min_terminated_length": 131.9375, | |
| "entropy": 0.07229456026107073, | |
| "epoch": 1.228377065111759, | |
| "frac_reward_zero_std": 0.2890625, | |
| "grad_norm": 0.2180081307888031, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 101156375.0, | |
| "reward": 10.139498263597488, | |
| "reward_std": 0.8279522079974413, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8485933281481266, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3134065044578165, | |
| "rewards/event_reward_fn/mean": 8.4267578125, | |
| "rewards/event_reward_fn/std": 5.1162159740924835, | |
| "rewards/format_reward_fn/mean": 0.864147137850523, | |
| "rewards/format_reward_fn/std": 0.31587369833141565, | |
| "step": 1264 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0693359375, | |
| "completions/max_length": 249.0, | |
| "completions/max_terminated_length": 244.3125, | |
| "completions/mean_length": 191.0654296875, | |
| "completions/mean_terminated_length": 186.35216617584229, | |
| "completions/min_length": 133.375, | |
| "completions/min_terminated_length": 133.375, | |
| "entropy": 0.06208949023857713, | |
| "epoch": 1.2439261418853256, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.11896482855081558, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 102475714.0, | |
| "reward": 11.101193368434906, | |
| "reward_std": 0.891064302995801, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9093973524868488, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2311963284155354, | |
| "rewards/event_reward_fn/mean": 9.275390625, | |
| "rewards/event_reward_fn/std": 5.045834094285965, | |
| "rewards/format_reward_fn/mean": 0.9164053164422512, | |
| "rewards/format_reward_fn/std": 0.2260741894133389, | |
| "step": 1280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.126953125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 248.5625, | |
| "completions/mean_length": 203.279296875, | |
| "completions/mean_terminated_length": 195.46306896209717, | |
| "completions/min_length": 141.875, | |
| "completions/min_terminated_length": 141.875, | |
| "entropy": 0.06049947580322623, | |
| "epoch": 1.259475218658892, | |
| "frac_reward_zero_std": 0.3984375, | |
| "grad_norm": 0.32402676343917847, | |
| "learning_rate": 5e-05, | |
| "loss": 0.004, | |
| "num_tokens": 103787612.0, | |
| "reward": 10.865350365638733, | |
| "reward_std": 0.8294984549283981, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8310730122029781, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3465144941583276, | |
| "rewards/event_reward_fn/mean": 9.1904296875, | |
| "rewards/event_reward_fn/std": 5.264712706208229, | |
| "rewards/format_reward_fn/mean": 0.8438476547598839, | |
| "rewards/format_reward_fn/std": 0.35203980933874846, | |
| "step": 1296 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.158203125, | |
| "completions/max_length": 255.9375, | |
| "completions/max_terminated_length": 248.4375, | |
| "completions/mean_length": 210.01953125, | |
| "completions/mean_terminated_length": 201.3692398071289, | |
| "completions/min_length": 153.25, | |
| "completions/min_terminated_length": 153.25, | |
| "entropy": 0.06503300159238279, | |
| "epoch": 1.2750242954324587, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.12493407726287842, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 105090944.0, | |
| "reward": 10.944722652435303, | |
| "reward_std": 0.8488267995417118, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8110702559351921, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3519176107365638, | |
| "rewards/event_reward_fn/mean": 9.3095703125, | |
| "rewards/event_reward_fn/std": 5.613954737782478, | |
| "rewards/format_reward_fn/mean": 0.8240820355713367, | |
| "rewards/format_reward_fn/std": 0.3540602792054415, | |
| "step": 1312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1943359375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 252.375, | |
| "completions/mean_length": 220.37890625, | |
| "completions/mean_terminated_length": 212.05939102172852, | |
| "completions/min_length": 168.75, | |
| "completions/min_terminated_length": 168.75, | |
| "entropy": 0.07254446996375918, | |
| "epoch": 1.2905733722060253, | |
| "frac_reward_zero_std": 0.28125, | |
| "grad_norm": 0.20231589674949646, | |
| "learning_rate": 5e-05, | |
| "loss": 0.001, | |
| "num_tokens": 106417228.0, | |
| "reward": 10.507185876369476, | |
| "reward_std": 0.8794627524912357, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7671468704938889, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3928522327914834, | |
| "rewards/event_reward_fn/mean": 8.9560546875, | |
| "rewards/event_reward_fn/std": 4.9830086678266525, | |
| "rewards/format_reward_fn/mean": 0.7839843779802322, | |
| "rewards/format_reward_fn/std": 0.3996342560276389, | |
| "step": 1328 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.162109375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 250.5, | |
| "completions/mean_length": 212.796875, | |
| "completions/mean_terminated_length": 205.06715965270996, | |
| "completions/min_length": 154.0625, | |
| "completions/min_terminated_length": 154.0625, | |
| "entropy": 0.07244179910048842, | |
| "epoch": 1.306122448979592, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.11867273598909378, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0027, | |
| "num_tokens": 107788032.0, | |
| "reward": 9.871440827846527, | |
| "reward_std": 0.9372463561594486, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7559298947453499, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.39855979569256306, | |
| "rewards/event_reward_fn/mean": 8.3486328125, | |
| "rewards/event_reward_fn/std": 5.14014707505703, | |
| "rewards/format_reward_fn/mean": 0.766878254711628, | |
| "rewards/format_reward_fn/std": 0.40443217288702726, | |
| "step": 1344 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.060546875, | |
| "completions/max_length": 250.5, | |
| "completions/max_terminated_length": 242.5625, | |
| "completions/mean_length": 190.1435546875, | |
| "completions/mean_terminated_length": 186.02939891815186, | |
| "completions/min_length": 129.1875, | |
| "completions/min_terminated_length": 129.1875, | |
| "entropy": 0.07182836486026645, | |
| "epoch": 1.3216715257531584, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.2237968146800995, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0051, | |
| "num_tokens": 109081171.0, | |
| "reward": 11.108476847410202, | |
| "reward_std": 0.8281007707118988, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8979316018521786, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2320653998758644, | |
| "rewards/event_reward_fn/mean": 9.30078125, | |
| "rewards/event_reward_fn/std": 5.342449679970741, | |
| "rewards/format_reward_fn/mean": 0.90976407751441, | |
| "rewards/format_reward_fn/std": 0.2275423549581319, | |
| "step": 1360 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0478515625, | |
| "completions/max_length": 249.8125, | |
| "completions/max_terminated_length": 240.5625, | |
| "completions/mean_length": 181.1884765625, | |
| "completions/mean_terminated_length": 177.4005880355835, | |
| "completions/min_length": 118.5, | |
| "completions/min_terminated_length": 118.5, | |
| "entropy": 0.06897289073094726, | |
| "epoch": 1.337220602526725, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.3482232987880707, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0049, | |
| "num_tokens": 110353716.0, | |
| "reward": 10.944713652133942, | |
| "reward_std": 0.9444422572851181, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8998373299837112, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23540139599936083, | |
| "rewards/event_reward_fn/mean": 9.1416015625, | |
| "rewards/event_reward_fn/std": 5.0793561935424805, | |
| "rewards/format_reward_fn/mean": 0.9032747447490692, | |
| "rewards/format_reward_fn/std": 0.249714526347816, | |
| "step": 1376 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.083984375, | |
| "completions/max_length": 251.3125, | |
| "completions/max_terminated_length": 241.8125, | |
| "completions/mean_length": 191.3935546875, | |
| "completions/mean_terminated_length": 185.62936782836914, | |
| "completions/min_length": 134.875, | |
| "completions/min_terminated_length": 134.875, | |
| "entropy": 0.07020568964071572, | |
| "epoch": 1.3527696793002915, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.26569458842277527, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0078, | |
| "num_tokens": 111677811.0, | |
| "reward": 11.232036709785461, | |
| "reward_std": 0.8982522189617157, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8747124671936035, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.28446152550168335, | |
| "rewards/event_reward_fn/mean": 9.4736328125, | |
| "rewards/event_reward_fn/std": 5.696656331419945, | |
| "rewards/format_reward_fn/mean": 0.8836914077401161, | |
| "rewards/format_reward_fn/std": 0.2844822397455573, | |
| "step": 1392 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1943359375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 251.3125, | |
| "completions/mean_length": 217.27734375, | |
| "completions/mean_terminated_length": 207.9519443511963, | |
| "completions/min_length": 165.875, | |
| "completions/min_terminated_length": 165.875, | |
| "entropy": 0.07557977363467216, | |
| "epoch": 1.3683187560738581, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.19800527393817902, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0051, | |
| "num_tokens": 112955859.0, | |
| "reward": 10.414989709854126, | |
| "reward_std": 0.8083504606038332, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7803216241300106, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3945994917303324, | |
| "rewards/event_reward_fn/mean": 8.841796875, | |
| "rewards/event_reward_fn/std": 4.6407610476017, | |
| "rewards/format_reward_fn/mean": 0.7928710989654064, | |
| "rewards/format_reward_fn/std": 0.39927749149501324, | |
| "step": 1408 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1201171875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 250.0, | |
| "completions/mean_length": 205.212890625, | |
| "completions/mean_terminated_length": 198.47776794433594, | |
| "completions/min_length": 152.8125, | |
| "completions/min_terminated_length": 152.8125, | |
| "entropy": 0.07301379647105932, | |
| "epoch": 1.3838678328474248, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.13404177129268646, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 114336889.0, | |
| "reward": 11.331741988658905, | |
| "reward_std": 0.8269859068095684, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8490662761032581, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.32220354955643415, | |
| "rewards/event_reward_fn/mean": 9.6259765625, | |
| "rewards/event_reward_fn/std": 5.535077631473541, | |
| "rewards/format_reward_fn/mean": 0.8566992208361626, | |
| "rewards/format_reward_fn/std": 0.3254187796264887, | |
| "step": 1424 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0771484375, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 244.625, | |
| "completions/mean_length": 195.3828125, | |
| "completions/mean_terminated_length": 190.21417903900146, | |
| "completions/min_length": 143.1875, | |
| "completions/min_terminated_length": 143.1875, | |
| "entropy": 0.07323169219307601, | |
| "epoch": 1.3994169096209912, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.5236871242523193, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 115649713.0, | |
| "reward": 11.00212150812149, | |
| "reward_std": 0.9517039023339748, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8683973699808121, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3078096741810441, | |
| "rewards/event_reward_fn/mean": 9.25390625, | |
| "rewards/event_reward_fn/std": 5.412711590528488, | |
| "rewards/format_reward_fn/mean": 0.8798177093267441, | |
| "rewards/format_reward_fn/std": 0.3116344837471843, | |
| "step": 1440 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05859375, | |
| "completions/max_length": 255.0, | |
| "completions/max_terminated_length": 247.0625, | |
| "completions/mean_length": 194.5361328125, | |
| "completions/mean_terminated_length": 190.66623401641846, | |
| "completions/min_length": 140.625, | |
| "completions/min_terminated_length": 140.625, | |
| "entropy": 0.0805603014305234, | |
| "epoch": 1.4149659863945578, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.24460569024085999, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 116942470.0, | |
| "reward": 10.231546640396118, | |
| "reward_std": 0.7946171164512634, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9014858566224575, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2548077297396958, | |
| "rewards/event_reward_fn/mean": 8.4208984375, | |
| "rewards/event_reward_fn/std": 4.865608409047127, | |
| "rewards/format_reward_fn/mean": 0.9091623313724995, | |
| "rewards/format_reward_fn/std": 0.2567377556115389, | |
| "step": 1456 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05078125, | |
| "completions/max_length": 253.9375, | |
| "completions/max_terminated_length": 245.1875, | |
| "completions/mean_length": 196.1181640625, | |
| "completions/mean_terminated_length": 193.0701208114624, | |
| "completions/min_length": 151.625, | |
| "completions/min_terminated_length": 151.625, | |
| "entropy": 0.08425948722288013, | |
| "epoch": 1.4305150631681243, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.207608163356781, | |
| "learning_rate": 5e-05, | |
| "loss": 0.002, | |
| "num_tokens": 118257695.0, | |
| "reward": 10.509873569011688, | |
| "reward_std": 0.8822544571012259, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9213385097682476, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.20133669557981193, | |
| "rewards/event_reward_fn/mean": 8.650390625, | |
| "rewards/event_reward_fn/std": 5.170787841081619, | |
| "rewards/format_reward_fn/mean": 0.9381445348262787, | |
| "rewards/format_reward_fn/std": 0.1984235211275518, | |
| "step": 1472 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.060546875, | |
| "completions/max_length": 249.875, | |
| "completions/max_terminated_length": 240.25, | |
| "completions/mean_length": 186.7001953125, | |
| "completions/mean_terminated_length": 182.21374893188477, | |
| "completions/min_length": 132.8125, | |
| "completions/min_terminated_length": 132.8125, | |
| "entropy": 0.0810700710862875, | |
| "epoch": 1.446064139941691, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.2522001564502716, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0016, | |
| "num_tokens": 119571004.0, | |
| "reward": 11.150494575500488, | |
| "reward_std": 0.9185313917696476, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9072700254619122, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.238927063299343, | |
| "rewards/event_reward_fn/mean": 9.32421875, | |
| "rewards/event_reward_fn/std": 5.231076046824455, | |
| "rewards/format_reward_fn/mean": 0.9190057702362537, | |
| "rewards/format_reward_fn/std": 0.23810118879191577, | |
| "step": 1488 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.07421875, | |
| "completions/max_length": 253.5, | |
| "completions/max_terminated_length": 244.625, | |
| "completions/mean_length": 190.771484375, | |
| "completions/mean_terminated_length": 185.8874397277832, | |
| "completions/min_length": 140.1875, | |
| "completions/min_terminated_length": 140.1875, | |
| "entropy": 0.0769493873231113, | |
| "epoch": 1.4616132167152576, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.13221606612205505, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0052, | |
| "num_tokens": 120878670.0, | |
| "reward": 11.72208970785141, | |
| "reward_std": 0.9891778491437435, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.865969829261303, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3025930265430361, | |
| "rewards/event_reward_fn/mean": 9.978515625, | |
| "rewards/event_reward_fn/std": 6.088510304689407, | |
| "rewards/format_reward_fn/mean": 0.8776041679084301, | |
| "rewards/format_reward_fn/std": 0.30370487459003925, | |
| "step": 1504 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 250.875, | |
| "completions/mean_length": 210.828125, | |
| "completions/mean_terminated_length": 206.00187873840332, | |
| "completions/min_length": 159.9375, | |
| "completions/min_terminated_length": 159.9375, | |
| "entropy": 0.09037951100617647, | |
| "epoch": 1.4771622934888242, | |
| "frac_reward_zero_std": 0.31640625, | |
| "grad_norm": 0.303564190864563, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0015, | |
| "num_tokens": 122164070.0, | |
| "reward": 11.119612038135529, | |
| "reward_std": 0.99767005443573, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8756668232381344, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.29590372927486897, | |
| "rewards/event_reward_fn/mean": 9.3515625, | |
| "rewards/event_reward_fn/std": 5.329805389046669, | |
| "rewards/format_reward_fn/mean": 0.8923828117549419, | |
| "rewards/format_reward_fn/std": 0.30003819055855274, | |
| "step": 1520 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1669921875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 253.0, | |
| "completions/mean_length": 215.0888671875, | |
| "completions/mean_terminated_length": 206.6857042312622, | |
| "completions/min_length": 164.4375, | |
| "completions/min_terminated_length": 164.4375, | |
| "entropy": 0.09090339438989758, | |
| "epoch": 1.4927113702623906, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.16249267756938934, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 123527081.0, | |
| "reward": 10.766064465045929, | |
| "reward_std": 0.8386576101183891, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.7968913167715073, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3705411199480295, | |
| "rewards/event_reward_fn/mean": 9.1552734375, | |
| "rewards/event_reward_fn/std": 5.637863516807556, | |
| "rewards/format_reward_fn/mean": 0.8138997405767441, | |
| "rewards/format_reward_fn/std": 0.3759169615805149, | |
| "step": 1536 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1435546875, | |
| "completions/max_length": 255.375, | |
| "completions/max_terminated_length": 250.375, | |
| "completions/mean_length": 212.8251953125, | |
| "completions/mean_terminated_length": 205.47227001190186, | |
| "completions/min_length": 157.5625, | |
| "completions/min_terminated_length": 157.5625, | |
| "entropy": 0.10008962173014879, | |
| "epoch": 1.508260447035957, | |
| "frac_reward_zero_std": 0.265625, | |
| "grad_norm": 0.23113620281219482, | |
| "learning_rate": 5e-05, | |
| "loss": 0.004, | |
| "num_tokens": 124865830.0, | |
| "reward": 10.332128584384918, | |
| "reward_std": 1.082621719688177, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8261714465916157, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3391446927562356, | |
| "rewards/event_reward_fn/mean": 8.6630859375, | |
| "rewards/event_reward_fn/std": 5.3445031344890594, | |
| "rewards/format_reward_fn/mean": 0.8428710959851742, | |
| "rewards/format_reward_fn/std": 0.34356776159256697, | |
| "step": 1552 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1103515625, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 247.875, | |
| "completions/mean_length": 204.4716796875, | |
| "completions/mean_terminated_length": 198.4902868270874, | |
| "completions/min_length": 149.9375, | |
| "completions/min_terminated_length": 149.9375, | |
| "entropy": 0.09716548025608063, | |
| "epoch": 1.5238095238095237, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.13532325625419617, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 126156049.0, | |
| "reward": 9.934103816747665, | |
| "reward_std": 0.9690110310912132, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8615453615784645, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2830730821006, | |
| "rewards/event_reward_fn/mean": 8.1953125, | |
| "rewards/event_reward_fn/std": 4.997192412614822, | |
| "rewards/format_reward_fn/mean": 0.8772460930049419, | |
| "rewards/format_reward_fn/std": 0.27953232545405626, | |
| "step": 1568 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0634765625, | |
| "completions/max_length": 253.625, | |
| "completions/max_terminated_length": 241.8125, | |
| "completions/mean_length": 191.8193359375, | |
| "completions/mean_terminated_length": 187.4044713973999, | |
| "completions/min_length": 134.4375, | |
| "completions/min_terminated_length": 134.4375, | |
| "entropy": 0.08724062331020832, | |
| "epoch": 1.5393586005830904, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.13813965022563934, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0036, | |
| "num_tokens": 127483244.0, | |
| "reward": 11.109964549541473, | |
| "reward_std": 0.9232164584100246, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9082068763673306, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25597723573446274, | |
| "rewards/event_reward_fn/mean": 9.279296875, | |
| "rewards/event_reward_fn/std": 5.3837059289216995, | |
| "rewards/format_reward_fn/mean": 0.9224609360098839, | |
| "rewards/format_reward_fn/std": 0.2576202508062124, | |
| "step": 1584 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0458984375, | |
| "completions/max_length": 251.0, | |
| "completions/max_terminated_length": 242.375, | |
| "completions/mean_length": 193.9111328125, | |
| "completions/mean_terminated_length": 190.90945529937744, | |
| "completions/min_length": 148.625, | |
| "completions/min_terminated_length": 148.625, | |
| "entropy": 0.08152232086285949, | |
| "epoch": 1.554907677356657, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.35102641582489014, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 128764293.0, | |
| "reward": 11.371211469173431, | |
| "reward_std": 0.8595849685370922, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9204303659498692, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.21888624806888402, | |
| "rewards/event_reward_fn/mean": 9.513671875, | |
| "rewards/event_reward_fn/std": 5.4597727209329605, | |
| "rewards/format_reward_fn/mean": 0.9371093735098839, | |
| "rewards/format_reward_fn/std": 0.2127007795497775, | |
| "step": 1600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.091796875, | |
| "completions/max_length": 252.375, | |
| "completions/max_terminated_length": 245.375, | |
| "completions/mean_length": 200.150390625, | |
| "completions/mean_terminated_length": 194.58474922180176, | |
| "completions/min_length": 145.125, | |
| "completions/min_terminated_length": 145.125, | |
| "entropy": 0.08945442596450448, | |
| "epoch": 1.5704567541302237, | |
| "frac_reward_zero_std": 0.28125, | |
| "grad_norm": 0.11586015671491623, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 130147811.0, | |
| "reward": 10.688360095024109, | |
| "reward_std": 0.8784848563373089, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8703912869095802, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.26367771509103477, | |
| "rewards/event_reward_fn/mean": 8.9267578125, | |
| "rewards/event_reward_fn/std": 5.635714888572693, | |
| "rewards/format_reward_fn/mean": 0.8912109360098839, | |
| "rewards/format_reward_fn/std": 0.2533010635524988, | |
| "step": 1616 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.07421875, | |
| "completions/max_length": 254.3125, | |
| "completions/max_terminated_length": 239.75, | |
| "completions/mean_length": 195.1494140625, | |
| "completions/mean_terminated_length": 190.32571697235107, | |
| "completions/min_length": 143.625, | |
| "completions/min_terminated_length": 143.625, | |
| "entropy": 0.08721820963546634, | |
| "epoch": 1.58600583090379, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.1575620472431183, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0, | |
| "num_tokens": 131479512.0, | |
| "reward": 10.922975957393646, | |
| "reward_std": 0.7370323836803436, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8828910291194916, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2897054869681597, | |
| "rewards/event_reward_fn/mean": 9.146484375, | |
| "rewards/event_reward_fn/std": 5.057717680931091, | |
| "rewards/format_reward_fn/mean": 0.8936002627015114, | |
| "rewards/format_reward_fn/std": 0.28906678687781096, | |
| "step": 1632 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0703125, | |
| "completions/max_length": 254.0, | |
| "completions/max_terminated_length": 241.6875, | |
| "completions/mean_length": 198.5185546875, | |
| "completions/mean_terminated_length": 194.11609935760498, | |
| "completions/min_length": 149.1875, | |
| "completions/min_terminated_length": 149.1875, | |
| "entropy": 0.08794478559866548, | |
| "epoch": 1.6015549076773565, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.16397124528884888, | |
| "learning_rate": 5e-05, | |
| "loss": 0.001, | |
| "num_tokens": 132797395.0, | |
| "reward": 10.608273446559906, | |
| "reward_std": 0.8345479369163513, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8834850341081619, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.27482672582846135, | |
| "rewards/event_reward_fn/mean": 8.8251953125, | |
| "rewards/event_reward_fn/std": 5.233703002333641, | |
| "rewards/format_reward_fn/mean": 0.8995930962264538, | |
| "rewards/format_reward_fn/std": 0.27503635361790657, | |
| "step": 1648 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.064453125, | |
| "completions/max_length": 254.6875, | |
| "completions/max_terminated_length": 241.25, | |
| "completions/mean_length": 192.1123046875, | |
| "completions/mean_terminated_length": 187.6938066482544, | |
| "completions/min_length": 137.3125, | |
| "completions/min_terminated_length": 137.3125, | |
| "entropy": 0.08001765748485923, | |
| "epoch": 1.6171039844509232, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.16833443939685822, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 134069966.0, | |
| "reward": 11.113677322864532, | |
| "reward_std": 0.9350622501224279, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9033686555922031, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2602922786027193, | |
| "rewards/event_reward_fn/mean": 9.2939453125, | |
| "rewards/event_reward_fn/std": 5.6752976179122925, | |
| "rewards/format_reward_fn/mean": 0.9163634702563286, | |
| "rewards/format_reward_fn/std": 0.2618194241076708, | |
| "step": 1664 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.06640625, | |
| "completions/max_length": 249.3125, | |
| "completions/max_terminated_length": 241.75, | |
| "completions/mean_length": 196.7158203125, | |
| "completions/mean_terminated_length": 192.56449699401855, | |
| "completions/min_length": 129.6875, | |
| "completions/min_terminated_length": 129.6875, | |
| "entropy": 0.08379031391814351, | |
| "epoch": 1.6326530612244898, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.14574581384658813, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0047, | |
| "num_tokens": 135435439.0, | |
| "reward": 11.539310336112976, | |
| "reward_std": 0.9443789459764957, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8967322260141373, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22321847162675112, | |
| "rewards/event_reward_fn/mean": 9.7314453125, | |
| "rewards/event_reward_fn/std": 5.278485506772995, | |
| "rewards/format_reward_fn/mean": 0.9111328125, | |
| "rewards/format_reward_fn/std": 0.21248832251876593, | |
| "step": 1680 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0791015625, | |
| "completions/max_length": 254.8125, | |
| "completions/max_terminated_length": 244.8125, | |
| "completions/mean_length": 202.83203125, | |
| "completions/mean_terminated_length": 198.2507667541504, | |
| "completions/min_length": 150.625, | |
| "completions/min_terminated_length": 150.625, | |
| "entropy": 0.08855495927855372, | |
| "epoch": 1.6482021379980565, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.17940281331539154, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0056, | |
| "num_tokens": 136778675.0, | |
| "reward": 11.134308993816376, | |
| "reward_std": 0.9293302595615387, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8743740394711494, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.26636734034400433, | |
| "rewards/event_reward_fn/mean": 9.3662109375, | |
| "rewards/event_reward_fn/std": 5.85838320851326, | |
| "rewards/format_reward_fn/mean": 0.8937239646911621, | |
| "rewards/format_reward_fn/std": 0.2652863524854183, | |
| "step": 1696 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.138671875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 251.6875, | |
| "completions/mean_length": 213.9638671875, | |
| "completions/mean_terminated_length": 207.2107219696045, | |
| "completions/min_length": 162.375, | |
| "completions/min_terminated_length": 162.375, | |
| "entropy": 0.09064092021435499, | |
| "epoch": 1.663751214771623, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.15384909510612488, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0029, | |
| "num_tokens": 138044578.0, | |
| "reward": 11.337530732154846, | |
| "reward_std": 0.9400022551417351, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.824542474001646, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.33465168718248606, | |
| "rewards/event_reward_fn/mean": 9.6669921875, | |
| "rewards/event_reward_fn/std": 5.503222852945328, | |
| "rewards/format_reward_fn/mean": 0.8459960930049419, | |
| "rewards/format_reward_fn/std": 0.3366972776129842, | |
| "step": 1712 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 251.5625, | |
| "completions/max_terminated_length": 245.5625, | |
| "completions/mean_length": 203.677734375, | |
| "completions/mean_terminated_length": 201.15838241577148, | |
| "completions/min_length": 159.1875, | |
| "completions/min_terminated_length": 159.1875, | |
| "entropy": 0.08698790520429611, | |
| "epoch": 1.6793002915451893, | |
| "frac_reward_zero_std": 0.296875, | |
| "grad_norm": 0.11867301166057587, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 139288124.0, | |
| "reward": 11.192306399345398, | |
| "reward_std": 0.9463471882045269, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9286414235830307, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.20035810582339764, | |
| "rewards/event_reward_fn/mean": 9.3193359375, | |
| "rewards/event_reward_fn/std": 5.200570702552795, | |
| "rewards/format_reward_fn/mean": 0.9443289637565613, | |
| "rewards/format_reward_fn/std": 0.19202105328440666, | |
| "step": 1728 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0556640625, | |
| "completions/max_length": 252.125, | |
| "completions/max_terminated_length": 244.875, | |
| "completions/mean_length": 193.6708984375, | |
| "completions/mean_terminated_length": 189.9529905319214, | |
| "completions/min_length": 136.625, | |
| "completions/min_terminated_length": 136.625, | |
| "entropy": 0.08362232241779566, | |
| "epoch": 1.694849368318756, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.11613152176141739, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0019, | |
| "num_tokens": 140615163.0, | |
| "reward": 11.211718916893005, | |
| "reward_std": 0.8285622540861368, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9016408734023571, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25260637141764164, | |
| "rewards/event_reward_fn/mean": 9.390625, | |
| "rewards/event_reward_fn/std": 5.310590535402298, | |
| "rewards/format_reward_fn/mean": 0.9194531291723251, | |
| "rewards/format_reward_fn/std": 0.251515906304121, | |
| "step": 1744 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0751953125, | |
| "completions/max_length": 248.6875, | |
| "completions/max_terminated_length": 240.3125, | |
| "completions/mean_length": 194.3291015625, | |
| "completions/mean_terminated_length": 189.4942626953125, | |
| "completions/min_length": 139.25, | |
| "completions/min_terminated_length": 139.25, | |
| "entropy": 0.08920921664685011, | |
| "epoch": 1.7103984450923226, | |
| "frac_reward_zero_std": 0.265625, | |
| "grad_norm": 0.1495039016008377, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0026, | |
| "num_tokens": 141995908.0, | |
| "reward": 11.331986844539642, | |
| "reward_std": 0.9946209099143744, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8805676624178886, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2605485112289898, | |
| "rewards/event_reward_fn/mean": 9.5615234375, | |
| "rewards/event_reward_fn/std": 5.626507669687271, | |
| "rewards/format_reward_fn/mean": 0.8898958377540112, | |
| "rewards/format_reward_fn/std": 0.25742682348936796, | |
| "step": 1760 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0556640625, | |
| "completions/max_length": 250.625, | |
| "completions/max_terminated_length": 242.25, | |
| "completions/mean_length": 194.5751953125, | |
| "completions/mean_terminated_length": 190.86159992218018, | |
| "completions/min_length": 141.5625, | |
| "completions/min_terminated_length": 141.5625, | |
| "entropy": 0.09618484182283282, | |
| "epoch": 1.7259475218658893, | |
| "frac_reward_zero_std": 0.26953125, | |
| "grad_norm": 0.20417290925979614, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0055, | |
| "num_tokens": 143301673.0, | |
| "reward": 10.538148939609528, | |
| "reward_std": 0.9361699968576431, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9052551127970219, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2257093784864992, | |
| "rewards/event_reward_fn/mean": 8.7158203125, | |
| "rewards/event_reward_fn/std": 4.607826009392738, | |
| "rewards/format_reward_fn/mean": 0.9170735664665699, | |
| "rewards/format_reward_fn/std": 0.22840105323120952, | |
| "step": 1776 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0576171875, | |
| "completions/max_length": 252.8125, | |
| "completions/max_terminated_length": 244.4375, | |
| "completions/mean_length": 200.8662109375, | |
| "completions/mean_terminated_length": 197.39703178405762, | |
| "completions/min_length": 149.9375, | |
| "completions/min_terminated_length": 149.9375, | |
| "entropy": 0.08653424866497517, | |
| "epoch": 1.741496598639456, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.14243784546852112, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 144603412.0, | |
| "reward": 11.493825078010559, | |
| "reward_std": 0.8755283299833536, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9135190099477768, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24690337451465894, | |
| "rewards/event_reward_fn/mean": 9.658203125, | |
| "rewards/event_reward_fn/std": 5.445283606648445, | |
| "rewards/format_reward_fn/mean": 0.922102864831686, | |
| "rewards/format_reward_fn/std": 0.2481938637793064, | |
| "step": 1792 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0556640625, | |
| "completions/max_length": 252.0625, | |
| "completions/max_terminated_length": 246.75, | |
| "completions/mean_length": 196.5146484375, | |
| "completions/mean_terminated_length": 193.12859344482422, | |
| "completions/min_length": 140.8125, | |
| "completions/min_terminated_length": 140.8125, | |
| "entropy": 0.08316960139200091, | |
| "epoch": 1.7570456754130224, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.109793521463871, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0022, | |
| "num_tokens": 145919099.0, | |
| "reward": 11.740033328533173, | |
| "reward_std": 0.9224549978971481, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9150333367288113, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24498367216438055, | |
| "rewards/event_reward_fn/mean": 9.90234375, | |
| "rewards/event_reward_fn/std": 5.425331294536591, | |
| "rewards/format_reward_fn/mean": 0.9226562492549419, | |
| "rewards/format_reward_fn/std": 0.24409929476678371, | |
| "step": 1808 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.072265625, | |
| "completions/max_length": 254.75, | |
| "completions/max_terminated_length": 249.3125, | |
| "completions/mean_length": 208.4755859375, | |
| "completions/mean_terminated_length": 204.96116065979004, | |
| "completions/min_length": 160.5625, | |
| "completions/min_terminated_length": 160.5625, | |
| "entropy": 0.08932856796309352, | |
| "epoch": 1.7725947521865888, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.24029314517974854, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0045, | |
| "num_tokens": 147275350.0, | |
| "reward": 11.133660674095154, | |
| "reward_std": 1.0420608818531036, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8887974470853806, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2559172356268391, | |
| "rewards/event_reward_fn/mean": 9.34375, | |
| "rewards/event_reward_fn/std": 5.51551166176796, | |
| "rewards/format_reward_fn/mean": 0.9011132828891277, | |
| "rewards/format_reward_fn/std": 0.2507179146632552, | |
| "step": 1824 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 252.375, | |
| "completions/max_terminated_length": 240.875, | |
| "completions/mean_length": 198.576171875, | |
| "completions/mean_terminated_length": 195.7430601119995, | |
| "completions/min_length": 151.25, | |
| "completions/min_terminated_length": 151.25, | |
| "entropy": 0.08435806119814515, | |
| "epoch": 1.7881438289601554, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.13869501650333405, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0026, | |
| "num_tokens": 148539184.0, | |
| "reward": 11.031599402427673, | |
| "reward_std": 0.7965468689799309, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9208246804773808, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2361440734239295, | |
| "rewards/event_reward_fn/mean": 9.1796875, | |
| "rewards/event_reward_fn/std": 4.907300844788551, | |
| "rewards/format_reward_fn/mean": 0.9310872405767441, | |
| "rewards/format_reward_fn/std": 0.2394925099797547, | |
| "step": 1840 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0380859375, | |
| "completions/max_length": 252.5, | |
| "completions/max_terminated_length": 250.125, | |
| "completions/mean_length": 202.90234375, | |
| "completions/mean_terminated_length": 200.88229370117188, | |
| "completions/min_length": 155.875, | |
| "completions/min_terminated_length": 155.875, | |
| "entropy": 0.08052209811285138, | |
| "epoch": 1.803692905733722, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.1878909021615982, | |
| "learning_rate": 5e-05, | |
| "loss": 0.001, | |
| "num_tokens": 149840940.0, | |
| "reward": 10.959127485752106, | |
| "reward_std": 0.9578492008149624, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9300258904695511, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18775073438882828, | |
| "rewards/event_reward_fn/mean": 9.0859375, | |
| "rewards/event_reward_fn/std": 5.149698540568352, | |
| "rewards/format_reward_fn/mean": 0.9431640617549419, | |
| "rewards/format_reward_fn/std": 0.17401384096592665, | |
| "step": 1856 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.09765625, | |
| "completions/max_length": 255.9375, | |
| "completions/max_terminated_length": 248.875, | |
| "completions/mean_length": 211.8564453125, | |
| "completions/mean_terminated_length": 207.1767454147339, | |
| "completions/min_length": 164.5625, | |
| "completions/min_terminated_length": 164.5625, | |
| "entropy": 0.08094025542959571, | |
| "epoch": 1.8192419825072887, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.14807139337062836, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0023, | |
| "num_tokens": 151221749.0, | |
| "reward": 11.752990126609802, | |
| "reward_std": 0.9537594802677631, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8724758252501488, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.29270493309013546, | |
| "rewards/event_reward_fn/mean": 9.9970703125, | |
| "rewards/event_reward_fn/std": 5.857491314411163, | |
| "rewards/format_reward_fn/mean": 0.8834440112113953, | |
| "rewards/format_reward_fn/std": 0.29244135320186615, | |
| "step": 1872 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0546875, | |
| "completions/max_length": 254.3125, | |
| "completions/max_terminated_length": 250.25, | |
| "completions/mean_length": 208.7001953125, | |
| "completions/mean_terminated_length": 206.05935287475586, | |
| "completions/min_length": 159.125, | |
| "completions/min_terminated_length": 159.125, | |
| "entropy": 0.08766834484413266, | |
| "epoch": 1.8347910592808552, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.17317424714565277, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 152521294.0, | |
| "reward": 11.364756107330322, | |
| "reward_std": 0.9098326228559017, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8889735676348209, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2667266938369721, | |
| "rewards/event_reward_fn/mean": 9.568359375, | |
| "rewards/event_reward_fn/std": 5.424193903803825, | |
| "rewards/format_reward_fn/mean": 0.9074231162667274, | |
| "rewards/format_reward_fn/std": 0.2601332040503621, | |
| "step": 1888 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0478515625, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 248.625, | |
| "completions/mean_length": 205.2333984375, | |
| "completions/mean_terminated_length": 202.61692428588867, | |
| "completions/min_length": 158.0625, | |
| "completions/min_terminated_length": 158.0625, | |
| "entropy": 0.0873062857426703, | |
| "epoch": 1.8503401360544216, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.16510100662708282, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 153814765.0, | |
| "reward": 10.840591430664062, | |
| "reward_std": 0.806601133197546, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8800444230437279, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2592724412679672, | |
| "rewards/event_reward_fn/mean": 9.05078125, | |
| "rewards/event_reward_fn/std": 5.017846331000328, | |
| "rewards/format_reward_fn/mean": 0.9097656235098839, | |
| "rewards/format_reward_fn/std": 0.2350642140954733, | |
| "step": 1904 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0478515625, | |
| "completions/max_length": 250.6875, | |
| "completions/max_terminated_length": 246.625, | |
| "completions/mean_length": 199.6025390625, | |
| "completions/mean_terminated_length": 196.8941469192505, | |
| "completions/min_length": 146.75, | |
| "completions/min_terminated_length": 146.75, | |
| "entropy": 0.08616631478071213, | |
| "epoch": 1.8658892128279883, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.10711020976305008, | |
| "learning_rate": 5e-05, | |
| "loss": 0.003, | |
| "num_tokens": 155159530.0, | |
| "reward": 11.430678129196167, | |
| "reward_std": 0.7845460455864668, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9038552716374397, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.21465440141037107, | |
| "rewards/event_reward_fn/mean": 9.5986328125, | |
| "rewards/event_reward_fn/std": 5.15682627260685, | |
| "rewards/format_reward_fn/mean": 0.928190104663372, | |
| "rewards/format_reward_fn/std": 0.19767758785746992, | |
| "step": 1920 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 246.6875, | |
| "completions/mean_length": 200.734375, | |
| "completions/mean_terminated_length": 197.25225925445557, | |
| "completions/min_length": 150.1875, | |
| "completions/min_terminated_length": 150.1875, | |
| "entropy": 0.08771243086084723, | |
| "epoch": 1.881438289601555, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.119595006108284, | |
| "learning_rate": 5e-05, | |
| "loss": -0.001, | |
| "num_tokens": 156525614.0, | |
| "reward": 11.453014373779297, | |
| "reward_std": 1.111331295222044, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8604980707168579, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2871107269311324, | |
| "rewards/event_reward_fn/mean": 9.708984375, | |
| "rewards/event_reward_fn/std": 5.215842500329018, | |
| "rewards/format_reward_fn/mean": 0.8835319019854069, | |
| "rewards/format_reward_fn/std": 0.2829501121304929, | |
| "step": 1936 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 250.25, | |
| "completions/max_terminated_length": 245.8125, | |
| "completions/mean_length": 194.017578125, | |
| "completions/mean_terminated_length": 192.65657711029053, | |
| "completions/min_length": 140.6875, | |
| "completions/min_terminated_length": 140.6875, | |
| "entropy": 0.08764936728402972, | |
| "epoch": 1.8969873663751216, | |
| "frac_reward_zero_std": 0.28515625, | |
| "grad_norm": 0.1511303335428238, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0032, | |
| "num_tokens": 157786400.0, | |
| "reward": 10.632731199264526, | |
| "reward_std": 0.9243863355368376, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9324383623898029, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18536719167605042, | |
| "rewards/event_reward_fn/mean": 8.7451171875, | |
| "rewards/event_reward_fn/std": 5.235057607293129, | |
| "rewards/format_reward_fn/mean": 0.9551757834851742, | |
| "rewards/format_reward_fn/std": 0.16272677155211568, | |
| "step": 1952 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 250.6875, | |
| "completions/mean_length": 200.95703125, | |
| "completions/mean_terminated_length": 198.30670166015625, | |
| "completions/min_length": 154.6875, | |
| "completions/min_terminated_length": 154.6875, | |
| "entropy": 0.08874167408794165, | |
| "epoch": 1.9125364431486882, | |
| "frac_reward_zero_std": 0.33984375, | |
| "grad_norm": 0.17749741673469543, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0037, | |
| "num_tokens": 159093888.0, | |
| "reward": 11.275705397129059, | |
| "reward_std": 0.856599148362875, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9131891131401062, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2150915495294612, | |
| "rewards/event_reward_fn/mean": 9.4345703125, | |
| "rewards/event_reward_fn/std": 5.729633465409279, | |
| "rewards/format_reward_fn/mean": 0.927945964038372, | |
| "rewards/format_reward_fn/std": 0.2096583191305399, | |
| "step": 1968 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.041015625, | |
| "completions/max_length": 253.875, | |
| "completions/max_terminated_length": 246.5625, | |
| "completions/mean_length": 200.9677734375, | |
| "completions/mean_terminated_length": 198.57855701446533, | |
| "completions/min_length": 142.8125, | |
| "completions/min_terminated_length": 142.8125, | |
| "entropy": 0.0850910097360611, | |
| "epoch": 1.9280855199222546, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.12046821415424347, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 160340479.0, | |
| "reward": 10.719317555427551, | |
| "reward_std": 0.8128865994513035, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9061989188194275, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24043723253998905, | |
| "rewards/event_reward_fn/mean": 8.892578125, | |
| "rewards/event_reward_fn/std": 5.485840782523155, | |
| "rewards/format_reward_fn/mean": 0.9205403625965118, | |
| "rewards/format_reward_fn/std": 0.2409290496725589, | |
| "step": 1984 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.12109375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 251.125, | |
| "completions/mean_length": 213.712890625, | |
| "completions/mean_terminated_length": 207.85150337219238, | |
| "completions/min_length": 160.9375, | |
| "completions/min_terminated_length": 160.9375, | |
| "entropy": 0.08267078269273043, | |
| "epoch": 1.943634596695821, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.09311431646347046, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0044, | |
| "num_tokens": 161744217.0, | |
| "reward": 10.99679410457611, | |
| "reward_std": 0.9773008767515421, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.856908455491066, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3204036271199584, | |
| "rewards/event_reward_fn/mean": 9.2744140625, | |
| "rewards/event_reward_fn/std": 5.77374792098999, | |
| "rewards/format_reward_fn/mean": 0.8654715418815613, | |
| "rewards/format_reward_fn/std": 0.3263047467917204, | |
| "step": 2000 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0732421875, | |
| "completions/max_length": 255.6875, | |
| "completions/max_terminated_length": 246.0625, | |
| "completions/mean_length": 203.2724609375, | |
| "completions/mean_terminated_length": 199.26644325256348, | |
| "completions/min_length": 149.0625, | |
| "completions/min_terminated_length": 149.0625, | |
| "entropy": 0.08016827004030347, | |
| "epoch": 1.9591836734693877, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.28214797377586365, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 163027312.0, | |
| "reward": 10.820812225341797, | |
| "reward_std": 0.9312072917819023, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8814437240362167, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2926495522260666, | |
| "rewards/event_reward_fn/mean": 9.048828125, | |
| "rewards/event_reward_fn/std": 5.542583703994751, | |
| "rewards/format_reward_fn/mean": 0.890540361404419, | |
| "rewards/format_reward_fn/std": 0.2912682769820094, | |
| "step": 2016 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.080078125, | |
| "completions/max_length": 255.8125, | |
| "completions/max_terminated_length": 251.4375, | |
| "completions/mean_length": 210.033203125, | |
| "completions/mean_terminated_length": 206.1150426864624, | |
| "completions/min_length": 157.75, | |
| "completions/min_terminated_length": 157.75, | |
| "entropy": 0.07839876413345337, | |
| "epoch": 1.9747327502429544, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.1044137179851532, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 164352274.0, | |
| "reward": 11.44101220369339, | |
| "reward_std": 0.9166238941252232, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8769194670021534, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2891712624114007, | |
| "rewards/event_reward_fn/mean": 9.6767578125, | |
| "rewards/event_reward_fn/std": 5.10790191590786, | |
| "rewards/format_reward_fn/mean": 0.8873349130153656, | |
| "rewards/format_reward_fn/std": 0.2897007022984326, | |
| "step": 2032 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0849609375, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 249.125, | |
| "completions/mean_length": 207.380859375, | |
| "completions/mean_terminated_length": 202.79765510559082, | |
| "completions/min_length": 162.625, | |
| "completions/min_terminated_length": 162.625, | |
| "entropy": 0.07377080479636788, | |
| "epoch": 1.990281827016521, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.16017131507396698, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0026, | |
| "num_tokens": 165649512.0, | |
| "reward": 11.277841091156006, | |
| "reward_std": 0.9176982510834932, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8867846131324768, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.275547455297783, | |
| "rewards/event_reward_fn/mean": 9.4931640625, | |
| "rewards/event_reward_fn/std": 5.252700716257095, | |
| "rewards/format_reward_fn/mean": 0.8978923298418522, | |
| "rewards/format_reward_fn/std": 0.27646369859576225, | |
| "step": 2048 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0537109375, | |
| "completions/max_length": 255.8125, | |
| "completions/max_terminated_length": 249.6875, | |
| "completions/mean_length": 204.119140625, | |
| "completions/mean_terminated_length": 201.15938472747803, | |
| "completions/min_length": 153.9375, | |
| "completions/min_terminated_length": 153.9375, | |
| "entropy": 0.08090341417118907, | |
| "epoch": 2.0058309037900877, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.14778926968574524, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0041, | |
| "num_tokens": 166928214.0, | |
| "reward": 10.784139513969421, | |
| "reward_std": 0.845683254301548, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9085673242807388, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22585340135265142, | |
| "rewards/event_reward_fn/mean": 8.958984375, | |
| "rewards/event_reward_fn/std": 5.853749170899391, | |
| "rewards/format_reward_fn/mean": 0.9165879562497139, | |
| "rewards/format_reward_fn/std": 0.2265966208651662, | |
| "step": 2064 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.107421875, | |
| "completions/max_length": 255.9375, | |
| "completions/max_terminated_length": 252.5625, | |
| "completions/mean_length": 217.998046875, | |
| "completions/mean_terminated_length": 213.3584222793579, | |
| "completions/min_length": 168.625, | |
| "completions/min_terminated_length": 168.625, | |
| "entropy": 0.07874821173027158, | |
| "epoch": 2.021379980563654, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.2152583748102188, | |
| "learning_rate": 5e-05, | |
| "loss": 0.004, | |
| "num_tokens": 168321184.0, | |
| "reward": 11.825278103351593, | |
| "reward_std": 0.8600351363420486, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8512045294046402, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3247975427657366, | |
| "rewards/event_reward_fn/mean": 10.119140625, | |
| "rewards/event_reward_fn/std": 5.912083759903908, | |
| "rewards/format_reward_fn/mean": 0.8549330346286297, | |
| "rewards/format_reward_fn/std": 0.32650260720402, | |
| "step": 2080 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.103515625, | |
| "completions/max_length": 255.6875, | |
| "completions/max_terminated_length": 251.375, | |
| "completions/mean_length": 215.453125, | |
| "completions/mean_terminated_length": 210.9893503189087, | |
| "completions/min_length": 169.125, | |
| "completions/min_terminated_length": 169.125, | |
| "entropy": 0.07949487678706646, | |
| "epoch": 2.0369290573372205, | |
| "frac_reward_zero_std": 0.26953125, | |
| "grad_norm": 0.17531728744506836, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0025, | |
| "num_tokens": 169684588.0, | |
| "reward": 11.59043002128601, | |
| "reward_std": 0.8503808788955212, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8546094782650471, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.29849119763821363, | |
| "rewards/event_reward_fn/mean": 9.8740234375, | |
| "rewards/event_reward_fn/std": 5.960100635886192, | |
| "rewards/format_reward_fn/mean": 0.8617968708276749, | |
| "rewards/format_reward_fn/std": 0.30179503839462996, | |
| "step": 2096 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.048828125, | |
| "completions/max_length": 254.9375, | |
| "completions/max_terminated_length": 249.9375, | |
| "completions/mean_length": 208.2587890625, | |
| "completions/mean_terminated_length": 205.81217098236084, | |
| "completions/min_length": 162.8125, | |
| "completions/min_terminated_length": 162.8125, | |
| "entropy": 0.08167764730751514, | |
| "epoch": 2.052478134110787, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.17342492938041687, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0046, | |
| "num_tokens": 171007325.0, | |
| "reward": 11.406825065612793, | |
| "reward_std": 0.9372135195881128, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8985451720654964, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2477037919452414, | |
| "rewards/event_reward_fn/mean": 9.6015625, | |
| "rewards/event_reward_fn/std": 5.504318922758102, | |
| "rewards/format_reward_fn/mean": 0.9067176692187786, | |
| "rewards/format_reward_fn/std": 0.2522282497957349, | |
| "step": 2112 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0595703125, | |
| "completions/max_length": 255.5625, | |
| "completions/max_terminated_length": 251.4375, | |
| "completions/mean_length": 209.2939453125, | |
| "completions/mean_terminated_length": 206.3635711669922, | |
| "completions/min_length": 164.3125, | |
| "completions/min_terminated_length": 164.3125, | |
| "entropy": 0.08719462575390935, | |
| "epoch": 2.068027210884354, | |
| "frac_reward_zero_std": 0.26171875, | |
| "grad_norm": 0.21987557411193848, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 172342766.0, | |
| "reward": 11.441301941871643, | |
| "reward_std": 1.013813877478242, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8949129357933998, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23817135416902602, | |
| "rewards/event_reward_fn/mean": 9.6416015625, | |
| "rewards/event_reward_fn/std": 5.478666722774506, | |
| "rewards/format_reward_fn/mean": 0.904787328094244, | |
| "rewards/format_reward_fn/std": 0.24113686219789088, | |
| "step": 2128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0751953125, | |
| "completions/max_length": 254.6875, | |
| "completions/max_terminated_length": 251.3125, | |
| "completions/mean_length": 214.8193359375, | |
| "completions/mean_terminated_length": 211.62517070770264, | |
| "completions/min_length": 171.75, | |
| "completions/min_terminated_length": 171.75, | |
| "entropy": 0.08591812197118998, | |
| "epoch": 2.0835762876579205, | |
| "frac_reward_zero_std": 0.25, | |
| "grad_norm": 0.12275879085063934, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0019, | |
| "num_tokens": 173671761.0, | |
| "reward": 11.561739206314087, | |
| "reward_std": 0.9223730489611626, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8886811174452305, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24229650711640716, | |
| "rewards/event_reward_fn/mean": 9.76953125, | |
| "rewards/event_reward_fn/std": 5.791984856128693, | |
| "rewards/format_reward_fn/mean": 0.9035267867147923, | |
| "rewards/format_reward_fn/std": 0.23866780381649733, | |
| "step": 2144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0439453125, | |
| "completions/max_length": 253.125, | |
| "completions/max_terminated_length": 248.3125, | |
| "completions/mean_length": 208.9638671875, | |
| "completions/mean_terminated_length": 206.84466552734375, | |
| "completions/min_length": 168.875, | |
| "completions/min_terminated_length": 168.875, | |
| "entropy": 0.08218491962179542, | |
| "epoch": 2.0991253644314867, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.2145640254020691, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0021, | |
| "num_tokens": 175000300.0, | |
| "reward": 11.832590639591217, | |
| "reward_std": 0.8098492994904518, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9014225117862225, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24191926792263985, | |
| "rewards/event_reward_fn/mean": 10.02734375, | |
| "rewards/event_reward_fn/std": 5.564895883202553, | |
| "rewards/format_reward_fn/mean": 0.9038244113326073, | |
| "rewards/format_reward_fn/std": 0.2553019989281893, | |
| "step": 2160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.060546875, | |
| "completions/max_length": 255.0625, | |
| "completions/max_terminated_length": 251.375, | |
| "completions/mean_length": 212.275390625, | |
| "completions/mean_terminated_length": 209.4294490814209, | |
| "completions/min_length": 172.875, | |
| "completions/min_terminated_length": 172.875, | |
| "entropy": 0.08632302051410079, | |
| "epoch": 2.1146744412050533, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.12819725275039673, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0027, | |
| "num_tokens": 176397358.0, | |
| "reward": 11.750000655651093, | |
| "reward_std": 1.0350622907280922, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8829724602401257, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.26738651166670024, | |
| "rewards/event_reward_fn/mean": 9.9794921875, | |
| "rewards/event_reward_fn/std": 5.499541476368904, | |
| "rewards/format_reward_fn/mean": 0.8875359706580639, | |
| "rewards/format_reward_fn/std": 0.27499296236783266, | |
| "step": 2176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0771484375, | |
| "completions/max_length": 253.9375, | |
| "completions/max_terminated_length": 248.5, | |
| "completions/mean_length": 211.8388671875, | |
| "completions/mean_terminated_length": 208.1127519607544, | |
| "completions/min_length": 169.125, | |
| "completions/min_terminated_length": 169.125, | |
| "entropy": 0.09303951309993863, | |
| "epoch": 2.13022351797862, | |
| "frac_reward_zero_std": 0.28515625, | |
| "grad_norm": 0.22441552579402924, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 177793437.0, | |
| "reward": 11.069317996501923, | |
| "reward_std": 1.0597262904047966, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8652172312140465, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.28404899896122515, | |
| "rewards/event_reward_fn/mean": 9.337890625, | |
| "rewards/event_reward_fn/std": 5.563613697886467, | |
| "rewards/format_reward_fn/mean": 0.8662101663649082, | |
| "rewards/format_reward_fn/std": 0.2897696476429701, | |
| "step": 2192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1318359375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 253.8125, | |
| "completions/mean_length": 223.1318359375, | |
| "completions/mean_terminated_length": 218.348069190979, | |
| "completions/min_length": 178.5625, | |
| "completions/min_terminated_length": 178.5625, | |
| "entropy": 0.08407490560784936, | |
| "epoch": 2.1457725947521866, | |
| "frac_reward_zero_std": 0.2734375, | |
| "grad_norm": 0.20042170584201813, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0042, | |
| "num_tokens": 179158840.0, | |
| "reward": 11.508206486701965, | |
| "reward_std": 0.8616610933095217, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8278331160545349, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.32595117576420307, | |
| "rewards/event_reward_fn/mean": 9.8291015625, | |
| "rewards/event_reward_fn/std": 5.2151205241680145, | |
| "rewards/format_reward_fn/mean": 0.8512718565762043, | |
| "rewards/format_reward_fn/std": 0.33205954916775227, | |
| "step": 2208 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05078125, | |
| "completions/max_length": 254.6875, | |
| "completions/max_terminated_length": 247.75, | |
| "completions/mean_length": 205.880859375, | |
| "completions/mean_terminated_length": 203.19688606262207, | |
| "completions/min_length": 166.625, | |
| "completions/min_terminated_length": 166.625, | |
| "entropy": 0.07565503777004778, | |
| "epoch": 2.1613216715257533, | |
| "frac_reward_zero_std": 0.31640625, | |
| "grad_norm": 0.15642929077148438, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0029, | |
| "num_tokens": 180532374.0, | |
| "reward": 11.076000154018402, | |
| "reward_std": 0.8950711917132139, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9075712524354458, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24491250491701066, | |
| "rewards/event_reward_fn/mean": 9.248046875, | |
| "rewards/event_reward_fn/std": 5.206828847527504, | |
| "rewards/format_reward_fn/mean": 0.9203820116817951, | |
| "rewards/format_reward_fn/std": 0.24691881332546473, | |
| "step": 2224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0498046875, | |
| "completions/max_length": 253.6875, | |
| "completions/max_terminated_length": 248.5625, | |
| "completions/mean_length": 204.2744140625, | |
| "completions/mean_terminated_length": 201.6161012649536, | |
| "completions/min_length": 162.0625, | |
| "completions/min_terminated_length": 162.0625, | |
| "entropy": 0.07292898930609226, | |
| "epoch": 2.17687074829932, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.20497596263885498, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 181834967.0, | |
| "reward": 11.745607078075409, | |
| "reward_std": 0.847535029053688, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8826499357819557, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2826285846531391, | |
| "rewards/event_reward_fn/mean": 9.96875, | |
| "rewards/event_reward_fn/std": 5.558514207601547, | |
| "rewards/format_reward_fn/mean": 0.8942071311175823, | |
| "rewards/format_reward_fn/std": 0.2861539525911212, | |
| "step": 2240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0615234375, | |
| "completions/max_length": 255.1875, | |
| "completions/max_terminated_length": 250.8125, | |
| "completions/mean_length": 212.1318359375, | |
| "completions/mean_terminated_length": 209.29450035095215, | |
| "completions/min_length": 174.1875, | |
| "completions/min_terminated_length": 174.1875, | |
| "entropy": 0.07543019764125347, | |
| "epoch": 2.192419825072886, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.10211238265037537, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 183156678.0, | |
| "reward": 11.885134816169739, | |
| "reward_std": 0.7728028316050768, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9035747610032558, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24522930546663702, | |
| "rewards/event_reward_fn/mean": 10.0634765625, | |
| "rewards/event_reward_fn/std": 5.761056482791901, | |
| "rewards/format_reward_fn/mean": 0.9180834665894508, | |
| "rewards/format_reward_fn/std": 0.24741819500923157, | |
| "step": 2256 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.125, | |
| "completions/max_length": 255.5, | |
| "completions/max_terminated_length": 252.125, | |
| "completions/mean_length": 217.482421875, | |
| "completions/mean_terminated_length": 212.08176708221436, | |
| "completions/min_length": 176.875, | |
| "completions/min_terminated_length": 176.875, | |
| "entropy": 0.07963799126446247, | |
| "epoch": 2.207968901846453, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.2214186191558838, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 184464124.0, | |
| "reward": 11.618954241275787, | |
| "reward_std": 0.9292504880577326, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8386580236256123, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.32745907083153725, | |
| "rewards/event_reward_fn/mean": 9.9248046875, | |
| "rewards/event_reward_fn/std": 6.44027054309845, | |
| "rewards/format_reward_fn/mean": 0.8554915376007557, | |
| "rewards/format_reward_fn/std": 0.3294975752942264, | |
| "step": 2272 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0498046875, | |
| "completions/max_length": 254.4375, | |
| "completions/max_terminated_length": 249.375, | |
| "completions/mean_length": 206.373046875, | |
| "completions/mean_terminated_length": 203.74801063537598, | |
| "completions/min_length": 163.625, | |
| "completions/min_terminated_length": 163.625, | |
| "entropy": 0.0740656116977334, | |
| "epoch": 2.2235179786200194, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.15276597440242767, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0032, | |
| "num_tokens": 185812542.0, | |
| "reward": 11.359130620956421, | |
| "reward_std": 0.6935872584581375, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9103271588683128, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23343394591938704, | |
| "rewards/event_reward_fn/mean": 9.5244140625, | |
| "rewards/event_reward_fn/std": 5.075364321470261, | |
| "rewards/format_reward_fn/mean": 0.9243892580270767, | |
| "rewards/format_reward_fn/std": 0.2387167038396001, | |
| "step": 2288 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 254.5, | |
| "completions/max_terminated_length": 250.9375, | |
| "completions/mean_length": 210.4521484375, | |
| "completions/mean_terminated_length": 207.25661849975586, | |
| "completions/min_length": 169.8125, | |
| "completions/min_terminated_length": 169.8125, | |
| "entropy": 0.07526633841916919, | |
| "epoch": 2.239067055393586, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.17438335716724396, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 187174565.0, | |
| "reward": 11.606376469135284, | |
| "reward_std": 0.8281035982072353, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8901838399469852, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2384987068362534, | |
| "rewards/event_reward_fn/mean": 9.806640625, | |
| "rewards/event_reward_fn/std": 5.699323073029518, | |
| "rewards/format_reward_fn/mean": 0.9095519706606865, | |
| "rewards/format_reward_fn/std": 0.2393078247550875, | |
| "step": 2304 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0322265625, | |
| "completions/max_length": 249.0, | |
| "completions/max_terminated_length": 243.6875, | |
| "completions/mean_length": 203.9052734375, | |
| "completions/mean_terminated_length": 202.31891632080078, | |
| "completions/min_length": 159.4375, | |
| "completions/min_terminated_length": 159.4375, | |
| "entropy": 0.07370157795958221, | |
| "epoch": 2.2546161321671527, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.12816278636455536, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0022, | |
| "num_tokens": 188422072.0, | |
| "reward": 11.180573999881744, | |
| "reward_std": 0.7691474985331297, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9313375540077686, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16660339455120265, | |
| "rewards/event_reward_fn/mean": 9.302734375, | |
| "rewards/event_reward_fn/std": 5.455415144562721, | |
| "rewards/format_reward_fn/mean": 0.946502048522234, | |
| "rewards/format_reward_fn/std": 0.16397117311134934, | |
| "step": 2320 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0419921875, | |
| "completions/max_length": 250.875, | |
| "completions/max_terminated_length": 246.125, | |
| "completions/mean_length": 201.8203125, | |
| "completions/mean_terminated_length": 199.46971607208252, | |
| "completions/min_length": 160.6875, | |
| "completions/min_terminated_length": 160.6875, | |
| "entropy": 0.0713472084607929, | |
| "epoch": 2.2701652089407194, | |
| "frac_reward_zero_std": 0.45703125, | |
| "grad_norm": 0.15345498919487, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 189749628.0, | |
| "reward": 11.676891207695007, | |
| "reward_std": 0.7641248423606157, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9076020866632462, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25307453935965896, | |
| "rewards/event_reward_fn/mean": 9.8544921875, | |
| "rewards/event_reward_fn/std": 5.772381603717804, | |
| "rewards/format_reward_fn/mean": 0.9147970490157604, | |
| "rewards/format_reward_fn/std": 0.25177112873643637, | |
| "step": 2336 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052734375, | |
| "completions/max_length": 253.0, | |
| "completions/max_terminated_length": 245.9375, | |
| "completions/mean_length": 206.1015625, | |
| "completions/mean_terminated_length": 203.27950382232666, | |
| "completions/min_length": 162.625, | |
| "completions/min_terminated_length": 162.625, | |
| "entropy": 0.06717114523053169, | |
| "epoch": 2.2857142857142856, | |
| "frac_reward_zero_std": 0.3984375, | |
| "grad_norm": 0.14800839126110077, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 191059400.0, | |
| "reward": 11.288125574588776, | |
| "reward_std": 0.8216591961681843, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9098481498658657, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2527642482891679, | |
| "rewards/event_reward_fn/mean": 9.45703125, | |
| "rewards/event_reward_fn/std": 6.013585805892944, | |
| "rewards/format_reward_fn/mean": 0.9212462790310383, | |
| "rewards/format_reward_fn/std": 0.255017863586545, | |
| "step": 2352 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.119140625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 252.3125, | |
| "completions/mean_length": 222.3125, | |
| "completions/mean_terminated_length": 217.90027904510498, | |
| "completions/min_length": 175.625, | |
| "completions/min_terminated_length": 175.625, | |
| "entropy": 0.0761020069476217, | |
| "epoch": 2.3012633624878522, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.24630595743656158, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0025, | |
| "num_tokens": 192350156.0, | |
| "reward": 11.402837812900543, | |
| "reward_std": 0.8105970397591591, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8388476483523846, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.32804084848612547, | |
| "rewards/event_reward_fn/mean": 9.7060546875, | |
| "rewards/event_reward_fn/std": 5.547398820519447, | |
| "rewards/format_reward_fn/mean": 0.8579354099929333, | |
| "rewards/format_reward_fn/std": 0.33059023320674896, | |
| "step": 2368 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0537109375, | |
| "completions/max_length": 253.0625, | |
| "completions/max_terminated_length": 248.125, | |
| "completions/mean_length": 211.1552734375, | |
| "completions/mean_terminated_length": 208.6152687072754, | |
| "completions/min_length": 169.0, | |
| "completions/min_terminated_length": 169.0, | |
| "entropy": 0.07612166181206703, | |
| "epoch": 2.316812439261419, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.08691530674695969, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0024, | |
| "num_tokens": 193665307.0, | |
| "reward": 11.495616167783737, | |
| "reward_std": 0.7880423050373793, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.902044016867876, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23614092892967165, | |
| "rewards/event_reward_fn/mean": 9.6865234375, | |
| "rewards/event_reward_fn/std": 6.028887152671814, | |
| "rewards/format_reward_fn/mean": 0.9070489220321178, | |
| "rewards/format_reward_fn/std": 0.24180734669789672, | |
| "step": 2384 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0185546875, | |
| "completions/max_length": 246.4375, | |
| "completions/max_terminated_length": 242.0625, | |
| "completions/mean_length": 193.6845703125, | |
| "completions/mean_terminated_length": 192.51368141174316, | |
| "completions/min_length": 148.6875, | |
| "completions/min_terminated_length": 148.6875, | |
| "entropy": 0.07832195260562003, | |
| "epoch": 2.3323615160349855, | |
| "frac_reward_zero_std": 0.40234375, | |
| "grad_norm": 0.1274639219045639, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0048, | |
| "num_tokens": 194920676.0, | |
| "reward": 11.40235447883606, | |
| "reward_std": 0.7331925742328167, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9401694796979427, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17933110590092838, | |
| "rewards/event_reward_fn/mean": 9.5068359375, | |
| "rewards/event_reward_fn/std": 5.196439817547798, | |
| "rewards/format_reward_fn/mean": 0.9553493969142437, | |
| "rewards/format_reward_fn/std": 0.172615127870813, | |
| "step": 2400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 246.9375, | |
| "completions/max_terminated_length": 243.6875, | |
| "completions/mean_length": 197.6689453125, | |
| "completions/mean_terminated_length": 196.28760814666748, | |
| "completions/min_length": 158.0, | |
| "completions/min_terminated_length": 158.0, | |
| "entropy": 0.07638159766793251, | |
| "epoch": 2.347910592808552, | |
| "frac_reward_zero_std": 0.3671875, | |
| "grad_norm": 0.18019680678844452, | |
| "learning_rate": 5e-05, | |
| "loss": -0.002, | |
| "num_tokens": 196239697.0, | |
| "reward": 11.403923392295837, | |
| "reward_std": 0.7415321134030819, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9454397931694984, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.15600819129031152, | |
| "rewards/event_reward_fn/mean": 9.4951171875, | |
| "rewards/event_reward_fn/std": 5.713589310646057, | |
| "rewards/format_reward_fn/mean": 0.9633664786815643, | |
| "rewards/format_reward_fn/std": 0.14195893332362175, | |
| "step": 2416 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0927734375, | |
| "completions/max_length": 255.75, | |
| "completions/max_terminated_length": 252.25, | |
| "completions/mean_length": 216.240234375, | |
| "completions/mean_terminated_length": 212.49735260009766, | |
| "completions/min_length": 171.4375, | |
| "completions/min_terminated_length": 171.4375, | |
| "entropy": 0.07729306910187006, | |
| "epoch": 2.3634596695821184, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.1351216435432434, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0052, | |
| "num_tokens": 197585395.0, | |
| "reward": 11.078547358512878, | |
| "reward_std": 0.8885079212486744, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8728507719933987, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2855207370594144, | |
| "rewards/event_reward_fn/mean": 9.3125, | |
| "rewards/event_reward_fn/std": 5.477887436747551, | |
| "rewards/format_reward_fn/mean": 0.8931966163218021, | |
| "rewards/format_reward_fn/std": 0.2850890662521124, | |
| "step": 2432 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1064453125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 254.25, | |
| "completions/mean_length": 222.671875, | |
| "completions/mean_terminated_length": 218.8065061569214, | |
| "completions/min_length": 170.75, | |
| "completions/min_terminated_length": 170.75, | |
| "entropy": 0.07714059529826045, | |
| "epoch": 2.379008746355685, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.14632916450500488, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 198898579.0, | |
| "reward": 10.960965871810913, | |
| "reward_std": 0.8031115736812353, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8641464188694954, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3086322648450732, | |
| "rewards/event_reward_fn/mean": 9.2119140625, | |
| "rewards/event_reward_fn/std": 5.528163373470306, | |
| "rewards/format_reward_fn/mean": 0.8849051333963871, | |
| "rewards/format_reward_fn/std": 0.31144819781184196, | |
| "step": 2448 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0419921875, | |
| "completions/max_length": 251.8125, | |
| "completions/max_terminated_length": 247.875, | |
| "completions/mean_length": 210.1025390625, | |
| "completions/mean_terminated_length": 208.211838722229, | |
| "completions/min_length": 161.5625, | |
| "completions/min_terminated_length": 161.5625, | |
| "entropy": 0.07773100049234927, | |
| "epoch": 2.3945578231292517, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.16118626296520233, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 200201952.0, | |
| "reward": 10.956557631492615, | |
| "reward_std": 0.7257527317851782, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9221826978027821, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.20850562094710767, | |
| "rewards/event_reward_fn/mean": 9.0927734375, | |
| "rewards/event_reward_fn/std": 5.140145808458328, | |
| "rewards/format_reward_fn/mean": 0.9416015632450581, | |
| "rewards/format_reward_fn/std": 0.1953780883923173, | |
| "step": 2464 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0224609375, | |
| "completions/max_length": 245.9375, | |
| "completions/max_terminated_length": 240.75, | |
| "completions/mean_length": 202.029296875, | |
| "completions/mean_terminated_length": 200.80841064453125, | |
| "completions/min_length": 159.5, | |
| "completions/min_terminated_length": 159.5, | |
| "entropy": 0.0753114647231996, | |
| "epoch": 2.4101068999028183, | |
| "frac_reward_zero_std": 0.41015625, | |
| "grad_norm": 0.15847159922122955, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0026, | |
| "num_tokens": 201522430.0, | |
| "reward": 11.323036313056946, | |
| "reward_std": 0.7887390460819006, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9379655607044697, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17426727525889874, | |
| "rewards/event_reward_fn/mean": 9.4345703125, | |
| "rewards/event_reward_fn/std": 5.223158270120621, | |
| "rewards/format_reward_fn/mean": 0.9505006894469261, | |
| "rewards/format_reward_fn/std": 0.1740011121146381, | |
| "step": 2480 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 250.125, | |
| "completions/max_terminated_length": 246.9375, | |
| "completions/mean_length": 205.435546875, | |
| "completions/mean_terminated_length": 204.02826595306396, | |
| "completions/min_length": 164.875, | |
| "completions/min_terminated_length": 164.875, | |
| "entropy": 0.07844416983425617, | |
| "epoch": 2.425655976676385, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.0907289907336235, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0048, | |
| "num_tokens": 202805596.0, | |
| "reward": 11.417901694774628, | |
| "reward_std": 0.9192124493420124, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9360267631709576, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18138159497175366, | |
| "rewards/event_reward_fn/mean": 9.52734375, | |
| "rewards/event_reward_fn/std": 5.153972968459129, | |
| "rewards/format_reward_fn/mean": 0.9545312523841858, | |
| "rewards/format_reward_fn/std": 0.17224382143467665, | |
| "step": 2496 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029947916666666668, | |
| "completions/max_length": 253.5, | |
| "completions/max_terminated_length": 249.08333333333334, | |
| "completions/mean_length": 210.64192708333334, | |
| "completions/mean_terminated_length": 209.20124689737955, | |
| "completions/min_length": 172.41666666666666, | |
| "completions/min_terminated_length": 172.41666666666666, | |
| "entropy": 0.08086393773555756, | |
| "epoch": 2.441205053449951, | |
| "frac_reward_zero_std": 0.3802083333333333, | |
| "grad_norm": 0.10650806128978729, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 204152564.0, | |
| "reward": 11.6739342212677, | |
| "reward_std": 0.807344543437163, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9262488782405853, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19760222919285297, | |
| "rewards/event_reward_fn/mean": 9.798177083333334, | |
| "rewards/event_reward_fn/std": 5.788699746131897, | |
| "rewards/format_reward_fn/mean": 0.9495081007480621, | |
| "rewards/format_reward_fn/std": 0.18829844643672308, | |
| "step": 2512 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0712890625, | |
| "completions/max_length": 254.8125, | |
| "completions/max_terminated_length": 251.625, | |
| "completions/mean_length": 213.2236328125, | |
| "completions/mean_terminated_length": 209.86785411834717, | |
| "completions/min_length": 171.125, | |
| "completions/min_terminated_length": 171.125, | |
| "entropy": 0.08136322861537337, | |
| "epoch": 2.456754130223518, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.2153819352388382, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 205500469.0, | |
| "reward": 12.446550607681274, | |
| "reward_std": 1.034587848931551, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8715435974299908, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.28421050729230046, | |
| "rewards/event_reward_fn/mean": 10.6826171875, | |
| "rewards/event_reward_fn/std": 6.132740959525108, | |
| "rewards/format_reward_fn/mean": 0.892389789223671, | |
| "rewards/format_reward_fn/std": 0.2872252073138952, | |
| "step": 2528 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052734375, | |
| "completions/max_length": 255.5, | |
| "completions/max_terminated_length": 251.9375, | |
| "completions/mean_length": 216.9306640625, | |
| "completions/mean_terminated_length": 214.75403022766113, | |
| "completions/min_length": 176.1875, | |
| "completions/min_terminated_length": 176.1875, | |
| "entropy": 0.08582799974828959, | |
| "epoch": 2.4723032069970845, | |
| "frac_reward_zero_std": 0.26953125, | |
| "grad_norm": 0.1978168785572052, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 206796278.0, | |
| "reward": 11.944559633731842, | |
| "reward_std": 0.9803863354027271, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9094629287719727, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2257095631211996, | |
| "rewards/event_reward_fn/mean": 10.103515625, | |
| "rewards/event_reward_fn/std": 5.498953863978386, | |
| "rewards/format_reward_fn/mean": 0.9315809458494186, | |
| "rewards/format_reward_fn/std": 0.228111170232296, | |
| "step": 2544 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.099609375, | |
| "completions/max_length": 255.625, | |
| "completions/max_terminated_length": 250.5, | |
| "completions/mean_length": 217.259765625, | |
| "completions/mean_terminated_length": 213.05884075164795, | |
| "completions/min_length": 170.0, | |
| "completions/min_terminated_length": 170.0, | |
| "entropy": 0.08552914392203093, | |
| "epoch": 2.487852283770651, | |
| "frac_reward_zero_std": 0.28515625, | |
| "grad_norm": 0.16805820167064667, | |
| "learning_rate": 5e-05, | |
| "loss": 0.007, | |
| "num_tokens": 208168132.0, | |
| "reward": 11.79398000240326, | |
| "reward_std": 0.9481483921408653, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.868280190974474, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.28656442323699594, | |
| "rewards/event_reward_fn/mean": 10.037109375, | |
| "rewards/event_reward_fn/std": 6.124383822083473, | |
| "rewards/format_reward_fn/mean": 0.8885904960334301, | |
| "rewards/format_reward_fn/std": 0.28199191950261593, | |
| "step": 2560 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0458984375, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 248.0, | |
| "completions/mean_length": 210.638671875, | |
| "completions/mean_terminated_length": 208.40313148498535, | |
| "completions/min_length": 173.625, | |
| "completions/min_terminated_length": 173.625, | |
| "entropy": 0.08260456612333655, | |
| "epoch": 2.503401360544218, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.09216822683811188, | |
| "learning_rate": 5e-05, | |
| "loss": -0.001, | |
| "num_tokens": 209501810.0, | |
| "reward": 11.016095101833344, | |
| "reward_std": 0.860798167064786, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9306312911212444, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19113765214569867, | |
| "rewards/event_reward_fn/mean": 9.1396484375, | |
| "rewards/event_reward_fn/std": 5.75250081717968, | |
| "rewards/format_reward_fn/mean": 0.9458155073225498, | |
| "rewards/format_reward_fn/std": 0.19293752522207797, | |
| "step": 2576 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01953125, | |
| "completions/max_length": 247.375, | |
| "completions/max_terminated_length": 242.875, | |
| "completions/mean_length": 203.1708984375, | |
| "completions/mean_terminated_length": 202.13115978240967, | |
| "completions/min_length": 163.9375, | |
| "completions/min_terminated_length": 163.9375, | |
| "entropy": 0.08376244455575943, | |
| "epoch": 2.518950437317784, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.21585437655448914, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0033, | |
| "num_tokens": 210795597.0, | |
| "reward": 10.72483429312706, | |
| "reward_std": 0.7541004437953234, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9423710107803345, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14322513493243605, | |
| "rewards/event_reward_fn/mean": 8.8203125, | |
| "rewards/event_reward_fn/std": 5.188908696174622, | |
| "rewards/format_reward_fn/mean": 0.9621507674455643, | |
| "rewards/format_reward_fn/std": 0.13946166937239468, | |
| "step": 2592 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0595703125, | |
| "completions/max_length": 253.9375, | |
| "completions/max_terminated_length": 249.0, | |
| "completions/mean_length": 210.505859375, | |
| "completions/mean_terminated_length": 207.62176704406738, | |
| "completions/min_length": 173.25, | |
| "completions/min_terminated_length": 173.25, | |
| "entropy": 0.08947332156822085, | |
| "epoch": 2.534499514091351, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.14945168793201447, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0063, | |
| "num_tokens": 212112795.0, | |
| "reward": 11.506966352462769, | |
| "reward_std": 0.794132512062788, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9178526736795902, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22195658483542502, | |
| "rewards/event_reward_fn/mean": 9.6572265625, | |
| "rewards/event_reward_fn/std": 5.744891852140427, | |
| "rewards/format_reward_fn/mean": 0.9318870939314365, | |
| "rewards/format_reward_fn/std": 0.2204800380859524, | |
| "step": 2608 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0888671875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 250.4375, | |
| "completions/mean_length": 216.943359375, | |
| "completions/mean_terminated_length": 213.17963314056396, | |
| "completions/min_length": 173.0625, | |
| "completions/min_terminated_length": 173.0625, | |
| "entropy": 0.08383294614031911, | |
| "epoch": 2.5500485908649173, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.1446155160665512, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 213441821.0, | |
| "reward": 11.958227455615997, | |
| "reward_std": 0.8823277465999126, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8775606565177441, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2921582367271185, | |
| "rewards/event_reward_fn/mean": 10.185546875, | |
| "rewards/event_reward_fn/std": 5.809098601341248, | |
| "rewards/format_reward_fn/mean": 0.8951199762523174, | |
| "rewards/format_reward_fn/std": 0.29349780175834894, | |
| "step": 2624 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052734375, | |
| "completions/max_length": 253.0, | |
| "completions/max_terminated_length": 251.0625, | |
| "completions/mean_length": 213.916015625, | |
| "completions/mean_terminated_length": 211.6506052017212, | |
| "completions/min_length": 178.5, | |
| "completions/min_terminated_length": 178.5, | |
| "entropy": 0.08359973039478064, | |
| "epoch": 2.565597667638484, | |
| "frac_reward_zero_std": 0.28125, | |
| "grad_norm": 0.16694338619709015, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 214813067.0, | |
| "reward": 11.792637586593628, | |
| "reward_std": 0.8656186051666737, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8819389827549458, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24387728050351143, | |
| "rewards/event_reward_fn/mean": 9.998046875, | |
| "rewards/event_reward_fn/std": 5.933807298541069, | |
| "rewards/format_reward_fn/mean": 0.9126519113779068, | |
| "rewards/format_reward_fn/std": 0.2305635418742895, | |
| "step": 2640 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0771484375, | |
| "completions/max_length": 255.4375, | |
| "completions/max_terminated_length": 249.875, | |
| "completions/mean_length": 215.8505859375, | |
| "completions/mean_terminated_length": 212.57020092010498, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "entropy": 0.08900781767442822, | |
| "epoch": 2.5811467444120506, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.1385410875082016, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 216172750.0, | |
| "reward": 11.35420310497284, | |
| "reward_std": 0.8416576944291592, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8935875110328197, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.26079373457469046, | |
| "rewards/event_reward_fn/mean": 9.5556640625, | |
| "rewards/event_reward_fn/std": 5.99031862616539, | |
| "rewards/format_reward_fn/mean": 0.904951486736536, | |
| "rewards/format_reward_fn/std": 0.2644943995401263, | |
| "step": 2656 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0361328125, | |
| "completions/max_length": 254.625, | |
| "completions/max_terminated_length": 249.75, | |
| "completions/mean_length": 212.0947265625, | |
| "completions/mean_terminated_length": 210.445143699646, | |
| "completions/min_length": 177.1875, | |
| "completions/min_terminated_length": 177.1875, | |
| "entropy": 0.08960987254977226, | |
| "epoch": 2.5966958211856173, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.15648344159126282, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0023, | |
| "num_tokens": 217499543.0, | |
| "reward": 11.51008290052414, | |
| "reward_std": 0.7766602244228125, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9093844145536423, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23443537193816155, | |
| "rewards/event_reward_fn/mean": 9.6806640625, | |
| "rewards/event_reward_fn/std": 5.529717803001404, | |
| "rewards/format_reward_fn/mean": 0.9200344160199165, | |
| "rewards/format_reward_fn/std": 0.23509666486643255, | |
| "step": 2672 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.037109375, | |
| "completions/max_length": 251.6875, | |
| "completions/max_terminated_length": 247.0, | |
| "completions/mean_length": 208.2880859375, | |
| "completions/mean_terminated_length": 206.4787950515747, | |
| "completions/min_length": 165.625, | |
| "completions/min_terminated_length": 165.625, | |
| "entropy": 0.09177634166553617, | |
| "epoch": 2.612244897959184, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.14749974012374878, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 218822370.0, | |
| "reward": 11.045877933502197, | |
| "reward_std": 0.9622980132699013, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9265813454985619, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.20256941742263734, | |
| "rewards/event_reward_fn/mean": 9.1845703125, | |
| "rewards/event_reward_fn/std": 5.212202668190002, | |
| "rewards/format_reward_fn/mean": 0.9347261041402817, | |
| "rewards/format_reward_fn/std": 0.2086858821567148, | |
| "step": 2688 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01953125, | |
| "completions/max_length": 247.8125, | |
| "completions/max_terminated_length": 243.4375, | |
| "completions/mean_length": 200.87890625, | |
| "completions/mean_terminated_length": 199.8343276977539, | |
| "completions/min_length": 167.4375, | |
| "completions/min_terminated_length": 167.4375, | |
| "entropy": 0.09632771136239171, | |
| "epoch": 2.62779397473275, | |
| "frac_reward_zero_std": 0.28515625, | |
| "grad_norm": 0.1771780103445053, | |
| "learning_rate": 5e-05, | |
| "loss": -0.008, | |
| "num_tokens": 220135962.0, | |
| "reward": 12.231472432613373, | |
| "reward_std": 0.8915320560336113, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9349792711436749, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16558197524864227, | |
| "rewards/event_reward_fn/mean": 10.35546875, | |
| "rewards/event_reward_fn/std": 5.747018381953239, | |
| "rewards/format_reward_fn/mean": 0.9410244673490524, | |
| "rewards/format_reward_fn/std": 0.17926215915940702, | |
| "step": 2704 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.044921875, | |
| "completions/max_length": 252.8125, | |
| "completions/max_terminated_length": 246.3125, | |
| "completions/mean_length": 209.3701171875, | |
| "completions/mean_terminated_length": 207.07564544677734, | |
| "completions/min_length": 170.625, | |
| "completions/min_terminated_length": 170.625, | |
| "entropy": 0.10564424749463797, | |
| "epoch": 2.6433430515063168, | |
| "frac_reward_zero_std": 0.22265625, | |
| "grad_norm": 0.10102769732475281, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 221542285.0, | |
| "reward": 12.009974837303162, | |
| "reward_std": 0.9928734712302685, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9175033271312714, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1976611790014431, | |
| "rewards/event_reward_fn/mean": 10.1767578125, | |
| "rewards/event_reward_fn/std": 6.012263968586922, | |
| "rewards/format_reward_fn/mean": 0.9157139807939529, | |
| "rewards/format_reward_fn/std": 0.21656434168107808, | |
| "step": 2720 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03515625, | |
| "completions/max_length": 255.0625, | |
| "completions/max_terminated_length": 253.375, | |
| "completions/mean_length": 213.7919921875, | |
| "completions/mean_terminated_length": 212.31354141235352, | |
| "completions/min_length": 174.5, | |
| "completions/min_terminated_length": 174.5, | |
| "entropy": 0.10326679470017552, | |
| "epoch": 2.6588921282798834, | |
| "frac_reward_zero_std": 0.23828125, | |
| "grad_norm": 0.15221992135047913, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 222797308.0, | |
| "reward": 11.387903690338135, | |
| "reward_std": 0.911373607814312, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9273334704339504, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18763835495337844, | |
| "rewards/event_reward_fn/mean": 9.5263671875, | |
| "rewards/event_reward_fn/std": 5.706304341554642, | |
| "rewards/format_reward_fn/mean": 0.9342031031847, | |
| "rewards/format_reward_fn/std": 0.2074666447006166, | |
| "step": 2736 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0224609375, | |
| "completions/max_length": 250.375, | |
| "completions/max_terminated_length": 247.4375, | |
| "completions/mean_length": 207.4228515625, | |
| "completions/mean_terminated_length": 206.33260917663574, | |
| "completions/min_length": 170.25, | |
| "completions/min_terminated_length": 170.25, | |
| "entropy": 0.09272929606959224, | |
| "epoch": 2.67444120505345, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.21459202468395233, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0035, | |
| "num_tokens": 224091517.0, | |
| "reward": 11.830156862735748, | |
| "reward_std": 0.8045283071696758, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9445540346205235, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.15531712002120912, | |
| "rewards/event_reward_fn/mean": 9.9248046875, | |
| "rewards/event_reward_fn/std": 5.416922226548195, | |
| "rewards/format_reward_fn/mean": 0.9607979953289032, | |
| "rewards/format_reward_fn/std": 0.1495908577926457, | |
| "step": 2752 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 250.4375, | |
| "completions/max_terminated_length": 247.125, | |
| "completions/mean_length": 213.107421875, | |
| "completions/mean_terminated_length": 211.65931701660156, | |
| "completions/min_length": 173.0625, | |
| "completions/min_terminated_length": 173.0625, | |
| "entropy": 0.08341792924329638, | |
| "epoch": 2.6899902818270167, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.06316018104553223, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0016, | |
| "num_tokens": 225405591.0, | |
| "reward": 11.755984246730804, | |
| "reward_std": 0.8010260127484798, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9238302148878574, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17838482139632106, | |
| "rewards/event_reward_fn/mean": 9.8876953125, | |
| "rewards/event_reward_fn/std": 5.315806642174721, | |
| "rewards/format_reward_fn/mean": 0.9444587081670761, | |
| "rewards/format_reward_fn/std": 0.16798695269972086, | |
| "step": 2768 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0849609375, | |
| "completions/max_length": 255.375, | |
| "completions/max_terminated_length": 251.875, | |
| "completions/mean_length": 216.7353515625, | |
| "completions/mean_terminated_length": 213.09019565582275, | |
| "completions/min_length": 174.0625, | |
| "completions/min_terminated_length": 174.0625, | |
| "entropy": 0.08323041070252657, | |
| "epoch": 2.705539358600583, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.2660459578037262, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0082, | |
| "num_tokens": 226754744.0, | |
| "reward": 11.574803471565247, | |
| "reward_std": 0.8111933209002018, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8713752776384354, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.28300391032826155, | |
| "rewards/event_reward_fn/mean": 9.8173828125, | |
| "rewards/event_reward_fn/std": 5.8233465403318405, | |
| "rewards/format_reward_fn/mean": 0.8860453926026821, | |
| "rewards/format_reward_fn/std": 0.28504633717238903, | |
| "step": 2784 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0537109375, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 250.625, | |
| "completions/mean_length": 212.8642578125, | |
| "completions/mean_terminated_length": 210.45547103881836, | |
| "completions/min_length": 169.0, | |
| "completions/min_terminated_length": 169.0, | |
| "entropy": 0.08199881995096803, | |
| "epoch": 2.7210884353741496, | |
| "frac_reward_zero_std": 0.296875, | |
| "grad_norm": 0.08463400602340698, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 228098621.0, | |
| "reward": 11.454033315181732, | |
| "reward_std": 0.836145743727684, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9158100821077824, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19612007169052958, | |
| "rewards/event_reward_fn/mean": 9.603515625, | |
| "rewards/event_reward_fn/std": 5.212661325931549, | |
| "rewards/format_reward_fn/mean": 0.9347075000405312, | |
| "rewards/format_reward_fn/std": 0.1920458609238267, | |
| "step": 2800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0380859375, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 249.0625, | |
| "completions/mean_length": 209.9765625, | |
| "completions/mean_terminated_length": 208.1564769744873, | |
| "completions/min_length": 168.875, | |
| "completions/min_terminated_length": 168.875, | |
| "entropy": 0.0759361800737679, | |
| "epoch": 2.7366375121477162, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.14759230613708496, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0053, | |
| "num_tokens": 229429565.0, | |
| "reward": 12.068866312503815, | |
| "reward_std": 0.8678888715803623, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9354286342859268, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19999602530151606, | |
| "rewards/event_reward_fn/mean": 10.185546875, | |
| "rewards/event_reward_fn/std": 6.09708933532238, | |
| "rewards/format_reward_fn/mean": 0.9478906244039536, | |
| "rewards/format_reward_fn/std": 0.2007538639008999, | |
| "step": 2816 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.07421875, | |
| "completions/max_length": 255.0, | |
| "completions/max_terminated_length": 251.4375, | |
| "completions/mean_length": 217.4892578125, | |
| "completions/mean_terminated_length": 214.5166711807251, | |
| "completions/min_length": 171.125, | |
| "completions/min_terminated_length": 171.125, | |
| "entropy": 0.07404683344066143, | |
| "epoch": 2.752186588921283, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.18848936259746552, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 230773106.0, | |
| "reward": 12.326741218566895, | |
| "reward_std": 0.9671976566314697, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.89621976390481, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23690359899774194, | |
| "rewards/event_reward_fn/mean": 10.515625, | |
| "rewards/event_reward_fn/std": 5.634042501449585, | |
| "rewards/format_reward_fn/mean": 0.9148964546620846, | |
| "rewards/format_reward_fn/std": 0.2338833932299167, | |
| "step": 2832 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.060546875, | |
| "completions/max_length": 255.875, | |
| "completions/max_terminated_length": 252.625, | |
| "completions/mean_length": 217.236328125, | |
| "completions/mean_terminated_length": 214.71324062347412, | |
| "completions/min_length": 172.9375, | |
| "completions/min_terminated_length": 172.9375, | |
| "entropy": 0.08189457282423973, | |
| "epoch": 2.7677356656948495, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.20657600462436676, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 232070576.0, | |
| "reward": 11.5172398686409, | |
| "reward_std": 0.8970336727797985, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9108828380703926, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22826198721304536, | |
| "rewards/event_reward_fn/mean": 9.673828125, | |
| "rewards/event_reward_fn/std": 5.733745768666267, | |
| "rewards/format_reward_fn/mean": 0.9325288347899914, | |
| "rewards/format_reward_fn/std": 0.226588967256248, | |
| "step": 2848 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.056640625, | |
| "completions/max_length": 252.5625, | |
| "completions/max_terminated_length": 249.4375, | |
| "completions/mean_length": 213.166015625, | |
| "completions/mean_terminated_length": 210.57254600524902, | |
| "completions/min_length": 167.75, | |
| "completions/min_terminated_length": 167.75, | |
| "entropy": 0.08049681456759572, | |
| "epoch": 2.7832847424684157, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.05886400490999222, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0027, | |
| "num_tokens": 233415010.0, | |
| "reward": 11.385997593402863, | |
| "reward_std": 0.7553573679178953, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9164049662649632, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2016591742867604, | |
| "rewards/event_reward_fn/mean": 9.53515625, | |
| "rewards/event_reward_fn/std": 5.440419033169746, | |
| "rewards/format_reward_fn/mean": 0.93443638458848, | |
| "rewards/format_reward_fn/std": 0.19143922347575426, | |
| "step": 2864 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0498046875, | |
| "completions/max_length": 254.3125, | |
| "completions/max_terminated_length": 250.375, | |
| "completions/mean_length": 214.185546875, | |
| "completions/mean_terminated_length": 212.15652561187744, | |
| "completions/min_length": 172.1875, | |
| "completions/min_terminated_length": 172.1875, | |
| "entropy": 0.0816779644228518, | |
| "epoch": 2.7988338192419824, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.0916222557425499, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0032, | |
| "num_tokens": 234744436.0, | |
| "reward": 11.663362562656403, | |
| "reward_std": 0.9015852566808462, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9179877303540707, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2022923786425963, | |
| "rewards/event_reward_fn/mean": 9.8125, | |
| "rewards/event_reward_fn/std": 5.319433629512787, | |
| "rewards/format_reward_fn/mean": 0.9328748136758804, | |
| "rewards/format_reward_fn/std": 0.20254582911729813, | |
| "step": 2880 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0966796875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 254.5625, | |
| "completions/mean_length": 220.1044921875, | |
| "completions/mean_terminated_length": 216.32228183746338, | |
| "completions/min_length": 171.5, | |
| "completions/min_terminated_length": 171.5, | |
| "entropy": 0.08011228078976274, | |
| "epoch": 2.814382896015549, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.08450505882501602, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0032, | |
| "num_tokens": 236075035.0, | |
| "reward": 11.988969624042511, | |
| "reward_std": 0.7974276356399059, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8782762736082077, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2924462389200926, | |
| "rewards/event_reward_fn/mean": 10.2158203125, | |
| "rewards/event_reward_fn/std": 5.5798052698373795, | |
| "rewards/format_reward_fn/mean": 0.8948730453848839, | |
| "rewards/format_reward_fn/std": 0.2953194109722972, | |
| "step": 2896 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0849609375, | |
| "completions/max_length": 255.875, | |
| "completions/max_terminated_length": 254.3125, | |
| "completions/mean_length": 219.5263671875, | |
| "completions/mean_terminated_length": 216.11603832244873, | |
| "completions/min_length": 175.8125, | |
| "completions/min_terminated_length": 175.8125, | |
| "entropy": 0.08505099918693304, | |
| "epoch": 2.8299319727891157, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.12416191399097443, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0027, | |
| "num_tokens": 237421442.0, | |
| "reward": 11.693400919437408, | |
| "reward_std": 0.8284243606030941, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8833912238478661, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.267708154162392, | |
| "rewards/event_reward_fn/mean": 9.90625, | |
| "rewards/event_reward_fn/std": 5.605697572231293, | |
| "rewards/format_reward_fn/mean": 0.9037597663700581, | |
| "rewards/format_reward_fn/std": 0.2683409294113517, | |
| "step": 2912 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033203125, | |
| "completions/max_length": 254.5625, | |
| "completions/max_terminated_length": 252.0, | |
| "completions/mean_length": 213.083984375, | |
| "completions/mean_terminated_length": 211.6382074356079, | |
| "completions/min_length": 169.9375, | |
| "completions/min_terminated_length": 169.9375, | |
| "entropy": 0.08473130548372865, | |
| "epoch": 2.8454810495626823, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.06745623797178268, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 238715276.0, | |
| "reward": 11.77674776315689, | |
| "reward_std": 0.685878150165081, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9190886318683624, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19958114624023438, | |
| "rewards/event_reward_fn/mean": 9.9150390625, | |
| "rewards/event_reward_fn/std": 5.207145616412163, | |
| "rewards/format_reward_fn/mean": 0.9426199793815613, | |
| "rewards/format_reward_fn/std": 0.1927571757696569, | |
| "step": 2928 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05078125, | |
| "completions/max_length": 254.25, | |
| "completions/max_terminated_length": 250.6875, | |
| "completions/mean_length": 214.3232421875, | |
| "completions/mean_terminated_length": 212.1032657623291, | |
| "completions/min_length": 169.9375, | |
| "completions/min_terminated_length": 169.9375, | |
| "entropy": 0.08542184252291918, | |
| "epoch": 2.8610301263362485, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.10482887178659439, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 240057215.0, | |
| "reward": 11.639575242996216, | |
| "reward_std": 0.9158763885498047, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8979970328509808, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23725404776632786, | |
| "rewards/event_reward_fn/mean": 9.826171875, | |
| "rewards/event_reward_fn/std": 5.315482467412949, | |
| "rewards/format_reward_fn/mean": 0.9154064357280731, | |
| "rewards/format_reward_fn/std": 0.2300750371068716, | |
| "step": 2944 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.103515625, | |
| "completions/max_length": 255.5, | |
| "completions/max_terminated_length": 253.1875, | |
| "completions/mean_length": 220.142578125, | |
| "completions/mean_terminated_length": 216.11438083648682, | |
| "completions/min_length": 169.6875, | |
| "completions/min_terminated_length": 169.6875, | |
| "entropy": 0.0917358947917819, | |
| "epoch": 2.8765792031098156, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.29249680042266846, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0023, | |
| "num_tokens": 241437397.0, | |
| "reward": 12.01733946800232, | |
| "reward_std": 0.8955757319927216, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8503146581351757, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3139411583542824, | |
| "rewards/event_reward_fn/mean": 10.298828125, | |
| "rewards/event_reward_fn/std": 5.663209050893784, | |
| "rewards/format_reward_fn/mean": 0.868196614086628, | |
| "rewards/format_reward_fn/std": 0.318668226711452, | |
| "step": 2960 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0791015625, | |
| "completions/max_length": 254.0, | |
| "completions/max_terminated_length": 250.0, | |
| "completions/mean_length": 215.6435546875, | |
| "completions/mean_terminated_length": 212.3185043334961, | |
| "completions/min_length": 165.4375, | |
| "completions/min_terminated_length": 165.4375, | |
| "entropy": 0.09182127751410007, | |
| "epoch": 2.892128279883382, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.17700594663619995, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 242749992.0, | |
| "reward": 11.478153705596924, | |
| "reward_std": 0.8844601437449455, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.880519162863493, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25536160822957754, | |
| "rewards/event_reward_fn/mean": 9.697265625, | |
| "rewards/event_reward_fn/std": 5.846217334270477, | |
| "rewards/format_reward_fn/mean": 0.9003689214587212, | |
| "rewards/format_reward_fn/std": 0.24951867014169693, | |
| "step": 2976 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0615234375, | |
| "completions/max_length": 255.5625, | |
| "completions/max_terminated_length": 251.8125, | |
| "completions/mean_length": 214.3974609375, | |
| "completions/mean_terminated_length": 211.83877277374268, | |
| "completions/min_length": 166.3125, | |
| "completions/min_terminated_length": 166.3125, | |
| "entropy": 0.0888472800143063, | |
| "epoch": 2.9076773566569485, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.18143412470817566, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0024, | |
| "num_tokens": 244089207.0, | |
| "reward": 11.602717459201813, | |
| "reward_std": 0.7762532383203506, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8992017544806004, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23547889525070786, | |
| "rewards/event_reward_fn/mean": 9.78125, | |
| "rewards/event_reward_fn/std": 5.857791095972061, | |
| "rewards/format_reward_fn/mean": 0.9222656264901161, | |
| "rewards/format_reward_fn/std": 0.2304223021492362, | |
| "step": 2992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 247.5, | |
| "completions/max_terminated_length": 245.1875, | |
| "completions/mean_length": 204.5185546875, | |
| "completions/mean_terminated_length": 203.2769651412964, | |
| "completions/min_length": 161.625, | |
| "completions/min_terminated_length": 161.625, | |
| "entropy": 0.08608831372112036, | |
| "epoch": 2.923226433430515, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.18656234443187714, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 245366298.0, | |
| "reward": 11.507224977016449, | |
| "reward_std": 0.8891939371824265, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9319907054305077, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16323891701176763, | |
| "rewards/event_reward_fn/mean": 9.6142578125, | |
| "rewards/event_reward_fn/std": 6.0165297240018845, | |
| "rewards/format_reward_fn/mean": 0.9609765633940697, | |
| "rewards/format_reward_fn/std": 0.1338434610515833, | |
| "step": 3008 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 243.125, | |
| "completions/max_terminated_length": 239.3125, | |
| "completions/mean_length": 190.7744140625, | |
| "completions/mean_terminated_length": 189.83202362060547, | |
| "completions/min_length": 149.75, | |
| "completions/min_terminated_length": 149.75, | |
| "entropy": 0.0839080074802041, | |
| "epoch": 2.938775510204082, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.16527223587036133, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0029, | |
| "num_tokens": 246703351.0, | |
| "reward": 11.010808229446411, | |
| "reward_std": 0.8288077171891928, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9462215937674046, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14411098731216043, | |
| "rewards/event_reward_fn/mean": 9.1025390625, | |
| "rewards/event_reward_fn/std": 5.691614359617233, | |
| "rewards/format_reward_fn/mean": 0.9620475210249424, | |
| "rewards/format_reward_fn/std": 0.13395208539441228, | |
| "step": 3024 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0146484375, | |
| "completions/max_length": 246.125, | |
| "completions/max_terminated_length": 240.25, | |
| "completions/mean_length": 197.7421875, | |
| "completions/mean_terminated_length": 196.85297679901123, | |
| "completions/min_length": 157.125, | |
| "completions/min_terminated_length": 157.125, | |
| "entropy": 0.0805180431343615, | |
| "epoch": 2.9543245869776484, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.16700057685375214, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0039, | |
| "num_tokens": 247982459.0, | |
| "reward": 11.723504066467285, | |
| "reward_std": 0.775495782494545, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9559190906584263, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.11062386812409386, | |
| "rewards/event_reward_fn/mean": 9.7900390625, | |
| "rewards/event_reward_fn/std": 5.5720172971487045, | |
| "rewards/format_reward_fn/mean": 0.9775458797812462, | |
| "rewards/format_reward_fn/std": 0.08744857460260391, | |
| "step": 3040 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 252.125, | |
| "completions/max_terminated_length": 244.875, | |
| "completions/mean_length": 207.955078125, | |
| "completions/mean_terminated_length": 206.359375, | |
| "completions/min_length": 170.625, | |
| "completions/min_terminated_length": 170.625, | |
| "entropy": 0.07879460602998734, | |
| "epoch": 2.9698736637512146, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.1064968854188919, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 249284553.0, | |
| "reward": 12.131292760372162, | |
| "reward_std": 0.7837562952190638, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9279702864587307, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17746176407672465, | |
| "rewards/event_reward_fn/mean": 10.2548828125, | |
| "rewards/event_reward_fn/std": 5.574135601520538, | |
| "rewards/format_reward_fn/mean": 0.9484398253262043, | |
| "rewards/format_reward_fn/std": 0.1754506565630436, | |
| "step": 3056 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0146484375, | |
| "completions/max_length": 240.0, | |
| "completions/max_terminated_length": 236.1875, | |
| "completions/mean_length": 195.7041015625, | |
| "completions/mean_terminated_length": 194.84180450439453, | |
| "completions/min_length": 159.0625, | |
| "completions/min_terminated_length": 159.0625, | |
| "entropy": 0.07347549963742495, | |
| "epoch": 2.9854227405247813, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.12872080504894257, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0021, | |
| "num_tokens": 250584122.0, | |
| "reward": 11.6422598361969, | |
| "reward_std": 0.8738533556461334, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9514190852642059, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12013476574793458, | |
| "rewards/event_reward_fn/mean": 9.7216796875, | |
| "rewards/event_reward_fn/std": 4.768230766057968, | |
| "rewards/format_reward_fn/mean": 0.9691610857844353, | |
| "rewards/format_reward_fn/std": 0.10613342700526118, | |
| "step": 3072 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0107421875, | |
| "completions/max_length": 234.25, | |
| "completions/max_terminated_length": 229.25, | |
| "completions/mean_length": 173.66015625, | |
| "completions/mean_terminated_length": 172.75994396209717, | |
| "completions/min_length": 120.375, | |
| "completions/min_terminated_length": 120.375, | |
| "entropy": 0.06631791149266064, | |
| "epoch": 3.000971817298348, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.268284410238266, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0048, | |
| "num_tokens": 251857282.0, | |
| "reward": 11.125836312770844, | |
| "reward_std": 0.8690453059971333, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9682918414473534, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.09923727967543527, | |
| "rewards/event_reward_fn/mean": 9.181640625, | |
| "rewards/event_reward_fn/std": 5.686726421117783, | |
| "rewards/format_reward_fn/mean": 0.9759038686752319, | |
| "rewards/format_reward_fn/std": 0.10093728080391884, | |
| "step": 3088 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 236.875, | |
| "completions/max_terminated_length": 231.75, | |
| "completions/mean_length": 185.73828125, | |
| "completions/mean_terminated_length": 184.20531558990479, | |
| "completions/min_length": 140.75, | |
| "completions/min_terminated_length": 140.75, | |
| "entropy": 0.07297877874225378, | |
| "epoch": 3.0165208940719146, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.14597758650779724, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0027, | |
| "num_tokens": 253203846.0, | |
| "reward": 11.508928120136261, | |
| "reward_std": 0.8718460761010647, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9537964537739754, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.133202571363654, | |
| "rewards/event_reward_fn/mean": 9.58984375, | |
| "rewards/event_reward_fn/std": 5.781486123800278, | |
| "rewards/format_reward_fn/mean": 0.9652878567576408, | |
| "rewards/format_reward_fn/std": 0.11640464654192328, | |
| "step": 3104 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 246.5625, | |
| "completions/max_terminated_length": 239.75, | |
| "completions/mean_length": 192.57421875, | |
| "completions/mean_terminated_length": 191.06838130950928, | |
| "completions/min_length": 151.75, | |
| "completions/min_terminated_length": 151.75, | |
| "entropy": 0.07239698874764144, | |
| "epoch": 3.0320699708454812, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.15297071635723114, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 254532774.0, | |
| "reward": 12.089753448963165, | |
| "reward_std": 0.8900428749620914, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9412974454462528, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17066996253561229, | |
| "rewards/event_reward_fn/mean": 10.1943359375, | |
| "rewards/event_reward_fn/std": 6.097415968775749, | |
| "rewards/format_reward_fn/mean": 0.9541201665997505, | |
| "rewards/format_reward_fn/std": 0.16532070748507977, | |
| "step": 3120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 236.9375, | |
| "completions/max_terminated_length": 232.375, | |
| "completions/mean_length": 185.283203125, | |
| "completions/mean_terminated_length": 183.95048999786377, | |
| "completions/min_length": 140.375, | |
| "completions/min_terminated_length": 140.375, | |
| "entropy": 0.06962199346162379, | |
| "epoch": 3.0476190476190474, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.1586209386587143, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 255823680.0, | |
| "reward": 11.430478930473328, | |
| "reward_std": 0.8875509612262249, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9428003318607807, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14735072664916515, | |
| "rewards/event_reward_fn/mean": 9.525390625, | |
| "rewards/event_reward_fn/std": 5.551691547036171, | |
| "rewards/format_reward_fn/mean": 0.9622879475355148, | |
| "rewards/format_reward_fn/std": 0.13102243188768625, | |
| "step": 3136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0107421875, | |
| "completions/max_length": 241.5, | |
| "completions/max_terminated_length": 237.625, | |
| "completions/mean_length": 194.337890625, | |
| "completions/mean_terminated_length": 193.67024612426758, | |
| "completions/min_length": 160.5, | |
| "completions/min_terminated_length": 160.5, | |
| "entropy": 0.07421417301520705, | |
| "epoch": 3.063168124392614, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.32556113600730896, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0055, | |
| "num_tokens": 257162266.0, | |
| "reward": 11.765808463096619, | |
| "reward_std": 0.8776319213211536, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9556044563651085, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12997814314439893, | |
| "rewards/event_reward_fn/mean": 9.837890625, | |
| "rewards/event_reward_fn/std": 5.4677809327840805, | |
| "rewards/format_reward_fn/mean": 0.972313366830349, | |
| "rewards/format_reward_fn/std": 0.11132679507136345, | |
| "step": 3152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 246.4375, | |
| "completions/max_terminated_length": 241.25, | |
| "completions/mean_length": 201.2490234375, | |
| "completions/mean_terminated_length": 200.61123752593994, | |
| "completions/min_length": 161.0625, | |
| "completions/min_terminated_length": 161.0625, | |
| "entropy": 0.08062579715624452, | |
| "epoch": 3.0787172011661808, | |
| "frac_reward_zero_std": 0.31640625, | |
| "grad_norm": 0.0818430706858635, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0011, | |
| "num_tokens": 258469721.0, | |
| "reward": 12.113971889019012, | |
| "reward_std": 0.8606467135250568, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9496653489768505, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13710944051854312, | |
| "rewards/event_reward_fn/mean": 10.19921875, | |
| "rewards/event_reward_fn/std": 5.852348044514656, | |
| "rewards/format_reward_fn/mean": 0.9650877378880978, | |
| "rewards/format_reward_fn/std": 0.12316453643143177, | |
| "step": 3168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 249.8125, | |
| "completions/max_terminated_length": 245.25, | |
| "completions/mean_length": 207.7333984375, | |
| "completions/mean_terminated_length": 206.9770908355713, | |
| "completions/min_length": 165.0, | |
| "completions/min_terminated_length": 165.0, | |
| "entropy": 0.07632905803620815, | |
| "epoch": 3.0942662779397474, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.20121721923351288, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 259793840.0, | |
| "reward": 11.953573882579803, | |
| "reward_std": 0.7916774693876505, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9334963597357273, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18081966822501272, | |
| "rewards/event_reward_fn/mean": 10.07421875, | |
| "rewards/event_reward_fn/std": 5.564135581254959, | |
| "rewards/format_reward_fn/mean": 0.9458589181303978, | |
| "rewards/format_reward_fn/std": 0.1874212771654129, | |
| "step": 3184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.04296875, | |
| "completions/max_length": 254.1875, | |
| "completions/max_terminated_length": 251.4375, | |
| "completions/mean_length": 213.3369140625, | |
| "completions/mean_terminated_length": 211.478759765625, | |
| "completions/min_length": 170.6875, | |
| "completions/min_terminated_length": 170.6875, | |
| "entropy": 0.07402227586135268, | |
| "epoch": 3.109815354713314, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.17332999408245087, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 261173137.0, | |
| "reward": 12.101770102977753, | |
| "reward_std": 0.8903996516019106, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9108825102448463, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22369781765155494, | |
| "rewards/event_reward_fn/mean": 10.2685546875, | |
| "rewards/event_reward_fn/std": 5.45231431722641, | |
| "rewards/format_reward_fn/mean": 0.9223329871892929, | |
| "rewards/format_reward_fn/std": 0.2215037615969777, | |
| "step": 3200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 253.875, | |
| "completions/max_terminated_length": 250.5625, | |
| "completions/mean_length": 212.9697265625, | |
| "completions/mean_terminated_length": 210.7997007369995, | |
| "completions/min_length": 174.8125, | |
| "completions/min_terminated_length": 174.8125, | |
| "entropy": 0.07718956796452403, | |
| "epoch": 3.1253644314868803, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.1662418693304062, | |
| "learning_rate": 5e-05, | |
| "loss": 0.005, | |
| "num_tokens": 262593334.0, | |
| "reward": 11.946735978126526, | |
| "reward_std": 0.8465413227677345, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8991723321378231, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.24368242837954313, | |
| "rewards/event_reward_fn/mean": 10.126953125, | |
| "rewards/event_reward_fn/std": 5.943547070026398, | |
| "rewards/format_reward_fn/mean": 0.9206105917692184, | |
| "rewards/format_reward_fn/std": 0.24353812169283628, | |
| "step": 3216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0205078125, | |
| "completions/max_length": 250.5625, | |
| "completions/max_terminated_length": 248.25, | |
| "completions/mean_length": 205.919921875, | |
| "completions/mean_terminated_length": 204.90336322784424, | |
| "completions/min_length": 166.75, | |
| "completions/min_terminated_length": 166.75, | |
| "entropy": 0.074956723023206, | |
| "epoch": 3.140913508260447, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.09807421267032623, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0016, | |
| "num_tokens": 263927740.0, | |
| "reward": 11.93316513299942, | |
| "reward_std": 0.7299522124230862, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9466019049286842, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13588694983627647, | |
| "rewards/event_reward_fn/mean": 10.0244140625, | |
| "rewards/event_reward_fn/std": 5.49932274222374, | |
| "rewards/format_reward_fn/mean": 0.9621492139995098, | |
| "rewards/format_reward_fn/std": 0.13393445825204253, | |
| "step": 3232 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 240.125, | |
| "completions/max_terminated_length": 232.5, | |
| "completions/mean_length": 195.6865234375, | |
| "completions/mean_terminated_length": 194.702299118042, | |
| "completions/min_length": 154.9375, | |
| "completions/min_terminated_length": 154.9375, | |
| "entropy": 0.0733064110390842, | |
| "epoch": 3.1564625850340136, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.09938943386077881, | |
| "learning_rate": 5e-05, | |
| "loss": -0.003, | |
| "num_tokens": 265309295.0, | |
| "reward": 11.604403555393219, | |
| "reward_std": 0.7237380500882864, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9366314336657524, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.15581788471899927, | |
| "rewards/event_reward_fn/mean": 9.7099609375, | |
| "rewards/event_reward_fn/std": 5.092981055378914, | |
| "rewards/format_reward_fn/mean": 0.9578111059963703, | |
| "rewards/format_reward_fn/std": 0.14878937718458474, | |
| "step": 3248 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0126953125, | |
| "completions/max_length": 242.6875, | |
| "completions/max_terminated_length": 239.1875, | |
| "completions/mean_length": 197.634765625, | |
| "completions/mean_terminated_length": 196.94854736328125, | |
| "completions/min_length": 162.4375, | |
| "completions/min_terminated_length": 162.4375, | |
| "entropy": 0.0749533399939537, | |
| "epoch": 3.17201166180758, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.12545543909072876, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0078, | |
| "num_tokens": 266596509.0, | |
| "reward": 11.91082489490509, | |
| "reward_std": 0.8464185632765293, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9437732025980949, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.15822483785450459, | |
| "rewards/event_reward_fn/mean": 10.00390625, | |
| "rewards/event_reward_fn/std": 5.849025100469589, | |
| "rewards/format_reward_fn/mean": 0.9631454646587372, | |
| "rewards/format_reward_fn/std": 0.14366952097043395, | |
| "step": 3264 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 247.4375, | |
| "completions/max_terminated_length": 245.125, | |
| "completions/mean_length": 199.3134765625, | |
| "completions/mean_terminated_length": 198.85503959655762, | |
| "completions/min_length": 164.0, | |
| "completions/min_terminated_length": 164.0, | |
| "entropy": 0.07353265816345811, | |
| "epoch": 3.187560738581147, | |
| "frac_reward_zero_std": 0.2890625, | |
| "grad_norm": 0.21840062737464905, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0031, | |
| "num_tokens": 267914170.0, | |
| "reward": 12.273382246494293, | |
| "reward_std": 0.83649617806077, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9368309266865253, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1605551114771515, | |
| "rewards/event_reward_fn/mean": 10.3828125, | |
| "rewards/event_reward_fn/std": 5.610780626535416, | |
| "rewards/format_reward_fn/mean": 0.9537388421595097, | |
| "rewards/format_reward_fn/std": 0.1550324517302215, | |
| "step": 3280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02734375, | |
| "completions/max_length": 244.125, | |
| "completions/max_terminated_length": 233.5625, | |
| "completions/mean_length": 188.4580078125, | |
| "completions/mean_terminated_length": 186.60568237304688, | |
| "completions/min_length": 136.875, | |
| "completions/min_terminated_length": 136.875, | |
| "entropy": 0.07190486835315824, | |
| "epoch": 3.2031098153547135, | |
| "frac_reward_zero_std": 0.40234375, | |
| "grad_norm": 0.10517474263906479, | |
| "learning_rate": 5e-05, | |
| "loss": 0.002, | |
| "num_tokens": 269206483.0, | |
| "reward": 11.828980565071106, | |
| "reward_std": 0.7958023902028799, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9383596889674664, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17574476334266365, | |
| "rewards/event_reward_fn/mean": 9.9365234375, | |
| "rewards/event_reward_fn/std": 6.193027026951313, | |
| "rewards/format_reward_fn/mean": 0.9540975317358971, | |
| "rewards/format_reward_fn/std": 0.1640933039598167, | |
| "step": 3296 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 246.125, | |
| "completions/max_terminated_length": 242.0, | |
| "completions/mean_length": 199.916015625, | |
| "completions/mean_terminated_length": 199.05778789520264, | |
| "completions/min_length": 159.6875, | |
| "completions/min_terminated_length": 159.6875, | |
| "entropy": 0.06998816644772887, | |
| "epoch": 3.2186588921282797, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.16048327088356018, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0047, | |
| "num_tokens": 270519973.0, | |
| "reward": 11.97882354259491, | |
| "reward_std": 0.769274152815342, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9493311978876591, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13935352605767548, | |
| "rewards/event_reward_fn/mean": 10.0625, | |
| "rewards/event_reward_fn/std": 5.823389694094658, | |
| "rewards/format_reward_fn/mean": 0.9669921882450581, | |
| "rewards/format_reward_fn/std": 0.11799978371709585, | |
| "step": 3312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 250.375, | |
| "completions/max_terminated_length": 244.75, | |
| "completions/mean_length": 198.3857421875, | |
| "completions/mean_terminated_length": 197.3439645767212, | |
| "completions/min_length": 157.5625, | |
| "completions/min_terminated_length": 157.5625, | |
| "entropy": 0.06354829482734203, | |
| "epoch": 3.2342079689018464, | |
| "frac_reward_zero_std": 0.4609375, | |
| "grad_norm": 0.1142616868019104, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0017, | |
| "num_tokens": 271864552.0, | |
| "reward": 11.812129974365234, | |
| "reward_std": 0.7051564212888479, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9286896027624607, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19053616502787918, | |
| "rewards/event_reward_fn/mean": 9.935546875, | |
| "rewards/event_reward_fn/std": 5.488028556108475, | |
| "rewards/format_reward_fn/mean": 0.9478934183716774, | |
| "rewards/format_reward_fn/std": 0.1760867452248931, | |
| "step": 3328 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029296875, | |
| "completions/max_length": 250.5, | |
| "completions/max_terminated_length": 242.75, | |
| "completions/mean_length": 200.3955078125, | |
| "completions/mean_terminated_length": 198.8016004562378, | |
| "completions/min_length": 156.375, | |
| "completions/min_terminated_length": 156.375, | |
| "entropy": 0.06551302410662174, | |
| "epoch": 3.249757045675413, | |
| "frac_reward_zero_std": 0.42578125, | |
| "grad_norm": 0.10454633086919785, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0045, | |
| "num_tokens": 273133345.0, | |
| "reward": 11.325606524944305, | |
| "reward_std": 0.8327386099845171, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9499945268034935, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13595928740687668, | |
| "rewards/event_reward_fn/mean": 9.4111328125, | |
| "rewards/event_reward_fn/std": 5.7270414382219315, | |
| "rewards/format_reward_fn/mean": 0.9644791670143604, | |
| "rewards/format_reward_fn/std": 0.12074299133382738, | |
| "step": 3344 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.072265625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 253.4375, | |
| "completions/mean_length": 211.5947265625, | |
| "completions/mean_terminated_length": 208.13346004486084, | |
| "completions/min_length": 161.6875, | |
| "completions/min_terminated_length": 161.6875, | |
| "entropy": 0.06520150555297732, | |
| "epoch": 3.2653061224489797, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.08934107422828674, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 274438762.0, | |
| "reward": 11.95677363872528, | |
| "reward_std": 0.7490882519632578, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8981688618659973, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2647989746183157, | |
| "rewards/event_reward_fn/mean": 10.14453125, | |
| "rewards/event_reward_fn/std": 6.044169589877129, | |
| "rewards/format_reward_fn/mean": 0.9140736609697342, | |
| "rewards/format_reward_fn/std": 0.2635462637990713, | |
| "step": 3360 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0478515625, | |
| "completions/max_length": 253.5, | |
| "completions/max_terminated_length": 245.5, | |
| "completions/mean_length": 207.7646484375, | |
| "completions/mean_terminated_length": 205.32763290405273, | |
| "completions/min_length": 165.4375, | |
| "completions/min_terminated_length": 165.4375, | |
| "entropy": 0.06412141490727663, | |
| "epoch": 3.2808551992225463, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.19203978776931763, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 275769713.0, | |
| "reward": 11.799296379089355, | |
| "reward_std": 0.7531254291534424, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9147956632077694, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2275586040923372, | |
| "rewards/event_reward_fn/mean": 9.947265625, | |
| "rewards/event_reward_fn/std": 5.504115954041481, | |
| "rewards/format_reward_fn/mean": 0.9372349306941032, | |
| "rewards/format_reward_fn/std": 0.2251202268525958, | |
| "step": 3376 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0380859375, | |
| "completions/max_length": 254.3125, | |
| "completions/max_terminated_length": 250.8125, | |
| "completions/mean_length": 213.9912109375, | |
| "completions/mean_terminated_length": 212.369891166687, | |
| "completions/min_length": 172.9375, | |
| "completions/min_terminated_length": 172.9375, | |
| "entropy": 0.06467607943341136, | |
| "epoch": 3.296404275996113, | |
| "frac_reward_zero_std": 0.43359375, | |
| "grad_norm": 0.10870110988616943, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 277049204.0, | |
| "reward": 11.820498406887054, | |
| "reward_std": 0.6512585924938321, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9223602823913097, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18132009892724454, | |
| "rewards/event_reward_fn/mean": 9.9521484375, | |
| "rewards/event_reward_fn/std": 5.741435334086418, | |
| "rewards/format_reward_fn/mean": 0.9459895864129066, | |
| "rewards/format_reward_fn/std": 0.16436301171779633, | |
| "step": 3392 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.078125, | |
| "completions/max_length": 255.0, | |
| "completions/max_terminated_length": 248.3125, | |
| "completions/mean_length": 216.11328125, | |
| "completions/mean_terminated_length": 212.78788471221924, | |
| "completions/min_length": 169.875, | |
| "completions/min_terminated_length": 169.875, | |
| "entropy": 0.06166102201677859, | |
| "epoch": 3.311953352769679, | |
| "frac_reward_zero_std": 0.453125, | |
| "grad_norm": 0.0720224604010582, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 278391292.0, | |
| "reward": 11.994673013687134, | |
| "reward_std": 0.6659458577632904, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8917759135365486, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25888109114021063, | |
| "rewards/event_reward_fn/mean": 10.1962890625, | |
| "rewards/event_reward_fn/std": 5.986569568514824, | |
| "rewards/format_reward_fn/mean": 0.9066080749034882, | |
| "rewards/format_reward_fn/std": 0.25973749114200473, | |
| "step": 3408 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 248.5, | |
| "completions/max_terminated_length": 245.1875, | |
| "completions/mean_length": 204.8330078125, | |
| "completions/mean_terminated_length": 203.63013172149658, | |
| "completions/min_length": 162.875, | |
| "completions/min_terminated_length": 162.875, | |
| "entropy": 0.06246258458122611, | |
| "epoch": 3.327502429543246, | |
| "frac_reward_zero_std": 0.453125, | |
| "grad_norm": 0.10192258656024933, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0042, | |
| "num_tokens": 279658405.0, | |
| "reward": 12.02261358499527, | |
| "reward_std": 0.7050989326089621, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9559198245406151, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14632097427966073, | |
| "rewards/event_reward_fn/mean": 10.1015625, | |
| "rewards/event_reward_fn/std": 5.938043773174286, | |
| "rewards/format_reward_fn/mean": 0.9651312977075577, | |
| "rewards/format_reward_fn/std": 0.14133254252374172, | |
| "step": 3424 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 249.8125, | |
| "completions/max_terminated_length": 246.5625, | |
| "completions/mean_length": 202.8173828125, | |
| "completions/mean_terminated_length": 201.44210147857666, | |
| "completions/min_length": 161.1875, | |
| "completions/min_terminated_length": 161.1875, | |
| "entropy": 0.05804568435996771, | |
| "epoch": 3.3430515063168125, | |
| "frac_reward_zero_std": 0.4375, | |
| "grad_norm": 0.09696446359157562, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 280979450.0, | |
| "reward": 11.77318161725998, | |
| "reward_std": 0.6543006896972656, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9384081587195396, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1720411020796746, | |
| "rewards/event_reward_fn/mean": 9.8798828125, | |
| "rewards/event_reward_fn/std": 5.3430622816085815, | |
| "rewards/format_reward_fn/mean": 0.954890564084053, | |
| "rewards/format_reward_fn/std": 0.1621245201677084, | |
| "step": 3440 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0283203125, | |
| "completions/max_length": 250.875, | |
| "completions/max_terminated_length": 243.375, | |
| "completions/mean_length": 204.373046875, | |
| "completions/mean_terminated_length": 202.94876098632812, | |
| "completions/min_length": 164.75, | |
| "completions/min_terminated_length": 164.75, | |
| "entropy": 0.06122026569209993, | |
| "epoch": 3.358600583090379, | |
| "frac_reward_zero_std": 0.4453125, | |
| "grad_norm": 0.12297879159450531, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0054, | |
| "num_tokens": 282332772.0, | |
| "reward": 11.589432656764984, | |
| "reward_std": 0.7322587119415402, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9409170113503933, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17370267800288275, | |
| "rewards/event_reward_fn/mean": 9.6923828125, | |
| "rewards/event_reward_fn/std": 6.004166945815086, | |
| "rewards/format_reward_fn/mean": 0.9561328142881393, | |
| "rewards/format_reward_fn/std": 0.16955442121252418, | |
| "step": 3456 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0400390625, | |
| "completions/max_length": 252.8125, | |
| "completions/max_terminated_length": 249.25, | |
| "completions/mean_length": 212.8505859375, | |
| "completions/mean_terminated_length": 211.24829959869385, | |
| "completions/min_length": 171.4375, | |
| "completions/min_terminated_length": 171.4375, | |
| "entropy": 0.06467843032442033, | |
| "epoch": 3.3741496598639458, | |
| "frac_reward_zero_std": 0.453125, | |
| "grad_norm": 0.11574175953865051, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0041, | |
| "num_tokens": 283596291.0, | |
| "reward": 11.298483848571777, | |
| "reward_std": 0.699859144166112, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9317664988338947, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1917368060676381, | |
| "rewards/event_reward_fn/mean": 9.4208984375, | |
| "rewards/event_reward_fn/std": 5.096125215291977, | |
| "rewards/format_reward_fn/mean": 0.9458189196884632, | |
| "rewards/format_reward_fn/std": 0.17844219831749797, | |
| "step": 3472 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0302734375, | |
| "completions/max_length": 253.125, | |
| "completions/max_terminated_length": 248.8125, | |
| "completions/mean_length": 213.7158203125, | |
| "completions/mean_terminated_length": 212.43880653381348, | |
| "completions/min_length": 174.75, | |
| "completions/min_terminated_length": 174.75, | |
| "entropy": 0.07107805530540645, | |
| "epoch": 3.389698736637512, | |
| "frac_reward_zero_std": 0.4765625, | |
| "grad_norm": 0.14759734272956848, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 284865764.0, | |
| "reward": 11.34376209974289, | |
| "reward_std": 0.6468667350709438, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9472848623991013, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16370269027538598, | |
| "rewards/event_reward_fn/mean": 9.4375, | |
| "rewards/event_reward_fn/std": 5.265123501420021, | |
| "rewards/format_reward_fn/mean": 0.9589774012565613, | |
| "rewards/format_reward_fn/std": 0.15847991104237735, | |
| "step": 3488 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0615234375, | |
| "completions/max_length": 255.875, | |
| "completions/max_terminated_length": 252.5625, | |
| "completions/mean_length": 216.7431640625, | |
| "completions/mean_terminated_length": 214.12075424194336, | |
| "completions/min_length": 169.1875, | |
| "completions/min_terminated_length": 169.1875, | |
| "entropy": 0.07305671996437013, | |
| "epoch": 3.4052478134110786, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.17206382751464844, | |
| "learning_rate": 5e-05, | |
| "loss": 0.003, | |
| "num_tokens": 286236689.0, | |
| "reward": 11.9978928565979, | |
| "reward_std": 0.8643622100353241, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9041581116616726, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2597373647149652, | |
| "rewards/event_reward_fn/mean": 10.1796875, | |
| "rewards/event_reward_fn/std": 5.569000482559204, | |
| "rewards/format_reward_fn/mean": 0.9140473119914532, | |
| "rewards/format_reward_fn/std": 0.26027182303369045, | |
| "step": 3504 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0458984375, | |
| "completions/max_length": 254.8125, | |
| "completions/max_terminated_length": 252.75, | |
| "completions/mean_length": 216.333984375, | |
| "completions/mean_terminated_length": 214.39577198028564, | |
| "completions/min_length": 169.125, | |
| "completions/min_terminated_length": 169.125, | |
| "entropy": 0.07661846978589892, | |
| "epoch": 3.4207968901846453, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.12547151744365692, | |
| "learning_rate": 5e-05, | |
| "loss": -0.002, | |
| "num_tokens": 287559795.0, | |
| "reward": 11.89124745130539, | |
| "reward_std": 0.7206966131925583, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.931918803602457, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19364137423690408, | |
| "rewards/event_reward_fn/mean": 10.0146484375, | |
| "rewards/event_reward_fn/std": 5.905052408576012, | |
| "rewards/format_reward_fn/mean": 0.9446800611913204, | |
| "rewards/format_reward_fn/std": 0.1913862293586135, | |
| "step": 3520 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0654296875, | |
| "completions/max_length": 254.75, | |
| "completions/max_terminated_length": 251.125, | |
| "completions/mean_length": 213.421875, | |
| "completions/mean_terminated_length": 210.42116260528564, | |
| "completions/min_length": 163.4375, | |
| "completions/min_terminated_length": 163.4375, | |
| "entropy": 0.07146193599328399, | |
| "epoch": 3.436345966958212, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.17685569822788239, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0017, | |
| "num_tokens": 288922347.0, | |
| "reward": 11.876615107059479, | |
| "reward_std": 0.8880495801568031, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9142365269362926, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22835631167981774, | |
| "rewards/event_reward_fn/mean": 10.0341796875, | |
| "rewards/event_reward_fn/std": 5.525876700878143, | |
| "rewards/format_reward_fn/mean": 0.92819894105196, | |
| "rewards/format_reward_fn/std": 0.22682476695626974, | |
| "step": 3536 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02734375, | |
| "completions/max_length": 253.8125, | |
| "completions/max_terminated_length": 248.5, | |
| "completions/mean_length": 208.0751953125, | |
| "completions/mean_terminated_length": 206.6888551712036, | |
| "completions/min_length": 161.5, | |
| "completions/min_terminated_length": 161.5, | |
| "entropy": 0.06790656200610101, | |
| "epoch": 3.4518950437317786, | |
| "frac_reward_zero_std": 0.421875, | |
| "grad_norm": 0.17039044201374054, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0012, | |
| "num_tokens": 290290916.0, | |
| "reward": 12.047883689403534, | |
| "reward_std": 0.7042225562036037, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.948846910148859, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14023484371136874, | |
| "rewards/event_reward_fn/mean": 10.134765625, | |
| "rewards/event_reward_fn/std": 5.774273455142975, | |
| "rewards/format_reward_fn/mean": 0.9642711319029331, | |
| "rewards/format_reward_fn/std": 0.12487931735813618, | |
| "step": 3552 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0498046875, | |
| "completions/max_length": 254.25, | |
| "completions/max_terminated_length": 253.3125, | |
| "completions/mean_length": 217.3359375, | |
| "completions/mean_terminated_length": 215.44786262512207, | |
| "completions/min_length": 175.3125, | |
| "completions/min_terminated_length": 175.3125, | |
| "entropy": 0.07336867321282625, | |
| "epoch": 3.467444120505345, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.17650093138217926, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 291484148.0, | |
| "reward": 11.402482271194458, | |
| "reward_std": 0.7676825225353241, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9262127205729485, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19901330675929785, | |
| "rewards/event_reward_fn/mean": 9.5341796875, | |
| "rewards/event_reward_fn/std": 5.093527913093567, | |
| "rewards/format_reward_fn/mean": 0.9420898444950581, | |
| "rewards/format_reward_fn/std": 0.19261480076238513, | |
| "step": 3568 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0703125, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 252.25, | |
| "completions/mean_length": 217.9296875, | |
| "completions/mean_terminated_length": 214.96538639068604, | |
| "completions/min_length": 174.375, | |
| "completions/min_terminated_length": 174.375, | |
| "entropy": 0.07610900048166513, | |
| "epoch": 3.4829931972789114, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.2133682370185852, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 292856568.0, | |
| "reward": 11.831640183925629, | |
| "reward_std": 0.8922509625554085, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9065587967634201, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.23603793187066913, | |
| "rewards/event_reward_fn/mean": 10.00390625, | |
| "rewards/event_reward_fn/std": 5.970632314682007, | |
| "rewards/format_reward_fn/mean": 0.9211751334369183, | |
| "rewards/format_reward_fn/std": 0.23383971489965916, | |
| "step": 3584 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052734375, | |
| "completions/max_length": 254.0625, | |
| "completions/max_terminated_length": 251.4375, | |
| "completions/mean_length": 218.853515625, | |
| "completions/mean_terminated_length": 216.94453525543213, | |
| "completions/min_length": 179.1875, | |
| "completions/min_terminated_length": 179.1875, | |
| "entropy": 0.0776207884773612, | |
| "epoch": 3.498542274052478, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.08279092609882355, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 294202350.0, | |
| "reward": 12.301475286483765, | |
| "reward_std": 0.799699567258358, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9285315871238708, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2019159458577633, | |
| "rewards/event_reward_fn/mean": 10.4345703125, | |
| "rewards/event_reward_fn/std": 5.9011257737874985, | |
| "rewards/format_reward_fn/mean": 0.9383733309805393, | |
| "rewards/format_reward_fn/std": 0.20106245297938585, | |
| "step": 3600 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0087890625, | |
| "completions/max_length": 249.0625, | |
| "completions/max_terminated_length": 247.9375, | |
| "completions/mean_length": 211.4404296875, | |
| "completions/mean_terminated_length": 211.08014678955078, | |
| "completions/min_length": 170.5, | |
| "completions/min_terminated_length": 170.5, | |
| "entropy": 0.07809598417952657, | |
| "epoch": 3.5140913508260447, | |
| "frac_reward_zero_std": 0.29296875, | |
| "grad_norm": 0.15841352939605713, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0026, | |
| "num_tokens": 295538057.0, | |
| "reward": 11.816706955432892, | |
| "reward_std": 0.8686831034719944, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9696620255708694, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.08471985626965761, | |
| "rewards/event_reward_fn/mean": 9.865234375, | |
| "rewards/event_reward_fn/std": 5.069239139556885, | |
| "rewards/format_reward_fn/mean": 0.9818103611469269, | |
| "rewards/format_reward_fn/std": 0.07833539508283138, | |
| "step": 3616 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 248.5, | |
| "completions/max_terminated_length": 244.0625, | |
| "completions/mean_length": 207.5703125, | |
| "completions/mean_terminated_length": 206.40735816955566, | |
| "completions/min_length": 167.8125, | |
| "completions/min_terminated_length": 167.8125, | |
| "entropy": 0.07908624736592174, | |
| "epoch": 3.5296404275996114, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.11043195426464081, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0023, | |
| "num_tokens": 296834797.0, | |
| "reward": 11.86592173576355, | |
| "reward_std": 0.8323170337826014, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9380478039383888, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17831697227666155, | |
| "rewards/event_reward_fn/mean": 9.974609375, | |
| "rewards/event_reward_fn/std": 5.845152243971825, | |
| "rewards/format_reward_fn/mean": 0.9532645083963871, | |
| "rewards/format_reward_fn/std": 0.17017430812120438, | |
| "step": 3632 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 250.1875, | |
| "completions/max_terminated_length": 247.3125, | |
| "completions/mean_length": 207.181640625, | |
| "completions/mean_terminated_length": 206.51923084259033, | |
| "completions/min_length": 165.0, | |
| "completions/min_terminated_length": 165.0, | |
| "entropy": 0.08088134974241257, | |
| "epoch": 3.5451895043731776, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.15753485262393951, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 298106083.0, | |
| "reward": 11.870498239994049, | |
| "reward_std": 0.757409542798996, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.955335222184658, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13619694579392672, | |
| "rewards/event_reward_fn/mean": 9.94921875, | |
| "rewards/event_reward_fn/std": 5.25686414539814, | |
| "rewards/format_reward_fn/mean": 0.9659440144896507, | |
| "rewards/format_reward_fn/std": 0.13222627015784383, | |
| "step": 3648 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0107421875, | |
| "completions/max_length": 244.625, | |
| "completions/max_terminated_length": 241.5, | |
| "completions/mean_length": 203.419921875, | |
| "completions/mean_terminated_length": 202.85576915740967, | |
| "completions/min_length": 161.8125, | |
| "completions/min_terminated_length": 161.8125, | |
| "entropy": 0.07925571827217937, | |
| "epoch": 3.5607385811467447, | |
| "frac_reward_zero_std": 0.3984375, | |
| "grad_norm": 0.06831870973110199, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0004, | |
| "num_tokens": 299424197.0, | |
| "reward": 11.959875285625458, | |
| "reward_std": 0.8259032759815454, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9445747584104538, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1655241074040532, | |
| "rewards/event_reward_fn/mean": 10.0576171875, | |
| "rewards/event_reward_fn/std": 5.623490899801254, | |
| "rewards/format_reward_fn/mean": 0.9576835297048092, | |
| "rewards/format_reward_fn/std": 0.15519985277205706, | |
| "step": 3664 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 247.1875, | |
| "completions/max_terminated_length": 243.25, | |
| "completions/mean_length": 200.2255859375, | |
| "completions/mean_terminated_length": 199.5936861038208, | |
| "completions/min_length": 159.9375, | |
| "completions/min_terminated_length": 159.9375, | |
| "entropy": 0.07751462701708078, | |
| "epoch": 3.576287657920311, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.1378462016582489, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0039, | |
| "num_tokens": 300693292.0, | |
| "reward": 11.735808372497559, | |
| "reward_std": 0.7609035409986973, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9578013271093369, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.11039561772486195, | |
| "rewards/event_reward_fn/mean": 9.8076171875, | |
| "rewards/event_reward_fn/std": 5.369291961193085, | |
| "rewards/format_reward_fn/mean": 0.9703896977007389, | |
| "rewards/format_reward_fn/std": 0.10083504673093557, | |
| "step": 3680 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01953125, | |
| "completions/max_length": 244.625, | |
| "completions/max_terminated_length": 240.25, | |
| "completions/mean_length": 204.474609375, | |
| "completions/mean_terminated_length": 203.49135780334473, | |
| "completions/min_length": 162.5625, | |
| "completions/min_terminated_length": 162.5625, | |
| "entropy": 0.07588420668616891, | |
| "epoch": 3.5918367346938775, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.15248270332813263, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0039, | |
| "num_tokens": 302052422.0, | |
| "reward": 11.87961357831955, | |
| "reward_std": 0.9203954320400953, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.931057620793581, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16641236003488302, | |
| "rewards/event_reward_fn/mean": 9.994140625, | |
| "rewards/event_reward_fn/std": 5.567791044712067, | |
| "rewards/format_reward_fn/mean": 0.9544154554605484, | |
| "rewards/format_reward_fn/std": 0.14701158134266734, | |
| "step": 3696 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0224609375, | |
| "completions/max_length": 250.5625, | |
| "completions/max_terminated_length": 246.6875, | |
| "completions/mean_length": 208.583984375, | |
| "completions/mean_terminated_length": 207.5050506591797, | |
| "completions/min_length": 171.0625, | |
| "completions/min_terminated_length": 171.0625, | |
| "entropy": 0.07612508768215775, | |
| "epoch": 3.607385811467444, | |
| "frac_reward_zero_std": 0.3046875, | |
| "grad_norm": 0.23306649923324585, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0009, | |
| "num_tokens": 303372112.0, | |
| "reward": 11.453840970993042, | |
| "reward_std": 0.7904142383486032, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9510433189570904, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14118389890063554, | |
| "rewards/event_reward_fn/mean": 9.5380859375, | |
| "rewards/event_reward_fn/std": 5.8418983072042465, | |
| "rewards/format_reward_fn/mean": 0.9647116847336292, | |
| "rewards/format_reward_fn/std": 0.1381231863051653, | |
| "step": 3712 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 249.25, | |
| "completions/max_terminated_length": 248.0, | |
| "completions/mean_length": 209.95703125, | |
| "completions/mean_terminated_length": 209.5190019607544, | |
| "completions/min_length": 171.8125, | |
| "completions/min_terminated_length": 171.8125, | |
| "entropy": 0.0707268959376961, | |
| "epoch": 3.622934888241011, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.07385765016078949, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0036, | |
| "num_tokens": 304684204.0, | |
| "reward": 11.709131598472595, | |
| "reward_std": 0.8085791561752558, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9635815359652042, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10360126395244151, | |
| "rewards/event_reward_fn/mean": 9.76953125, | |
| "rewards/event_reward_fn/std": 5.746441006660461, | |
| "rewards/format_reward_fn/mean": 0.9760188795626163, | |
| "rewards/format_reward_fn/std": 0.09353231685236096, | |
| "step": 3728 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 252.5, | |
| "completions/max_terminated_length": 251.4375, | |
| "completions/mean_length": 213.353515625, | |
| "completions/mean_terminated_length": 212.29650974273682, | |
| "completions/min_length": 169.625, | |
| "completions/min_terminated_length": 169.625, | |
| "entropy": 0.0739557440392673, | |
| "epoch": 3.6384839650145775, | |
| "frac_reward_zero_std": 0.30078125, | |
| "grad_norm": 0.1414438933134079, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 306009662.0, | |
| "reward": 11.917364478111267, | |
| "reward_std": 0.9005591496825218, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9439749345183372, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1546652951510623, | |
| "rewards/event_reward_fn/mean": 10.013671875, | |
| "rewards/event_reward_fn/std": 6.16796900331974, | |
| "rewards/format_reward_fn/mean": 0.9597175717353821, | |
| "rewards/format_reward_fn/std": 0.14394072350114584, | |
| "step": 3744 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0107421875, | |
| "completions/max_length": 251.375, | |
| "completions/max_terminated_length": 247.9375, | |
| "completions/mean_length": 210.458984375, | |
| "completions/mean_terminated_length": 209.95756244659424, | |
| "completions/min_length": 171.9375, | |
| "completions/min_terminated_length": 171.9375, | |
| "entropy": 0.07131304289214313, | |
| "epoch": 3.6540330417881437, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.2263481467962265, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 307342912.0, | |
| "reward": 11.548463881015778, | |
| "reward_std": 0.7522790785878897, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.93173423781991, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17529538087546825, | |
| "rewards/event_reward_fn/mean": 9.6767578125, | |
| "rewards/event_reward_fn/std": 6.1354245990514755, | |
| "rewards/format_reward_fn/mean": 0.9399717897176743, | |
| "rewards/format_reward_fn/std": 0.17958084493875504, | |
| "step": 3760 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0009765625, | |
| "completions/max_length": 238.25, | |
| "completions/max_terminated_length": 238.0625, | |
| "completions/mean_length": 201.1005859375, | |
| "completions/mean_terminated_length": 201.05244064331055, | |
| "completions/min_length": 159.5, | |
| "completions/min_terminated_length": 159.5, | |
| "entropy": 0.06464898842386901, | |
| "epoch": 3.6695821185617103, | |
| "frac_reward_zero_std": 0.390625, | |
| "grad_norm": 0.09882862865924835, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0046, | |
| "num_tokens": 308668915.0, | |
| "reward": 11.542174577713013, | |
| "reward_std": 0.6965284887701273, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9554044380784035, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.11500480188988149, | |
| "rewards/event_reward_fn/mean": 9.619140625, | |
| "rewards/event_reward_fn/std": 5.47371518611908, | |
| "rewards/format_reward_fn/mean": 0.9676294289529324, | |
| "rewards/format_reward_fn/std": 0.11533198039978743, | |
| "step": 3776 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 241.8125, | |
| "completions/max_terminated_length": 241.0625, | |
| "completions/mean_length": 203.2158203125, | |
| "completions/mean_terminated_length": 202.8022975921631, | |
| "completions/min_length": 164.75, | |
| "completions/min_terminated_length": 164.75, | |
| "entropy": 0.07011180557310581, | |
| "epoch": 3.685131195335277, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.13302014768123627, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0071, | |
| "num_tokens": 309952960.0, | |
| "reward": 12.114792346954346, | |
| "reward_std": 0.7427178621292114, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9610139429569244, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.09992504899855703, | |
| "rewards/event_reward_fn/mean": 10.1787109375, | |
| "rewards/event_reward_fn/std": 5.834049671888351, | |
| "rewards/format_reward_fn/mean": 0.9750674292445183, | |
| "rewards/format_reward_fn/std": 0.08985681226477027, | |
| "step": 3792 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 248.75, | |
| "completions/max_terminated_length": 247.3125, | |
| "completions/mean_length": 212.1357421875, | |
| "completions/mean_terminated_length": 211.2209234237671, | |
| "completions/min_length": 172.25, | |
| "completions/min_terminated_length": 172.25, | |
| "entropy": 0.07245555147528648, | |
| "epoch": 3.7006802721088436, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.16854174435138702, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 311247611.0, | |
| "reward": 11.610692858695984, | |
| "reward_std": 0.6659273523837328, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9450487568974495, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.11994595197029412, | |
| "rewards/event_reward_fn/mean": 9.697265625, | |
| "rewards/event_reward_fn/std": 5.562545984983444, | |
| "rewards/format_reward_fn/mean": 0.9683784395456314, | |
| "rewards/format_reward_fn/std": 0.11825172184035182, | |
| "step": 3808 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.044921875, | |
| "completions/max_length": 254.6875, | |
| "completions/max_terminated_length": 252.3125, | |
| "completions/mean_length": 217.888671875, | |
| "completions/mean_terminated_length": 216.13310527801514, | |
| "completions/min_length": 172.5, | |
| "completions/min_terminated_length": 172.5, | |
| "entropy": 0.0743629289790988, | |
| "epoch": 3.7162293488824103, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.16505570709705353, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 312542821.0, | |
| "reward": 11.156748652458191, | |
| "reward_std": 0.7675234004855156, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9249761961400509, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.20397475379286334, | |
| "rewards/event_reward_fn/mean": 9.2919921875, | |
| "rewards/event_reward_fn/std": 5.000220879912376, | |
| "rewards/format_reward_fn/mean": 0.939780205488205, | |
| "rewards/format_reward_fn/std": 0.20532220043241978, | |
| "step": 3824 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033203125, | |
| "completions/max_length": 254.25, | |
| "completions/max_terminated_length": 251.5, | |
| "completions/mean_length": 212.4443359375, | |
| "completions/mean_terminated_length": 210.93013668060303, | |
| "completions/min_length": 162.625, | |
| "completions/min_terminated_length": 162.625, | |
| "entropy": 0.07023975322954357, | |
| "epoch": 3.7317784256559765, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.060382600873708725, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 313934308.0, | |
| "reward": 11.860353231430054, | |
| "reward_std": 0.8548767194151878, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9291994869709015, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19955480488715693, | |
| "rewards/event_reward_fn/mean": 9.9921875, | |
| "rewards/event_reward_fn/std": 6.334605395793915, | |
| "rewards/format_reward_fn/mean": 0.9389663524925709, | |
| "rewards/format_reward_fn/std": 0.20007416978478432, | |
| "step": 3840 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0185546875, | |
| "completions/max_length": 253.8125, | |
| "completions/max_terminated_length": 250.0625, | |
| "completions/mean_length": 210.4931640625, | |
| "completions/mean_terminated_length": 209.59508609771729, | |
| "completions/min_length": 168.1875, | |
| "completions/min_terminated_length": 168.1875, | |
| "entropy": 0.07439161464571953, | |
| "epoch": 3.747327502429543, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.2331985980272293, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 315250945.0, | |
| "reward": 11.873708844184875, | |
| "reward_std": 0.8904512841254473, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9391452148556709, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17918783274944872, | |
| "rewards/event_reward_fn/mean": 9.984375, | |
| "rewards/event_reward_fn/std": 5.551610827445984, | |
| "rewards/format_reward_fn/mean": 0.9501884989440441, | |
| "rewards/format_reward_fn/std": 0.1802559308707714, | |
| "step": 3856 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.00390625, | |
| "completions/max_length": 247.875, | |
| "completions/max_terminated_length": 246.25, | |
| "completions/mean_length": 208.615234375, | |
| "completions/mean_terminated_length": 208.4278688430786, | |
| "completions/min_length": 165.25, | |
| "completions/min_terminated_length": 165.25, | |
| "entropy": 0.06975571275688708, | |
| "epoch": 3.76287657920311, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.12414126843214035, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 316553199.0, | |
| "reward": 12.228178679943085, | |
| "reward_std": 0.7352720461785793, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.961551733314991, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.11315030924743041, | |
| "rewards/event_reward_fn/mean": 10.298828125, | |
| "rewards/event_reward_fn/std": 5.328288942575455, | |
| "rewards/format_reward_fn/mean": 0.9677988588809967, | |
| "rewards/format_reward_fn/std": 0.11893212096765637, | |
| "step": 3872 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0068359375, | |
| "completions/max_length": 245.6875, | |
| "completions/max_terminated_length": 244.5, | |
| "completions/mean_length": 206.140625, | |
| "completions/mean_terminated_length": 205.78240394592285, | |
| "completions/min_length": 165.625, | |
| "completions/min_terminated_length": 165.625, | |
| "entropy": 0.07621000800281763, | |
| "epoch": 3.7784256559766765, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.20296157896518707, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0012, | |
| "num_tokens": 317903031.0, | |
| "reward": 11.587688386440277, | |
| "reward_std": 0.7579143699258566, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9600600115954876, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12998962739948183, | |
| "rewards/event_reward_fn/mean": 9.66796875, | |
| "rewards/event_reward_fn/std": 5.291949540376663, | |
| "rewards/format_reward_fn/mean": 0.9596595913171768, | |
| "rewards/format_reward_fn/std": 0.14018871169537306, | |
| "step": 3888 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 249.25, | |
| "completions/max_terminated_length": 244.5, | |
| "completions/mean_length": 201.33203125, | |
| "completions/mean_terminated_length": 200.6855239868164, | |
| "completions/min_length": 158.8125, | |
| "completions/min_terminated_length": 158.8125, | |
| "entropy": 0.07235691323876381, | |
| "epoch": 3.793974732750243, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.10855089873075485, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0024, | |
| "num_tokens": 319240939.0, | |
| "reward": 11.683230638504028, | |
| "reward_std": 0.7290781699120998, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9578493759036064, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13556190475355834, | |
| "rewards/event_reward_fn/mean": 9.7548828125, | |
| "rewards/event_reward_fn/std": 5.5955929309129715, | |
| "rewards/format_reward_fn/mean": 0.970498513430357, | |
| "rewards/format_reward_fn/std": 0.12245456594973803, | |
| "step": 3904 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 247.1875, | |
| "completions/max_terminated_length": 244.0625, | |
| "completions/mean_length": 205.2587890625, | |
| "completions/mean_terminated_length": 204.43410301208496, | |
| "completions/min_length": 162.8125, | |
| "completions/min_terminated_length": 162.8125, | |
| "entropy": 0.07462676661089063, | |
| "epoch": 3.8095238095238093, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.23824094235897064, | |
| "learning_rate": 5e-05, | |
| "loss": -0.004, | |
| "num_tokens": 320572924.0, | |
| "reward": 12.206887483596802, | |
| "reward_std": 0.8420960828661919, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9535179361701012, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13041677000001073, | |
| "rewards/event_reward_fn/mean": 10.291015625, | |
| "rewards/event_reward_fn/std": 5.89475154876709, | |
| "rewards/format_reward_fn/mean": 0.9623538255691528, | |
| "rewards/format_reward_fn/std": 0.1202199412509799, | |
| "step": 3920 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 251.5, | |
| "completions/mean_length": 210.583984375, | |
| "completions/mean_terminated_length": 209.80176162719727, | |
| "completions/min_length": 169.9375, | |
| "completions/min_terminated_length": 169.9375, | |
| "entropy": 0.07527026068419218, | |
| "epoch": 3.825072886297376, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.09440134465694427, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0029, | |
| "num_tokens": 321928146.0, | |
| "reward": 12.062871038913727, | |
| "reward_std": 0.6898276209831238, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9501573704183102, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13593709268025123, | |
| "rewards/event_reward_fn/mean": 10.1474609375, | |
| "rewards/event_reward_fn/std": 5.55875962972641, | |
| "rewards/format_reward_fn/mean": 0.9652528204023838, | |
| "rewards/format_reward_fn/std": 0.13466597883962095, | |
| "step": 3936 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.126953125, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 253.9375, | |
| "completions/mean_length": 223.7958984375, | |
| "completions/mean_terminated_length": 219.62405586242676, | |
| "completions/min_length": 177.5, | |
| "completions/min_terminated_length": 177.5, | |
| "entropy": 0.07991416612640023, | |
| "epoch": 3.8406219630709426, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.15865761041641235, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0038, | |
| "num_tokens": 323291405.0, | |
| "reward": 12.076306104660034, | |
| "reward_std": 0.9530179928988218, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.854170698672533, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2860095868818462, | |
| "rewards/event_reward_fn/mean": 10.3525390625, | |
| "rewards/event_reward_fn/std": 5.96200692653656, | |
| "rewards/format_reward_fn/mean": 0.869596354663372, | |
| "rewards/format_reward_fn/std": 0.2936730571091175, | |
| "step": 3952 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0986328125, | |
| "completions/max_length": 255.0, | |
| "completions/max_terminated_length": 252.75, | |
| "completions/mean_length": 222.8603515625, | |
| "completions/mean_terminated_length": 219.4017848968506, | |
| "completions/min_length": 178.6875, | |
| "completions/min_terminated_length": 178.6875, | |
| "entropy": 0.07927003409713507, | |
| "epoch": 3.8561710398445093, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.12801237404346466, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0036, | |
| "num_tokens": 324610378.0, | |
| "reward": 11.521483957767487, | |
| "reward_std": 0.8510149158537388, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8705999292433262, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.26414805941749364, | |
| "rewards/event_reward_fn/mean": 9.7568359375, | |
| "rewards/event_reward_fn/std": 5.6417785584926605, | |
| "rewards/format_reward_fn/mean": 0.8940479382872581, | |
| "rewards/format_reward_fn/std": 0.26540128607302904, | |
| "step": 3968 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 253.25, | |
| "completions/max_terminated_length": 251.8125, | |
| "completions/mean_length": 213.95703125, | |
| "completions/mean_terminated_length": 211.99990463256836, | |
| "completions/min_length": 172.375, | |
| "completions/min_terminated_length": 172.375, | |
| "entropy": 0.07248568348586559, | |
| "epoch": 3.871720116618076, | |
| "frac_reward_zero_std": 0.31640625, | |
| "grad_norm": 0.14652633666992188, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 325886034.0, | |
| "reward": 11.792625546455383, | |
| "reward_std": 0.7393063232302666, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9195492528378963, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19524600135628134, | |
| "rewards/event_reward_fn/mean": 9.9345703125, | |
| "rewards/event_reward_fn/std": 5.343173682689667, | |
| "rewards/format_reward_fn/mean": 0.9385060183703899, | |
| "rewards/format_reward_fn/std": 0.1972238675225526, | |
| "step": 3984 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0341796875, | |
| "completions/max_length": 251.8125, | |
| "completions/max_terminated_length": 249.875, | |
| "completions/mean_length": 212.359375, | |
| "completions/mean_terminated_length": 210.8219394683838, | |
| "completions/min_length": 172.0, | |
| "completions/min_terminated_length": 172.0, | |
| "entropy": 0.07265612436458468, | |
| "epoch": 3.887269193391642, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.16204002499580383, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0017, | |
| "num_tokens": 327199118.0, | |
| "reward": 11.693296015262604, | |
| "reward_std": 0.753578519448638, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9361162185668945, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16267945885192603, | |
| "rewards/event_reward_fn/mean": 9.806640625, | |
| "rewards/event_reward_fn/std": 5.245450034737587, | |
| "rewards/format_reward_fn/mean": 0.9505392760038376, | |
| "rewards/format_reward_fn/std": 0.15567059512250125, | |
| "step": 4000 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 250.8125, | |
| "completions/max_terminated_length": 247.9375, | |
| "completions/mean_length": 209.841796875, | |
| "completions/mean_terminated_length": 209.13395309448242, | |
| "completions/min_length": 168.9375, | |
| "completions/min_terminated_length": 168.9375, | |
| "entropy": 0.07126375450752676, | |
| "epoch": 3.902818270165209, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.2677111327648163, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0018, | |
| "num_tokens": 328480424.0, | |
| "reward": 11.549213945865631, | |
| "reward_std": 0.8479338348843157, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9514924548566341, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1373359472490847, | |
| "rewards/event_reward_fn/mean": 9.6357421875, | |
| "rewards/event_reward_fn/std": 5.592780202627182, | |
| "rewards/format_reward_fn/mean": 0.9619791656732559, | |
| "rewards/format_reward_fn/std": 0.13886410370469093, | |
| "step": 4016 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 252.75, | |
| "completions/mean_length": 217.701171875, | |
| "completions/mean_terminated_length": 215.90612506866455, | |
| "completions/min_length": 175.25, | |
| "completions/min_terminated_length": 175.25, | |
| "entropy": 0.0726012377999723, | |
| "epoch": 3.9183673469387754, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.1763237565755844, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 329827018.0, | |
| "reward": 11.842731773853302, | |
| "reward_std": 0.6917910370975733, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9177339598536491, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18734293011948466, | |
| "rewards/event_reward_fn/mean": 9.9873046875, | |
| "rewards/event_reward_fn/std": 6.566082060337067, | |
| "rewards/format_reward_fn/mean": 0.9376931414008141, | |
| "rewards/format_reward_fn/std": 0.17624951899051666, | |
| "step": 4032 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029296875, | |
| "completions/max_length": 253.25, | |
| "completions/max_terminated_length": 250.625, | |
| "completions/mean_length": 218.74609375, | |
| "completions/mean_terminated_length": 217.62857151031494, | |
| "completions/min_length": 182.8125, | |
| "completions/min_terminated_length": 182.8125, | |
| "entropy": 0.07676544087007642, | |
| "epoch": 3.933916423712342, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.2421189248561859, | |
| "learning_rate": 5e-05, | |
| "loss": 0.003, | |
| "num_tokens": 331133410.0, | |
| "reward": 11.190333425998688, | |
| "reward_std": 0.7282675616443157, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9304395839571953, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1906340589048341, | |
| "rewards/event_reward_fn/mean": 9.3154296875, | |
| "rewards/event_reward_fn/std": 5.540377572178841, | |
| "rewards/format_reward_fn/mean": 0.9444642849266529, | |
| "rewards/format_reward_fn/std": 0.1848340081050992, | |
| "step": 4048 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0556640625, | |
| "completions/max_length": 255.1875, | |
| "completions/max_terminated_length": 253.125, | |
| "completions/mean_length": 217.5888671875, | |
| "completions/mean_terminated_length": 215.29860401153564, | |
| "completions/min_length": 177.25, | |
| "completions/min_terminated_length": 177.25, | |
| "entropy": 0.07212572102434933, | |
| "epoch": 3.9494655004859087, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.10932140052318573, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 332477061.0, | |
| "reward": 12.174343943595886, | |
| "reward_std": 0.8341186344623566, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9223817475140095, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2118896566098556, | |
| "rewards/event_reward_fn/mean": 10.31640625, | |
| "rewards/event_reward_fn/std": 5.680862247943878, | |
| "rewards/format_reward_fn/mean": 0.9355558678507805, | |
| "rewards/format_reward_fn/std": 0.21579903922975063, | |
| "step": 4064 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033203125, | |
| "completions/max_length": 254.6875, | |
| "completions/max_terminated_length": 251.5, | |
| "completions/mean_length": 215.8310546875, | |
| "completions/mean_terminated_length": 214.54155158996582, | |
| "completions/min_length": 171.75, | |
| "completions/min_terminated_length": 171.75, | |
| "entropy": 0.07576306629925966, | |
| "epoch": 3.9650145772594754, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.1289004385471344, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0046, | |
| "num_tokens": 333788636.0, | |
| "reward": 12.123865008354187, | |
| "reward_std": 0.7025708928704262, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9466467574238777, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1455282896058634, | |
| "rewards/event_reward_fn/mean": 10.212890625, | |
| "rewards/event_reward_fn/std": 5.390189632773399, | |
| "rewards/format_reward_fn/mean": 0.9643276371061802, | |
| "rewards/format_reward_fn/std": 0.14344790298491716, | |
| "step": 4080 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02734375, | |
| "completions/max_length": 253.5, | |
| "completions/max_terminated_length": 249.625, | |
| "completions/mean_length": 215.7744140625, | |
| "completions/mean_terminated_length": 214.6886749267578, | |
| "completions/min_length": 172.5, | |
| "completions/min_terminated_length": 172.5, | |
| "entropy": 0.08155694557353854, | |
| "epoch": 3.980563654033042, | |
| "frac_reward_zero_std": 0.33984375, | |
| "grad_norm": 0.10887642949819565, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 335104729.0, | |
| "reward": 12.040693879127502, | |
| "reward_std": 0.7290069870650768, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9345810934901237, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1722267406876199, | |
| "rewards/event_reward_fn/mean": 10.150390625, | |
| "rewards/event_reward_fn/std": 5.202703006565571, | |
| "rewards/format_reward_fn/mean": 0.9557221904397011, | |
| "rewards/format_reward_fn/std": 0.16959328716620803, | |
| "step": 4096 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0458984375, | |
| "completions/max_length": 254.375, | |
| "completions/max_terminated_length": 251.0, | |
| "completions/mean_length": 214.6845703125, | |
| "completions/mean_terminated_length": 212.69677925109863, | |
| "completions/min_length": 165.9375, | |
| "completions/min_terminated_length": 165.9375, | |
| "entropy": 0.08474897220730782, | |
| "epoch": 3.9961127308066082, | |
| "frac_reward_zero_std": 0.2421875, | |
| "grad_norm": 0.11697645485401154, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0037, | |
| "num_tokens": 336411186.0, | |
| "reward": 11.956246078014374, | |
| "reward_std": 0.9525406192988157, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9217490442097187, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17466088302899152, | |
| "rewards/event_reward_fn/mean": 10.0849609375, | |
| "rewards/event_reward_fn/std": 5.622701555490494, | |
| "rewards/format_reward_fn/mean": 0.9495361521840096, | |
| "rewards/format_reward_fn/std": 0.1639328496530652, | |
| "step": 4112 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029296875, | |
| "completions/max_length": 247.9375, | |
| "completions/max_terminated_length": 247.0625, | |
| "completions/mean_length": 208.591796875, | |
| "completions/mean_terminated_length": 207.1873264312744, | |
| "completions/min_length": 165.8125, | |
| "completions/min_terminated_length": 165.8125, | |
| "entropy": 0.08839736273512244, | |
| "epoch": 4.011661807580175, | |
| "frac_reward_zero_std": 0.31640625, | |
| "grad_norm": 0.11042279005050659, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 337786032.0, | |
| "reward": 11.83129894733429, | |
| "reward_std": 0.8202849626541138, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9457570128142834, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1432389054680243, | |
| "rewards/event_reward_fn/mean": 9.9228515625, | |
| "rewards/event_reward_fn/std": 6.496607750654221, | |
| "rewards/format_reward_fn/mean": 0.9626903533935547, | |
| "rewards/format_reward_fn/std": 0.1279599037952721, | |
| "step": 4128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 249.125, | |
| "completions/max_terminated_length": 246.25, | |
| "completions/mean_length": 205.5087890625, | |
| "completions/mean_terminated_length": 204.59263134002686, | |
| "completions/min_length": 160.125, | |
| "completions/min_terminated_length": 160.125, | |
| "entropy": 0.08152704173699021, | |
| "epoch": 4.0272108843537415, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.23320095241069794, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0061, | |
| "num_tokens": 339108101.0, | |
| "reward": 11.968081533908844, | |
| "reward_std": 0.7537729293107986, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9513398185372353, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1318189318990335, | |
| "rewards/event_reward_fn/mean": 10.0517578125, | |
| "rewards/event_reward_fn/std": 5.673033118247986, | |
| "rewards/format_reward_fn/mean": 0.9649838842451572, | |
| "rewards/format_reward_fn/std": 0.12552043702453375, | |
| "step": 4144 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.048828125, | |
| "completions/max_length": 253.625, | |
| "completions/max_terminated_length": 249.9375, | |
| "completions/mean_length": 212.2314453125, | |
| "completions/mean_terminated_length": 210.16049671173096, | |
| "completions/min_length": 164.8125, | |
| "completions/min_terminated_length": 164.8125, | |
| "entropy": 0.08093675132840872, | |
| "epoch": 4.042759961127308, | |
| "frac_reward_zero_std": 0.265625, | |
| "grad_norm": 0.126747727394104, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 340463642.0, | |
| "reward": 11.930916666984558, | |
| "reward_std": 1.0069974604994059, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9255944900214672, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19054480863269418, | |
| "rewards/event_reward_fn/mean": 10.0576171875, | |
| "rewards/event_reward_fn/std": 5.881117805838585, | |
| "rewards/format_reward_fn/mean": 0.9477050788700581, | |
| "rewards/format_reward_fn/std": 0.1838087635114789, | |
| "step": 4160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 254.625, | |
| "completions/mean_length": 223.8154296875, | |
| "completions/mean_terminated_length": 217.5712661743164, | |
| "completions/min_length": 164.0625, | |
| "completions/min_terminated_length": 164.0625, | |
| "entropy": 0.07918633660301566, | |
| "epoch": 4.058309037900875, | |
| "frac_reward_zero_std": 0.24609375, | |
| "grad_norm": 0.3148137629032135, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0083, | |
| "num_tokens": 341802333.0, | |
| "reward": 12.038546562194824, | |
| "reward_std": 0.9452639296650887, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8190805651247501, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.34027846716344357, | |
| "rewards/event_reward_fn/mean": 10.375, | |
| "rewards/event_reward_fn/std": 5.990983292460442, | |
| "rewards/format_reward_fn/mean": 0.844466146081686, | |
| "rewards/format_reward_fn/std": 0.3460330106317997, | |
| "step": 4176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.037109375, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 249.6875, | |
| "completions/mean_length": 212.3359375, | |
| "completions/mean_terminated_length": 210.6906509399414, | |
| "completions/min_length": 165.5625, | |
| "completions/min_terminated_length": 165.5625, | |
| "entropy": 0.07842250727117062, | |
| "epoch": 4.073858114674441, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.1384952962398529, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 343114541.0, | |
| "reward": 12.094359815120697, | |
| "reward_std": 0.7967111878097057, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9312739335000515, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17515901010483503, | |
| "rewards/event_reward_fn/mean": 10.20703125, | |
| "rewards/event_reward_fn/std": 6.081012561917305, | |
| "rewards/format_reward_fn/mean": 0.9560546875, | |
| "rewards/format_reward_fn/std": 0.16031964495778084, | |
| "step": 4192 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0361328125, | |
| "completions/max_length": 254.1875, | |
| "completions/max_terminated_length": 249.9375, | |
| "completions/mean_length": 213.181640625, | |
| "completions/mean_terminated_length": 211.6773853302002, | |
| "completions/min_length": 171.3125, | |
| "completions/min_terminated_length": 171.3125, | |
| "entropy": 0.08099523605778813, | |
| "epoch": 4.089407191448008, | |
| "frac_reward_zero_std": 0.26953125, | |
| "grad_norm": 0.09165678173303604, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0015, | |
| "num_tokens": 344439075.0, | |
| "reward": 12.245625615119934, | |
| "reward_std": 0.8452774472534657, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9267520122230053, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18223469553049654, | |
| "rewards/event_reward_fn/mean": 10.365234375, | |
| "rewards/event_reward_fn/std": 6.175159931182861, | |
| "rewards/format_reward_fn/mean": 0.9536393284797668, | |
| "rewards/format_reward_fn/std": 0.17545001558028162, | |
| "step": 4208 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0263671875, | |
| "completions/max_length": 252.5, | |
| "completions/max_terminated_length": 248.75, | |
| "completions/mean_length": 211.978515625, | |
| "completions/mean_terminated_length": 210.79150772094727, | |
| "completions/min_length": 172.3125, | |
| "completions/min_terminated_length": 172.3125, | |
| "entropy": 0.07734930235892534, | |
| "epoch": 4.104956268221574, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.1310277283191681, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0012, | |
| "num_tokens": 345764597.0, | |
| "reward": 12.470105409622192, | |
| "reward_std": 0.7958364505320787, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9391484148800373, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.15891925140749663, | |
| "rewards/event_reward_fn/mean": 10.5673828125, | |
| "rewards/event_reward_fn/std": 5.292239099740982, | |
| "rewards/format_reward_fn/mean": 0.9635742194950581, | |
| "rewards/format_reward_fn/std": 0.13998021464794874, | |
| "step": 4224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05859375, | |
| "completions/max_length": 255.4375, | |
| "completions/max_terminated_length": 252.25, | |
| "completions/mean_length": 215.884765625, | |
| "completions/mean_terminated_length": 213.35174751281738, | |
| "completions/min_length": 174.375, | |
| "completions/min_terminated_length": 174.375, | |
| "entropy": 0.08016611728817225, | |
| "epoch": 4.1205053449951405, | |
| "frac_reward_zero_std": 0.328125, | |
| "grad_norm": 0.16507475078105927, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0012, | |
| "num_tokens": 347241199.0, | |
| "reward": 12.658133804798126, | |
| "reward_std": 0.771758034825325, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9066168181598186, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22669256868539378, | |
| "rewards/event_reward_fn/mean": 10.82421875, | |
| "rewards/event_reward_fn/std": 6.56681552529335, | |
| "rewards/format_reward_fn/mean": 0.9272981770336628, | |
| "rewards/format_reward_fn/std": 0.2251733886078, | |
| "step": 4240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 255.0, | |
| "completions/max_terminated_length": 251.8125, | |
| "completions/mean_length": 213.9658203125, | |
| "completions/mean_terminated_length": 212.64927101135254, | |
| "completions/min_length": 169.25, | |
| "completions/min_terminated_length": 169.25, | |
| "entropy": 0.07921183155849576, | |
| "epoch": 4.136054421768708, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.10566289722919464, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0023, | |
| "num_tokens": 348577168.0, | |
| "reward": 12.272885859012604, | |
| "reward_std": 0.759581271559, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9340510219335556, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18297056294977665, | |
| "rewards/event_reward_fn/mean": 10.3828125, | |
| "rewards/event_reward_fn/std": 6.081824213266373, | |
| "rewards/format_reward_fn/mean": 0.956022135913372, | |
| "rewards/format_reward_fn/std": 0.1737882625311613, | |
| "step": 4256 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0361328125, | |
| "completions/max_length": 254.375, | |
| "completions/max_terminated_length": 250.1875, | |
| "completions/mean_length": 212.3984375, | |
| "completions/mean_terminated_length": 210.8444414138794, | |
| "completions/min_length": 169.5, | |
| "completions/min_terminated_length": 169.5, | |
| "entropy": 0.08075638441368937, | |
| "epoch": 4.151603498542274, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.23219875991344452, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 349949360.0, | |
| "reward": 11.566748321056366, | |
| "reward_std": 0.6999896839261055, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9331243596971035, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1664692930644378, | |
| "rewards/event_reward_fn/mean": 9.6806640625, | |
| "rewards/event_reward_fn/std": 4.993004456162453, | |
| "rewards/format_reward_fn/mean": 0.952959917485714, | |
| "rewards/format_reward_fn/std": 0.15185340540483594, | |
| "step": 4272 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033203125, | |
| "completions/max_length": 253.375, | |
| "completions/max_terminated_length": 251.0, | |
| "completions/mean_length": 209.7763671875, | |
| "completions/mean_terminated_length": 208.1946315765381, | |
| "completions/min_length": 167.75, | |
| "completions/min_terminated_length": 167.75, | |
| "entropy": 0.07447958691045642, | |
| "epoch": 4.167152575315841, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.11696634441614151, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0037, | |
| "num_tokens": 351294483.0, | |
| "reward": 12.276101768016815, | |
| "reward_std": 0.7893455550074577, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9404859393835068, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1471004862105474, | |
| "rewards/event_reward_fn/mean": 10.375, | |
| "rewards/event_reward_fn/std": 5.744745135307312, | |
| "rewards/format_reward_fn/mean": 0.9606158547103405, | |
| "rewards/format_reward_fn/std": 0.12628577766008675, | |
| "step": 4288 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02734375, | |
| "completions/max_length": 253.1875, | |
| "completions/max_terminated_length": 252.3125, | |
| "completions/mean_length": 213.9658203125, | |
| "completions/mean_terminated_length": 212.74925136566162, | |
| "completions/min_length": 172.25, | |
| "completions/min_terminated_length": 172.25, | |
| "entropy": 0.07558452151715755, | |
| "epoch": 4.182701652089407, | |
| "frac_reward_zero_std": 0.421875, | |
| "grad_norm": 0.09304752945899963, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 352616868.0, | |
| "reward": 12.646113216876984, | |
| "reward_std": 0.6985902674496174, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9403096325695515, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16551246301969513, | |
| "rewards/event_reward_fn/mean": 10.7451171875, | |
| "rewards/event_reward_fn/std": 6.137658327817917, | |
| "rewards/format_reward_fn/mean": 0.9606863856315613, | |
| "rewards/format_reward_fn/std": 0.15825087064877152, | |
| "step": 4304 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0400390625, | |
| "completions/max_length": 254.1875, | |
| "completions/max_terminated_length": 252.5625, | |
| "completions/mean_length": 215.220703125, | |
| "completions/mean_terminated_length": 213.58215522766113, | |
| "completions/min_length": 175.3125, | |
| "completions/min_terminated_length": 175.3125, | |
| "entropy": 0.08136412966996431, | |
| "epoch": 4.198250728862973, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.11478164047002792, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0012, | |
| "num_tokens": 353947786.0, | |
| "reward": 12.772146999835968, | |
| "reward_std": 0.8985444996505976, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9437018856406212, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17041826335480437, | |
| "rewards/event_reward_fn/mean": 10.8681640625, | |
| "rewards/event_reward_fn/std": 6.595528960227966, | |
| "rewards/format_reward_fn/mean": 0.9602811932563782, | |
| "rewards/format_reward_fn/std": 0.16472775302827358, | |
| "step": 4320 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0673828125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 253.0, | |
| "completions/mean_length": 220.6962890625, | |
| "completions/mean_terminated_length": 218.14343070983887, | |
| "completions/min_length": 178.3125, | |
| "completions/min_terminated_length": 178.3125, | |
| "entropy": 0.08518477249890566, | |
| "epoch": 4.21379980563654, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.39661943912506104, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0026, | |
| "num_tokens": 355262159.0, | |
| "reward": 12.166186690330505, | |
| "reward_std": 0.8557635508477688, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9099063575267792, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22038055513985455, | |
| "rewards/event_reward_fn/mean": 10.322265625, | |
| "rewards/event_reward_fn/std": 6.33206932246685, | |
| "rewards/format_reward_fn/mean": 0.9340147599577904, | |
| "rewards/format_reward_fn/std": 0.2138909688219428, | |
| "step": 4336 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0322265625, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 252.5, | |
| "completions/mean_length": 215.99609375, | |
| "completions/mean_terminated_length": 214.6772975921631, | |
| "completions/min_length": 171.6875, | |
| "completions/min_terminated_length": 171.6875, | |
| "entropy": 0.08633322129026055, | |
| "epoch": 4.229348882410107, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.08727186918258667, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 356636515.0, | |
| "reward": 12.367621660232544, | |
| "reward_std": 0.7627793811261654, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9262477792799473, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.19048181863036007, | |
| "rewards/event_reward_fn/mean": 10.494140625, | |
| "rewards/event_reward_fn/std": 5.73208275437355, | |
| "rewards/format_reward_fn/mean": 0.9472332261502743, | |
| "rewards/format_reward_fn/std": 0.18207533098757267, | |
| "step": 4352 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 252.6875, | |
| "completions/max_terminated_length": 251.125, | |
| "completions/mean_length": 214.400390625, | |
| "completions/mean_terminated_length": 213.53761100769043, | |
| "completions/min_length": 171.1875, | |
| "completions/min_terminated_length": 171.1875, | |
| "entropy": 0.09359000297263265, | |
| "epoch": 4.244897959183674, | |
| "frac_reward_zero_std": 0.359375, | |
| "grad_norm": 0.207139253616333, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0012, | |
| "num_tokens": 357958357.0, | |
| "reward": 11.592573344707489, | |
| "reward_std": 0.7030898407101631, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9569450728595257, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1302037090063095, | |
| "rewards/event_reward_fn/mean": 9.658203125, | |
| "rewards/event_reward_fn/std": 5.755192518234253, | |
| "rewards/format_reward_fn/mean": 0.9774251356720924, | |
| "rewards/format_reward_fn/std": 0.11306497757323086, | |
| "step": 4368 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0478515625, | |
| "completions/max_length": 254.25, | |
| "completions/max_terminated_length": 252.875, | |
| "completions/mean_length": 218.54296875, | |
| "completions/mean_terminated_length": 216.6855926513672, | |
| "completions/min_length": 176.625, | |
| "completions/min_terminated_length": 176.625, | |
| "entropy": 0.09238096605986357, | |
| "epoch": 4.26044703595724, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.09042836725711823, | |
| "learning_rate": 5e-05, | |
| "loss": -0.005, | |
| "num_tokens": 359265841.0, | |
| "reward": 12.531767010688782, | |
| "reward_std": 0.789877756498754, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9228904247283936, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18061268841847777, | |
| "rewards/event_reward_fn/mean": 10.66015625, | |
| "rewards/event_reward_fn/std": 5.896639049053192, | |
| "rewards/format_reward_fn/mean": 0.9487202428281307, | |
| "rewards/format_reward_fn/std": 0.16975674428977072, | |
| "step": 4384 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0810546875, | |
| "completions/max_length": 254.75, | |
| "completions/max_terminated_length": 253.5, | |
| "completions/mean_length": 223.12109375, | |
| "completions/mean_terminated_length": 220.2361536026001, | |
| "completions/min_length": 183.375, | |
| "completions/min_terminated_length": 183.375, | |
| "entropy": 0.09400972304865718, | |
| "epoch": 4.275996112730807, | |
| "frac_reward_zero_std": 0.26171875, | |
| "grad_norm": 0.13734078407287598, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0066, | |
| "num_tokens": 360610689.0, | |
| "reward": 12.017096877098083, | |
| "reward_std": 0.8595972079783678, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8854949772357941, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.252409529639408, | |
| "rewards/event_reward_fn/mean": 10.2197265625, | |
| "rewards/event_reward_fn/std": 5.637563467025757, | |
| "rewards/format_reward_fn/mean": 0.9118753150105476, | |
| "rewards/format_reward_fn/std": 0.2438918575644493, | |
| "step": 4400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0068359375, | |
| "completions/max_length": 249.25, | |
| "completions/max_terminated_length": 247.8125, | |
| "completions/mean_length": 207.8759765625, | |
| "completions/mean_terminated_length": 207.5603437423706, | |
| "completions/min_length": 174.75, | |
| "completions/min_terminated_length": 174.75, | |
| "entropy": 0.08496905583888292, | |
| "epoch": 4.291545189504373, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.09454280883073807, | |
| "learning_rate": 5e-05, | |
| "loss": -0.003, | |
| "num_tokens": 361880382.0, | |
| "reward": 11.80184918642044, | |
| "reward_std": 0.6977823339402676, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9602529257535934, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10920671455096453, | |
| "rewards/event_reward_fn/mean": 9.8564453125, | |
| "rewards/event_reward_fn/std": 5.88802507519722, | |
| "rewards/format_reward_fn/mean": 0.9851508289575577, | |
| "rewards/format_reward_fn/std": 0.08428931841626763, | |
| "step": 4416 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 248.125, | |
| "completions/max_terminated_length": 246.375, | |
| "completions/mean_length": 209.8857421875, | |
| "completions/mean_terminated_length": 209.5314416885376, | |
| "completions/min_length": 176.8125, | |
| "completions/min_terminated_length": 176.8125, | |
| "entropy": 0.07903782045468688, | |
| "epoch": 4.3070942662779395, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.2290063053369522, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0031, | |
| "num_tokens": 363192181.0, | |
| "reward": 12.047638177871704, | |
| "reward_std": 0.7197975367307663, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9533804319798946, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13947686227038503, | |
| "rewards/event_reward_fn/mean": 10.1259765625, | |
| "rewards/event_reward_fn/std": 5.645702123641968, | |
| "rewards/format_reward_fn/mean": 0.9682812504470348, | |
| "rewards/format_reward_fn/std": 0.13028152799233794, | |
| "step": 4432 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0126953125, | |
| "completions/max_length": 250.625, | |
| "completions/max_terminated_length": 248.0625, | |
| "completions/mean_length": 211.4912109375, | |
| "completions/mean_terminated_length": 210.90715408325195, | |
| "completions/min_length": 173.6875, | |
| "completions/min_terminated_length": 173.6875, | |
| "entropy": 0.08106682682409883, | |
| "epoch": 4.3226433430515065, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.12199271470308304, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0031, | |
| "num_tokens": 364506372.0, | |
| "reward": 12.547775268554688, | |
| "reward_std": 0.7325320076197386, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9543510787189007, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1384572609094903, | |
| "rewards/event_reward_fn/mean": 10.6259765625, | |
| "rewards/event_reward_fn/std": 6.172823116183281, | |
| "rewards/format_reward_fn/mean": 0.9674477651715279, | |
| "rewards/format_reward_fn/std": 0.13982101762667298, | |
| "step": 4448 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 250.75, | |
| "completions/max_terminated_length": 248.75, | |
| "completions/mean_length": 214.36328125, | |
| "completions/mean_terminated_length": 213.90539455413818, | |
| "completions/min_length": 178.3125, | |
| "completions/min_terminated_length": 178.3125, | |
| "entropy": 0.08011418208479881, | |
| "epoch": 4.338192419825073, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.10289853811264038, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 365784232.0, | |
| "reward": 12.01547396183014, | |
| "reward_std": 0.627776425331831, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.949002493172884, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12272156146354973, | |
| "rewards/event_reward_fn/mean": 10.0888671875, | |
| "rewards/event_reward_fn/std": 5.482552140951157, | |
| "rewards/format_reward_fn/mean": 0.9776041693985462, | |
| "rewards/format_reward_fn/std": 0.09575722855515778, | |
| "step": 4464 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0146484375, | |
| "completions/max_length": 250.0625, | |
| "completions/max_terminated_length": 247.25, | |
| "completions/mean_length": 214.0712890625, | |
| "completions/mean_terminated_length": 213.4475040435791, | |
| "completions/min_length": 177.6875, | |
| "completions/min_terminated_length": 177.6875, | |
| "entropy": 0.08435638947412372, | |
| "epoch": 4.35374149659864, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.08181194961071014, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 367170541.0, | |
| "reward": 11.442171514034271, | |
| "reward_std": 0.7030144482851028, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9583823382854462, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.106479546520859, | |
| "rewards/event_reward_fn/mean": 9.4990234375, | |
| "rewards/event_reward_fn/std": 5.919656038284302, | |
| "rewards/format_reward_fn/mean": 0.9847656264901161, | |
| "rewards/format_reward_fn/std": 0.07690410828217864, | |
| "step": 4480 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 252.4375, | |
| "completions/max_terminated_length": 250.9375, | |
| "completions/mean_length": 215.8115234375, | |
| "completions/mean_terminated_length": 214.75897407531738, | |
| "completions/min_length": 180.4375, | |
| "completions/min_terminated_length": 180.4375, | |
| "entropy": 0.0830869055353105, | |
| "epoch": 4.369290573372206, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 0.14057432115077972, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0004, | |
| "num_tokens": 368451128.0, | |
| "reward": 12.22743684053421, | |
| "reward_std": 0.7562771774828434, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9470355100929737, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1319723033811897, | |
| "rewards/event_reward_fn/mean": 10.3154296875, | |
| "rewards/event_reward_fn/std": 5.559557408094406, | |
| "rewards/format_reward_fn/mean": 0.9649716354906559, | |
| "rewards/format_reward_fn/std": 0.11026093480177224, | |
| "step": 4496 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029296875, | |
| "completions/max_length": 254.3125, | |
| "completions/max_terminated_length": 252.1875, | |
| "completions/mean_length": 219.9169921875, | |
| "completions/mean_terminated_length": 218.84753227233887, | |
| "completions/min_length": 177.5, | |
| "completions/min_terminated_length": 177.5, | |
| "entropy": 0.0869236602447927, | |
| "epoch": 4.384839650145772, | |
| "frac_reward_zero_std": 0.3515625, | |
| "grad_norm": 0.12595002353191376, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 369721867.0, | |
| "reward": 11.16374546289444, | |
| "reward_std": 0.7131532970815897, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9536585137248039, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13799344294238836, | |
| "rewards/event_reward_fn/mean": 9.2431640625, | |
| "rewards/event_reward_fn/std": 4.887677401304245, | |
| "rewards/format_reward_fn/mean": 0.9669227488338947, | |
| "rewards/format_reward_fn/std": 0.1246087858453393, | |
| "step": 4512 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1357421875, | |
| "completions/max_length": 255.5, | |
| "completions/max_terminated_length": 253.875, | |
| "completions/mean_length": 225.7529296875, | |
| "completions/mean_terminated_length": 221.23834419250488, | |
| "completions/min_length": 174.875, | |
| "completions/min_terminated_length": 174.875, | |
| "entropy": 0.08455759705975652, | |
| "epoch": 4.400388726919339, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.0808541551232338, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0018, | |
| "num_tokens": 371102210.0, | |
| "reward": 11.924792170524597, | |
| "reward_std": 0.7625311650335789, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.858902994543314, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3007725060451776, | |
| "rewards/event_reward_fn/mean": 10.1962890625, | |
| "rewards/event_reward_fn/std": 5.602404475212097, | |
| "rewards/format_reward_fn/mean": 0.869599923491478, | |
| "rewards/format_reward_fn/std": 0.3008687235414982, | |
| "step": 4528 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.048828125, | |
| "completions/max_length": 253.75, | |
| "completions/max_terminated_length": 252.0625, | |
| "completions/mean_length": 220.9267578125, | |
| "completions/mean_terminated_length": 219.16146183013916, | |
| "completions/min_length": 178.9375, | |
| "completions/min_terminated_length": 178.9375, | |
| "entropy": 0.08046228950843215, | |
| "epoch": 4.415937803692906, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.09979816526174545, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 372432039.0, | |
| "reward": 12.132369935512543, | |
| "reward_std": 0.7911638263612986, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9381347000598907, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.184728623367846, | |
| "rewards/event_reward_fn/mean": 10.2431640625, | |
| "rewards/event_reward_fn/std": 6.270698189735413, | |
| "rewards/format_reward_fn/mean": 0.9510712176561356, | |
| "rewards/format_reward_fn/std": 0.17310465592890978, | |
| "step": 4544 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 250.5, | |
| "completions/max_terminated_length": 249.5, | |
| "completions/mean_length": 212.8916015625, | |
| "completions/mean_terminated_length": 212.41744136810303, | |
| "completions/min_length": 170.1875, | |
| "completions/min_terminated_length": 170.1875, | |
| "entropy": 0.07715298281982541, | |
| "epoch": 4.431486880466473, | |
| "frac_reward_zero_std": 0.43359375, | |
| "grad_norm": 0.12553343176841736, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0034, | |
| "num_tokens": 373759536.0, | |
| "reward": 11.972736299037933, | |
| "reward_std": 0.7828308828175068, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9649507515132427, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10414338018745184, | |
| "rewards/event_reward_fn/mean": 10.0283203125, | |
| "rewards/event_reward_fn/std": 5.264572739601135, | |
| "rewards/format_reward_fn/mean": 0.9794652201235294, | |
| "rewards/format_reward_fn/std": 0.09091756423003972, | |
| "step": 4560 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 252.5, | |
| "completions/max_terminated_length": 246.875, | |
| "completions/mean_length": 211.771484375, | |
| "completions/mean_terminated_length": 210.6889190673828, | |
| "completions/min_length": 175.1875, | |
| "completions/min_terminated_length": 175.1875, | |
| "entropy": 0.0748588687274605, | |
| "epoch": 4.447035957240039, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.14217767119407654, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0013, | |
| "num_tokens": 375162258.0, | |
| "reward": 12.27553415298462, | |
| "reward_std": 0.8959056548774242, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9571230597794056, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13766769837820902, | |
| "rewards/event_reward_fn/mean": 10.3515625, | |
| "rewards/event_reward_fn/std": 5.9426403641700745, | |
| "rewards/format_reward_fn/mean": 0.966848649084568, | |
| "rewards/format_reward_fn/std": 0.14077154966071248, | |
| "step": 4576 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0146484375, | |
| "completions/max_length": 251.75, | |
| "completions/max_terminated_length": 250.0, | |
| "completions/mean_length": 214.3134765625, | |
| "completions/mean_terminated_length": 213.71609592437744, | |
| "completions/min_length": 172.4375, | |
| "completions/min_terminated_length": 172.4375, | |
| "entropy": 0.08040324132889509, | |
| "epoch": 4.462585034013605, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.10610129684209824, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0025, | |
| "num_tokens": 376386051.0, | |
| "reward": 11.649299383163452, | |
| "reward_std": 0.7246943525969982, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9564645774662495, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.128838874574285, | |
| "rewards/event_reward_fn/mean": 9.7177734375, | |
| "rewards/event_reward_fn/std": 4.996019497513771, | |
| "rewards/format_reward_fn/mean": 0.9750613868236542, | |
| "rewards/format_reward_fn/std": 0.10886701662093401, | |
| "step": 4592 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0234375, | |
| "completions/max_length": 253.75, | |
| "completions/max_terminated_length": 252.25, | |
| "completions/mean_length": 217.634765625, | |
| "completions/mean_terminated_length": 216.70848751068115, | |
| "completions/min_length": 180.125, | |
| "completions/min_terminated_length": 180.125, | |
| "entropy": 0.0827216855250299, | |
| "epoch": 4.478134110787172, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.06507635116577148, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0014, | |
| "num_tokens": 377674649.0, | |
| "reward": 12.131418943405151, | |
| "reward_std": 0.6769323218613863, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9498664774000645, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13098848913796246, | |
| "rewards/event_reward_fn/mean": 10.2109375, | |
| "rewards/event_reward_fn/std": 5.762604504823685, | |
| "rewards/format_reward_fn/mean": 0.9706148952245712, | |
| "rewards/format_reward_fn/std": 0.10980169754475355, | |
| "step": 4608 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0263671875, | |
| "completions/max_length": 254.3125, | |
| "completions/max_terminated_length": 250.375, | |
| "completions/mean_length": 216.69921875, | |
| "completions/mean_terminated_length": 215.64843940734863, | |
| "completions/min_length": 181.5625, | |
| "completions/min_terminated_length": 181.5625, | |
| "entropy": 0.08147751213982701, | |
| "epoch": 4.493683187560738, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.09443672001361847, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 379054909.0, | |
| "reward": 12.458342015743256, | |
| "reward_std": 0.8503458648920059, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9344224706292152, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16761525836773217, | |
| "rewards/event_reward_fn/mean": 10.5703125, | |
| "rewards/event_reward_fn/std": 6.101438790559769, | |
| "rewards/format_reward_fn/mean": 0.9536067768931389, | |
| "rewards/format_reward_fn/std": 0.15860154060646892, | |
| "step": 4624 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0078125, | |
| "completions/max_length": 247.625, | |
| "completions/max_terminated_length": 245.9375, | |
| "completions/mean_length": 211.0009765625, | |
| "completions/mean_terminated_length": 210.65614318847656, | |
| "completions/min_length": 179.6875, | |
| "completions/min_terminated_length": 179.6875, | |
| "entropy": 0.08105296548455954, | |
| "epoch": 4.5092322643343055, | |
| "frac_reward_zero_std": 0.2734375, | |
| "grad_norm": 0.13886581361293793, | |
| "learning_rate": 5e-05, | |
| "loss": -0.003, | |
| "num_tokens": 380383922.0, | |
| "reward": 11.8076793551445, | |
| "reward_std": 0.8404425587505102, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9563166238367558, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10713632625993341, | |
| "rewards/event_reward_fn/mean": 9.876953125, | |
| "rewards/event_reward_fn/std": 5.32866133749485, | |
| "rewards/format_reward_fn/mean": 0.974409569054842, | |
| "rewards/format_reward_fn/std": 0.09941133600659668, | |
| "step": 4640 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.005859375, | |
| "completions/max_length": 244.9375, | |
| "completions/max_terminated_length": 243.75, | |
| "completions/mean_length": 210.9580078125, | |
| "completions/mean_terminated_length": 210.7022762298584, | |
| "completions/min_length": 176.9375, | |
| "completions/min_terminated_length": 176.9375, | |
| "entropy": 0.07536840462125838, | |
| "epoch": 4.524781341107872, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.12522609531879425, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0024, | |
| "num_tokens": 381693591.0, | |
| "reward": 11.885925889015198, | |
| "reward_std": 0.6851696334779263, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9520582780241966, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12435426318552345, | |
| "rewards/event_reward_fn/mean": 9.9599609375, | |
| "rewards/event_reward_fn/std": 5.595930874347687, | |
| "rewards/format_reward_fn/mean": 0.9739067144691944, | |
| "rewards/format_reward_fn/std": 0.11203544959425926, | |
| "step": 4656 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.02734375, | |
| "completions/max_length": 253.3125, | |
| "completions/max_terminated_length": 249.125, | |
| "completions/mean_length": 215.0244140625, | |
| "completions/mean_terminated_length": 213.8462839126587, | |
| "completions/min_length": 169.6875, | |
| "completions/min_terminated_length": 169.6875, | |
| "entropy": 0.0726128879468888, | |
| "epoch": 4.540330417881439, | |
| "frac_reward_zero_std": 0.4140625, | |
| "grad_norm": 0.08706779778003693, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0018, | |
| "num_tokens": 382994364.0, | |
| "reward": 12.469999372959137, | |
| "reward_std": 0.6653014570474625, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9453453533351421, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14407718984875828, | |
| "rewards/event_reward_fn/mean": 10.560546875, | |
| "rewards/event_reward_fn/std": 5.933085352182388, | |
| "rewards/format_reward_fn/mean": 0.9641071446239948, | |
| "rewards/format_reward_fn/std": 0.13372006034478545, | |
| "step": 4672 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0390625, | |
| "completions/max_length": 255.625, | |
| "completions/max_terminated_length": 252.3125, | |
| "completions/mean_length": 220.556640625, | |
| "completions/mean_terminated_length": 219.14645195007324, | |
| "completions/min_length": 177.5625, | |
| "completions/min_terminated_length": 177.5625, | |
| "entropy": 0.07643542671576142, | |
| "epoch": 4.555879494655005, | |
| "frac_reward_zero_std": 0.37109375, | |
| "grad_norm": 0.11401298642158508, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0008, | |
| "num_tokens": 384316582.0, | |
| "reward": 11.956826388835907, | |
| "reward_std": 0.752917755395174, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9352249316871166, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17955804523080587, | |
| "rewards/event_reward_fn/mean": 10.072265625, | |
| "rewards/event_reward_fn/std": 5.650606602430344, | |
| "rewards/format_reward_fn/mean": 0.949335940182209, | |
| "rewards/format_reward_fn/std": 0.18370232917368412, | |
| "step": 4688 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0419921875, | |
| "completions/max_length": 255.125, | |
| "completions/max_terminated_length": 252.5625, | |
| "completions/mean_length": 220.98828125, | |
| "completions/mean_terminated_length": 219.46258068084717, | |
| "completions/min_length": 176.75, | |
| "completions/min_terminated_length": 176.75, | |
| "entropy": 0.08043610630556941, | |
| "epoch": 4.571428571428571, | |
| "frac_reward_zero_std": 0.32421875, | |
| "grad_norm": 0.1541932076215744, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0023, | |
| "num_tokens": 385619530.0, | |
| "reward": 12.387038767337799, | |
| "reward_std": 0.9704460687935352, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9340765401721001, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17475787783041596, | |
| "rewards/event_reward_fn/mean": 10.5029296875, | |
| "rewards/event_reward_fn/std": 5.490474209189415, | |
| "rewards/format_reward_fn/mean": 0.9500325620174408, | |
| "rewards/format_reward_fn/std": 0.17480701440945268, | |
| "step": 4704 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0244140625, | |
| "completions/max_length": 254.5625, | |
| "completions/max_terminated_length": 253.25, | |
| "completions/mean_length": 219.203125, | |
| "completions/mean_terminated_length": 218.32938385009766, | |
| "completions/min_length": 173.25, | |
| "completions/min_terminated_length": 173.25, | |
| "entropy": 0.07852176204323769, | |
| "epoch": 4.586977648202138, | |
| "frac_reward_zero_std": 0.30859375, | |
| "grad_norm": 0.154206320643425, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0036, | |
| "num_tokens": 386950750.0, | |
| "reward": 12.324360966682434, | |
| "reward_std": 0.8198084980249405, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9449009336531162, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12958236644044518, | |
| "rewards/event_reward_fn/mean": 10.4111328125, | |
| "rewards/event_reward_fn/std": 5.862970620393753, | |
| "rewards/format_reward_fn/mean": 0.9683272950351238, | |
| "rewards/format_reward_fn/std": 0.1171579877845943, | |
| "step": 4720 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.1611328125, | |
| "completions/max_length": 255.875, | |
| "completions/max_terminated_length": 254.8125, | |
| "completions/mean_length": 229.4404296875, | |
| "completions/mean_terminated_length": 224.22948551177979, | |
| "completions/min_length": 182.875, | |
| "completions/min_terminated_length": 182.875, | |
| "entropy": 0.08241429831832647, | |
| "epoch": 4.6025267249757045, | |
| "frac_reward_zero_std": 0.27734375, | |
| "grad_norm": 0.07539704442024231, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0041, | |
| "num_tokens": 388299461.0, | |
| "reward": 11.465377151966095, | |
| "reward_std": 0.8274618536233902, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.827308963984251, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3203687067143619, | |
| "rewards/event_reward_fn/mean": 9.7841796875, | |
| "rewards/event_reward_fn/std": 5.850812315940857, | |
| "rewards/format_reward_fn/mean": 0.8538884222507477, | |
| "rewards/format_reward_fn/std": 0.3226360958069563, | |
| "step": 4736 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.046875, | |
| "completions/max_length": 254.125, | |
| "completions/max_terminated_length": 251.5625, | |
| "completions/mean_length": 218.27734375, | |
| "completions/mean_terminated_length": 216.53139400482178, | |
| "completions/min_length": 176.125, | |
| "completions/min_terminated_length": 176.125, | |
| "entropy": 0.08156100008636713, | |
| "epoch": 4.618075801749271, | |
| "frac_reward_zero_std": 0.34375, | |
| "grad_norm": 0.16608324646949768, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0028, | |
| "num_tokens": 389634989.0, | |
| "reward": 11.895731985569, | |
| "reward_std": 0.7938553486019373, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9349915757775307, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1642955782590434, | |
| "rewards/event_reward_fn/mean": 10.0078125, | |
| "rewards/event_reward_fn/std": 5.811924383044243, | |
| "rewards/format_reward_fn/mean": 0.952927827835083, | |
| "rewards/format_reward_fn/std": 0.16472134506329894, | |
| "step": 4752 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0029296875, | |
| "completions/max_length": 242.6875, | |
| "completions/max_terminated_length": 240.75, | |
| "completions/mean_length": 204.7666015625, | |
| "completions/mean_terminated_length": 204.61684799194336, | |
| "completions/min_length": 162.8125, | |
| "completions/min_terminated_length": 162.8125, | |
| "entropy": 0.07700010249391198, | |
| "epoch": 4.633624878522838, | |
| "frac_reward_zero_std": 0.33984375, | |
| "grad_norm": 0.09481830894947052, | |
| "learning_rate": 5e-05, | |
| "loss": -0.004, | |
| "num_tokens": 390946810.0, | |
| "reward": 11.631008863449097, | |
| "reward_std": 0.7731746193021536, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9748496301472187, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.060335727874189615, | |
| "rewards/event_reward_fn/mean": 9.6630859375, | |
| "rewards/event_reward_fn/std": 5.950529634952545, | |
| "rewards/format_reward_fn/mean": 0.9930733852088451, | |
| "rewards/format_reward_fn/std": 0.03480626177042723, | |
| "step": 4768 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.001953125, | |
| "completions/max_length": 240.5, | |
| "completions/max_terminated_length": 240.25, | |
| "completions/mean_length": 204.19921875, | |
| "completions/mean_terminated_length": 204.1085557937622, | |
| "completions/min_length": 163.8125, | |
| "completions/min_terminated_length": 163.8125, | |
| "entropy": 0.0720194885507226, | |
| "epoch": 4.649173955296404, | |
| "frac_reward_zero_std": 0.42578125, | |
| "grad_norm": 0.24066881835460663, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0053, | |
| "num_tokens": 392285786.0, | |
| "reward": 11.837441265583038, | |
| "reward_std": 0.6970504522323608, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9701077155768871, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.06552095845108852, | |
| "rewards/event_reward_fn/mean": 9.8798828125, | |
| "rewards/event_reward_fn/std": 6.1304861307144165, | |
| "rewards/format_reward_fn/mean": 0.987450860440731, | |
| "rewards/format_reward_fn/std": 0.050714970799162984, | |
| "step": 4784 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0068359375, | |
| "completions/max_length": 247.6875, | |
| "completions/max_terminated_length": 245.4375, | |
| "completions/mean_length": 207.455078125, | |
| "completions/mean_terminated_length": 207.11551570892334, | |
| "completions/min_length": 166.0, | |
| "completions/min_terminated_length": 166.0, | |
| "entropy": 0.06734136585146189, | |
| "epoch": 4.664723032069971, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.24333445727825165, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 393620732.0, | |
| "reward": 11.832262814044952, | |
| "reward_std": 0.7198988972231746, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9663410037755966, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10767719999421388, | |
| "rewards/event_reward_fn/mean": 9.888671875, | |
| "rewards/event_reward_fn/std": 6.22281976044178, | |
| "rewards/format_reward_fn/mean": 0.9772499725222588, | |
| "rewards/format_reward_fn/std": 0.09165127645246685, | |
| "step": 4800 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 244.5625, | |
| "completions/max_terminated_length": 241.875, | |
| "completions/mean_length": 206.2265625, | |
| "completions/mean_terminated_length": 205.5375509262085, | |
| "completions/min_length": 168.4375, | |
| "completions/min_terminated_length": 168.4375, | |
| "entropy": 0.068746835924685, | |
| "epoch": 4.680272108843537, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.13866840302944183, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0036, | |
| "num_tokens": 394912384.0, | |
| "reward": 12.363645255565643, | |
| "reward_std": 0.6785086588934064, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9618904925882816, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.111503601889126, | |
| "rewards/event_reward_fn/mean": 10.42578125, | |
| "rewards/event_reward_fn/std": 5.934814959764481, | |
| "rewards/format_reward_fn/mean": 0.9759734608232975, | |
| "rewards/format_reward_fn/std": 0.08922615088522434, | |
| "step": 4816 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0107421875, | |
| "completions/max_length": 247.75, | |
| "completions/max_terminated_length": 243.8125, | |
| "completions/mean_length": 207.9228515625, | |
| "completions/mean_terminated_length": 207.40814781188965, | |
| "completions/min_length": 171.5, | |
| "completions/min_terminated_length": 171.5, | |
| "entropy": 0.0720445194747299, | |
| "epoch": 4.695821185617104, | |
| "frac_reward_zero_std": 0.40234375, | |
| "grad_norm": 0.15189078450202942, | |
| "learning_rate": 5e-05, | |
| "loss": 0.001, | |
| "num_tokens": 396201629.0, | |
| "reward": 12.15940910577774, | |
| "reward_std": 0.7553090676665306, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9640238769352436, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10036803607363254, | |
| "rewards/event_reward_fn/mean": 10.22265625, | |
| "rewards/event_reward_fn/std": 5.967520788311958, | |
| "rewards/format_reward_fn/mean": 0.9727289602160454, | |
| "rewards/format_reward_fn/std": 0.09985992661677301, | |
| "step": 4832 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 247.25, | |
| "completions/max_terminated_length": 246.3125, | |
| "completions/mean_length": 208.2490234375, | |
| "completions/mean_terminated_length": 207.7828426361084, | |
| "completions/min_length": 171.5, | |
| "completions/min_terminated_length": 171.5, | |
| "entropy": 0.0726703389082104, | |
| "epoch": 4.711370262390671, | |
| "frac_reward_zero_std": 0.4140625, | |
| "grad_norm": 0.12212812900543213, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 397504832.0, | |
| "reward": 11.704569518566132, | |
| "reward_std": 0.8146627731621265, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9750679209828377, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.06717803853098303, | |
| "rewards/event_reward_fn/mean": 9.7451171875, | |
| "rewards/event_reward_fn/std": 5.367679685354233, | |
| "rewards/format_reward_fn/mean": 0.9843843020498753, | |
| "rewards/format_reward_fn/std": 0.061483025085181, | |
| "step": 4848 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.015625, | |
| "completions/max_length": 250.0, | |
| "completions/max_terminated_length": 246.6875, | |
| "completions/mean_length": 208.2158203125, | |
| "completions/mean_terminated_length": 207.4158697128296, | |
| "completions/min_length": 169.8125, | |
| "completions/min_terminated_length": 169.8125, | |
| "entropy": 0.07577541843056679, | |
| "epoch": 4.726919339164237, | |
| "frac_reward_zero_std": 0.40234375, | |
| "grad_norm": 0.13794957101345062, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 398863225.0, | |
| "reward": 11.808978796005249, | |
| "reward_std": 0.94271419942379, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9500653333961964, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1544017958221957, | |
| "rewards/event_reward_fn/mean": 9.9072265625, | |
| "rewards/event_reward_fn/std": 5.786400109529495, | |
| "rewards/format_reward_fn/mean": 0.9516868181526661, | |
| "rewards/format_reward_fn/std": 0.16639976995065808, | |
| "step": 4864 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 248.3125, | |
| "completions/max_terminated_length": 245.3125, | |
| "completions/mean_length": 208.3583984375, | |
| "completions/mean_terminated_length": 207.6431589126587, | |
| "completions/min_length": 172.25, | |
| "completions/min_terminated_length": 172.25, | |
| "entropy": 0.07559068105183542, | |
| "epoch": 4.742468415937804, | |
| "frac_reward_zero_std": 0.4375, | |
| "grad_norm": 0.16644462943077087, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 400194388.0, | |
| "reward": 12.06817501783371, | |
| "reward_std": 0.8118875231593847, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9551199078559875, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1355801696772687, | |
| "rewards/event_reward_fn/mean": 10.15234375, | |
| "rewards/event_reward_fn/std": 6.247093990445137, | |
| "rewards/format_reward_fn/mean": 0.9607114940881729, | |
| "rewards/format_reward_fn/std": 0.13486498198471963, | |
| "step": 4880 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 249.8125, | |
| "completions/max_terminated_length": 245.25, | |
| "completions/mean_length": 208.1708984375, | |
| "completions/mean_terminated_length": 207.60796546936035, | |
| "completions/min_length": 172.5, | |
| "completions/min_terminated_length": 172.5, | |
| "entropy": 0.07368506025522947, | |
| "epoch": 4.75801749271137, | |
| "frac_reward_zero_std": 0.42578125, | |
| "grad_norm": 0.09005624800920486, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 401506939.0, | |
| "reward": 12.930509805679321, | |
| "reward_std": 0.7797500379383564, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9727158024907112, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.09574340214021504, | |
| "rewards/event_reward_fn/mean": 10.9794921875, | |
| "rewards/event_reward_fn/std": 6.308150812983513, | |
| "rewards/format_reward_fn/mean": 0.9783018380403519, | |
| "rewards/format_reward_fn/std": 0.09503353573381901, | |
| "step": 4896 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.013671875, | |
| "completions/max_length": 248.125, | |
| "completions/max_terminated_length": 246.9375, | |
| "completions/mean_length": 212.078125, | |
| "completions/mean_terminated_length": 211.53092288970947, | |
| "completions/min_length": 175.0625, | |
| "completions/min_terminated_length": 175.0625, | |
| "entropy": 0.08496078243479133, | |
| "epoch": 4.773566569484937, | |
| "frac_reward_zero_std": 0.35546875, | |
| "grad_norm": 0.1415478140115738, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0029, | |
| "num_tokens": 402830559.0, | |
| "reward": 11.695642411708832, | |
| "reward_std": 0.9189990721642971, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9574025720357895, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1232752677751705, | |
| "rewards/event_reward_fn/mean": 9.7783203125, | |
| "rewards/event_reward_fn/std": 5.1400560438632965, | |
| "rewards/format_reward_fn/mean": 0.9599194973707199, | |
| "rewards/format_reward_fn/std": 0.13297926378436387, | |
| "step": 4912 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0380859375, | |
| "completions/max_length": 253.5, | |
| "completions/max_terminated_length": 251.5625, | |
| "completions/mean_length": 216.451171875, | |
| "completions/mean_terminated_length": 214.94143295288086, | |
| "completions/min_length": 179.0625, | |
| "completions/min_terminated_length": 179.0625, | |
| "entropy": 0.08813147945329547, | |
| "epoch": 4.789115646258503, | |
| "frac_reward_zero_std": 0.3203125, | |
| "grad_norm": 0.07135059684515, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0027, | |
| "num_tokens": 404200177.0, | |
| "reward": 12.266283452510834, | |
| "reward_std": 0.8791834656149149, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9312233664095402, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2005148883908987, | |
| "rewards/event_reward_fn/mean": 10.400390625, | |
| "rewards/event_reward_fn/std": 6.1433481723070145, | |
| "rewards/format_reward_fn/mean": 0.9346696473658085, | |
| "rewards/format_reward_fn/std": 0.20454937545582652, | |
| "step": 4928 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.025390625, | |
| "completions/max_length": 251.75, | |
| "completions/max_terminated_length": 249.375, | |
| "completions/mean_length": 214.8701171875, | |
| "completions/mean_terminated_length": 213.78954410552979, | |
| "completions/min_length": 175.6875, | |
| "completions/min_terminated_length": 175.6875, | |
| "entropy": 0.08725593006238341, | |
| "epoch": 4.80466472303207, | |
| "frac_reward_zero_std": 0.3671875, | |
| "grad_norm": 0.14082126319408417, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0043, | |
| "num_tokens": 405533868.0, | |
| "reward": 12.019694983959198, | |
| "reward_std": 0.8072663694620132, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9528482407331467, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1505628222366795, | |
| "rewards/event_reward_fn/mean": 10.111328125, | |
| "rewards/event_reward_fn/std": 5.826146110892296, | |
| "rewards/format_reward_fn/mean": 0.9555186629295349, | |
| "rewards/format_reward_fn/std": 0.16073728911578655, | |
| "step": 4944 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029296875, | |
| "completions/max_length": 253.5625, | |
| "completions/max_terminated_length": 249.375, | |
| "completions/mean_length": 216.126953125, | |
| "completions/mean_terminated_length": 214.97348499298096, | |
| "completions/min_length": 179.25, | |
| "completions/min_terminated_length": 179.25, | |
| "entropy": 0.07998230727389455, | |
| "epoch": 4.820213799805637, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.13172361254692078, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 406892362.0, | |
| "reward": 12.584176301956177, | |
| "reward_std": 0.8060983493924141, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9421436227858067, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17349553992971778, | |
| "rewards/event_reward_fn/mean": 10.697265625, | |
| "rewards/event_reward_fn/std": 6.188347667455673, | |
| "rewards/format_reward_fn/mean": 0.944767028093338, | |
| "rewards/format_reward_fn/std": 0.17709117522463202, | |
| "step": 4960 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0146484375, | |
| "completions/max_length": 249.0625, | |
| "completions/max_terminated_length": 248.5, | |
| "completions/mean_length": 211.833984375, | |
| "completions/mean_terminated_length": 211.20284271240234, | |
| "completions/min_length": 171.625, | |
| "completions/min_terminated_length": 171.625, | |
| "entropy": 0.07674705190584064, | |
| "epoch": 4.835762876579203, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.27421054244041443, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 408234032.0, | |
| "reward": 12.206626057624817, | |
| "reward_std": 0.839366152882576, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9551889784634113, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.12793191766832024, | |
| "rewards/event_reward_fn/mean": 10.2958984375, | |
| "rewards/event_reward_fn/std": 5.720379784703255, | |
| "rewards/format_reward_fn/mean": 0.955538809299469, | |
| "rewards/format_reward_fn/std": 0.13686896581202745, | |
| "step": 4976 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0439453125, | |
| "completions/max_length": 253.875, | |
| "completions/max_terminated_length": 251.25, | |
| "completions/mean_length": 219.1953125, | |
| "completions/mean_terminated_length": 217.51319122314453, | |
| "completions/min_length": 177.875, | |
| "completions/min_terminated_length": 177.875, | |
| "entropy": 0.0816301996819675, | |
| "epoch": 4.85131195335277, | |
| "frac_reward_zero_std": 0.41015625, | |
| "grad_norm": 0.18152131140232086, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0008, | |
| "num_tokens": 409579660.0, | |
| "reward": 11.989133656024933, | |
| "reward_std": 0.8817657474428415, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.932769563049078, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18802405067253858, | |
| "rewards/event_reward_fn/mean": 10.119140625, | |
| "rewards/event_reward_fn/std": 5.9855871349573135, | |
| "rewards/format_reward_fn/mean": 0.9372236467897892, | |
| "rewards/format_reward_fn/std": 0.1983571257442236, | |
| "step": 4992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 254.4375, | |
| "completions/max_terminated_length": 252.1875, | |
| "completions/mean_length": 217.8271484375, | |
| "completions/mean_terminated_length": 216.6529426574707, | |
| "completions/min_length": 175.625, | |
| "completions/min_terminated_length": 175.625, | |
| "entropy": 0.07996702333912253, | |
| "epoch": 4.866861030126336, | |
| "frac_reward_zero_std": 0.41015625, | |
| "grad_norm": 0.11488137394189835, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 410859759.0, | |
| "reward": 11.951142311096191, | |
| "reward_std": 0.7472913395613432, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9457512833178043, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17608064785599709, | |
| "rewards/event_reward_fn/mean": 10.0546875, | |
| "rewards/event_reward_fn/std": 5.59067901968956, | |
| "rewards/format_reward_fn/mean": 0.950703427195549, | |
| "rewards/format_reward_fn/std": 0.17153813573531806, | |
| "step": 5008 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0244140625, | |
| "completions/max_length": 253.0, | |
| "completions/max_terminated_length": 250.0625, | |
| "completions/mean_length": 213.92578125, | |
| "completions/mean_terminated_length": 212.86985301971436, | |
| "completions/min_length": 169.25, | |
| "completions/min_terminated_length": 169.25, | |
| "entropy": 0.07613265956752002, | |
| "epoch": 4.882410106899902, | |
| "frac_reward_zero_std": 0.46484375, | |
| "grad_norm": 0.15658670663833618, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0075, | |
| "num_tokens": 412146683.0, | |
| "reward": 11.895162045955658, | |
| "reward_std": 0.7129523921757936, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9551772475242615, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1298921147827059, | |
| "rewards/event_reward_fn/mean": 9.9853515625, | |
| "rewards/event_reward_fn/std": 5.476163282990456, | |
| "rewards/format_reward_fn/mean": 0.9546333625912666, | |
| "rewards/format_reward_fn/std": 0.14684197306632996, | |
| "step": 5024 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0205078125, | |
| "completions/max_length": 252.0625, | |
| "completions/max_terminated_length": 250.0625, | |
| "completions/mean_length": 210.5185546875, | |
| "completions/mean_terminated_length": 209.6259593963623, | |
| "completions/min_length": 170.0, | |
| "completions/min_terminated_length": 170.0, | |
| "entropy": 0.07199173117987812, | |
| "epoch": 4.8979591836734695, | |
| "frac_reward_zero_std": 0.4453125, | |
| "grad_norm": 0.21756106615066528, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0021, | |
| "num_tokens": 413506590.0, | |
| "reward": 12.365760207176208, | |
| "reward_std": 0.745238907635212, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9555656909942627, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13530326791806147, | |
| "rewards/event_reward_fn/mean": 10.4541015625, | |
| "rewards/event_reward_fn/std": 5.998309597373009, | |
| "rewards/format_reward_fn/mean": 0.9560929797589779, | |
| "rewards/format_reward_fn/std": 0.14361765328794718, | |
| "step": 5040 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0166015625, | |
| "completions/max_length": 251.75, | |
| "completions/max_terminated_length": 249.6875, | |
| "completions/mean_length": 211.0107421875, | |
| "completions/mean_terminated_length": 210.25583267211914, | |
| "completions/min_length": 172.8125, | |
| "completions/min_terminated_length": 172.8125, | |
| "entropy": 0.07869777269661427, | |
| "epoch": 4.913508260447036, | |
| "frac_reward_zero_std": 0.4296875, | |
| "grad_norm": 0.07694108039140701, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0025, | |
| "num_tokens": 414872965.0, | |
| "reward": 12.788713455200195, | |
| "reward_std": 0.7702588140964508, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9623404443264008, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.11270316521404311, | |
| "rewards/event_reward_fn/mean": 10.876953125, | |
| "rewards/event_reward_fn/std": 6.525661692023277, | |
| "rewards/format_reward_fn/mean": 0.9494199566543102, | |
| "rewards/format_reward_fn/std": 0.1416560309007764, | |
| "step": 5056 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0146484375, | |
| "completions/max_length": 252.75, | |
| "completions/max_terminated_length": 251.125, | |
| "completions/mean_length": 214.677734375, | |
| "completions/mean_terminated_length": 214.11969184875488, | |
| "completions/min_length": 172.3125, | |
| "completions/min_terminated_length": 172.3125, | |
| "entropy": 0.08010096289217472, | |
| "epoch": 4.929057337220603, | |
| "frac_reward_zero_std": 0.3359375, | |
| "grad_norm": 0.19344469904899597, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 416184203.0, | |
| "reward": 12.021986961364746, | |
| "reward_std": 0.8370283525437117, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9379756189882755, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.18868807784747332, | |
| "rewards/event_reward_fn/mean": 10.1474609375, | |
| "rewards/event_reward_fn/std": 5.739748045802116, | |
| "rewards/format_reward_fn/mean": 0.9365505129098892, | |
| "rewards/format_reward_fn/std": 0.18898644018918276, | |
| "step": 5072 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0791015625, | |
| "completions/max_length": 255.625, | |
| "completions/max_terminated_length": 254.1875, | |
| "completions/mean_length": 223.2958984375, | |
| "completions/mean_terminated_length": 220.47602653503418, | |
| "completions/min_length": 179.0, | |
| "completions/min_terminated_length": 179.0, | |
| "entropy": 0.09088941430673003, | |
| "epoch": 4.944606413994169, | |
| "frac_reward_zero_std": 0.296875, | |
| "grad_norm": 0.20225514471530914, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0018, | |
| "num_tokens": 417535758.0, | |
| "reward": 12.14924430847168, | |
| "reward_std": 0.9182783588767052, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9020416662096977, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.25824297685176134, | |
| "rewards/event_reward_fn/mean": 10.345703125, | |
| "rewards/event_reward_fn/std": 5.885191112756729, | |
| "rewards/format_reward_fn/mean": 0.9014995731413364, | |
| "rewards/format_reward_fn/std": 0.2582928016781807, | |
| "step": 5088 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.10546875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 252.4375, | |
| "completions/mean_length": 226.154296875, | |
| "completions/mean_terminated_length": 222.74933338165283, | |
| "completions/min_length": 181.875, | |
| "completions/min_terminated_length": 181.875, | |
| "entropy": 0.09840598748996854, | |
| "epoch": 4.960155490767736, | |
| "frac_reward_zero_std": 0.34765625, | |
| "grad_norm": 0.13706494867801666, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0047, | |
| "num_tokens": 418836740.0, | |
| "reward": 11.611397385597229, | |
| "reward_std": 0.8141429983079433, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.8677695393562317, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.3009802335873246, | |
| "rewards/event_reward_fn/mean": 9.8740234375, | |
| "rewards/event_reward_fn/std": 5.783898174762726, | |
| "rewards/format_reward_fn/mean": 0.8696044124662876, | |
| "rewards/format_reward_fn/std": 0.3018876016139984, | |
| "step": 5104 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0400390625, | |
| "completions/max_length": 252.625, | |
| "completions/max_terminated_length": 250.375, | |
| "completions/mean_length": 216.8056640625, | |
| "completions/mean_terminated_length": 215.23225784301758, | |
| "completions/min_length": 174.125, | |
| "completions/min_terminated_length": 174.125, | |
| "entropy": 0.09257404552772641, | |
| "epoch": 4.975704567541302, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.1244494840502739, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 420147069.0, | |
| "reward": 12.01289427280426, | |
| "reward_std": 0.9822186566889286, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9450008794665337, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16165780992014334, | |
| "rewards/event_reward_fn/mean": 10.1240234375, | |
| "rewards/event_reward_fn/std": 5.432392194867134, | |
| "rewards/format_reward_fn/mean": 0.9438699819147587, | |
| "rewards/format_reward_fn/std": 0.1795343121048063, | |
| "step": 5120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0283203125, | |
| "completions/max_length": 251.0, | |
| "completions/max_terminated_length": 247.75, | |
| "completions/mean_length": 212.3759765625, | |
| "completions/mean_terminated_length": 211.170654296875, | |
| "completions/min_length": 175.5, | |
| "completions/min_terminated_length": 175.5, | |
| "entropy": 0.08855262584984303, | |
| "epoch": 4.9912536443148685, | |
| "frac_reward_zero_std": 0.421875, | |
| "grad_norm": 0.08971451967954636, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0012, | |
| "num_tokens": 421466326.0, | |
| "reward": 12.228147089481354, | |
| "reward_std": 0.8180289585143328, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9339725822210312, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.20187974988948554, | |
| "rewards/event_reward_fn/mean": 10.3623046875, | |
| "rewards/event_reward_fn/std": 6.2895103842020035, | |
| "rewards/format_reward_fn/mean": 0.9318698942661285, | |
| "rewards/format_reward_fn/std": 0.2051788135431707, | |
| "step": 5136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.009765625, | |
| "completions/max_length": 250.6875, | |
| "completions/max_terminated_length": 248.625, | |
| "completions/mean_length": 210.5322265625, | |
| "completions/mean_terminated_length": 210.076735496521, | |
| "completions/min_length": 171.8125, | |
| "completions/min_terminated_length": 171.8125, | |
| "entropy": 0.09253401588648558, | |
| "epoch": 5.006802721088436, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.12024762481451035, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0011, | |
| "num_tokens": 422781699.0, | |
| "reward": 12.157626032829285, | |
| "reward_std": 0.7537666261196136, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9695763923227787, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10307722567813471, | |
| "rewards/event_reward_fn/mean": 10.2197265625, | |
| "rewards/event_reward_fn/std": 5.788311317563057, | |
| "rewards/format_reward_fn/mean": 0.9683229438960552, | |
| "rewards/format_reward_fn/std": 0.1254920600913465, | |
| "step": 5152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0087890625, | |
| "completions/max_length": 247.625, | |
| "completions/max_terminated_length": 246.0, | |
| "completions/mean_length": 210.9833984375, | |
| "completions/mean_terminated_length": 210.60074615478516, | |
| "completions/min_length": 176.0, | |
| "completions/min_terminated_length": 176.0, | |
| "entropy": 0.09116627182811499, | |
| "epoch": 5.022351797862002, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.12762567400932312, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0022, | |
| "num_tokens": 423997238.0, | |
| "reward": 11.60620766878128, | |
| "reward_std": 0.7599927112460136, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9486931003630161, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.15455256192944944, | |
| "rewards/event_reward_fn/mean": 9.7001953125, | |
| "rewards/event_reward_fn/std": 5.030976966023445, | |
| "rewards/format_reward_fn/mean": 0.9573194123804569, | |
| "rewards/format_reward_fn/std": 0.15234834514558315, | |
| "step": 5168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01953125, | |
| "completions/max_length": 251.125, | |
| "completions/max_terminated_length": 246.875, | |
| "completions/mean_length": 213.2099609375, | |
| "completions/mean_terminated_length": 212.3174467086792, | |
| "completions/min_length": 177.75, | |
| "completions/min_terminated_length": 177.75, | |
| "entropy": 0.0908237830735743, | |
| "epoch": 5.037900874635569, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.09851375967264175, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0016, | |
| "num_tokens": 425258569.0, | |
| "reward": 12.098788142204285, | |
| "reward_std": 0.7658343818038702, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9538118988275528, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14205206802580506, | |
| "rewards/event_reward_fn/mean": 10.189453125, | |
| "rewards/event_reward_fn/std": 5.354374468326569, | |
| "rewards/format_reward_fn/mean": 0.9555229768157005, | |
| "rewards/format_reward_fn/std": 0.16036075167357922, | |
| "step": 5184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052734375, | |
| "completions/max_length": 255.0625, | |
| "completions/max_terminated_length": 251.875, | |
| "completions/mean_length": 219.5107421875, | |
| "completions/mean_terminated_length": 217.43249702453613, | |
| "completions/min_length": 176.375, | |
| "completions/min_terminated_length": 176.375, | |
| "entropy": 0.08694863086566329, | |
| "epoch": 5.053449951409135, | |
| "frac_reward_zero_std": 0.40625, | |
| "grad_norm": 0.15762481093406677, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0041, | |
| "num_tokens": 426630832.0, | |
| "reward": 12.561151146888733, | |
| "reward_std": 0.8509459039196372, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9281382337212563, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.21045659307856113, | |
| "rewards/event_reward_fn/mean": 10.69921875, | |
| "rewards/event_reward_fn/std": 6.01863569021225, | |
| "rewards/format_reward_fn/mean": 0.9337940216064453, | |
| "rewards/format_reward_fn/std": 0.2126003741286695, | |
| "step": 5200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.037109375, | |
| "completions/max_length": 254.6875, | |
| "completions/max_terminated_length": 252.0, | |
| "completions/mean_length": 218.7431640625, | |
| "completions/mean_terminated_length": 217.25553607940674, | |
| "completions/min_length": 182.0625, | |
| "completions/min_terminated_length": 182.0625, | |
| "entropy": 0.08299205871298909, | |
| "epoch": 5.068999028182701, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.15465058386325836, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0061, | |
| "num_tokens": 427929997.0, | |
| "reward": 12.534621059894562, | |
| "reward_std": 0.8048240784555674, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9337992817163467, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1961612788727507, | |
| "rewards/event_reward_fn/mean": 10.6591796875, | |
| "rewards/event_reward_fn/std": 5.7010853588581085, | |
| "rewards/format_reward_fn/mean": 0.9416420236229897, | |
| "rewards/format_reward_fn/std": 0.19764397107064724, | |
| "step": 5216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01953125, | |
| "completions/max_length": 252.0625, | |
| "completions/max_terminated_length": 250.6875, | |
| "completions/mean_length": 213.8251953125, | |
| "completions/mean_terminated_length": 213.00183773040771, | |
| "completions/min_length": 173.75, | |
| "completions/min_terminated_length": 173.75, | |
| "entropy": 0.07737769559025764, | |
| "epoch": 5.084548104956268, | |
| "frac_reward_zero_std": 0.4765625, | |
| "grad_norm": 0.1074177548289299, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0011, | |
| "num_tokens": 429246174.0, | |
| "reward": 12.525750398635864, | |
| "reward_std": 0.6775472220033407, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9568941742181778, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14373183471616358, | |
| "rewards/event_reward_fn/mean": 10.611328125, | |
| "rewards/event_reward_fn/std": 5.806536749005318, | |
| "rewards/format_reward_fn/mean": 0.9575280509889126, | |
| "rewards/format_reward_fn/std": 0.15517638879828155, | |
| "step": 5232 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.037109375, | |
| "completions/max_length": 253.25, | |
| "completions/max_terminated_length": 249.6875, | |
| "completions/mean_length": 217.044921875, | |
| "completions/mean_terminated_length": 215.57467937469482, | |
| "completions/min_length": 174.4375, | |
| "completions/min_terminated_length": 174.4375, | |
| "entropy": 0.07887168414890766, | |
| "epoch": 5.100097181729835, | |
| "frac_reward_zero_std": 0.40234375, | |
| "grad_norm": 0.100493885576725, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 430597164.0, | |
| "reward": 12.71329003572464, | |
| "reward_std": 0.7885365970432758, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9540953673422337, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14646161976270378, | |
| "rewards/event_reward_fn/mean": 10.8056640625, | |
| "rewards/event_reward_fn/std": 6.382747828960419, | |
| "rewards/format_reward_fn/mean": 0.9535306617617607, | |
| "rewards/format_reward_fn/std": 0.15756959468126297, | |
| "step": 5248 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0517578125, | |
| "completions/max_length": 255.6875, | |
| "completions/max_terminated_length": 249.375, | |
| "completions/mean_length": 215.978515625, | |
| "completions/mean_terminated_length": 213.8056936264038, | |
| "completions/min_length": 172.3125, | |
| "completions/min_terminated_length": 172.3125, | |
| "entropy": 0.07278733002021909, | |
| "epoch": 5.115646258503402, | |
| "frac_reward_zero_std": 0.41015625, | |
| "grad_norm": 0.10533929616212845, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0052, | |
| "num_tokens": 431979870.0, | |
| "reward": 12.67308360338211, | |
| "reward_std": 0.8340425789356232, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9266472458839417, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.2227602507919073, | |
| "rewards/event_reward_fn/mean": 10.8173828125, | |
| "rewards/event_reward_fn/std": 6.615099906921387, | |
| "rewards/format_reward_fn/mean": 0.9290535151958466, | |
| "rewards/format_reward_fn/std": 0.22935187257826328, | |
| "step": 5264 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0166015625, | |
| "completions/max_length": 254.375, | |
| "completions/max_terminated_length": 251.125, | |
| "completions/mean_length": 215.640625, | |
| "completions/mean_terminated_length": 214.95352268218994, | |
| "completions/min_length": 173.5, | |
| "completions/min_terminated_length": 173.5, | |
| "entropy": 0.07842768542468548, | |
| "epoch": 5.131195335276968, | |
| "frac_reward_zero_std": 0.38671875, | |
| "grad_norm": 0.0805554911494255, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0007, | |
| "num_tokens": 433291126.0, | |
| "reward": 12.185501873493195, | |
| "reward_std": 0.7526118885725737, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9601360224187374, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13111005796235986, | |
| "rewards/event_reward_fn/mean": 10.2646484375, | |
| "rewards/event_reward_fn/std": 6.194005638360977, | |
| "rewards/format_reward_fn/mean": 0.9607173651456833, | |
| "rewards/format_reward_fn/std": 0.14473758242093027, | |
| "step": 5280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.021484375, | |
| "completions/max_length": 253.0625, | |
| "completions/max_terminated_length": 252.0, | |
| "completions/mean_length": 216.375, | |
| "completions/mean_terminated_length": 215.49572944641113, | |
| "completions/min_length": 170.0625, | |
| "completions/min_terminated_length": 170.0625, | |
| "entropy": 0.08066530339419842, | |
| "epoch": 5.146744412050534, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.15137051045894623, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 434651818.0, | |
| "reward": 12.391696512699127, | |
| "reward_std": 0.8405030891299248, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9705284647643566, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.10062799096340314, | |
| "rewards/event_reward_fn/mean": 10.4541015625, | |
| "rewards/event_reward_fn/std": 5.5794040858745575, | |
| "rewards/format_reward_fn/mean": 0.9670664444565773, | |
| "rewards/format_reward_fn/std": 0.12125032884068787, | |
| "step": 5296 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0185546875, | |
| "completions/max_length": 253.0625, | |
| "completions/max_terminated_length": 250.75, | |
| "completions/mean_length": 214.7646484375, | |
| "completions/mean_terminated_length": 214.00485610961914, | |
| "completions/min_length": 173.375, | |
| "completions/min_terminated_length": 173.375, | |
| "entropy": 0.08317997679114342, | |
| "epoch": 5.162293488824101, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.1106635183095932, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 436045961.0, | |
| "reward": 12.810622453689575, | |
| "reward_std": 0.8223424591124058, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.960607685148716, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13043461105553433, | |
| "rewards/event_reward_fn/mean": 10.8955078125, | |
| "rewards/event_reward_fn/std": 6.705768942832947, | |
| "rewards/format_reward_fn/mean": 0.9545068889856339, | |
| "rewards/format_reward_fn/std": 0.16276416694745421, | |
| "step": 5312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0244140625, | |
| "completions/max_length": 253.5625, | |
| "completions/max_terminated_length": 250.0, | |
| "completions/mean_length": 212.7001953125, | |
| "completions/mean_terminated_length": 211.5588846206665, | |
| "completions/min_length": 165.875, | |
| "completions/min_terminated_length": 165.875, | |
| "entropy": 0.07951848162338138, | |
| "epoch": 5.177842565597667, | |
| "frac_reward_zero_std": 0.40625, | |
| "grad_norm": 0.08208124339580536, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0005, | |
| "num_tokens": 437368618.0, | |
| "reward": 12.59171849489212, | |
| "reward_std": 0.7564017958939075, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.952126257121563, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.14843681757338345, | |
| "rewards/event_reward_fn/mean": 10.6962890625, | |
| "rewards/event_reward_fn/std": 5.812787741422653, | |
| "rewards/format_reward_fn/mean": 0.9433031603693962, | |
| "rewards/format_reward_fn/std": 0.16410461044870317, | |
| "step": 5328 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.017578125, | |
| "completions/max_length": 253.375, | |
| "completions/max_terminated_length": 251.25, | |
| "completions/mean_length": 216.083984375, | |
| "completions/mean_terminated_length": 215.36172103881836, | |
| "completions/min_length": 177.1875, | |
| "completions/min_terminated_length": 177.1875, | |
| "entropy": 0.08092350885272026, | |
| "epoch": 5.1933916423712345, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.18105757236480713, | |
| "learning_rate": 5e-05, | |
| "loss": -0.001, | |
| "num_tokens": 438724608.0, | |
| "reward": 12.999197125434875, | |
| "reward_std": 0.9012727215886116, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9417021572589874, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17351033969316632, | |
| "rewards/event_reward_fn/mean": 11.1259765625, | |
| "rewards/event_reward_fn/std": 5.892582669854164, | |
| "rewards/format_reward_fn/mean": 0.9315183274447918, | |
| "rewards/format_reward_fn/std": 0.18927003536373377, | |
| "step": 5344 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0341796875, | |
| "completions/max_length": 254.625, | |
| "completions/max_terminated_length": 251.5, | |
| "completions/mean_length": 215.775390625, | |
| "completions/mean_terminated_length": 214.33139896392822, | |
| "completions/min_length": 172.0, | |
| "completions/min_terminated_length": 172.0, | |
| "entropy": 0.08449570368975401, | |
| "epoch": 5.208940719144801, | |
| "frac_reward_zero_std": 0.39453125, | |
| "grad_norm": 0.07373011112213135, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0017, | |
| "num_tokens": 439989246.0, | |
| "reward": 11.719346940517426, | |
| "reward_std": 0.6102266386151314, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9474032297730446, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1586525976890698, | |
| "rewards/event_reward_fn/mean": 9.826171875, | |
| "rewards/event_reward_fn/std": 5.788570657372475, | |
| "rewards/format_reward_fn/mean": 0.9457719549536705, | |
| "rewards/format_reward_fn/std": 0.176345658255741, | |
| "step": 5360 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.01171875, | |
| "completions/max_length": 250.3125, | |
| "completions/max_terminated_length": 249.1875, | |
| "completions/mean_length": 213.5673828125, | |
| "completions/mean_terminated_length": 213.04458808898926, | |
| "completions/min_length": 174.0625, | |
| "completions/min_terminated_length": 174.0625, | |
| "entropy": 0.08259732741862535, | |
| "epoch": 5.224489795918367, | |
| "frac_reward_zero_std": 0.4140625, | |
| "grad_norm": 0.09539102017879486, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0017, | |
| "num_tokens": 441313295.0, | |
| "reward": 12.156057178974152, | |
| "reward_std": 0.6409936174750328, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9664278998970985, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.0996909779496491, | |
| "rewards/event_reward_fn/mean": 10.23046875, | |
| "rewards/event_reward_fn/std": 6.2610200345516205, | |
| "rewards/format_reward_fn/mean": 0.9591603875160217, | |
| "rewards/format_reward_fn/std": 0.11145789409056306, | |
| "step": 5376 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.029296875, | |
| "completions/max_length": 253.125, | |
| "completions/max_terminated_length": 250.1875, | |
| "completions/mean_length": 214.4453125, | |
| "completions/mean_terminated_length": 213.2188024520874, | |
| "completions/min_length": 173.0, | |
| "completions/min_terminated_length": 173.0, | |
| "entropy": 0.0805953610688448, | |
| "epoch": 5.240038872691934, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.1731644719839096, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0044, | |
| "num_tokens": 442696435.0, | |
| "reward": 12.83944684267044, | |
| "reward_std": 0.8571697734296322, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9432271271944046, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.17556072783190757, | |
| "rewards/event_reward_fn/mean": 10.9716796875, | |
| "rewards/event_reward_fn/std": 6.772303909063339, | |
| "rewards/format_reward_fn/mean": 0.9245400987565517, | |
| "rewards/format_reward_fn/std": 0.20365634886547923, | |
| "step": 5392 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0263671875, | |
| "completions/max_length": 254.75, | |
| "completions/max_terminated_length": 252.3125, | |
| "completions/mean_length": 219.8271484375, | |
| "completions/mean_terminated_length": 218.86177444458008, | |
| "completions/min_length": 183.75, | |
| "completions/min_terminated_length": 183.75, | |
| "entropy": 0.08317722985520959, | |
| "epoch": 5.2555879494655, | |
| "frac_reward_zero_std": 0.41796875, | |
| "grad_norm": 0.12763230502605438, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0011, | |
| "num_tokens": 443952362.0, | |
| "reward": 11.955138087272644, | |
| "reward_std": 0.7103091701865196, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.946000337600708, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1564433122985065, | |
| "rewards/event_reward_fn/mean": 10.068359375, | |
| "rewards/event_reward_fn/std": 6.221550449728966, | |
| "rewards/format_reward_fn/mean": 0.9407782070338726, | |
| "rewards/format_reward_fn/std": 0.15689158393070102, | |
| "step": 5408 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.052734375, | |
| "completions/max_length": 254.875, | |
| "completions/max_terminated_length": 251.6875, | |
| "completions/mean_length": 220.3798828125, | |
| "completions/mean_terminated_length": 218.413254737854, | |
| "completions/min_length": 178.125, | |
| "completions/min_terminated_length": 178.125, | |
| "entropy": 0.0799885387532413, | |
| "epoch": 5.271137026239067, | |
| "frac_reward_zero_std": 0.37890625, | |
| "grad_norm": 0.19113846123218536, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 445303815.0, | |
| "reward": 12.331937193870544, | |
| "reward_std": 0.80683533847332, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9195377230644226, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.22412633727071807, | |
| "rewards/event_reward_fn/mean": 10.5029296875, | |
| "rewards/event_reward_fn/std": 6.035468250513077, | |
| "rewards/format_reward_fn/mean": 0.9094697572290897, | |
| "rewards/format_reward_fn/std": 0.23325852677226067, | |
| "step": 5424 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.033203125, | |
| "completions/max_length": 254.875, | |
| "completions/max_terminated_length": 253.25, | |
| "completions/mean_length": 219.4560546875, | |
| "completions/mean_terminated_length": 218.1432819366455, | |
| "completions/min_length": 179.625, | |
| "completions/min_terminated_length": 179.625, | |
| "entropy": 0.08487229980528355, | |
| "epoch": 5.2866861030126335, | |
| "frac_reward_zero_std": 0.3828125, | |
| "grad_norm": 0.15567469596862793, | |
| "learning_rate": 5e-05, | |
| "loss": 0.002, | |
| "num_tokens": 446613174.0, | |
| "reward": 12.17100590467453, | |
| "reward_std": 0.8579323226585984, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9449139796197414, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1787831949768588, | |
| "rewards/event_reward_fn/mean": 10.2880859375, | |
| "rewards/event_reward_fn/std": 5.768570572137833, | |
| "rewards/format_reward_fn/mean": 0.9380059503018856, | |
| "rewards/format_reward_fn/std": 0.20308683020994067, | |
| "step": 5440 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0341796875, | |
| "completions/max_length": 252.875, | |
| "completions/max_terminated_length": 251.25, | |
| "completions/mean_length": 218.060546875, | |
| "completions/mean_terminated_length": 216.607084274292, | |
| "completions/min_length": 173.25, | |
| "completions/min_terminated_length": 173.25, | |
| "entropy": 0.08181583508849144, | |
| "epoch": 5.3022351797862, | |
| "frac_reward_zero_std": 0.3984375, | |
| "grad_norm": 0.0759691596031189, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0023, | |
| "num_tokens": 447935032.0, | |
| "reward": 12.78515636920929, | |
| "reward_std": 0.7136668600142002, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9424515776336193, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.16937226109439507, | |
| "rewards/event_reward_fn/mean": 10.8974609375, | |
| "rewards/event_reward_fn/std": 6.170664951205254, | |
| "rewards/format_reward_fn/mean": 0.9452439360320568, | |
| "rewards/format_reward_fn/std": 0.17747941031120718, | |
| "step": 5456 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0244140625, | |
| "completions/max_length": 252.5625, | |
| "completions/max_terminated_length": 250.125, | |
| "completions/mean_length": 216.1611328125, | |
| "completions/mean_terminated_length": 215.20103359222412, | |
| "completions/min_length": 176.5, | |
| "completions/min_terminated_length": 176.5, | |
| "entropy": 0.0815726825967431, | |
| "epoch": 5.317784256559767, | |
| "frac_reward_zero_std": 0.33203125, | |
| "grad_norm": 0.08463463187217712, | |
| "learning_rate": 5e-05, | |
| "loss": 0.001, | |
| "num_tokens": 449236385.0, | |
| "reward": 12.28379362821579, | |
| "reward_std": 0.7982866708189249, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9552294872701168, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.13586847530677915, | |
| "rewards/event_reward_fn/mean": 10.3662109375, | |
| "rewards/event_reward_fn/std": 5.656336680054665, | |
| "rewards/format_reward_fn/mean": 0.962353054434061, | |
| "rewards/format_reward_fn/std": 0.13789360341615975, | |
| "step": 5472 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0263671875, | |
| "completions/max_length": 253.3125, | |
| "completions/max_terminated_length": 251.5, | |
| "completions/mean_length": 217.390625, | |
| "completions/mean_terminated_length": 216.35210609436035, | |
| "completions/min_length": 176.8125, | |
| "completions/min_terminated_length": 176.8125, | |
| "entropy": 0.08938139118254185, | |
| "epoch": 5.333333333333333, | |
| "frac_reward_zero_std": 0.36328125, | |
| "grad_norm": 0.1346571296453476, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0015, | |
| "num_tokens": 450614525.0, | |
| "reward": 12.205934286117554, | |
| "reward_std": 0.8306602947413921, | |
| "rewards/bm25_retrieval_reward_fn/mean": 0.9631715565919876, | |
| "rewards/bm25_retrieval_reward_fn/std": 0.1261273269483354, | |
| "rewards/event_reward_fn/mean": 10.283203125, | |
| "rewards/event_reward_fn/std": 5.946963086724281, | |
| "rewards/format_reward_fn/mean": 0.9595597796142101, | |
| "rewards/format_reward_fn/std": 0.14464661804959178, | |
| "step": 5488 | |
| } | |
| ], | |
| "logging_steps": 16, | |
| "max_steps": 10290, | |
| "num_input_tokens_seen": 451600012, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |