Instructions to use usr256864/ee_gol_f1_2000 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use usr256864/ee_gol_f1_2000 with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("HiTZ/GoLLIE-7B")
model = PeftModel.from_pretrained(base_model, "usr256864/ee_gol_f1_2000")
```
- Transformers
How to use usr256864/ee_gol_f1_2000 with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="usr256864/ee_gol_f1_2000")
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("usr256864/ee_gol_f1_2000", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use usr256864/ee_gol_f1_2000 with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "usr256864/ee_gol_f1_2000"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "usr256864/ee_gol_f1_2000",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker
docker model run hf.co/usr256864/ee_gol_f1_2000
- SGLang
How to use usr256864/ee_gol_f1_2000 with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "usr256864/ee_gol_f1_2000" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "usr256864/ee_gol_f1_2000",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "usr256864/ee_gol_f1_2000" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "usr256864/ee_gol_f1_2000",
    "prompt": "Once upon a time,",
    "max_tokens": 512,
    "temperature": 0.5
  }'
```
- Docker Model Runner
How to use usr256864/ee_gol_f1_2000 with Docker Model Runner:
docker model run hf.co/usr256864/ee_gol_f1_2000
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.2070006035003018, | |
| "eval_steps": 250, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.12125, | |
| "completions/max_length": 255.34, | |
| "completions/max_terminated_length": 252.14, | |
| "completions/mean_length": 221.534375, | |
| "completions/mean_terminated_length": 216.93697082519532, | |
| "completions/min_length": 173.54, | |
| "completions/min_terminated_length": 173.54, | |
| "entropy": 0.10048629969358444, | |
| "epoch": 0.030175015087507542, | |
| "frac_reward_zero_std": 0.3225, | |
| "grad_norm": 0.46380576491355896, | |
| "learning_rate": 5e-05, | |
| "loss": 0.004, | |
| "num_tokens": 8142396.0, | |
| "reward": 7.30375, | |
| "reward_std": 1.5006456315517425, | |
| "rewards/event_reward_fn/mean": 7.30375, | |
| "rewards/event_reward_fn/std": 6.278198585510254, | |
| "step": 50, | |
| "step_time": 40.824848868116966 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.068125, | |
| "completions/max_length": 251.74, | |
| "completions/max_terminated_length": 248.06, | |
| "completions/mean_length": 215.08625, | |
| "completions/mean_terminated_length": 212.25316284179686, | |
| "completions/min_length": 171.76, | |
| "completions/min_terminated_length": 171.76, | |
| "entropy": 0.10318506792187691, | |
| "epoch": 0.060350030175015085, | |
| "frac_reward_zero_std": 0.325, | |
| "grad_norm": 0.21978232264518738, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0025, | |
| "num_tokens": 16421719.0, | |
| "reward": 7.36875, | |
| "reward_std": 1.3263894939422607, | |
| "rewards/event_reward_fn/mean": 7.36875, | |
| "rewards/event_reward_fn/std": 6.119045643806458, | |
| "step": 100, | |
| "step_time": 38.99798643006128 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4825, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 251.32, | |
| "completions/mean_length": 238.104375, | |
| "completions/mean_terminated_length": 221.8957485961914, | |
| "completions/min_length": 191.34, | |
| "completions/min_terminated_length": 191.34, | |
| "entropy": 0.10444845259189606, | |
| "epoch": 0.09052504526252263, | |
| "frac_reward_zero_std": 0.2925, | |
| "grad_norm": 0.5579063892364502, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0006, | |
| "num_tokens": 24885844.0, | |
| "reward": 7.74625, | |
| "reward_std": 1.5345598912239076, | |
| "rewards/event_reward_fn/mean": 7.74625, | |
| "rewards/event_reward_fn/std": 6.464660973548889, | |
| "step": 150, | |
| "step_time": 41.26081488572061 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.7925, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 202.92, | |
| "completions/mean_length": 245.916875, | |
| "completions/mean_terminated_length": 184.6587713623047, | |
| "completions/min_length": 199.94, | |
| "completions/min_terminated_length": 169.22, | |
| "entropy": 0.10581055819988251, | |
| "epoch": 0.12070006035003017, | |
| "frac_reward_zero_std": 0.33, | |
| "grad_norm": 0.31808722019195557, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0003, | |
| "num_tokens": 33226966.0, | |
| "reward": 7.19125, | |
| "reward_std": 1.4298825466632843, | |
| "rewards/event_reward_fn/mean": 7.19125, | |
| "rewards/event_reward_fn/std": 5.8599746036529545, | |
| "step": 200, | |
| "step_time": 41.91275953448203 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.825, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 181.12, | |
| "completions/mean_length": 245.851875, | |
| "completions/mean_terminated_length": 163.72261688232422, | |
| "completions/min_length": 198.46, | |
| "completions/min_terminated_length": 152.38, | |
| "entropy": 0.10499135926365852, | |
| "epoch": 0.15087507543753773, | |
| "frac_reward_zero_std": 0.2875, | |
| "grad_norm": 0.2646925449371338, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0005, | |
| "num_tokens": 41523308.0, | |
| "reward": 7.9475, | |
| "reward_std": 1.5300491595268249, | |
| "rewards/event_reward_fn/mean": 7.9475, | |
| "rewards/event_reward_fn/std": 6.3965685844421385, | |
| "step": 250, | |
| "step_time": 41.663273623897695 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.898125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 149.62, | |
| "completions/mean_length": 250.625625, | |
| "completions/mean_terminated_length": 144.78653198242188, | |
| "completions/min_length": 215.68, | |
| "completions/min_terminated_length": 138.88, | |
| "entropy": 0.10884671121835708, | |
| "epoch": 0.18105009052504525, | |
| "frac_reward_zero_std": 0.3325, | |
| "grad_norm": 0.5418329834938049, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0002, | |
| "num_tokens": 49889481.0, | |
| "reward": 7.489375, | |
| "reward_std": 1.5504147619009019, | |
| "rewards/event_reward_fn/mean": 7.489375, | |
| "rewards/event_reward_fn/std": 6.099679977893829, | |
| "step": 300, | |
| "step_time": 40.817094522019616 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9275, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 131.48, | |
| "completions/mean_length": 253.1625, | |
| "completions/mean_terminated_length": 125.53590209960937, | |
| "completions/min_length": 228.04, | |
| "completions/min_terminated_length": 120.52, | |
| "entropy": 0.10796756476163864, | |
| "epoch": 0.2112251056125528, | |
| "frac_reward_zero_std": 0.3175, | |
| "grad_norm": 0.4433981776237488, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0019, | |
| "num_tokens": 58206892.0, | |
| "reward": 7.89625, | |
| "reward_std": 1.573977051973343, | |
| "rewards/event_reward_fn/mean": 7.89625, | |
| "rewards/event_reward_fn/std": 6.586006484031677, | |
| "step": 350, | |
| "step_time": 42.12015992245928 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.945625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 151.42, | |
| "completions/mean_length": 254.76625, | |
| "completions/mean_terminated_length": 146.34000091552736, | |
| "completions/min_length": 238.42, | |
| "completions/min_terminated_length": 141.14, | |
| "entropy": 0.11530103281140328, | |
| "epoch": 0.24140012070006034, | |
| "frac_reward_zero_std": 0.29, | |
| "grad_norm": 0.3932775855064392, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 66513664.0, | |
| "reward": 7.304375, | |
| "reward_std": 1.552179645895958, | |
| "rewards/event_reward_fn/mean": 7.304375, | |
| "rewards/event_reward_fn/std": 5.687906408309937, | |
| "step": 400, | |
| "step_time": 40.78123372233997 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.92375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 185.1, | |
| "completions/mean_length": 254.35875, | |
| "completions/mean_terminated_length": 178.61883544921875, | |
| "completions/min_length": 232.56, | |
| "completions/min_terminated_length": 171.12, | |
| "entropy": 0.13443249970674515, | |
| "epoch": 0.27157513578756787, | |
| "frac_reward_zero_std": 0.315, | |
| "grad_norm": 0.2284364551305771, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0013, | |
| "num_tokens": 74493599.0, | |
| "reward": 7.766875, | |
| "reward_std": 1.5890911322832109, | |
| "rewards/event_reward_fn/mean": 7.766875, | |
| "rewards/event_reward_fn/std": 6.074563751220703, | |
| "step": 450, | |
| "step_time": 40.8964025861409 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.983125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 73.22, | |
| "completions/mean_length": 255.728125, | |
| "completions/mean_terminated_length": 72.48666687011719, | |
| "completions/min_length": 250.14, | |
| "completions/min_terminated_length": 70.94, | |
| "entropy": 0.1348781806230545, | |
| "epoch": 0.30175015087507545, | |
| "frac_reward_zero_std": 0.32, | |
| "grad_norm": 0.44683775305747986, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0006, | |
| "num_tokens": 82766712.0, | |
| "reward": 7.835625, | |
| "reward_std": 1.6530324041843414, | |
| "rewards/event_reward_fn/mean": 7.835625, | |
| "rewards/event_reward_fn/std": 6.139980282783508, | |
| "step": 500, | |
| "step_time": 41.13054014526191 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.996875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 24.36, | |
| "completions/mean_length": 255.96125, | |
| "completions/mean_terminated_length": 24.36, | |
| "completions/min_length": 254.76, | |
| "completions/min_terminated_length": 24.36, | |
| "entropy": 0.13759294494986535, | |
| "epoch": 0.331925165962583, | |
| "frac_reward_zero_std": 0.265, | |
| "grad_norm": 0.40625813603401184, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0, | |
| "num_tokens": 91249822.0, | |
| "reward": 7.55375, | |
| "reward_std": 1.706419097185135, | |
| "rewards/event_reward_fn/mean": 7.55375, | |
| "rewards/event_reward_fn/std": 5.799948143959045, | |
| "step": 550, | |
| "step_time": 42.41818935459829 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9975, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 15.02, | |
| "completions/mean_length": 255.981875, | |
| "completions/mean_terminated_length": 14.98, | |
| "completions/min_length": 255.58, | |
| "completions/min_terminated_length": 14.94, | |
| "entropy": 0.135696639418602, | |
| "epoch": 0.3621001810500905, | |
| "frac_reward_zero_std": 0.27, | |
| "grad_norm": 0.34435781836509705, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 99526582.0, | |
| "reward": 8.249375, | |
| "reward_std": 1.7093309688568115, | |
| "rewards/event_reward_fn/mean": 8.249375, | |
| "rewards/event_reward_fn/std": 6.437053818702697, | |
| "step": 600, | |
| "step_time": 42.50718544923991 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 256.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 256.0, | |
| "completions/min_terminated_length": 0.0, | |
| "entropy": 0.14394851058721542, | |
| "epoch": 0.3922751961375981, | |
| "frac_reward_zero_std": 0.3125, | |
| "grad_norm": 0.2780283987522125, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 107729490.0, | |
| "reward": 8.115625, | |
| "reward_std": 1.5425574934482575, | |
| "rewards/event_reward_fn/mean": 8.115625, | |
| "rewards/event_reward_fn/std": 6.014267163276672, | |
| "step": 650, | |
| "step_time": 60.873589005278774 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 256.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 256.0, | |
| "completions/min_terminated_length": 0.0, | |
| "entropy": 0.1508351384103298, | |
| "epoch": 0.4224502112251056, | |
| "frac_reward_zero_std": 0.275, | |
| "grad_norm": 0.21461114287376404, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 116090467.0, | |
| "reward": 8.166875, | |
| "reward_std": 1.743121521472931, | |
| "rewards/event_reward_fn/mean": 8.166875, | |
| "rewards/event_reward_fn/std": 6.155384964942932, | |
| "step": 700, | |
| "step_time": 41.52520179868036 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.99875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 9.66, | |
| "completions/mean_length": 255.981875, | |
| "completions/mean_terminated_length": 9.66, | |
| "completions/min_length": 255.42, | |
| "completions/min_terminated_length": 9.66, | |
| "entropy": 0.16852732509374618, | |
| "epoch": 0.45262522631261315, | |
| "frac_reward_zero_std": 0.2975, | |
| "grad_norm": 0.30243417620658875, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 124432282.0, | |
| "reward": 7.599375, | |
| "reward_std": 1.4733531725406648, | |
| "rewards/event_reward_fn/mean": 7.599375, | |
| "rewards/event_reward_fn/std": 5.72325975894928, | |
| "step": 750, | |
| "step_time": 45.15769186520076 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 256.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 256.0, | |
| "completions/min_terminated_length": 0.0, | |
| "entropy": 0.18111263811588288, | |
| "epoch": 0.4828002414001207, | |
| "frac_reward_zero_std": 0.28, | |
| "grad_norm": 0.3852519989013672, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 132857345.0, | |
| "reward": 8.109375, | |
| "reward_std": 1.542456374168396, | |
| "rewards/event_reward_fn/mean": 8.109375, | |
| "rewards/event_reward_fn/std": 5.9935719728469845, | |
| "step": 800, | |
| "step_time": 42.812528482141204 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.99875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 10.0, | |
| "completions/mean_length": 255.9925, | |
| "completions/mean_terminated_length": 10.0, | |
| "completions/min_length": 255.76, | |
| "completions/min_terminated_length": 10.0, | |
| "entropy": 0.21682359665632248, | |
| "epoch": 0.5129752564876282, | |
| "frac_reward_zero_std": 0.2425, | |
| "grad_norm": 0.34055572748184204, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0001, | |
| "num_tokens": 141348381.0, | |
| "reward": 7.83125, | |
| "reward_std": 1.7194657081365585, | |
| "rewards/event_reward_fn/mean": 7.83125, | |
| "rewards/event_reward_fn/std": 6.176298160552978, | |
| "step": 850, | |
| "step_time": 42.62233325715526 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.960625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 101.5, | |
| "completions/mean_length": 254.8675, | |
| "completions/mean_terminated_length": 95.82615142822266, | |
| "completions/min_length": 237.06, | |
| "completions/min_terminated_length": 88.58, | |
| "entropy": 1.2534486263990403, | |
| "epoch": 0.5431502715751357, | |
| "frac_reward_zero_std": 0.22, | |
| "grad_norm": 1.9650917053222656, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0044, | |
| "num_tokens": 149758372.0, | |
| "reward": 7.47875, | |
| "reward_std": 1.8692259776592255, | |
| "rewards/event_reward_fn/mean": 7.47875, | |
| "rewards/event_reward_fn/std": 5.956067395210266, | |
| "step": 900, | |
| "step_time": 41.407225477900354 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.944375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 140.22, | |
| "completions/mean_length": 253.330625, | |
| "completions/mean_terminated_length": 132.0480009460449, | |
| "completions/min_length": 209.16, | |
| "completions/min_terminated_length": 122.12, | |
| "entropy": 2.884652135372162, | |
| "epoch": 0.5733252866626434, | |
| "frac_reward_zero_std": 0.3375, | |
| "grad_norm": 2.167388916015625, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0175, | |
| "num_tokens": 158149273.0, | |
| "reward": 6.6125, | |
| "reward_std": 1.6964978063106537, | |
| "rewards/event_reward_fn/mean": 6.6125, | |
| "rewards/event_reward_fn/std": 6.0062398338317875, | |
| "step": 950, | |
| "step_time": 41.2563528472418 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.994375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 31.2, | |
| "completions/mean_length": 255.541875, | |
| "completions/mean_terminated_length": 31.16, | |
| "completions/min_length": 246.16, | |
| "completions/min_terminated_length": 31.12, | |
| "entropy": 1.2761903527379035, | |
| "epoch": 0.6035003017501509, | |
| "frac_reward_zero_std": 0.3825, | |
| "grad_norm": 0.4628017842769623, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0033, | |
| "num_tokens": 166538998.0, | |
| "reward": 6.94875, | |
| "reward_std": 1.378657329082489, | |
| "rewards/event_reward_fn/mean": 6.94875, | |
| "rewards/event_reward_fn/std": 6.216048922538757, | |
| "step": 1000, | |
| "step_time": 42.322889749883906 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.984375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 61.4, | |
| "completions/mean_length": 255.64625, | |
| "completions/mean_terminated_length": 60.46533386230469, | |
| "completions/min_length": 248.46, | |
| "completions/min_terminated_length": 59.02, | |
| "entropy": 0.4890740931034088, | |
| "epoch": 0.6336753168376584, | |
| "frac_reward_zero_std": 0.395, | |
| "grad_norm": 0.33829060196876526, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 174874216.0, | |
| "reward": 6.793125, | |
| "reward_std": 1.1867496293783188, | |
| "rewards/event_reward_fn/mean": 6.793125, | |
| "rewards/event_reward_fn/std": 5.404484539031983, | |
| "step": 1050, | |
| "step_time": 42.91726479450008 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.993125, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 28.78, | |
| "completions/mean_length": 255.443125, | |
| "completions/mean_terminated_length": 28.65, | |
| "completions/min_length": 243.56, | |
| "completions/min_terminated_length": 28.52, | |
| "entropy": 0.7336188541352748, | |
| "epoch": 0.663850331925166, | |
| "frac_reward_zero_std": 0.3925, | |
| "grad_norm": 0.5986895561218262, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0053, | |
| "num_tokens": 183293167.0, | |
| "reward": 6.371875, | |
| "reward_std": 1.27341972053051, | |
| "rewards/event_reward_fn/mean": 6.371875, | |
| "rewards/event_reward_fn/std": 5.160589256286621, | |
| "step": 1100, | |
| "step_time": 42.857495513077595 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.99875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 8.74, | |
| "completions/mean_length": 255.953125, | |
| "completions/mean_terminated_length": 8.74, | |
| "completions/min_length": 254.5, | |
| "completions/min_terminated_length": 8.74, | |
| "entropy": 0.41359326869249347, | |
| "epoch": 0.6940253470126735, | |
| "frac_reward_zero_std": 0.365, | |
| "grad_norm": 0.714463472366333, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 191713508.0, | |
| "reward": 6.96625, | |
| "reward_std": 1.294020129442215, | |
| "rewards/event_reward_fn/mean": 6.96625, | |
| "rewards/event_reward_fn/std": 5.736661648750305, | |
| "step": 1150, | |
| "step_time": 41.747385050542654 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.98875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 64.84, | |
| "completions/mean_length": 255.530625, | |
| "completions/mean_terminated_length": 64.83, | |
| "completions/min_length": 244.02, | |
| "completions/min_terminated_length": 64.82, | |
| "entropy": 0.7842025232315063, | |
| "epoch": 0.724200362100181, | |
| "frac_reward_zero_std": 0.3825, | |
| "grad_norm": 2.366915225982666, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 199636639.0, | |
| "reward": 7.296875, | |
| "reward_std": 1.2271459007263184, | |
| "rewards/event_reward_fn/mean": 7.296875, | |
| "rewards/event_reward_fn/std": 5.948999562263489, | |
| "step": 1200, | |
| "step_time": 41.03189678700059 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.99875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 8.64, | |
| "completions/mean_length": 255.95, | |
| "completions/mean_terminated_length": 8.64, | |
| "completions/min_length": 254.4, | |
| "completions/min_terminated_length": 8.64, | |
| "entropy": 1.2313472920656203, | |
| "epoch": 0.7543753771876885, | |
| "frac_reward_zero_std": 0.35, | |
| "grad_norm": 2.256929636001587, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0001, | |
| "num_tokens": 207997095.0, | |
| "reward": 6.69375, | |
| "reward_std": 1.492494255900383, | |
| "rewards/event_reward_fn/mean": 6.69375, | |
| "rewards/event_reward_fn/std": 5.9429325056076046, | |
| "step": 1250, | |
| "step_time": 42.135132130276176 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.98875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 45.68, | |
| "completions/mean_length": 255.489375, | |
| "completions/mean_terminated_length": 45.05666687011719, | |
| "completions/min_length": 244.34, | |
| "completions/min_terminated_length": 44.66, | |
| "entropy": 1.172162665054202, | |
| "epoch": 0.7845503922751962, | |
| "frac_reward_zero_std": 0.36, | |
| "grad_norm": 0.8358303904533386, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0024, | |
| "num_tokens": 216307264.0, | |
| "reward": 6.914375, | |
| "reward_std": 1.4417870903015138, | |
| "rewards/event_reward_fn/mean": 6.914375, | |
| "rewards/event_reward_fn/std": 5.760623688697815, | |
| "step": 1300, | |
| "step_time": 41.30298829050036 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.996875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 18.28, | |
| "completions/mean_length": 255.91, | |
| "completions/mean_terminated_length": 18.09, | |
| "completions/min_length": 253.42, | |
| "completions/min_terminated_length": 17.9, | |
| "entropy": 1.4694241133332253, | |
| "epoch": 0.8147254073627037, | |
| "frac_reward_zero_std": 0.34, | |
| "grad_norm": 6.394040107727051, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 224510279.0, | |
| "reward": 6.273125, | |
| "reward_std": 1.626619552373886, | |
| "rewards/event_reward_fn/mean": 6.273125, | |
| "rewards/event_reward_fn/std": 5.248045358657837, | |
| "step": 1350, | |
| "step_time": 40.860976390804865 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 1.0, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 0.0, | |
| "completions/mean_length": 256.0, | |
| "completions/mean_terminated_length": 0.0, | |
| "completions/min_length": 256.0, | |
| "completions/min_terminated_length": 0.0, | |
| "entropy": 2.0628131467103956, | |
| "epoch": 0.8449004224502112, | |
| "frac_reward_zero_std": 0.3375, | |
| "grad_norm": 1.5476884841918945, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0, | |
| "num_tokens": 232969228.0, | |
| "reward": 6.45875, | |
| "reward_std": 1.7393629193305968, | |
| "rewards/event_reward_fn/mean": 6.45875, | |
| "rewards/event_reward_fn/std": 5.68415337562561, | |
| "step": 1400, | |
| "step_time": 43.151147363660854 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.999375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 0.04, | |
| "completions/mean_length": 255.84125, | |
| "completions/mean_terminated_length": 0.04, | |
| "completions/min_length": 250.92, | |
| "completions/min_terminated_length": 0.04, | |
| "entropy": 1.3854397583007811, | |
| "epoch": 0.8750754375377188, | |
| "frac_reward_zero_std": 0.3375, | |
| "grad_norm": 3.6241378784179688, | |
| "learning_rate": 5e-05, | |
| "loss": -0.001, | |
| "num_tokens": 241277888.0, | |
| "reward": 6.719375, | |
| "reward_std": 1.752496111392975, | |
| "rewards/event_reward_fn/mean": 6.719375, | |
| "rewards/event_reward_fn/std": 6.006656441688538, | |
| "step": 1450, | |
| "step_time": 41.94062339906115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.994375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 6.86, | |
| "completions/mean_length": 254.820625, | |
| "completions/mean_terminated_length": 6.8333333349227905, | |
| "completions/min_length": 237.22, | |
| "completions/min_terminated_length": 6.82, | |
| "entropy": 1.1917432191967965, | |
| "epoch": 0.9052504526252263, | |
| "frac_reward_zero_std": 0.36, | |
| "grad_norm": 7.937113285064697, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0277, | |
| "num_tokens": 249570792.0, | |
| "reward": 6.343125, | |
| "reward_std": 1.5732547068595886, | |
| "rewards/event_reward_fn/mean": 6.343125, | |
| "rewards/event_reward_fn/std": 5.813223929405212, | |
| "step": 1500, | |
| "step_time": 43.07453210723819 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.926875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 131.5, | |
| "completions/mean_length": 253.175, | |
| "completions/mean_terminated_length": 125.9899688720703, | |
| "completions/min_length": 228.9, | |
| "completions/min_terminated_length": 121.38, | |
| "entropy": 0.5228479199111462, | |
| "epoch": 0.9354254677127338, | |
| "frac_reward_zero_std": 0.3875, | |
| "grad_norm": 1.1253968477249146, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 257734094.0, | |
| "reward": 7.513125, | |
| "reward_std": 1.2554410457611085, | |
| "rewards/event_reward_fn/mean": 7.513125, | |
| "rewards/event_reward_fn/std": 5.942786226272583, | |
| "step": 1550, | |
| "step_time": 42.567975415034454 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.926875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 131.88, | |
| "completions/mean_length": 253.125625, | |
| "completions/mean_terminated_length": 128.5973336791992, | |
| "completions/min_length": 223.42, | |
| "completions/min_terminated_length": 126.14, | |
| "entropy": 0.8324106151610613, | |
| "epoch": 0.9656004828002414, | |
| "frac_reward_zero_std": 0.35, | |
| "grad_norm": 0.8266918659210205, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0003, | |
| "num_tokens": 266238921.0, | |
| "reward": 6.545625, | |
| "reward_std": 1.3680988204479219, | |
| "rewards/event_reward_fn/mean": 6.545625, | |
| "rewards/event_reward_fn/std": 5.431567845344543, | |
| "step": 1600, | |
| "step_time": 42.35816644520266 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.979375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 52.06, | |
| "completions/mean_length": 255.3625, | |
| "completions/mean_terminated_length": 50.906666870117185, | |
| "completions/min_length": 244.64, | |
| "completions/min_terminated_length": 50.08, | |
| "entropy": 1.4035920506715776, | |
| "epoch": 0.995775497887749, | |
| "frac_reward_zero_std": 0.3475, | |
| "grad_norm": 11.164112091064453, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0041, | |
| "num_tokens": 274713769.0, | |
| "reward": 6.30375, | |
| "reward_std": 1.4735591614246368, | |
| "rewards/event_reward_fn/mean": 6.30375, | |
| "rewards/event_reward_fn/std": 5.667113132476807, | |
| "step": 1650, | |
| "step_time": 41.3417172247381 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.99875, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 4.16, | |
| "completions/mean_length": 255.81, | |
| "completions/mean_terminated_length": 4.16, | |
| "completions/min_length": 249.92, | |
| "completions/min_terminated_length": 4.16, | |
| "entropy": 1.0942185708135366, | |
| "epoch": 1.0259505129752564, | |
| "frac_reward_zero_std": 0.4125, | |
| "grad_norm": 2.0238513946533203, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0042, | |
| "num_tokens": 282976766.0, | |
| "reward": 7.19375, | |
| "reward_std": 1.4945010322332382, | |
| "rewards/event_reward_fn/mean": 7.19375, | |
| "rewards/event_reward_fn/std": 5.62495879650116, | |
| "step": 1700, | |
| "step_time": 41.58126259226352 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.995, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 27.9, | |
| "completions/mean_length": 255.874375, | |
| "completions/mean_terminated_length": 27.873333435058594, | |
| "completions/min_length": 253.14, | |
| "completions/min_terminated_length": 27.86, | |
| "entropy": 0.6756551740318537, | |
| "epoch": 1.056125528062764, | |
| "frac_reward_zero_std": 0.4125, | |
| "grad_norm": 0.9028272032737732, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 291424307.0, | |
| "reward": 6.863125, | |
| "reward_std": 1.4053943872451782, | |
| "rewards/event_reward_fn/mean": 6.863125, | |
| "rewards/event_reward_fn/std": 5.494638476371765, | |
| "step": 1750, | |
| "step_time": 41.3957015178015 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.985, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 71.64, | |
| "completions/mean_length": 255.53375, | |
| "completions/mean_terminated_length": 71.1, | |
| "completions/min_length": 244.52, | |
| "completions/min_terminated_length": 70.44, | |
| "entropy": 0.8187148047238588, | |
| "epoch": 1.0863005431502715, | |
| "frac_reward_zero_std": 0.3525, | |
| "grad_norm": 4.235013484954834, | |
| "learning_rate": 5e-05, | |
| "loss": -0.0009, | |
| "num_tokens": 300026957.0, | |
| "reward": 7.29375, | |
| "reward_std": 1.5134036219120026, | |
| "rewards/event_reward_fn/mean": 7.29375, | |
| "rewards/event_reward_fn/std": 6.304830470085144, | |
| "step": 1800, | |
| "step_time": 42.75003189910087 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9275, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 154.7, | |
| "completions/mean_length": 253.389375, | |
| "completions/mean_terminated_length": 145.82189025878907, | |
| "completions/min_length": 221.36, | |
| "completions/min_terminated_length": 134.32, | |
| "entropy": 0.8770341634750366, | |
| "epoch": 1.1164755582377792, | |
| "frac_reward_zero_std": 0.42, | |
| "grad_norm": 1.056412696838379, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0016, | |
| "num_tokens": 308294562.0, | |
| "reward": 7.195, | |
| "reward_std": 1.413882914185524, | |
| "rewards/event_reward_fn/mean": 7.195, | |
| "rewards/event_reward_fn/std": 5.875013728141784, | |
| "step": 1850, | |
| "step_time": 42.22391830024426 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.9925, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 50.76, | |
| "completions/mean_length": 255.740625, | |
| "completions/mean_terminated_length": 49.8, | |
| "completions/min_length": 248.52, | |
| "completions/min_terminated_length": 48.84, | |
| "entropy": 0.7402717351168394, | |
| "epoch": 1.1466505733252867, | |
| "frac_reward_zero_std": 0.3725, | |
| "grad_norm": 1.3885732889175415, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0007, | |
| "num_tokens": 316652055.0, | |
| "reward": 7.290625, | |
| "reward_std": 1.5660697519779205, | |
| "rewards/event_reward_fn/mean": 7.290625, | |
| "rewards/event_reward_fn/std": 6.019521760940552, | |
| "step": 1900, | |
| "step_time": 42.37986288452754 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.99375, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 35.46, | |
| "completions/mean_length": 255.766875, | |
| "completions/mean_terminated_length": 35.0, | |
| "completions/min_length": 249.58, | |
| "completions/min_terminated_length": 34.54, | |
| "entropy": 1.1067684018611907, | |
| "epoch": 1.1768255884127943, | |
| "frac_reward_zero_std": 0.3925, | |
| "grad_norm": 1.0810959339141846, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0014, | |
| "num_tokens": 325016448.0, | |
| "reward": 6.665, | |
| "reward_std": 1.538350248336792, | |
| "rewards/event_reward_fn/mean": 6.665, | |
| "rewards/event_reward_fn/std": 6.030116739273072, | |
| "step": 1950, | |
| "step_time": 40.92805422256584 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.990625, | |
| "completions/max_length": 256.0, | |
| "completions/max_terminated_length": 63.46, | |
| "completions/mean_length": 255.716875, | |
| "completions/mean_terminated_length": 63.21, | |
| "completions/min_length": 247.28, | |
| "completions/min_terminated_length": 62.96, | |
| "entropy": 0.7298687703162432, | |
| "epoch": 1.2070006035003018, | |
| "frac_reward_zero_std": 0.3375, | |
| "grad_norm": 0.858845591545105, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0002, | |
| "num_tokens": 333296057.0, | |
| "reward": 7.73125, | |
| "reward_std": 1.5106933176517487, | |
| "rewards/event_reward_fn/mean": 7.73125, | |
| "rewards/event_reward_fn/std": 6.448639197349548, | |
| "step": 2000, | |
| "step_time": 41.15218346009729 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 16570, | |
| "num_input_tokens_seen": 333296057, | |
| "num_train_epochs": 10, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |