Text Generation
Transformers
Safetensors
English
qwen2
Generated from Trainer
grpo
trl
security
smart-contracts
solidity
audit
web3
conversational
text-generation-inference
Instructions to use oxdev/security-auditor-grpo with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use oxdev/security-auditor-grpo with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="oxdev/security-auditor-grpo")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("oxdev/security-auditor-grpo")
model = AutoModelForCausalLM.from_pretrained("oxdev/security-auditor-grpo")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use oxdev/security-auditor-grpo with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "oxdev/security-auditor-grpo"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "oxdev/security-auditor-grpo",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/oxdev/security-auditor-grpo
- SGLang
How to use oxdev/security-auditor-grpo with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "oxdev/security-auditor-grpo" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "oxdev/security-auditor-grpo",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "oxdev/security-auditor-grpo" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "oxdev/security-auditor-grpo",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use oxdev/security-auditor-grpo with Docker Model Runner:
docker model run hf.co/oxdev/security-auditor-grpo
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 326, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.75, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 37.0, | |
| "completions/mean_length": 393.25, | |
| "completions/mean_terminated_length": 37.0, | |
| "completions/min_length": 37.0, | |
| "completions/min_terminated_length": 37.0, | |
| "entropy": 1.4897738695144653, | |
| "epoch": 0.006134969325153374, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 2.2988293170928955, | |
| "learning_rate": 5e-07, | |
| "loss": -0.21252349019050598, | |
| "num_tokens": 3567.0, | |
| "reward": -0.3424999713897705, | |
| "reward_std": 0.01500000525265932, | |
| "rewards/format_reward/mean": 0.02500000037252903, | |
| "rewards/format_reward/std": 0.05000000074505806, | |
| "rewards/security_audit_reward/mean": -0.5, | |
| "rewards/security_audit_reward/std": 0.0, | |
| "step": 1, | |
| "step_time": 39.508622552999896 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 248.75, | |
| "completions/mean_length": 389.625, | |
| "completions/mean_terminated_length": 192.79166793823242, | |
| "completions/min_length": 272.75, | |
| "completions/min_terminated_length": 144.75, | |
| "entropy": 1.363443061709404, | |
| "epoch": 0.03067484662576687, | |
| "frac_reward_zero_std": 0.375, | |
| "grad_norm": 4.688082218170166, | |
| "learning_rate": 4.938650306748465e-07, | |
| "loss": 0.04808004945516586, | |
| "num_tokens": 17675.0, | |
| "reward": -0.2981249839067459, | |
| "reward_std": 0.08178356755524874, | |
| "rewards/format_reward/mean": 0.10000000381842256, | |
| "rewards/format_reward/std": 0.12774468399584293, | |
| "rewards/security_audit_reward/mean": -0.46875, | |
| "rewards/security_audit_reward/std": 0.0625, | |
| "step": 5, | |
| "step_time": 38.500043476749966 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.65, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 345.6, | |
| "completions/mean_length": 463.0, | |
| "completions/mean_terminated_length": 305.6, | |
| "completions/min_length": 363.8, | |
| "completions/min_terminated_length": 261.4, | |
| "entropy": 1.4113845229148865, | |
| "epoch": 0.06134969325153374, | |
| "frac_reward_zero_std": 0.4, | |
| "grad_norm": 3.245452880859375, | |
| "learning_rate": 4.86196319018405e-07, | |
| "loss": -0.00041331946849823, | |
| "num_tokens": 37093.0, | |
| "reward": -0.29424998760223386, | |
| "reward_std": 0.08391451295465231, | |
| "rewards/format_reward/mean": 0.12750000804662703, | |
| "rewards/format_reward/std": 0.16304838731884957, | |
| "rewards/security_audit_reward/mean": -0.475, | |
| "rewards/security_audit_reward/std": 0.05, | |
| "step": 10, | |
| "step_time": 39.192330704800135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 394.0, | |
| "completions/mean_length": 455.5, | |
| "completions/mean_terminated_length": 352.9, | |
| "completions/min_length": 311.8, | |
| "completions/min_terminated_length": 311.8, | |
| "entropy": 1.179759132862091, | |
| "epoch": 0.09202453987730061, | |
| "frac_reward_zero_std": 0.7, | |
| "grad_norm": 2.9624693393707275, | |
| "learning_rate": 4.785276073619632e-07, | |
| "loss": 0.03452911972999573, | |
| "num_tokens": 55311.0, | |
| "reward": -0.2887499898672104, | |
| "reward_std": 0.09658594038337469, | |
| "rewards/format_reward/mean": 0.0875, | |
| "rewards/format_reward/std": 0.08947573080658913, | |
| "rewards/security_audit_reward/mean": -0.45, | |
| "rewards/security_audit_reward/std": 0.1, | |
| "step": 15, | |
| "step_time": 38.30515608799997 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.45, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 395.2, | |
| "completions/mean_length": 416.9, | |
| "completions/mean_terminated_length": 328.76666870117185, | |
| "completions/min_length": 260.8, | |
| "completions/min_terminated_length": 260.8, | |
| "entropy": 1.298638153076172, | |
| "epoch": 0.12269938650306748, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.034470081329346, | |
| "learning_rate": 4.7085889570552147e-07, | |
| "loss": -0.008246073126792907, | |
| "num_tokens": 72771.0, | |
| "reward": -0.23124998807907104, | |
| "reward_std": 0.16768747363239528, | |
| "rewards/format_reward/mean": 0.19750000424683095, | |
| "rewards/format_reward/std": 0.2057904489338398, | |
| "rewards/security_audit_reward/mean": -0.4149999976158142, | |
| "rewards/security_audit_reward/std": 0.16999999880790712, | |
| "step": 20, | |
| "step_time": 37.87772348239996 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 423.4, | |
| "completions/mean_length": 382.1, | |
| "completions/mean_terminated_length": 334.1000091552734, | |
| "completions/min_length": 236.0, | |
| "completions/min_terminated_length": 236.0, | |
| "entropy": 1.317835807800293, | |
| "epoch": 0.15337423312883436, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 2.853423595428467, | |
| "learning_rate": 4.631901840490797e-07, | |
| "loss": -0.013739901781082153, | |
| "num_tokens": 89889.0, | |
| "reward": -0.2974999874830246, | |
| "reward_std": 0.15671177953481674, | |
| "rewards/format_reward/mean": 0.17500000596046447, | |
| "rewards/format_reward/std": 0.18444484770298003, | |
| "rewards/security_audit_reward/mean": -0.5, | |
| "rewards/security_audit_reward/std": 0.15773502588272095, | |
| "step": 25, | |
| "step_time": 38.74009619139997 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.65, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 346.0, | |
| "completions/mean_length": 463.35, | |
| "completions/mean_terminated_length": 295.3, | |
| "completions/min_length": 337.8, | |
| "completions/min_terminated_length": 235.4, | |
| "entropy": 1.1444598376750945, | |
| "epoch": 0.18404907975460122, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.64375901222229, | |
| "learning_rate": 4.55521472392638e-07, | |
| "loss": -0.03970654606819153, | |
| "num_tokens": 108664.0, | |
| "reward": -0.3184999763965607, | |
| "reward_std": 0.04019503518939018, | |
| "rewards/format_reward/mean": 0.10499999970197678, | |
| "rewards/format_reward/std": 0.13398344144225122, | |
| "rewards/security_audit_reward/mean": -0.5, | |
| "rewards/security_audit_reward/std": 0.0, | |
| "step": 30, | |
| "step_time": 38.56538706479987 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4, | |
| "completions/max_length": 504.2, | |
| "completions/max_terminated_length": 454.6, | |
| "completions/mean_length": 421.25, | |
| "completions/mean_terminated_length": 386.0, | |
| "completions/min_length": 328.6, | |
| "completions/min_terminated_length": 328.6, | |
| "entropy": 1.3522289156913758, | |
| "epoch": 0.2147239263803681, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 3.4385552406311035, | |
| "learning_rate": 4.4785276073619634e-07, | |
| "loss": -0.06348788738250732, | |
| "num_tokens": 126953.0, | |
| "reward": -0.32824997901916503, | |
| "reward_std": 0.03220053892582655, | |
| "rewards/format_reward/mean": 0.07250000201165677, | |
| "rewards/format_reward/std": 0.10733511671423912, | |
| "rewards/security_audit_reward/mean": -0.5, | |
| "rewards/security_audit_reward/std": 0.0, | |
| "step": 35, | |
| "step_time": 37.87626404739986 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.55, | |
| "completions/max_length": 505.2, | |
| "completions/max_terminated_length": 334.4, | |
| "completions/mean_length": 414.9, | |
| "completions/mean_terminated_length": 240.3, | |
| "completions/min_length": 243.0, | |
| "completions/min_terminated_length": 140.6, | |
| "entropy": 1.230024951696396, | |
| "epoch": 0.24539877300613497, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 4.400479793548584, | |
| "learning_rate": 4.401840490797546e-07, | |
| "loss": 0.11927952766418456, | |
| "num_tokens": 144785.0, | |
| "reward": -0.2897499829530716, | |
| "reward_std": 0.12973095811903476, | |
| "rewards/format_reward/mean": 0.14250000044703484, | |
| "rewards/format_reward/std": 0.14365934804081917, | |
| "rewards/security_audit_reward/mean": -0.475, | |
| "rewards/security_audit_reward/std": 0.13164966106414794, | |
| "step": 40, | |
| "step_time": 37.8069536416001 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6, | |
| "completions/max_length": 472.2, | |
| "completions/max_terminated_length": 190.6, | |
| "completions/mean_length": 412.7, | |
| "completions/mean_terminated_length": 160.85, | |
| "completions/min_length": 333.8, | |
| "completions/min_terminated_length": 129.0, | |
| "entropy": 1.2133947968482972, | |
| "epoch": 0.27607361963190186, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 4.325937271118164, | |
| "learning_rate": 4.3251533742331285e-07, | |
| "loss": 0.025146520137786864, | |
| "num_tokens": 162443.0, | |
| "reward": -0.1574999876320362, | |
| "reward_std": 0.2636621415615082, | |
| "rewards/format_reward/mean": 0.24500001072883607, | |
| "rewards/format_reward/std": 0.23762110471725464, | |
| "rewards/security_audit_reward/mean": -0.32999999523162843, | |
| "rewards/security_audit_reward/std": 0.28574271202087403, | |
| "step": 45, | |
| "step_time": 34.90182834920015 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.55, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 308.6, | |
| "completions/mean_length": 397.05, | |
| "completions/mean_terminated_length": 259.2, | |
| "completions/min_length": 203.0, | |
| "completions/min_terminated_length": 203.0, | |
| "entropy": 1.4294291973114013, | |
| "epoch": 0.3067484662576687, | |
| "frac_reward_zero_std": 0.4, | |
| "grad_norm": 3.9505743980407715, | |
| "learning_rate": 4.2484662576687116e-07, | |
| "loss": -0.08058007955551147, | |
| "num_tokens": 180200.0, | |
| "reward": -0.29249998927116394, | |
| "reward_std": 0.10127481501549482, | |
| "rewards/format_reward/mean": 0.0750000026077032, | |
| "rewards/format_reward/std": 0.1127780631184578, | |
| "rewards/security_audit_reward/mean": -0.45, | |
| "rewards/security_audit_reward/std": 0.1, | |
| "step": 50, | |
| "step_time": 38.750808009400046 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 389.8, | |
| "completions/mean_length": 404.65, | |
| "completions/mean_terminated_length": 297.43333740234374, | |
| "completions/min_length": 192.8, | |
| "completions/min_terminated_length": 192.8, | |
| "entropy": 1.2564165532588958, | |
| "epoch": 0.3374233128834356, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 3.3762269020080566, | |
| "learning_rate": 4.171779141104294e-07, | |
| "loss": -0.030467823147773743, | |
| "num_tokens": 198109.0, | |
| "reward": -0.2542499825358391, | |
| "reward_std": 0.07489922866225243, | |
| "rewards/format_reward/mean": 0.20250000841915608, | |
| "rewards/format_reward/std": 0.1368803471326828, | |
| "rewards/security_audit_reward/mean": -0.45, | |
| "rewards/security_audit_reward/std": 0.05773502588272095, | |
| "step": 55, | |
| "step_time": 38.411275500399825 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 496.2, | |
| "completions/max_terminated_length": 448.8, | |
| "completions/mean_length": 394.3, | |
| "completions/mean_terminated_length": 358.6166687011719, | |
| "completions/min_length": 285.4, | |
| "completions/min_terminated_length": 285.4, | |
| "entropy": 1.2620218694210052, | |
| "epoch": 0.36809815950920244, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 2.8227944374084473, | |
| "learning_rate": 4.095092024539877e-07, | |
| "loss": 0.039707571268081665, | |
| "num_tokens": 215747.0, | |
| "reward": -0.2729999750852585, | |
| "reward_std": 0.13599938787519933, | |
| "rewards/format_reward/mean": 0.14000000432133675, | |
| "rewards/format_reward/std": 0.14343783408403396, | |
| "rewards/security_audit_reward/mean": -0.45, | |
| "rewards/security_audit_reward/std": 0.1393846869468689, | |
| "step": 60, | |
| "step_time": 37.50645367139987 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 484.8, | |
| "completions/max_terminated_length": 334.8, | |
| "completions/mean_length": 381.35, | |
| "completions/mean_terminated_length": 255.98333740234375, | |
| "completions/min_length": 275.6, | |
| "completions/min_terminated_length": 173.2, | |
| "entropy": 1.2798833012580872, | |
| "epoch": 0.3987730061349693, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.4819753170013428, | |
| "learning_rate": 4.01840490797546e-07, | |
| "loss": -0.06275686025619506, | |
| "num_tokens": 233162.0, | |
| "reward": -0.201749986410141, | |
| "reward_std": 0.2016347900032997, | |
| "rewards/format_reward/mean": 0.20250000804662704, | |
| "rewards/format_reward/std": 0.22137173414230346, | |
| "rewards/security_audit_reward/mean": -0.375, | |
| "rewards/security_audit_reward/std": 0.20773502588272094, | |
| "step": 65, | |
| "step_time": 37.139902984000216 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 289.8, | |
| "completions/mean_length": 408.45, | |
| "completions/mean_terminated_length": 251.7, | |
| "completions/min_length": 207.0, | |
| "completions/min_terminated_length": 207.0, | |
| "entropy": 1.2134525895118713, | |
| "epoch": 0.4294478527607362, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 4.019806861877441, | |
| "learning_rate": 3.941717791411043e-07, | |
| "loss": 0.08099154829978943, | |
| "num_tokens": 251321.0, | |
| "reward": -0.27599998414516447, | |
| "reward_std": 0.08945702444761991, | |
| "rewards/format_reward/mean": 0.1300000037997961, | |
| "rewards/format_reward/std": 0.1645726040005684, | |
| "rewards/security_audit_reward/mean": -0.45, | |
| "rewards/security_audit_reward/std": 0.05773502588272095, | |
| "step": 70, | |
| "step_time": 38.02127088899997 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 459.8, | |
| "completions/max_terminated_length": 206.2, | |
| "completions/mean_length": 348.9, | |
| "completions/mean_terminated_length": 176.9, | |
| "completions/min_length": 148.6, | |
| "completions/min_terminated_length": 148.6, | |
| "entropy": 1.3179432690143584, | |
| "epoch": 0.4601226993865031, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 6.28598690032959, | |
| "learning_rate": 3.8650306748466255e-07, | |
| "loss": -0.11171818971633911, | |
| "num_tokens": 267725.0, | |
| "reward": -0.19474998638033866, | |
| "reward_std": 0.17031802013516426, | |
| "rewards/format_reward/mean": 0.23750000447034836, | |
| "rewards/format_reward/std": 0.17114628925919534, | |
| "rewards/security_audit_reward/mean": -0.3800000011920929, | |
| "rewards/security_audit_reward/std": 0.180902099609375, | |
| "step": 75, | |
| "step_time": 34.450158203000136 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 332.8, | |
| "completions/mean_length": 400.75, | |
| "completions/mean_terminated_length": 289.06666870117186, | |
| "completions/min_length": 251.0, | |
| "completions/min_terminated_length": 251.0, | |
| "entropy": 1.1514661133289337, | |
| "epoch": 0.49079754601226994, | |
| "frac_reward_zero_std": 0.4, | |
| "grad_norm": 2.8479247093200684, | |
| "learning_rate": 3.788343558282208e-07, | |
| "loss": 0.03145935535430908, | |
| "num_tokens": 285726.0, | |
| "reward": -0.2569999933242798, | |
| "reward_std": 0.15333212018013, | |
| "rewards/format_reward/mean": 0.1350000023841858, | |
| "rewards/format_reward/std": 0.18636635541915894, | |
| "rewards/security_audit_reward/mean": -0.425, | |
| "rewards/security_audit_reward/std": 0.15, | |
| "step": 80, | |
| "step_time": 38.869779922999626 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 321.2, | |
| "completions/mean_length": 398.5, | |
| "completions/mean_terminated_length": 253.0, | |
| "completions/min_length": 183.0, | |
| "completions/min_terminated_length": 183.0, | |
| "entropy": 1.244500571489334, | |
| "epoch": 0.5214723926380368, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 2.146970272064209, | |
| "learning_rate": 3.7116564417177916e-07, | |
| "loss": 0.06171210408210755, | |
| "num_tokens": 304148.0, | |
| "reward": -0.22524999380111693, | |
| "reward_std": 0.19191497713327407, | |
| "rewards/format_reward/mean": 0.18250000327825547, | |
| "rewards/format_reward/std": 0.19969657957553863, | |
| "rewards/security_audit_reward/mean": -0.4, | |
| "rewards/security_audit_reward/std": 0.2, | |
| "step": 85, | |
| "step_time": 39.297288996000134 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.55, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 360.0, | |
| "completions/mean_length": 419.5, | |
| "completions/mean_terminated_length": 325.6, | |
| "completions/min_length": 291.2, | |
| "completions/min_terminated_length": 291.2, | |
| "entropy": 1.206581747531891, | |
| "epoch": 0.5521472392638037, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 2.1158456802368164, | |
| "learning_rate": 3.634969325153374e-07, | |
| "loss": -0.06664568185806274, | |
| "num_tokens": 321680.0, | |
| "reward": -0.23324998915195466, | |
| "reward_std": 0.1919491995126009, | |
| "rewards/format_reward/mean": 0.1675000049173832, | |
| "rewards/format_reward/std": 0.19863576367497443, | |
| "rewards/security_audit_reward/mean": -0.40499999523162844, | |
| "rewards/security_audit_reward/std": 0.1899999976158142, | |
| "step": 90, | |
| "step_time": 38.49561594039933 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4, | |
| "completions/max_length": 469.2, | |
| "completions/max_terminated_length": 364.0, | |
| "completions/mean_length": 383.2, | |
| "completions/mean_terminated_length": 291.8333374023438, | |
| "completions/min_length": 217.6, | |
| "completions/min_terminated_length": 217.6, | |
| "entropy": 1.217250692844391, | |
| "epoch": 0.5828220858895705, | |
| "frac_reward_zero_std": 0.4, | |
| "grad_norm": 4.098232269287109, | |
| "learning_rate": 3.558282208588957e-07, | |
| "loss": 0.05211906433105469, | |
| "num_tokens": 339350.0, | |
| "reward": -0.2119999945163727, | |
| "reward_std": 0.19894140996038914, | |
| "rewards/format_reward/mean": 0.1799999989569187, | |
| "rewards/format_reward/std": 0.23350853994488716, | |
| "rewards/security_audit_reward/mean": -0.37999999821186065, | |
| "rewards/security_audit_reward/std": 0.1911805212497711, | |
| "step": 95, | |
| "step_time": 35.83687614579994 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 345.4, | |
| "completions/mean_length": 377.45, | |
| "completions/mean_terminated_length": 290.6333343505859, | |
| "completions/min_length": 240.4, | |
| "completions/min_terminated_length": 240.4, | |
| "entropy": 1.2783292949199676, | |
| "epoch": 0.6134969325153374, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 2.361516237258911, | |
| "learning_rate": 3.48159509202454e-07, | |
| "loss": 0.06258203387260437, | |
| "num_tokens": 356239.0, | |
| "reward": -0.20649999231100083, | |
| "reward_std": 0.18195689767599105, | |
| "rewards/format_reward/mean": 0.24499999433755876, | |
| "rewards/format_reward/std": 0.19310407042503358, | |
| "rewards/security_audit_reward/mean": -0.4, | |
| "rewards/security_audit_reward/std": 0.2, | |
| "step": 100, | |
| "step_time": 38.47846096040011 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 463.2, | |
| "completions/max_terminated_length": 394.6, | |
| "completions/mean_length": 335.8, | |
| "completions/mean_terminated_length": 268.3, | |
| "completions/min_length": 139.6, | |
| "completions/min_terminated_length": 139.6, | |
| "entropy": 1.2529696226119995, | |
| "epoch": 0.6441717791411042, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.356074094772339, | |
| "learning_rate": 3.4049079754601224e-07, | |
| "loss": 0.003340443968772888, | |
| "num_tokens": 373237.0, | |
| "reward": -0.2567499876022339, | |
| "reward_std": 0.27417250275611876, | |
| "rewards/format_reward/mean": 0.14750000461935997, | |
| "rewards/format_reward/std": 0.19759280756115913, | |
| "rewards/security_audit_reward/mean": -0.4299999952316284, | |
| "rewards/security_audit_reward/std": 0.3186576545238495, | |
| "step": 105, | |
| "step_time": 35.43083410320014 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.45, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 372.8, | |
| "completions/mean_length": 419.15, | |
| "completions/mean_terminated_length": 319.2666748046875, | |
| "completions/min_length": 266.8, | |
| "completions/min_terminated_length": 266.8, | |
| "entropy": 1.1685741186141967, | |
| "epoch": 0.6748466257668712, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.318619728088379, | |
| "learning_rate": 3.3282208588957055e-07, | |
| "loss": -0.026089027523994446, | |
| "num_tokens": 391784.0, | |
| "reward": -0.2662499874830246, | |
| "reward_std": 0.07884115856140853, | |
| "rewards/format_reward/mean": 0.16250000558793545, | |
| "rewards/format_reward/std": 0.14070439487695693, | |
| "rewards/security_audit_reward/mean": -0.45, | |
| "rewards/security_audit_reward/std": 0.05773502588272095, | |
| "step": 110, | |
| "step_time": 38.89281254739999 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 468.0, | |
| "completions/max_terminated_length": 337.6, | |
| "completions/mean_length": 330.1, | |
| "completions/mean_terminated_length": 240.4166687011719, | |
| "completions/min_length": 160.8, | |
| "completions/min_terminated_length": 160.8, | |
| "entropy": 1.2954379856586455, | |
| "epoch": 0.7055214723926381, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.293928384780884, | |
| "learning_rate": 3.251533742331288e-07, | |
| "loss": 0.17276796102523803, | |
| "num_tokens": 408446.0, | |
| "reward": -0.22849999666213988, | |
| "reward_std": 0.1390242099761963, | |
| "rewards/format_reward/mean": 0.2300000011920929, | |
| "rewards/format_reward/std": 0.23302415013313293, | |
| "rewards/security_audit_reward/mean": -0.425, | |
| "rewards/security_audit_reward/std": 0.10773502588272095, | |
| "step": 115, | |
| "step_time": 35.73082293679981 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.55, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 281.2, | |
| "completions/mean_length": 400.5, | |
| "completions/mean_terminated_length": 248.73333740234375, | |
| "completions/min_length": 211.8, | |
| "completions/min_terminated_length": 211.8, | |
| "entropy": 1.2283548831939697, | |
| "epoch": 0.7361963190184049, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 2.632479190826416, | |
| "learning_rate": 3.174846625766871e-07, | |
| "loss": 0.05111231803894043, | |
| "num_tokens": 426822.0, | |
| "reward": -0.22074998915195465, | |
| "reward_std": 0.15957241374999284, | |
| "rewards/format_reward/mean": 0.1975000012665987, | |
| "rewards/format_reward/std": 0.1847505249083042, | |
| "rewards/security_audit_reward/mean": -0.4, | |
| "rewards/security_audit_reward/std": 0.15773502588272095, | |
| "step": 120, | |
| "step_time": 39.14503100519996 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 340.2, | |
| "completions/mean_length": 401.4, | |
| "completions/mean_terminated_length": 236.03333740234376, | |
| "completions/min_length": 245.2, | |
| "completions/min_terminated_length": 142.8, | |
| "entropy": 1.307636547088623, | |
| "epoch": 0.7668711656441718, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 5.566491603851318, | |
| "learning_rate": 3.0981595092024537e-07, | |
| "loss": 0.003215853124856949, | |
| "num_tokens": 444322.0, | |
| "reward": -0.11199999079108239, | |
| "reward_std": 0.2506739288568497, | |
| "rewards/format_reward/mean": 0.2449999988079071, | |
| "rewards/format_reward/std": 0.20622505843639374, | |
| "rewards/security_audit_reward/mean": -0.26500000059604645, | |
| "rewards/security_audit_reward/std": 0.278915548324585, | |
| "step": 125, | |
| "step_time": 38.70229864360026 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 390.6, | |
| "completions/mean_length": 367.0, | |
| "completions/mean_terminated_length": 264.0000061035156, | |
| "completions/min_length": 123.2, | |
| "completions/min_terminated_length": 123.2, | |
| "entropy": 1.248900693655014, | |
| "epoch": 0.7975460122699386, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.395384311676025, | |
| "learning_rate": 3.021472392638036e-07, | |
| "loss": 0.06482647061347961, | |
| "num_tokens": 461894.0, | |
| "reward": -0.2042499899864197, | |
| "reward_std": 0.17003463432192803, | |
| "rewards/format_reward/mean": 0.2525000125169754, | |
| "rewards/format_reward/std": 0.21560870110988617, | |
| "rewards/security_audit_reward/mean": -0.4, | |
| "rewards/security_audit_reward/std": 0.15773502588272095, | |
| "step": 130, | |
| "step_time": 39.174271353800215 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 370.0, | |
| "completions/mean_length": 359.95, | |
| "completions/mean_terminated_length": 276.9666687011719, | |
| "completions/min_length": 203.8, | |
| "completions/min_terminated_length": 203.8, | |
| "entropy": 1.3299469709396363, | |
| "epoch": 0.8282208588957055, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.3037519454956055, | |
| "learning_rate": 2.94478527607362e-07, | |
| "loss": 0.026153716444969177, | |
| "num_tokens": 478783.0, | |
| "reward": -0.19949999153614045, | |
| "reward_std": 0.15296672135591508, | |
| "rewards/format_reward/mean": 0.24500000178813935, | |
| "rewards/format_reward/std": 0.2273508906364441, | |
| "rewards/security_audit_reward/mean": -0.39000000059604645, | |
| "rewards/security_audit_reward/std": 0.12891554832458496, | |
| "step": 135, | |
| "step_time": 38.658669441000164 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.45, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 354.0, | |
| "completions/mean_length": 380.3, | |
| "completions/mean_terminated_length": 271.6666687011719, | |
| "completions/min_length": 206.4, | |
| "completions/min_terminated_length": 206.4, | |
| "entropy": 1.0997539341449738, | |
| "epoch": 0.8588957055214724, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 2.3693976402282715, | |
| "learning_rate": 2.8680981595092024e-07, | |
| "loss": -0.01876506209373474, | |
| "num_tokens": 496243.0, | |
| "reward": -0.17974998727440833, | |
| "reward_std": 0.18710523881018162, | |
| "rewards/format_reward/mean": 0.21750000193715097, | |
| "rewards/format_reward/std": 0.1843859799206257, | |
| "rewards/security_audit_reward/mean": -0.35, | |
| "rewards/security_audit_reward/std": 0.20347774028778076, | |
| "step": 140, | |
| "step_time": 39.171343391999834 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 456.2, | |
| "completions/max_terminated_length": 355.2, | |
| "completions/mean_length": 348.35, | |
| "completions/mean_terminated_length": 270.3, | |
| "completions/min_length": 164.8, | |
| "completions/min_terminated_length": 164.8, | |
| "entropy": 1.2017314374446868, | |
| "epoch": 0.8895705521472392, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 4.230531215667725, | |
| "learning_rate": 2.791411042944785e-07, | |
| "loss": 0.0029310762882232668, | |
| "num_tokens": 513422.0, | |
| "reward": -0.1637499898672104, | |
| "reward_std": 0.2463478922843933, | |
| "rewards/format_reward/mean": 0.2475000023841858, | |
| "rewards/format_reward/std": 0.2085829883813858, | |
| "rewards/security_audit_reward/mean": -0.34000000059604646, | |
| "rewards/security_audit_reward/std": 0.26830023527145386, | |
| "step": 145, | |
| "step_time": 34.95672115479992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 482.4, | |
| "completions/max_terminated_length": 431.6, | |
| "completions/mean_length": 355.35, | |
| "completions/mean_terminated_length": 319.4166687011719, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 1.258862280845642, | |
| "epoch": 0.9202453987730062, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 6.071740627288818, | |
| "learning_rate": 2.714723926380368e-07, | |
| "loss": 0.0822126567363739, | |
| "num_tokens": 530643.0, | |
| "reward": -0.1912499874830246, | |
| "reward_std": 0.1670845106244087, | |
| "rewards/format_reward/mean": 0.27249999940395353, | |
| "rewards/format_reward/std": 0.1733592666685581, | |
| "rewards/security_audit_reward/mean": -0.39000000059604645, | |
| "rewards/security_audit_reward/std": 0.17118052244186402, | |
| "step": 150, | |
| "step_time": 37.038782767599876 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2, | |
| "completions/max_length": 442.2, | |
| "completions/max_terminated_length": 316.0, | |
| "completions/mean_length": 283.75, | |
| "completions/mean_terminated_length": 237.85000305175782, | |
| "completions/min_length": 166.2, | |
| "completions/min_terminated_length": 166.2, | |
| "entropy": 1.4853489220142364, | |
| "epoch": 0.950920245398773, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 5.2570695877075195, | |
| "learning_rate": 2.6380368098159506e-07, | |
| "loss": 0.11004064083099366, | |
| "num_tokens": 545966.0, | |
| "reward": -0.15949999541044235, | |
| "reward_std": 0.19192611873149873, | |
| "rewards/format_reward/mean": 0.28500000238418577, | |
| "rewards/format_reward/std": 0.22434256076812745, | |
| "rewards/security_audit_reward/mean": -0.35, | |
| "rewards/security_audit_reward/std": 0.2, | |
| "step": 155, | |
| "step_time": 33.86747411140077 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.55, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 340.8, | |
| "completions/mean_length": 411.45, | |
| "completions/mean_terminated_length": 265.9666687011719, | |
| "completions/min_length": 192.0, | |
| "completions/min_terminated_length": 192.0, | |
| "entropy": 1.0828768193721772, | |
| "epoch": 0.9815950920245399, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 2.6537587642669678, | |
| "learning_rate": 2.5613496932515337e-07, | |
| "loss": 0.03556116819381714, | |
| "num_tokens": 563683.0, | |
| "reward": -0.20099999010562897, | |
| "reward_std": 0.1888158166781068, | |
| "rewards/format_reward/mean": 0.24000000059604645, | |
| "rewards/format_reward/std": 0.16870398968458175, | |
| "rewards/security_audit_reward/mean": -0.3899999976158142, | |
| "rewards/security_audit_reward/std": 0.2199999988079071, | |
| "step": 160, | |
| "step_time": 38.665304075799575 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 483.2, | |
| "completions/max_terminated_length": 393.2, | |
| "completions/mean_length": 359.2, | |
| "completions/mean_terminated_length": 303.3666687011719, | |
| "completions/min_length": 178.2, | |
| "completions/min_terminated_length": 178.2, | |
| "entropy": 1.1811485469341279, | |
| "epoch": 1.0122699386503067, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.203860282897949, | |
| "learning_rate": 2.4846625766871163e-07, | |
| "loss": -0.02532302737236023, | |
| "num_tokens": 580183.0, | |
| "reward": -0.1227499857544899, | |
| "reward_std": 0.2651766210794449, | |
| "rewards/format_reward/mean": 0.2675000011920929, | |
| "rewards/format_reward/std": 0.24115291833877564, | |
| "rewards/security_audit_reward/mean": -0.2899999976158142, | |
| "rewards/security_audit_reward/std": 0.2812127649784088, | |
| "step": 165, | |
| "step_time": 36.34888075860035 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 345.8, | |
| "completions/mean_length": 358.5, | |
| "completions/mean_terminated_length": 235.96666870117187, | |
| "completions/min_length": 123.0, | |
| "completions/min_terminated_length": 123.0, | |
| "entropy": 1.2863860994577407, | |
| "epoch": 1.0429447852760736, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 2.71185302734375, | |
| "learning_rate": 2.4079754601226994e-07, | |
| "loss": 0.12254136800765991, | |
| "num_tokens": 597345.0, | |
| "reward": -0.20274999886751174, | |
| "reward_std": 0.1825057201087475, | |
| "rewards/format_reward/mean": 0.25750000327825545, | |
| "rewards/format_reward/std": 0.19183385372161865, | |
| "rewards/security_audit_reward/mean": -0.4, | |
| "rewards/security_audit_reward/std": 0.19711971282958984, | |
| "step": 170, | |
| "step_time": 38.96664929399922 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 323.4, | |
| "completions/mean_length": 407.1, | |
| "completions/mean_terminated_length": 234.40000610351564, | |
| "completions/min_length": 242.8, | |
| "completions/min_terminated_length": 140.4, | |
| "entropy": 1.179810070991516, | |
| "epoch": 1.0736196319018405, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.413055419921875, | |
| "learning_rate": 2.331288343558282e-07, | |
| "loss": 0.041037318110466, | |
| "num_tokens": 615063.0, | |
| "reward": -0.20374999046325684, | |
| "reward_std": 0.2052689865231514, | |
| "rewards/format_reward/mean": 0.31250000894069674, | |
| "rewards/format_reward/std": 0.22553626000881194, | |
| "rewards/security_audit_reward/mean": -0.425, | |
| "rewards/security_audit_reward/std": 0.23164966106414794, | |
| "step": 175, | |
| "step_time": 39.00492364500023 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 511.6, | |
| "completions/max_terminated_length": 424.0, | |
| "completions/mean_length": 399.45, | |
| "completions/mean_terminated_length": 347.9166748046875, | |
| "completions/min_length": 262.6, | |
| "completions/min_terminated_length": 262.6, | |
| "entropy": 1.1026120364665986, | |
| "epoch": 1.1042944785276074, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.408414840698242, | |
| "learning_rate": 2.254601226993865e-07, | |
| "loss": 0.058766734600067136, | |
| "num_tokens": 632696.0, | |
| "reward": -0.16599998623132706, | |
| "reward_std": 0.26377752125263215, | |
| "rewards/format_reward/mean": 0.24000000655651094, | |
| "rewards/format_reward/std": 0.21778101623058319, | |
| "rewards/security_audit_reward/mean": -0.34000000059604646, | |
| "rewards/security_audit_reward/std": 0.3105652093887329, | |
| "step": 180, | |
| "step_time": 39.09616019519963 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2, | |
| "completions/max_length": 477.4, | |
| "completions/max_terminated_length": 421.6, | |
| "completions/mean_length": 322.45, | |
| "completions/mean_terminated_length": 273.1000030517578, | |
| "completions/min_length": 156.0, | |
| "completions/min_terminated_length": 156.0, | |
| "entropy": 1.2888785600662231, | |
| "epoch": 1.1349693251533743, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.6877431869506836, | |
| "learning_rate": 2.1779141104294476e-07, | |
| "loss": -0.0771723210811615, | |
| "num_tokens": 649353.0, | |
| "reward": -0.1799999952316284, | |
| "reward_std": 0.3134476348757744, | |
| "rewards/format_reward/mean": 0.2750000089406967, | |
| "rewards/format_reward/std": 0.24135999679565429, | |
| "rewards/security_audit_reward/mean": -0.375, | |
| "rewards/security_audit_reward/std": 0.3593961834907532, | |
| "step": 185, | |
| "step_time": 36.71157897000012 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 495.0, | |
| "completions/max_terminated_length": 351.0, | |
| "completions/mean_length": 328.8, | |
| "completions/mean_terminated_length": 243.7, | |
| "completions/min_length": 164.8, | |
| "completions/min_terminated_length": 164.8, | |
| "entropy": 1.4585140287876128, | |
| "epoch": 1.165644171779141, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.987306118011475, | |
| "learning_rate": 2.1012269938650307e-07, | |
| "loss": -0.15080010890960693, | |
| "num_tokens": 665513.0, | |
| "reward": -0.050499990582466125, | |
| "reward_std": 0.2684710592031479, | |
| "rewards/format_reward/mean": 0.31000000387430193, | |
| "rewards/format_reward/std": 0.1994625985622406, | |
| "rewards/security_audit_reward/mean": -0.20500000119209288, | |
| "rewards/security_audit_reward/std": 0.31255176067352297, | |
| "step": 190, | |
| "step_time": 37.63543628939988 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.5, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 307.8, | |
| "completions/mean_length": 391.75, | |
| "completions/mean_terminated_length": 253.06666870117186, | |
| "completions/min_length": 178.0, | |
| "completions/min_terminated_length": 178.0, | |
| "entropy": 1.1293343544006347, | |
| "epoch": 1.196319018404908, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 5.247244358062744, | |
| "learning_rate": 2.0245398773006135e-07, | |
| "loss": -0.04229157567024231, | |
| "num_tokens": 683268.0, | |
| "reward": -0.10224998965859414, | |
| "reward_std": 0.19266743455082178, | |
| "rewards/format_reward/mean": 0.3124999929219484, | |
| "rewards/format_reward/std": 0.1390557773411274, | |
| "rewards/security_audit_reward/mean": -0.2800000011920929, | |
| "rewards/security_audit_reward/std": 0.23863712549209595, | |
| "step": 195, | |
| "step_time": 38.93936442300037 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4, | |
| "completions/max_length": 485.0, | |
| "completions/max_terminated_length": 346.2, | |
| "completions/mean_length": 364.6, | |
| "completions/mean_terminated_length": 265.8166748046875, | |
| "completions/min_length": 165.8, | |
| "completions/min_terminated_length": 165.8, | |
| "entropy": 0.8287177711725235, | |
| "epoch": 1.2269938650306749, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 2.4230945110321045, | |
| "learning_rate": 1.9478527607361963e-07, | |
| "loss": -0.05633368492126465, | |
| "num_tokens": 700760.0, | |
| "reward": -0.1807499848306179, | |
| "reward_std": 0.18529897555708885, | |
| "rewards/format_reward/mean": 0.3075000137090683, | |
| "rewards/format_reward/std": 0.15467575192451477, | |
| "rewards/security_audit_reward/mean": -0.39000000059604645, | |
| "rewards/security_audit_reward/std": 0.2105652093887329, | |
| "step": 200, | |
| "step_time": 37.065423558799736 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 431.6, | |
| "completions/max_terminated_length": 338.8, | |
| "completions/mean_length": 284.1, | |
| "completions/mean_terminated_length": 225.65, | |
| "completions/min_length": 119.4, | |
| "completions/min_terminated_length": 119.4, | |
| "entropy": 1.2736368715763091, | |
| "epoch": 1.2576687116564418, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 4.797567367553711, | |
| "learning_rate": 1.8711656441717791e-07, | |
| "loss": 0.08297693133354186, | |
| "num_tokens": 716344.0, | |
| "reward": -0.07274999544024467, | |
| "reward_std": 0.24350565671920776, | |
| "rewards/format_reward/mean": 0.31749999821186065, | |
| "rewards/format_reward/std": 0.19669782146811485, | |
| "rewards/security_audit_reward/mean": -0.23999999985098838, | |
| "rewards/security_audit_reward/std": 0.2692204549908638, | |
| "step": 205, | |
| "step_time": 33.11877055760014 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 484.0, | |
| "completions/max_terminated_length": 403.2, | |
| "completions/mean_length": 335.45, | |
| "completions/mean_terminated_length": 276.4500030517578, | |
| "completions/min_length": 173.8, | |
| "completions/min_terminated_length": 173.8, | |
| "entropy": 1.1084223449230195, | |
| "epoch": 1.2883435582822087, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 2.4603023529052734, | |
| "learning_rate": 1.7944785276073617e-07, | |
| "loss": 0.07945090532302856, | |
| "num_tokens": 733245.0, | |
| "reward": -0.13774999380111694, | |
| "reward_std": 0.2730386942625046, | |
| "rewards/format_reward/mean": 0.2174999989569187, | |
| "rewards/format_reward/std": 0.22229814901947975, | |
| "rewards/security_audit_reward/mean": -0.29000000059604647, | |
| "rewards/security_audit_reward/std": 0.3105652093887329, | |
| "step": 210, | |
| "step_time": 37.03591289120122 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.4, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 361.8, | |
| "completions/mean_length": 345.0, | |
| "completions/mean_terminated_length": 261.3000030517578, | |
| "completions/min_length": 184.8, | |
| "completions/min_terminated_length": 184.8, | |
| "entropy": 1.2274070978164673, | |
| "epoch": 1.3190184049079754, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.573819637298584, | |
| "learning_rate": 1.7177914110429448e-07, | |
| "loss": -0.07497722506523133, | |
| "num_tokens": 749917.0, | |
| "reward": -0.01174999624490738, | |
| "reward_std": 0.3002330154180527, | |
| "rewards/format_reward/mean": 0.3225000023841858, | |
| "rewards/format_reward/std": 0.1751384623348713, | |
| "rewards/security_audit_reward/mean": -0.15499999821186067, | |
| "rewards/security_audit_reward/std": 0.3648489773273468, | |
| "step": 215, | |
| "step_time": 38.89012140319937 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2, | |
| "completions/max_length": 481.8, | |
| "completions/max_terminated_length": 344.6, | |
| "completions/mean_length": 292.3, | |
| "completions/mean_terminated_length": 234.26666870117188, | |
| "completions/min_length": 122.2, | |
| "completions/min_terminated_length": 122.2, | |
| "entropy": 1.1711494624614716, | |
| "epoch": 1.3496932515337423, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 3.879939556121826, | |
| "learning_rate": 1.6411042944785276e-07, | |
| "loss": 0.06901218891143798, | |
| "num_tokens": 765457.0, | |
| "reward": -0.2002499908208847, | |
| "reward_std": 0.19745510853827, | |
| "rewards/format_reward/mean": 0.20749999657273294, | |
| "rewards/format_reward/std": 0.2098293460905552, | |
| "rewards/security_audit_reward/mean": -0.375, | |
| "rewards/security_audit_reward/std": 0.20773502588272094, | |
| "step": 220, | |
| "step_time": 36.50884771559977 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 451.4, | |
| "completions/max_terminated_length": 281.8, | |
| "completions/mean_length": 310.85, | |
| "completions/mean_terminated_length": 210.58333435058594, | |
| "completions/min_length": 144.0, | |
| "completions/min_terminated_length": 144.0, | |
| "entropy": 1.4326449751853942, | |
| "epoch": 1.3803680981595092, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 5.483170986175537, | |
| "learning_rate": 1.5644171779141104e-07, | |
| "loss": -0.03490494191646576, | |
| "num_tokens": 782226.0, | |
| "reward": -0.14649999141693115, | |
| "reward_std": 0.19791007936000823, | |
| "rewards/format_reward/mean": 0.3049999952316284, | |
| "rewards/format_reward/std": 0.19433450996875762, | |
| "rewards/security_audit_reward/mean": -0.3399999998509884, | |
| "rewards/security_audit_reward/std": 0.22000000029802322, | |
| "step": 225, | |
| "step_time": 35.079213985799655 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 511.2, | |
| "completions/max_terminated_length": 315.2, | |
| "completions/mean_length": 338.15, | |
| "completions/mean_terminated_length": 226.28333435058593, | |
| "completions/min_length": 134.8, | |
| "completions/min_terminated_length": 134.8, | |
| "entropy": 1.1364098012447357, | |
| "epoch": 1.4110429447852761, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.3578364849090576, | |
| "learning_rate": 1.4877300613496933e-07, | |
| "loss": 0.0896155834197998, | |
| "num_tokens": 798571.0, | |
| "reward": -0.11574998870491982, | |
| "reward_std": 0.19651760943233967, | |
| "rewards/format_reward/mean": 0.2674999989569187, | |
| "rewards/format_reward/std": 0.15046989992260934, | |
| "rewards/security_audit_reward/mean": -0.27999999821186067, | |
| "rewards/security_audit_reward/std": 0.2297215759754181, | |
| "step": 230, | |
| "step_time": 38.61798697480081 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 505.4, | |
| "completions/max_terminated_length": 385.4, | |
| "completions/mean_length": 360.65, | |
| "completions/mean_terminated_length": 295.23333740234375, | |
| "completions/min_length": 210.2, | |
| "completions/min_terminated_length": 210.2, | |
| "entropy": 1.0565216183662414, | |
| "epoch": 1.441717791411043, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.266097068786621, | |
| "learning_rate": 1.4110429447852758e-07, | |
| "loss": 0.07759050726890564, | |
| "num_tokens": 815570.0, | |
| "reward": -0.06749999299645423, | |
| "reward_std": 0.27374918162822726, | |
| "rewards/format_reward/mean": 0.37000001072883604, | |
| "rewards/format_reward/std": 0.1865294199436903, | |
| "rewards/security_audit_reward/mean": -0.2550000011920929, | |
| "rewards/security_audit_reward/std": 0.32802181243896483, | |
| "step": 235, | |
| "step_time": 38.44030983600023 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 495.6, | |
| "completions/max_terminated_length": 352.6, | |
| "completions/mean_length": 336.05, | |
| "completions/mean_terminated_length": 243.60000915527343, | |
| "completions/min_length": 145.8, | |
| "completions/min_terminated_length": 145.8, | |
| "entropy": 1.417020809650421, | |
| "epoch": 1.4723926380368098, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.767548084259033, | |
| "learning_rate": 1.334355828220859e-07, | |
| "loss": 0.03671485185623169, | |
| "num_tokens": 831713.0, | |
| "reward": -0.12849999219179153, | |
| "reward_std": 0.19381159394979477, | |
| "rewards/format_reward/mean": 0.3300000041723251, | |
| "rewards/format_reward/std": 0.20168980173766612, | |
| "rewards/security_audit_reward/mean": -0.325, | |
| "rewards/security_audit_reward/std": 0.20773502588272094, | |
| "step": 240, | |
| "step_time": 37.49174958900003 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.6, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 343.0, | |
| "completions/mean_length": 417.3, | |
| "completions/mean_terminated_length": 293.7, | |
| "completions/min_length": 246.8, | |
| "completions/min_terminated_length": 246.8, | |
| "entropy": 1.0088598132133484, | |
| "epoch": 1.5030674846625767, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 3.1936607360839844, | |
| "learning_rate": 1.2576687116564417e-07, | |
| "loss": -0.03240810632705689, | |
| "num_tokens": 849855.0, | |
| "reward": -0.11324999332427979, | |
| "reward_std": 0.2602782666683197, | |
| "rewards/format_reward/mean": 0.3224999994039536, | |
| "rewards/format_reward/std": 0.20757876634597777, | |
| "rewards/security_audit_reward/mean": -0.3, | |
| "rewards/security_audit_reward/std": 0.3154700517654419, | |
| "step": 245, | |
| "step_time": 39.07302968719996 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2, | |
| "completions/max_length": 459.6, | |
| "completions/max_terminated_length": 428.2, | |
| "completions/mean_length": 325.35, | |
| "completions/mean_terminated_length": 277.5833374023438, | |
| "completions/min_length": 137.0, | |
| "completions/min_terminated_length": 137.0, | |
| "entropy": 1.1867628961801528, | |
| "epoch": 1.5337423312883436, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 3.9449942111968994, | |
| "learning_rate": 1.1809815950920244e-07, | |
| "loss": -0.005416367202997208, | |
| "num_tokens": 866330.0, | |
| "reward": -0.08199999034404755, | |
| "reward_std": 0.2747137784957886, | |
| "rewards/format_reward/mean": 0.3099999874830246, | |
| "rewards/format_reward/std": 0.17935641929507257, | |
| "rewards/security_audit_reward/mean": -0.25, | |
| "rewards/security_audit_reward/std": 0.33094010353088377, | |
| "step": 250, | |
| "step_time": 35.27377968320034 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 450.0, | |
| "completions/max_terminated_length": 311.2, | |
| "completions/mean_length": 283.8, | |
| "completions/mean_terminated_length": 202.06666870117186, | |
| "completions/min_length": 128.0, | |
| "completions/min_terminated_length": 128.0, | |
| "entropy": 1.322118791937828, | |
| "epoch": 1.5644171779141103, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 6.9948039054870605, | |
| "learning_rate": 1.1042944785276073e-07, | |
| "loss": 0.055895209312438965, | |
| "num_tokens": 881650.0, | |
| "reward": -0.11924999132752419, | |
| "reward_std": 0.14510822538286447, | |
| "rewards/format_reward/mean": 0.3024999976158142, | |
| "rewards/format_reward/std": 0.15741010159254074, | |
| "rewards/security_audit_reward/mean": -0.3, | |
| "rewards/security_audit_reward/std": 0.15773502588272095, | |
| "step": 255, | |
| "step_time": 34.332786842800125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 485.0, | |
| "completions/max_terminated_length": 312.2, | |
| "completions/mean_length": 312.0, | |
| "completions/mean_terminated_length": 225.13333740234376, | |
| "completions/min_length": 141.8, | |
| "completions/min_terminated_length": 141.8, | |
| "entropy": 1.1261488378047944, | |
| "epoch": 1.5950920245398774, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 5.633542537689209, | |
| "learning_rate": 1.0276073619631902e-07, | |
| "loss": 0.12422184944152832, | |
| "num_tokens": 898170.0, | |
| "reward": -0.09424999356269836, | |
| "reward_std": 0.2452640563249588, | |
| "rewards/format_reward/mean": 0.3274999916553497, | |
| "rewards/format_reward/std": 0.19424656331539153, | |
| "rewards/security_audit_reward/mean": -0.275, | |
| "rewards/security_audit_reward/std": 0.29574271440505984, | |
| "step": 260, | |
| "step_time": 37.18088837539908 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 481.4, | |
| "completions/max_terminated_length": 333.8, | |
| "completions/mean_length": 323.4, | |
| "completions/mean_terminated_length": 233.95, | |
| "completions/min_length": 174.6, | |
| "completions/min_terminated_length": 174.6, | |
| "entropy": 1.3164357602596284, | |
| "epoch": 1.6257668711656441, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.177126884460449, | |
| "learning_rate": 9.50920245398773e-08, | |
| "loss": -0.0031075358390808107, | |
| "num_tokens": 914438.0, | |
| "reward": -0.109499990940094, | |
| "reward_std": 0.19355954378843307, | |
| "rewards/format_reward/mean": 0.3700000077486038, | |
| "rewards/format_reward/std": 0.19431518614292145, | |
| "rewards/security_audit_reward/mean": -0.31500000059604644, | |
| "rewards/security_audit_reward/std": 0.21745660305023193, | |
| "step": 265, | |
| "step_time": 36.75480746599969 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15, | |
| "completions/max_length": 460.2, | |
| "completions/max_terminated_length": 279.2, | |
| "completions/mean_length": 224.9, | |
| "completions/mean_terminated_length": 170.45000305175782, | |
| "completions/min_length": 84.4, | |
| "completions/min_terminated_length": 84.4, | |
| "entropy": 1.267154586315155, | |
| "epoch": 1.656441717791411, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 7.552680015563965, | |
| "learning_rate": 8.742331288343557e-08, | |
| "loss": -0.12154214382171631, | |
| "num_tokens": 928536.0, | |
| "reward": -0.07999998778104782, | |
| "reward_std": 0.16925212144851684, | |
| "rewards/format_reward/mean": 0.37499999403953554, | |
| "rewards/format_reward/std": 0.1521439790725708, | |
| "rewards/security_audit_reward/mean": -0.275, | |
| "rewards/security_audit_reward/std": 0.20773502588272094, | |
| "step": 270, | |
| "step_time": 35.04094967719975 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 469.4, | |
| "completions/max_terminated_length": 287.6, | |
| "completions/mean_length": 300.95, | |
| "completions/mean_terminated_length": 204.76667175292968, | |
| "completions/min_length": 130.0, | |
| "completions/min_terminated_length": 130.0, | |
| "entropy": 1.1936017721891403, | |
| "epoch": 1.687116564417178, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 7.106525421142578, | |
| "learning_rate": 7.975460122699386e-08, | |
| "loss": -0.0458857923746109, | |
| "num_tokens": 944307.0, | |
| "reward": -0.05174999088048935, | |
| "reward_std": 0.23333178758621215, | |
| "rewards/format_reward/mean": 0.3874999940395355, | |
| "rewards/format_reward/std": 0.16559004038572311, | |
| "rewards/security_audit_reward/mean": -0.24000000059604645, | |
| "rewards/security_audit_reward/std": 0.2866505742073059, | |
| "step": 275, | |
| "step_time": 36.0470411268001 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.2, | |
| "completions/max_length": 498.6, | |
| "completions/max_terminated_length": 375.0, | |
| "completions/mean_length": 295.25, | |
| "completions/mean_terminated_length": 232.6666717529297, | |
| "completions/min_length": 97.8, | |
| "completions/min_terminated_length": 97.8, | |
| "entropy": 1.460896384716034, | |
| "epoch": 1.7177914110429446, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 5.004129409790039, | |
| "learning_rate": 7.208588957055214e-08, | |
| "loss": -0.10658804178237916, | |
| "num_tokens": 960078.0, | |
| "reward": -0.01824999153614044, | |
| "reward_std": 0.2521414369344711, | |
| "rewards/format_reward/mean": 0.3825000047683716, | |
| "rewards/format_reward/std": 0.15327396541833876, | |
| "rewards/security_audit_reward/mean": -0.1899999976158142, | |
| "rewards/security_audit_reward/std": 0.3234777390956879, | |
| "step": 280, | |
| "step_time": 37.26412723539943 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 501.0, | |
| "completions/max_terminated_length": 363.2, | |
| "completions/mean_length": 340.6, | |
| "completions/mean_terminated_length": 257.06666870117186, | |
| "completions/min_length": 146.2, | |
| "completions/min_terminated_length": 146.2, | |
| "entropy": 1.0431257128715514, | |
| "epoch": 1.7484662576687118, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 4.059199333190918, | |
| "learning_rate": 6.441717791411043e-08, | |
| "loss": -0.10386581420898437, | |
| "num_tokens": 976992.0, | |
| "reward": -0.06924999207258224, | |
| "reward_std": 0.2484972782433033, | |
| "rewards/format_reward/mean": 0.38750000596046447, | |
| "rewards/format_reward/std": 0.1250488668680191, | |
| "rewards/security_audit_reward/mean": -0.26500000059604645, | |
| "rewards/security_audit_reward/std": 0.3080150008201599, | |
| "step": 285, | |
| "step_time": 38.17630281240017 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.05, | |
| "completions/max_length": 435.0, | |
| "completions/max_terminated_length": 413.6, | |
| "completions/mean_length": 281.65, | |
| "completions/mean_terminated_length": 274.0, | |
| "completions/min_length": 152.0, | |
| "completions/min_terminated_length": 152.0, | |
| "entropy": 1.1775987446308136, | |
| "epoch": 1.7791411042944785, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 4.12350606918335, | |
| "learning_rate": 5.674846625766871e-08, | |
| "loss": -0.03856886327266693, | |
| "num_tokens": 992819.0, | |
| "reward": -0.05999999046325684, | |
| "reward_std": 0.1456713281571865, | |
| "rewards/format_reward/mean": 0.360000005364418, | |
| "rewards/format_reward/std": 0.12044776938855647, | |
| "rewards/security_audit_reward/mean": -0.23999999985098838, | |
| "rewards/security_audit_reward/std": 0.17773502618074416, | |
| "step": 290, | |
| "step_time": 33.279480656001034 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15, | |
| "completions/max_length": 470.0, | |
| "completions/max_terminated_length": 383.0, | |
| "completions/mean_length": 306.15, | |
| "completions/mean_terminated_length": 264.66666717529296, | |
| "completions/min_length": 170.8, | |
| "completions/min_terminated_length": 170.8, | |
| "entropy": 1.3437508165836334, | |
| "epoch": 1.8098159509202454, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 5.014013290405273, | |
| "learning_rate": 4.907975460122699e-08, | |
| "loss": 0.1800641179084778, | |
| "num_tokens": 1008676.0, | |
| "reward": -0.14524998962879182, | |
| "reward_std": 0.19439554661512376, | |
| "rewards/format_reward/mean": 0.33249999284744264, | |
| "rewards/format_reward/std": 0.21220951080322265, | |
| "rewards/security_audit_reward/mean": -0.35, | |
| "rewards/security_audit_reward/std": 0.2154700517654419, | |
| "step": 295, | |
| "step_time": 35.92737517459973 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 471.6, | |
| "completions/max_terminated_length": 299.6, | |
| "completions/mean_length": 360.45, | |
| "completions/mean_terminated_length": 234.9166687011719, | |
| "completions/min_length": 229.0, | |
| "completions/min_terminated_length": 126.6, | |
| "entropy": 1.1840724140405654, | |
| "epoch": 1.8404907975460123, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 2.570652484893799, | |
| "learning_rate": 4.1411042944785274e-08, | |
| "loss": 0.019638296961784363, | |
| "num_tokens": 1025285.0, | |
| "reward": -0.06699999049305916, | |
| "reward_std": 0.2400740846991539, | |
| "rewards/format_reward/mean": 0.3600000023841858, | |
| "rewards/format_reward/std": 0.1531308189034462, | |
| "rewards/security_audit_reward/mean": -0.25, | |
| "rewards/security_audit_reward/std": 0.2868344783782959, | |
| "step": 300, | |
| "step_time": 35.41554451999982 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 491.0, | |
| "completions/max_terminated_length": 357.4, | |
| "completions/mean_length": 330.65, | |
| "completions/mean_terminated_length": 249.31666870117186, | |
| "completions/min_length": 125.0, | |
| "completions/min_terminated_length": 125.0, | |
| "entropy": 1.2406673014163971, | |
| "epoch": 1.871165644171779, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 4.481863975524902, | |
| "learning_rate": 3.3742331288343556e-08, | |
| "loss": 0.2060640573501587, | |
| "num_tokens": 1041362.0, | |
| "reward": -0.005999994277954101, | |
| "reward_std": 0.23084985613822936, | |
| "rewards/format_reward/mean": 0.4000000059604645, | |
| "rewards/format_reward/std": 0.12440616972744464, | |
| "rewards/security_audit_reward/mean": -0.1800000011920929, | |
| "rewards/security_audit_reward/std": 0.2963721513748169, | |
| "step": 305, | |
| "step_time": 37.082924159199685 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.25, | |
| "completions/max_length": 440.8, | |
| "completions/max_terminated_length": 313.2, | |
| "completions/mean_length": 296.75, | |
| "completions/mean_terminated_length": 235.5666748046875, | |
| "completions/min_length": 162.4, | |
| "completions/min_terminated_length": 162.4, | |
| "entropy": 1.4028007209300994, | |
| "epoch": 1.9018404907975461, | |
| "frac_reward_zero_std": 0.1, | |
| "grad_norm": 3.5625224113464355, | |
| "learning_rate": 2.607361963190184e-08, | |
| "loss": -0.09365988969802856, | |
| "num_tokens": 1056493.0, | |
| "reward": -0.07374998778104783, | |
| "reward_std": 0.17730526700615884, | |
| "rewards/format_reward/mean": 0.3725000023841858, | |
| "rewards/format_reward/std": 0.1375160299241543, | |
| "rewards/security_audit_reward/mean": -0.26500000059604645, | |
| "rewards/security_audit_reward/std": 0.21745660305023193, | |
| "step": 310, | |
| "step_time": 32.90120237959964 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.35, | |
| "completions/max_length": 508.8, | |
| "completions/max_terminated_length": 354.0, | |
| "completions/mean_length": 356.8, | |
| "completions/mean_terminated_length": 257.8333343505859, | |
| "completions/min_length": 172.4, | |
| "completions/min_terminated_length": 172.4, | |
| "entropy": 1.2189550220966339, | |
| "epoch": 1.9325153374233128, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 4.627664089202881, | |
| "learning_rate": 1.8404907975460124e-08, | |
| "loss": -0.043705222010612485, | |
| "num_tokens": 1073209.0, | |
| "reward": -0.10774998962879181, | |
| "reward_std": 0.1972955085337162, | |
| "rewards/format_reward/mean": 0.35250000059604647, | |
| "rewards/format_reward/std": 0.13575982302427292, | |
| "rewards/security_audit_reward/mean": -0.3050000011920929, | |
| "rewards/security_audit_reward/std": 0.25009607076644896, | |
| "step": 315, | |
| "step_time": 38.71515165839992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.3, | |
| "completions/max_length": 512.0, | |
| "completions/max_terminated_length": 394.2, | |
| "completions/mean_length": 341.55, | |
| "completions/mean_terminated_length": 271.6666717529297, | |
| "completions/min_length": 180.4, | |
| "completions/min_terminated_length": 180.4, | |
| "entropy": 1.1277358770370483, | |
| "epoch": 1.9631901840490797, | |
| "frac_reward_zero_std": 0.3, | |
| "grad_norm": 3.979893207550049, | |
| "learning_rate": 1.0736196319018405e-08, | |
| "loss": -0.07816079258918762, | |
| "num_tokens": 1089918.0, | |
| "reward": -0.08449999019503593, | |
| "reward_std": 0.14596682507544756, | |
| "rewards/format_reward/mean": 0.3949999988079071, | |
| "rewards/format_reward/std": 0.13996364884078502, | |
| "rewards/security_audit_reward/mean": -0.29000000059604647, | |
| "rewards/security_audit_reward/std": 0.17118052244186402, | |
| "step": 320, | |
| "step_time": 39.41049809280048 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.15, | |
| "completions/max_length": 488.4, | |
| "completions/max_terminated_length": 420.6, | |
| "completions/mean_length": 319.75, | |
| "completions/mean_terminated_length": 284.56667175292966, | |
| "completions/min_length": 199.2, | |
| "completions/min_terminated_length": 199.2, | |
| "entropy": 1.3403348803520203, | |
| "epoch": 1.9938650306748467, | |
| "frac_reward_zero_std": 0.2, | |
| "grad_norm": 3.1303930282592773, | |
| "learning_rate": 3.067484662576687e-09, | |
| "loss": -0.08682631254196167, | |
| "num_tokens": 1105841.0, | |
| "reward": -0.05374999940395355, | |
| "reward_std": 0.21490582572296263, | |
| "rewards/format_reward/mean": 0.2875, | |
| "rewards/format_reward/std": 0.19336618185043336, | |
| "rewards/security_audit_reward/mean": -0.2, | |
| "rewards/security_audit_reward/std": 0.22739237546920776, | |
| "step": 325, | |
| "step_time": 37.026589032400445 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 326, | |
| "num_input_tokens_seen": 1108991, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |