Instructions to use Gege24/bir-real-gin with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Gege24/bir-real-gin with PEFT:
Base model is not found.
- Transformers
How to use Gege24/bir-real-gin with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Gege24/bir-real-gin")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Gege24/bir-real-gin", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Gege24/bir-real-gin with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Gege24/bir-real-gin"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/bir-real-gin",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/Gege24/bir-real-gin
- SGLang
How to use Gege24/bir-real-gin with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Gege24/bir-real-gin" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/bir-real-gin",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Gege24/bir-real-gin" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/bir-real-gin",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use Gege24/bir-real-gin with Docker Model Runner:
docker model run hf.co/Gege24/bir-real-gin
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.00075, | |
| "eval_steps": 500, | |
| "global_step": 75, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1843.0, | |
| "completions/max_terminated_length": 1843.0, | |
| "completions/mean_length": 1586.6875, | |
| "completions/mean_terminated_length": 1586.6875, | |
| "completions/min_length": 274.0, | |
| "completions/min_terminated_length": 274.0, | |
| "entropy": 2.3952305614948273, | |
| "epoch": 1e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.051290396600961685, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": -0.0056, | |
| "num_tokens": 71951.0, | |
| "reward": -8.338340759277344, | |
| "reward_std": 11.816058158874512, | |
| "rewards/rollout_reward_func/mean": -8.338340759277344, | |
| "rewards/rollout_reward_func/std": 12.670792579650879, | |
| "sampling/importance_sampling_ratio/max": 0.24942533671855927, | |
| "sampling/importance_sampling_ratio/mean": 0.020489878952503204, | |
| "sampling/importance_sampling_ratio/min": 3.589864120350601e-15, | |
| "sampling/sampling_logp_difference/max": 13.334996223449707, | |
| "sampling/sampling_logp_difference/mean": 0.37166523933410645, | |
| "step": 1, | |
| "step_time": 40.07221162000002 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 2.3952305614948273, | |
| "epoch": 2e-05, | |
| "grad_norm": 0.051755864173173904, | |
| "kl": 0.0, | |
| "learning_rate": 2.8571428571428575e-07, | |
| "loss": -0.0056, | |
| "step": 2, | |
| "step_time": 6.337008879999928 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005321558099240065, | |
| "clip_ratio/high_mean": 0.003962862421758473, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003962862421758473, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1870.0, | |
| "completions/max_terminated_length": 1870.0, | |
| "completions/mean_length": 1689.21875, | |
| "completions/mean_terminated_length": 1689.21875, | |
| "completions/min_length": 1392.0, | |
| "completions/min_terminated_length": 1392.0, | |
| "entropy": 2.241749197244644, | |
| "epoch": 3e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.032931093126535416, | |
| "kl": 0.0010736112235463224, | |
| "learning_rate": 5.714285714285715e-07, | |
| "loss": 0.0001, | |
| "num_tokens": 147616.0, | |
| "reward": -8.11627197265625, | |
| "reward_std": 9.453106880187988, | |
| "rewards/rollout_reward_func/mean": -8.11627197265625, | |
| "rewards/rollout_reward_func/std": 10.086986541748047, | |
| "sampling/importance_sampling_ratio/max": 0.038908205926418304, | |
| "sampling/importance_sampling_ratio/mean": 0.013574006035923958, | |
| "sampling/importance_sampling_ratio/min": 9.307732536101287e-13, | |
| "sampling/sampling_logp_difference/max": 8.446062088012695, | |
| "sampling/sampling_logp_difference/mean": 0.23850713670253754, | |
| "step": 3, | |
| "step_time": 42.26439957300016 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.010532152839004993, | |
| "clip_ratio/high_mean": 0.005266076419502497, | |
| "clip_ratio/low_mean": 0.0013297871919348836, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00659586361143738, | |
| "entropy": 2.2402877509593964, | |
| "epoch": 4e-05, | |
| "grad_norm": 0.030311495065689087, | |
| "kl": 0.0013166169228497893, | |
| "learning_rate": 8.571428571428572e-07, | |
| "loss": 0.0001, | |
| "step": 4, | |
| "step_time": 6.949824775000025 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0014204545877873898, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0014204545877873898, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1775.0, | |
| "completions/max_terminated_length": 1775.0, | |
| "completions/mean_length": 1671.5, | |
| "completions/mean_terminated_length": 1671.5, | |
| "completions/min_length": 758.0, | |
| "completions/min_terminated_length": 758.0, | |
| "entropy": 2.280731201171875, | |
| "epoch": 5e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.035965703427791595, | |
| "kl": 0.0009596906966180541, | |
| "learning_rate": 1.142857142857143e-06, | |
| "loss": -0.0034, | |
| "num_tokens": 222393.0, | |
| "reward": -2.6346850395202637, | |
| "reward_std": 16.439598083496094, | |
| "rewards/rollout_reward_func/mean": -2.6346850395202637, | |
| "rewards/rollout_reward_func/std": 17.489192962646484, | |
| "sampling/importance_sampling_ratio/max": 0.07573997974395752, | |
| "sampling/importance_sampling_ratio/mean": 0.014259650371968746, | |
| "sampling/importance_sampling_ratio/min": 0.00022367587371263653, | |
| "sampling/sampling_logp_difference/max": 1.2608689069747925, | |
| "sampling/sampling_logp_difference/mean": 0.21580657362937927, | |
| "step": 5, | |
| "step_time": 42.164671801000054 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0012499999720603228, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0012499999720603228, | |
| "entropy": 2.279596298933029, | |
| "epoch": 6e-05, | |
| "grad_norm": 0.038707196712493896, | |
| "kl": 0.0009941701391653623, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": -0.0034, | |
| "step": 6, | |
| "step_time": 6.197612690000028 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.008154743583872914, | |
| "clip_ratio/high_mean": 0.004077371791936457, | |
| "clip_ratio/low_mean": 0.0013888889225199819, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005466260714456439, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1863.0, | |
| "completions/max_terminated_length": 1863.0, | |
| "completions/mean_length": 1571.40625, | |
| "completions/mean_terminated_length": 1571.40625, | |
| "completions/min_length": 311.0, | |
| "completions/min_terminated_length": 311.0, | |
| "entropy": 2.281369060277939, | |
| "epoch": 7e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.043208107352256775, | |
| "kl": 0.0007937230257084593, | |
| "learning_rate": 1.7142857142857145e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 294768.0, | |
| "reward": -12.273736953735352, | |
| "reward_std": 11.2011137008667, | |
| "rewards/rollout_reward_func/mean": -12.273736953735352, | |
| "rewards/rollout_reward_func/std": 10.951950073242188, | |
| "sampling/importance_sampling_ratio/max": 0.2913915514945984, | |
| "sampling/importance_sampling_ratio/mean": 0.029564352706074715, | |
| "sampling/importance_sampling_ratio/min": 1.11443969016186e-13, | |
| "sampling/sampling_logp_difference/max": 11.392870903015137, | |
| "sampling/sampling_logp_difference/mean": 0.24621161818504333, | |
| "step": 7, | |
| "step_time": 41.23813219300007 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.006393861956894398, | |
| "clip_ratio/high_mean": 0.003196930978447199, | |
| "clip_ratio/low_mean": 0.0026959646493196487, | |
| "clip_ratio/low_min": 0.0025510203558951616, | |
| "clip_ratio/region_mean": 0.005892895627766848, | |
| "entropy": 2.281323105096817, | |
| "epoch": 8e-05, | |
| "grad_norm": 0.037575479596853256, | |
| "kl": 0.0011317383105051704, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.0023, | |
| "step": 8, | |
| "step_time": 6.953839896999966 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.009784075664356351, | |
| "clip_ratio/high_mean": 0.006221824907697737, | |
| "clip_ratio/low_mean": 0.0013297871919348836, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.007551612099632621, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1839.0, | |
| "completions/max_terminated_length": 1839.0, | |
| "completions/mean_length": 1631.96875, | |
| "completions/mean_terminated_length": 1631.96875, | |
| "completions/min_length": 758.0, | |
| "completions/min_terminated_length": 758.0, | |
| "entropy": 2.2518777698278427, | |
| "epoch": 9e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.024869710206985474, | |
| "kl": 0.0008677325677126646, | |
| "learning_rate": 2.285714285714286e-06, | |
| "loss": 0.002, | |
| "num_tokens": 368485.0, | |
| "reward": -5.027488708496094, | |
| "reward_std": 9.159595489501953, | |
| "rewards/rollout_reward_func/mean": -5.027488708496094, | |
| "rewards/rollout_reward_func/std": 10.158669471740723, | |
| "sampling/importance_sampling_ratio/max": 0.0989551916718483, | |
| "sampling/importance_sampling_ratio/mean": 0.01718847081065178, | |
| "sampling/importance_sampling_ratio/min": 3.3642640756559317e-11, | |
| "sampling/sampling_logp_difference/max": 9.060235977172852, | |
| "sampling/sampling_logp_difference/mean": 0.25719159841537476, | |
| "step": 9, | |
| "step_time": 45.75393526599987 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004852556856349111, | |
| "clip_ratio/high_mean": 0.0024262784281745553, | |
| "clip_ratio/low_mean": 0.0013297871919348836, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003756065620109439, | |
| "entropy": 2.247532531619072, | |
| "epoch": 0.0001, | |
| "grad_norm": 0.025674326345324516, | |
| "kl": 0.0010098924503836315, | |
| "learning_rate": 2.571428571428571e-06, | |
| "loss": 0.002, | |
| "step": 10, | |
| "step_time": 6.291163318000031 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0013020833721384406, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0013020833721384406, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 1848.0, | |
| "completions/max_terminated_length": 1848.0, | |
| "completions/mean_length": 1693.3125, | |
| "completions/mean_terminated_length": 1702.54833984375, | |
| "completions/min_length": 1389.0, | |
| "completions/min_terminated_length": 1389.0, | |
| "entropy": 2.2023140490055084, | |
| "epoch": 0.00011, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.025482358410954475, | |
| "kl": 0.0009734490522532724, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": -0.0018, | |
| "num_tokens": 444310.0, | |
| "reward": -6.636538028717041, | |
| "reward_std": 13.808101654052734, | |
| "rewards/rollout_reward_func/mean": -6.636538028717041, | |
| "rewards/rollout_reward_func/std": 13.634783744812012, | |
| "sampling/importance_sampling_ratio/max": 0.04252217337489128, | |
| "sampling/importance_sampling_ratio/mean": 0.015543580055236816, | |
| "sampling/importance_sampling_ratio/min": 8.082223128483037e-25, | |
| "sampling/sampling_logp_difference/max": 17.322786331176758, | |
| "sampling/sampling_logp_difference/mean": 0.3324906826019287, | |
| "step": 11, | |
| "step_time": 49.50245009800017 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005110554862767458, | |
| "clip_ratio/high_mean": 0.002555277431383729, | |
| "clip_ratio/low_mean": 0.0014204545877873898, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003975732019171119, | |
| "entropy": 2.2029761970043182, | |
| "epoch": 0.00012, | |
| "grad_norm": 0.030912674963474274, | |
| "kl": 0.0009688141508377157, | |
| "learning_rate": 3.142857142857143e-06, | |
| "loss": -0.0018, | |
| "step": 12, | |
| "step_time": 7.69119761200011 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.013187056640163064, | |
| "clip_ratio/high_mean": 0.006593528320081532, | |
| "clip_ratio/low_mean": 0.0013586956774815917, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.007952223997563124, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1780.0, | |
| "completions/max_terminated_length": 1780.0, | |
| "completions/mean_length": 1663.0, | |
| "completions/mean_terminated_length": 1663.0, | |
| "completions/min_length": 1240.0, | |
| "completions/min_terminated_length": 1240.0, | |
| "entropy": 2.258611023426056, | |
| "epoch": 0.00013, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.026398880407214165, | |
| "kl": 0.0010122761159436777, | |
| "learning_rate": 3.428571428571429e-06, | |
| "loss": 0.0013, | |
| "num_tokens": 519593.0, | |
| "reward": -8.102012634277344, | |
| "reward_std": 10.601648330688477, | |
| "rewards/rollout_reward_func/mean": -8.102012634277344, | |
| "rewards/rollout_reward_func/std": 11.444524765014648, | |
| "sampling/importance_sampling_ratio/max": 0.0403943695127964, | |
| "sampling/importance_sampling_ratio/mean": 0.01241688709706068, | |
| "sampling/importance_sampling_ratio/min": 6.642031217564404e-14, | |
| "sampling/sampling_logp_difference/max": 11.826959609985352, | |
| "sampling/sampling_logp_difference/mean": 0.2676909267902374, | |
| "step": 13, | |
| "step_time": 51.398302784000066 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005434782709926367, | |
| "clip_ratio/high_mean": 0.0027173913549631834, | |
| "clip_ratio/low_mean": 0.002418017713353038, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005135408951900899, | |
| "entropy": 2.251807004213333, | |
| "epoch": 0.00014, | |
| "grad_norm": 0.027899302542209625, | |
| "kl": 0.001143605462857522, | |
| "learning_rate": 3.7142857142857146e-06, | |
| "loss": 0.0013, | |
| "step": 14, | |
| "step_time": 6.214198535999913 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.010549242608249187, | |
| "clip_ratio/high_mean": 0.0066044083796441555, | |
| "clip_ratio/low_mean": 0.002659574383869767, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.009263982763513923, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1846.0, | |
| "completions/max_terminated_length": 1846.0, | |
| "completions/mean_length": 1673.4375, | |
| "completions/mean_terminated_length": 1673.4375, | |
| "completions/min_length": 1376.0, | |
| "completions/min_terminated_length": 1376.0, | |
| "entropy": 2.269966244697571, | |
| "epoch": 0.00015, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.028693798929452896, | |
| "kl": 0.0012732810355373658, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 594885.0, | |
| "reward": -9.916947364807129, | |
| "reward_std": 10.726478576660156, | |
| "rewards/rollout_reward_func/mean": -9.916947364807129, | |
| "rewards/rollout_reward_func/std": 11.980547904968262, | |
| "sampling/importance_sampling_ratio/max": 0.02854122966527939, | |
| "sampling/importance_sampling_ratio/mean": 0.01421053521335125, | |
| "sampling/importance_sampling_ratio/min": 2.1058147088061363e-13, | |
| "sampling/sampling_logp_difference/max": 9.987288475036621, | |
| "sampling/sampling_logp_difference/mean": 0.2753928303718567, | |
| "step": 15, | |
| "step_time": 52.00545187799992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.007995169144123793, | |
| "clip_ratio/high_mean": 0.003997584572061896, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003997584572061896, | |
| "entropy": 2.2705667912960052, | |
| "epoch": 0.00016, | |
| "grad_norm": 0.021315351128578186, | |
| "kl": 0.0012536912836367264, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": -0.0002, | |
| "step": 16, | |
| "step_time": 7.3722107160000405 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0069444444961845875, | |
| "clip_ratio/high_mean": 0.0034722222480922937, | |
| "clip_ratio/low_mean": 0.00638433254789561, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.009856554795987904, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2086.0, | |
| "completions/max_terminated_length": 2086.0, | |
| "completions/mean_length": 1863.21875, | |
| "completions/mean_terminated_length": 1863.21875, | |
| "completions/min_length": 740.0, | |
| "completions/min_terminated_length": 740.0, | |
| "entropy": 2.299890086054802, | |
| "epoch": 0.00017, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01769721694290638, | |
| "kl": 0.0012724917105515487, | |
| "learning_rate": 4.571428571428572e-06, | |
| "loss": 0.0017, | |
| "num_tokens": 676591.0, | |
| "reward": -8.684309959411621, | |
| "reward_std": 9.59238338470459, | |
| "rewards/rollout_reward_func/mean": -8.684309959411621, | |
| "rewards/rollout_reward_func/std": 10.695920944213867, | |
| "sampling/importance_sampling_ratio/max": 0.041902750730514526, | |
| "sampling/importance_sampling_ratio/mean": 0.009395781904459, | |
| "sampling/importance_sampling_ratio/min": 3.6452324480957535e-20, | |
| "sampling/sampling_logp_difference/max": 12.486509323120117, | |
| "sampling/sampling_logp_difference/mean": 0.3327711820602417, | |
| "step": 17, | |
| "step_time": 58.969859735 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0069444444961845875, | |
| "clip_ratio/high_mean": 0.0034722222480922937, | |
| "clip_ratio/low_mean": 0.004615322104655206, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0080875443527475, | |
| "entropy": 2.297407776117325, | |
| "epoch": 0.00018, | |
| "grad_norm": 0.01824762113392353, | |
| "kl": 0.001823120612243656, | |
| "learning_rate": 4.857142857142858e-06, | |
| "loss": 0.0017, | |
| "step": 18, | |
| "step_time": 6.952743392999764 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.009362624026834965, | |
| "clip_ratio/high_mean": 0.005838719545863569, | |
| "clip_ratio/low_mean": 0.00231799460016191, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.008156714029610157, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2051.0, | |
| "completions/max_terminated_length": 2051.0, | |
| "completions/mean_length": 1801.53125, | |
| "completions/mean_terminated_length": 1801.53125, | |
| "completions/min_length": 527.0, | |
| "completions/min_terminated_length": 527.0, | |
| "entropy": 2.322386711835861, | |
| "epoch": 0.00019, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02586973085999489, | |
| "kl": 0.0017871049421955831, | |
| "learning_rate": 5.142857142857142e-06, | |
| "loss": -0.0024, | |
| "num_tokens": 756167.0, | |
| "reward": -7.00605583190918, | |
| "reward_std": 16.651020050048828, | |
| "rewards/rollout_reward_func/mean": -7.00605583190918, | |
| "rewards/rollout_reward_func/std": 18.44582176208496, | |
| "sampling/importance_sampling_ratio/max": 0.07095864415168762, | |
| "sampling/importance_sampling_ratio/mean": 0.011077712289988995, | |
| "sampling/importance_sampling_ratio/min": 2.652188067117779e-20, | |
| "sampling/sampling_logp_difference/max": 15.625991821289062, | |
| "sampling/sampling_logp_difference/mean": 0.3300427198410034, | |
| "step": 19, | |
| "step_time": 58.73655633899966 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0068662879057228565, | |
| "clip_ratio/high_mean": 0.0034331439528614283, | |
| "clip_ratio/low_mean": 0.0032655425602570176, | |
| "clip_ratio/low_min": 0.0019841270986944437, | |
| "clip_ratio/region_mean": 0.006698686513118446, | |
| "entropy": 2.3259487748146057, | |
| "epoch": 0.0002, | |
| "grad_norm": 0.030740659683942795, | |
| "kl": 0.0018880682764574885, | |
| "learning_rate": 5.428571428571429e-06, | |
| "loss": -0.0023, | |
| "step": 20, | |
| "step_time": 7.368399762000195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0018939394503831863, | |
| "clip_ratio/high_mean": 0.0009469697251915932, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0009469697251915932, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2070.0, | |
| "completions/max_terminated_length": 2070.0, | |
| "completions/mean_length": 1879.1875, | |
| "completions/mean_terminated_length": 1879.1875, | |
| "completions/min_length": 700.0, | |
| "completions/min_terminated_length": 700.0, | |
| "entropy": 2.2097203731536865, | |
| "epoch": 0.00021, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02687995322048664, | |
| "kl": 0.001916136905492749, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 838107.0, | |
| "reward": -2.38564395904541, | |
| "reward_std": 11.885160446166992, | |
| "rewards/rollout_reward_func/mean": -2.38564395904541, | |
| "rewards/rollout_reward_func/std": 15.615160942077637, | |
| "sampling/importance_sampling_ratio/max": 0.08406510949134827, | |
| "sampling/importance_sampling_ratio/mean": 0.012403802014887333, | |
| "sampling/importance_sampling_ratio/min": 7.145396301283091e-16, | |
| "sampling/sampling_logp_difference/max": 12.1469087600708, | |
| "sampling/sampling_logp_difference/mean": 0.2651059329509735, | |
| "step": 21, | |
| "step_time": 59.30298631099993 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0018939394503831863, | |
| "clip_ratio/high_mean": 0.0009469697251915932, | |
| "clip_ratio/low_mean": 0.0012019231216982007, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.002148892846889794, | |
| "entropy": 2.212069094181061, | |
| "epoch": 0.00022, | |
| "grad_norm": 0.02957003004848957, | |
| "kl": 0.0021696449111914262, | |
| "learning_rate": 6e-06, | |
| "loss": -0.0002, | |
| "step": 22, | |
| "step_time": 6.887106450000374 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.002358490601181984, | |
| "clip_ratio/high_mean": 0.001179245300590992, | |
| "clip_ratio/low_mean": 0.0033517052652314305, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0045309505658224225, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2068.0, | |
| "completions/max_terminated_length": 2068.0, | |
| "completions/mean_length": 1889.875, | |
| "completions/mean_terminated_length": 1889.875, | |
| "completions/min_length": 1525.0, | |
| "completions/min_terminated_length": 1525.0, | |
| "entropy": 2.1723521649837494, | |
| "epoch": 0.00023, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.022851742804050446, | |
| "kl": 0.003631043611676432, | |
| "learning_rate": 6.285714285714286e-06, | |
| "loss": -0.0045, | |
| "num_tokens": 919971.0, | |
| "reward": -6.60746955871582, | |
| "reward_std": 11.911310195922852, | |
| "rewards/rollout_reward_func/mean": -6.60746955871582, | |
| "rewards/rollout_reward_func/std": 12.598552703857422, | |
| "sampling/importance_sampling_ratio/max": 0.022619599476456642, | |
| "sampling/importance_sampling_ratio/mean": 0.00866013765335083, | |
| "sampling/importance_sampling_ratio/min": 1.8946926625573762e-16, | |
| "sampling/sampling_logp_difference/max": 12.770915031433105, | |
| "sampling/sampling_logp_difference/mean": 0.2685585618019104, | |
| "step": 23, | |
| "step_time": 66.98576966499968 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004673305433243513, | |
| "clip_ratio/high_mean": 0.0023366527166217566, | |
| "clip_ratio/low_mean": 0.0012254902394488454, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003562142956070602, | |
| "entropy": 2.1722511053085327, | |
| "epoch": 0.00024, | |
| "grad_norm": 0.026614658534526825, | |
| "kl": 0.004136091796681285, | |
| "learning_rate": 6.571428571428572e-06, | |
| "loss": -0.0045, | |
| "step": 24, | |
| "step_time": 6.895327041999735 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005816170945763588, | |
| "clip_ratio/high_mean": 0.002908085472881794, | |
| "clip_ratio/low_mean": 0.002100988756865263, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005009074229747057, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2360.0, | |
| "completions/max_terminated_length": 2360.0, | |
| "completions/mean_length": 2112.0625, | |
| "completions/mean_terminated_length": 2112.0625, | |
| "completions/min_length": 386.0, | |
| "completions/min_terminated_length": 386.0, | |
| "entropy": 2.1737034767866135, | |
| "epoch": 0.00025, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.013818159699440002, | |
| "kl": 0.0030502460795105435, | |
| "learning_rate": 6.857142857142858e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 1008470.0, | |
| "reward": -4.425507068634033, | |
| "reward_std": 14.518598556518555, | |
| "rewards/rollout_reward_func/mean": -4.425507068634033, | |
| "rewards/rollout_reward_func/std": 15.62421989440918, | |
| "sampling/importance_sampling_ratio/max": 0.2350578010082245, | |
| "sampling/importance_sampling_ratio/mean": 0.013511145487427711, | |
| "sampling/importance_sampling_ratio/min": 2.6986267040271933e-18, | |
| "sampling/sampling_logp_difference/max": 18.317167282104492, | |
| "sampling/sampling_logp_difference/mean": 0.2883983850479126, | |
| "step": 25, | |
| "step_time": 75.20574624999995 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0036231884732842445, | |
| "clip_ratio/high_mean": 0.0018115942366421223, | |
| "clip_ratio/low_mean": 0.003012447035871446, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.004824041272513568, | |
| "entropy": 2.1722775399684906, | |
| "epoch": 0.00026, | |
| "grad_norm": 0.013290848582983017, | |
| "kl": 0.0030411222542170435, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.0031, | |
| "step": 26, | |
| "step_time": 7.616344220999963 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.006330503383651376, | |
| "clip_ratio/high_mean": 0.003165251691825688, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003165251691825688, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2344.0, | |
| "completions/max_terminated_length": 2344.0, | |
| "completions/mean_length": 2142.9375, | |
| "completions/mean_terminated_length": 2142.9375, | |
| "completions/min_length": 634.0, | |
| "completions/min_terminated_length": 634.0, | |
| "entropy": 2.320455104112625, | |
| "epoch": 0.00027, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01896420307457447, | |
| "kl": 0.002832541649695486, | |
| "learning_rate": 7.428571428571429e-06, | |
| "loss": 0.0027, | |
| "num_tokens": 1098577.0, | |
| "reward": -13.397226333618164, | |
| "reward_std": 12.59660816192627, | |
| "rewards/rollout_reward_func/mean": -13.397226333618164, | |
| "rewards/rollout_reward_func/std": 14.914674758911133, | |
| "sampling/importance_sampling_ratio/max": 0.1061810627579689, | |
| "sampling/importance_sampling_ratio/mean": 0.008775782771408558, | |
| "sampling/importance_sampling_ratio/min": 7.311078333300841e-31, | |
| "sampling/sampling_logp_difference/max": 18.154491424560547, | |
| "sampling/sampling_logp_difference/mean": 0.3568466603755951, | |
| "step": 27, | |
| "step_time": 76.95750552300024 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00859679956920445, | |
| "clip_ratio/high_mean": 0.004298399784602225, | |
| "clip_ratio/low_mean": 0.0019767729099839926, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.006275172694586217, | |
| "entropy": 2.319840043783188, | |
| "epoch": 0.00028, | |
| "grad_norm": 0.01972121186554432, | |
| "kl": 0.003210447379387915, | |
| "learning_rate": 7.714285714285716e-06, | |
| "loss": 0.0027, | |
| "step": 28, | |
| "step_time": 7.581604469000013 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004324290552176535, | |
| "clip_ratio/high_mean": 0.0021621452760882676, | |
| "clip_ratio/low_mean": 0.0010593220358714461, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0032214673119597137, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 2252.0, | |
| "completions/max_terminated_length": 2252.0, | |
| "completions/mean_length": 2059.75, | |
| "completions/mean_terminated_length": 2059.258056640625, | |
| "completions/min_length": 654.0, | |
| "completions/min_terminated_length": 654.0, | |
| "entropy": 2.2485389709472656, | |
| "epoch": 0.00029, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.017093749716877937, | |
| "kl": 0.0025801300653256476, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": -0.0023, | |
| "num_tokens": 1185175.0, | |
| "reward": -10.598124504089355, | |
| "reward_std": 11.341144561767578, | |
| "rewards/rollout_reward_func/mean": -10.598124504089355, | |
| "rewards/rollout_reward_func/std": 12.110883712768555, | |
| "sampling/importance_sampling_ratio/max": 0.04109551012516022, | |
| "sampling/importance_sampling_ratio/mean": 0.006022544577717781, | |
| "sampling/importance_sampling_ratio/min": 2.8597005269629276e-29, | |
| "sampling/sampling_logp_difference/max": 13.06977367401123, | |
| "sampling/sampling_logp_difference/mean": 0.28142935037612915, | |
| "step": 29, | |
| "step_time": 77.39934379400052 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.010767225176095963, | |
| "clip_ratio/high_mean": 0.005383612588047981, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005383612588047981, | |
| "entropy": 2.2509743869304657, | |
| "epoch": 0.0003, | |
| "grad_norm": 0.01782657578587532, | |
| "kl": 0.002700227312743664, | |
| "learning_rate": 8.285714285714287e-06, | |
| "loss": -0.0023, | |
| "step": 30, | |
| "step_time": 7.306394946999944 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.008653939235955477, | |
| "clip_ratio/high_mean": 0.004326969617977738, | |
| "clip_ratio/low_mean": 0.0040421567391604185, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.008369126415345818, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2400.0, | |
| "completions/max_terminated_length": 2400.0, | |
| "completions/mean_length": 2102.53125, | |
| "completions/mean_terminated_length": 2102.53125, | |
| "completions/min_length": 718.0, | |
| "completions/min_terminated_length": 718.0, | |
| "entropy": 2.3924789130687714, | |
| "epoch": 0.00031, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02806187979876995, | |
| "kl": 0.0026975919608958066, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.0024, | |
| "num_tokens": 1273982.0, | |
| "reward": -8.974254608154297, | |
| "reward_std": 14.775591850280762, | |
| "rewards/rollout_reward_func/mean": -8.974254608154297, | |
| "rewards/rollout_reward_func/std": 14.762958526611328, | |
| "sampling/importance_sampling_ratio/max": 0.08338890224695206, | |
| "sampling/importance_sampling_ratio/mean": 0.00720847537741065, | |
| "sampling/importance_sampling_ratio/min": 3.938114484780906e-20, | |
| "sampling/sampling_logp_difference/max": 12.979628562927246, | |
| "sampling/sampling_logp_difference/mean": 0.3850451111793518, | |
| "step": 31, | |
| "step_time": 78.26030889900039 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.013500758213922381, | |
| "clip_ratio/high_mean": 0.006750379106961191, | |
| "clip_ratio/low_mean": 0.0008680555620230734, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.007618434610776603, | |
| "entropy": 2.3886922001838684, | |
| "epoch": 0.00032, | |
| "grad_norm": 0.02739325352013111, | |
| "kl": 0.0021556210485869087, | |
| "learning_rate": 8.857142857142858e-06, | |
| "loss": 0.0024, | |
| "step": 32, | |
| "step_time": 7.6260315599995465 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005404347903095186, | |
| "clip_ratio/high_mean": 0.002702173951547593, | |
| "clip_ratio/low_mean": 0.001923076924867928, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.004625250876415521, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2525.0, | |
| "completions/max_terminated_length": 2525.0, | |
| "completions/mean_length": 2267.0, | |
| "completions/mean_terminated_length": 2267.0, | |
| "completions/min_length": 1508.0, | |
| "completions/min_terminated_length": 1508.0, | |
| "entropy": 2.2084928154945374, | |
| "epoch": 0.00033, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.00787295587360859, | |
| "kl": 0.0015827158640604466, | |
| "learning_rate": 9.142857142857144e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 1367561.0, | |
| "reward": -5.180222988128662, | |
| "reward_std": 17.71061134338379, | |
| "rewards/rollout_reward_func/mean": -5.180222988128662, | |
| "rewards/rollout_reward_func/std": 20.794387817382812, | |
| "sampling/importance_sampling_ratio/max": 0.011497444473206997, | |
| "sampling/importance_sampling_ratio/mean": 0.004206728655844927, | |
| "sampling/importance_sampling_ratio/min": 2.4751660744964696e-22, | |
| "sampling/sampling_logp_difference/max": 21.120386123657227, | |
| "sampling/sampling_logp_difference/mean": 0.29462122917175293, | |
| "step": 33, | |
| "step_time": 80.87171731000012 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.003289473708719015, | |
| "clip_ratio/high_mean": 0.0016447368543595076, | |
| "clip_ratio/low_mean": 0.0037393163074739277, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005384053161833435, | |
| "entropy": 2.217702329158783, | |
| "epoch": 0.00034, | |
| "grad_norm": 0.01408409047871828, | |
| "kl": 0.002227816090453416, | |
| "learning_rate": 9.42857142857143e-06, | |
| "loss": -0.0001, | |
| "step": 34, | |
| "step_time": 8.029501602999972 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2603.0, | |
| "completions/max_terminated_length": 2603.0, | |
| "completions/mean_length": 2335.4375, | |
| "completions/mean_terminated_length": 2335.4375, | |
| "completions/min_length": 891.0, | |
| "completions/min_terminated_length": 891.0, | |
| "entropy": 2.327633857727051, | |
| "epoch": 0.00035, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01498799491673708, | |
| "kl": 0.0023266323769348674, | |
| "learning_rate": 9.714285714285715e-06, | |
| "loss": 0.0035, | |
| "num_tokens": 1463740.0, | |
| "reward": -9.247687339782715, | |
| "reward_std": 12.140151977539062, | |
| "rewards/rollout_reward_func/mean": -9.247687339782715, | |
| "rewards/rollout_reward_func/std": 13.351397514343262, | |
| "sampling/importance_sampling_ratio/max": 0.0675792247056961, | |
| "sampling/importance_sampling_ratio/mean": 0.005599465221166611, | |
| "sampling/importance_sampling_ratio/min": 7.004530503774031e-41, | |
| "sampling/sampling_logp_difference/max": 17.645164489746094, | |
| "sampling/sampling_logp_difference/mean": 0.38000231981277466, | |
| "step": 35, | |
| "step_time": 80.66461238600027 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.009387426427565515, | |
| "clip_ratio/high_mean": 0.0046937132137827575, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0046937132137827575, | |
| "entropy": 2.3282170593738556, | |
| "epoch": 0.00036, | |
| "grad_norm": 0.016069183126091957, | |
| "kl": 0.0027750690060202032, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0035, | |
| "step": 36, | |
| "step_time": 8.139321300999654 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0018382353009656072, | |
| "clip_ratio/high_mean": 0.0009191176504828036, | |
| "clip_ratio/low_mean": 0.0010080644860863686, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0019271821365691721, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 2526.0, | |
| "completions/max_terminated_length": 2526.0, | |
| "completions/mean_length": 2198.28125, | |
| "completions/mean_terminated_length": 2192.419189453125, | |
| "completions/min_length": 582.0, | |
| "completions/min_terminated_length": 582.0, | |
| "entropy": 2.2252254486083984, | |
| "epoch": 0.00037, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.0396745391190052, | |
| "kl": 0.0024436857129330747, | |
| "learning_rate": 9.999999999948591e-06, | |
| "loss": 0.0037, | |
| "num_tokens": 1555280.0, | |
| "reward": -10.988698959350586, | |
| "reward_std": 11.484918594360352, | |
| "rewards/rollout_reward_func/mean": -10.988698959350586, | |
| "rewards/rollout_reward_func/std": 13.74112606048584, | |
| "sampling/importance_sampling_ratio/max": 0.1169009730219841, | |
| "sampling/importance_sampling_ratio/mean": 0.009122872725129128, | |
| "sampling/importance_sampling_ratio/min": 6.878056424215178e-17, | |
| "sampling/sampling_logp_difference/max": 17.073537826538086, | |
| "sampling/sampling_logp_difference/mean": 0.2771769165992737, | |
| "step": 37, | |
| "step_time": 79.2830818729999 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.001923076924867928, | |
| "clip_ratio/high_mean": 0.000961538462433964, | |
| "clip_ratio/low_mean": 0.0015243901871144772, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.002485928649548441, | |
| "entropy": 2.230591207742691, | |
| "epoch": 0.00038, | |
| "grad_norm": 0.0169509444385767, | |
| "kl": 0.002311948119313456, | |
| "learning_rate": 9.999999999794362e-06, | |
| "loss": 0.0037, | |
| "step": 38, | |
| "step_time": 8.018443904999458 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.003639505594037473, | |
| "clip_ratio/high_mean": 0.0018197527970187366, | |
| "clip_ratio/low_mean": 0.0008333333535119891, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0026530861505307257, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2566.0, | |
| "completions/max_terminated_length": 2566.0, | |
| "completions/mean_length": 2258.4375, | |
| "completions/mean_terminated_length": 2258.4375, | |
| "completions/min_length": 1440.0, | |
| "completions/min_terminated_length": 1440.0, | |
| "entropy": 2.2081351578235626, | |
| "epoch": 0.00039, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.009336345829069614, | |
| "kl": 0.0021164097925066017, | |
| "learning_rate": 9.999999999537309e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 1649454.0, | |
| "reward": -10.517179489135742, | |
| "reward_std": 9.59807014465332, | |
| "rewards/rollout_reward_func/mean": -10.517179489135742, | |
| "rewards/rollout_reward_func/std": 9.964343070983887, | |
| "sampling/importance_sampling_ratio/max": 0.012029355391860008, | |
| "sampling/importance_sampling_ratio/mean": 0.003381735645234585, | |
| "sampling/importance_sampling_ratio/min": 1.5594409263266153e-17, | |
| "sampling/sampling_logp_difference/max": 18.87306022644043, | |
| "sampling/sampling_logp_difference/mean": 0.362891286611557, | |
| "step": 39, | |
| "step_time": 80.33267479000028 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.009275426273234189, | |
| "clip_ratio/high_mean": 0.005543510254938155, | |
| "clip_ratio/low_mean": 0.0009765625, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.006520072813145816, | |
| "entropy": 2.216031640768051, | |
| "epoch": 0.0004, | |
| "grad_norm": 0.010434896685183048, | |
| "kl": 0.001769829061231576, | |
| "learning_rate": 9.999999999177437e-06, | |
| "loss": 0.0004, | |
| "step": 40, | |
| "step_time": 8.050674242000014 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00331838964484632, | |
| "clip_ratio/high_mean": 0.00165919482242316, | |
| "clip_ratio/low_mean": 0.001736446050927043, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003395640873350203, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2887.0, | |
| "completions/max_terminated_length": 2887.0, | |
| "completions/mean_length": 2683.46875, | |
| "completions/mean_terminated_length": 2683.46875, | |
| "completions/min_length": 2241.0, | |
| "completions/min_terminated_length": 2241.0, | |
| "entropy": 2.259254366159439, | |
| "epoch": 0.00041, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.012253516353666782, | |
| "kl": 0.0026411395665491, | |
| "learning_rate": 9.999999998714745e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 1756670.0, | |
| "reward": -11.294092178344727, | |
| "reward_std": 12.929180145263672, | |
| "rewards/rollout_reward_func/mean": -11.294092178344727, | |
| "rewards/rollout_reward_func/std": 12.838841438293457, | |
| "sampling/importance_sampling_ratio/max": 0.005970899015665054, | |
| "sampling/importance_sampling_ratio/mean": 0.0017320181941613555, | |
| "sampling/importance_sampling_ratio/min": 1.393524575839586e-16, | |
| "sampling/sampling_logp_difference/max": 12.961386680603027, | |
| "sampling/sampling_logp_difference/mean": 0.3150481879711151, | |
| "step": 41, | |
| "step_time": 91.85784664900052 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0034722222480922937, | |
| "clip_ratio/high_mean": 0.0017361111240461469, | |
| "clip_ratio/low_mean": 0.0017123287543654442, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00344843982020393, | |
| "entropy": 2.2678737342357635, | |
| "epoch": 0.00042, | |
| "grad_norm": 0.009364346042275429, | |
| "kl": 0.002545906128943898, | |
| "learning_rate": 9.999999998149234e-06, | |
| "loss": 0.0007, | |
| "step": 42, | |
| "step_time": 8.943521398999792 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0031066687079146504, | |
| "clip_ratio/high_mean": 0.0015533343539573252, | |
| "clip_ratio/low_mean": 0.0063974635559134185, | |
| "clip_ratio/low_min": 0.0017857142956927419, | |
| "clip_ratio/region_mean": 0.007950797851663083, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2973.0, | |
| "completions/max_terminated_length": 2973.0, | |
| "completions/mean_length": 2525.125, | |
| "completions/mean_terminated_length": 2525.125, | |
| "completions/min_length": 375.0, | |
| "completions/min_terminated_length": 375.0, | |
| "entropy": 2.2919381260871887, | |
| "epoch": 0.00043, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.012219212017953396, | |
| "kl": 0.002040403662249446, | |
| "learning_rate": 9.999999997480901e-06, | |
| "loss": -0.0004, | |
| "num_tokens": 1858672.0, | |
| "reward": -11.096410751342773, | |
| "reward_std": 12.537565231323242, | |
| "rewards/rollout_reward_func/mean": -11.096410751342773, | |
| "rewards/rollout_reward_func/std": 12.965230941772461, | |
| "sampling/importance_sampling_ratio/max": 0.17080777883529663, | |
| "sampling/importance_sampling_ratio/mean": 0.0070959883742034435, | |
| "sampling/importance_sampling_ratio/min": 2.0908910633189203e-31, | |
| "sampling/sampling_logp_difference/max": 18.829500198364258, | |
| "sampling/sampling_logp_difference/mean": 0.3667473793029785, | |
| "step": 43, | |
| "step_time": 91.86805722899953 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0007183908019214869, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0007183908019214869, | |
| "entropy": 2.2995532751083374, | |
| "epoch": 0.00044, | |
| "grad_norm": 0.011969598941504955, | |
| "kl": 0.0019854862766806036, | |
| "learning_rate": 9.999999996709749e-06, | |
| "loss": -0.0004, | |
| "step": 44, | |
| "step_time": 9.18175687799976 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.012487898580729961, | |
| "clip_ratio/high_mean": 0.007980060297995806, | |
| "clip_ratio/low_mean": 0.0025955072487704456, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.010575567546766251, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 2880.0, | |
| "completions/max_terminated_length": 2880.0, | |
| "completions/mean_length": 2663.9375, | |
| "completions/mean_terminated_length": 2665.419189453125, | |
| "completions/min_length": 2200.0, | |
| "completions/min_terminated_length": 2200.0, | |
| "entropy": 2.3340508341789246, | |
| "epoch": 0.00045, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02003672532737255, | |
| "kl": 0.002692480251425877, | |
| "learning_rate": 9.999999995835775e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 1965272.0, | |
| "reward": -13.600004196166992, | |
| "reward_std": 14.701010704040527, | |
| "rewards/rollout_reward_func/mean": -13.600004196166992, | |
| "rewards/rollout_reward_func/std": 14.582523345947266, | |
| "sampling/importance_sampling_ratio/max": 0.005967509467154741, | |
| "sampling/importance_sampling_ratio/mean": 0.001800880883820355, | |
| "sampling/importance_sampling_ratio/min": 3.8321957037099815e-21, | |
| "sampling/sampling_logp_difference/max": 16.676918029785156, | |
| "sampling/sampling_logp_difference/mean": 0.38897034525871277, | |
| "step": 45, | |
| "step_time": 90.54392121900082 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0064415192464366555, | |
| "clip_ratio/high_mean": 0.00408881512703374, | |
| "clip_ratio/low_mean": 0.0017152255750261247, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005804040702059865, | |
| "entropy": 2.334753155708313, | |
| "epoch": 0.00046, | |
| "grad_norm": 0.009386158548295498, | |
| "kl": 0.0021710961300414056, | |
| "learning_rate": 9.999999994858982e-06, | |
| "loss": 0.0006, | |
| "step": 46, | |
| "step_time": 10.22533744600014 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.003598798648454249, | |
| "clip_ratio/high_mean": 0.002667454886250198, | |
| "clip_ratio/low_mean": 0.0009057971183210611, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003573252004571259, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2865.0, | |
| "completions/max_terminated_length": 2865.0, | |
| "completions/mean_length": 2587.71875, | |
| "completions/mean_terminated_length": 2587.71875, | |
| "completions/min_length": 1094.0, | |
| "completions/min_terminated_length": 1094.0, | |
| "entropy": 2.2846151292324066, | |
| "epoch": 0.00047, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01407544780522585, | |
| "kl": 0.003350261024024803, | |
| "learning_rate": 9.999999993779367e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 2069829.0, | |
| "reward": -9.709592819213867, | |
| "reward_std": 8.844062805175781, | |
| "rewards/rollout_reward_func/mean": -9.709592819213867, | |
| "rewards/rollout_reward_func/std": 9.250896453857422, | |
| "sampling/importance_sampling_ratio/max": 0.03837426379323006, | |
| "sampling/importance_sampling_ratio/mean": 0.003228831337764859, | |
| "sampling/importance_sampling_ratio/min": 1.3482284797513267e-27, | |
| "sampling/sampling_logp_difference/max": 19.295942306518555, | |
| "sampling/sampling_logp_difference/mean": 0.35356980562210083, | |
| "step": 47, | |
| "step_time": 92.8158456350011 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.008580301189795136, | |
| "clip_ratio/high_mean": 0.004290150594897568, | |
| "clip_ratio/low_mean": 0.001728165545500815, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.006018316023983061, | |
| "entropy": 2.2845455408096313, | |
| "epoch": 0.00048, | |
| "grad_norm": 0.013240635395050049, | |
| "kl": 0.003361153867444955, | |
| "learning_rate": 9.999999992596935e-06, | |
| "loss": -0.0001, | |
| "step": 48, | |
| "step_time": 8.908816245000708 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0016025641234591603, | |
| "clip_ratio/high_mean": 0.0008012820617295802, | |
| "clip_ratio/low_mean": 0.0010775862028822303, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0018788682646118104, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 3007.0, | |
| "completions/max_terminated_length": 3007.0, | |
| "completions/mean_length": 2473.59375, | |
| "completions/mean_terminated_length": 2460.774169921875, | |
| "completions/min_length": 185.0, | |
| "completions/min_terminated_length": 185.0, | |
| "entropy": 2.234508216381073, | |
| "epoch": 0.00049, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.013633599504828453, | |
| "kl": 0.0016216770527535118, | |
| "learning_rate": 9.999999991311679e-06, | |
| "loss": -0.0006, | |
| "num_tokens": 2169988.0, | |
| "reward": -11.157156944274902, | |
| "reward_std": 9.950779914855957, | |
| "rewards/rollout_reward_func/mean": -11.157156944274902, | |
| "rewards/rollout_reward_func/std": 11.374809265136719, | |
| "sampling/importance_sampling_ratio/max": 0.28713250160217285, | |
| "sampling/importance_sampling_ratio/mean": 0.018523240461945534, | |
| "sampling/importance_sampling_ratio/min": 2.006449198810831e-20, | |
| "sampling/sampling_logp_difference/max": 18.636167526245117, | |
| "sampling/sampling_logp_difference/mean": 0.3026620149612427, | |
| "step": 49, | |
| "step_time": 87.0453093810006 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.009976116823963821, | |
| "clip_ratio/high_mean": 0.006601028784643859, | |
| "clip_ratio/low_mean": 0.002857419603969902, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.009458448505029082, | |
| "entropy": 2.2327195405960083, | |
| "epoch": 0.0005, | |
| "grad_norm": 0.013543589971959591, | |
| "kl": 0.001988013260415755, | |
| "learning_rate": 9.999999989923604e-06, | |
| "loss": -0.0006, | |
| "step": 50, | |
| "step_time": 10.626580007000484 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0028511597774922848, | |
| "clip_ratio/high_mean": 0.0014255798887461424, | |
| "clip_ratio/low_mean": 0.0024358974769711494, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003861477307509631, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2977.0, | |
| "completions/max_terminated_length": 2977.0, | |
| "completions/mean_length": 2764.375, | |
| "completions/mean_terminated_length": 2764.375, | |
| "completions/min_length": 2371.0, | |
| "completions/min_terminated_length": 2371.0, | |
| "entropy": 2.372197538614273, | |
| "epoch": 0.00051, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.00828390009701252, | |
| "kl": 0.002092195674777031, | |
| "learning_rate": 9.999999988432709e-06, | |
| "loss": -0.0009, | |
| "num_tokens": 2279279.0, | |
| "reward": -9.928876876831055, | |
| "reward_std": 11.465417861938477, | |
| "rewards/rollout_reward_func/mean": -9.928876876831055, | |
| "rewards/rollout_reward_func/std": 12.258295059204102, | |
| "sampling/importance_sampling_ratio/max": 0.005203698296099901, | |
| "sampling/importance_sampling_ratio/mean": 0.0008916730294004083, | |
| "sampling/importance_sampling_ratio/min": 6.668663636418266e-29, | |
| "sampling/sampling_logp_difference/max": 15.371074676513672, | |
| "sampling/sampling_logp_difference/mean": 0.41038042306900024, | |
| "step": 51, | |
| "step_time": 97.36285335000048 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004987157532013953, | |
| "clip_ratio/high_mean": 0.0024935787660069764, | |
| "clip_ratio/low_mean": 0.002425754675641656, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.004919333383440971, | |
| "entropy": 2.3701044023036957, | |
| "epoch": 0.00052, | |
| "grad_norm": 0.009349919855594635, | |
| "kl": 0.0021661788632627577, | |
| "learning_rate": 9.999999986838993e-06, | |
| "loss": -0.001, | |
| "step": 52, | |
| "step_time": 9.234601858999667 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0029559049289673567, | |
| "clip_ratio/high_mean": 0.0014779524644836783, | |
| "clip_ratio/low_mean": 0.0007022471982054412, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0021801996626891196, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3143.0, | |
| "completions/max_terminated_length": 3143.0, | |
| "completions/mean_length": 2911.6875, | |
| "completions/mean_terminated_length": 2911.6875, | |
| "completions/min_length": 1747.0, | |
| "completions/min_terminated_length": 1747.0, | |
| "entropy": 2.095397859811783, | |
| "epoch": 0.00053, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.007544730789959431, | |
| "kl": 0.0014459962039836682, | |
| "learning_rate": 9.999999985142457e-06, | |
| "loss": 0.0005, | |
| "num_tokens": 2393822.0, | |
| "reward": -9.227861404418945, | |
| "reward_std": 13.615285873413086, | |
| "rewards/rollout_reward_func/mean": -9.227861404418945, | |
| "rewards/rollout_reward_func/std": 13.280508995056152, | |
| "sampling/importance_sampling_ratio/max": 0.00818893313407898, | |
| "sampling/importance_sampling_ratio/mean": 0.0016003338387236, | |
| "sampling/importance_sampling_ratio/min": 1.1838428666831955e-38, | |
| "sampling/sampling_logp_difference/max": 19.036222457885742, | |
| "sampling/sampling_logp_difference/mean": 0.27891838550567627, | |
| "step": 53, | |
| "step_time": 94.85144506799998 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.007602313125971705, | |
| "clip_ratio/high_mean": 0.004487969708861783, | |
| "clip_ratio/low_mean": 0.0009469697251915932, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005434939434053376, | |
| "entropy": 2.092746779322624, | |
| "epoch": 0.00054, | |
| "grad_norm": 0.007749093230813742, | |
| "kl": 0.001536271462100558, | |
| "learning_rate": 9.999999983343101e-06, | |
| "loss": 0.0005, | |
| "step": 54, | |
| "step_time": 10.820570559000771 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0016666667070239782, | |
| "clip_ratio/high_mean": 0.0008333333535119891, | |
| "clip_ratio/low_mean": 0.0015822785208001733, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0024156119325198233, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 3139.0, | |
| "completions/max_terminated_length": 3139.0, | |
| "completions/mean_length": 2945.40625, | |
| "completions/mean_terminated_length": 2943.9677734375, | |
| "completions/min_length": 2679.0, | |
| "completions/min_terminated_length": 2679.0, | |
| "entropy": 2.1801984012126923, | |
| "epoch": 0.00055, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.006448639556765556, | |
| "kl": 0.0018499534926377237, | |
| "learning_rate": 9.999999981440923e-06, | |
| "loss": -0.0001, | |
| "num_tokens": 2508892.0, | |
| "reward": -13.496590614318848, | |
| "reward_std": 9.91647720336914, | |
| "rewards/rollout_reward_func/mean": -13.496590614318848, | |
| "rewards/rollout_reward_func/std": 10.541536331176758, | |
| "sampling/importance_sampling_ratio/max": 0.004021179396659136, | |
| "sampling/importance_sampling_ratio/mean": 0.0013151702005416155, | |
| "sampling/importance_sampling_ratio/min": 1.5261994142437563e-12, | |
| "sampling/sampling_logp_difference/max": 8.930527687072754, | |
| "sampling/sampling_logp_difference/mean": 0.2391294538974762, | |
| "step": 55, | |
| "step_time": 97.94088426899998 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004934780183248222, | |
| "clip_ratio/high_mean": 0.0032585294102318585, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0032585294102318585, | |
| "entropy": 2.1794978380203247, | |
| "epoch": 0.00056, | |
| "grad_norm": 0.006780738476663828, | |
| "kl": 0.0018694552418310195, | |
| "learning_rate": 9.999999979435926e-06, | |
| "loss": -0.0001, | |
| "step": 56, | |
| "step_time": 9.654078997999477 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005736267426982522, | |
| "clip_ratio/high_mean": 0.002868133713491261, | |
| "clip_ratio/low_mean": 0.002268664597067982, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005136798310559243, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 3438.0, | |
| "completions/max_terminated_length": 3438.0, | |
| "completions/mean_length": 3171.25, | |
| "completions/mean_terminated_length": 3164.322509765625, | |
| "completions/min_length": 2483.0, | |
| "completions/min_terminated_length": 2483.0, | |
| "entropy": 2.1486852020025253, | |
| "epoch": 0.00057, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.005101061426103115, | |
| "kl": 0.001483209984144196, | |
| "learning_rate": 9.999999977328107e-06, | |
| "loss": 0.0002, | |
| "num_tokens": 2631790.0, | |
| "reward": -14.85343074798584, | |
| "reward_std": 14.20746898651123, | |
| "rewards/rollout_reward_func/mean": -14.85343074798584, | |
| "rewards/rollout_reward_func/std": 14.325145721435547, | |
| "sampling/importance_sampling_ratio/max": 0.005566018167883158, | |
| "sampling/importance_sampling_ratio/mean": 0.0012064384063705802, | |
| "sampling/importance_sampling_ratio/min": 5.148599772634619e-16, | |
| "sampling/sampling_logp_difference/max": 12.547440528869629, | |
| "sampling/sampling_logp_difference/mean": 0.24704553186893463, | |
| "step": 57, | |
| "step_time": 104.96052551200137 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004558469052426517, | |
| "clip_ratio/high_mean": 0.0022792345262132585, | |
| "clip_ratio/low_mean": 0.0007716049440205097, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0030508394702337682, | |
| "entropy": 2.1473026871681213, | |
| "epoch": 0.00058, | |
| "grad_norm": 0.004236625507473946, | |
| "kl": 0.001287619597860612, | |
| "learning_rate": 9.99999997511747e-06, | |
| "loss": 0.0002, | |
| "step": 58, | |
| "step_time": 11.613219467000363 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0030487803742289543, | |
| "clip_ratio/high_mean": 0.0015243901871144772, | |
| "clip_ratio/low_mean": 0.005422163347247988, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.006946553534362465, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3445.0, | |
| "completions/max_terminated_length": 3445.0, | |
| "completions/mean_length": 3177.1875, | |
| "completions/mean_terminated_length": 3177.1875, | |
| "completions/min_length": 903.0, | |
| "completions/min_terminated_length": 903.0, | |
| "entropy": 2.161461815237999, | |
| "epoch": 0.00059, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.012280529364943504, | |
| "kl": 0.002224226642283611, | |
| "learning_rate": 9.999999972804012e-06, | |
| "loss": -0.0006, | |
| "num_tokens": 2754194.0, | |
| "reward": -13.905904769897461, | |
| "reward_std": 10.438383102416992, | |
| "rewards/rollout_reward_func/mean": -13.905904769897461, | |
| "rewards/rollout_reward_func/std": 11.29019832611084, | |
| "sampling/importance_sampling_ratio/max": 0.02268226072192192, | |
| "sampling/importance_sampling_ratio/mean": 0.0018939973087981343, | |
| "sampling/importance_sampling_ratio/min": 2.697007293984262e-28, | |
| "sampling/sampling_logp_difference/max": 17.155614852905273, | |
| "sampling/sampling_logp_difference/mean": 0.2826978266239166, | |
| "step": 59, | |
| "step_time": 100.61191374800092 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.008298700326122344, | |
| "clip_ratio/high_mean": 0.004149350163061172, | |
| "clip_ratio/low_mean": 0.006984907551668584, | |
| "clip_ratio/low_min": 0.0015432098880410194, | |
| "clip_ratio/region_mean": 0.011134257889352739, | |
| "entropy": 2.1591842770576477, | |
| "epoch": 0.0006, | |
| "grad_norm": 0.01143638975918293, | |
| "kl": 0.0023646633271710016, | |
| "learning_rate": 9.999999970387732e-06, | |
| "loss": -0.0006, | |
| "step": 60, | |
| "step_time": 10.382511724000324 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0015432098880410194, | |
| "clip_ratio/high_mean": 0.0007716049440205097, | |
| "clip_ratio/low_mean": 0.002259911096189171, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003031516040209681, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3416.0, | |
| "completions/max_terminated_length": 3416.0, | |
| "completions/mean_length": 3135.84375, | |
| "completions/mean_terminated_length": 3135.84375, | |
| "completions/min_length": 195.0, | |
| "completions/min_terminated_length": 195.0, | |
| "entropy": 2.217515081167221, | |
| "epoch": 0.00061, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.006441728211939335, | |
| "kl": 0.0010476857314642984, | |
| "learning_rate": 9.999999967868633e-06, | |
| "loss": 0.0009, | |
| "num_tokens": 2876044.0, | |
| "reward": -14.489093780517578, | |
| "reward_std": 11.084500312805176, | |
| "rewards/rollout_reward_func/mean": -14.489093780517578, | |
| "rewards/rollout_reward_func/std": 12.003732681274414, | |
| "sampling/importance_sampling_ratio/max": 0.25691941380500793, | |
| "sampling/importance_sampling_ratio/mean": 0.008919022977352142, | |
| "sampling/importance_sampling_ratio/min": 6.2882773049331024e-24, | |
| "sampling/sampling_logp_difference/max": 12.479897499084473, | |
| "sampling/sampling_logp_difference/mean": 0.29724666476249695, | |
| "step": 61, | |
| "step_time": 102.78593670800046 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0007812500116415322, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0007812500116415322, | |
| "entropy": 2.212161585688591, | |
| "epoch": 0.00062, | |
| "grad_norm": 0.005762570537626743, | |
| "kl": 0.001050639031745959, | |
| "learning_rate": 9.999999965246713e-06, | |
| "loss": 0.0009, | |
| "step": 62, | |
| "step_time": 10.780980200999693 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00550881412345916, | |
| "clip_ratio/high_mean": 0.00275440706172958, | |
| "clip_ratio/low_mean": 0.0014889392768964171, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.004243346338625997, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3413.0, | |
| "completions/max_terminated_length": 3413.0, | |
| "completions/mean_length": 2979.78125, | |
| "completions/mean_terminated_length": 2979.78125, | |
| "completions/min_length": 280.0, | |
| "completions/min_terminated_length": 280.0, | |
| "entropy": 2.111812949180603, | |
| "epoch": 0.00063, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.03990187868475914, | |
| "kl": 0.001456589438021183, | |
| "learning_rate": 9.999999962521974e-06, | |
| "loss": -0.0043, | |
| "num_tokens": 2992561.0, | |
| "reward": -9.381729125976562, | |
| "reward_std": 10.014938354492188, | |
| "rewards/rollout_reward_func/mean": -9.381729125976562, | |
| "rewards/rollout_reward_func/std": 10.544285774230957, | |
| "sampling/importance_sampling_ratio/max": 0.23116491734981537, | |
| "sampling/importance_sampling_ratio/mean": 0.008492819964885712, | |
| "sampling/importance_sampling_ratio/min": 1.2417058145283537e-26, | |
| "sampling/sampling_logp_difference/max": 17.79161262512207, | |
| "sampling/sampling_logp_difference/mean": 0.31178855895996094, | |
| "step": 63, | |
| "step_time": 105.0415441539999 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.004596683429554105, | |
| "clip_ratio/high_mean": 0.0022983417147770524, | |
| "clip_ratio/low_mean": 0.002260544220916927, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.004558885877486318, | |
| "entropy": 2.108848959207535, | |
| "epoch": 0.00064, | |
| "grad_norm": 0.039187680929899216, | |
| "kl": 0.00118074486090336, | |
| "learning_rate": 9.999999959694412e-06, | |
| "loss": -0.0042, | |
| "step": 64, | |
| "step_time": 10.22475211300025 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0014204545877873898, | |
| "clip_ratio/high_mean": 0.0014204545877873898, | |
| "clip_ratio/low_mean": 0.0014124744920991361, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.002832929079886526, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3596.0, | |
| "completions/max_terminated_length": 3596.0, | |
| "completions/mean_length": 3147.21875, | |
| "completions/mean_terminated_length": 3147.21875, | |
| "completions/min_length": 487.0, | |
| "completions/min_terminated_length": 487.0, | |
| "entropy": 2.150854080915451, | |
| "epoch": 0.00065, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.009450007230043411, | |
| "kl": 0.0011926015722565353, | |
| "learning_rate": 9.999999956764034e-06, | |
| "loss": -0.0003, | |
| "num_tokens": 3114343.0, | |
| "reward": -11.12032413482666, | |
| "reward_std": 18.984477996826172, | |
| "rewards/rollout_reward_func/mean": -11.12032413482666, | |
| "rewards/rollout_reward_func/std": 21.110273361206055, | |
| "sampling/importance_sampling_ratio/max": 0.04695241525769234, | |
| "sampling/importance_sampling_ratio/mean": 0.002132077468559146, | |
| "sampling/importance_sampling_ratio/min": 4.1585366301487205e-15, | |
| "sampling/sampling_logp_difference/max": 12.402358055114746, | |
| "sampling/sampling_logp_difference/mean": 0.2630866467952728, | |
| "step": 65, | |
| "step_time": 106.09026804299992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0006793478387407959, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0006793478387407959, | |
| "entropy": 2.1468848437070847, | |
| "epoch": 0.00066, | |
| "grad_norm": 0.006407527253031731, | |
| "kl": 0.0014346542666316964, | |
| "learning_rate": 9.999999953730833e-06, | |
| "loss": -0.0003, | |
| "step": 66, | |
| "step_time": 10.735191880000002 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0025255101500079036, | |
| "clip_ratio/high_mean": 0.0012627550750039518, | |
| "clip_ratio/low_mean": 0.0006443298771046102, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.001907084952108562, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3504.0, | |
| "completions/max_terminated_length": 3504.0, | |
| "completions/mean_length": 2932.6875, | |
| "completions/mean_terminated_length": 2932.6875, | |
| "completions/min_length": 194.0, | |
| "completions/min_terminated_length": 194.0, | |
| "entropy": 2.0600955486297607, | |
| "epoch": 0.00067, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.018519844859838486, | |
| "kl": 0.001339123715297319, | |
| "learning_rate": 9.99999995059481e-06, | |
| "loss": 0.0045, | |
| "num_tokens": 3229150.0, | |
| "reward": -14.67924690246582, | |
| "reward_std": 13.668952941894531, | |
| "rewards/rollout_reward_func/mean": -14.67924690246582, | |
| "rewards/rollout_reward_func/std": 13.640398979187012, | |
| "sampling/importance_sampling_ratio/max": 0.251010000705719, | |
| "sampling/importance_sampling_ratio/mean": 0.013134480454027653, | |
| "sampling/importance_sampling_ratio/min": 3.752377749352698e-26, | |
| "sampling/sampling_logp_difference/max": 17.058979034423828, | |
| "sampling/sampling_logp_difference/mean": 0.27970272302627563, | |
| "step": 67, | |
| "step_time": 98.07139246399856 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0014369714772328734, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0014369714772328734, | |
| "entropy": 2.056602507829666, | |
| "epoch": 0.00068, | |
| "grad_norm": 0.018268967047333717, | |
| "kl": 0.0017321475461358204, | |
| "learning_rate": 9.99999994735597e-06, | |
| "loss": 0.0045, | |
| "step": 68, | |
| "step_time": 10.463535391000278 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0014367816038429737, | |
| "clip_ratio/high_mean": 0.0013193523045629263, | |
| "clip_ratio/low_mean": 0.00210745120421052, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.003426803508773446, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3584.0, | |
| "completions/max_terminated_length": 3584.0, | |
| "completions/mean_length": 2997.875, | |
| "completions/mean_terminated_length": 2997.875, | |
| "completions/min_length": 387.0, | |
| "completions/min_terminated_length": 387.0, | |
| "entropy": 2.179373413324356, | |
| "epoch": 0.00069, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.021575244143605232, | |
| "kl": 0.0025506179299554788, | |
| "learning_rate": 9.999999944014306e-06, | |
| "loss": 0.003, | |
| "num_tokens": 3346039.0, | |
| "reward": -12.414968490600586, | |
| "reward_std": 17.504671096801758, | |
| "rewards/rollout_reward_func/mean": -12.414968490600586, | |
| "rewards/rollout_reward_func/std": 18.227092742919922, | |
| "sampling/importance_sampling_ratio/max": 0.16923412680625916, | |
| "sampling/importance_sampling_ratio/mean": 0.007539688143879175, | |
| "sampling/importance_sampling_ratio/min": 5.152622264460075e-33, | |
| "sampling/sampling_logp_difference/max": 20.113666534423828, | |
| "sampling/sampling_logp_difference/mean": 0.35040098428726196, | |
| "step": 69, | |
| "step_time": 98.19044223200035 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.006122596096247435, | |
| "clip_ratio/high_mean": 0.0036622595507651567, | |
| "clip_ratio/low_mean": 0.0029755067662335932, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.00663776631699875, | |
| "entropy": 2.1783816814422607, | |
| "epoch": 0.0007, | |
| "grad_norm": 0.014992612414062023, | |
| "kl": 0.0022299339179880917, | |
| "learning_rate": 9.999999940569825e-06, | |
| "loss": 0.003, | |
| "step": 70, | |
| "step_time": 10.582511048000015 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0025389643851667643, | |
| "clip_ratio/high_mean": 0.0012694821925833821, | |
| "clip_ratio/low_mean": 0.0027696280158124864, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.004039110150188208, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3695.0, | |
| "completions/max_terminated_length": 3695.0, | |
| "completions/mean_length": 3466.03125, | |
| "completions/mean_terminated_length": 3466.03125, | |
| "completions/min_length": 2833.0, | |
| "completions/min_terminated_length": 2833.0, | |
| "entropy": 2.0841893553733826, | |
| "epoch": 0.00071, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.015231302939355373, | |
| "kl": 0.002882544751628302, | |
| "learning_rate": 9.999999937022522e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 3477790.0, | |
| "reward": -11.157001495361328, | |
| "reward_std": 9.339273452758789, | |
| "rewards/rollout_reward_func/mean": -11.157001495361328, | |
| "rewards/rollout_reward_func/std": 10.505151748657227, | |
| "sampling/importance_sampling_ratio/max": 0.002954554045572877, | |
| "sampling/importance_sampling_ratio/mean": 0.0008290851255878806, | |
| "sampling/importance_sampling_ratio/min": 4.875183740817929e-34, | |
| "sampling/sampling_logp_difference/max": 12.618881225585938, | |
| "sampling/sampling_logp_difference/mean": 0.25001800060272217, | |
| "step": 71, | |
| "step_time": 109.58023633999892 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0010683761211112142, | |
| "clip_ratio/high_mean": 0.0005341880605556071, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0005341880605556071, | |
| "entropy": 2.0846132934093475, | |
| "epoch": 0.00072, | |
| "grad_norm": 0.009933187626302242, | |
| "kl": 0.002236059895949438, | |
| "learning_rate": 9.999999933372398e-06, | |
| "loss": -0.0002, | |
| "step": 72, | |
| "step_time": 10.986284594999688 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.010420707054436207, | |
| "clip_ratio/high_mean": 0.006717559706885368, | |
| "clip_ratio/low_mean": 0.006201559328474104, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.012919119151774794, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3855.0, | |
| "completions/max_terminated_length": 3855.0, | |
| "completions/mean_length": 3299.0, | |
| "completions/mean_terminated_length": 3299.0, | |
| "completions/min_length": 727.0, | |
| "completions/min_terminated_length": 727.0, | |
| "entropy": 2.229044407606125, | |
| "epoch": 0.00073, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.014358018524944782, | |
| "kl": 0.0021708139684051275, | |
| "learning_rate": 9.999999929619456e-06, | |
| "loss": -0.0008, | |
| "num_tokens": 3603657.0, | |
| "reward": -13.489961624145508, | |
| "reward_std": 14.68436336517334, | |
| "rewards/rollout_reward_func/mean": -13.489961624145508, | |
| "rewards/rollout_reward_func/std": 15.650424003601074, | |
| "sampling/importance_sampling_ratio/max": 0.059434566646814346, | |
| "sampling/importance_sampling_ratio/mean": 0.002726445673033595, | |
| "sampling/importance_sampling_ratio/min": 5.1101291228379537e-39, | |
| "sampling/sampling_logp_difference/max": 17.89449691772461, | |
| "sampling/sampling_logp_difference/mean": 0.3607054650783539, | |
| "step": 73, | |
| "step_time": 111.9296666260002 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.010632613790221512, | |
| "clip_ratio/high_mean": 0.0059673485811799765, | |
| "clip_ratio/low_mean": 0.0011278195888735354, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.007095168228261173, | |
| "entropy": 2.2271364331245422, | |
| "epoch": 0.00074, | |
| "grad_norm": 0.01777312532067299, | |
| "kl": 0.0019678229436976835, | |
| "learning_rate": 9.99999992576369e-06, | |
| "loss": -0.0008, | |
| "step": 74, | |
| "step_time": 11.365622766999422 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.007670001010410488, | |
| "clip_ratio/high_mean": 0.004492895153816789, | |
| "clip_ratio/low_mean": 0.005284888495225459, | |
| "clip_ratio/low_min": 0.0012886597542092204, | |
| "clip_ratio/region_mean": 0.009777783416211605, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 3973.0, | |
| "completions/max_terminated_length": 3973.0, | |
| "completions/mean_length": 3476.15625, | |
| "completions/mean_terminated_length": 3471.67724609375, | |
| "completions/min_length": 1666.0, | |
| "completions/min_terminated_length": 1666.0, | |
| "entropy": 2.1275693476200104, | |
| "epoch": 0.00075, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.00603072764351964, | |
| "kl": 0.0015271206430043094, | |
| "learning_rate": 9.999999921805106e-06, | |
| "loss": -0.0, | |
| "num_tokens": 3735493.0, | |
| "reward": -10.968659400939941, | |
| "reward_std": 9.847198486328125, | |
| "rewards/rollout_reward_func/mean": -10.968659400939941, | |
| "rewards/rollout_reward_func/std": 9.763160705566406, | |
| "sampling/importance_sampling_ratio/max": 0.007295163348317146, | |
| "sampling/importance_sampling_ratio/mean": 0.0008178789867088199, | |
| "sampling/importance_sampling_ratio/min": 3.297161219819182e-30, | |
| "sampling/sampling_logp_difference/max": 16.79660987854004, | |
| "sampling/sampling_logp_difference/mean": 0.2770848870277405, | |
| "step": 75, | |
| "step_time": 116.77516848400046 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 600000, | |
| "num_input_tokens_seen": 3735493, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |