Instructions to use Masnuy/smk-ld with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Masnuy/smk-ld with PEFT:
Base model is not found.
- Transformers
How to use Masnuy/smk-ld with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Masnuy/smk-ld")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Masnuy/smk-ld", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Masnuy/smk-ld with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Masnuy/smk-ld"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Masnuy/smk-ld",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/Masnuy/smk-ld
- SGLang
How to use Masnuy/smk-ld with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Masnuy/smk-ld" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Masnuy/smk-ld",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Masnuy/smk-ld" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Masnuy/smk-ld",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use Masnuy/smk-ld with Docker Model Runner:
docker model run hf.co/Masnuy/smk-ld
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.00055, | |
| "eval_steps": 500, | |
| "global_step": 55, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.940144538879395, | |
| "epoch": 1e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.03423422574996948, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": -0.0013, | |
| "num_tokens": 35616.0, | |
| "reward": -0.7051675319671631, | |
| "reward_std": 0.7764065265655518, | |
| "rewards/rollout_reward_func/mean": -0.7051675319671631, | |
| "rewards/rollout_reward_func/std": 0.75037682056427, | |
| "sampling/importance_sampling_ratio/max": 0.06733503937721252, | |
| "sampling/importance_sampling_ratio/mean": 0.035891756415367126, | |
| "sampling/importance_sampling_ratio/min": 0.012922381982207298, | |
| "sampling/sampling_logp_difference/max": 2.4574475288391113, | |
| "sampling/sampling_logp_difference/mean": 1.7373101711273193, | |
| "step": 1, | |
| "step_time": 6.607899043003272 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.940144538879395, | |
| "epoch": 2e-05, | |
| "grad_norm": 0.03577549755573273, | |
| "kl": 0.0, | |
| "learning_rate": 2.8571428571428575e-07, | |
| "loss": -0.0013, | |
| "step": 2, | |
| "step_time": 2.9063545979988703 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.965680599212646, | |
| "epoch": 3e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.017016781494021416, | |
| "kl": 0.0007822737097740173, | |
| "learning_rate": 5.714285714285715e-07, | |
| "loss": -0.0006, | |
| "num_tokens": 71095.0, | |
| "reward": -0.9110076427459717, | |
| "reward_std": 0.6931561231613159, | |
| "rewards/rollout_reward_func/mean": -0.9110076427459717, | |
| "rewards/rollout_reward_func/std": 0.6800154447555542, | |
| "sampling/importance_sampling_ratio/max": 0.06864165514707565, | |
| "sampling/importance_sampling_ratio/mean": 0.03215230628848076, | |
| "sampling/importance_sampling_ratio/min": 0.011430883780121803, | |
| "sampling/sampling_logp_difference/max": 2.474456548690796, | |
| "sampling/sampling_logp_difference/mean": 1.8041703701019287, | |
| "step": 3, | |
| "step_time": 5.5894952089984145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.965598821640015, | |
| "epoch": 4e-05, | |
| "grad_norm": 0.01733771711587906, | |
| "kl": 0.0007491949945688248, | |
| "learning_rate": 8.571428571428572e-07, | |
| "loss": -0.0006, | |
| "step": 4, | |
| "step_time": 3.4044442560007155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.53125, | |
| "completions/mean_terminated_length": 2.096774101257324, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.834780097007751, | |
| "epoch": 5e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02108524739742279, | |
| "kl": 0.0009654137102188542, | |
| "learning_rate": 1.142857142857143e-06, | |
| "loss": -0.0003, | |
| "num_tokens": 106490.0, | |
| "reward": -0.5540984869003296, | |
| "reward_std": 0.8771607279777527, | |
| "rewards/rollout_reward_func/mean": -0.5540984869003296, | |
| "rewards/rollout_reward_func/std": 0.8618184924125671, | |
| "sampling/importance_sampling_ratio/max": 0.07213470339775085, | |
| "sampling/importance_sampling_ratio/mean": 0.03297191113233566, | |
| "sampling/importance_sampling_ratio/min": 3.0050444771445584e-11, | |
| "sampling/sampling_logp_difference/max": 4.576776504516602, | |
| "sampling/sampling_logp_difference/mean": 1.773134469985962, | |
| "step": 5, | |
| "step_time": 6.008008040997083 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.827986240386963, | |
| "epoch": 6e-05, | |
| "grad_norm": 0.021368548274040222, | |
| "kl": 0.0009469666983932257, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": -0.0004, | |
| "step": 6, | |
| "step_time": 2.88994878000085 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.015625, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.875, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.796356916427612, | |
| "epoch": 7e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.013663483783602715, | |
| "kl": 0.0008234605193138123, | |
| "learning_rate": 1.7142857142857145e-06, | |
| "loss": 0.0, | |
| "num_tokens": 142069.0, | |
| "reward": -0.8088920712471008, | |
| "reward_std": 0.7424027323722839, | |
| "rewards/rollout_reward_func/mean": -0.8088920712471008, | |
| "rewards/rollout_reward_func/std": 0.7662962675094604, | |
| "sampling/importance_sampling_ratio/max": 0.057457707822322845, | |
| "sampling/importance_sampling_ratio/mean": 0.02730659209191799, | |
| "sampling/importance_sampling_ratio/min": 7.280681058041694e-10, | |
| "sampling/sampling_logp_difference/max": 4.222927093505859, | |
| "sampling/sampling_logp_difference/mean": 1.6366889476776123, | |
| "step": 7, | |
| "step_time": 5.921918200005166 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.786743104457855, | |
| "epoch": 8e-05, | |
| "grad_norm": 0.013285573571920395, | |
| "kl": 0.0009508101793471724, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": -0.0, | |
| "step": 8, | |
| "step_time": 2.9387520060008683 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.015625, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.922944903373718, | |
| "epoch": 9e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01735098287463188, | |
| "kl": 0.0008866805583238602, | |
| "learning_rate": 2.285714285714286e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 176547.0, | |
| "reward": -0.618694543838501, | |
| "reward_std": 0.8990023136138916, | |
| "rewards/rollout_reward_func/mean": -0.618694543838501, | |
| "rewards/rollout_reward_func/std": 0.8754127621650696, | |
| "sampling/importance_sampling_ratio/max": 0.06334654986858368, | |
| "sampling/importance_sampling_ratio/mean": 0.03222377225756645, | |
| "sampling/importance_sampling_ratio/min": 0.011594683863222599, | |
| "sampling/sampling_logp_difference/max": 2.4042437076568604, | |
| "sampling/sampling_logp_difference/mean": 1.7828483581542969, | |
| "step": 9, | |
| "step_time": 5.69735375300661 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.913637280464172, | |
| "epoch": 0.0001, | |
| "grad_norm": 0.017596419900655746, | |
| "kl": 0.000972965732216835, | |
| "learning_rate": 2.571428571428571e-06, | |
| "loss": -0.0002, | |
| "step": 10, | |
| "step_time": 3.580516988000454 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.15625, | |
| "completions/mean_terminated_length": 2.15625, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.979163527488708, | |
| "epoch": 0.00011, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.01395466923713684, | |
| "kl": 0.0012971882097190246, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": -0.0002, | |
| "num_tokens": 210671.0, | |
| "reward": -0.6838527917861938, | |
| "reward_std": 0.7062864899635315, | |
| "rewards/rollout_reward_func/mean": -0.6838527917861938, | |
| "rewards/rollout_reward_func/std": 0.7574694752693176, | |
| "sampling/importance_sampling_ratio/max": 0.06857945024967194, | |
| "sampling/importance_sampling_ratio/mean": 0.03003668040037155, | |
| "sampling/importance_sampling_ratio/min": 7.147054475353798e-06, | |
| "sampling/sampling_logp_difference/max": 4.250937461853027, | |
| "sampling/sampling_logp_difference/mean": 1.8635720014572144, | |
| "step": 11, | |
| "step_time": 6.033825367005193 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.966804146766663, | |
| "epoch": 0.00012, | |
| "grad_norm": 0.01391494832932949, | |
| "kl": 0.0018893439264502376, | |
| "learning_rate": 3.142857142857143e-06, | |
| "loss": -0.0002, | |
| "step": 12, | |
| "step_time": 2.8316939499891305 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.28125, | |
| "completions/mean_terminated_length": 2.28125, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.810548543930054, | |
| "epoch": 0.00013, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.014992384240031242, | |
| "kl": 0.003853602087474428, | |
| "learning_rate": 3.428571428571429e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 245676.0, | |
| "reward": -0.6364654302597046, | |
| "reward_std": 0.7521181106567383, | |
| "rewards/rollout_reward_func/mean": -0.6364654302597046, | |
| "rewards/rollout_reward_func/std": 0.7526334524154663, | |
| "sampling/importance_sampling_ratio/max": 0.06722358614206314, | |
| "sampling/importance_sampling_ratio/mean": 0.03307785466313362, | |
| "sampling/importance_sampling_ratio/min": 3.5045477488893084e-06, | |
| "sampling/sampling_logp_difference/max": 4.873165607452393, | |
| "sampling/sampling_logp_difference/mean": 1.8621257543563843, | |
| "step": 13, | |
| "step_time": 5.768471701994713 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.802866578102112, | |
| "epoch": 0.00014, | |
| "grad_norm": 0.014974371530115604, | |
| "kl": 0.004468549799639732, | |
| "learning_rate": 3.7142857142857146e-06, | |
| "loss": 0.0001, | |
| "step": 14, | |
| "step_time": 2.8839251570025226 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.4375, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.717366337776184, | |
| "epoch": 0.00015, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.020232753828167915, | |
| "kl": 0.004971407979610376, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": -0.0008, | |
| "num_tokens": 282194.0, | |
| "reward": -0.7452265024185181, | |
| "reward_std": 0.7260236144065857, | |
| "rewards/rollout_reward_func/mean": -0.7452265024185181, | |
| "rewards/rollout_reward_func/std": 0.7854404449462891, | |
| "sampling/importance_sampling_ratio/max": 0.084043949842453, | |
| "sampling/importance_sampling_ratio/mean": 0.03686349838972092, | |
| "sampling/importance_sampling_ratio/min": 9.963324609785218e-10, | |
| "sampling/sampling_logp_difference/max": 3.4498603343963623, | |
| "sampling/sampling_logp_difference/mean": 1.676363468170166, | |
| "step": 15, | |
| "step_time": 5.806083219005814 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.69223439693451, | |
| "epoch": 0.00016, | |
| "grad_norm": 0.020264672115445137, | |
| "kl": 0.005897294729948044, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": -0.0008, | |
| "step": 16, | |
| "step_time": 3.649606159000541 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 8.0, | |
| "completions/max_terminated_length": 8.0, | |
| "completions/mean_length": 2.1875, | |
| "completions/mean_terminated_length": 2.1875, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.732311606407166, | |
| "epoch": 0.00017, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.019782064482569695, | |
| "kl": 0.007078325026668608, | |
| "learning_rate": 4.571428571428572e-06, | |
| "loss": -0.0, | |
| "num_tokens": 317749.0, | |
| "reward": -0.5659611821174622, | |
| "reward_std": 0.7136144042015076, | |
| "rewards/rollout_reward_func/mean": -0.5659611821174622, | |
| "rewards/rollout_reward_func/std": 0.7692865133285522, | |
| "sampling/importance_sampling_ratio/max": 0.08927696198225021, | |
| "sampling/importance_sampling_ratio/mean": 0.034128978848457336, | |
| "sampling/importance_sampling_ratio/min": 6.115115684224293e-05, | |
| "sampling/sampling_logp_difference/max": 2.444645404815674, | |
| "sampling/sampling_logp_difference/mean": 1.729607105255127, | |
| "step": 17, | |
| "step_time": 6.096930697000062 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 8.71469098329544, | |
| "epoch": 0.00018, | |
| "grad_norm": 0.0198439322412014, | |
| "kl": 0.00980698294006288, | |
| "learning_rate": 4.857142857142858e-06, | |
| "loss": -0.0001, | |
| "step": 18, | |
| "step_time": 2.8486052290027146 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 13.0, | |
| "completions/max_terminated_length": 13.0, | |
| "completions/mean_length": 2.34375, | |
| "completions/mean_terminated_length": 2.34375, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.700989127159119, | |
| "epoch": 0.00019, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.02092764899134636, | |
| "kl": 0.016879421891644597, | |
| "learning_rate": 5.142857142857142e-06, | |
| "loss": -0.0005, | |
| "num_tokens": 353593.0, | |
| "reward": -0.5766444802284241, | |
| "reward_std": 0.8734984397888184, | |
| "rewards/rollout_reward_func/mean": -0.5766444802284241, | |
| "rewards/rollout_reward_func/std": 0.8666929602622986, | |
| "sampling/importance_sampling_ratio/max": 0.10328938066959381, | |
| "sampling/importance_sampling_ratio/mean": 0.0412919819355011, | |
| "sampling/importance_sampling_ratio/min": 8.264829792770101e-11, | |
| "sampling/sampling_logp_difference/max": 3.909327507019043, | |
| "sampling/sampling_logp_difference/mean": 1.7047920227050781, | |
| "step": 19, | |
| "step_time": 5.767663798993453 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.015625, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "entropy": 8.635276675224304, | |
| "epoch": 0.0002, | |
| "grad_norm": 0.02117123454809189, | |
| "kl": 0.022729096352122724, | |
| "learning_rate": 5.428571428571429e-06, | |
| "loss": -0.0005, | |
| "step": 20, | |
| "step_time": 2.8989755920047173 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 7.0, | |
| "completions/max_terminated_length": 7.0, | |
| "completions/mean_length": 2.15625, | |
| "completions/mean_terminated_length": 2.15625, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.578810691833496, | |
| "epoch": 0.00021, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.044351667165756226, | |
| "kl": 0.03346684481948614, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": -0.0024, | |
| "num_tokens": 388691.0, | |
| "reward": -0.6427146196365356, | |
| "reward_std": 0.8122553825378418, | |
| "rewards/rollout_reward_func/mean": -0.6427146196365356, | |
| "rewards/rollout_reward_func/std": 0.7960423827171326, | |
| "sampling/importance_sampling_ratio/max": 0.10920954495668411, | |
| "sampling/importance_sampling_ratio/mean": 0.04724588990211487, | |
| "sampling/importance_sampling_ratio/min": 2.8349152216833318e-06, | |
| "sampling/sampling_logp_difference/max": 3.772367477416992, | |
| "sampling/sampling_logp_difference/mean": 1.6777459383010864, | |
| "step": 21, | |
| "step_time": 5.727157995002926 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.015625, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "entropy": 8.421014785766602, | |
| "epoch": 0.00022, | |
| "grad_norm": 0.044636089354753494, | |
| "kl": 0.047105960082262754, | |
| "learning_rate": 6e-06, | |
| "loss": -0.0026, | |
| "step": 22, | |
| "step_time": 4.04690631700214 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.332530975341797, | |
| "epoch": 0.00023, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.0429239459335804, | |
| "kl": 0.07239408232271671, | |
| "learning_rate": 6.285714285714286e-06, | |
| "loss": -0.0028, | |
| "num_tokens": 424016.0, | |
| "reward": -0.6825613975524902, | |
| "reward_std": 0.8769230246543884, | |
| "rewards/rollout_reward_func/mean": -0.6825613975524902, | |
| "rewards/rollout_reward_func/std": 0.852479875087738, | |
| "sampling/importance_sampling_ratio/max": 0.14365191757678986, | |
| "sampling/importance_sampling_ratio/mean": 0.05794315040111542, | |
| "sampling/importance_sampling_ratio/min": 0.008735693991184235, | |
| "sampling/sampling_logp_difference/max": 2.5880439281463623, | |
| "sampling/sampling_logp_difference/mean": 1.6414165496826172, | |
| "step": 23, | |
| "step_time": 5.61804673000006 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.21875, | |
| "clip_ratio/high_mean": 0.109375, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.109375, | |
| "entropy": 8.144118428230286, | |
| "epoch": 0.00024, | |
| "grad_norm": 0.0180932879447937, | |
| "kl": 0.0962864700704813, | |
| "learning_rate": 6.571428571428572e-06, | |
| "loss": -0.0031, | |
| "step": 24, | |
| "step_time": 2.8988405260024592 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.1875, | |
| "completions/mean_terminated_length": 2.1875, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 8.125683069229126, | |
| "epoch": 0.00025, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.03623996675014496, | |
| "kl": 0.10392077919095755, | |
| "learning_rate": 6.857142857142858e-06, | |
| "loss": -0.0041, | |
| "num_tokens": 459589.0, | |
| "reward": -0.61258465051651, | |
| "reward_std": 0.871542751789093, | |
| "rewards/rollout_reward_func/mean": -0.61258465051651, | |
| "rewards/rollout_reward_func/std": 0.8524011969566345, | |
| "sampling/importance_sampling_ratio/max": 0.16312259435653687, | |
| "sampling/importance_sampling_ratio/mean": 0.06305442750453949, | |
| "sampling/importance_sampling_ratio/min": 1.7614916032471228e-06, | |
| "sampling/sampling_logp_difference/max": 4.772340774536133, | |
| "sampling/sampling_logp_difference/mean": 1.7246109247207642, | |
| "step": 25, | |
| "step_time": 5.539267299005587 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 7.976094305515289, | |
| "epoch": 0.00026, | |
| "grad_norm": 0.030301710590720177, | |
| "kl": 0.13206800539046526, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": -0.0045, | |
| "step": 26, | |
| "step_time": 2.896310984997399 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.4375, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 7.269331395626068, | |
| "epoch": 0.00027, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.022369084879755974, | |
| "kl": 0.16119840927422047, | |
| "learning_rate": 7.428571428571429e-06, | |
| "loss": -0.0036, | |
| "num_tokens": 496650.0, | |
| "reward": -0.7243883013725281, | |
| "reward_std": 0.7688334584236145, | |
| "rewards/rollout_reward_func/mean": -0.7243883013725281, | |
| "rewards/rollout_reward_func/std": 0.7527879476547241, | |
| "sampling/importance_sampling_ratio/max": 0.18785437941551208, | |
| "sampling/importance_sampling_ratio/mean": 0.10117587447166443, | |
| "sampling/importance_sampling_ratio/min": 8.512477528421769e-11, | |
| "sampling/sampling_logp_difference/max": 4.909823417663574, | |
| "sampling/sampling_logp_difference/mean": 1.4340462684631348, | |
| "step": 27, | |
| "step_time": 6.4576256859945715 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 7.120794892311096, | |
| "epoch": 0.00028, | |
| "grad_norm": 0.02468658983707428, | |
| "kl": 0.182576522231102, | |
| "learning_rate": 7.714285714285716e-06, | |
| "loss": -0.0038, | |
| "step": 28, | |
| "step_time": 3.5662226489985187 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 7.656641006469727, | |
| "epoch": 0.00029, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.016346270218491554, | |
| "kl": 0.1884312927722931, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": -0.0025, | |
| "num_tokens": 531329.0, | |
| "reward": -0.6714671850204468, | |
| "reward_std": 0.8514942526817322, | |
| "rewards/rollout_reward_func/mean": -0.6714671850204468, | |
| "rewards/rollout_reward_func/std": 0.8725821375846863, | |
| "sampling/importance_sampling_ratio/max": 0.2034609168767929, | |
| "sampling/importance_sampling_ratio/mean": 0.0898696631193161, | |
| "sampling/importance_sampling_ratio/min": 0.008383152075111866, | |
| "sampling/sampling_logp_difference/max": 2.7939882278442383, | |
| "sampling/sampling_logp_difference/mean": 1.5407953262329102, | |
| "step": 29, | |
| "step_time": 5.637962408003659 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0625, | |
| "clip_ratio/high_mean": 0.046875, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.046875, | |
| "entropy": 7.5971901416778564, | |
| "epoch": 0.0003, | |
| "grad_norm": 0.013556623831391335, | |
| "kl": 0.20893656089901924, | |
| "learning_rate": 8.285714285714287e-06, | |
| "loss": -0.0026, | |
| "step": 30, | |
| "step_time": 2.9192343930008064 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 9.0, | |
| "completions/max_terminated_length": 9.0, | |
| "completions/mean_length": 2.21875, | |
| "completions/mean_terminated_length": 2.21875, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 6.758796453475952, | |
| "epoch": 0.00031, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.019501112401485443, | |
| "kl": 0.3079346362501383, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": -0.0031, | |
| "num_tokens": 567805.0, | |
| "reward": -0.568469762802124, | |
| "reward_std": 0.8567708730697632, | |
| "rewards/rollout_reward_func/mean": -0.568469762802124, | |
| "rewards/rollout_reward_func/std": 0.8660122752189636, | |
| "sampling/importance_sampling_ratio/max": 0.22180257737636566, | |
| "sampling/importance_sampling_ratio/mean": 0.12508273124694824, | |
| "sampling/importance_sampling_ratio/min": 3.750224089604792e-11, | |
| "sampling/sampling_logp_difference/max": 5.136954307556152, | |
| "sampling/sampling_logp_difference/mean": 1.4436562061309814, | |
| "step": 31, | |
| "step_time": 5.43906901200171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0625, | |
| "clip_ratio/high_mean": 0.03125, | |
| "clip_ratio/low_mean": 0.03125, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0625, | |
| "entropy": 6.6603924036026, | |
| "epoch": 0.00032, | |
| "grad_norm": 0.01703478768467903, | |
| "kl": 0.3732527755200863, | |
| "learning_rate": 8.857142857142858e-06, | |
| "loss": -0.0032, | |
| "step": 32, | |
| "step_time": 2.930219074998604 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.015625, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 6.725336015224457, | |
| "epoch": 0.00033, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.016738811507821083, | |
| "kl": 0.27567504718899727, | |
| "learning_rate": 9.142857142857144e-06, | |
| "loss": -0.0019, | |
| "num_tokens": 603268.0, | |
| "reward": -0.30585941672325134, | |
| "reward_std": 0.6699719429016113, | |
| "rewards/rollout_reward_func/mean": -0.30585941672325134, | |
| "rewards/rollout_reward_func/std": 0.6897762417793274, | |
| "sampling/importance_sampling_ratio/max": 0.2360040694475174, | |
| "sampling/importance_sampling_ratio/mean": 0.1374823898077011, | |
| "sampling/importance_sampling_ratio/min": 0.006810983642935753, | |
| "sampling/sampling_logp_difference/max": 3.1095614433288574, | |
| "sampling/sampling_logp_difference/mean": 1.2982618808746338, | |
| "step": 33, | |
| "step_time": 6.234361916005582 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.09375, | |
| "clip_ratio/high_mean": 0.046875, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.046875, | |
| "entropy": 6.653369903564453, | |
| "epoch": 0.00034, | |
| "grad_norm": 0.01500980369746685, | |
| "kl": 0.29385758377611637, | |
| "learning_rate": 9.42857142857143e-06, | |
| "loss": -0.0019, | |
| "step": 34, | |
| "step_time": 2.948668930999702 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.015625, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.09375, | |
| "completions/mean_terminated_length": 2.09375, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 6.599472224712372, | |
| "epoch": 0.00035, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.018205825239419937, | |
| "kl": 0.35661908239126205, | |
| "learning_rate": 9.714285714285715e-06, | |
| "loss": -0.0049, | |
| "num_tokens": 639208.0, | |
| "reward": -0.5350777506828308, | |
| "reward_std": 0.7106601595878601, | |
| "rewards/rollout_reward_func/mean": -0.5350777506828308, | |
| "rewards/rollout_reward_func/std": 0.7991757392883301, | |
| "sampling/importance_sampling_ratio/max": 0.25684407353401184, | |
| "sampling/importance_sampling_ratio/mean": 0.1485980749130249, | |
| "sampling/importance_sampling_ratio/min": 3.692734389915131e-05, | |
| "sampling/sampling_logp_difference/max": 4.381838321685791, | |
| "sampling/sampling_logp_difference/mean": 1.253082275390625, | |
| "step": 35, | |
| "step_time": 5.500268681997113 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 6.5274258852005005, | |
| "epoch": 0.00036, | |
| "grad_norm": 0.026637688279151917, | |
| "kl": 0.36279567517340183, | |
| "learning_rate": 1e-05, | |
| "loss": -0.0049, | |
| "step": 36, | |
| "step_time": 2.9163373929950467 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 6.327381074428558, | |
| "epoch": 0.00037, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.03791587054729462, | |
| "kl": 0.4374086819589138, | |
| "learning_rate": 9.999999999962232e-06, | |
| "loss": -0.0034, | |
| "num_tokens": 675224.0, | |
| "reward": -0.42839378118515015, | |
| "reward_std": 0.7165933847427368, | |
| "rewards/rollout_reward_func/mean": -0.42839378118515015, | |
| "rewards/rollout_reward_func/std": 0.6934623122215271, | |
| "sampling/importance_sampling_ratio/max": 0.2750149071216583, | |
| "sampling/importance_sampling_ratio/mean": 0.16812871396541595, | |
| "sampling/importance_sampling_ratio/min": 0.005278678145259619, | |
| "sampling/sampling_logp_difference/max": 3.2639646530151367, | |
| "sampling/sampling_logp_difference/mean": 1.122904896736145, | |
| "step": 37, | |
| "step_time": 5.68354233199716 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.09375, | |
| "clip_ratio/high_mean": 0.046875, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.046875, | |
| "entropy": 6.15696656703949, | |
| "epoch": 0.00038, | |
| "grad_norm": 0.01739896647632122, | |
| "kl": 0.46510135009884834, | |
| "learning_rate": 9.999999999848919e-06, | |
| "loss": -0.0035, | |
| "step": 38, | |
| "step_time": 2.9220271470039734 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 8.0, | |
| "completions/mean_length": 2.625, | |
| "completions/mean_terminated_length": 2.1935482025146484, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 6.091022729873657, | |
| "epoch": 0.00039, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.0197161715477705, | |
| "kl": 0.42657361552119255, | |
| "learning_rate": 9.99999999966007e-06, | |
| "loss": -0.0024, | |
| "num_tokens": 710988.0, | |
| "reward": -0.3023349940776825, | |
| "reward_std": 0.6465471386909485, | |
| "rewards/rollout_reward_func/mean": -0.3023349940776825, | |
| "rewards/rollout_reward_func/std": 0.6331813335418701, | |
| "sampling/importance_sampling_ratio/max": 0.2962448298931122, | |
| "sampling/importance_sampling_ratio/mean": 0.18444794416427612, | |
| "sampling/importance_sampling_ratio/min": 4.504835306867738e-12, | |
| "sampling/sampling_logp_difference/max": 4.963308334350586, | |
| "sampling/sampling_logp_difference/mean": 1.1856834888458252, | |
| "step": 39, | |
| "step_time": 7.029601453006762 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.1319444444961846, | |
| "clip_ratio/high_mean": 0.07847222150303423, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.07847222150303423, | |
| "entropy": 5.928341567516327, | |
| "epoch": 0.0004, | |
| "grad_norm": 0.028808562085032463, | |
| "kl": 0.44897962361574173, | |
| "learning_rate": 9.99999999939568e-06, | |
| "loss": -0.0025, | |
| "step": 40, | |
| "step_time": 2.9406937890053086 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 6.25755649805069, | |
| "epoch": 0.00041, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.0933663472533226, | |
| "kl": 0.48784746043384075, | |
| "learning_rate": 9.999999999055747e-06, | |
| "loss": 0.0029, | |
| "num_tokens": 745636.0, | |
| "reward": -0.19651329517364502, | |
| "reward_std": 0.5318358540534973, | |
| "rewards/rollout_reward_func/mean": -0.19651329517364502, | |
| "rewards/rollout_reward_func/std": 0.5945489406585693, | |
| "sampling/importance_sampling_ratio/max": 0.31440603733062744, | |
| "sampling/importance_sampling_ratio/mean": 0.18640094995498657, | |
| "sampling/importance_sampling_ratio/min": 0.011243580840528011, | |
| "sampling/sampling_logp_difference/max": 2.6481189727783203, | |
| "sampling/sampling_logp_difference/mean": 1.0152370929718018, | |
| "step": 41, | |
| "step_time": 5.63657486000011 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.1875, | |
| "clip_ratio/high_mean": 0.09375, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.09375, | |
| "entropy": 5.96447890996933, | |
| "epoch": 0.00042, | |
| "grad_norm": 0.02206423319876194, | |
| "kl": 0.5289704687893391, | |
| "learning_rate": 9.999999998640277e-06, | |
| "loss": 0.0027, | |
| "step": 42, | |
| "step_time": 2.8975234469944553 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 5.439069867134094, | |
| "epoch": 0.00043, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.11297624558210373, | |
| "kl": 0.4683471880853176, | |
| "learning_rate": 9.999999998149264e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 781581.0, | |
| "reward": -0.44622302055358887, | |
| "reward_std": 0.68892902135849, | |
| "rewards/rollout_reward_func/mean": -0.44622302055358887, | |
| "rewards/rollout_reward_func/std": 0.7478122711181641, | |
| "sampling/importance_sampling_ratio/max": 0.3280465304851532, | |
| "sampling/importance_sampling_ratio/mean": 0.22684511542320251, | |
| "sampling/importance_sampling_ratio/min": 0.026074819266796112, | |
| "sampling/sampling_logp_difference/max": 2.0682249069213867, | |
| "sampling/sampling_logp_difference/mean": 0.8481977581977844, | |
| "step": 43, | |
| "step_time": 5.7263973810077005 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.15625, | |
| "clip_ratio/high_mean": 0.09375, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.09375, | |
| "entropy": 5.03247994184494, | |
| "epoch": 0.00044, | |
| "grad_norm": 0.06387817859649658, | |
| "kl": 0.5371211282908916, | |
| "learning_rate": 9.999999997582713e-06, | |
| "loss": 0.0004, | |
| "step": 44, | |
| "step_time": 3.3802060630041524 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.09375, | |
| "completions/mean_terminated_length": 2.09375, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 4.975203037261963, | |
| "epoch": 0.00045, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.13102945685386658, | |
| "kl": 0.6513971909880638, | |
| "learning_rate": 9.999999996940621e-06, | |
| "loss": -0.0028, | |
| "num_tokens": 817273.0, | |
| "reward": -0.587563157081604, | |
| "reward_std": 0.7007678747177124, | |
| "rewards/rollout_reward_func/mean": -0.587563157081604, | |
| "rewards/rollout_reward_func/std": 0.7760494947433472, | |
| "sampling/importance_sampling_ratio/max": 0.3387902081012726, | |
| "sampling/importance_sampling_ratio/mean": 0.2464321404695511, | |
| "sampling/importance_sampling_ratio/min": 7.80636619310826e-05, | |
| "sampling/sampling_logp_difference/max": 4.357866287231445, | |
| "sampling/sampling_logp_difference/mean": 0.8462474346160889, | |
| "step": 45, | |
| "step_time": 6.2393272499975865 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0625, | |
| "clip_ratio/high_mean": 0.03125, | |
| "clip_ratio/low_mean": 0.0625, | |
| "clip_ratio/low_min": 0.03125, | |
| "clip_ratio/region_mean": 0.09375, | |
| "entropy": 4.856449127197266, | |
| "epoch": 0.00046, | |
| "grad_norm": 0.0859452411532402, | |
| "kl": 0.6537227220833302, | |
| "learning_rate": 9.99999999622299e-06, | |
| "loss": -0.0031, | |
| "step": 46, | |
| "step_time": 2.901350881998951 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 4.492773771286011, | |
| "epoch": 0.00047, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.14956164360046387, | |
| "kl": 0.5590145848691463, | |
| "learning_rate": 9.999999995429816e-06, | |
| "loss": -0.0016, | |
| "num_tokens": 853016.0, | |
| "reward": -0.2771303355693817, | |
| "reward_std": 0.7537246942520142, | |
| "rewards/rollout_reward_func/mean": -0.2771303355693817, | |
| "rewards/rollout_reward_func/std": 0.7401061654090881, | |
| "sampling/importance_sampling_ratio/max": 0.34557926654815674, | |
| "sampling/importance_sampling_ratio/mean": 0.2762402594089508, | |
| "sampling/importance_sampling_ratio/min": 0.03973078727722168, | |
| "sampling/sampling_logp_difference/max": 1.9350109100341797, | |
| "sampling/sampling_logp_difference/mean": 0.7055625915527344, | |
| "step": 47, | |
| "step_time": 5.625663070004521 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.09375, | |
| "clip_ratio/high_mean": 0.046875, | |
| "clip_ratio/low_mean": 0.03125, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.078125, | |
| "entropy": 4.3877677619457245, | |
| "epoch": 0.00048, | |
| "grad_norm": 0.06713134795427322, | |
| "kl": 0.578897014260292, | |
| "learning_rate": 9.999999994561102e-06, | |
| "loss": -0.0019, | |
| "step": 48, | |
| "step_time": 2.886007981996954 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.013888888992369175, | |
| "clip_ratio/high_mean": 0.0069444444961845875, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0069444444961845875, | |
| "completions/clipped_ratio": 0.0625, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.96875, | |
| "completions/mean_terminated_length": 2.1000001430511475, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 4.886862337589264, | |
| "epoch": 0.00049, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.3026506304740906, | |
| "kl": 0.6998865567147732, | |
| "learning_rate": 9.99999999361685e-06, | |
| "loss": -0.0043, | |
| "num_tokens": 888637.0, | |
| "reward": -0.3646780252456665, | |
| "reward_std": 0.7392382025718689, | |
| "rewards/rollout_reward_func/mean": -0.3646780252456665, | |
| "rewards/rollout_reward_func/std": 0.7437232136726379, | |
| "sampling/importance_sampling_ratio/max": 0.5906126499176025, | |
| "sampling/importance_sampling_ratio/mean": 0.2536194622516632, | |
| "sampling/importance_sampling_ratio/min": 5.002554794020231e-12, | |
| "sampling/sampling_logp_difference/max": 5.405303001403809, | |
| "sampling/sampling_logp_difference/mean": 1.139528751373291, | |
| "step": 49, | |
| "step_time": 5.81962778799425 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.045138888992369175, | |
| "clip_ratio/high_mean": 0.03819444449618459, | |
| "clip_ratio/low_mean": 0.09375, | |
| "clip_ratio/low_min": 0.03125, | |
| "clip_ratio/region_mean": 0.13194444426335394, | |
| "entropy": 4.753438889980316, | |
| "epoch": 0.0005, | |
| "grad_norm": 0.22047115862369537, | |
| "kl": 0.8953660875558853, | |
| "learning_rate": 9.999999992597058e-06, | |
| "loss": -0.0044, | |
| "step": 50, | |
| "step_time": 3.4302545330028806 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.03125, | |
| "completions/max_length": 16.0, | |
| "completions/max_terminated_length": 11.0, | |
| "completions/mean_length": 2.71875, | |
| "completions/mean_terminated_length": 2.2903225421905518, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 4.553148508071899, | |
| "epoch": 0.00051, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.14803296327590942, | |
| "kl": 0.8594339191913605, | |
| "learning_rate": 9.999999991501723e-06, | |
| "loss": 0.0001, | |
| "num_tokens": 925783.0, | |
| "reward": -0.47938936948776245, | |
| "reward_std": 0.6224657893180847, | |
| "rewards/rollout_reward_func/mean": -0.47938936948776245, | |
| "rewards/rollout_reward_func/std": 0.6325410604476929, | |
| "sampling/importance_sampling_ratio/max": 0.8112522959709167, | |
| "sampling/importance_sampling_ratio/mean": 0.29810550808906555, | |
| "sampling/importance_sampling_ratio/min": 3.594766628464696e-13, | |
| "sampling/sampling_logp_difference/max": 5.109455108642578, | |
| "sampling/sampling_logp_difference/mean": 1.0870777368545532, | |
| "step": 51, | |
| "step_time": 6.738885986007517 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.02524038404226303, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.02524038404226303, | |
| "entropy": 4.535911321640015, | |
| "epoch": 0.00052, | |
| "grad_norm": 0.04543463885784149, | |
| "kl": 0.7985228635370731, | |
| "learning_rate": 9.99999999033085e-06, | |
| "loss": -0.0004, | |
| "step": 52, | |
| "step_time": 3.152213782999752 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2.0, | |
| "completions/max_terminated_length": 2.0, | |
| "completions/mean_length": 2.0, | |
| "completions/mean_terminated_length": 2.0, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 4.083370506763458, | |
| "epoch": 0.00053, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.04525892809033394, | |
| "kl": 0.874318428337574, | |
| "learning_rate": 9.999999989084436e-06, | |
| "loss": -0.0025, | |
| "num_tokens": 961105.0, | |
| "reward": -0.21413108706474304, | |
| "reward_std": 0.5813945531845093, | |
| "rewards/rollout_reward_func/mean": -0.21413108706474304, | |
| "rewards/rollout_reward_func/std": 0.5861169099807739, | |
| "sampling/importance_sampling_ratio/max": 0.7165222764015198, | |
| "sampling/importance_sampling_ratio/mean": 0.3133776783943176, | |
| "sampling/importance_sampling_ratio/min": 0.013980884104967117, | |
| "sampling/sampling_logp_difference/max": 3.070335626602173, | |
| "sampling/sampling_logp_difference/mean": 0.7299262285232544, | |
| "step": 53, | |
| "step_time": 6.067928731994471 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.015625, | |
| "clip_ratio/low_mean": 0.015625, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.03125, | |
| "entropy": 4.059947609901428, | |
| "epoch": 0.00054, | |
| "grad_norm": 0.07141973823308945, | |
| "kl": 0.9976279065012932, | |
| "learning_rate": 9.99999998776248e-06, | |
| "loss": -0.0025, | |
| "step": 54, | |
| "step_time": 3.118995607001125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.015625, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.015625, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5.0, | |
| "completions/max_terminated_length": 5.0, | |
| "completions/mean_length": 2.09375, | |
| "completions/mean_terminated_length": 2.09375, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 4.261334180831909, | |
| "epoch": 0.00055, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.06153355911374092, | |
| "kl": 0.8010737895965576, | |
| "learning_rate": 9.999999986364988e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 996705.0, | |
| "reward": -0.33633241057395935, | |
| "reward_std": 0.4821242392063141, | |
| "rewards/rollout_reward_func/mean": -0.33633241057395935, | |
| "rewards/rollout_reward_func/std": 0.5220240354537964, | |
| "sampling/importance_sampling_ratio/max": 0.9735277891159058, | |
| "sampling/importance_sampling_ratio/mean": 0.2968878149986267, | |
| "sampling/importance_sampling_ratio/min": 0.0002361015067435801, | |
| "sampling/sampling_logp_difference/max": 4.416370868682861, | |
| "sampling/sampling_logp_difference/mean": 0.8390293717384338, | |
| "step": 55, | |
| "step_time": 5.6938710109971 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 700000, | |
| "num_input_tokens_seen": 996705, | |
| "num_train_epochs": 7, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |