Instructions to use Jordansky/smoke-ld-test with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Jordansky/smoke-ld-test with PEFT:
Base model is not found.
- Transformers
How to use Jordansky/smoke-ld-test with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Jordansky/smoke-ld-test")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```
```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Jordansky/smoke-ld-test", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Jordansky/smoke-ld-test with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Jordansky/smoke-ld-test"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Jordansky/smoke-ld-test",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/Jordansky/smoke-ld-test
- SGLang
How to use Jordansky/smoke-ld-test with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Jordansky/smoke-ld-test" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Jordansky/smoke-ld-test",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Jordansky/smoke-ld-test" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Jordansky/smoke-ld-test",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use Jordansky/smoke-ld-test with Docker Model Runner:
docker model run hf.co/Jordansky/smoke-ld-test
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.00023, | |
| "eval_steps": 500, | |
| "global_step": 23, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1045.0, | |
| "completions/max_terminated_length": 1045.0, | |
| "completions/mean_length": 390.1875, | |
| "completions/mean_terminated_length": 390.1875, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.605668731033802, | |
| "epoch": 1e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.8689773678779602, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": -0.009, | |
| "num_tokens": 49602.0, | |
| "reward": 0.49261724948883057, | |
| "reward_std": 1.4282547235488892, | |
| "rewards/rollout_reward_func/mean": 0.49261724948883057, | |
| "rewards/rollout_reward_func/std": 1.4220702648162842, | |
| "sampling/importance_sampling_ratio/max": 1.4268709421157837, | |
| "sampling/importance_sampling_ratio/mean": 0.8554609417915344, | |
| "sampling/importance_sampling_ratio/min": 0.6006377339363098, | |
| "sampling/sampling_logp_difference/max": 0.6218547821044922, | |
| "sampling/sampling_logp_difference/mean": 0.05964243412017822, | |
| "step": 1, | |
| "step_time": 13.768371919000856 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "entropy": 0.605668731033802, | |
| "epoch": 2e-05, | |
| "grad_norm": 0.8696622252464294, | |
| "kl": 0.0, | |
| "learning_rate": 5.333333333333333e-07, | |
| "loss": -0.009, | |
| "step": 2, | |
| "step_time": 6.979965120997804 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1373.0, | |
| "completions/max_terminated_length": 1373.0, | |
| "completions/mean_length": 256.3125, | |
| "completions/mean_terminated_length": 256.3125, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.5112766288220882, | |
| "epoch": 3e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.42978498339653015, | |
| "kl": 0.004170067805148392, | |
| "learning_rate": 1.0666666666666667e-06, | |
| "loss": -0.033, | |
| "num_tokens": 91865.0, | |
| "reward": 0.04000457376241684, | |
| "reward_std": 0.8832277655601501, | |
| "rewards/rollout_reward_func/mean": 0.04000457376241684, | |
| "rewards/rollout_reward_func/std": 1.1484198570251465, | |
| "sampling/importance_sampling_ratio/max": 1.1835894584655762, | |
| "sampling/importance_sampling_ratio/mean": 0.8482605218887329, | |
| "sampling/importance_sampling_ratio/min": 0.33848339319229126, | |
| "sampling/sampling_logp_difference/max": 1.006063461303711, | |
| "sampling/sampling_logp_difference/mean": 0.05736350640654564, | |
| "step": 3, | |
| "step_time": 13.413748369999666 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.02083333395421505, | |
| "clip_ratio/low_min": 0.02083333395421505, | |
| "clip_ratio/region_mean": 0.02083333395421505, | |
| "entropy": 0.5041642189025879, | |
| "epoch": 4e-05, | |
| "grad_norm": 0.42657995223999023, | |
| "kl": 0.005512098512326702, | |
| "learning_rate": 1.6e-06, | |
| "loss": -0.0326, | |
| "step": 4, | |
| "step_time": 7.850364476997129 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1745.0, | |
| "completions/max_terminated_length": 1745.0, | |
| "completions/mean_length": 340.96875, | |
| "completions/mean_terminated_length": 340.96875, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.59855717420578, | |
| "epoch": 5e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.033186435699463, | |
| "kl": 0.002424745060807254, | |
| "learning_rate": 2.1333333333333334e-06, | |
| "loss": 0.007, | |
| "num_tokens": 138699.0, | |
| "reward": 0.3879123330116272, | |
| "reward_std": 1.4281163215637207, | |
| "rewards/rollout_reward_func/mean": 0.3879123330116272, | |
| "rewards/rollout_reward_func/std": 1.46486234664917, | |
| "sampling/importance_sampling_ratio/max": 1.9882254600524902, | |
| "sampling/importance_sampling_ratio/mean": 0.8772280812263489, | |
| "sampling/importance_sampling_ratio/min": 2.3855912800740953e-09, | |
| "sampling/sampling_logp_difference/max": 18.587005615234375, | |
| "sampling/sampling_logp_difference/mean": 0.14903730154037476, | |
| "step": 5, | |
| "step_time": 15.827661174997047 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.012500000186264515, | |
| "clip_ratio/high_mean": 0.0062500000931322575, | |
| "clip_ratio/low_mean": 0.03258547093719244, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0388354710303247, | |
| "entropy": 0.5908495783805847, | |
| "epoch": 6e-05, | |
| "grad_norm": 0.27227118611335754, | |
| "kl": 0.009793178239533518, | |
| "learning_rate": 2.6666666666666664e-06, | |
| "loss": 0.0062, | |
| "step": 6, | |
| "step_time": 8.810438610000347 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1027.0, | |
| "completions/max_terminated_length": 1027.0, | |
| "completions/mean_length": 231.6875, | |
| "completions/mean_terminated_length": 231.6875, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.5898670703172684, | |
| "epoch": 7e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.3972772359848022, | |
| "kl": 0.002214236554209492, | |
| "learning_rate": 3.2e-06, | |
| "loss": -0.0126, | |
| "num_tokens": 182583.0, | |
| "reward": 0.27162131667137146, | |
| "reward_std": 1.0868947505950928, | |
| "rewards/rollout_reward_func/mean": 0.27162131667137146, | |
| "rewards/rollout_reward_func/std": 1.5912116765975952, | |
| "sampling/importance_sampling_ratio/max": 1.6212953329086304, | |
| "sampling/importance_sampling_ratio/mean": 0.8596766591072083, | |
| "sampling/importance_sampling_ratio/min": 0.3463258147239685, | |
| "sampling/sampling_logp_difference/max": 0.8983482718467712, | |
| "sampling/sampling_logp_difference/mean": 0.06186839938163757, | |
| "step": 7, | |
| "step_time": 12.116631305001647 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.01458333432674408, | |
| "clip_ratio/low_min": 0.008333333767950535, | |
| "clip_ratio/region_mean": 0.01458333432674408, | |
| "entropy": 0.5949340760707855, | |
| "epoch": 8e-05, | |
| "grad_norm": 0.7176365852355957, | |
| "kl": 0.007167384720332848, | |
| "learning_rate": 3.7333333333333333e-06, | |
| "loss": -0.0125, | |
| "step": 8, | |
| "step_time": 7.219443969997883 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 538.0, | |
| "completions/max_terminated_length": 538.0, | |
| "completions/mean_length": 139.40625, | |
| "completions/mean_terminated_length": 139.40625, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 0.560302022844553, | |
| "epoch": 9e-05, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7369521260261536, | |
| "kl": 0.004876748149399646, | |
| "learning_rate": 4.266666666666667e-06, | |
| "loss": -0.0042, | |
| "num_tokens": 222153.0, | |
| "reward": -0.008272513747215271, | |
| "reward_std": 0.5417632460594177, | |
| "rewards/rollout_reward_func/mean": -0.008272513747215271, | |
| "rewards/rollout_reward_func/std": 0.9568253755569458, | |
| "sampling/importance_sampling_ratio/max": 1.1025569438934326, | |
| "sampling/importance_sampling_ratio/mean": 0.843124270439148, | |
| "sampling/importance_sampling_ratio/min": 0.3177212178707123, | |
| "sampling/sampling_logp_difference/max": 0.4518265426158905, | |
| "sampling/sampling_logp_difference/mean": 0.06099293380975723, | |
| "step": 9, | |
| "step_time": 9.450052213000163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.015625, | |
| "clip_ratio/high_mean": 0.0078125, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0078125, | |
| "entropy": 0.5663095861673355, | |
| "epoch": 0.0001, | |
| "grad_norm": 0.7801238298416138, | |
| "kl": 0.006132202317530755, | |
| "learning_rate": 4.8e-06, | |
| "loss": -0.0044, | |
| "step": 10, | |
| "step_time": 5.177882021996993 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1454.0, | |
| "completions/max_terminated_length": 1454.0, | |
| "completions/mean_length": 456.15625, | |
| "completions/mean_terminated_length": 456.15625, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 0.7295192927122116, | |
| "epoch": 0.00011, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 0.7713008522987366, | |
| "kl": 0.002301187181842579, | |
| "learning_rate": 5.333333333333333e-06, | |
| "loss": -0.0233, | |
| "num_tokens": 276525.0, | |
| "reward": -0.06042708456516266, | |
| "reward_std": 0.5548678636550903, | |
| "rewards/rollout_reward_func/mean": -0.06042708456516266, | |
| "rewards/rollout_reward_func/std": 0.6719298362731934, | |
| "sampling/importance_sampling_ratio/max": 1.2587913274765015, | |
| "sampling/importance_sampling_ratio/mean": 0.7997827529907227, | |
| "sampling/importance_sampling_ratio/min": 0.49780380725860596, | |
| "sampling/sampling_logp_difference/max": 0.3962627649307251, | |
| "sampling/sampling_logp_difference/mean": 0.06726472079753876, | |
| "step": 11, | |
| "step_time": 14.024354443001357 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.005681818351149559, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.005681818351149559, | |
| "entropy": 0.724354475736618, | |
| "epoch": 0.00012, | |
| "grad_norm": 0.5204576849937439, | |
| "kl": 0.004727993551568943, | |
| "learning_rate": 5.866666666666666e-06, | |
| "loss": -0.0237, | |
| "step": 12, | |
| "step_time": 8.684970894000799 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1063.0, | |
| "completions/max_terminated_length": 1063.0, | |
| "completions/mean_length": 458.78125, | |
| "completions/mean_terminated_length": 458.78125, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.6593378074467182, | |
| "epoch": 0.00013, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.2321019172668457, | |
| "kl": 0.026883443380938843, | |
| "learning_rate": 6.4e-06, | |
| "loss": -0.0337, | |
| "num_tokens": 329014.0, | |
| "reward": 0.5792493224143982, | |
| "reward_std": 1.281329870223999, | |
| "rewards/rollout_reward_func/mean": 0.5792493224143982, | |
| "rewards/rollout_reward_func/std": 1.3439542055130005, | |
| "sampling/importance_sampling_ratio/max": 2.4977619647979736, | |
| "sampling/importance_sampling_ratio/mean": 0.8369683623313904, | |
| "sampling/importance_sampling_ratio/min": 0.16656683385372162, | |
| "sampling/sampling_logp_difference/max": 1.4865641593933105, | |
| "sampling/sampling_logp_difference/mean": 0.08205842226743698, | |
| "step": 13, | |
| "step_time": 12.781198183001834 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.01726190559566021, | |
| "clip_ratio/high_mean": 0.008630952797830105, | |
| "clip_ratio/low_mean": 0.02495265193283558, | |
| "clip_ratio/low_min": 0.010416666977107525, | |
| "clip_ratio/region_mean": 0.033583604730665684, | |
| "entropy": 0.6469080410897732, | |
| "epoch": 0.00014, | |
| "grad_norm": 1.151137351989746, | |
| "kl": 0.02960980085481424, | |
| "learning_rate": 6.933333333333334e-06, | |
| "loss": -0.0327, | |
| "step": 14, | |
| "step_time": 6.906088376998014 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1202.0, | |
| "completions/max_terminated_length": 1202.0, | |
| "completions/mean_length": 325.0625, | |
| "completions/mean_terminated_length": 325.0625, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.6098557002842426, | |
| "epoch": 0.00015, | |
| "frac_reward_zero_std": 0.25, | |
| "grad_norm": 0.4717327952384949, | |
| "kl": 0.024435755418380722, | |
| "learning_rate": 7.466666666666667e-06, | |
| "loss": -0.0108, | |
| "num_tokens": 374591.0, | |
| "reward": 0.7930901050567627, | |
| "reward_std": 0.8609839081764221, | |
| "rewards/rollout_reward_func/mean": 0.7930901050567627, | |
| "rewards/rollout_reward_func/std": 1.4151973724365234, | |
| "sampling/importance_sampling_ratio/max": 1.0506209135055542, | |
| "sampling/importance_sampling_ratio/mean": 0.8180942535400391, | |
| "sampling/importance_sampling_ratio/min": 0.21363259851932526, | |
| "sampling/sampling_logp_difference/max": 0.7379248142242432, | |
| "sampling/sampling_logp_difference/mean": 0.058229509741067886, | |
| "step": 15, | |
| "step_time": 12.683557893002217 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.009615384973585606, | |
| "clip_ratio/high_mean": 0.004807692486792803, | |
| "clip_ratio/low_mean": 0.022727273404598236, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.02753496589139104, | |
| "entropy": 0.5996548756957054, | |
| "epoch": 0.00016, | |
| "grad_norm": 0.4265834391117096, | |
| "kl": 0.04038397324620746, | |
| "learning_rate": 8e-06, | |
| "loss": -0.0112, | |
| "step": 16, | |
| "step_time": 6.956079359999421 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 544.0, | |
| "completions/max_terminated_length": 544.0, | |
| "completions/mean_length": 171.625, | |
| "completions/mean_terminated_length": 171.625, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 0.644575547426939, | |
| "epoch": 0.00017, | |
| "frac_reward_zero_std": 0.25, | |
| "grad_norm": 0.8146066665649414, | |
| "kl": 0.046186436113202944, | |
| "learning_rate": 7.999999999907465e-06, | |
| "loss": -0.007, | |
| "num_tokens": 414601.0, | |
| "reward": 1.4229528903961182, | |
| "reward_std": 0.9664409160614014, | |
| "rewards/rollout_reward_func/mean": 1.4229528903961182, | |
| "rewards/rollout_reward_func/std": 1.3361284732818604, | |
| "sampling/importance_sampling_ratio/max": 1.2239395380020142, | |
| "sampling/importance_sampling_ratio/mean": 0.8816792964935303, | |
| "sampling/importance_sampling_ratio/min": 0.2790951430797577, | |
| "sampling/sampling_logp_difference/max": 0.7080600261688232, | |
| "sampling/sampling_logp_difference/mean": 0.05067237466573715, | |
| "step": 17, | |
| "step_time": 10.178569630998027 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.029513888992369175, | |
| "clip_ratio/high_mean": 0.014756944496184587, | |
| "clip_ratio/low_mean": 0.041666666977107525, | |
| "clip_ratio/low_min": 0.02083333395421505, | |
| "clip_ratio/region_mean": 0.05642361147329211, | |
| "entropy": 0.6145340763032436, | |
| "epoch": 0.00018, | |
| "grad_norm": 0.23155571520328522, | |
| "kl": 0.23800076835323125, | |
| "learning_rate": 7.999999999629861e-06, | |
| "loss": -0.0083, | |
| "step": 18, | |
| "step_time": 5.286352907998662 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1043.0, | |
| "completions/max_terminated_length": 1043.0, | |
| "completions/mean_length": 226.78125, | |
| "completions/mean_terminated_length": 226.78125, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.475945807993412, | |
| "epoch": 0.00019, | |
| "frac_reward_zero_std": 0.25, | |
| "grad_norm": 0.6718065142631531, | |
| "kl": 0.1313640770076745, | |
| "learning_rate": 7.99999999916719e-06, | |
| "loss": -0.0172, | |
| "num_tokens": 456186.0, | |
| "reward": 0.717463493347168, | |
| "reward_std": 1.0493590831756592, | |
| "rewards/rollout_reward_func/mean": 0.717463493347168, | |
| "rewards/rollout_reward_func/std": 1.385160207748413, | |
| "sampling/importance_sampling_ratio/max": 1.1975980997085571, | |
| "sampling/importance_sampling_ratio/mean": 0.9053879380226135, | |
| "sampling/importance_sampling_ratio/min": 0.5968481302261353, | |
| "sampling/sampling_logp_difference/max": 0.49695074558258057, | |
| "sampling/sampling_logp_difference/mean": 0.04921717196702957, | |
| "step": 19, | |
| "step_time": 11.41534333400341 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.034722222946584225, | |
| "clip_ratio/high_mean": 0.017361111473292112, | |
| "clip_ratio/low_mean": 0.027777778450399637, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.04513888992369175, | |
| "entropy": 0.45112011581659317, | |
| "epoch": 0.0002, | |
| "grad_norm": 0.26766785979270935, | |
| "kl": 0.20291895651462255, | |
| "learning_rate": 7.999999998519449e-06, | |
| "loss": -0.0184, | |
| "step": 20, | |
| "step_time": 6.5778307339987805 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1094.0, | |
| "completions/max_terminated_length": 1094.0, | |
| "completions/mean_length": 417.625, | |
| "completions/mean_terminated_length": 417.625, | |
| "completions/min_length": 2.0, | |
| "completions/min_terminated_length": 2.0, | |
| "entropy": 0.750504732131958, | |
| "epoch": 0.00021, | |
| "frac_reward_zero_std": 0.0, | |
| "grad_norm": 1.421618103981018, | |
| "kl": 0.1448975705425255, | |
| "learning_rate": 7.999999997686637e-06, | |
| "loss": -0.053, | |
| "num_tokens": 508448.0, | |
| "reward": 0.7684807181358337, | |
| "reward_std": 1.7323917150497437, | |
| "rewards/rollout_reward_func/mean": 0.7684807181358337, | |
| "rewards/rollout_reward_func/std": 1.6796200275421143, | |
| "sampling/importance_sampling_ratio/max": 1.4029730558395386, | |
| "sampling/importance_sampling_ratio/mean": 0.7653356194496155, | |
| "sampling/importance_sampling_ratio/min": 0.23433545231819153, | |
| "sampling/sampling_logp_difference/max": 0.8456215858459473, | |
| "sampling/sampling_logp_difference/mean": 0.08127377182245255, | |
| "step": 21, | |
| "step_time": 12.92976628100223 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.012500000186264515, | |
| "clip_ratio/high_mean": 0.0062500000931322575, | |
| "clip_ratio/low_mean": 0.057641143910586834, | |
| "clip_ratio/low_min": 0.025252525694668293, | |
| "clip_ratio/region_mean": 0.06389114400371909, | |
| "entropy": 0.7457476258277893, | |
| "epoch": 0.00022, | |
| "grad_norm": 2.050600528717041, | |
| "kl": 1.61455141013721, | |
| "learning_rate": 7.999999996668758e-06, | |
| "loss": -0.0522, | |
| "step": 22, | |
| "step_time": 6.766588177999438 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1030.0, | |
| "completions/max_terminated_length": 1030.0, | |
| "completions/mean_length": 290.15625, | |
| "completions/mean_terminated_length": 290.15625, | |
| "completions/min_length": 3.0, | |
| "completions/min_terminated_length": 3.0, | |
| "entropy": 0.4916882663965225, | |
| "epoch": 0.00023, | |
| "frac_reward_zero_std": 0.25, | |
| "grad_norm": 1.3506258726119995, | |
| "kl": 1.069765329360962, | |
| "learning_rate": 7.99999999546581e-06, | |
| "loss": -0.0083, | |
| "num_tokens": 555104.0, | |
| "reward": 1.4062399864196777, | |
| "reward_std": 1.1519603729248047, | |
| "rewards/rollout_reward_func/mean": 1.4062399864196777, | |
| "rewards/rollout_reward_func/std": 1.410425066947937, | |
| "sampling/importance_sampling_ratio/max": 1.3898682594299316, | |
| "sampling/importance_sampling_ratio/mean": 0.8012120723724365, | |
| "sampling/importance_sampling_ratio/min": 0.0, | |
| "sampling/sampling_logp_difference/max": 1.8556509017944336, | |
| "sampling/sampling_logp_difference/mean": 0.09328283369541168, | |
| "step": 23, | |
| "step_time": 11.852552100002868 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 400000, | |
| "num_input_tokens_seen": 555104, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |