Instructions to use Gege24/environment_test_affine-7B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Gege24/environment_test_affine-7B with PEFT:
Base model is not found.
- Transformers
How to use Gege24/environment_test_affine-7B with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Gege24/environment_test_affine-7B")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Gege24/environment_test_affine-7B", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use Gege24/environment_test_affine-7B with vLLM:
Install from pip and serve model
```bash
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Gege24/environment_test_affine-7B"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/environment_test_affine-7B",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker
docker model run hf.co/Gege24/environment_test_affine-7B
- SGLang
How to use Gege24/environment_test_affine-7B with SGLang:
Install from pip and serve model
```bash
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Gege24/environment_test_affine-7B" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/environment_test_affine-7B",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
Use Docker images
```bash
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Gege24/environment_test_affine-7B" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/environment_test_affine-7B",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
```
- Docker Model Runner
How to use Gege24/environment_test_affine-7B with Docker Model Runner:
docker model run hf.co/Gege24/environment_test_affine-7B
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.012, | |
| "eval_steps": 500, | |
| "global_step": 75, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.033851010352373125, | |
| "clip_ratio/high_mean": 0.011871843505650759, | |
| "clip_ratio/low_mean": 0.024242424033582212, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.03611426735296845, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.6, | |
| "completions/max_terminated_length": 374.6, | |
| "completions/mean_length": 297.675, | |
| "completions/mean_terminated_length": 297.675, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.33769991919398307, | |
| "epoch": 0.0008, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 0.030647173523902893, | |
| "kl": 0.022074293252080678, | |
| "learning_rate": 8.529119999999999e-07, | |
| "loss": -0.0006066907197237014, | |
| "num_tokens": 136458.0, | |
| "reward": 0.9300000309944153, | |
| "reward_std": 0.23334523439407348, | |
| "rewards/env_goofspiel_reward/mean": 0.9300000309944153, | |
| "rewards/env_goofspiel_reward/std": 0.3451612591743469, | |
| "sampling/importance_sampling_ratio/max": 1.5456702947616576, | |
| "sampling/importance_sampling_ratio/mean": 0.32863556742668154, | |
| "sampling/importance_sampling_ratio/min": 0.00010910680049249776, | |
| "sampling/sampling_logp_difference/max": 7.469822406768799, | |
| "sampling/sampling_logp_difference/mean": 0.680775272846222, | |
| "step": 5, | |
| "step_time": 4.723534681799992 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.052361111342906955, | |
| "clip_ratio/high_mean": 0.014340277761220932, | |
| "clip_ratio/low_mean": 0.015763888787478208, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.030104166455566884, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.2, | |
| "completions/max_terminated_length": 374.2, | |
| "completions/mean_length": 291.9125, | |
| "completions/mean_terminated_length": 291.9125, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.3392209455370903, | |
| "epoch": 0.0016, | |
| "frac_reward_zero_std": 0.4375, | |
| "grad_norm": 0.02777782641351223, | |
| "kl": 0.03302585552446544, | |
| "learning_rate": 1.919052e-06, | |
| "loss": -0.0005224664695560932, | |
| "num_tokens": 270583.0, | |
| "reward": 0.8775000214576721, | |
| "reward_std": 0.26516505479812624, | |
| "rewards/env_goofspiel_reward/mean": 0.8775000214576721, | |
| "rewards/env_goofspiel_reward/std": 0.3663728296756744, | |
| "sampling/importance_sampling_ratio/max": 1.54481360912323, | |
| "sampling/importance_sampling_ratio/mean": 0.3381913095712662, | |
| "sampling/importance_sampling_ratio/min": 2.4473399389535188e-05, | |
| "sampling/sampling_logp_difference/max": 8.968151187896728, | |
| "sampling/sampling_logp_difference/mean": 0.7433344721794128, | |
| "step": 10, | |
| "step_time": 4.138045286000079 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.05089646503329277, | |
| "clip_ratio/high_mean": 0.018314393889158963, | |
| "clip_ratio/low_mean": 0.026897096075117588, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.04521148977801204, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.4, | |
| "completions/max_terminated_length": 374.4, | |
| "completions/mean_length": 284.84375, | |
| "completions/mean_terminated_length": 284.84375, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.34051789045333863, | |
| "epoch": 0.0024, | |
| "frac_reward_zero_std": 0.5875, | |
| "grad_norm": 0.013990325853228569, | |
| "kl": 0.040778578049503265, | |
| "learning_rate": 2.985192e-06, | |
| "loss": -0.0005226288456469774, | |
| "num_tokens": 403304.0, | |
| "reward": 0.9674375176429748, | |
| "reward_std": 0.19100722074508666, | |
| "rewards/env_goofspiel_reward/mean": 0.9674375176429748, | |
| "rewards/env_goofspiel_reward/std": 0.3267829120159149, | |
| "sampling/importance_sampling_ratio/max": 1.7882837533950806, | |
| "sampling/importance_sampling_ratio/mean": 0.3831939160823822, | |
| "sampling/importance_sampling_ratio/min": 0.00034590021532494576, | |
| "sampling/sampling_logp_difference/max": 6.792400264739991, | |
| "sampling/sampling_logp_difference/mean": 0.6026189684867859, | |
| "step": 15, | |
| "step_time": 4.541797615800033 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.08027777820825577, | |
| "clip_ratio/high_mean": 0.02395833358168602, | |
| "clip_ratio/low_mean": 0.019027777854353188, | |
| "clip_ratio/low_min": 0.00625, | |
| "clip_ratio/region_mean": 0.04298611143603921, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.6, | |
| "completions/max_terminated_length": 373.6, | |
| "completions/mean_length": 281.60625, | |
| "completions/mean_terminated_length": 281.60625, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.28771451860666275, | |
| "epoch": 0.0032, | |
| "frac_reward_zero_std": 0.5375, | |
| "grad_norm": 0.029177065938711166, | |
| "kl": 0.0942368695512414, | |
| "learning_rate": 4.051332e-06, | |
| "loss": -0.00038087132852524517, | |
| "num_tokens": 536119.0, | |
| "reward": 0.9637500643730164, | |
| "reward_std": 0.2068287342786789, | |
| "rewards/env_goofspiel_reward/mean": 0.9637500643730164, | |
| "rewards/env_goofspiel_reward/std": 0.3382142722606659, | |
| "sampling/importance_sampling_ratio/max": 1.751455307006836, | |
| "sampling/importance_sampling_ratio/mean": 0.45835237503051757, | |
| "sampling/importance_sampling_ratio/min": 0.00027077984723291595, | |
| "sampling/sampling_logp_difference/max": 8.25740842819214, | |
| "sampling/sampling_logp_difference/mean": 0.5326088547706604, | |
| "step": 20, | |
| "step_time": 4.071906338800045 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.05111111141741276, | |
| "clip_ratio/high_mean": 0.013914141431450843, | |
| "clip_ratio/low_mean": 0.02760506859049201, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.04151920983567834, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 378.6, | |
| "completions/max_terminated_length": 378.6, | |
| "completions/mean_length": 301.01875, | |
| "completions/mean_terminated_length": 301.01875, | |
| "completions/min_length": 218.8, | |
| "completions/min_terminated_length": 218.8, | |
| "entropy": 0.2572413206100464, | |
| "epoch": 0.004, | |
| "frac_reward_zero_std": 0.5375, | |
| "grad_norm": 0.04749641567468643, | |
| "kl": 0.23950345497578382, | |
| "learning_rate": 5.117472e-06, | |
| "loss": -0.00030293280724436045, | |
| "num_tokens": 674875.0, | |
| "reward": 0.9487500309944152, | |
| "reward_std": 0.20682873725891113, | |
| "rewards/env_goofspiel_reward/mean": 0.9487500309944152, | |
| "rewards/env_goofspiel_reward/std": 0.34452574253082274, | |
| "sampling/importance_sampling_ratio/max": 2.11174156665802, | |
| "sampling/importance_sampling_ratio/mean": 0.43660367727279664, | |
| "sampling/importance_sampling_ratio/min": 0.00013589896843768656, | |
| "sampling/sampling_logp_difference/max": 8.112329578399658, | |
| "sampling/sampling_logp_difference/mean": 0.5582964062690735, | |
| "step": 25, | |
| "step_time": 4.199152118200027 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.032361111417412755, | |
| "clip_ratio/high_mean": 0.009340277779847384, | |
| "clip_ratio/low_mean": 0.018115530349314214, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.027455807756632568, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.2, | |
| "completions/max_terminated_length": 374.2, | |
| "completions/mean_length": 283.40625, | |
| "completions/mean_terminated_length": 283.40625, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.273418403416872, | |
| "epoch": 0.0048, | |
| "frac_reward_zero_std": 0.5875, | |
| "grad_norm": 0.005710980389267206, | |
| "kl": 1.6874765895307065, | |
| "learning_rate": 6.183612e-06, | |
| "loss": -0.0006473449524492025, | |
| "num_tokens": 806865.0, | |
| "reward": 0.9825000166893005, | |
| "reward_std": 0.19091882407665253, | |
| "rewards/env_goofspiel_reward/mean": 0.9825000166893005, | |
| "rewards/env_goofspiel_reward/std": 0.3281997382640839, | |
| "sampling/importance_sampling_ratio/max": 2.13806095123291, | |
| "sampling/importance_sampling_ratio/mean": 0.48566290736198425, | |
| "sampling/importance_sampling_ratio/min": 1.583069079060806e-05, | |
| "sampling/sampling_logp_difference/max": 9.758009147644042, | |
| "sampling/sampling_logp_difference/mean": 0.5621577501296997, | |
| "step": 30, | |
| "step_time": 4.3287319488001685 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.02222222238779068, | |
| "clip_ratio/high_mean": 0.00555555559694767, | |
| "clip_ratio/low_mean": 0.01631944449618459, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.021875000093132257, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 366.4, | |
| "completions/max_terminated_length": 366.4, | |
| "completions/mean_length": 290.65625, | |
| "completions/mean_terminated_length": 290.65625, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.2778049871325493, | |
| "epoch": 0.0056, | |
| "frac_reward_zero_std": 0.55, | |
| "grad_norm": 0.025574276223778725, | |
| "kl": 0.22191586308181285, | |
| "learning_rate": 7.249752e-06, | |
| "loss": -0.0005257311277091503, | |
| "num_tokens": 941915.0, | |
| "reward": 0.99000004529953, | |
| "reward_std": 0.20152543485164642, | |
| "rewards/env_goofspiel_reward/mean": 0.99000004529953, | |
| "rewards/env_goofspiel_reward/std": 0.32238503098487853, | |
| "sampling/importance_sampling_ratio/max": 1.8528586864471435, | |
| "sampling/importance_sampling_ratio/mean": 0.5378320515155792, | |
| "sampling/importance_sampling_ratio/min": 0.0016222307924181223, | |
| "sampling/sampling_logp_difference/max": 6.151937532424927, | |
| "sampling/sampling_logp_difference/mean": 0.41074748039245607, | |
| "step": 35, | |
| "step_time": 4.195248219399855 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03625000007450581, | |
| "clip_ratio/high_mean": 0.009062500018626452, | |
| "clip_ratio/low_mean": 0.010277777817100287, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.019340277835726737, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.4, | |
| "completions/max_terminated_length": 374.4, | |
| "completions/mean_length": 290.64375, | |
| "completions/mean_terminated_length": 290.64375, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.3076802439987659, | |
| "epoch": 0.0064, | |
| "frac_reward_zero_std": 0.4875, | |
| "grad_norm": 0.03911300376057625, | |
| "kl": 0.30147731937468053, | |
| "learning_rate": 7.4629793691100655e-06, | |
| "loss": -0.0009569000452756881, | |
| "num_tokens": 1076583.0, | |
| "reward": 0.9787500500679016, | |
| "reward_std": 0.23864853978157044, | |
| "rewards/env_goofspiel_reward/mean": 0.9787500500679016, | |
| "rewards/env_goofspiel_reward/std": 0.3355243980884552, | |
| "sampling/importance_sampling_ratio/max": 1.7326099634170533, | |
| "sampling/importance_sampling_ratio/mean": 0.5706644296646118, | |
| "sampling/importance_sampling_ratio/min": 0.008987322356551886, | |
| "sampling/sampling_logp_difference/max": 4.789818382263183, | |
| "sampling/sampling_logp_difference/mean": 0.32595881819725037, | |
| "step": 40, | |
| "step_time": 4.120446558600088 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.02430555559694767, | |
| "clip_ratio/high_mean": 0.006076388899236918, | |
| "clip_ratio/low_mean": 0.018705808185040952, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.02478219708427787, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.6, | |
| "completions/max_terminated_length": 374.6, | |
| "completions/mean_length": 298.1, | |
| "completions/mean_terminated_length": 298.1, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.30067678913474083, | |
| "epoch": 0.0072, | |
| "frac_reward_zero_std": 0.5625, | |
| "grad_norm": 0.019556289538741112, | |
| "kl": 0.4184106796979904, | |
| "learning_rate": 7.462976806120193e-06, | |
| "loss": -0.00040965699590742586, | |
| "num_tokens": 1213169.0, | |
| "reward": 0.9524999856948853, | |
| "reward_std": 0.22273863554000856, | |
| "rewards/env_goofspiel_reward/mean": 0.9524999856948853, | |
| "rewards/env_goofspiel_reward/std": 0.36751508712768555, | |
| "sampling/importance_sampling_ratio/max": 1.7973033905029296, | |
| "sampling/importance_sampling_ratio/mean": 0.6051283955574036, | |
| "sampling/importance_sampling_ratio/min": 0.0006137289259640965, | |
| "sampling/sampling_logp_difference/max": 5.231348609924316, | |
| "sampling/sampling_logp_difference/mean": 0.3217563569545746, | |
| "step": 45, | |
| "step_time": 4.313124376999985 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.02430555559694767, | |
| "clip_ratio/high_mean": 0.006076388899236918, | |
| "clip_ratio/low_mean": 0.018645833339542152, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.024722222238779068, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.0, | |
| "completions/max_terminated_length": 374.0, | |
| "completions/mean_length": 299.70625, | |
| "completions/mean_terminated_length": 299.70625, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.26526937559247016, | |
| "epoch": 0.008, | |
| "frac_reward_zero_std": 0.6, | |
| "grad_norm": 0.09333564341068268, | |
| "kl": 0.6234624680131674, | |
| "learning_rate": 7.4629722716015665e-06, | |
| "loss": -0.0008450452238321305, | |
| "num_tokens": 1351131.0, | |
| "reward": 1.0012500405311584, | |
| "reward_std": 0.19622212946414946, | |
| "rewards/env_goofspiel_reward/mean": 1.0012500405311584, | |
| "rewards/env_goofspiel_reward/std": 0.33813255429267886, | |
| "sampling/importance_sampling_ratio/max": 1.8347083568572997, | |
| "sampling/importance_sampling_ratio/mean": 0.6785839080810547, | |
| "sampling/importance_sampling_ratio/min": 0.002685157069936395, | |
| "sampling/sampling_logp_difference/max": 5.393667411804199, | |
| "sampling/sampling_logp_difference/mean": 0.3046145349740982, | |
| "step": 50, | |
| "step_time": 4.5290676355998585 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.03125, | |
| "clip_ratio/high_mean": 0.0078125, | |
| "clip_ratio/low_mean": 0.014444444514811038, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.022256944421678783, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.6, | |
| "completions/max_terminated_length": 374.6, | |
| "completions/mean_length": 283.7625, | |
| "completions/mean_terminated_length": 283.7625, | |
| "completions/min_length": 207.0, | |
| "completions/min_terminated_length": 207.0, | |
| "entropy": 0.25032062605023386, | |
| "epoch": 0.0088, | |
| "frac_reward_zero_std": 0.5, | |
| "grad_norm": 0.0669218897819519, | |
| "kl": 0.47833866626024246, | |
| "learning_rate": 7.4629657655573805e-06, | |
| "loss": -0.0006249105092138052, | |
| "num_tokens": 1483536.0, | |
| "reward": 0.9561875462532043, | |
| "reward_std": 0.2599501311779022, | |
| "rewards/env_goofspiel_reward/mean": 0.9561875462532043, | |
| "rewards/env_goofspiel_reward/std": 0.3719723880290985, | |
| "sampling/importance_sampling_ratio/max": 1.9648807287216186, | |
| "sampling/importance_sampling_ratio/mean": 0.7620458364486694, | |
| "sampling/importance_sampling_ratio/min": 0.0028414088767021893, | |
| "sampling/sampling_logp_difference/max": 4.346728658676147, | |
| "sampling/sampling_logp_difference/mean": 0.2154034972190857, | |
| "step": 55, | |
| "step_time": 4.209427154400236 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.005000000074505806, | |
| "clip_ratio/high_mean": 0.0012500000186264515, | |
| "clip_ratio/low_mean": 0.015659722313284875, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.016909722238779068, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.8, | |
| "completions/max_terminated_length": 374.8, | |
| "completions/mean_length": 294.2, | |
| "completions/mean_terminated_length": 294.2, | |
| "completions/min_length": 218.8, | |
| "completions/min_terminated_length": 218.8, | |
| "entropy": 0.25066495686769485, | |
| "epoch": 0.0096, | |
| "frac_reward_zero_std": 0.6, | |
| "grad_norm": 0.03984799236059189, | |
| "kl": 0.5187893055379391, | |
| "learning_rate": 7.462957287992218e-06, | |
| "loss": -0.001143309846520424, | |
| "num_tokens": 1618874.0, | |
| "reward": 0.993750023841858, | |
| "reward_std": 0.19622212946414946, | |
| "rewards/env_goofspiel_reward/mean": 0.993750023841858, | |
| "rewards/env_goofspiel_reward/std": 0.35103108882904055, | |
| "sampling/importance_sampling_ratio/max": 1.7096198081970215, | |
| "sampling/importance_sampling_ratio/mean": 0.6896162152290344, | |
| "sampling/importance_sampling_ratio/min": 0.0015241437591612338, | |
| "sampling/sampling_logp_difference/max": 4.673358488082886, | |
| "sampling/sampling_logp_difference/mean": 0.26624326705932616, | |
| "step": 60, | |
| "step_time": 4.191420737200042 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.025, | |
| "clip_ratio/high_mean": 0.00625, | |
| "clip_ratio/low_mean": 0.0078125, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0140625, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.0, | |
| "completions/max_terminated_length": 374.0, | |
| "completions/mean_length": 273.53125, | |
| "completions/mean_terminated_length": 273.53125, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.2336573876440525, | |
| "epoch": 0.0104, | |
| "frac_reward_zero_std": 0.7125, | |
| "grad_norm": 0.009051427245140076, | |
| "kl": 0.6452277667820454, | |
| "learning_rate": 7.462946838912051e-06, | |
| "loss": -0.0009178260341286659, | |
| "num_tokens": 1748056.0, | |
| "reward": 1.0800000190734864, | |
| "reward_std": 0.1484924226999283, | |
| "rewards/env_goofspiel_reward/mean": 1.0800000190734864, | |
| "rewards/env_goofspiel_reward/std": 0.28696190714836123, | |
| "sampling/importance_sampling_ratio/max": 1.254857563972473, | |
| "sampling/importance_sampling_ratio/mean": 0.7130017876625061, | |
| "sampling/importance_sampling_ratio/min": 0.003967047110199929, | |
| "sampling/sampling_logp_difference/max": 3.8618431091308594, | |
| "sampling/sampling_logp_difference/mean": 0.20099806785583496, | |
| "step": 65, | |
| "step_time": 4.494912574199771 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00555555559694767, | |
| "clip_ratio/high_mean": 0.0013888888992369176, | |
| "clip_ratio/low_mean": 0.010104166716337204, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.011493055615574121, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.6, | |
| "completions/max_terminated_length": 374.6, | |
| "completions/mean_length": 281.1125, | |
| "completions/mean_terminated_length": 281.1125, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.19834314305335282, | |
| "epoch": 0.0112, | |
| "frac_reward_zero_std": 0.7125, | |
| "grad_norm": 0.018667038530111313, | |
| "kl": 0.4677882671356201, | |
| "learning_rate": 7.462934418324241e-06, | |
| "loss": -0.0008302273228764534, | |
| "num_tokens": 1879948.0, | |
| "reward": 1.0912500381469727, | |
| "reward_std": 0.14318912029266356, | |
| "rewards/env_goofspiel_reward/mean": 1.0912500381469727, | |
| "rewards/env_goofspiel_reward/std": 0.2670675128698349, | |
| "sampling/importance_sampling_ratio/max": 1.8250314712524414, | |
| "sampling/importance_sampling_ratio/mean": 0.8290145397186279, | |
| "sampling/importance_sampling_ratio/min": 0.05519633814692497, | |
| "sampling/sampling_logp_difference/max": 2.754664158821106, | |
| "sampling/sampling_logp_difference/mean": 0.12412183284759522, | |
| "step": 70, | |
| "step_time": 4.16713200400036 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.00625, | |
| "clip_ratio/high_mean": 0.0015625, | |
| "clip_ratio/low_mean": 0.01158775258809328, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.01315025258809328, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.4, | |
| "completions/max_terminated_length": 374.4, | |
| "completions/mean_length": 292.4125, | |
| "completions/mean_terminated_length": 292.4125, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.20496653467416764, | |
| "epoch": 0.012, | |
| "frac_reward_zero_std": 0.6625, | |
| "grad_norm": 0.03426237776875496, | |
| "kl": 0.4435719080269337, | |
| "learning_rate": 7.4629200262375374e-06, | |
| "loss": -0.000939619354903698, | |
| "num_tokens": 2015567.0, | |
| "reward": 1.0687500715255738, | |
| "reward_std": 0.1644023284316063, | |
| "rewards/env_goofspiel_reward/mean": 1.0687500715255738, | |
| "rewards/env_goofspiel_reward/std": 0.29003112614154813, | |
| "sampling/importance_sampling_ratio/max": 1.699583315849304, | |
| "sampling/importance_sampling_ratio/mean": 0.7789812088012695, | |
| "sampling/importance_sampling_ratio/min": 0.0047567693516612055, | |
| "sampling/sampling_logp_difference/max": 4.406649398803711, | |
| "sampling/sampling_logp_difference/mean": 0.18859796077013016, | |
| "step": 75, | |
| "step_time": 4.104378796799847 | |
| }, | |
| { | |
| "epoch": 0.012, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 373.0, | |
| "eval_completions/max_terminated_length": 373.0, | |
| "eval_completions/mean_length": 286.4583333333333, | |
| "eval_completions/mean_terminated_length": 286.4583333333333, | |
| "eval_completions/min_length": 235.33333333333334, | |
| "eval_completions/min_terminated_length": 235.33333333333334, | |
| "eval_entropy": 0.1776730790734291, | |
| "eval_frac_reward_zero_std": 0.9166666666666666, | |
| "eval_kl": 0.3452555288871129, | |
| "eval_loss": 6.334867521218257e-06, | |
| "eval_num_tokens": 2015567.0, | |
| "eval_reward": 1.1750000317891438, | |
| "eval_reward_std": 0.035355339447657265, | |
| "eval_rewards/env_goofspiel_reward/mean": 1.1750000317891438, | |
| "eval_rewards/env_goofspiel_reward/std": 0.07071068386236827, | |
| "eval_runtime": 2.2216, | |
| "eval_samples_per_second": 4.501, | |
| "eval_sampling/importance_sampling_ratio/max": 1.2363848288853962, | |
| "eval_sampling/importance_sampling_ratio/mean": 0.8729836543401083, | |
| "eval_sampling/importance_sampling_ratio/min": 0.3416567128151655, | |
| "eval_sampling/sampling_logp_difference/max": 1.6411640246709187, | |
| "eval_sampling/sampling_logp_difference/mean": 0.12927521020174026, | |
| "eval_steps_per_second": 0.9, | |
| "step": 75 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 18750, | |
| "num_input_tokens_seen": 2015567, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |