Text Generation
PEFT
Safetensors
Transformers
qwen2
grpo
lora
trl
conversational
text-generation-inference
Instructions to use Gege24/environment_test_affine with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use Gege24/environment_test_affine with PEFT:
Base model is not found.
- Transformers
How to use Gege24/environment_test_affine with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Gege24/environment_test_affine")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("Gege24/environment_test_affine")
model = AutoModelForCausalLM.from_pretrained("Gege24/environment_test_affine")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use Gege24/environment_test_affine with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "Gege24/environment_test_affine"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/environment_test_affine",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker
docker model run hf.co/Gege24/environment_test_affine
- SGLang
How to use Gege24/environment_test_affine with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "Gege24/environment_test_affine" \
  --host 0.0.0.0 \
  --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/environment_test_affine",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "Gege24/environment_test_affine" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "Gege24/environment_test_affine",
    "messages": [
      {
        "role": "user",
        "content": "What is the capital of France?"
      }
    ]
  }'
- Docker Model Runner
How to use Gege24/environment_test_affine with Docker Model Runner:
docker model run hf.co/Gege24/environment_test_affine
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.009000360014400577, | |
| "eval_steps": 500, | |
| "global_step": 75, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 365.6, | |
| "completions/max_terminated_length": 365.6, | |
| "completions/mean_length": 292.2166809082031, | |
| "completions/mean_terminated_length": 292.2166809082031, | |
| "completions/min_length": 174.8, | |
| "completions/min_terminated_length": 174.8, | |
| "entropy": 0.7345801413059234, | |
| "epoch": 0.0006000240009600384, | |
| "frac_reward_zero_std": 0.650000023841858, | |
| "grad_norm": 0.578125, | |
| "kl": 0.014322867337614297, | |
| "learning_rate": 1.137216e-06, | |
| "loss": 0.00023176579270511866, | |
| "num_tokens": 101638.0, | |
| "reward": 0.023166669206693767, | |
| "reward_std": 0.03771236310712993, | |
| "rewards/env_goofspiel_reward/mean": 0.023166668484918773, | |
| "rewards/env_goofspiel_reward/std": 0.10488454704172909, | |
| "sampling/importance_sampling_ratio/max": 2.230660581588745, | |
| "sampling/importance_sampling_ratio/mean": 1.0512551069259644, | |
| "sampling/importance_sampling_ratio/min": 0.45719883143901824, | |
| "sampling/sampling_logp_difference/max": 0.9388755321502685, | |
| "sampling/sampling_logp_difference/mean": 0.08014384806156158, | |
| "step": 5, | |
| "step_time": 2.8670244561999425 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.8, | |
| "completions/max_terminated_length": 373.8, | |
| "completions/mean_length": 284.3750061035156, | |
| "completions/mean_terminated_length": 284.3750061035156, | |
| "completions/min_length": 197.4, | |
| "completions/min_terminated_length": 197.4, | |
| "entropy": 0.7396668076515198, | |
| "epoch": 0.0012000480019200767, | |
| "frac_reward_zero_std": 0.5333333492279053, | |
| "grad_norm": 0.6484375, | |
| "kl": 0.008018274139612914, | |
| "learning_rate": 2.5587359999999995e-06, | |
| "loss": 0.0010880917310714723, | |
| "num_tokens": 201985.0, | |
| "reward": 0.1135000076610595, | |
| "reward_std": 0.1355288046877831, | |
| "rewards/env_goofspiel_reward/mean": 0.11350000470411033, | |
| "rewards/env_goofspiel_reward/std": 0.24339603506959975, | |
| "sampling/importance_sampling_ratio/max": 1.9386430501937866, | |
| "sampling/importance_sampling_ratio/mean": 1.0104641795158387, | |
| "sampling/importance_sampling_ratio/min": 0.48966296911239626, | |
| "sampling/sampling_logp_difference/max": 0.7527793884277344, | |
| "sampling/sampling_logp_difference/mean": 0.06743223667144775, | |
| "step": 10, | |
| "step_time": 2.562607514999763 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.8, | |
| "completions/max_terminated_length": 373.8, | |
| "completions/mean_length": 292.29168090820315, | |
| "completions/mean_terminated_length": 292.29168090820315, | |
| "completions/min_length": 205.4, | |
| "completions/min_terminated_length": 205.4, | |
| "entropy": 0.6868447959423065, | |
| "epoch": 0.0018000720028801152, | |
| "frac_reward_zero_std": 0.7666666865348816, | |
| "grad_norm": 0.248046875, | |
| "kl": 0.015538515662774444, | |
| "learning_rate": 3.9802559999999995e-06, | |
| "loss": -0.0002336445264518261, | |
| "num_tokens": 303718.0, | |
| "reward": 0.02416666953358799, | |
| "reward_std": 0.03653385282959789, | |
| "rewards/env_goofspiel_reward/mean": 0.024166667682584374, | |
| "rewards/env_goofspiel_reward/std": 0.10811053770594299, | |
| "sampling/importance_sampling_ratio/max": 1.5562421321868896, | |
| "sampling/importance_sampling_ratio/mean": 0.9962499618530274, | |
| "sampling/importance_sampling_ratio/min": 0.4864930033683777, | |
| "sampling/sampling_logp_difference/max": 0.7846987843513489, | |
| "sampling/sampling_logp_difference/mean": 0.05959557741880417, | |
| "step": 15, | |
| "step_time": 2.4648701715999777 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 365.4, | |
| "completions/max_terminated_length": 365.4, | |
| "completions/mean_length": 286.4166748046875, | |
| "completions/mean_terminated_length": 286.4166748046875, | |
| "completions/min_length": 199.6, | |
| "completions/min_terminated_length": 199.6, | |
| "entropy": 0.6791316926479339, | |
| "epoch": 0.0024000960038401535, | |
| "frac_reward_zero_std": 0.7833333492279053, | |
| "grad_norm": 0.419921875, | |
| "kl": 0.03971561994403601, | |
| "learning_rate": 5.401775999999999e-06, | |
| "loss": 0.00017259303713217377, | |
| "num_tokens": 403266.0, | |
| "reward": 0.05466667115688324, | |
| "reward_std": 0.07825315818190574, | |
| "rewards/env_goofspiel_reward/mean": 0.054666668176651, | |
| "rewards/env_goofspiel_reward/std": 0.1692157879471779, | |
| "sampling/importance_sampling_ratio/max": 1.7059913635253907, | |
| "sampling/importance_sampling_ratio/mean": 1.0254743337631225, | |
| "sampling/importance_sampling_ratio/min": 0.5575755715370179, | |
| "sampling/sampling_logp_difference/max": 0.5765037894248962, | |
| "sampling/sampling_logp_difference/mean": 0.06591257303953171, | |
| "step": 20, | |
| "step_time": 2.4083536974001616 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.6, | |
| "completions/max_terminated_length": 373.6, | |
| "completions/mean_length": 279.07501831054685, | |
| "completions/mean_terminated_length": 279.07501831054685, | |
| "completions/min_length": 206.8, | |
| "completions/min_terminated_length": 206.8, | |
| "entropy": 0.5845985025167465, | |
| "epoch": 0.003000120004800192, | |
| "frac_reward_zero_std": 0.8666666865348815, | |
| "grad_norm": 0.2080078125, | |
| "kl": 0.07888290733098983, | |
| "learning_rate": 6.8232959999999994e-06, | |
| "loss": 0.00014582456788048148, | |
| "num_tokens": 501949.0, | |
| "reward": 0.029750002920627593, | |
| "reward_std": 0.042544259876012805, | |
| "rewards/env_goofspiel_reward/mean": 0.029750000685453415, | |
| "rewards/env_goofspiel_reward/std": 0.11651719957590104, | |
| "sampling/importance_sampling_ratio/max": 1.5395583629608154, | |
| "sampling/importance_sampling_ratio/mean": 0.9871565222740173, | |
| "sampling/importance_sampling_ratio/min": 0.6314660668373108, | |
| "sampling/sampling_logp_difference/max": 0.43876824378967283, | |
| "sampling/sampling_logp_difference/mean": 0.04884573593735695, | |
| "step": 25, | |
| "step_time": 2.4319384599999467 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.6, | |
| "completions/max_terminated_length": 373.6, | |
| "completions/mean_length": 294.0166748046875, | |
| "completions/mean_terminated_length": 294.0166748046875, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.5196643978357315, | |
| "epoch": 0.0036001440057602304, | |
| "frac_reward_zero_std": 0.850000011920929, | |
| "grad_norm": 0.62109375, | |
| "kl": 0.1175543449819088, | |
| "learning_rate": 8.244816e-06, | |
| "loss": 0.0001264215330593288, | |
| "num_tokens": 604591.0, | |
| "reward": 0.044750004261732104, | |
| "reward_std": 0.04985102787613869, | |
| "rewards/env_goofspiel_reward/mean": 0.044750002399086955, | |
| "rewards/env_goofspiel_reward/std": 0.125959412753582, | |
| "sampling/importance_sampling_ratio/max": 1.7827884435653687, | |
| "sampling/importance_sampling_ratio/mean": 0.9959828734397889, | |
| "sampling/importance_sampling_ratio/min": 0.6090725898742676, | |
| "sampling/sampling_logp_difference/max": 0.5527279376983643, | |
| "sampling/sampling_logp_difference/mean": 0.05205402001738548, | |
| "step": 30, | |
| "step_time": 2.3975842315998306 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.0, | |
| "completions/max_terminated_length": 374.0, | |
| "completions/mean_length": 291.40834350585936, | |
| "completions/mean_terminated_length": 291.40834350585936, | |
| "completions/min_length": 219.2, | |
| "completions/min_terminated_length": 219.2, | |
| "entropy": 0.4317367374897003, | |
| "epoch": 0.004200168006720269, | |
| "frac_reward_zero_std": 0.7500000119209289, | |
| "grad_norm": 0.00531005859375, | |
| "kl": 0.12076274678111076, | |
| "learning_rate": 9.666336e-06, | |
| "loss": 0.0003318458097055554, | |
| "num_tokens": 706062.0, | |
| "reward": 0.09500000476837159, | |
| "reward_std": 0.12020815908908844, | |
| "rewards/env_goofspiel_reward/mean": 0.09500000327825546, | |
| "rewards/env_goofspiel_reward/std": 0.21089642345905305, | |
| "sampling/importance_sampling_ratio/max": 1.4750993490219115, | |
| "sampling/importance_sampling_ratio/mean": 0.9911892533302307, | |
| "sampling/importance_sampling_ratio/min": 0.5305798888206482, | |
| "sampling/sampling_logp_difference/max": 0.7321011543273925, | |
| "sampling/sampling_logp_difference/mean": 0.043305123969912526, | |
| "step": 35, | |
| "step_time": 2.43143495660006 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.4, | |
| "completions/max_terminated_length": 373.4, | |
| "completions/mean_length": 288.1250061035156, | |
| "completions/mean_terminated_length": 288.1250061035156, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.351773801445961, | |
| "epoch": 0.004800192007680307, | |
| "frac_reward_zero_std": 0.8666666865348815, | |
| "grad_norm": 0.33203125, | |
| "kl": 0.05839128475636244, | |
| "learning_rate": 9.950639527236806e-06, | |
| "loss": 4.926343681290746e-05, | |
| "num_tokens": 806862.0, | |
| "reward": 0.040000003576278684, | |
| "reward_std": 0.05656854510307312, | |
| "rewards/env_goofspiel_reward/mean": 0.04000000059604645, | |
| "rewards/env_goofspiel_reward/std": 0.11344237923622132, | |
| "sampling/importance_sampling_ratio/max": 1.5833612918853759, | |
| "sampling/importance_sampling_ratio/mean": 1.0104422450065613, | |
| "sampling/importance_sampling_ratio/min": 0.6002241253852845, | |
| "sampling/sampling_logp_difference/max": 0.5244450092315673, | |
| "sampling/sampling_logp_difference/mean": 0.03759892582893372, | |
| "step": 40, | |
| "step_time": 2.415808950400242 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.8, | |
| "completions/max_terminated_length": 373.8, | |
| "completions/mean_length": 291.74168090820314, | |
| "completions/mean_terminated_length": 291.74168090820314, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.3240418329834938, | |
| "epoch": 0.005400216008640346, | |
| "frac_reward_zero_std": 0.9166666865348816, | |
| "grad_norm": 0.0439453125, | |
| "kl": 0.13300706073641777, | |
| "learning_rate": 9.950637606636539e-06, | |
| "loss": 0.0001355916727334261, | |
| "num_tokens": 907008.0, | |
| "reward": 0.034833335876464845, | |
| "reward_std": 0.03535534143447876, | |
| "rewards/env_goofspiel_reward/mean": 0.03483333364129067, | |
| "rewards/env_goofspiel_reward/std": 0.14086373001337052, | |
| "sampling/importance_sampling_ratio/max": 1.4610427141189575, | |
| "sampling/importance_sampling_ratio/mean": 0.9817873358726501, | |
| "sampling/importance_sampling_ratio/min": 0.6326651930809021, | |
| "sampling/sampling_logp_difference/max": 0.46174774169921873, | |
| "sampling/sampling_logp_difference/mean": 0.040712539479136466, | |
| "step": 45, | |
| "step_time": 2.417739940000138 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 365.0, | |
| "completions/max_terminated_length": 365.0, | |
| "completions/mean_length": 279.3333435058594, | |
| "completions/mean_terminated_length": 279.3333435058594, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.3186733976006508, | |
| "epoch": 0.006000240009600384, | |
| "frac_reward_zero_std": 0.8833333611488342, | |
| "grad_norm": 0.353515625, | |
| "kl": 0.06966875828802585, | |
| "learning_rate": 9.950634208652256e-06, | |
| "loss": 0.00012671776348724962, | |
| "num_tokens": 1005578.0, | |
| "reward": 0.034916669048834593, | |
| "reward_std": 0.049615327350329606, | |
| "rewards/env_goofspiel_reward/mean": 0.03491666756453924, | |
| "rewards/env_goofspiel_reward/std": 0.13708889302797617, | |
| "sampling/importance_sampling_ratio/max": 1.5069894075393677, | |
| "sampling/importance_sampling_ratio/mean": 1.0077889800071715, | |
| "sampling/importance_sampling_ratio/min": 0.7747546076774597, | |
| "sampling/sampling_logp_difference/max": 0.3697906732559204, | |
| "sampling/sampling_logp_difference/mean": 0.03059108220040798, | |
| "step": 50, | |
| "step_time": 2.3773288868003872 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.0, | |
| "completions/max_terminated_length": 374.0, | |
| "completions/mean_length": 305.49168090820314, | |
| "completions/mean_terminated_length": 305.49168090820314, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.29847966730594633, | |
| "epoch": 0.006600264010560422, | |
| "frac_reward_zero_std": 0.900000023841858, | |
| "grad_norm": 0.30078125, | |
| "kl": 0.08750866688787937, | |
| "learning_rate": 9.950629333285305e-06, | |
| "loss": -2.145505277439952e-06, | |
| "num_tokens": 1110455.0, | |
| "reward": 0.035000003129243853, | |
| "reward_std": 0.04949747696518898, | |
| "rewards/env_goofspiel_reward/mean": 0.03500000052154064, | |
| "rewards/env_goofspiel_reward/std": 0.1412438616156578, | |
| "sampling/importance_sampling_ratio/max": 1.3705053567886352, | |
| "sampling/importance_sampling_ratio/mean": 1.0030481100082398, | |
| "sampling/importance_sampling_ratio/min": 0.6937733888626099, | |
| "sampling/sampling_logp_difference/max": 0.4317422866821289, | |
| "sampling/sampling_logp_difference/mean": 0.03450411073863506, | |
| "step": 55, | |
| "step_time": 2.4346962132000045 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 374.0, | |
| "completions/max_terminated_length": 374.0, | |
| "completions/mean_length": 294.62501220703126, | |
| "completions/mean_terminated_length": 294.62501220703126, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.2199392184615135, | |
| "epoch": 0.007200288011520461, | |
| "frac_reward_zero_std": 0.7666666746139527, | |
| "grad_norm": 0.1328125, | |
| "kl": 0.20852462351322174, | |
| "learning_rate": 9.950622980537618e-06, | |
| "loss": -1.4243402983993291e-05, | |
| "num_tokens": 1211770.0, | |
| "reward": 0.08483333513140678, | |
| "reward_std": 0.09215958416461945, | |
| "rewards/env_goofspiel_reward/mean": 0.08483333475887775, | |
| "rewards/env_goofspiel_reward/std": 0.2297523573040962, | |
| "sampling/importance_sampling_ratio/max": 1.3913918256759643, | |
| "sampling/importance_sampling_ratio/mean": 0.9923399925231934, | |
| "sampling/importance_sampling_ratio/min": 0.7218815922737122, | |
| "sampling/sampling_logp_difference/max": 0.3741787910461426, | |
| "sampling/sampling_logp_difference/mean": 0.023421294614672662, | |
| "step": 60, | |
| "step_time": 2.4339242404003016 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 365.0, | |
| "completions/max_terminated_length": 365.0, | |
| "completions/mean_length": 299.108349609375, | |
| "completions/mean_terminated_length": 299.108349609375, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.14547686353325845, | |
| "epoch": 0.0078003120124804995, | |
| "frac_reward_zero_std": 0.8666666865348815, | |
| "grad_norm": 0.01318359375, | |
| "kl": 1.5776881486177445, | |
| "learning_rate": 9.950615150411705e-06, | |
| "loss": 0.00020953675266355276, | |
| "num_tokens": 1315125.0, | |
| "reward": 0.03991667032241821, | |
| "reward_std": 0.05668639615178108, | |
| "rewards/env_goofspiel_reward/mean": 0.03991666734218598, | |
| "rewards/env_goofspiel_reward/std": 0.15578595399856568, | |
| "sampling/importance_sampling_ratio/max": 1.5614466190338134, | |
| "sampling/importance_sampling_ratio/mean": 1.0267313480377198, | |
| "sampling/importance_sampling_ratio/min": 0.8062139034271241, | |
| "sampling/sampling_logp_difference/max": 0.38132710456848146, | |
| "sampling/sampling_logp_difference/mean": 0.017732756957411767, | |
| "step": 65, | |
| "step_time": 2.3855804428001646 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 373.4, | |
| "completions/max_terminated_length": 373.4, | |
| "completions/mean_length": 288.8666748046875, | |
| "completions/mean_terminated_length": 288.8666748046875, | |
| "completions/min_length": 212.0, | |
| "completions/min_terminated_length": 212.0, | |
| "entropy": 0.36806915551424024, | |
| "epoch": 0.008400336013440538, | |
| "frac_reward_zero_std": 0.8333333730697632, | |
| "grad_norm": 0.103515625, | |
| "kl": 0.154165069013834, | |
| "learning_rate": 9.950605842910668e-06, | |
| "loss": 3.9057480171322824e-05, | |
| "num_tokens": 1415706.0, | |
| "reward": 0.0650000050663948, | |
| "reward_std": 0.07778174877166748, | |
| "rewards/env_goofspiel_reward/mean": 0.06500000134110451, | |
| "rewards/env_goofspiel_reward/std": 0.20113323032855987, | |
| "sampling/importance_sampling_ratio/max": 1.3685919523239136, | |
| "sampling/importance_sampling_ratio/mean": 0.9923707365989685, | |
| "sampling/importance_sampling_ratio/min": 0.6977368593215942, | |
| "sampling/sampling_logp_difference/max": 0.37837958335876465, | |
| "sampling/sampling_logp_difference/mean": 0.023688069358468056, | |
| "step": 70, | |
| "step_time": 2.419219413600149 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 365.0, | |
| "completions/max_terminated_length": 365.0, | |
| "completions/mean_length": 296.4750122070312, | |
| "completions/mean_terminated_length": 296.4750122070312, | |
| "completions/min_length": 218.8, | |
| "completions/min_terminated_length": 218.8, | |
| "entropy": 0.44495113492012023, | |
| "epoch": 0.009000360014400577, | |
| "frac_reward_zero_std": 0.7666666984558106, | |
| "grad_norm": 0.13671875, | |
| "kl": 0.308458948135376, | |
| "learning_rate": 9.950595058038197e-06, | |
| "loss": 0.00013219380052760245, | |
| "num_tokens": 1517756.0, | |
| "reward": 0.054750004410743715, | |
| "reward_std": 0.07813530415296555, | |
| "rewards/env_goofspiel_reward/mean": 0.05475000143051147, | |
| "rewards/env_goofspiel_reward/std": 0.17614837288856505, | |
| "sampling/importance_sampling_ratio/max": 1.4038194417953491, | |
| "sampling/importance_sampling_ratio/mean": 0.9863661766052246, | |
| "sampling/importance_sampling_ratio/min": 0.6411556363105774, | |
| "sampling/sampling_logp_difference/max": 0.5436622142791748, | |
| "sampling/sampling_logp_difference/mean": 0.029225154593586922, | |
| "step": 75, | |
| "step_time": 2.370857671000158 | |
| }, | |
| { | |
| "epoch": 0.009000360014400577, | |
| "eval_clip_ratio/high_max": 0.0, | |
| "eval_clip_ratio/high_mean": 0.0, | |
| "eval_clip_ratio/low_mean": 0.0, | |
| "eval_clip_ratio/low_min": 0.0, | |
| "eval_clip_ratio/region_mean": 0.0, | |
| "eval_completions/clipped_ratio": 0.0, | |
| "eval_completions/max_length": 274.2, | |
| "eval_completions/max_terminated_length": 274.2, | |
| "eval_completions/mean_length": 254.7, | |
| "eval_completions/mean_terminated_length": 254.7, | |
| "eval_completions/min_length": 235.4, | |
| "eval_completions/min_terminated_length": 235.4, | |
| "eval_entropy": 0.4275285005569458, | |
| "eval_frac_reward_zero_std": 0.6, | |
| "eval_kl": 0.317794269323349, | |
| "eval_loss": 9.086851787287742e-05, | |
| "eval_num_tokens": 1517756.0, | |
| "eval_reward": 0.12000000476837158, | |
| "eval_reward_std": 0.16970562934875488, | |
| "eval_rewards/env_goofspiel_reward/mean": 0.12000000476837158, | |
| "eval_rewards/env_goofspiel_reward/std": 0.24000003337860107, | |
| "eval_runtime": 1.6574, | |
| "eval_samples_per_second": 6.034, | |
| "eval_sampling/importance_sampling_ratio/max": 1.1251073837280274, | |
| "eval_sampling/importance_sampling_ratio/mean": 0.9802677392959595, | |
| "eval_sampling/importance_sampling_ratio/min": 0.8105595707893372, | |
| "eval_sampling/sampling_logp_difference/max": 0.2066459536552429, | |
| "eval_sampling/sampling_logp_difference/mean": 0.03130748393014073, | |
| "eval_steps_per_second": 1.81, | |
| "step": 75 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 24999, | |
| "num_input_tokens_seen": 1517756, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |