Upload folder using huggingface_hub
Browse files- .gitattributes +2 -0
- added_tokens.json +28 -0
- config.json +30 -0
- dpo_model_3epochs/.gitattributes +35 -0
- dpo_model_3epochs/added_tokens.json +28 -0
- dpo_model_3epochs/config.json +30 -0
- dpo_model_3epochs/dpo_model_3epochs/.gitattributes +35 -0
- dpo_model_3epochs/dpo_model_3epochs/added_tokens.json +28 -0
- dpo_model_3epochs/dpo_model_3epochs/config.json +30 -0
- dpo_model_3epochs/generation_config.json +6 -0
- dpo_model_3epochs/merges.txt +0 -0
- dpo_model_3epochs/model.safetensors +3 -0
- dpo_model_3epochs/optimizer.pt +3 -0
- dpo_model_3epochs/rng_state.pth +3 -0
- dpo_model_3epochs/scheduler.pt +3 -0
- dpo_model_3epochs/special_tokens_map.json +31 -0
- dpo_model_3epochs/tokenizer.json +3 -0
- dpo_model_3epochs/tokenizer_config.json +240 -0
- dpo_model_3epochs/trainer_state.json +1726 -0
- dpo_model_3epochs/training_args.bin +3 -0
- dpo_model_3epochs/vocab.json +0 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +240 -0
- trainer_state.json +1726 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
dpo_model_3epochs/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151643,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"max_position_embeddings": 32768,
|
| 15 |
+
"max_window_layers": 28,
|
| 16 |
+
"model_type": "qwen3",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 28,
|
| 19 |
+
"num_key_value_heads": 8,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": true,
|
| 25 |
+
"torch_dtype": "bfloat16",
|
| 26 |
+
"transformers_version": "4.51.3",
|
| 27 |
+
"use_cache": false,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936
|
| 30 |
+
}
|
dpo_model_3epochs/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
dpo_model_3epochs/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
dpo_model_3epochs/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151643,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"max_position_embeddings": 32768,
|
| 15 |
+
"max_window_layers": 28,
|
| 16 |
+
"model_type": "qwen3",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 28,
|
| 19 |
+
"num_key_value_heads": 8,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": true,
|
| 25 |
+
"torch_dtype": "bfloat16",
|
| 26 |
+
"transformers_version": "4.51.3",
|
| 27 |
+
"use_cache": false,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936
|
| 30 |
+
}
|
dpo_model_3epochs/dpo_model_3epochs/.gitattributes
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
dpo_model_3epochs/dpo_model_3epochs/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
dpo_model_3epochs/dpo_model_3epochs/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 151643,
|
| 8 |
+
"eos_token_id": 151643,
|
| 9 |
+
"head_dim": 128,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"max_position_embeddings": 32768,
|
| 15 |
+
"max_window_layers": 28,
|
| 16 |
+
"model_type": "qwen3",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 28,
|
| 19 |
+
"num_key_value_heads": 8,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 1000000,
|
| 23 |
+
"sliding_window": null,
|
| 24 |
+
"tie_word_embeddings": true,
|
| 25 |
+
"torch_dtype": "bfloat16",
|
| 26 |
+
"transformers_version": "4.51.3",
|
| 27 |
+
"use_cache": false,
|
| 28 |
+
"use_sliding_window": false,
|
| 29 |
+
"vocab_size": 151936
|
| 30 |
+
}
|
dpo_model_3epochs/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"eos_token_id": 151643,
|
| 4 |
+
"max_new_tokens": 2048,
|
| 5 |
+
"transformers_version": "4.51.3"
|
| 6 |
+
}
|
dpo_model_3epochs/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dpo_model_3epochs/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae8feb1e7622241976a5843cd1b296ffae1b5b65b5adfb1fe1d0ceddae8bfac9
|
| 3 |
+
size 1192135096
|
dpo_model_3epochs/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2485bf9a2879e8c4f855840a82e342ffab34fbb8d5bf28103b4ad7c839efc316
|
| 3 |
+
size 2384460363
|
dpo_model_3epochs/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95d744506ed8242dbe82c0f3357716f73248e5153ff68604326958faa28d9296
|
| 3 |
+
size 14645
|
dpo_model_3epochs/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18d1bfdafd4174e2c90ffd290b1a170a373f9028a1e742c7e6606e40b86c917e
|
| 3 |
+
size 1465
|
dpo_model_3epochs/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|endoftext|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
dpo_model_3epochs/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
dpo_model_3epochs/tokenizer_config.json
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = 
message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
| 231 |
+
"clean_up_tokenization_spaces": false,
|
| 232 |
+
"eos_token": "<|endoftext|>",
|
| 233 |
+
"errors": "replace",
|
| 234 |
+
"extra_special_tokens": {},
|
| 235 |
+
"model_max_length": 131072,
|
| 236 |
+
"pad_token": "<|endoftext|>",
|
| 237 |
+
"split_special_tokens": false,
|
| 238 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 239 |
+
"unk_token": null
|
| 240 |
+
}
|
dpo_model_3epochs/trainer_state.json
ADDED
|
@@ -0,0 +1,1726 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.997458513978173,
|
| 6 |
+
"eval_steps": 200,
|
| 7 |
+
"global_step": 2508,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.02989983555090447,
|
| 14 |
+
"grad_norm": 78.5,
|
| 15 |
+
"learning_rate": 4.780876494023904e-07,
|
| 16 |
+
"logits/chosen": -0.8346603512763977,
|
| 17 |
+
"logits/rejected": -0.5625396966934204,
|
| 18 |
+
"logps/chosen": -311.11248779296875,
|
| 19 |
+
"logps/rejected": -290.71624755859375,
|
| 20 |
+
"loss": 0.6974,
|
| 21 |
+
"rewards/accuracies": 0.3199999928474426,
|
| 22 |
+
"rewards/chosen": -0.005879516713321209,
|
| 23 |
+
"rewards/margins": -0.0028140258509665728,
|
| 24 |
+
"rewards/rejected": -0.003072815015912056,
|
| 25 |
+
"step": 25
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"epoch": 0.05979967110180894,
|
| 29 |
+
"grad_norm": 112.5,
|
| 30 |
+
"learning_rate": 9.760956175298805e-07,
|
| 31 |
+
"logits/chosen": -0.8477816581726074,
|
| 32 |
+
"logits/rejected": -0.5839244723320007,
|
| 33 |
+
"logps/chosen": -341.1449890136719,
|
| 34 |
+
"logps/rejected": -303.2749938964844,
|
| 35 |
+
"loss": 0.6939,
|
| 36 |
+
"rewards/accuracies": 0.33500000834465027,
|
| 37 |
+
"rewards/chosen": -0.01889648474752903,
|
| 38 |
+
"rewards/margins": 0.0013772583333775401,
|
| 39 |
+
"rewards/rejected": -0.020271606743335724,
|
| 40 |
+
"step": 50
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 0.08969950665271341,
|
| 44 |
+
"grad_norm": 89.5,
|
| 45 |
+
"learning_rate": 1.4741035856573708e-06,
|
| 46 |
+
"logits/chosen": -0.7348077893257141,
|
| 47 |
+
"logits/rejected": -0.419241338968277,
|
| 48 |
+
"logps/chosen": -311.4237365722656,
|
| 49 |
+
"logps/rejected": -284.5274963378906,
|
| 50 |
+
"loss": 0.7,
|
| 51 |
+
"rewards/accuracies": 0.28999999165534973,
|
| 52 |
+
"rewards/chosen": -0.020579833537340164,
|
| 53 |
+
"rewards/margins": -0.008827819488942623,
|
| 54 |
+
"rewards/rejected": -0.011761474423110485,
|
| 55 |
+
"step": 75
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 0.11959934220361788,
|
| 59 |
+
"grad_norm": 89.5,
|
| 60 |
+
"learning_rate": 1.9721115537848607e-06,
|
| 61 |
+
"logits/chosen": -0.9120362997055054,
|
| 62 |
+
"logits/rejected": -0.566675066947937,
|
| 63 |
+
"logps/chosen": -322.989990234375,
|
| 64 |
+
"logps/rejected": -276.8037414550781,
|
| 65 |
+
"loss": 0.6868,
|
| 66 |
+
"rewards/accuracies": 0.3675000071525574,
|
| 67 |
+
"rewards/chosen": -0.027477417141199112,
|
| 68 |
+
"rewards/margins": 0.018669739365577698,
|
| 69 |
+
"rewards/rejected": -0.04612060636281967,
|
| 70 |
+
"step": 100
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 0.14949917775452234,
|
| 74 |
+
"grad_norm": 83.0,
|
| 75 |
+
"learning_rate": 2.470119521912351e-06,
|
| 76 |
+
"logits/chosen": -0.8410671353340149,
|
| 77 |
+
"logits/rejected": -0.43034911155700684,
|
| 78 |
+
"logps/chosen": -297.4024963378906,
|
| 79 |
+
"logps/rejected": -304.4224853515625,
|
| 80 |
+
"loss": 0.6832,
|
| 81 |
+
"rewards/accuracies": 0.36000001430511475,
|
| 82 |
+
"rewards/chosen": -0.05832824856042862,
|
| 83 |
+
"rewards/margins": 0.02584075927734375,
|
| 84 |
+
"rewards/rejected": -0.08419036865234375,
|
| 85 |
+
"step": 125
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"epoch": 0.17939901330542682,
|
| 89 |
+
"grad_norm": 106.5,
|
| 90 |
+
"learning_rate": 2.968127490039841e-06,
|
| 91 |
+
"logits/chosen": -0.9279866814613342,
|
| 92 |
+
"logits/rejected": -0.6811022758483887,
|
| 93 |
+
"logps/chosen": -312.67498779296875,
|
| 94 |
+
"logps/rejected": -285.7799987792969,
|
| 95 |
+
"loss": 0.6709,
|
| 96 |
+
"rewards/accuracies": 0.49000000953674316,
|
| 97 |
+
"rewards/chosen": -0.07547790557146072,
|
| 98 |
+
"rewards/margins": 0.056133728474378586,
|
| 99 |
+
"rewards/rejected": -0.1316046118736267,
|
| 100 |
+
"step": 150
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.2092988488563313,
|
| 104 |
+
"grad_norm": 96.0,
|
| 105 |
+
"learning_rate": 3.466135458167331e-06,
|
| 106 |
+
"logits/chosen": -0.8703573346138,
|
| 107 |
+
"logits/rejected": -0.5601403713226318,
|
| 108 |
+
"logps/chosen": -323.947509765625,
|
| 109 |
+
"logps/rejected": -292.8074951171875,
|
| 110 |
+
"loss": 0.6696,
|
| 111 |
+
"rewards/accuracies": 0.4950000047683716,
|
| 112 |
+
"rewards/chosen": -0.11684814095497131,
|
| 113 |
+
"rewards/margins": 0.06319641321897507,
|
| 114 |
+
"rewards/rejected": -0.1800549328327179,
|
| 115 |
+
"step": 175
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.23919868440723577,
|
| 119 |
+
"grad_norm": 99.0,
|
| 120 |
+
"learning_rate": 3.9641434262948205e-06,
|
| 121 |
+
"logits/chosen": -0.9258654713630676,
|
| 122 |
+
"logits/rejected": -0.5686477422714233,
|
| 123 |
+
"logps/chosen": -328.7449951171875,
|
| 124 |
+
"logps/rejected": -316.5574951171875,
|
| 125 |
+
"loss": 0.6579,
|
| 126 |
+
"rewards/accuracies": 0.550000011920929,
|
| 127 |
+
"rewards/chosen": -0.1883123815059662,
|
| 128 |
+
"rewards/margins": 0.09867187589406967,
|
| 129 |
+
"rewards/rejected": -0.28693297505378723,
|
| 130 |
+
"step": 200
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 0.23919868440723577,
|
| 134 |
+
"eval_logits/chosen": -0.9216321706771851,
|
| 135 |
+
"eval_logits/rejected": -0.7277408838272095,
|
| 136 |
+
"eval_logps/chosen": -320.7849426269531,
|
| 137 |
+
"eval_logps/rejected": -293.8709716796875,
|
| 138 |
+
"eval_loss": 0.6465986371040344,
|
| 139 |
+
"eval_rewards/accuracies": 0.560387909412384,
|
| 140 |
+
"eval_rewards/chosen": -0.19119606912136078,
|
| 141 |
+
"eval_rewards/margins": 0.1261032223701477,
|
| 142 |
+
"eval_rewards/rejected": -0.31729716062545776,
|
| 143 |
+
"eval_runtime": 877.9315,
|
| 144 |
+
"eval_samples_per_second": 1.694,
|
| 145 |
+
"eval_steps_per_second": 0.212,
|
| 146 |
+
"step": 200
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"epoch": 0.2690985199581402,
|
| 150 |
+
"grad_norm": 87.0,
|
| 151 |
+
"learning_rate": 4.462151394422311e-06,
|
| 152 |
+
"logits/chosen": -0.8007558584213257,
|
| 153 |
+
"logits/rejected": -0.505867600440979,
|
| 154 |
+
"logps/chosen": -320.7512512207031,
|
| 155 |
+
"logps/rejected": -311.8299865722656,
|
| 156 |
+
"loss": 0.6444,
|
| 157 |
+
"rewards/accuracies": 0.5649999976158142,
|
| 158 |
+
"rewards/chosen": -0.2540551722049713,
|
| 159 |
+
"rewards/margins": 0.14147095382213593,
|
| 160 |
+
"rewards/rejected": -0.3954962193965912,
|
| 161 |
+
"step": 225
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 0.2989983555090447,
|
| 165 |
+
"grad_norm": 96.5,
|
| 166 |
+
"learning_rate": 4.960159362549802e-06,
|
| 167 |
+
"logits/chosen": -0.9090196490287781,
|
| 168 |
+
"logits/rejected": -0.6456773281097412,
|
| 169 |
+
"logps/chosen": -323.7200012207031,
|
| 170 |
+
"logps/rejected": -295.2149963378906,
|
| 171 |
+
"loss": 0.6255,
|
| 172 |
+
"rewards/accuracies": 0.6000000238418579,
|
| 173 |
+
"rewards/chosen": -0.2805468738079071,
|
| 174 |
+
"rewards/margins": 0.19930054247379303,
|
| 175 |
+
"rewards/rejected": -0.47991272807121277,
|
| 176 |
+
"step": 250
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"epoch": 0.32889819105994916,
|
| 180 |
+
"grad_norm": 91.0,
|
| 181 |
+
"learning_rate": 4.9490474080638015e-06,
|
| 182 |
+
"logits/chosen": -0.9534767270088196,
|
| 183 |
+
"logits/rejected": -0.6329247951507568,
|
| 184 |
+
"logps/chosen": -319.1549987792969,
|
| 185 |
+
"logps/rejected": -283.88751220703125,
|
| 186 |
+
"loss": 0.6192,
|
| 187 |
+
"rewards/accuracies": 0.5924999713897705,
|
| 188 |
+
"rewards/chosen": -0.29086607694625854,
|
| 189 |
+
"rewards/margins": 0.23339904844760895,
|
| 190 |
+
"rewards/rejected": -0.5240704417228699,
|
| 191 |
+
"step": 275
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.35879802661085364,
|
| 195 |
+
"grad_norm": 70.5,
|
| 196 |
+
"learning_rate": 4.8936641559592385e-06,
|
| 197 |
+
"logits/chosen": -0.9436456561088562,
|
| 198 |
+
"logits/rejected": -0.7789434790611267,
|
| 199 |
+
"logps/chosen": -349.5050048828125,
|
| 200 |
+
"logps/rejected": -310.48748779296875,
|
| 201 |
+
"loss": 0.627,
|
| 202 |
+
"rewards/accuracies": 0.6349999904632568,
|
| 203 |
+
"rewards/chosen": -0.30020782351493835,
|
| 204 |
+
"rewards/margins": 0.23243407905101776,
|
| 205 |
+
"rewards/rejected": -0.532727062702179,
|
| 206 |
+
"step": 300
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.3886978621617581,
|
| 210 |
+
"grad_norm": 101.0,
|
| 211 |
+
"learning_rate": 4.838280903854675e-06,
|
| 212 |
+
"logits/chosen": -0.9607565402984619,
|
| 213 |
+
"logits/rejected": -0.7166936993598938,
|
| 214 |
+
"logps/chosen": -317.0874938964844,
|
| 215 |
+
"logps/rejected": -289.0824890136719,
|
| 216 |
+
"loss": 0.5906,
|
| 217 |
+
"rewards/accuracies": 0.6524999737739563,
|
| 218 |
+
"rewards/chosen": -0.4176098704338074,
|
| 219 |
+
"rewards/margins": 0.3300067186355591,
|
| 220 |
+
"rewards/rejected": -0.7473974823951721,
|
| 221 |
+
"step": 325
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"epoch": 0.4185976977126626,
|
| 225 |
+
"grad_norm": 94.0,
|
| 226 |
+
"learning_rate": 4.782897651750112e-06,
|
| 227 |
+
"logits/chosen": -0.9818115234375,
|
| 228 |
+
"logits/rejected": -0.6833120584487915,
|
| 229 |
+
"logps/chosen": -321.1875,
|
| 230 |
+
"logps/rejected": -316.58624267578125,
|
| 231 |
+
"loss": 0.577,
|
| 232 |
+
"rewards/accuracies": 0.675000011920929,
|
| 233 |
+
"rewards/chosen": -0.4978076219558716,
|
| 234 |
+
"rewards/margins": 0.39054566621780396,
|
| 235 |
+
"rewards/rejected": -0.8884375095367432,
|
| 236 |
+
"step": 350
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"epoch": 0.44849753326356706,
|
| 240 |
+
"grad_norm": 83.5,
|
| 241 |
+
"learning_rate": 4.727514399645548e-06,
|
| 242 |
+
"logits/chosen": -1.0211011171340942,
|
| 243 |
+
"logits/rejected": -0.7218142747879028,
|
| 244 |
+
"logps/chosen": -307.9674987792969,
|
| 245 |
+
"logps/rejected": -288.7850036621094,
|
| 246 |
+
"loss": 0.5544,
|
| 247 |
+
"rewards/accuracies": 0.6974999904632568,
|
| 248 |
+
"rewards/chosen": -0.4097009301185608,
|
| 249 |
+
"rewards/margins": 0.4377111792564392,
|
| 250 |
+
"rewards/rejected": -0.8475390672683716,
|
| 251 |
+
"step": 375
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"epoch": 0.47839736881447154,
|
| 255 |
+
"grad_norm": 77.5,
|
| 256 |
+
"learning_rate": 4.672131147540984e-06,
|
| 257 |
+
"logits/chosen": -0.9680676460266113,
|
| 258 |
+
"logits/rejected": -0.7582107782363892,
|
| 259 |
+
"logps/chosen": -337.9375,
|
| 260 |
+
"logps/rejected": -313.7749938964844,
|
| 261 |
+
"loss": 0.5977,
|
| 262 |
+
"rewards/accuracies": 0.6549999713897705,
|
| 263 |
+
"rewards/chosen": -0.5489477515220642,
|
| 264 |
+
"rewards/margins": 0.35999757051467896,
|
| 265 |
+
"rewards/rejected": -0.9089636206626892,
|
| 266 |
+
"step": 400
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"epoch": 0.47839736881447154,
|
| 270 |
+
"eval_logits/chosen": -1.041106939315796,
|
| 271 |
+
"eval_logits/rejected": -0.8698605895042419,
|
| 272 |
+
"eval_logps/chosen": -323.7284851074219,
|
| 273 |
+
"eval_logps/rejected": -299.6156005859375,
|
| 274 |
+
"eval_loss": 0.5722406506538391,
|
| 275 |
+
"eval_rewards/accuracies": 0.6610022783279419,
|
| 276 |
+
"eval_rewards/chosen": -0.4932539761066437,
|
| 277 |
+
"eval_rewards/margins": 0.40423059463500977,
|
| 278 |
+
"eval_rewards/rejected": -0.8973480463027954,
|
| 279 |
+
"eval_runtime": 876.344,
|
| 280 |
+
"eval_samples_per_second": 1.697,
|
| 281 |
+
"eval_steps_per_second": 0.212,
|
| 282 |
+
"step": 400
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.508297204365376,
|
| 286 |
+
"grad_norm": 84.0,
|
| 287 |
+
"learning_rate": 4.61674789543642e-06,
|
| 288 |
+
"logits/chosen": -1.2390661239624023,
|
| 289 |
+
"logits/rejected": -0.9836773872375488,
|
| 290 |
+
"logps/chosen": -328.1875,
|
| 291 |
+
"logps/rejected": -317.32501220703125,
|
| 292 |
+
"loss": 0.5527,
|
| 293 |
+
"rewards/accuracies": 0.6675000190734863,
|
| 294 |
+
"rewards/chosen": -0.6254773139953613,
|
| 295 |
+
"rewards/margins": 0.5287072658538818,
|
| 296 |
+
"rewards/rejected": -1.153835415840149,
|
| 297 |
+
"step": 425
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.5381970399162804,
|
| 301 |
+
"grad_norm": 93.0,
|
| 302 |
+
"learning_rate": 4.561364643331857e-06,
|
| 303 |
+
"logits/chosen": -1.0737494230270386,
|
| 304 |
+
"logits/rejected": -0.8683199882507324,
|
| 305 |
+
"logps/chosen": -316.00250244140625,
|
| 306 |
+
"logps/rejected": -295.9649963378906,
|
| 307 |
+
"loss": 0.5736,
|
| 308 |
+
"rewards/accuracies": 0.6600000262260437,
|
| 309 |
+
"rewards/chosen": -0.539447009563446,
|
| 310 |
+
"rewards/margins": 0.46495360136032104,
|
| 311 |
+
"rewards/rejected": -1.0048657655715942,
|
| 312 |
+
"step": 450
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"epoch": 0.5680968754671849,
|
| 316 |
+
"grad_norm": 73.5,
|
| 317 |
+
"learning_rate": 4.505981391227293e-06,
|
| 318 |
+
"logits/chosen": -1.052968144416809,
|
| 319 |
+
"logits/rejected": -0.7523078322410583,
|
| 320 |
+
"logps/chosen": -318.50250244140625,
|
| 321 |
+
"logps/rejected": -313.8175048828125,
|
| 322 |
+
"loss": 0.5422,
|
| 323 |
+
"rewards/accuracies": 0.7149999737739563,
|
| 324 |
+
"rewards/chosen": -0.5196704268455505,
|
| 325 |
+
"rewards/margins": 0.5570727586746216,
|
| 326 |
+
"rewards/rejected": -1.0764819383621216,
|
| 327 |
+
"step": 475
|
| 328 |
+
},
|
| 329 |
+
{
|
| 330 |
+
"epoch": 0.5979967110180894,
|
| 331 |
+
"grad_norm": 70.0,
|
| 332 |
+
"learning_rate": 4.4505981391227295e-06,
|
| 333 |
+
"logits/chosen": -1.1461485624313354,
|
| 334 |
+
"logits/rejected": -0.9354357719421387,
|
| 335 |
+
"logps/chosen": -324.4750061035156,
|
| 336 |
+
"logps/rejected": -294.0775146484375,
|
| 337 |
+
"loss": 0.5415,
|
| 338 |
+
"rewards/accuracies": 0.7074999809265137,
|
| 339 |
+
"rewards/chosen": -0.518980085849762,
|
| 340 |
+
"rewards/margins": 0.5734081864356995,
|
| 341 |
+
"rewards/rejected": -1.092441439628601,
|
| 342 |
+
"step": 500
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"epoch": 0.6278965465689939,
|
| 346 |
+
"grad_norm": 84.0,
|
| 347 |
+
"learning_rate": 4.395214887018166e-06,
|
| 348 |
+
"logits/chosen": -1.091801404953003,
|
| 349 |
+
"logits/rejected": -0.8006445169448853,
|
| 350 |
+
"logps/chosen": -323.1724853515625,
|
| 351 |
+
"logps/rejected": -294.4674987792969,
|
| 352 |
+
"loss": 0.5646,
|
| 353 |
+
"rewards/accuracies": 0.6700000166893005,
|
| 354 |
+
"rewards/chosen": -0.672253429889679,
|
| 355 |
+
"rewards/margins": 0.5069983005523682,
|
| 356 |
+
"rewards/rejected": -1.1792798042297363,
|
| 357 |
+
"step": 525
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"epoch": 0.6577963821198983,
|
| 361 |
+
"grad_norm": 95.0,
|
| 362 |
+
"learning_rate": 4.339831634913603e-06,
|
| 363 |
+
"logits/chosen": -1.220596194267273,
|
| 364 |
+
"logits/rejected": -0.9236291646957397,
|
| 365 |
+
"logps/chosen": -316.7950134277344,
|
| 366 |
+
"logps/rejected": -302.0824890136719,
|
| 367 |
+
"loss": 0.5178,
|
| 368 |
+
"rewards/accuracies": 0.737500011920929,
|
| 369 |
+
"rewards/chosen": -0.7468109130859375,
|
| 370 |
+
"rewards/margins": 0.6105853319168091,
|
| 371 |
+
"rewards/rejected": -1.3566796779632568,
|
| 372 |
+
"step": 550
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"epoch": 0.6876962176708028,
|
| 376 |
+
"grad_norm": 100.0,
|
| 377 |
+
"learning_rate": 4.284448382809039e-06,
|
| 378 |
+
"logits/chosen": -1.0421770811080933,
|
| 379 |
+
"logits/rejected": -0.7285050749778748,
|
| 380 |
+
"logps/chosen": -308.42498779296875,
|
| 381 |
+
"logps/rejected": -269.7037353515625,
|
| 382 |
+
"loss": 0.5448,
|
| 383 |
+
"rewards/accuracies": 0.6850000023841858,
|
| 384 |
+
"rewards/chosen": -0.7317401170730591,
|
| 385 |
+
"rewards/margins": 0.5794018507003784,
|
| 386 |
+
"rewards/rejected": -1.3115381002426147,
|
| 387 |
+
"step": 575
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"epoch": 0.7175960532217073,
|
| 391 |
+
"grad_norm": 97.5,
|
| 392 |
+
"learning_rate": 4.229065130704476e-06,
|
| 393 |
+
"logits/chosen": -1.1298235654830933,
|
| 394 |
+
"logits/rejected": -0.7811802625656128,
|
| 395 |
+
"logps/chosen": -322.0574951171875,
|
| 396 |
+
"logps/rejected": -309.9750061035156,
|
| 397 |
+
"loss": 0.5292,
|
| 398 |
+
"rewards/accuracies": 0.7124999761581421,
|
| 399 |
+
"rewards/chosen": -0.590954601764679,
|
| 400 |
+
"rewards/margins": 0.6085253953933716,
|
| 401 |
+
"rewards/rejected": -1.1989331245422363,
|
| 402 |
+
"step": 600
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 0.7175960532217073,
|
| 406 |
+
"eval_logits/chosen": -1.078187346458435,
|
| 407 |
+
"eval_logits/rejected": -0.9206746220588684,
|
| 408 |
+
"eval_logps/chosen": -324.5967712402344,
|
| 409 |
+
"eval_logps/rejected": -301.7204284667969,
|
| 410 |
+
"eval_loss": 0.5492891669273376,
|
| 411 |
+
"eval_rewards/accuracies": 0.6757872104644775,
|
| 412 |
+
"eval_rewards/chosen": -0.5633505582809448,
|
| 413 |
+
"eval_rewards/margins": 0.5408346652984619,
|
| 414 |
+
"eval_rewards/rejected": -1.1038333177566528,
|
| 415 |
+
"eval_runtime": 876.4047,
|
| 416 |
+
"eval_samples_per_second": 1.697,
|
| 417 |
+
"eval_steps_per_second": 0.212,
|
| 418 |
+
"step": 600
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"epoch": 0.7474958887726117,
|
| 422 |
+
"grad_norm": 87.5,
|
| 423 |
+
"learning_rate": 4.173681878599912e-06,
|
| 424 |
+
"logits/chosen": -1.1809699535369873,
|
| 425 |
+
"logits/rejected": -0.8887664675712585,
|
| 426 |
+
"logps/chosen": -303.6575012207031,
|
| 427 |
+
"logps/rejected": -294.7774963378906,
|
| 428 |
+
"loss": 0.5261,
|
| 429 |
+
"rewards/accuracies": 0.7275000214576721,
|
| 430 |
+
"rewards/chosen": -0.5871319770812988,
|
| 431 |
+
"rewards/margins": 0.6293676495552063,
|
| 432 |
+
"rewards/rejected": -1.2162939310073853,
|
| 433 |
+
"step": 625
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"epoch": 0.7773957243235162,
|
| 437 |
+
"grad_norm": 99.5,
|
| 438 |
+
"learning_rate": 4.118298626495348e-06,
|
| 439 |
+
"logits/chosen": -1.1009465456008911,
|
| 440 |
+
"logits/rejected": -0.9342904686927795,
|
| 441 |
+
"logps/chosen": -338.12750244140625,
|
| 442 |
+
"logps/rejected": -318.96624755859375,
|
| 443 |
+
"loss": 0.5603,
|
| 444 |
+
"rewards/accuracies": 0.6850000023841858,
|
| 445 |
+
"rewards/chosen": -0.714611828327179,
|
| 446 |
+
"rewards/margins": 0.6232568621635437,
|
| 447 |
+
"rewards/rejected": -1.3377538919448853,
|
| 448 |
+
"step": 650
|
| 449 |
+
},
|
| 450 |
+
{
|
| 451 |
+
"epoch": 0.8072955598744207,
|
| 452 |
+
"grad_norm": 72.5,
|
| 453 |
+
"learning_rate": 4.062915374390784e-06,
|
| 454 |
+
"logits/chosen": -1.2523653507232666,
|
| 455 |
+
"logits/rejected": -1.0046355724334717,
|
| 456 |
+
"logps/chosen": -310.9049987792969,
|
| 457 |
+
"logps/rejected": -297.67498779296875,
|
| 458 |
+
"loss": 0.5135,
|
| 459 |
+
"rewards/accuracies": 0.7099999785423279,
|
| 460 |
+
"rewards/chosen": -0.7437072992324829,
|
| 461 |
+
"rewards/margins": 0.6859521269798279,
|
| 462 |
+
"rewards/rejected": -1.4290771484375,
|
| 463 |
+
"step": 675
|
| 464 |
+
},
|
| 465 |
+
{
|
| 466 |
+
"epoch": 0.8371953954253252,
|
| 467 |
+
"grad_norm": 89.0,
|
| 468 |
+
"learning_rate": 4.007532122286221e-06,
|
| 469 |
+
"logits/chosen": -1.2401965856552124,
|
| 470 |
+
"logits/rejected": -0.8460285663604736,
|
| 471 |
+
"logps/chosen": -336.927490234375,
|
| 472 |
+
"logps/rejected": -318.7799987792969,
|
| 473 |
+
"loss": 0.5186,
|
| 474 |
+
"rewards/accuracies": 0.7250000238418579,
|
| 475 |
+
"rewards/chosen": -0.7741259932518005,
|
| 476 |
+
"rewards/margins": 0.7083032131195068,
|
| 477 |
+
"rewards/rejected": -1.4823095798492432,
|
| 478 |
+
"step": 700
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 0.8670952309762296,
|
| 482 |
+
"grad_norm": 78.0,
|
| 483 |
+
"learning_rate": 3.9521488701816575e-06,
|
| 484 |
+
"logits/chosen": -1.1703033447265625,
|
| 485 |
+
"logits/rejected": -0.9548498392105103,
|
| 486 |
+
"logps/chosen": -287.87249755859375,
|
| 487 |
+
"logps/rejected": -300.864990234375,
|
| 488 |
+
"loss": 0.5476,
|
| 489 |
+
"rewards/accuracies": 0.6825000047683716,
|
| 490 |
+
"rewards/chosen": -0.8389843702316284,
|
| 491 |
+
"rewards/margins": 0.608197033405304,
|
| 492 |
+
"rewards/rejected": -1.447534203529358,
|
| 493 |
+
"step": 725
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.8969950665271341,
|
| 497 |
+
"grad_norm": 100.5,
|
| 498 |
+
"learning_rate": 3.896765618077094e-06,
|
| 499 |
+
"logits/chosen": -1.1477763652801514,
|
| 500 |
+
"logits/rejected": -0.9038227796554565,
|
| 501 |
+
"logps/chosen": -338.31500244140625,
|
| 502 |
+
"logps/rejected": -319.9649963378906,
|
| 503 |
+
"loss": 0.5148,
|
| 504 |
+
"rewards/accuracies": 0.7250000238418579,
|
| 505 |
+
"rewards/chosen": -0.8131677508354187,
|
| 506 |
+
"rewards/margins": 0.7464379668235779,
|
| 507 |
+
"rewards/rejected": -1.559140682220459,
|
| 508 |
+
"step": 750
|
| 509 |
+
},
|
| 510 |
+
{
|
| 511 |
+
"epoch": 0.9268949020780386,
|
| 512 |
+
"grad_norm": 92.0,
|
| 513 |
+
"learning_rate": 3.84138236597253e-06,
|
| 514 |
+
"logits/chosen": -1.2342950105667114,
|
| 515 |
+
"logits/rejected": -0.946718156337738,
|
| 516 |
+
"logps/chosen": -331.1512451171875,
|
| 517 |
+
"logps/rejected": -304.0249938964844,
|
| 518 |
+
"loss": 0.528,
|
| 519 |
+
"rewards/accuracies": 0.7149999737739563,
|
| 520 |
+
"rewards/chosen": -0.9154602289199829,
|
| 521 |
+
"rewards/margins": 0.6957080364227295,
|
| 522 |
+
"rewards/rejected": -1.6108520030975342,
|
| 523 |
+
"step": 775
|
| 524 |
+
},
|
| 525 |
+
{
|
| 526 |
+
"epoch": 0.9567947376289431,
|
| 527 |
+
"grad_norm": 102.0,
|
| 528 |
+
"learning_rate": 3.7859991138679664e-06,
|
| 529 |
+
"logits/chosen": -1.0906939506530762,
|
| 530 |
+
"logits/rejected": -0.9649511575698853,
|
| 531 |
+
"logps/chosen": -338.5637512207031,
|
| 532 |
+
"logps/rejected": -338.4674987792969,
|
| 533 |
+
"loss": 0.5151,
|
| 534 |
+
"rewards/accuracies": 0.7200000286102295,
|
| 535 |
+
"rewards/chosen": -0.859545886516571,
|
| 536 |
+
"rewards/margins": 0.7704944014549255,
|
| 537 |
+
"rewards/rejected": -1.630163550376892,
|
| 538 |
+
"step": 800
|
| 539 |
+
},
|
| 540 |
+
{
|
| 541 |
+
"epoch": 0.9567947376289431,
|
| 542 |
+
"eval_logits/chosen": -1.1360965967178345,
|
| 543 |
+
"eval_logits/rejected": -0.9822049736976624,
|
| 544 |
+
"eval_logps/chosen": -326.69891357421875,
|
| 545 |
+
"eval_logps/rejected": -305.0,
|
| 546 |
+
"eval_loss": 0.5390191674232483,
|
| 547 |
+
"eval_rewards/accuracies": 0.687980055809021,
|
| 548 |
+
"eval_rewards/chosen": -0.7810032367706299,
|
| 549 |
+
"eval_rewards/margins": 0.6442182064056396,
|
| 550 |
+
"eval_rewards/rejected": -1.4252588748931885,
|
| 551 |
+
"eval_runtime": 876.4063,
|
| 552 |
+
"eval_samples_per_second": 1.697,
|
| 553 |
+
"eval_steps_per_second": 0.212,
|
| 554 |
+
"step": 800
|
| 555 |
+
},
|
| 556 |
+
{
|
| 557 |
+
"epoch": 0.9866945731798475,
|
| 558 |
+
"grad_norm": 84.5,
|
| 559 |
+
"learning_rate": 3.730615861763403e-06,
|
| 560 |
+
"logits/chosen": -1.2244549989700317,
|
| 561 |
+
"logits/rejected": null,
|
| 562 |
+
"logps/chosen": -334.5425109863281,
|
| 563 |
+
"logps/rejected": -339.23748779296875,
|
| 564 |
+
"loss": 0.5275,
|
| 565 |
+
"rewards/accuracies": 0.7149999737739563,
|
| 566 |
+
"rewards/chosen": -0.8379321098327637,
|
| 567 |
+
"rewards/margins": 0.715624988079071,
|
| 568 |
+
"rewards/rejected": -1.554010033607483,
|
| 569 |
+
"step": 825
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 1.0155479144864703,
|
| 573 |
+
"grad_norm": 57.25,
|
| 574 |
+
"learning_rate": 3.675232609658839e-06,
|
| 575 |
+
"logits/chosen": -1.2397924661636353,
|
| 576 |
+
"logits/rejected": -1.030158281326294,
|
| 577 |
+
"logps/chosen": -320.9093322753906,
|
| 578 |
+
"logps/rejected": -305.8393859863281,
|
| 579 |
+
"loss": 0.4669,
|
| 580 |
+
"rewards/accuracies": 0.7487046718597412,
|
| 581 |
+
"rewards/chosen": -0.7694060206413269,
|
| 582 |
+
"rewards/margins": 0.8478080630302429,
|
| 583 |
+
"rewards/rejected": -1.6172634363174438,
|
| 584 |
+
"step": 850
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 1.045447750037375,
|
| 588 |
+
"grad_norm": 67.5,
|
| 589 |
+
"learning_rate": 3.6198493575542758e-06,
|
| 590 |
+
"logits/chosen": -1.2220094203948975,
|
| 591 |
+
"logits/rejected": -0.9582018852233887,
|
| 592 |
+
"logps/chosen": -318.0262451171875,
|
| 593 |
+
"logps/rejected": -297.5799865722656,
|
| 594 |
+
"loss": 0.4691,
|
| 595 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 596 |
+
"rewards/chosen": -0.7301892042160034,
|
| 597 |
+
"rewards/margins": 0.9199609160423279,
|
| 598 |
+
"rewards/rejected": -1.6502331495285034,
|
| 599 |
+
"step": 875
|
| 600 |
+
},
|
| 601 |
+
{
|
| 602 |
+
"epoch": 1.0753475855882793,
|
| 603 |
+
"grad_norm": 73.5,
|
| 604 |
+
"learning_rate": 3.564466105449712e-06,
|
| 605 |
+
"logits/chosen": -1.089396357536316,
|
| 606 |
+
"logits/rejected": -0.8958370685577393,
|
| 607 |
+
"logps/chosen": -317.61749267578125,
|
| 608 |
+
"logps/rejected": -295.4825134277344,
|
| 609 |
+
"loss": 0.4746,
|
| 610 |
+
"rewards/accuracies": 0.7574999928474426,
|
| 611 |
+
"rewards/chosen": -0.8305737376213074,
|
| 612 |
+
"rewards/margins": 0.8526538014411926,
|
| 613 |
+
"rewards/rejected": -1.6829102039337158,
|
| 614 |
+
"step": 900
|
| 615 |
+
},
|
| 616 |
+
{
|
| 617 |
+
"epoch": 1.1052474211391838,
|
| 618 |
+
"grad_norm": 64.5,
|
| 619 |
+
"learning_rate": 3.509082853345149e-06,
|
| 620 |
+
"logits/chosen": -1.1403405666351318,
|
| 621 |
+
"logits/rejected": -0.8662219047546387,
|
| 622 |
+
"logps/chosen": -322.0574951171875,
|
| 623 |
+
"logps/rejected": -323.2074890136719,
|
| 624 |
+
"loss": 0.4641,
|
| 625 |
+
"rewards/accuracies": 0.7649999856948853,
|
| 626 |
+
"rewards/chosen": -0.6764746308326721,
|
| 627 |
+
"rewards/margins": 0.8836804032325745,
|
| 628 |
+
"rewards/rejected": -1.5600537061691284,
|
| 629 |
+
"step": 925
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"epoch": 1.1351472566900882,
|
| 633 |
+
"grad_norm": 66.0,
|
| 634 |
+
"learning_rate": 3.453699601240585e-06,
|
| 635 |
+
"logits/chosen": -1.2375200986862183,
|
| 636 |
+
"logits/rejected": -0.9549773931503296,
|
| 637 |
+
"logps/chosen": -321.0874938964844,
|
| 638 |
+
"logps/rejected": -306.6000061035156,
|
| 639 |
+
"loss": 0.4201,
|
| 640 |
+
"rewards/accuracies": 0.8224999904632568,
|
| 641 |
+
"rewards/chosen": -0.7068628072738647,
|
| 642 |
+
"rewards/margins": 1.0075805187225342,
|
| 643 |
+
"rewards/rejected": -1.7146776914596558,
|
| 644 |
+
"step": 950
|
| 645 |
+
},
|
| 646 |
+
{
|
| 647 |
+
"epoch": 1.1650470922409926,
|
| 648 |
+
"grad_norm": 64.0,
|
| 649 |
+
"learning_rate": 3.3983163491360217e-06,
|
| 650 |
+
"logits/chosen": -1.1668496131896973,
|
| 651 |
+
"logits/rejected": -0.8835460543632507,
|
| 652 |
+
"logps/chosen": -320.69000244140625,
|
| 653 |
+
"logps/rejected": -323.0425109863281,
|
| 654 |
+
"loss": 0.459,
|
| 655 |
+
"rewards/accuracies": 0.7825000286102295,
|
| 656 |
+
"rewards/chosen": -0.7173047065734863,
|
| 657 |
+
"rewards/margins": 0.9243432879447937,
|
| 658 |
+
"rewards/rejected": -1.6417040824890137,
|
| 659 |
+
"step": 975
|
| 660 |
+
},
|
| 661 |
+
{
|
| 662 |
+
"epoch": 1.1949469277918972,
|
| 663 |
+
"grad_norm": 62.75,
|
| 664 |
+
"learning_rate": 3.342933097031458e-06,
|
| 665 |
+
"logits/chosen": -1.2166632413864136,
|
| 666 |
+
"logits/rejected": -0.9624554514884949,
|
| 667 |
+
"logps/chosen": -301.0849914550781,
|
| 668 |
+
"logps/rejected": -304.3475036621094,
|
| 669 |
+
"loss": 0.4656,
|
| 670 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 671 |
+
"rewards/chosen": -0.7919347882270813,
|
| 672 |
+
"rewards/margins": 0.9388867020606995,
|
| 673 |
+
"rewards/rejected": -1.73046875,
|
| 674 |
+
"step": 1000
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 1.1949469277918972,
|
| 678 |
+
"eval_logits/chosen": -1.160080075263977,
|
| 679 |
+
"eval_logits/rejected": -1.0079379081726074,
|
| 680 |
+
"eval_logps/chosen": -326.43280029296875,
|
| 681 |
+
"eval_logps/rejected": -305.1102294921875,
|
| 682 |
+
"eval_loss": 0.527574896812439,
|
| 683 |
+
"eval_rewards/accuracies": 0.6892281174659729,
|
| 684 |
+
"eval_rewards/chosen": -0.7565616369247437,
|
| 685 |
+
"eval_rewards/margins": 0.6851438879966736,
|
| 686 |
+
"eval_rewards/rejected": -1.4416320323944092,
|
| 687 |
+
"eval_runtime": 876.3772,
|
| 688 |
+
"eval_samples_per_second": 1.697,
|
| 689 |
+
"eval_steps_per_second": 0.212,
|
| 690 |
+
"step": 1000
|
| 691 |
+
},
|
| 692 |
+
{
|
| 693 |
+
"epoch": 1.2248467633428017,
|
| 694 |
+
"grad_norm": 84.0,
|
| 695 |
+
"learning_rate": 3.2875498449268944e-06,
|
| 696 |
+
"logits/chosen": -1.1776912212371826,
|
| 697 |
+
"logits/rejected": -1.050445556640625,
|
| 698 |
+
"logps/chosen": -343.0050048828125,
|
| 699 |
+
"logps/rejected": -331.1875,
|
| 700 |
+
"loss": 0.4213,
|
| 701 |
+
"rewards/accuracies": 0.8050000071525574,
|
| 702 |
+
"rewards/chosen": -0.6588146686553955,
|
| 703 |
+
"rewards/margins": 1.0112402439117432,
|
| 704 |
+
"rewards/rejected": -1.670253872871399,
|
| 705 |
+
"step": 1025
|
| 706 |
+
},
|
| 707 |
+
{
|
| 708 |
+
"epoch": 1.254746598893706,
|
| 709 |
+
"grad_norm": 66.0,
|
| 710 |
+
"learning_rate": 3.2321665928223306e-06,
|
| 711 |
+
"logits/chosen": -1.2721245288848877,
|
| 712 |
+
"logits/rejected": -0.9186769127845764,
|
| 713 |
+
"logps/chosen": -316.4549865722656,
|
| 714 |
+
"logps/rejected": -315.2925109863281,
|
| 715 |
+
"loss": 0.4838,
|
| 716 |
+
"rewards/accuracies": 0.7825000286102295,
|
| 717 |
+
"rewards/chosen": -0.8342553973197937,
|
| 718 |
+
"rewards/margins": 0.83197021484375,
|
| 719 |
+
"rewards/rejected": -1.665708065032959,
|
| 720 |
+
"step": 1050
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"epoch": 1.2846464344446105,
|
| 724 |
+
"grad_norm": 62.75,
|
| 725 |
+
"learning_rate": 3.176783340717767e-06,
|
| 726 |
+
"logits/chosen": -1.1176886558532715,
|
| 727 |
+
"logits/rejected": -0.9960334300994873,
|
| 728 |
+
"logps/chosen": -328.32501220703125,
|
| 729 |
+
"logps/rejected": -328.3450012207031,
|
| 730 |
+
"loss": 0.4538,
|
| 731 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 732 |
+
"rewards/chosen": -0.7273278832435608,
|
| 733 |
+
"rewards/margins": 0.9573754668235779,
|
| 734 |
+
"rewards/rejected": -1.684999942779541,
|
| 735 |
+
"step": 1075
|
| 736 |
+
},
|
| 737 |
+
{
|
| 738 |
+
"epoch": 1.314546269995515,
|
| 739 |
+
"grad_norm": 84.5,
|
| 740 |
+
"learning_rate": 3.1214000886132033e-06,
|
| 741 |
+
"logits/chosen": -1.1655590534210205,
|
| 742 |
+
"logits/rejected": -0.8922329545021057,
|
| 743 |
+
"logps/chosen": -314.9700012207031,
|
| 744 |
+
"logps/rejected": -301.5050048828125,
|
| 745 |
+
"loss": 0.4483,
|
| 746 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 747 |
+
"rewards/chosen": -0.6278771758079529,
|
| 748 |
+
"rewards/margins": 0.9427502155303955,
|
| 749 |
+
"rewards/rejected": -1.5707299709320068,
|
| 750 |
+
"step": 1100
|
| 751 |
+
},
|
| 752 |
+
{
|
| 753 |
+
"epoch": 1.3444461055464194,
|
| 754 |
+
"grad_norm": 69.5,
|
| 755 |
+
"learning_rate": 3.06601683650864e-06,
|
| 756 |
+
"logits/chosen": -1.2217812538146973,
|
| 757 |
+
"logits/rejected": -0.976731538772583,
|
| 758 |
+
"logps/chosen": -324.7850036621094,
|
| 759 |
+
"logps/rejected": -316.4599914550781,
|
| 760 |
+
"loss": 0.4368,
|
| 761 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 762 |
+
"rewards/chosen": -0.7704944014549255,
|
| 763 |
+
"rewards/margins": 0.9598730206489563,
|
| 764 |
+
"rewards/rejected": -1.7300487756729126,
|
| 765 |
+
"step": 1125
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 1.374345941097324,
|
| 769 |
+
"grad_norm": 81.0,
|
| 770 |
+
"learning_rate": 3.010633584404076e-06,
|
| 771 |
+
"logits/chosen": -1.203802466392517,
|
| 772 |
+
"logits/rejected": -0.9061872959136963,
|
| 773 |
+
"logps/chosen": -330.4175109863281,
|
| 774 |
+
"logps/rejected": -312.9987487792969,
|
| 775 |
+
"loss": 0.4787,
|
| 776 |
+
"rewards/accuracies": 0.75,
|
| 777 |
+
"rewards/chosen": -0.7830480933189392,
|
| 778 |
+
"rewards/margins": 0.9129126071929932,
|
| 779 |
+
"rewards/rejected": -1.6956127882003784,
|
| 780 |
+
"step": 1150
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 1.4042457766482284,
|
| 784 |
+
"grad_norm": 118.0,
|
| 785 |
+
"learning_rate": 2.955250332299513e-06,
|
| 786 |
+
"logits/chosen": -1.1928298473358154,
|
| 787 |
+
"logits/rejected": -0.8999917507171631,
|
| 788 |
+
"logps/chosen": -320.2650146484375,
|
| 789 |
+
"logps/rejected": -301.5299987792969,
|
| 790 |
+
"loss": 0.4698,
|
| 791 |
+
"rewards/accuracies": 0.7549999952316284,
|
| 792 |
+
"rewards/chosen": -0.8731860518455505,
|
| 793 |
+
"rewards/margins": 0.9074377417564392,
|
| 794 |
+
"rewards/rejected": -1.7800854444503784,
|
| 795 |
+
"step": 1175
|
| 796 |
+
},
|
| 797 |
+
{
|
| 798 |
+
"epoch": 1.434145612199133,
|
| 799 |
+
"grad_norm": 65.0,
|
| 800 |
+
"learning_rate": 2.8998670801949493e-06,
|
| 801 |
+
"logits/chosen": -1.1984894275665283,
|
| 802 |
+
"logits/rejected": -0.9353277683258057,
|
| 803 |
+
"logps/chosen": -317.625,
|
| 804 |
+
"logps/rejected": -325.4075012207031,
|
| 805 |
+
"loss": 0.4502,
|
| 806 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 807 |
+
"rewards/chosen": -0.9375879168510437,
|
| 808 |
+
"rewards/margins": 0.9699438214302063,
|
| 809 |
+
"rewards/rejected": -1.9072656631469727,
|
| 810 |
+
"step": 1200
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"epoch": 1.434145612199133,
|
| 814 |
+
"eval_logits/chosen": -1.156473159790039,
|
| 815 |
+
"eval_logits/rejected": -1.006028413772583,
|
| 816 |
+
"eval_logps/chosen": -327.82794189453125,
|
| 817 |
+
"eval_logps/rejected": -306.8521423339844,
|
| 818 |
+
"eval_loss": 0.5231196284294128,
|
| 819 |
+
"eval_rewards/accuracies": 0.6926843523979187,
|
| 820 |
+
"eval_rewards/chosen": -0.8996713161468506,
|
| 821 |
+
"eval_rewards/margins": 0.7130159735679626,
|
| 822 |
+
"eval_rewards/rejected": -1.6129347085952759,
|
| 823 |
+
"eval_runtime": 876.3506,
|
| 824 |
+
"eval_samples_per_second": 1.697,
|
| 825 |
+
"eval_steps_per_second": 0.212,
|
| 826 |
+
"step": 1200
|
| 827 |
+
},
|
| 828 |
+
{
|
| 829 |
+
"epoch": 1.4640454477500373,
|
| 830 |
+
"grad_norm": 99.5,
|
| 831 |
+
"learning_rate": 2.844483828090386e-06,
|
| 832 |
+
"logits/chosen": -1.339633822441101,
|
| 833 |
+
"logits/rejected": -1.035129427909851,
|
| 834 |
+
"logps/chosen": -332.54998779296875,
|
| 835 |
+
"logps/rejected": -319.13751220703125,
|
| 836 |
+
"loss": 0.4421,
|
| 837 |
+
"rewards/accuracies": 0.7799999713897705,
|
| 838 |
+
"rewards/chosen": -0.8549670577049255,
|
| 839 |
+
"rewards/margins": 1.0162646770477295,
|
| 840 |
+
"rewards/rejected": -1.8712304830551147,
|
| 841 |
+
"step": 1225
|
| 842 |
+
},
|
| 843 |
+
{
|
| 844 |
+
"epoch": 1.493945283300942,
|
| 845 |
+
"grad_norm": 83.5,
|
| 846 |
+
"learning_rate": 2.789100575985822e-06,
|
| 847 |
+
"logits/chosen": -1.1476205587387085,
|
| 848 |
+
"logits/rejected": -0.9250108599662781,
|
| 849 |
+
"logps/chosen": -322.0050048828125,
|
| 850 |
+
"logps/rejected": -309.3500061035156,
|
| 851 |
+
"loss": 0.4555,
|
| 852 |
+
"rewards/accuracies": 0.7549999952316284,
|
| 853 |
+
"rewards/chosen": -0.8130224347114563,
|
| 854 |
+
"rewards/margins": 0.9434008598327637,
|
| 855 |
+
"rewards/rejected": -1.7563867568969727,
|
| 856 |
+
"step": 1250
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 1.5238451188518463,
|
| 860 |
+
"grad_norm": 63.75,
|
| 861 |
+
"learning_rate": 2.7337173238812586e-06,
|
| 862 |
+
"logits/chosen": -1.2015457153320312,
|
| 863 |
+
"logits/rejected": -0.8530246019363403,
|
| 864 |
+
"logps/chosen": -309.01251220703125,
|
| 865 |
+
"logps/rejected": -297.7825012207031,
|
| 866 |
+
"loss": 0.4501,
|
| 867 |
+
"rewards/accuracies": 0.7875000238418579,
|
| 868 |
+
"rewards/chosen": -0.836810290813446,
|
| 869 |
+
"rewards/margins": 0.9292749166488647,
|
| 870 |
+
"rewards/rejected": -1.7654907703399658,
|
| 871 |
+
"step": 1275
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 1.5537449544027506,
|
| 875 |
+
"grad_norm": 67.0,
|
| 876 |
+
"learning_rate": 2.6783340717766948e-06,
|
| 877 |
+
"logits/chosen": -1.2457306385040283,
|
| 878 |
+
"logits/rejected": -1.0591107606887817,
|
| 879 |
+
"logps/chosen": -337.9775085449219,
|
| 880 |
+
"logps/rejected": -308.5375061035156,
|
| 881 |
+
"loss": 0.4248,
|
| 882 |
+
"rewards/accuracies": 0.800000011920929,
|
| 883 |
+
"rewards/chosen": -0.7735278606414795,
|
| 884 |
+
"rewards/margins": 1.035646915435791,
|
| 885 |
+
"rewards/rejected": -1.8087304830551147,
|
| 886 |
+
"step": 1300
|
| 887 |
+
},
|
| 888 |
+
{
|
| 889 |
+
"epoch": 1.5836447899536552,
|
| 890 |
+
"grad_norm": 51.0,
|
| 891 |
+
"learning_rate": 2.6229508196721314e-06,
|
| 892 |
+
"logits/chosen": -1.216982126235962,
|
| 893 |
+
"logits/rejected": -0.8925817608833313,
|
| 894 |
+
"logps/chosen": -333.2349853515625,
|
| 895 |
+
"logps/rejected": -316.62249755859375,
|
| 896 |
+
"loss": 0.4568,
|
| 897 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 898 |
+
"rewards/chosen": -0.8274877667427063,
|
| 899 |
+
"rewards/margins": 0.9530566334724426,
|
| 900 |
+
"rewards/rejected": -1.7805664539337158,
|
| 901 |
+
"step": 1325
|
| 902 |
+
},
|
| 903 |
+
{
|
| 904 |
+
"epoch": 1.6135446255045598,
|
| 905 |
+
"grad_norm": 82.0,
|
| 906 |
+
"learning_rate": 2.5675675675675675e-06,
|
| 907 |
+
"logits/chosen": -1.3132140636444092,
|
| 908 |
+
"logits/rejected": -1.004296898841858,
|
| 909 |
+
"logps/chosen": -342.4949951171875,
|
| 910 |
+
"logps/rejected": -317.69500732421875,
|
| 911 |
+
"loss": 0.429,
|
| 912 |
+
"rewards/accuracies": 0.8050000071525574,
|
| 913 |
+
"rewards/chosen": -0.9008423089981079,
|
| 914 |
+
"rewards/margins": 1.0281542539596558,
|
| 915 |
+
"rewards/rejected": -1.9285448789596558,
|
| 916 |
+
"step": 1350
|
| 917 |
+
},
|
| 918 |
+
{
|
| 919 |
+
"epoch": 1.6434444610554642,
|
| 920 |
+
"grad_norm": 116.5,
|
| 921 |
+
"learning_rate": 2.5121843154630045e-06,
|
| 922 |
+
"logits/chosen": -1.1408294439315796,
|
| 923 |
+
"logits/rejected": -0.9321377277374268,
|
| 924 |
+
"logps/chosen": -335.291259765625,
|
| 925 |
+
"logps/rejected": -321.29376220703125,
|
| 926 |
+
"loss": 0.453,
|
| 927 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 928 |
+
"rewards/chosen": -0.8236993551254272,
|
| 929 |
+
"rewards/margins": 0.9510498046875,
|
| 930 |
+
"rewards/rejected": -1.77447509765625,
|
| 931 |
+
"step": 1375
|
| 932 |
+
},
|
| 933 |
+
{
|
| 934 |
+
"epoch": 1.6733442966063685,
|
| 935 |
+
"grad_norm": 91.0,
|
| 936 |
+
"learning_rate": 2.4568010633584403e-06,
|
| 937 |
+
"logits/chosen": -1.1858936548233032,
|
| 938 |
+
"logits/rejected": -0.9579010009765625,
|
| 939 |
+
"logps/chosen": -320.9949951171875,
|
| 940 |
+
"logps/rejected": -296.3374938964844,
|
| 941 |
+
"loss": 0.4699,
|
| 942 |
+
"rewards/accuracies": 0.7425000071525574,
|
| 943 |
+
"rewards/chosen": -0.8678625226020813,
|
| 944 |
+
"rewards/margins": 0.9215136766433716,
|
| 945 |
+
"rewards/rejected": -1.7896509170532227,
|
| 946 |
+
"step": 1400
|
| 947 |
+
},
|
| 948 |
+
{
|
| 949 |
+
"epoch": 1.6733442966063685,
|
| 950 |
+
"eval_logits/chosen": -1.1674253940582275,
|
| 951 |
+
"eval_logits/rejected": -1.0171688795089722,
|
| 952 |
+
"eval_logps/chosen": -327.3978576660156,
|
| 953 |
+
"eval_logps/rejected": -306.6209716796875,
|
| 954 |
+
"eval_loss": 0.5191056728363037,
|
| 955 |
+
"eval_rewards/accuracies": 0.6933563947677612,
|
| 956 |
+
"eval_rewards/chosen": -0.8476693630218506,
|
| 957 |
+
"eval_rewards/margins": 0.7431673407554626,
|
| 958 |
+
"eval_rewards/rejected": -1.5906811952590942,
|
| 959 |
+
"eval_runtime": 876.3262,
|
| 960 |
+
"eval_samples_per_second": 1.697,
|
| 961 |
+
"eval_steps_per_second": 0.212,
|
| 962 |
+
"step": 1400
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 1.703244132157273,
|
| 966 |
+
"grad_norm": 82.0,
|
| 967 |
+
"learning_rate": 2.401417811253877e-06,
|
| 968 |
+
"logits/chosen": -1.1833282709121704,
|
| 969 |
+
"logits/rejected": -0.9263910055160522,
|
| 970 |
+
"logps/chosen": -324.5150146484375,
|
| 971 |
+
"logps/rejected": -316.1650085449219,
|
| 972 |
+
"loss": 0.451,
|
| 973 |
+
"rewards/accuracies": 0.7799999713897705,
|
| 974 |
+
"rewards/chosen": -0.8199084401130676,
|
| 975 |
+
"rewards/margins": 0.9980810284614563,
|
| 976 |
+
"rewards/rejected": -1.8175097703933716,
|
| 977 |
+
"step": 1425
|
| 978 |
+
},
|
| 979 |
+
{
|
| 980 |
+
"epoch": 1.7331439677081777,
|
| 981 |
+
"grad_norm": 99.0,
|
| 982 |
+
"learning_rate": 2.3460345591493135e-06,
|
| 983 |
+
"logits/chosen": -1.1936352252960205,
|
| 984 |
+
"logits/rejected": -1.0041576623916626,
|
| 985 |
+
"logps/chosen": -350.885009765625,
|
| 986 |
+
"logps/rejected": -327.0450134277344,
|
| 987 |
+
"loss": 0.4702,
|
| 988 |
+
"rewards/accuracies": 0.75,
|
| 989 |
+
"rewards/chosen": -0.9122155904769897,
|
| 990 |
+
"rewards/margins": 0.9335852265357971,
|
| 991 |
+
"rewards/rejected": -1.8462109565734863,
|
| 992 |
+
"step": 1450
|
| 993 |
+
},
|
| 994 |
+
{
|
| 995 |
+
"epoch": 1.763043803259082,
|
| 996 |
+
"grad_norm": 59.5,
|
| 997 |
+
"learning_rate": 2.2906513070447496e-06,
|
| 998 |
+
"logits/chosen": -1.3379946947097778,
|
| 999 |
+
"logits/rejected": -1.0853075981140137,
|
| 1000 |
+
"logps/chosen": -299.1099853515625,
|
| 1001 |
+
"logps/rejected": -299.9725036621094,
|
| 1002 |
+
"loss": 0.4607,
|
| 1003 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 1004 |
+
"rewards/chosen": -0.905989408493042,
|
| 1005 |
+
"rewards/margins": 1.0363476276397705,
|
| 1006 |
+
"rewards/rejected": -1.942041039466858,
|
| 1007 |
+
"step": 1475
|
| 1008 |
+
},
|
| 1009 |
+
{
|
| 1010 |
+
"epoch": 1.7929436388099864,
|
| 1011 |
+
"grad_norm": 102.0,
|
| 1012 |
+
"learning_rate": 2.235268054940186e-06,
|
| 1013 |
+
"logits/chosen": -1.1545830965042114,
|
| 1014 |
+
"logits/rejected": -0.8675525188446045,
|
| 1015 |
+
"logps/chosen": -321.79998779296875,
|
| 1016 |
+
"logps/rejected": -300.4262390136719,
|
| 1017 |
+
"loss": 0.4854,
|
| 1018 |
+
"rewards/accuracies": 0.7425000071525574,
|
| 1019 |
+
"rewards/chosen": -0.8690832257270813,
|
| 1020 |
+
"rewards/margins": 0.9056127667427063,
|
| 1021 |
+
"rewards/rejected": -1.7749096155166626,
|
| 1022 |
+
"step": 1500
|
| 1023 |
+
},
|
| 1024 |
+
{
|
| 1025 |
+
"epoch": 1.822843474360891,
|
| 1026 |
+
"grad_norm": 60.0,
|
| 1027 |
+
"learning_rate": 2.179884802835623e-06,
|
| 1028 |
+
"logits/chosen": -1.2606717348098755,
|
| 1029 |
+
"logits/rejected": -1.0567920207977295,
|
| 1030 |
+
"logps/chosen": -328.82501220703125,
|
| 1031 |
+
"logps/rejected": -304.1050109863281,
|
| 1032 |
+
"loss": 0.4552,
|
| 1033 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 1034 |
+
"rewards/chosen": -0.743670642375946,
|
| 1035 |
+
"rewards/margins": 1.0134960412979126,
|
| 1036 |
+
"rewards/rejected": -1.7573193311691284,
|
| 1037 |
+
"step": 1525
|
| 1038 |
+
},
|
| 1039 |
+
{
|
| 1040 |
+
"epoch": 1.8527433099117956,
|
| 1041 |
+
"grad_norm": 59.5,
|
| 1042 |
+
"learning_rate": 2.124501550731059e-06,
|
| 1043 |
+
"logits/chosen": -1.2121707201004028,
|
| 1044 |
+
"logits/rejected": -1.002629041671753,
|
| 1045 |
+
"logps/chosen": -323.5950012207031,
|
| 1046 |
+
"logps/rejected": -317.5299987792969,
|
| 1047 |
+
"loss": 0.4645,
|
| 1048 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 1049 |
+
"rewards/chosen": -0.9758337140083313,
|
| 1050 |
+
"rewards/margins": 0.9835278391838074,
|
| 1051 |
+
"rewards/rejected": -1.959287166595459,
|
| 1052 |
+
"step": 1550
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 1.8826431454627,
|
| 1056 |
+
"grad_norm": 71.0,
|
| 1057 |
+
"learning_rate": 2.0691182986264955e-06,
|
| 1058 |
+
"logits/chosen": -1.296298861503601,
|
| 1059 |
+
"logits/rejected": null,
|
| 1060 |
+
"logps/chosen": -325.7699890136719,
|
| 1061 |
+
"logps/rejected": -299.322509765625,
|
| 1062 |
+
"loss": 0.4515,
|
| 1063 |
+
"rewards/accuracies": 0.7599999904632568,
|
| 1064 |
+
"rewards/chosen": -0.8331592082977295,
|
| 1065 |
+
"rewards/margins": 0.9821679592132568,
|
| 1066 |
+
"rewards/rejected": -1.8158252239227295,
|
| 1067 |
+
"step": 1575
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 1.9125429810136043,
|
| 1071 |
+
"grad_norm": 70.0,
|
| 1072 |
+
"learning_rate": 2.0137350465219317e-06,
|
| 1073 |
+
"logits/chosen": -1.2260925769805908,
|
| 1074 |
+
"logits/rejected": -0.9426334500312805,
|
| 1075 |
+
"logps/chosen": -330.06500244140625,
|
| 1076 |
+
"logps/rejected": -309.68499755859375,
|
| 1077 |
+
"loss": 0.4436,
|
| 1078 |
+
"rewards/accuracies": 0.7649999856948853,
|
| 1079 |
+
"rewards/chosen": -0.830242931842804,
|
| 1080 |
+
"rewards/margins": 0.9743407964706421,
|
| 1081 |
+
"rewards/rejected": -1.804931640625,
|
| 1082 |
+
"step": 1600
|
| 1083 |
+
},
|
| 1084 |
+
{
|
| 1085 |
+
"epoch": 1.9125429810136043,
|
| 1086 |
+
"eval_logits/chosen": -1.1829742193222046,
|
| 1087 |
+
"eval_logits/rejected": -1.033914566040039,
|
| 1088 |
+
"eval_logps/chosen": -327.43011474609375,
|
| 1089 |
+
"eval_logps/rejected": -306.69085693359375,
|
| 1090 |
+
"eval_loss": 0.5206477046012878,
|
| 1091 |
+
"eval_rewards/accuracies": 0.6974846720695496,
|
| 1092 |
+
"eval_rewards/chosen": -0.8544062376022339,
|
| 1093 |
+
"eval_rewards/margins": 0.7440763115882874,
|
| 1094 |
+
"eval_rewards/rejected": -1.598265290260315,
|
| 1095 |
+
"eval_runtime": 876.3416,
|
| 1096 |
+
"eval_samples_per_second": 1.697,
|
| 1097 |
+
"eval_steps_per_second": 0.212,
|
| 1098 |
+
"step": 1600
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"epoch": 1.942442816564509,
|
| 1102 |
+
"grad_norm": 73.5,
|
| 1103 |
+
"learning_rate": 1.9583517944173683e-06,
|
| 1104 |
+
"logits/chosen": -1.246303677558899,
|
| 1105 |
+
"logits/rejected": -0.9357275366783142,
|
| 1106 |
+
"logps/chosen": -332.3599853515625,
|
| 1107 |
+
"logps/rejected": -309.1700134277344,
|
| 1108 |
+
"loss": 0.4702,
|
| 1109 |
+
"rewards/accuracies": 0.762499988079071,
|
| 1110 |
+
"rewards/chosen": -0.8381909132003784,
|
| 1111 |
+
"rewards/margins": 0.9997217059135437,
|
| 1112 |
+
"rewards/rejected": -1.837497591972351,
|
| 1113 |
+
"step": 1625
|
| 1114 |
+
},
|
| 1115 |
+
{
|
| 1116 |
+
"epoch": 1.9723426521154135,
|
| 1117 |
+
"grad_norm": 68.5,
|
| 1118 |
+
"learning_rate": 1.9029685423128047e-06,
|
| 1119 |
+
"logits/chosen": -1.2618129253387451,
|
| 1120 |
+
"logits/rejected": -1.0779250860214233,
|
| 1121 |
+
"logps/chosen": -339.9324951171875,
|
| 1122 |
+
"logps/rejected": -318.04998779296875,
|
| 1123 |
+
"loss": 0.4583,
|
| 1124 |
+
"rewards/accuracies": 0.762499988079071,
|
| 1125 |
+
"rewards/chosen": -0.8390514850616455,
|
| 1126 |
+
"rewards/margins": 1.0396826267242432,
|
| 1127 |
+
"rewards/rejected": -1.878564476966858,
|
| 1128 |
+
"step": 1650
|
| 1129 |
+
},
|
| 1130 |
+
{
|
| 1131 |
+
"epoch": 2.0011959934220362,
|
| 1132 |
+
"grad_norm": 97.0,
|
| 1133 |
+
"learning_rate": 1.847585290208241e-06,
|
| 1134 |
+
"logits/chosen": -1.2342288494110107,
|
| 1135 |
+
"logits/rejected": -0.9683116674423218,
|
| 1136 |
+
"logps/chosen": -332.2409362792969,
|
| 1137 |
+
"logps/rejected": -321.0531005859375,
|
| 1138 |
+
"loss": 0.424,
|
| 1139 |
+
"rewards/accuracies": 0.7642487287521362,
|
| 1140 |
+
"rewards/chosen": -0.7630558013916016,
|
| 1141 |
+
"rewards/margins": 1.0779491662979126,
|
| 1142 |
+
"rewards/rejected": -1.8409063816070557,
|
| 1143 |
+
"step": 1675
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"epoch": 2.0310958289729406,
|
| 1147 |
+
"grad_norm": 76.0,
|
| 1148 |
+
"learning_rate": 1.7922020381036776e-06,
|
| 1149 |
+
"logits/chosen": -1.318371295928955,
|
| 1150 |
+
"logits/rejected": -1.0083489418029785,
|
| 1151 |
+
"logps/chosen": -327.114990234375,
|
| 1152 |
+
"logps/rejected": -336.697509765625,
|
| 1153 |
+
"loss": 0.3965,
|
| 1154 |
+
"rewards/accuracies": 0.8475000262260437,
|
| 1155 |
+
"rewards/chosen": -0.7496582269668579,
|
| 1156 |
+
"rewards/margins": 1.0661474466323853,
|
| 1157 |
+
"rewards/rejected": -1.8159960508346558,
|
| 1158 |
+
"step": 1700
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 2.060995664523845,
|
| 1162 |
+
"grad_norm": 102.5,
|
| 1163 |
+
"learning_rate": 1.736818785999114e-06,
|
| 1164 |
+
"logits/chosen": -1.2396435737609863,
|
| 1165 |
+
"logits/rejected": -0.9828730225563049,
|
| 1166 |
+
"logps/chosen": -332.7074890136719,
|
| 1167 |
+
"logps/rejected": -333.37249755859375,
|
| 1168 |
+
"loss": 0.4101,
|
| 1169 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1170 |
+
"rewards/chosen": -0.7449682354927063,
|
| 1171 |
+
"rewards/margins": 1.1290674209594727,
|
| 1172 |
+
"rewards/rejected": -1.8738598823547363,
|
| 1173 |
+
"step": 1725
|
| 1174 |
+
},
|
| 1175 |
+
{
|
| 1176 |
+
"epoch": 2.09089550007475,
|
| 1177 |
+
"grad_norm": 62.25,
|
| 1178 |
+
"learning_rate": 1.6814355338945504e-06,
|
| 1179 |
+
"logits/chosen": -1.2273823022842407,
|
| 1180 |
+
"logits/rejected": -0.88829505443573,
|
| 1181 |
+
"logps/chosen": -322.93499755859375,
|
| 1182 |
+
"logps/rejected": -300.385009765625,
|
| 1183 |
+
"loss": 0.4221,
|
| 1184 |
+
"rewards/accuracies": 0.8050000071525574,
|
| 1185 |
+
"rewards/chosen": -0.903369128704071,
|
| 1186 |
+
"rewards/margins": 1.0416357517242432,
|
| 1187 |
+
"rewards/rejected": -1.9447948932647705,
|
| 1188 |
+
"step": 1750
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"epoch": 2.120795335625654,
|
| 1192 |
+
"grad_norm": 86.5,
|
| 1193 |
+
"learning_rate": 1.6260522817899868e-06,
|
| 1194 |
+
"logits/chosen": -1.2524548768997192,
|
| 1195 |
+
"logits/rejected": -1.0671484470367432,
|
| 1196 |
+
"logps/chosen": -333.92999267578125,
|
| 1197 |
+
"logps/rejected": -318.6400146484375,
|
| 1198 |
+
"loss": 0.4119,
|
| 1199 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1200 |
+
"rewards/chosen": -0.7944982647895813,
|
| 1201 |
+
"rewards/margins": 1.1625818014144897,
|
| 1202 |
+
"rewards/rejected": -1.9566112756729126,
|
| 1203 |
+
"step": 1775
|
| 1204 |
+
},
|
| 1205 |
+
{
|
| 1206 |
+
"epoch": 2.1506951711765585,
|
| 1207 |
+
"grad_norm": 90.0,
|
| 1208 |
+
"learning_rate": 1.5706690296854231e-06,
|
| 1209 |
+
"logits/chosen": -1.2237915992736816,
|
| 1210 |
+
"logits/rejected": -0.956585705280304,
|
| 1211 |
+
"logps/chosen": -320.30999755859375,
|
| 1212 |
+
"logps/rejected": -302.2674865722656,
|
| 1213 |
+
"loss": 0.4528,
|
| 1214 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 1215 |
+
"rewards/chosen": -0.9091894626617432,
|
| 1216 |
+
"rewards/margins": 1.0250316858291626,
|
| 1217 |
+
"rewards/rejected": -1.9344677925109863,
|
| 1218 |
+
"step": 1800
|
| 1219 |
+
},
|
| 1220 |
+
{
|
| 1221 |
+
"epoch": 2.1506951711765585,
|
| 1222 |
+
"eval_logits/chosen": -1.191327452659607,
|
| 1223 |
+
"eval_logits/rejected": -1.0433924198150635,
|
| 1224 |
+
"eval_logps/chosen": -327.741943359375,
|
| 1225 |
+
"eval_logps/rejected": -307.1559143066406,
|
| 1226 |
+
"eval_loss": 0.5188325047492981,
|
| 1227 |
+
"eval_rewards/accuracies": 0.6941244602203369,
|
| 1228 |
+
"eval_rewards/chosen": -0.8884723782539368,
|
| 1229 |
+
"eval_rewards/margins": 0.7567348480224609,
|
| 1230 |
+
"eval_rewards/rejected": -1.6454237699508667,
|
| 1231 |
+
"eval_runtime": 876.3236,
|
| 1232 |
+
"eval_samples_per_second": 1.697,
|
| 1233 |
+
"eval_steps_per_second": 0.212,
|
| 1234 |
+
"step": 1800
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 2.180595006727463,
|
| 1238 |
+
"grad_norm": 74.5,
|
| 1239 |
+
"learning_rate": 1.5152857775808597e-06,
|
| 1240 |
+
"logits/chosen": -1.2849377393722534,
|
| 1241 |
+
"logits/rejected": -0.9589782953262329,
|
| 1242 |
+
"logps/chosen": -321.9987487792969,
|
| 1243 |
+
"logps/rejected": -307.2149963378906,
|
| 1244 |
+
"loss": 0.4031,
|
| 1245 |
+
"rewards/accuracies": 0.8349999785423279,
|
| 1246 |
+
"rewards/chosen": -0.7700170874595642,
|
| 1247 |
+
"rewards/margins": 1.1218103170394897,
|
| 1248 |
+
"rewards/rejected": -1.8917040824890137,
|
| 1249 |
+
"step": 1825
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 2.2104948422783677,
|
| 1253 |
+
"grad_norm": 73.5,
|
| 1254 |
+
"learning_rate": 1.459902525476296e-06,
|
| 1255 |
+
"logits/chosen": -1.136842131614685,
|
| 1256 |
+
"logits/rejected": -0.9383144974708557,
|
| 1257 |
+
"logps/chosen": -319.8525085449219,
|
| 1258 |
+
"logps/rejected": -333.6600036621094,
|
| 1259 |
+
"loss": 0.424,
|
| 1260 |
+
"rewards/accuracies": 0.8075000047683716,
|
| 1261 |
+
"rewards/chosen": -0.8708154559135437,
|
| 1262 |
+
"rewards/margins": 1.0324267148971558,
|
| 1263 |
+
"rewards/rejected": -1.903378963470459,
|
| 1264 |
+
"step": 1850
|
| 1265 |
+
},
|
| 1266 |
+
{
|
| 1267 |
+
"epoch": 2.240394677829272,
|
| 1268 |
+
"grad_norm": 72.5,
|
| 1269 |
+
"learning_rate": 1.4045192733717325e-06,
|
| 1270 |
+
"logits/chosen": -1.1802786588668823,
|
| 1271 |
+
"logits/rejected": -0.9680548310279846,
|
| 1272 |
+
"logps/chosen": -317.48748779296875,
|
| 1273 |
+
"logps/rejected": -299.19000244140625,
|
| 1274 |
+
"loss": 0.4262,
|
| 1275 |
+
"rewards/accuracies": 0.8274999856948853,
|
| 1276 |
+
"rewards/chosen": -0.8513085842132568,
|
| 1277 |
+
"rewards/margins": 1.0704809427261353,
|
| 1278 |
+
"rewards/rejected": -1.9216357469558716,
|
| 1279 |
+
"step": 1875
|
| 1280 |
+
},
|
| 1281 |
+
{
|
| 1282 |
+
"epoch": 2.2702945133801764,
|
| 1283 |
+
"grad_norm": 84.0,
|
| 1284 |
+
"learning_rate": 1.3491360212671688e-06,
|
| 1285 |
+
"logits/chosen": -1.2559946775436401,
|
| 1286 |
+
"logits/rejected": -0.9639026522636414,
|
| 1287 |
+
"logps/chosen": -336.9750061035156,
|
| 1288 |
+
"logps/rejected": -323.49249267578125,
|
| 1289 |
+
"loss": 0.4294,
|
| 1290 |
+
"rewards/accuracies": 0.8025000095367432,
|
| 1291 |
+
"rewards/chosen": -0.8724609613418579,
|
| 1292 |
+
"rewards/margins": 1.0881787538528442,
|
| 1293 |
+
"rewards/rejected": -1.960756778717041,
|
| 1294 |
+
"step": 1900
|
| 1295 |
+
},
|
| 1296 |
+
{
|
| 1297 |
+
"epoch": 2.3001943489310808,
|
| 1298 |
+
"grad_norm": 71.0,
|
| 1299 |
+
"learning_rate": 1.2937527691626054e-06,
|
| 1300 |
+
"logits/chosen": -1.3266677856445312,
|
| 1301 |
+
"logits/rejected": -1.0626074075698853,
|
| 1302 |
+
"logps/chosen": -305.86749267578125,
|
| 1303 |
+
"logps/rejected": -291.93499755859375,
|
| 1304 |
+
"loss": 0.4471,
|
| 1305 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 1306 |
+
"rewards/chosen": -0.9192346334457397,
|
| 1307 |
+
"rewards/margins": 1.0141992568969727,
|
| 1308 |
+
"rewards/rejected": -1.9337549209594727,
|
| 1309 |
+
"step": 1925
|
| 1310 |
+
},
|
| 1311 |
+
{
|
| 1312 |
+
"epoch": 2.330094184481985,
|
| 1313 |
+
"grad_norm": 109.5,
|
| 1314 |
+
"learning_rate": 1.2383695170580418e-06,
|
| 1315 |
+
"logits/chosen": -1.1726070642471313,
|
| 1316 |
+
"logits/rejected": -1.0060466527938843,
|
| 1317 |
+
"logps/chosen": -309.7799987792969,
|
| 1318 |
+
"logps/rejected": -311.13751220703125,
|
| 1319 |
+
"loss": 0.4333,
|
| 1320 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 1321 |
+
"rewards/chosen": -0.8455395698547363,
|
| 1322 |
+
"rewards/margins": 1.0642285346984863,
|
| 1323 |
+
"rewards/rejected": -1.9100537300109863,
|
| 1324 |
+
"step": 1950
|
| 1325 |
+
},
|
| 1326 |
+
{
|
| 1327 |
+
"epoch": 2.35999402003289,
|
| 1328 |
+
"grad_norm": 43.0,
|
| 1329 |
+
"learning_rate": 1.1829862649534782e-06,
|
| 1330 |
+
"logits/chosen": -1.189868450164795,
|
| 1331 |
+
"logits/rejected": -1.0110809803009033,
|
| 1332 |
+
"logps/chosen": -343.5849914550781,
|
| 1333 |
+
"logps/rejected": -329.1675109863281,
|
| 1334 |
+
"loss": 0.4071,
|
| 1335 |
+
"rewards/accuracies": 0.8224999904632568,
|
| 1336 |
+
"rewards/chosen": -0.8902783393859863,
|
| 1337 |
+
"rewards/margins": 1.0464379787445068,
|
| 1338 |
+
"rewards/rejected": -1.9371508359909058,
|
| 1339 |
+
"step": 1975
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 2.3898938555837943,
|
| 1343 |
+
"grad_norm": 86.5,
|
| 1344 |
+
"learning_rate": 1.1276030128489146e-06,
|
| 1345 |
+
"logits/chosen": -1.3213348388671875,
|
| 1346 |
+
"logits/rejected": -1.0948954820632935,
|
| 1347 |
+
"logps/chosen": -331.0174865722656,
|
| 1348 |
+
"logps/rejected": -307.2900085449219,
|
| 1349 |
+
"loss": 0.4075,
|
| 1350 |
+
"rewards/accuracies": 0.8349999785423279,
|
| 1351 |
+
"rewards/chosen": -0.8052575588226318,
|
| 1352 |
+
"rewards/margins": 1.1002050638198853,
|
| 1353 |
+
"rewards/rejected": -1.9058740139007568,
|
| 1354 |
+
"step": 2000
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 2.3898938555837943,
|
| 1358 |
+
"eval_logits/chosen": -1.1904795169830322,
|
| 1359 |
+
"eval_logits/rejected": -1.042686104774475,
|
| 1360 |
+
"eval_logps/chosen": -327.67205810546875,
|
| 1361 |
+
"eval_logps/rejected": -307.0806579589844,
|
| 1362 |
+
"eval_loss": 0.5186262726783752,
|
| 1363 |
+
"eval_rewards/accuracies": 0.6967166662216187,
|
| 1364 |
+
"eval_rewards/chosen": -0.8813358545303345,
|
| 1365 |
+
"eval_rewards/margins": 0.7553303837776184,
|
| 1366 |
+
"eval_rewards/rejected": -1.6366767883300781,
|
| 1367 |
+
"eval_runtime": 876.3711,
|
| 1368 |
+
"eval_samples_per_second": 1.697,
|
| 1369 |
+
"eval_steps_per_second": 0.212,
|
| 1370 |
+
"step": 2000
|
| 1371 |
+
},
|
| 1372 |
+
{
|
| 1373 |
+
"epoch": 2.4197936911346987,
|
| 1374 |
+
"grad_norm": 67.0,
|
| 1375 |
+
"learning_rate": 1.072219760744351e-06,
|
| 1376 |
+
"logits/chosen": -1.2627320289611816,
|
| 1377 |
+
"logits/rejected": -1.0026310682296753,
|
| 1378 |
+
"logps/chosen": -335.5675048828125,
|
| 1379 |
+
"logps/rejected": -301.01251220703125,
|
| 1380 |
+
"loss": 0.4202,
|
| 1381 |
+
"rewards/accuracies": 0.7774999737739563,
|
| 1382 |
+
"rewards/chosen": -0.8969201445579529,
|
| 1383 |
+
"rewards/margins": 1.085205078125,
|
| 1384 |
+
"rewards/rejected": -1.9821679592132568,
|
| 1385 |
+
"step": 2025
|
| 1386 |
+
},
|
| 1387 |
+
{
|
| 1388 |
+
"epoch": 2.4496935266856035,
|
| 1389 |
+
"grad_norm": 86.0,
|
| 1390 |
+
"learning_rate": 1.0168365086397875e-06,
|
| 1391 |
+
"logits/chosen": -1.2463324069976807,
|
| 1392 |
+
"logits/rejected": -0.9855798482894897,
|
| 1393 |
+
"logps/chosen": -332.5849914550781,
|
| 1394 |
+
"logps/rejected": -324.9624938964844,
|
| 1395 |
+
"loss": 0.4193,
|
| 1396 |
+
"rewards/accuracies": 0.7925000190734863,
|
| 1397 |
+
"rewards/chosen": -0.8326050043106079,
|
| 1398 |
+
"rewards/margins": 1.0910131931304932,
|
| 1399 |
+
"rewards/rejected": -1.9229882955551147,
|
| 1400 |
+
"step": 2050
|
| 1401 |
+
},
|
| 1402 |
+
{
|
| 1403 |
+
"epoch": 2.479593362236508,
|
| 1404 |
+
"grad_norm": 53.75,
|
| 1405 |
+
"learning_rate": 9.61453256535224e-07,
|
| 1406 |
+
"logits/chosen": -1.2372454404830933,
|
| 1407 |
+
"logits/rejected": -0.9461462497711182,
|
| 1408 |
+
"logps/chosen": -328.4750061035156,
|
| 1409 |
+
"logps/rejected": -300.5224914550781,
|
| 1410 |
+
"loss": 0.4611,
|
| 1411 |
+
"rewards/accuracies": 0.7524999976158142,
|
| 1412 |
+
"rewards/chosen": -0.8591150045394897,
|
| 1413 |
+
"rewards/margins": 0.9913061261177063,
|
| 1414 |
+
"rewards/rejected": -1.8506054878234863,
|
| 1415 |
+
"step": 2075
|
| 1416 |
+
},
|
| 1417 |
+
{
|
| 1418 |
+
"epoch": 2.509493197787412,
|
| 1419 |
+
"grad_norm": 68.0,
|
| 1420 |
+
"learning_rate": 9.060700044306603e-07,
|
| 1421 |
+
"logits/chosen": -1.2847473621368408,
|
| 1422 |
+
"logits/rejected": -1.0720292329788208,
|
| 1423 |
+
"logps/chosen": -337.26251220703125,
|
| 1424 |
+
"logps/rejected": -307.17498779296875,
|
| 1425 |
+
"loss": 0.4101,
|
| 1426 |
+
"rewards/accuracies": 0.7799999713897705,
|
| 1427 |
+
"rewards/chosen": -0.8909338116645813,
|
| 1428 |
+
"rewards/margins": 1.1306884288787842,
|
| 1429 |
+
"rewards/rejected": -2.021728515625,
|
| 1430 |
+
"step": 2100
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"epoch": 2.5393930333383166,
|
| 1434 |
+
"grad_norm": 101.0,
|
| 1435 |
+
"learning_rate": 8.506867523260968e-07,
|
| 1436 |
+
"logits/chosen": -1.1994116306304932,
|
| 1437 |
+
"logits/rejected": -0.9730746746063232,
|
| 1438 |
+
"logps/chosen": -338.3999938964844,
|
| 1439 |
+
"logps/rejected": -304.99749755859375,
|
| 1440 |
+
"loss": 0.4387,
|
| 1441 |
+
"rewards/accuracies": 0.7875000238418579,
|
| 1442 |
+
"rewards/chosen": -0.7841222882270813,
|
| 1443 |
+
"rewards/margins": 1.0449267625808716,
|
| 1444 |
+
"rewards/rejected": -1.829746127128601,
|
| 1445 |
+
"step": 2125
|
| 1446 |
+
},
|
| 1447 |
+
{
|
| 1448 |
+
"epoch": 2.569292868889221,
|
| 1449 |
+
"grad_norm": 68.5,
|
| 1450 |
+
"learning_rate": 7.953035002215331e-07,
|
| 1451 |
+
"logits/chosen": -1.3298254013061523,
|
| 1452 |
+
"logits/rejected": -1.118627667427063,
|
| 1453 |
+
"logps/chosen": -309.739990234375,
|
| 1454 |
+
"logps/rejected": -308.24749755859375,
|
| 1455 |
+
"loss": 0.4449,
|
| 1456 |
+
"rewards/accuracies": 0.7774999737739563,
|
| 1457 |
+
"rewards/chosen": -0.8520336747169495,
|
| 1458 |
+
"rewards/margins": 0.9700658917427063,
|
| 1459 |
+
"rewards/rejected": -1.8218945264816284,
|
| 1460 |
+
"step": 2150
|
| 1461 |
+
},
|
| 1462 |
+
{
|
| 1463 |
+
"epoch": 2.5991927044401257,
|
| 1464 |
+
"grad_norm": 70.5,
|
| 1465 |
+
"learning_rate": 7.399202481169695e-07,
|
| 1466 |
+
"logits/chosen": -1.1831958293914795,
|
| 1467 |
+
"logits/rejected": null,
|
| 1468 |
+
"logps/chosen": -327.49249267578125,
|
| 1469 |
+
"logps/rejected": -289.5924987792969,
|
| 1470 |
+
"loss": 0.4473,
|
| 1471 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 1472 |
+
"rewards/chosen": -0.8408032059669495,
|
| 1473 |
+
"rewards/margins": 0.9420214891433716,
|
| 1474 |
+
"rewards/rejected": -1.7829101085662842,
|
| 1475 |
+
"step": 2175
|
| 1476 |
+
},
|
| 1477 |
+
{
|
| 1478 |
+
"epoch": 2.62909253999103,
|
| 1479 |
+
"grad_norm": 54.0,
|
| 1480 |
+
"learning_rate": 6.845369960124059e-07,
|
| 1481 |
+
"logits/chosen": -1.2656641006469727,
|
| 1482 |
+
"logits/rejected": -0.9782373309135437,
|
| 1483 |
+
"logps/chosen": -324.4200134277344,
|
| 1484 |
+
"logps/rejected": -290.0675048828125,
|
| 1485 |
+
"loss": 0.4419,
|
| 1486 |
+
"rewards/accuracies": 0.7825000286102295,
|
| 1487 |
+
"rewards/chosen": -0.9666149616241455,
|
| 1488 |
+
"rewards/margins": 1.0030114650726318,
|
| 1489 |
+
"rewards/rejected": -1.9694628715515137,
|
| 1490 |
+
"step": 2200
|
| 1491 |
+
},
|
| 1492 |
+
{
|
| 1493 |
+
"epoch": 2.62909253999103,
|
| 1494 |
+
"eval_logits/chosen": -1.1868830919265747,
|
| 1495 |
+
"eval_logits/rejected": -1.0399714708328247,
|
| 1496 |
+
"eval_logps/chosen": -327.6585998535156,
|
| 1497 |
+
"eval_logps/rejected": -306.9704284667969,
|
| 1498 |
+
"eval_loss": 0.5178263783454895,
|
| 1499 |
+
"eval_rewards/accuracies": 0.6993087530136108,
|
| 1500 |
+
"eval_rewards/chosen": -0.8778404593467712,
|
| 1501 |
+
"eval_rewards/margins": 0.7548588514328003,
|
| 1502 |
+
"eval_rewards/rejected": -1.6324502229690552,
|
| 1503 |
+
"eval_runtime": 876.3727,
|
| 1504 |
+
"eval_samples_per_second": 1.697,
|
| 1505 |
+
"eval_steps_per_second": 0.212,
|
| 1506 |
+
"step": 2200
|
| 1507 |
+
},
|
| 1508 |
+
{
|
| 1509 |
+
"epoch": 2.6589923755419345,
|
| 1510 |
+
"grad_norm": 67.5,
|
| 1511 |
+
"learning_rate": 6.291537439078423e-07,
|
| 1512 |
+
"logits/chosen": -1.2253618240356445,
|
| 1513 |
+
"logits/rejected": -1.0349105596542358,
|
| 1514 |
+
"logps/chosen": -336.12249755859375,
|
| 1515 |
+
"logps/rejected": -311.8275146484375,
|
| 1516 |
+
"loss": 0.4574,
|
| 1517 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 1518 |
+
"rewards/chosen": -0.8752642869949341,
|
| 1519 |
+
"rewards/margins": 0.9961340427398682,
|
| 1520 |
+
"rewards/rejected": -1.8713818788528442,
|
| 1521 |
+
"step": 2225
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"epoch": 2.688892211092839,
|
| 1525 |
+
"grad_norm": 100.0,
|
| 1526 |
+
"learning_rate": 5.737704918032787e-07,
|
| 1527 |
+
"logits/chosen": -1.2597771883010864,
|
| 1528 |
+
"logits/rejected": -0.9909564256668091,
|
| 1529 |
+
"logps/chosen": -326.6600036621094,
|
| 1530 |
+
"logps/rejected": -316.19000244140625,
|
| 1531 |
+
"loss": 0.4751,
|
| 1532 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 1533 |
+
"rewards/chosen": -0.9248193502426147,
|
| 1534 |
+
"rewards/margins": 0.9592040777206421,
|
| 1535 |
+
"rewards/rejected": -1.8837096691131592,
|
| 1536 |
+
"step": 2250
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 2.7187920466437436,
|
| 1540 |
+
"grad_norm": 76.0,
|
| 1541 |
+
"learning_rate": 5.183872396987152e-07,
|
| 1542 |
+
"logits/chosen": -1.2072705030441284,
|
| 1543 |
+
"logits/rejected": -0.9592925906181335,
|
| 1544 |
+
"logps/chosen": -322.36248779296875,
|
| 1545 |
+
"logps/rejected": -315.8374938964844,
|
| 1546 |
+
"loss": 0.391,
|
| 1547 |
+
"rewards/accuracies": 0.8274999856948853,
|
| 1548 |
+
"rewards/chosen": -0.7576141357421875,
|
| 1549 |
+
"rewards/margins": 1.160730004310608,
|
| 1550 |
+
"rewards/rejected": -1.9182031154632568,
|
| 1551 |
+
"step": 2275
|
| 1552 |
+
},
|
| 1553 |
+
{
|
| 1554 |
+
"epoch": 2.748691882194648,
|
| 1555 |
+
"grad_norm": 53.0,
|
| 1556 |
+
"learning_rate": 4.630039875941516e-07,
|
| 1557 |
+
"logits/chosen": -1.287199854850769,
|
| 1558 |
+
"logits/rejected": -0.9606054425239563,
|
| 1559 |
+
"logps/chosen": -344.7650146484375,
|
| 1560 |
+
"logps/rejected": -331.24749755859375,
|
| 1561 |
+
"loss": 0.4177,
|
| 1562 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1563 |
+
"rewards/chosen": -0.7748047113418579,
|
| 1564 |
+
"rewards/margins": 1.1645703315734863,
|
| 1565 |
+
"rewards/rejected": -1.9394140243530273,
|
| 1566 |
+
"step": 2300
|
| 1567 |
+
},
|
| 1568 |
+
{
|
| 1569 |
+
"epoch": 2.7785917177455524,
|
| 1570 |
+
"grad_norm": 87.0,
|
| 1571 |
+
"learning_rate": 4.07620735489588e-07,
|
| 1572 |
+
"logits/chosen": -1.2260528802871704,
|
| 1573 |
+
"logits/rejected": -1.0005972385406494,
|
| 1574 |
+
"logps/chosen": -312.9624938964844,
|
| 1575 |
+
"logps/rejected": -323.0400085449219,
|
| 1576 |
+
"loss": 0.3917,
|
| 1577 |
+
"rewards/accuracies": 0.8349999785423279,
|
| 1578 |
+
"rewards/chosen": -0.7925238013267517,
|
| 1579 |
+
"rewards/margins": 1.185449242591858,
|
| 1580 |
+
"rewards/rejected": -1.9780443906784058,
|
| 1581 |
+
"step": 2325
|
| 1582 |
+
},
|
| 1583 |
+
{
|
| 1584 |
+
"epoch": 2.8084915532964567,
|
| 1585 |
+
"grad_norm": 56.5,
|
| 1586 |
+
"learning_rate": 3.5223748338502434e-07,
|
| 1587 |
+
"logits/chosen": -1.2027392387390137,
|
| 1588 |
+
"logits/rejected": -0.989107608795166,
|
| 1589 |
+
"logps/chosen": -321.3762512207031,
|
| 1590 |
+
"logps/rejected": -318.11749267578125,
|
| 1591 |
+
"loss": 0.4052,
|
| 1592 |
+
"rewards/accuracies": 0.8174999952316284,
|
| 1593 |
+
"rewards/chosen": -0.8751891851425171,
|
| 1594 |
+
"rewards/margins": 1.1021533012390137,
|
| 1595 |
+
"rewards/rejected": -1.976718783378601,
|
| 1596 |
+
"step": 2350
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"epoch": 2.838391388847361,
|
| 1600 |
+
"grad_norm": 54.5,
|
| 1601 |
+
"learning_rate": 2.968542312804608e-07,
|
| 1602 |
+
"logits/chosen": -1.2425882816314697,
|
| 1603 |
+
"logits/rejected": -0.9340093731880188,
|
| 1604 |
+
"logps/chosen": -335.12249755859375,
|
| 1605 |
+
"logps/rejected": -320.2049865722656,
|
| 1606 |
+
"loss": 0.4115,
|
| 1607 |
+
"rewards/accuracies": 0.8224999904632568,
|
| 1608 |
+
"rewards/chosen": -0.8292675614356995,
|
| 1609 |
+
"rewards/margins": 1.1182934045791626,
|
| 1610 |
+
"rewards/rejected": -1.9483104944229126,
|
| 1611 |
+
"step": 2375
|
| 1612 |
+
},
|
| 1613 |
+
{
|
| 1614 |
+
"epoch": 2.868291224398266,
|
| 1615 |
+
"grad_norm": 87.0,
|
| 1616 |
+
"learning_rate": 2.4147097917589725e-07,
|
| 1617 |
+
"logits/chosen": -1.3012477159500122,
|
| 1618 |
+
"logits/rejected": -1.0664279460906982,
|
| 1619 |
+
"logps/chosen": -293.489990234375,
|
| 1620 |
+
"logps/rejected": -285.197509765625,
|
| 1621 |
+
"loss": 0.4277,
|
| 1622 |
+
"rewards/accuracies": 0.8025000095367432,
|
| 1623 |
+
"rewards/chosen": -0.8684576153755188,
|
| 1624 |
+
"rewards/margins": 1.069272518157959,
|
| 1625 |
+
"rewards/rejected": -1.9371191263198853,
|
| 1626 |
+
"step": 2400
|
| 1627 |
+
},
|
| 1628 |
+
{
|
| 1629 |
+
"epoch": 2.868291224398266,
|
| 1630 |
+
"eval_logits/chosen": -1.1853525638580322,
|
| 1631 |
+
"eval_logits/rejected": -1.0373817682266235,
|
| 1632 |
+
"eval_logps/chosen": -327.3817138671875,
|
| 1633 |
+
"eval_logps/rejected": -306.81451416015625,
|
| 1634 |
+
"eval_loss": 0.5165102481842041,
|
| 1635 |
+
"eval_rewards/accuracies": 0.7006528377532959,
|
| 1636 |
+
"eval_rewards/chosen": -0.8549529314041138,
|
| 1637 |
+
"eval_rewards/margins": 0.7583125829696655,
|
| 1638 |
+
"eval_rewards/rejected": -1.6133127212524414,
|
| 1639 |
+
"eval_runtime": 876.3322,
|
| 1640 |
+
"eval_samples_per_second": 1.697,
|
| 1641 |
+
"eval_steps_per_second": 0.212,
|
| 1642 |
+
"step": 2400
|
| 1643 |
+
},
|
| 1644 |
+
{
|
| 1645 |
+
"epoch": 2.8981910599491703,
|
| 1646 |
+
"grad_norm": 46.0,
|
| 1647 |
+
"learning_rate": 1.8608772707133363e-07,
|
| 1648 |
+
"logits/chosen": -1.356745958328247,
|
| 1649 |
+
"logits/rejected": -1.0496530532836914,
|
| 1650 |
+
"logps/chosen": -319.9649963378906,
|
| 1651 |
+
"logps/rejected": -309.7025146484375,
|
| 1652 |
+
"loss": 0.4037,
|
| 1653 |
+
"rewards/accuracies": 0.8025000095367432,
|
| 1654 |
+
"rewards/chosen": -0.8254479765892029,
|
| 1655 |
+
"rewards/margins": 1.1192578077316284,
|
| 1656 |
+
"rewards/rejected": -1.9445117712020874,
|
| 1657 |
+
"step": 2425
|
| 1658 |
+
},
|
| 1659 |
+
{
|
| 1660 |
+
"epoch": 2.9280908955000746,
|
| 1661 |
+
"grad_norm": 70.5,
|
| 1662 |
+
"learning_rate": 1.3070447496677006e-07,
|
| 1663 |
+
"logits/chosen": -1.2751880884170532,
|
| 1664 |
+
"logits/rejected": -1.0796799659729004,
|
| 1665 |
+
"logps/chosen": -316.9425048828125,
|
| 1666 |
+
"logps/rejected": -325.7550048828125,
|
| 1667 |
+
"loss": 0.4306,
|
| 1668 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 1669 |
+
"rewards/chosen": -0.8079773187637329,
|
| 1670 |
+
"rewards/margins": 1.000207543373108,
|
| 1671 |
+
"rewards/rejected": -1.8083984851837158,
|
| 1672 |
+
"step": 2450
|
| 1673 |
+
},
|
| 1674 |
+
{
|
| 1675 |
+
"epoch": 2.9579907310509794,
|
| 1676 |
+
"grad_norm": 74.0,
|
| 1677 |
+
"learning_rate": 7.532122286220647e-08,
|
| 1678 |
+
"logits/chosen": -1.2595221996307373,
|
| 1679 |
+
"logits/rejected": -1.0140166282653809,
|
| 1680 |
+
"logps/chosen": -320.6000061035156,
|
| 1681 |
+
"logps/rejected": -318.6600036621094,
|
| 1682 |
+
"loss": 0.4808,
|
| 1683 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 1684 |
+
"rewards/chosen": -1.000390648841858,
|
| 1685 |
+
"rewards/margins": 0.931530773639679,
|
| 1686 |
+
"rewards/rejected": -1.9319677352905273,
|
| 1687 |
+
"step": 2475
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 2.987890566601884,
|
| 1691 |
+
"grad_norm": 78.5,
|
| 1692 |
+
"learning_rate": 1.993797075764289e-08,
|
| 1693 |
+
"logits/chosen": -1.2403491735458374,
|
| 1694 |
+
"logits/rejected": -0.9544309973716736,
|
| 1695 |
+
"logps/chosen": -343.76251220703125,
|
| 1696 |
+
"logps/rejected": -336.38250732421875,
|
| 1697 |
+
"loss": 0.4225,
|
| 1698 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1699 |
+
"rewards/chosen": -0.7856341600418091,
|
| 1700 |
+
"rewards/margins": 1.0573632717132568,
|
| 1701 |
+
"rewards/rejected": -1.8428466320037842,
|
| 1702 |
+
"step": 2500
|
| 1703 |
+
}
|
| 1704 |
+
],
|
| 1705 |
+
"logging_steps": 25,
|
| 1706 |
+
"max_steps": 2508,
|
| 1707 |
+
"num_input_tokens_seen": 0,
|
| 1708 |
+
"num_train_epochs": 3,
|
| 1709 |
+
"save_steps": 500,
|
| 1710 |
+
"stateful_callbacks": {
|
| 1711 |
+
"TrainerControl": {
|
| 1712 |
+
"args": {
|
| 1713 |
+
"should_epoch_stop": false,
|
| 1714 |
+
"should_evaluate": false,
|
| 1715 |
+
"should_log": false,
|
| 1716 |
+
"should_save": true,
|
| 1717 |
+
"should_training_stop": true
|
| 1718 |
+
},
|
| 1719 |
+
"attributes": {}
|
| 1720 |
+
}
|
| 1721 |
+
},
|
| 1722 |
+
"total_flos": 0.0,
|
| 1723 |
+
"train_batch_size": 2,
|
| 1724 |
+
"trial_name": null,
|
| 1725 |
+
"trial_params": null
|
| 1726 |
+
}
|
dpo_model_3epochs/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26fe0d38674121f3f33fb74b85ccfe78d08f9c5766a0e3ca44f2163d55e9851d
|
| 3 |
+
size 6609
|
dpo_model_3epochs/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"eos_token_id": 151643,
|
| 4 |
+
"max_new_tokens": 2048,
|
| 5 |
+
"transformers_version": "4.51.3"
|
| 6 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae8feb1e7622241976a5843cd1b296ffae1b5b65b5adfb1fe1d0ceddae8bfac9
|
| 3 |
+
size 1192135096
|
optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2485bf9a2879e8c4f855840a82e342ffab34fbb8d5bf28103b4ad7c839efc316
|
| 3 |
+
size 2384460363
|
rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95d744506ed8242dbe82c0f3357716f73248e5153ff68604326958faa28d9296
|
| 3 |
+
size 14645
|
scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18d1bfdafd4174e2c90ffd290b1a170a373f9028a1e742c7e6606e40b86c917e
|
| 3 |
+
size 1465
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|endoftext|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = 
message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
|
| 231 |
+
"clean_up_tokenization_spaces": false,
|
| 232 |
+
"eos_token": "<|endoftext|>",
|
| 233 |
+
"errors": "replace",
|
| 234 |
+
"extra_special_tokens": {},
|
| 235 |
+
"model_max_length": 131072,
|
| 236 |
+
"pad_token": "<|endoftext|>",
|
| 237 |
+
"split_special_tokens": false,
|
| 238 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 239 |
+
"unk_token": null
|
| 240 |
+
}
|
trainer_state.json
ADDED
|
@@ -0,0 +1,1726 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_global_step": null,
|
| 3 |
+
"best_metric": null,
|
| 4 |
+
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.997458513978173,
|
| 6 |
+
"eval_steps": 200,
|
| 7 |
+
"global_step": 2508,
|
| 8 |
+
"is_hyper_param_search": false,
|
| 9 |
+
"is_local_process_zero": true,
|
| 10 |
+
"is_world_process_zero": true,
|
| 11 |
+
"log_history": [
|
| 12 |
+
{
|
| 13 |
+
"epoch": 0.02989983555090447,
|
| 14 |
+
"grad_norm": 78.5,
|
| 15 |
+
"learning_rate": 4.780876494023904e-07,
|
| 16 |
+
"logits/chosen": -0.8346603512763977,
|
| 17 |
+
"logits/rejected": -0.5625396966934204,
|
| 18 |
+
"logps/chosen": -311.11248779296875,
|
| 19 |
+
"logps/rejected": -290.71624755859375,
|
| 20 |
+
"loss": 0.6974,
|
| 21 |
+
"rewards/accuracies": 0.3199999928474426,
|
| 22 |
+
"rewards/chosen": -0.005879516713321209,
|
| 23 |
+
"rewards/margins": -0.0028140258509665728,
|
| 24 |
+
"rewards/rejected": -0.003072815015912056,
|
| 25 |
+
"step": 25
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"epoch": 0.05979967110180894,
|
| 29 |
+
"grad_norm": 112.5,
|
| 30 |
+
"learning_rate": 9.760956175298805e-07,
|
| 31 |
+
"logits/chosen": -0.8477816581726074,
|
| 32 |
+
"logits/rejected": -0.5839244723320007,
|
| 33 |
+
"logps/chosen": -341.1449890136719,
|
| 34 |
+
"logps/rejected": -303.2749938964844,
|
| 35 |
+
"loss": 0.6939,
|
| 36 |
+
"rewards/accuracies": 0.33500000834465027,
|
| 37 |
+
"rewards/chosen": -0.01889648474752903,
|
| 38 |
+
"rewards/margins": 0.0013772583333775401,
|
| 39 |
+
"rewards/rejected": -0.020271606743335724,
|
| 40 |
+
"step": 50
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 0.08969950665271341,
|
| 44 |
+
"grad_norm": 89.5,
|
| 45 |
+
"learning_rate": 1.4741035856573708e-06,
|
| 46 |
+
"logits/chosen": -0.7348077893257141,
|
| 47 |
+
"logits/rejected": -0.419241338968277,
|
| 48 |
+
"logps/chosen": -311.4237365722656,
|
| 49 |
+
"logps/rejected": -284.5274963378906,
|
| 50 |
+
"loss": 0.7,
|
| 51 |
+
"rewards/accuracies": 0.28999999165534973,
|
| 52 |
+
"rewards/chosen": -0.020579833537340164,
|
| 53 |
+
"rewards/margins": -0.008827819488942623,
|
| 54 |
+
"rewards/rejected": -0.011761474423110485,
|
| 55 |
+
"step": 75
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"epoch": 0.11959934220361788,
|
| 59 |
+
"grad_norm": 89.5,
|
| 60 |
+
"learning_rate": 1.9721115537848607e-06,
|
| 61 |
+
"logits/chosen": -0.9120362997055054,
|
| 62 |
+
"logits/rejected": -0.566675066947937,
|
| 63 |
+
"logps/chosen": -322.989990234375,
|
| 64 |
+
"logps/rejected": -276.8037414550781,
|
| 65 |
+
"loss": 0.6868,
|
| 66 |
+
"rewards/accuracies": 0.3675000071525574,
|
| 67 |
+
"rewards/chosen": -0.027477417141199112,
|
| 68 |
+
"rewards/margins": 0.018669739365577698,
|
| 69 |
+
"rewards/rejected": -0.04612060636281967,
|
| 70 |
+
"step": 100
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 0.14949917775452234,
|
| 74 |
+
"grad_norm": 83.0,
|
| 75 |
+
"learning_rate": 2.470119521912351e-06,
|
| 76 |
+
"logits/chosen": -0.8410671353340149,
|
| 77 |
+
"logits/rejected": -0.43034911155700684,
|
| 78 |
+
"logps/chosen": -297.4024963378906,
|
| 79 |
+
"logps/rejected": -304.4224853515625,
|
| 80 |
+
"loss": 0.6832,
|
| 81 |
+
"rewards/accuracies": 0.36000001430511475,
|
| 82 |
+
"rewards/chosen": -0.05832824856042862,
|
| 83 |
+
"rewards/margins": 0.02584075927734375,
|
| 84 |
+
"rewards/rejected": -0.08419036865234375,
|
| 85 |
+
"step": 125
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"epoch": 0.17939901330542682,
|
| 89 |
+
"grad_norm": 106.5,
|
| 90 |
+
"learning_rate": 2.968127490039841e-06,
|
| 91 |
+
"logits/chosen": -0.9279866814613342,
|
| 92 |
+
"logits/rejected": -0.6811022758483887,
|
| 93 |
+
"logps/chosen": -312.67498779296875,
|
| 94 |
+
"logps/rejected": -285.7799987792969,
|
| 95 |
+
"loss": 0.6709,
|
| 96 |
+
"rewards/accuracies": 0.49000000953674316,
|
| 97 |
+
"rewards/chosen": -0.07547790557146072,
|
| 98 |
+
"rewards/margins": 0.056133728474378586,
|
| 99 |
+
"rewards/rejected": -0.1316046118736267,
|
| 100 |
+
"step": 150
|
| 101 |
+
},
|
| 102 |
+
{
|
| 103 |
+
"epoch": 0.2092988488563313,
|
| 104 |
+
"grad_norm": 96.0,
|
| 105 |
+
"learning_rate": 3.466135458167331e-06,
|
| 106 |
+
"logits/chosen": -0.8703573346138,
|
| 107 |
+
"logits/rejected": -0.5601403713226318,
|
| 108 |
+
"logps/chosen": -323.947509765625,
|
| 109 |
+
"logps/rejected": -292.8074951171875,
|
| 110 |
+
"loss": 0.6696,
|
| 111 |
+
"rewards/accuracies": 0.4950000047683716,
|
| 112 |
+
"rewards/chosen": -0.11684814095497131,
|
| 113 |
+
"rewards/margins": 0.06319641321897507,
|
| 114 |
+
"rewards/rejected": -0.1800549328327179,
|
| 115 |
+
"step": 175
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"epoch": 0.23919868440723577,
|
| 119 |
+
"grad_norm": 99.0,
|
| 120 |
+
"learning_rate": 3.9641434262948205e-06,
|
| 121 |
+
"logits/chosen": -0.9258654713630676,
|
| 122 |
+
"logits/rejected": -0.5686477422714233,
|
| 123 |
+
"logps/chosen": -328.7449951171875,
|
| 124 |
+
"logps/rejected": -316.5574951171875,
|
| 125 |
+
"loss": 0.6579,
|
| 126 |
+
"rewards/accuracies": 0.550000011920929,
|
| 127 |
+
"rewards/chosen": -0.1883123815059662,
|
| 128 |
+
"rewards/margins": 0.09867187589406967,
|
| 129 |
+
"rewards/rejected": -0.28693297505378723,
|
| 130 |
+
"step": 200
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 0.23919868440723577,
|
| 134 |
+
"eval_logits/chosen": -0.9216321706771851,
|
| 135 |
+
"eval_logits/rejected": -0.7277408838272095,
|
| 136 |
+
"eval_logps/chosen": -320.7849426269531,
|
| 137 |
+
"eval_logps/rejected": -293.8709716796875,
|
| 138 |
+
"eval_loss": 0.6465986371040344,
|
| 139 |
+
"eval_rewards/accuracies": 0.560387909412384,
|
| 140 |
+
"eval_rewards/chosen": -0.19119606912136078,
|
| 141 |
+
"eval_rewards/margins": 0.1261032223701477,
|
| 142 |
+
"eval_rewards/rejected": -0.31729716062545776,
|
| 143 |
+
"eval_runtime": 877.9315,
|
| 144 |
+
"eval_samples_per_second": 1.694,
|
| 145 |
+
"eval_steps_per_second": 0.212,
|
| 146 |
+
"step": 200
|
| 147 |
+
},
|
| 148 |
+
{
|
| 149 |
+
"epoch": 0.2690985199581402,
|
| 150 |
+
"grad_norm": 87.0,
|
| 151 |
+
"learning_rate": 4.462151394422311e-06,
|
| 152 |
+
"logits/chosen": -0.8007558584213257,
|
| 153 |
+
"logits/rejected": -0.505867600440979,
|
| 154 |
+
"logps/chosen": -320.7512512207031,
|
| 155 |
+
"logps/rejected": -311.8299865722656,
|
| 156 |
+
"loss": 0.6444,
|
| 157 |
+
"rewards/accuracies": 0.5649999976158142,
|
| 158 |
+
"rewards/chosen": -0.2540551722049713,
|
| 159 |
+
"rewards/margins": 0.14147095382213593,
|
| 160 |
+
"rewards/rejected": -0.3954962193965912,
|
| 161 |
+
"step": 225
|
| 162 |
+
},
|
| 163 |
+
{
|
| 164 |
+
"epoch": 0.2989983555090447,
|
| 165 |
+
"grad_norm": 96.5,
|
| 166 |
+
"learning_rate": 4.960159362549802e-06,
|
| 167 |
+
"logits/chosen": -0.9090196490287781,
|
| 168 |
+
"logits/rejected": -0.6456773281097412,
|
| 169 |
+
"logps/chosen": -323.7200012207031,
|
| 170 |
+
"logps/rejected": -295.2149963378906,
|
| 171 |
+
"loss": 0.6255,
|
| 172 |
+
"rewards/accuracies": 0.6000000238418579,
|
| 173 |
+
"rewards/chosen": -0.2805468738079071,
|
| 174 |
+
"rewards/margins": 0.19930054247379303,
|
| 175 |
+
"rewards/rejected": -0.47991272807121277,
|
| 176 |
+
"step": 250
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"epoch": 0.32889819105994916,
|
| 180 |
+
"grad_norm": 91.0,
|
| 181 |
+
"learning_rate": 4.9490474080638015e-06,
|
| 182 |
+
"logits/chosen": -0.9534767270088196,
|
| 183 |
+
"logits/rejected": -0.6329247951507568,
|
| 184 |
+
"logps/chosen": -319.1549987792969,
|
| 185 |
+
"logps/rejected": -283.88751220703125,
|
| 186 |
+
"loss": 0.6192,
|
| 187 |
+
"rewards/accuracies": 0.5924999713897705,
|
| 188 |
+
"rewards/chosen": -0.29086607694625854,
|
| 189 |
+
"rewards/margins": 0.23339904844760895,
|
| 190 |
+
"rewards/rejected": -0.5240704417228699,
|
| 191 |
+
"step": 275
|
| 192 |
+
},
|
| 193 |
+
{
|
| 194 |
+
"epoch": 0.35879802661085364,
|
| 195 |
+
"grad_norm": 70.5,
|
| 196 |
+
"learning_rate": 4.8936641559592385e-06,
|
| 197 |
+
"logits/chosen": -0.9436456561088562,
|
| 198 |
+
"logits/rejected": -0.7789434790611267,
|
| 199 |
+
"logps/chosen": -349.5050048828125,
|
| 200 |
+
"logps/rejected": -310.48748779296875,
|
| 201 |
+
"loss": 0.627,
|
| 202 |
+
"rewards/accuracies": 0.6349999904632568,
|
| 203 |
+
"rewards/chosen": -0.30020782351493835,
|
| 204 |
+
"rewards/margins": 0.23243407905101776,
|
| 205 |
+
"rewards/rejected": -0.532727062702179,
|
| 206 |
+
"step": 300
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 0.3886978621617581,
|
| 210 |
+
"grad_norm": 101.0,
|
| 211 |
+
"learning_rate": 4.838280903854675e-06,
|
| 212 |
+
"logits/chosen": -0.9607565402984619,
|
| 213 |
+
"logits/rejected": -0.7166936993598938,
|
| 214 |
+
"logps/chosen": -317.0874938964844,
|
| 215 |
+
"logps/rejected": -289.0824890136719,
|
| 216 |
+
"loss": 0.5906,
|
| 217 |
+
"rewards/accuracies": 0.6524999737739563,
|
| 218 |
+
"rewards/chosen": -0.4176098704338074,
|
| 219 |
+
"rewards/margins": 0.3300067186355591,
|
| 220 |
+
"rewards/rejected": -0.7473974823951721,
|
| 221 |
+
"step": 325
|
| 222 |
+
},
|
| 223 |
+
{
|
| 224 |
+
"epoch": 0.4185976977126626,
|
| 225 |
+
"grad_norm": 94.0,
|
| 226 |
+
"learning_rate": 4.782897651750112e-06,
|
| 227 |
+
"logits/chosen": -0.9818115234375,
|
| 228 |
+
"logits/rejected": -0.6833120584487915,
|
| 229 |
+
"logps/chosen": -321.1875,
|
| 230 |
+
"logps/rejected": -316.58624267578125,
|
| 231 |
+
"loss": 0.577,
|
| 232 |
+
"rewards/accuracies": 0.675000011920929,
|
| 233 |
+
"rewards/chosen": -0.4978076219558716,
|
| 234 |
+
"rewards/margins": 0.39054566621780396,
|
| 235 |
+
"rewards/rejected": -0.8884375095367432,
|
| 236 |
+
"step": 350
|
| 237 |
+
},
|
| 238 |
+
{
|
| 239 |
+
"epoch": 0.44849753326356706,
|
| 240 |
+
"grad_norm": 83.5,
|
| 241 |
+
"learning_rate": 4.727514399645548e-06,
|
| 242 |
+
"logits/chosen": -1.0211011171340942,
|
| 243 |
+
"logits/rejected": -0.7218142747879028,
|
| 244 |
+
"logps/chosen": -307.9674987792969,
|
| 245 |
+
"logps/rejected": -288.7850036621094,
|
| 246 |
+
"loss": 0.5544,
|
| 247 |
+
"rewards/accuracies": 0.6974999904632568,
|
| 248 |
+
"rewards/chosen": -0.4097009301185608,
|
| 249 |
+
"rewards/margins": 0.4377111792564392,
|
| 250 |
+
"rewards/rejected": -0.8475390672683716,
|
| 251 |
+
"step": 375
|
| 252 |
+
},
|
| 253 |
+
{
|
| 254 |
+
"epoch": 0.47839736881447154,
|
| 255 |
+
"grad_norm": 77.5,
|
| 256 |
+
"learning_rate": 4.672131147540984e-06,
|
| 257 |
+
"logits/chosen": -0.9680676460266113,
|
| 258 |
+
"logits/rejected": -0.7582107782363892,
|
| 259 |
+
"logps/chosen": -337.9375,
|
| 260 |
+
"logps/rejected": -313.7749938964844,
|
| 261 |
+
"loss": 0.5977,
|
| 262 |
+
"rewards/accuracies": 0.6549999713897705,
|
| 263 |
+
"rewards/chosen": -0.5489477515220642,
|
| 264 |
+
"rewards/margins": 0.35999757051467896,
|
| 265 |
+
"rewards/rejected": -0.9089636206626892,
|
| 266 |
+
"step": 400
|
| 267 |
+
},
|
| 268 |
+
{
|
| 269 |
+
"epoch": 0.47839736881447154,
|
| 270 |
+
"eval_logits/chosen": -1.041106939315796,
|
| 271 |
+
"eval_logits/rejected": -0.8698605895042419,
|
| 272 |
+
"eval_logps/chosen": -323.7284851074219,
|
| 273 |
+
"eval_logps/rejected": -299.6156005859375,
|
| 274 |
+
"eval_loss": 0.5722406506538391,
|
| 275 |
+
"eval_rewards/accuracies": 0.6610022783279419,
|
| 276 |
+
"eval_rewards/chosen": -0.4932539761066437,
|
| 277 |
+
"eval_rewards/margins": 0.40423059463500977,
|
| 278 |
+
"eval_rewards/rejected": -0.8973480463027954,
|
| 279 |
+
"eval_runtime": 876.344,
|
| 280 |
+
"eval_samples_per_second": 1.697,
|
| 281 |
+
"eval_steps_per_second": 0.212,
|
| 282 |
+
"step": 400
|
| 283 |
+
},
|
| 284 |
+
{
|
| 285 |
+
"epoch": 0.508297204365376,
|
| 286 |
+
"grad_norm": 84.0,
|
| 287 |
+
"learning_rate": 4.61674789543642e-06,
|
| 288 |
+
"logits/chosen": -1.2390661239624023,
|
| 289 |
+
"logits/rejected": -0.9836773872375488,
|
| 290 |
+
"logps/chosen": -328.1875,
|
| 291 |
+
"logps/rejected": -317.32501220703125,
|
| 292 |
+
"loss": 0.5527,
|
| 293 |
+
"rewards/accuracies": 0.6675000190734863,
|
| 294 |
+
"rewards/chosen": -0.6254773139953613,
|
| 295 |
+
"rewards/margins": 0.5287072658538818,
|
| 296 |
+
"rewards/rejected": -1.153835415840149,
|
| 297 |
+
"step": 425
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"epoch": 0.5381970399162804,
|
| 301 |
+
"grad_norm": 93.0,
|
| 302 |
+
"learning_rate": 4.561364643331857e-06,
|
| 303 |
+
"logits/chosen": -1.0737494230270386,
|
| 304 |
+
"logits/rejected": -0.8683199882507324,
|
| 305 |
+
"logps/chosen": -316.00250244140625,
|
| 306 |
+
"logps/rejected": -295.9649963378906,
|
| 307 |
+
"loss": 0.5736,
|
| 308 |
+
"rewards/accuracies": 0.6600000262260437,
|
| 309 |
+
"rewards/chosen": -0.539447009563446,
|
| 310 |
+
"rewards/margins": 0.46495360136032104,
|
| 311 |
+
"rewards/rejected": -1.0048657655715942,
|
| 312 |
+
"step": 450
|
| 313 |
+
},
|
| 314 |
+
{
|
| 315 |
+
"epoch": 0.5680968754671849,
|
| 316 |
+
"grad_norm": 73.5,
|
| 317 |
+
"learning_rate": 4.505981391227293e-06,
|
| 318 |
+
"logits/chosen": -1.052968144416809,
|
| 319 |
+
"logits/rejected": -0.7523078322410583,
|
| 320 |
+
"logps/chosen": -318.50250244140625,
|
| 321 |
+
"logps/rejected": -313.8175048828125,
|
| 322 |
+
"loss": 0.5422,
|
| 323 |
+
"rewards/accuracies": 0.7149999737739563,
|
| 324 |
+
"rewards/chosen": -0.5196704268455505,
|
| 325 |
+
"rewards/margins": 0.5570727586746216,
|
| 326 |
+
"rewards/rejected": -1.0764819383621216,
|
| 327 |
+
"step": 475
|
| 328 |
+
},
|
| 329 |
+
{
|
| 330 |
+
"epoch": 0.5979967110180894,
|
| 331 |
+
"grad_norm": 70.0,
|
| 332 |
+
"learning_rate": 4.4505981391227295e-06,
|
| 333 |
+
"logits/chosen": -1.1461485624313354,
|
| 334 |
+
"logits/rejected": -0.9354357719421387,
|
| 335 |
+
"logps/chosen": -324.4750061035156,
|
| 336 |
+
"logps/rejected": -294.0775146484375,
|
| 337 |
+
"loss": 0.5415,
|
| 338 |
+
"rewards/accuracies": 0.7074999809265137,
|
| 339 |
+
"rewards/chosen": -0.518980085849762,
|
| 340 |
+
"rewards/margins": 0.5734081864356995,
|
| 341 |
+
"rewards/rejected": -1.092441439628601,
|
| 342 |
+
"step": 500
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"epoch": 0.6278965465689939,
|
| 346 |
+
"grad_norm": 84.0,
|
| 347 |
+
"learning_rate": 4.395214887018166e-06,
|
| 348 |
+
"logits/chosen": -1.091801404953003,
|
| 349 |
+
"logits/rejected": -0.8006445169448853,
|
| 350 |
+
"logps/chosen": -323.1724853515625,
|
| 351 |
+
"logps/rejected": -294.4674987792969,
|
| 352 |
+
"loss": 0.5646,
|
| 353 |
+
"rewards/accuracies": 0.6700000166893005,
|
| 354 |
+
"rewards/chosen": -0.672253429889679,
|
| 355 |
+
"rewards/margins": 0.5069983005523682,
|
| 356 |
+
"rewards/rejected": -1.1792798042297363,
|
| 357 |
+
"step": 525
|
| 358 |
+
},
|
| 359 |
+
{
|
| 360 |
+
"epoch": 0.6577963821198983,
|
| 361 |
+
"grad_norm": 95.0,
|
| 362 |
+
"learning_rate": 4.339831634913603e-06,
|
| 363 |
+
"logits/chosen": -1.220596194267273,
|
| 364 |
+
"logits/rejected": -0.9236291646957397,
|
| 365 |
+
"logps/chosen": -316.7950134277344,
|
| 366 |
+
"logps/rejected": -302.0824890136719,
|
| 367 |
+
"loss": 0.5178,
|
| 368 |
+
"rewards/accuracies": 0.737500011920929,
|
| 369 |
+
"rewards/chosen": -0.7468109130859375,
|
| 370 |
+
"rewards/margins": 0.6105853319168091,
|
| 371 |
+
"rewards/rejected": -1.3566796779632568,
|
| 372 |
+
"step": 550
|
| 373 |
+
},
|
| 374 |
+
{
|
| 375 |
+
"epoch": 0.6876962176708028,
|
| 376 |
+
"grad_norm": 100.0,
|
| 377 |
+
"learning_rate": 4.284448382809039e-06,
|
| 378 |
+
"logits/chosen": -1.0421770811080933,
|
| 379 |
+
"logits/rejected": -0.7285050749778748,
|
| 380 |
+
"logps/chosen": -308.42498779296875,
|
| 381 |
+
"logps/rejected": -269.7037353515625,
|
| 382 |
+
"loss": 0.5448,
|
| 383 |
+
"rewards/accuracies": 0.6850000023841858,
|
| 384 |
+
"rewards/chosen": -0.7317401170730591,
|
| 385 |
+
"rewards/margins": 0.5794018507003784,
|
| 386 |
+
"rewards/rejected": -1.3115381002426147,
|
| 387 |
+
"step": 575
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"epoch": 0.7175960532217073,
|
| 391 |
+
"grad_norm": 97.5,
|
| 392 |
+
"learning_rate": 4.229065130704476e-06,
|
| 393 |
+
"logits/chosen": -1.1298235654830933,
|
| 394 |
+
"logits/rejected": -0.7811802625656128,
|
| 395 |
+
"logps/chosen": -322.0574951171875,
|
| 396 |
+
"logps/rejected": -309.9750061035156,
|
| 397 |
+
"loss": 0.5292,
|
| 398 |
+
"rewards/accuracies": 0.7124999761581421,
|
| 399 |
+
"rewards/chosen": -0.590954601764679,
|
| 400 |
+
"rewards/margins": 0.6085253953933716,
|
| 401 |
+
"rewards/rejected": -1.1989331245422363,
|
| 402 |
+
"step": 600
|
| 403 |
+
},
|
| 404 |
+
{
|
| 405 |
+
"epoch": 0.7175960532217073,
|
| 406 |
+
"eval_logits/chosen": -1.078187346458435,
|
| 407 |
+
"eval_logits/rejected": -0.9206746220588684,
|
| 408 |
+
"eval_logps/chosen": -324.5967712402344,
|
| 409 |
+
"eval_logps/rejected": -301.7204284667969,
|
| 410 |
+
"eval_loss": 0.5492891669273376,
|
| 411 |
+
"eval_rewards/accuracies": 0.6757872104644775,
|
| 412 |
+
"eval_rewards/chosen": -0.5633505582809448,
|
| 413 |
+
"eval_rewards/margins": 0.5408346652984619,
|
| 414 |
+
"eval_rewards/rejected": -1.1038333177566528,
|
| 415 |
+
"eval_runtime": 876.4047,
|
| 416 |
+
"eval_samples_per_second": 1.697,
|
| 417 |
+
"eval_steps_per_second": 0.212,
|
| 418 |
+
"step": 600
|
| 419 |
+
},
|
| 420 |
+
{
|
| 421 |
+
"epoch": 0.7474958887726117,
|
| 422 |
+
"grad_norm": 87.5,
|
| 423 |
+
"learning_rate": 4.173681878599912e-06,
|
| 424 |
+
"logits/chosen": -1.1809699535369873,
|
| 425 |
+
"logits/rejected": -0.8887664675712585,
|
| 426 |
+
"logps/chosen": -303.6575012207031,
|
| 427 |
+
"logps/rejected": -294.7774963378906,
|
| 428 |
+
"loss": 0.5261,
|
| 429 |
+
"rewards/accuracies": 0.7275000214576721,
|
| 430 |
+
"rewards/chosen": -0.5871319770812988,
|
| 431 |
+
"rewards/margins": 0.6293676495552063,
|
| 432 |
+
"rewards/rejected": -1.2162939310073853,
|
| 433 |
+
"step": 625
|
| 434 |
+
},
|
| 435 |
+
{
|
| 436 |
+
"epoch": 0.7773957243235162,
|
| 437 |
+
"grad_norm": 99.5,
|
| 438 |
+
"learning_rate": 4.118298626495348e-06,
|
| 439 |
+
"logits/chosen": -1.1009465456008911,
|
| 440 |
+
"logits/rejected": -0.9342904686927795,
|
| 441 |
+
"logps/chosen": -338.12750244140625,
|
| 442 |
+
"logps/rejected": -318.96624755859375,
|
| 443 |
+
"loss": 0.5603,
|
| 444 |
+
"rewards/accuracies": 0.6850000023841858,
|
| 445 |
+
"rewards/chosen": -0.714611828327179,
|
| 446 |
+
"rewards/margins": 0.6232568621635437,
|
| 447 |
+
"rewards/rejected": -1.3377538919448853,
|
| 448 |
+
"step": 650
|
| 449 |
+
},
|
| 450 |
+
{
|
| 451 |
+
"epoch": 0.8072955598744207,
|
| 452 |
+
"grad_norm": 72.5,
|
| 453 |
+
"learning_rate": 4.062915374390784e-06,
|
| 454 |
+
"logits/chosen": -1.2523653507232666,
|
| 455 |
+
"logits/rejected": -1.0046355724334717,
|
| 456 |
+
"logps/chosen": -310.9049987792969,
|
| 457 |
+
"logps/rejected": -297.67498779296875,
|
| 458 |
+
"loss": 0.5135,
|
| 459 |
+
"rewards/accuracies": 0.7099999785423279,
|
| 460 |
+
"rewards/chosen": -0.7437072992324829,
|
| 461 |
+
"rewards/margins": 0.6859521269798279,
|
| 462 |
+
"rewards/rejected": -1.4290771484375,
|
| 463 |
+
"step": 675
|
| 464 |
+
},
|
| 465 |
+
{
|
| 466 |
+
"epoch": 0.8371953954253252,
|
| 467 |
+
"grad_norm": 89.0,
|
| 468 |
+
"learning_rate": 4.007532122286221e-06,
|
| 469 |
+
"logits/chosen": -1.2401965856552124,
|
| 470 |
+
"logits/rejected": -0.8460285663604736,
|
| 471 |
+
"logps/chosen": -336.927490234375,
|
| 472 |
+
"logps/rejected": -318.7799987792969,
|
| 473 |
+
"loss": 0.5186,
|
| 474 |
+
"rewards/accuracies": 0.7250000238418579,
|
| 475 |
+
"rewards/chosen": -0.7741259932518005,
|
| 476 |
+
"rewards/margins": 0.7083032131195068,
|
| 477 |
+
"rewards/rejected": -1.4823095798492432,
|
| 478 |
+
"step": 700
|
| 479 |
+
},
|
| 480 |
+
{
|
| 481 |
+
"epoch": 0.8670952309762296,
|
| 482 |
+
"grad_norm": 78.0,
|
| 483 |
+
"learning_rate": 3.9521488701816575e-06,
|
| 484 |
+
"logits/chosen": -1.1703033447265625,
|
| 485 |
+
"logits/rejected": -0.9548498392105103,
|
| 486 |
+
"logps/chosen": -287.87249755859375,
|
| 487 |
+
"logps/rejected": -300.864990234375,
|
| 488 |
+
"loss": 0.5476,
|
| 489 |
+
"rewards/accuracies": 0.6825000047683716,
|
| 490 |
+
"rewards/chosen": -0.8389843702316284,
|
| 491 |
+
"rewards/margins": 0.608197033405304,
|
| 492 |
+
"rewards/rejected": -1.447534203529358,
|
| 493 |
+
"step": 725
|
| 494 |
+
},
|
| 495 |
+
{
|
| 496 |
+
"epoch": 0.8969950665271341,
|
| 497 |
+
"grad_norm": 100.5,
|
| 498 |
+
"learning_rate": 3.896765618077094e-06,
|
| 499 |
+
"logits/chosen": -1.1477763652801514,
|
| 500 |
+
"logits/rejected": -0.9038227796554565,
|
| 501 |
+
"logps/chosen": -338.31500244140625,
|
| 502 |
+
"logps/rejected": -319.9649963378906,
|
| 503 |
+
"loss": 0.5148,
|
| 504 |
+
"rewards/accuracies": 0.7250000238418579,
|
| 505 |
+
"rewards/chosen": -0.8131677508354187,
|
| 506 |
+
"rewards/margins": 0.7464379668235779,
|
| 507 |
+
"rewards/rejected": -1.559140682220459,
|
| 508 |
+
"step": 750
|
| 509 |
+
},
|
| 510 |
+
{
|
| 511 |
+
"epoch": 0.9268949020780386,
|
| 512 |
+
"grad_norm": 92.0,
|
| 513 |
+
"learning_rate": 3.84138236597253e-06,
|
| 514 |
+
"logits/chosen": -1.2342950105667114,
|
| 515 |
+
"logits/rejected": -0.946718156337738,
|
| 516 |
+
"logps/chosen": -331.1512451171875,
|
| 517 |
+
"logps/rejected": -304.0249938964844,
|
| 518 |
+
"loss": 0.528,
|
| 519 |
+
"rewards/accuracies": 0.7149999737739563,
|
| 520 |
+
"rewards/chosen": -0.9154602289199829,
|
| 521 |
+
"rewards/margins": 0.6957080364227295,
|
| 522 |
+
"rewards/rejected": -1.6108520030975342,
|
| 523 |
+
"step": 775
|
| 524 |
+
},
|
| 525 |
+
{
|
| 526 |
+
"epoch": 0.9567947376289431,
|
| 527 |
+
"grad_norm": 102.0,
|
| 528 |
+
"learning_rate": 3.7859991138679664e-06,
|
| 529 |
+
"logits/chosen": -1.0906939506530762,
|
| 530 |
+
"logits/rejected": -0.9649511575698853,
|
| 531 |
+
"logps/chosen": -338.5637512207031,
|
| 532 |
+
"logps/rejected": -338.4674987792969,
|
| 533 |
+
"loss": 0.5151,
|
| 534 |
+
"rewards/accuracies": 0.7200000286102295,
|
| 535 |
+
"rewards/chosen": -0.859545886516571,
|
| 536 |
+
"rewards/margins": 0.7704944014549255,
|
| 537 |
+
"rewards/rejected": -1.630163550376892,
|
| 538 |
+
"step": 800
|
| 539 |
+
},
|
| 540 |
+
{
|
| 541 |
+
"epoch": 0.9567947376289431,
|
| 542 |
+
"eval_logits/chosen": -1.1360965967178345,
|
| 543 |
+
"eval_logits/rejected": -0.9822049736976624,
|
| 544 |
+
"eval_logps/chosen": -326.69891357421875,
|
| 545 |
+
"eval_logps/rejected": -305.0,
|
| 546 |
+
"eval_loss": 0.5390191674232483,
|
| 547 |
+
"eval_rewards/accuracies": 0.687980055809021,
|
| 548 |
+
"eval_rewards/chosen": -0.7810032367706299,
|
| 549 |
+
"eval_rewards/margins": 0.6442182064056396,
|
| 550 |
+
"eval_rewards/rejected": -1.4252588748931885,
|
| 551 |
+
"eval_runtime": 876.4063,
|
| 552 |
+
"eval_samples_per_second": 1.697,
|
| 553 |
+
"eval_steps_per_second": 0.212,
|
| 554 |
+
"step": 800
|
| 555 |
+
},
|
| 556 |
+
{
|
| 557 |
+
"epoch": 0.9866945731798475,
|
| 558 |
+
"grad_norm": 84.5,
|
| 559 |
+
"learning_rate": 3.730615861763403e-06,
|
| 560 |
+
"logits/chosen": -1.2244549989700317,
|
| 561 |
+
"logits/rejected": NaN,
|
| 562 |
+
"logps/chosen": -334.5425109863281,
|
| 563 |
+
"logps/rejected": -339.23748779296875,
|
| 564 |
+
"loss": 0.5275,
|
| 565 |
+
"rewards/accuracies": 0.7149999737739563,
|
| 566 |
+
"rewards/chosen": -0.8379321098327637,
|
| 567 |
+
"rewards/margins": 0.715624988079071,
|
| 568 |
+
"rewards/rejected": -1.554010033607483,
|
| 569 |
+
"step": 825
|
| 570 |
+
},
|
| 571 |
+
{
|
| 572 |
+
"epoch": 1.0155479144864703,
|
| 573 |
+
"grad_norm": 57.25,
|
| 574 |
+
"learning_rate": 3.675232609658839e-06,
|
| 575 |
+
"logits/chosen": -1.2397924661636353,
|
| 576 |
+
"logits/rejected": -1.030158281326294,
|
| 577 |
+
"logps/chosen": -320.9093322753906,
|
| 578 |
+
"logps/rejected": -305.8393859863281,
|
| 579 |
+
"loss": 0.4669,
|
| 580 |
+
"rewards/accuracies": 0.7487046718597412,
|
| 581 |
+
"rewards/chosen": -0.7694060206413269,
|
| 582 |
+
"rewards/margins": 0.8478080630302429,
|
| 583 |
+
"rewards/rejected": -1.6172634363174438,
|
| 584 |
+
"step": 850
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 1.045447750037375,
|
| 588 |
+
"grad_norm": 67.5,
|
| 589 |
+
"learning_rate": 3.6198493575542758e-06,
|
| 590 |
+
"logits/chosen": -1.2220094203948975,
|
| 591 |
+
"logits/rejected": -0.9582018852233887,
|
| 592 |
+
"logps/chosen": -318.0262451171875,
|
| 593 |
+
"logps/rejected": -297.5799865722656,
|
| 594 |
+
"loss": 0.4691,
|
| 595 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 596 |
+
"rewards/chosen": -0.7301892042160034,
|
| 597 |
+
"rewards/margins": 0.9199609160423279,
|
| 598 |
+
"rewards/rejected": -1.6502331495285034,
|
| 599 |
+
"step": 875
|
| 600 |
+
},
|
| 601 |
+
{
|
| 602 |
+
"epoch": 1.0753475855882793,
|
| 603 |
+
"grad_norm": 73.5,
|
| 604 |
+
"learning_rate": 3.564466105449712e-06,
|
| 605 |
+
"logits/chosen": -1.089396357536316,
|
| 606 |
+
"logits/rejected": -0.8958370685577393,
|
| 607 |
+
"logps/chosen": -317.61749267578125,
|
| 608 |
+
"logps/rejected": -295.4825134277344,
|
| 609 |
+
"loss": 0.4746,
|
| 610 |
+
"rewards/accuracies": 0.7574999928474426,
|
| 611 |
+
"rewards/chosen": -0.8305737376213074,
|
| 612 |
+
"rewards/margins": 0.8526538014411926,
|
| 613 |
+
"rewards/rejected": -1.6829102039337158,
|
| 614 |
+
"step": 900
|
| 615 |
+
},
|
| 616 |
+
{
|
| 617 |
+
"epoch": 1.1052474211391838,
|
| 618 |
+
"grad_norm": 64.5,
|
| 619 |
+
"learning_rate": 3.509082853345149e-06,
|
| 620 |
+
"logits/chosen": -1.1403405666351318,
|
| 621 |
+
"logits/rejected": -0.8662219047546387,
|
| 622 |
+
"logps/chosen": -322.0574951171875,
|
| 623 |
+
"logps/rejected": -323.2074890136719,
|
| 624 |
+
"loss": 0.4641,
|
| 625 |
+
"rewards/accuracies": 0.7649999856948853,
|
| 626 |
+
"rewards/chosen": -0.6764746308326721,
|
| 627 |
+
"rewards/margins": 0.8836804032325745,
|
| 628 |
+
"rewards/rejected": -1.5600537061691284,
|
| 629 |
+
"step": 925
|
| 630 |
+
},
|
| 631 |
+
{
|
| 632 |
+
"epoch": 1.1351472566900882,
|
| 633 |
+
"grad_norm": 66.0,
|
| 634 |
+
"learning_rate": 3.453699601240585e-06,
|
| 635 |
+
"logits/chosen": -1.2375200986862183,
|
| 636 |
+
"logits/rejected": -0.9549773931503296,
|
| 637 |
+
"logps/chosen": -321.0874938964844,
|
| 638 |
+
"logps/rejected": -306.6000061035156,
|
| 639 |
+
"loss": 0.4201,
|
| 640 |
+
"rewards/accuracies": 0.8224999904632568,
|
| 641 |
+
"rewards/chosen": -0.7068628072738647,
|
| 642 |
+
"rewards/margins": 1.0075805187225342,
|
| 643 |
+
"rewards/rejected": -1.7146776914596558,
|
| 644 |
+
"step": 950
|
| 645 |
+
},
|
| 646 |
+
{
|
| 647 |
+
"epoch": 1.1650470922409926,
|
| 648 |
+
"grad_norm": 64.0,
|
| 649 |
+
"learning_rate": 3.3983163491360217e-06,
|
| 650 |
+
"logits/chosen": -1.1668496131896973,
|
| 651 |
+
"logits/rejected": -0.8835460543632507,
|
| 652 |
+
"logps/chosen": -320.69000244140625,
|
| 653 |
+
"logps/rejected": -323.0425109863281,
|
| 654 |
+
"loss": 0.459,
|
| 655 |
+
"rewards/accuracies": 0.7825000286102295,
|
| 656 |
+
"rewards/chosen": -0.7173047065734863,
|
| 657 |
+
"rewards/margins": 0.9243432879447937,
|
| 658 |
+
"rewards/rejected": -1.6417040824890137,
|
| 659 |
+
"step": 975
|
| 660 |
+
},
|
| 661 |
+
{
|
| 662 |
+
"epoch": 1.1949469277918972,
|
| 663 |
+
"grad_norm": 62.75,
|
| 664 |
+
"learning_rate": 3.342933097031458e-06,
|
| 665 |
+
"logits/chosen": -1.2166632413864136,
|
| 666 |
+
"logits/rejected": -0.9624554514884949,
|
| 667 |
+
"logps/chosen": -301.0849914550781,
|
| 668 |
+
"logps/rejected": -304.3475036621094,
|
| 669 |
+
"loss": 0.4656,
|
| 670 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 671 |
+
"rewards/chosen": -0.7919347882270813,
|
| 672 |
+
"rewards/margins": 0.9388867020606995,
|
| 673 |
+
"rewards/rejected": -1.73046875,
|
| 674 |
+
"step": 1000
|
| 675 |
+
},
|
| 676 |
+
{
|
| 677 |
+
"epoch": 1.1949469277918972,
|
| 678 |
+
"eval_logits/chosen": -1.160080075263977,
|
| 679 |
+
"eval_logits/rejected": -1.0079379081726074,
|
| 680 |
+
"eval_logps/chosen": -326.43280029296875,
|
| 681 |
+
"eval_logps/rejected": -305.1102294921875,
|
| 682 |
+
"eval_loss": 0.527574896812439,
|
| 683 |
+
"eval_rewards/accuracies": 0.6892281174659729,
|
| 684 |
+
"eval_rewards/chosen": -0.7565616369247437,
|
| 685 |
+
"eval_rewards/margins": 0.6851438879966736,
|
| 686 |
+
"eval_rewards/rejected": -1.4416320323944092,
|
| 687 |
+
"eval_runtime": 876.3772,
|
| 688 |
+
"eval_samples_per_second": 1.697,
|
| 689 |
+
"eval_steps_per_second": 0.212,
|
| 690 |
+
"step": 1000
|
| 691 |
+
},
|
| 692 |
+
{
|
| 693 |
+
"epoch": 1.2248467633428017,
|
| 694 |
+
"grad_norm": 84.0,
|
| 695 |
+
"learning_rate": 3.2875498449268944e-06,
|
| 696 |
+
"logits/chosen": -1.1776912212371826,
|
| 697 |
+
"logits/rejected": -1.050445556640625,
|
| 698 |
+
"logps/chosen": -343.0050048828125,
|
| 699 |
+
"logps/rejected": -331.1875,
|
| 700 |
+
"loss": 0.4213,
|
| 701 |
+
"rewards/accuracies": 0.8050000071525574,
|
| 702 |
+
"rewards/chosen": -0.6588146686553955,
|
| 703 |
+
"rewards/margins": 1.0112402439117432,
|
| 704 |
+
"rewards/rejected": -1.670253872871399,
|
| 705 |
+
"step": 1025
|
| 706 |
+
},
|
| 707 |
+
{
|
| 708 |
+
"epoch": 1.254746598893706,
|
| 709 |
+
"grad_norm": 66.0,
|
| 710 |
+
"learning_rate": 3.2321665928223306e-06,
|
| 711 |
+
"logits/chosen": -1.2721245288848877,
|
| 712 |
+
"logits/rejected": -0.9186769127845764,
|
| 713 |
+
"logps/chosen": -316.4549865722656,
|
| 714 |
+
"logps/rejected": -315.2925109863281,
|
| 715 |
+
"loss": 0.4838,
|
| 716 |
+
"rewards/accuracies": 0.7825000286102295,
|
| 717 |
+
"rewards/chosen": -0.8342553973197937,
|
| 718 |
+
"rewards/margins": 0.83197021484375,
|
| 719 |
+
"rewards/rejected": -1.665708065032959,
|
| 720 |
+
"step": 1050
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"epoch": 1.2846464344446105,
|
| 724 |
+
"grad_norm": 62.75,
|
| 725 |
+
"learning_rate": 3.176783340717767e-06,
|
| 726 |
+
"logits/chosen": -1.1176886558532715,
|
| 727 |
+
"logits/rejected": -0.9960334300994873,
|
| 728 |
+
"logps/chosen": -328.32501220703125,
|
| 729 |
+
"logps/rejected": -328.3450012207031,
|
| 730 |
+
"loss": 0.4538,
|
| 731 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 732 |
+
"rewards/chosen": -0.7273278832435608,
|
| 733 |
+
"rewards/margins": 0.9573754668235779,
|
| 734 |
+
"rewards/rejected": -1.684999942779541,
|
| 735 |
+
"step": 1075
|
| 736 |
+
},
|
| 737 |
+
{
|
| 738 |
+
"epoch": 1.314546269995515,
|
| 739 |
+
"grad_norm": 84.5,
|
| 740 |
+
"learning_rate": 3.1214000886132033e-06,
|
| 741 |
+
"logits/chosen": -1.1655590534210205,
|
| 742 |
+
"logits/rejected": -0.8922329545021057,
|
| 743 |
+
"logps/chosen": -314.9700012207031,
|
| 744 |
+
"logps/rejected": -301.5050048828125,
|
| 745 |
+
"loss": 0.4483,
|
| 746 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 747 |
+
"rewards/chosen": -0.6278771758079529,
|
| 748 |
+
"rewards/margins": 0.9427502155303955,
|
| 749 |
+
"rewards/rejected": -1.5707299709320068,
|
| 750 |
+
"step": 1100
|
| 751 |
+
},
|
| 752 |
+
{
|
| 753 |
+
"epoch": 1.3444461055464194,
|
| 754 |
+
"grad_norm": 69.5,
|
| 755 |
+
"learning_rate": 3.06601683650864e-06,
|
| 756 |
+
"logits/chosen": -1.2217812538146973,
|
| 757 |
+
"logits/rejected": -0.976731538772583,
|
| 758 |
+
"logps/chosen": -324.7850036621094,
|
| 759 |
+
"logps/rejected": -316.4599914550781,
|
| 760 |
+
"loss": 0.4368,
|
| 761 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 762 |
+
"rewards/chosen": -0.7704944014549255,
|
| 763 |
+
"rewards/margins": 0.9598730206489563,
|
| 764 |
+
"rewards/rejected": -1.7300487756729126,
|
| 765 |
+
"step": 1125
|
| 766 |
+
},
|
| 767 |
+
{
|
| 768 |
+
"epoch": 1.374345941097324,
|
| 769 |
+
"grad_norm": 81.0,
|
| 770 |
+
"learning_rate": 3.010633584404076e-06,
|
| 771 |
+
"logits/chosen": -1.203802466392517,
|
| 772 |
+
"logits/rejected": -0.9061872959136963,
|
| 773 |
+
"logps/chosen": -330.4175109863281,
|
| 774 |
+
"logps/rejected": -312.9987487792969,
|
| 775 |
+
"loss": 0.4787,
|
| 776 |
+
"rewards/accuracies": 0.75,
|
| 777 |
+
"rewards/chosen": -0.7830480933189392,
|
| 778 |
+
"rewards/margins": 0.9129126071929932,
|
| 779 |
+
"rewards/rejected": -1.6956127882003784,
|
| 780 |
+
"step": 1150
|
| 781 |
+
},
|
| 782 |
+
{
|
| 783 |
+
"epoch": 1.4042457766482284,
|
| 784 |
+
"grad_norm": 118.0,
|
| 785 |
+
"learning_rate": 2.955250332299513e-06,
|
| 786 |
+
"logits/chosen": -1.1928298473358154,
|
| 787 |
+
"logits/rejected": -0.8999917507171631,
|
| 788 |
+
"logps/chosen": -320.2650146484375,
|
| 789 |
+
"logps/rejected": -301.5299987792969,
|
| 790 |
+
"loss": 0.4698,
|
| 791 |
+
"rewards/accuracies": 0.7549999952316284,
|
| 792 |
+
"rewards/chosen": -0.8731860518455505,
|
| 793 |
+
"rewards/margins": 0.9074377417564392,
|
| 794 |
+
"rewards/rejected": -1.7800854444503784,
|
| 795 |
+
"step": 1175
|
| 796 |
+
},
|
| 797 |
+
{
|
| 798 |
+
"epoch": 1.434145612199133,
|
| 799 |
+
"grad_norm": 65.0,
|
| 800 |
+
"learning_rate": 2.8998670801949493e-06,
|
| 801 |
+
"logits/chosen": -1.1984894275665283,
|
| 802 |
+
"logits/rejected": -0.9353277683258057,
|
| 803 |
+
"logps/chosen": -317.625,
|
| 804 |
+
"logps/rejected": -325.4075012207031,
|
| 805 |
+
"loss": 0.4502,
|
| 806 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 807 |
+
"rewards/chosen": -0.9375879168510437,
|
| 808 |
+
"rewards/margins": 0.9699438214302063,
|
| 809 |
+
"rewards/rejected": -1.9072656631469727,
|
| 810 |
+
"step": 1200
|
| 811 |
+
},
|
| 812 |
+
{
|
| 813 |
+
"epoch": 1.434145612199133,
|
| 814 |
+
"eval_logits/chosen": -1.156473159790039,
|
| 815 |
+
"eval_logits/rejected": -1.006028413772583,
|
| 816 |
+
"eval_logps/chosen": -327.82794189453125,
|
| 817 |
+
"eval_logps/rejected": -306.8521423339844,
|
| 818 |
+
"eval_loss": 0.5231196284294128,
|
| 819 |
+
"eval_rewards/accuracies": 0.6926843523979187,
|
| 820 |
+
"eval_rewards/chosen": -0.8996713161468506,
|
| 821 |
+
"eval_rewards/margins": 0.7130159735679626,
|
| 822 |
+
"eval_rewards/rejected": -1.6129347085952759,
|
| 823 |
+
"eval_runtime": 876.3506,
|
| 824 |
+
"eval_samples_per_second": 1.697,
|
| 825 |
+
"eval_steps_per_second": 0.212,
|
| 826 |
+
"step": 1200
|
| 827 |
+
},
|
| 828 |
+
{
|
| 829 |
+
"epoch": 1.4640454477500373,
|
| 830 |
+
"grad_norm": 99.5,
|
| 831 |
+
"learning_rate": 2.844483828090386e-06,
|
| 832 |
+
"logits/chosen": -1.339633822441101,
|
| 833 |
+
"logits/rejected": -1.035129427909851,
|
| 834 |
+
"logps/chosen": -332.54998779296875,
|
| 835 |
+
"logps/rejected": -319.13751220703125,
|
| 836 |
+
"loss": 0.4421,
|
| 837 |
+
"rewards/accuracies": 0.7799999713897705,
|
| 838 |
+
"rewards/chosen": -0.8549670577049255,
|
| 839 |
+
"rewards/margins": 1.0162646770477295,
|
| 840 |
+
"rewards/rejected": -1.8712304830551147,
|
| 841 |
+
"step": 1225
|
| 842 |
+
},
|
| 843 |
+
{
|
| 844 |
+
"epoch": 1.493945283300942,
|
| 845 |
+
"grad_norm": 83.5,
|
| 846 |
+
"learning_rate": 2.789100575985822e-06,
|
| 847 |
+
"logits/chosen": -1.1476205587387085,
|
| 848 |
+
"logits/rejected": -0.9250108599662781,
|
| 849 |
+
"logps/chosen": -322.0050048828125,
|
| 850 |
+
"logps/rejected": -309.3500061035156,
|
| 851 |
+
"loss": 0.4555,
|
| 852 |
+
"rewards/accuracies": 0.7549999952316284,
|
| 853 |
+
"rewards/chosen": -0.8130224347114563,
|
| 854 |
+
"rewards/margins": 0.9434008598327637,
|
| 855 |
+
"rewards/rejected": -1.7563867568969727,
|
| 856 |
+
"step": 1250
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 1.5238451188518463,
|
| 860 |
+
"grad_norm": 63.75,
|
| 861 |
+
"learning_rate": 2.7337173238812586e-06,
|
| 862 |
+
"logits/chosen": -1.2015457153320312,
|
| 863 |
+
"logits/rejected": -0.8530246019363403,
|
| 864 |
+
"logps/chosen": -309.01251220703125,
|
| 865 |
+
"logps/rejected": -297.7825012207031,
|
| 866 |
+
"loss": 0.4501,
|
| 867 |
+
"rewards/accuracies": 0.7875000238418579,
|
| 868 |
+
"rewards/chosen": -0.836810290813446,
|
| 869 |
+
"rewards/margins": 0.9292749166488647,
|
| 870 |
+
"rewards/rejected": -1.7654907703399658,
|
| 871 |
+
"step": 1275
|
| 872 |
+
},
|
| 873 |
+
{
|
| 874 |
+
"epoch": 1.5537449544027506,
|
| 875 |
+
"grad_norm": 67.0,
|
| 876 |
+
"learning_rate": 2.6783340717766948e-06,
|
| 877 |
+
"logits/chosen": -1.2457306385040283,
|
| 878 |
+
"logits/rejected": -1.0591107606887817,
|
| 879 |
+
"logps/chosen": -337.9775085449219,
|
| 880 |
+
"logps/rejected": -308.5375061035156,
|
| 881 |
+
"loss": 0.4248,
|
| 882 |
+
"rewards/accuracies": 0.800000011920929,
|
| 883 |
+
"rewards/chosen": -0.7735278606414795,
|
| 884 |
+
"rewards/margins": 1.035646915435791,
|
| 885 |
+
"rewards/rejected": -1.8087304830551147,
|
| 886 |
+
"step": 1300
|
| 887 |
+
},
|
| 888 |
+
{
|
| 889 |
+
"epoch": 1.5836447899536552,
|
| 890 |
+
"grad_norm": 51.0,
|
| 891 |
+
"learning_rate": 2.6229508196721314e-06,
|
| 892 |
+
"logits/chosen": -1.216982126235962,
|
| 893 |
+
"logits/rejected": -0.8925817608833313,
|
| 894 |
+
"logps/chosen": -333.2349853515625,
|
| 895 |
+
"logps/rejected": -316.62249755859375,
|
| 896 |
+
"loss": 0.4568,
|
| 897 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 898 |
+
"rewards/chosen": -0.8274877667427063,
|
| 899 |
+
"rewards/margins": 0.9530566334724426,
|
| 900 |
+
"rewards/rejected": -1.7805664539337158,
|
| 901 |
+
"step": 1325
|
| 902 |
+
},
|
| 903 |
+
{
|
| 904 |
+
"epoch": 1.6135446255045598,
|
| 905 |
+
"grad_norm": 82.0,
|
| 906 |
+
"learning_rate": 2.5675675675675675e-06,
|
| 907 |
+
"logits/chosen": -1.3132140636444092,
|
| 908 |
+
"logits/rejected": -1.004296898841858,
|
| 909 |
+
"logps/chosen": -342.4949951171875,
|
| 910 |
+
"logps/rejected": -317.69500732421875,
|
| 911 |
+
"loss": 0.429,
|
| 912 |
+
"rewards/accuracies": 0.8050000071525574,
|
| 913 |
+
"rewards/chosen": -0.9008423089981079,
|
| 914 |
+
"rewards/margins": 1.0281542539596558,
|
| 915 |
+
"rewards/rejected": -1.9285448789596558,
|
| 916 |
+
"step": 1350
|
| 917 |
+
},
|
| 918 |
+
{
|
| 919 |
+
"epoch": 1.6434444610554642,
|
| 920 |
+
"grad_norm": 116.5,
|
| 921 |
+
"learning_rate": 2.5121843154630045e-06,
|
| 922 |
+
"logits/chosen": -1.1408294439315796,
|
| 923 |
+
"logits/rejected": -0.9321377277374268,
|
| 924 |
+
"logps/chosen": -335.291259765625,
|
| 925 |
+
"logps/rejected": -321.29376220703125,
|
| 926 |
+
"loss": 0.453,
|
| 927 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 928 |
+
"rewards/chosen": -0.8236993551254272,
|
| 929 |
+
"rewards/margins": 0.9510498046875,
|
| 930 |
+
"rewards/rejected": -1.77447509765625,
|
| 931 |
+
"step": 1375
|
| 932 |
+
},
|
| 933 |
+
{
|
| 934 |
+
"epoch": 1.6733442966063685,
|
| 935 |
+
"grad_norm": 91.0,
|
| 936 |
+
"learning_rate": 2.4568010633584403e-06,
|
| 937 |
+
"logits/chosen": -1.1858936548233032,
|
| 938 |
+
"logits/rejected": -0.9579010009765625,
|
| 939 |
+
"logps/chosen": -320.9949951171875,
|
| 940 |
+
"logps/rejected": -296.3374938964844,
|
| 941 |
+
"loss": 0.4699,
|
| 942 |
+
"rewards/accuracies": 0.7425000071525574,
|
| 943 |
+
"rewards/chosen": -0.8678625226020813,
|
| 944 |
+
"rewards/margins": 0.9215136766433716,
|
| 945 |
+
"rewards/rejected": -1.7896509170532227,
|
| 946 |
+
"step": 1400
|
| 947 |
+
},
|
| 948 |
+
{
|
| 949 |
+
"epoch": 1.6733442966063685,
|
| 950 |
+
"eval_logits/chosen": -1.1674253940582275,
|
| 951 |
+
"eval_logits/rejected": -1.0171688795089722,
|
| 952 |
+
"eval_logps/chosen": -327.3978576660156,
|
| 953 |
+
"eval_logps/rejected": -306.6209716796875,
|
| 954 |
+
"eval_loss": 0.5191056728363037,
|
| 955 |
+
"eval_rewards/accuracies": 0.6933563947677612,
|
| 956 |
+
"eval_rewards/chosen": -0.8476693630218506,
|
| 957 |
+
"eval_rewards/margins": 0.7431673407554626,
|
| 958 |
+
"eval_rewards/rejected": -1.5906811952590942,
|
| 959 |
+
"eval_runtime": 876.3262,
|
| 960 |
+
"eval_samples_per_second": 1.697,
|
| 961 |
+
"eval_steps_per_second": 0.212,
|
| 962 |
+
"step": 1400
|
| 963 |
+
},
|
| 964 |
+
{
|
| 965 |
+
"epoch": 1.703244132157273,
|
| 966 |
+
"grad_norm": 82.0,
|
| 967 |
+
"learning_rate": 2.401417811253877e-06,
|
| 968 |
+
"logits/chosen": -1.1833282709121704,
|
| 969 |
+
"logits/rejected": -0.9263910055160522,
|
| 970 |
+
"logps/chosen": -324.5150146484375,
|
| 971 |
+
"logps/rejected": -316.1650085449219,
|
| 972 |
+
"loss": 0.451,
|
| 973 |
+
"rewards/accuracies": 0.7799999713897705,
|
| 974 |
+
"rewards/chosen": -0.8199084401130676,
|
| 975 |
+
"rewards/margins": 0.9980810284614563,
|
| 976 |
+
"rewards/rejected": -1.8175097703933716,
|
| 977 |
+
"step": 1425
|
| 978 |
+
},
|
| 979 |
+
{
|
| 980 |
+
"epoch": 1.7331439677081777,
|
| 981 |
+
"grad_norm": 99.0,
|
| 982 |
+
"learning_rate": 2.3460345591493135e-06,
|
| 983 |
+
"logits/chosen": -1.1936352252960205,
|
| 984 |
+
"logits/rejected": -1.0041576623916626,
|
| 985 |
+
"logps/chosen": -350.885009765625,
|
| 986 |
+
"logps/rejected": -327.0450134277344,
|
| 987 |
+
"loss": 0.4702,
|
| 988 |
+
"rewards/accuracies": 0.75,
|
| 989 |
+
"rewards/chosen": -0.9122155904769897,
|
| 990 |
+
"rewards/margins": 0.9335852265357971,
|
| 991 |
+
"rewards/rejected": -1.8462109565734863,
|
| 992 |
+
"step": 1450
|
| 993 |
+
},
|
| 994 |
+
{
|
| 995 |
+
"epoch": 1.763043803259082,
|
| 996 |
+
"grad_norm": 59.5,
|
| 997 |
+
"learning_rate": 2.2906513070447496e-06,
|
| 998 |
+
"logits/chosen": -1.3379946947097778,
|
| 999 |
+
"logits/rejected": -1.0853075981140137,
|
| 1000 |
+
"logps/chosen": -299.1099853515625,
|
| 1001 |
+
"logps/rejected": -299.9725036621094,
|
| 1002 |
+
"loss": 0.4607,
|
| 1003 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 1004 |
+
"rewards/chosen": -0.905989408493042,
|
| 1005 |
+
"rewards/margins": 1.0363476276397705,
|
| 1006 |
+
"rewards/rejected": -1.942041039466858,
|
| 1007 |
+
"step": 1475
|
| 1008 |
+
},
|
| 1009 |
+
{
|
| 1010 |
+
"epoch": 1.7929436388099864,
|
| 1011 |
+
"grad_norm": 102.0,
|
| 1012 |
+
"learning_rate": 2.235268054940186e-06,
|
| 1013 |
+
"logits/chosen": -1.1545830965042114,
|
| 1014 |
+
"logits/rejected": -0.8675525188446045,
|
| 1015 |
+
"logps/chosen": -321.79998779296875,
|
| 1016 |
+
"logps/rejected": -300.4262390136719,
|
| 1017 |
+
"loss": 0.4854,
|
| 1018 |
+
"rewards/accuracies": 0.7425000071525574,
|
| 1019 |
+
"rewards/chosen": -0.8690832257270813,
|
| 1020 |
+
"rewards/margins": 0.9056127667427063,
|
| 1021 |
+
"rewards/rejected": -1.7749096155166626,
|
| 1022 |
+
"step": 1500
|
| 1023 |
+
},
|
| 1024 |
+
{
|
| 1025 |
+
"epoch": 1.822843474360891,
|
| 1026 |
+
"grad_norm": 60.0,
|
| 1027 |
+
"learning_rate": 2.179884802835623e-06,
|
| 1028 |
+
"logits/chosen": -1.2606717348098755,
|
| 1029 |
+
"logits/rejected": -1.0567920207977295,
|
| 1030 |
+
"logps/chosen": -328.82501220703125,
|
| 1031 |
+
"logps/rejected": -304.1050109863281,
|
| 1032 |
+
"loss": 0.4552,
|
| 1033 |
+
"rewards/accuracies": 0.7850000262260437,
|
| 1034 |
+
"rewards/chosen": -0.743670642375946,
|
| 1035 |
+
"rewards/margins": 1.0134960412979126,
|
| 1036 |
+
"rewards/rejected": -1.7573193311691284,
|
| 1037 |
+
"step": 1525
|
| 1038 |
+
},
|
| 1039 |
+
{
|
| 1040 |
+
"epoch": 1.8527433099117956,
|
| 1041 |
+
"grad_norm": 59.5,
|
| 1042 |
+
"learning_rate": 2.124501550731059e-06,
|
| 1043 |
+
"logits/chosen": -1.2121707201004028,
|
| 1044 |
+
"logits/rejected": -1.002629041671753,
|
| 1045 |
+
"logps/chosen": -323.5950012207031,
|
| 1046 |
+
"logps/rejected": -317.5299987792969,
|
| 1047 |
+
"loss": 0.4645,
|
| 1048 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 1049 |
+
"rewards/chosen": -0.9758337140083313,
|
| 1050 |
+
"rewards/margins": 0.9835278391838074,
|
| 1051 |
+
"rewards/rejected": -1.959287166595459,
|
| 1052 |
+
"step": 1550
|
| 1053 |
+
},
|
| 1054 |
+
{
|
| 1055 |
+
"epoch": 1.8826431454627,
|
| 1056 |
+
"grad_norm": 71.0,
|
| 1057 |
+
"learning_rate": 2.0691182986264955e-06,
|
| 1058 |
+
"logits/chosen": -1.296298861503601,
|
| 1059 |
+
"logits/rejected": NaN,
|
| 1060 |
+
"logps/chosen": -325.7699890136719,
|
| 1061 |
+
"logps/rejected": -299.322509765625,
|
| 1062 |
+
"loss": 0.4515,
|
| 1063 |
+
"rewards/accuracies": 0.7599999904632568,
|
| 1064 |
+
"rewards/chosen": -0.8331592082977295,
|
| 1065 |
+
"rewards/margins": 0.9821679592132568,
|
| 1066 |
+
"rewards/rejected": -1.8158252239227295,
|
| 1067 |
+
"step": 1575
|
| 1068 |
+
},
|
| 1069 |
+
{
|
| 1070 |
+
"epoch": 1.9125429810136043,
|
| 1071 |
+
"grad_norm": 70.0,
|
| 1072 |
+
"learning_rate": 2.0137350465219317e-06,
|
| 1073 |
+
"logits/chosen": -1.2260925769805908,
|
| 1074 |
+
"logits/rejected": -0.9426334500312805,
|
| 1075 |
+
"logps/chosen": -330.06500244140625,
|
| 1076 |
+
"logps/rejected": -309.68499755859375,
|
| 1077 |
+
"loss": 0.4436,
|
| 1078 |
+
"rewards/accuracies": 0.7649999856948853,
|
| 1079 |
+
"rewards/chosen": -0.830242931842804,
|
| 1080 |
+
"rewards/margins": 0.9743407964706421,
|
| 1081 |
+
"rewards/rejected": -1.804931640625,
|
| 1082 |
+
"step": 1600
|
| 1083 |
+
},
|
| 1084 |
+
{
|
| 1085 |
+
"epoch": 1.9125429810136043,
|
| 1086 |
+
"eval_logits/chosen": -1.1829742193222046,
|
| 1087 |
+
"eval_logits/rejected": -1.033914566040039,
|
| 1088 |
+
"eval_logps/chosen": -327.43011474609375,
|
| 1089 |
+
"eval_logps/rejected": -306.69085693359375,
|
| 1090 |
+
"eval_loss": 0.5206477046012878,
|
| 1091 |
+
"eval_rewards/accuracies": 0.6974846720695496,
|
| 1092 |
+
"eval_rewards/chosen": -0.8544062376022339,
|
| 1093 |
+
"eval_rewards/margins": 0.7440763115882874,
|
| 1094 |
+
"eval_rewards/rejected": -1.598265290260315,
|
| 1095 |
+
"eval_runtime": 876.3416,
|
| 1096 |
+
"eval_samples_per_second": 1.697,
|
| 1097 |
+
"eval_steps_per_second": 0.212,
|
| 1098 |
+
"step": 1600
|
| 1099 |
+
},
|
| 1100 |
+
{
|
| 1101 |
+
"epoch": 1.942442816564509,
|
| 1102 |
+
"grad_norm": 73.5,
|
| 1103 |
+
"learning_rate": 1.9583517944173683e-06,
|
| 1104 |
+
"logits/chosen": -1.246303677558899,
|
| 1105 |
+
"logits/rejected": -0.9357275366783142,
|
| 1106 |
+
"logps/chosen": -332.3599853515625,
|
| 1107 |
+
"logps/rejected": -309.1700134277344,
|
| 1108 |
+
"loss": 0.4702,
|
| 1109 |
+
"rewards/accuracies": 0.762499988079071,
|
| 1110 |
+
"rewards/chosen": -0.8381909132003784,
|
| 1111 |
+
"rewards/margins": 0.9997217059135437,
|
| 1112 |
+
"rewards/rejected": -1.837497591972351,
|
| 1113 |
+
"step": 1625
|
| 1114 |
+
},
|
| 1115 |
+
{
|
| 1116 |
+
"epoch": 1.9723426521154135,
|
| 1117 |
+
"grad_norm": 68.5,
|
| 1118 |
+
"learning_rate": 1.9029685423128047e-06,
|
| 1119 |
+
"logits/chosen": -1.2618129253387451,
|
| 1120 |
+
"logits/rejected": -1.0779250860214233,
|
| 1121 |
+
"logps/chosen": -339.9324951171875,
|
| 1122 |
+
"logps/rejected": -318.04998779296875,
|
| 1123 |
+
"loss": 0.4583,
|
| 1124 |
+
"rewards/accuracies": 0.762499988079071,
|
| 1125 |
+
"rewards/chosen": -0.8390514850616455,
|
| 1126 |
+
"rewards/margins": 1.0396826267242432,
|
| 1127 |
+
"rewards/rejected": -1.878564476966858,
|
| 1128 |
+
"step": 1650
|
| 1129 |
+
},
|
| 1130 |
+
{
|
| 1131 |
+
"epoch": 2.0011959934220362,
|
| 1132 |
+
"grad_norm": 97.0,
|
| 1133 |
+
"learning_rate": 1.847585290208241e-06,
|
| 1134 |
+
"logits/chosen": -1.2342288494110107,
|
| 1135 |
+
"logits/rejected": -0.9683116674423218,
|
| 1136 |
+
"logps/chosen": -332.2409362792969,
|
| 1137 |
+
"logps/rejected": -321.0531005859375,
|
| 1138 |
+
"loss": 0.424,
|
| 1139 |
+
"rewards/accuracies": 0.7642487287521362,
|
| 1140 |
+
"rewards/chosen": -0.7630558013916016,
|
| 1141 |
+
"rewards/margins": 1.0779491662979126,
|
| 1142 |
+
"rewards/rejected": -1.8409063816070557,
|
| 1143 |
+
"step": 1675
|
| 1144 |
+
},
|
| 1145 |
+
{
|
| 1146 |
+
"epoch": 2.0310958289729406,
|
| 1147 |
+
"grad_norm": 76.0,
|
| 1148 |
+
"learning_rate": 1.7922020381036776e-06,
|
| 1149 |
+
"logits/chosen": -1.318371295928955,
|
| 1150 |
+
"logits/rejected": -1.0083489418029785,
|
| 1151 |
+
"logps/chosen": -327.114990234375,
|
| 1152 |
+
"logps/rejected": -336.697509765625,
|
| 1153 |
+
"loss": 0.3965,
|
| 1154 |
+
"rewards/accuracies": 0.8475000262260437,
|
| 1155 |
+
"rewards/chosen": -0.7496582269668579,
|
| 1156 |
+
"rewards/margins": 1.0661474466323853,
|
| 1157 |
+
"rewards/rejected": -1.8159960508346558,
|
| 1158 |
+
"step": 1700
|
| 1159 |
+
},
|
| 1160 |
+
{
|
| 1161 |
+
"epoch": 2.060995664523845,
|
| 1162 |
+
"grad_norm": 102.5,
|
| 1163 |
+
"learning_rate": 1.736818785999114e-06,
|
| 1164 |
+
"logits/chosen": -1.2396435737609863,
|
| 1165 |
+
"logits/rejected": -0.9828730225563049,
|
| 1166 |
+
"logps/chosen": -332.7074890136719,
|
| 1167 |
+
"logps/rejected": -333.37249755859375,
|
| 1168 |
+
"loss": 0.4101,
|
| 1169 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1170 |
+
"rewards/chosen": -0.7449682354927063,
|
| 1171 |
+
"rewards/margins": 1.1290674209594727,
|
| 1172 |
+
"rewards/rejected": -1.8738598823547363,
|
| 1173 |
+
"step": 1725
|
| 1174 |
+
},
|
| 1175 |
+
{
|
| 1176 |
+
"epoch": 2.09089550007475,
|
| 1177 |
+
"grad_norm": 62.25,
|
| 1178 |
+
"learning_rate": 1.6814355338945504e-06,
|
| 1179 |
+
"logits/chosen": -1.2273823022842407,
|
| 1180 |
+
"logits/rejected": -0.88829505443573,
|
| 1181 |
+
"logps/chosen": -322.93499755859375,
|
| 1182 |
+
"logps/rejected": -300.385009765625,
|
| 1183 |
+
"loss": 0.4221,
|
| 1184 |
+
"rewards/accuracies": 0.8050000071525574,
|
| 1185 |
+
"rewards/chosen": -0.903369128704071,
|
| 1186 |
+
"rewards/margins": 1.0416357517242432,
|
| 1187 |
+
"rewards/rejected": -1.9447948932647705,
|
| 1188 |
+
"step": 1750
|
| 1189 |
+
},
|
| 1190 |
+
{
|
| 1191 |
+
"epoch": 2.120795335625654,
|
| 1192 |
+
"grad_norm": 86.5,
|
| 1193 |
+
"learning_rate": 1.6260522817899868e-06,
|
| 1194 |
+
"logits/chosen": -1.2524548768997192,
|
| 1195 |
+
"logits/rejected": -1.0671484470367432,
|
| 1196 |
+
"logps/chosen": -333.92999267578125,
|
| 1197 |
+
"logps/rejected": -318.6400146484375,
|
| 1198 |
+
"loss": 0.4119,
|
| 1199 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1200 |
+
"rewards/chosen": -0.7944982647895813,
|
| 1201 |
+
"rewards/margins": 1.1625818014144897,
|
| 1202 |
+
"rewards/rejected": -1.9566112756729126,
|
| 1203 |
+
"step": 1775
|
| 1204 |
+
},
|
| 1205 |
+
{
|
| 1206 |
+
"epoch": 2.1506951711765585,
|
| 1207 |
+
"grad_norm": 90.0,
|
| 1208 |
+
"learning_rate": 1.5706690296854231e-06,
|
| 1209 |
+
"logits/chosen": -1.2237915992736816,
|
| 1210 |
+
"logits/rejected": -0.956585705280304,
|
| 1211 |
+
"logps/chosen": -320.30999755859375,
|
| 1212 |
+
"logps/rejected": -302.2674865722656,
|
| 1213 |
+
"loss": 0.4528,
|
| 1214 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 1215 |
+
"rewards/chosen": -0.9091894626617432,
|
| 1216 |
+
"rewards/margins": 1.0250316858291626,
|
| 1217 |
+
"rewards/rejected": -1.9344677925109863,
|
| 1218 |
+
"step": 1800
|
| 1219 |
+
},
|
| 1220 |
+
{
|
| 1221 |
+
"epoch": 2.1506951711765585,
|
| 1222 |
+
"eval_logits/chosen": -1.191327452659607,
|
| 1223 |
+
"eval_logits/rejected": -1.0433924198150635,
|
| 1224 |
+
"eval_logps/chosen": -327.741943359375,
|
| 1225 |
+
"eval_logps/rejected": -307.1559143066406,
|
| 1226 |
+
"eval_loss": 0.5188325047492981,
|
| 1227 |
+
"eval_rewards/accuracies": 0.6941244602203369,
|
| 1228 |
+
"eval_rewards/chosen": -0.8884723782539368,
|
| 1229 |
+
"eval_rewards/margins": 0.7567348480224609,
|
| 1230 |
+
"eval_rewards/rejected": -1.6454237699508667,
|
| 1231 |
+
"eval_runtime": 876.3236,
|
| 1232 |
+
"eval_samples_per_second": 1.697,
|
| 1233 |
+
"eval_steps_per_second": 0.212,
|
| 1234 |
+
"step": 1800
|
| 1235 |
+
},
|
| 1236 |
+
{
|
| 1237 |
+
"epoch": 2.180595006727463,
|
| 1238 |
+
"grad_norm": 74.5,
|
| 1239 |
+
"learning_rate": 1.5152857775808597e-06,
|
| 1240 |
+
"logits/chosen": -1.2849377393722534,
|
| 1241 |
+
"logits/rejected": -0.9589782953262329,
|
| 1242 |
+
"logps/chosen": -321.9987487792969,
|
| 1243 |
+
"logps/rejected": -307.2149963378906,
|
| 1244 |
+
"loss": 0.4031,
|
| 1245 |
+
"rewards/accuracies": 0.8349999785423279,
|
| 1246 |
+
"rewards/chosen": -0.7700170874595642,
|
| 1247 |
+
"rewards/margins": 1.1218103170394897,
|
| 1248 |
+
"rewards/rejected": -1.8917040824890137,
|
| 1249 |
+
"step": 1825
|
| 1250 |
+
},
|
| 1251 |
+
{
|
| 1252 |
+
"epoch": 2.2104948422783677,
|
| 1253 |
+
"grad_norm": 73.5,
|
| 1254 |
+
"learning_rate": 1.459902525476296e-06,
|
| 1255 |
+
"logits/chosen": -1.136842131614685,
|
| 1256 |
+
"logits/rejected": -0.9383144974708557,
|
| 1257 |
+
"logps/chosen": -319.8525085449219,
|
| 1258 |
+
"logps/rejected": -333.6600036621094,
|
| 1259 |
+
"loss": 0.424,
|
| 1260 |
+
"rewards/accuracies": 0.8075000047683716,
|
| 1261 |
+
"rewards/chosen": -0.8708154559135437,
|
| 1262 |
+
"rewards/margins": 1.0324267148971558,
|
| 1263 |
+
"rewards/rejected": -1.903378963470459,
|
| 1264 |
+
"step": 1850
|
| 1265 |
+
},
|
| 1266 |
+
{
|
| 1267 |
+
"epoch": 2.240394677829272,
|
| 1268 |
+
"grad_norm": 72.5,
|
| 1269 |
+
"learning_rate": 1.4045192733717325e-06,
|
| 1270 |
+
"logits/chosen": -1.1802786588668823,
|
| 1271 |
+
"logits/rejected": -0.9680548310279846,
|
| 1272 |
+
"logps/chosen": -317.48748779296875,
|
| 1273 |
+
"logps/rejected": -299.19000244140625,
|
| 1274 |
+
"loss": 0.4262,
|
| 1275 |
+
"rewards/accuracies": 0.8274999856948853,
|
| 1276 |
+
"rewards/chosen": -0.8513085842132568,
|
| 1277 |
+
"rewards/margins": 1.0704809427261353,
|
| 1278 |
+
"rewards/rejected": -1.9216357469558716,
|
| 1279 |
+
"step": 1875
|
| 1280 |
+
},
|
| 1281 |
+
{
|
| 1282 |
+
"epoch": 2.2702945133801764,
|
| 1283 |
+
"grad_norm": 84.0,
|
| 1284 |
+
"learning_rate": 1.3491360212671688e-06,
|
| 1285 |
+
"logits/chosen": -1.2559946775436401,
|
| 1286 |
+
"logits/rejected": -0.9639026522636414,
|
| 1287 |
+
"logps/chosen": -336.9750061035156,
|
| 1288 |
+
"logps/rejected": -323.49249267578125,
|
| 1289 |
+
"loss": 0.4294,
|
| 1290 |
+
"rewards/accuracies": 0.8025000095367432,
|
| 1291 |
+
"rewards/chosen": -0.8724609613418579,
|
| 1292 |
+
"rewards/margins": 1.0881787538528442,
|
| 1293 |
+
"rewards/rejected": -1.960756778717041,
|
| 1294 |
+
"step": 1900
|
| 1295 |
+
},
|
| 1296 |
+
{
|
| 1297 |
+
"epoch": 2.3001943489310808,
|
| 1298 |
+
"grad_norm": 71.0,
|
| 1299 |
+
"learning_rate": 1.2937527691626054e-06,
|
| 1300 |
+
"logits/chosen": -1.3266677856445312,
|
| 1301 |
+
"logits/rejected": -1.0626074075698853,
|
| 1302 |
+
"logps/chosen": -305.86749267578125,
|
| 1303 |
+
"logps/rejected": -291.93499755859375,
|
| 1304 |
+
"loss": 0.4471,
|
| 1305 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 1306 |
+
"rewards/chosen": -0.9192346334457397,
|
| 1307 |
+
"rewards/margins": 1.0141992568969727,
|
| 1308 |
+
"rewards/rejected": -1.9337549209594727,
|
| 1309 |
+
"step": 1925
|
| 1310 |
+
},
|
| 1311 |
+
{
|
| 1312 |
+
"epoch": 2.330094184481985,
|
| 1313 |
+
"grad_norm": 109.5,
|
| 1314 |
+
"learning_rate": 1.2383695170580418e-06,
|
| 1315 |
+
"logits/chosen": -1.1726070642471313,
|
| 1316 |
+
"logits/rejected": -1.0060466527938843,
|
| 1317 |
+
"logps/chosen": -309.7799987792969,
|
| 1318 |
+
"logps/rejected": -311.13751220703125,
|
| 1319 |
+
"loss": 0.4333,
|
| 1320 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 1321 |
+
"rewards/chosen": -0.8455395698547363,
|
| 1322 |
+
"rewards/margins": 1.0642285346984863,
|
| 1323 |
+
"rewards/rejected": -1.9100537300109863,
|
| 1324 |
+
"step": 1950
|
| 1325 |
+
},
|
| 1326 |
+
{
|
| 1327 |
+
"epoch": 2.35999402003289,
|
| 1328 |
+
"grad_norm": 43.0,
|
| 1329 |
+
"learning_rate": 1.1829862649534782e-06,
|
| 1330 |
+
"logits/chosen": -1.189868450164795,
|
| 1331 |
+
"logits/rejected": -1.0110809803009033,
|
| 1332 |
+
"logps/chosen": -343.5849914550781,
|
| 1333 |
+
"logps/rejected": -329.1675109863281,
|
| 1334 |
+
"loss": 0.4071,
|
| 1335 |
+
"rewards/accuracies": 0.8224999904632568,
|
| 1336 |
+
"rewards/chosen": -0.8902783393859863,
|
| 1337 |
+
"rewards/margins": 1.0464379787445068,
|
| 1338 |
+
"rewards/rejected": -1.9371508359909058,
|
| 1339 |
+
"step": 1975
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 2.3898938555837943,
|
| 1343 |
+
"grad_norm": 86.5,
|
| 1344 |
+
"learning_rate": 1.1276030128489146e-06,
|
| 1345 |
+
"logits/chosen": -1.3213348388671875,
|
| 1346 |
+
"logits/rejected": -1.0948954820632935,
|
| 1347 |
+
"logps/chosen": -331.0174865722656,
|
| 1348 |
+
"logps/rejected": -307.2900085449219,
|
| 1349 |
+
"loss": 0.4075,
|
| 1350 |
+
"rewards/accuracies": 0.8349999785423279,
|
| 1351 |
+
"rewards/chosen": -0.8052575588226318,
|
| 1352 |
+
"rewards/margins": 1.1002050638198853,
|
| 1353 |
+
"rewards/rejected": -1.9058740139007568,
|
| 1354 |
+
"step": 2000
|
| 1355 |
+
},
|
| 1356 |
+
{
|
| 1357 |
+
"epoch": 2.3898938555837943,
|
| 1358 |
+
"eval_logits/chosen": -1.1904795169830322,
|
| 1359 |
+
"eval_logits/rejected": -1.042686104774475,
|
| 1360 |
+
"eval_logps/chosen": -327.67205810546875,
|
| 1361 |
+
"eval_logps/rejected": -307.0806579589844,
|
| 1362 |
+
"eval_loss": 0.5186262726783752,
|
| 1363 |
+
"eval_rewards/accuracies": 0.6967166662216187,
|
| 1364 |
+
"eval_rewards/chosen": -0.8813358545303345,
|
| 1365 |
+
"eval_rewards/margins": 0.7553303837776184,
|
| 1366 |
+
"eval_rewards/rejected": -1.6366767883300781,
|
| 1367 |
+
"eval_runtime": 876.3711,
|
| 1368 |
+
"eval_samples_per_second": 1.697,
|
| 1369 |
+
"eval_steps_per_second": 0.212,
|
| 1370 |
+
"step": 2000
|
| 1371 |
+
},
|
| 1372 |
+
{
|
| 1373 |
+
"epoch": 2.4197936911346987,
|
| 1374 |
+
"grad_norm": 67.0,
|
| 1375 |
+
"learning_rate": 1.072219760744351e-06,
|
| 1376 |
+
"logits/chosen": -1.2627320289611816,
|
| 1377 |
+
"logits/rejected": -1.0026310682296753,
|
| 1378 |
+
"logps/chosen": -335.5675048828125,
|
| 1379 |
+
"logps/rejected": -301.01251220703125,
|
| 1380 |
+
"loss": 0.4202,
|
| 1381 |
+
"rewards/accuracies": 0.7774999737739563,
|
| 1382 |
+
"rewards/chosen": -0.8969201445579529,
|
| 1383 |
+
"rewards/margins": 1.085205078125,
|
| 1384 |
+
"rewards/rejected": -1.9821679592132568,
|
| 1385 |
+
"step": 2025
|
| 1386 |
+
},
|
| 1387 |
+
{
|
| 1388 |
+
"epoch": 2.4496935266856035,
|
| 1389 |
+
"grad_norm": 86.0,
|
| 1390 |
+
"learning_rate": 1.0168365086397875e-06,
|
| 1391 |
+
"logits/chosen": -1.2463324069976807,
|
| 1392 |
+
"logits/rejected": -0.9855798482894897,
|
| 1393 |
+
"logps/chosen": -332.5849914550781,
|
| 1394 |
+
"logps/rejected": -324.9624938964844,
|
| 1395 |
+
"loss": 0.4193,
|
| 1396 |
+
"rewards/accuracies": 0.7925000190734863,
|
| 1397 |
+
"rewards/chosen": -0.8326050043106079,
|
| 1398 |
+
"rewards/margins": 1.0910131931304932,
|
| 1399 |
+
"rewards/rejected": -1.9229882955551147,
|
| 1400 |
+
"step": 2050
|
| 1401 |
+
},
|
| 1402 |
+
{
|
| 1403 |
+
"epoch": 2.479593362236508,
|
| 1404 |
+
"grad_norm": 53.75,
|
| 1405 |
+
"learning_rate": 9.61453256535224e-07,
|
| 1406 |
+
"logits/chosen": -1.2372454404830933,
|
| 1407 |
+
"logits/rejected": -0.9461462497711182,
|
| 1408 |
+
"logps/chosen": -328.4750061035156,
|
| 1409 |
+
"logps/rejected": -300.5224914550781,
|
| 1410 |
+
"loss": 0.4611,
|
| 1411 |
+
"rewards/accuracies": 0.7524999976158142,
|
| 1412 |
+
"rewards/chosen": -0.8591150045394897,
|
| 1413 |
+
"rewards/margins": 0.9913061261177063,
|
| 1414 |
+
"rewards/rejected": -1.8506054878234863,
|
| 1415 |
+
"step": 2075
|
| 1416 |
+
},
|
| 1417 |
+
{
|
| 1418 |
+
"epoch": 2.509493197787412,
|
| 1419 |
+
"grad_norm": 68.0,
|
| 1420 |
+
"learning_rate": 9.060700044306603e-07,
|
| 1421 |
+
"logits/chosen": -1.2847473621368408,
|
| 1422 |
+
"logits/rejected": -1.0720292329788208,
|
| 1423 |
+
"logps/chosen": -337.26251220703125,
|
| 1424 |
+
"logps/rejected": -307.17498779296875,
|
| 1425 |
+
"loss": 0.4101,
|
| 1426 |
+
"rewards/accuracies": 0.7799999713897705,
|
| 1427 |
+
"rewards/chosen": -0.8909338116645813,
|
| 1428 |
+
"rewards/margins": 1.1306884288787842,
|
| 1429 |
+
"rewards/rejected": -2.021728515625,
|
| 1430 |
+
"step": 2100
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"epoch": 2.5393930333383166,
|
| 1434 |
+
"grad_norm": 101.0,
|
| 1435 |
+
"learning_rate": 8.506867523260968e-07,
|
| 1436 |
+
"logits/chosen": -1.1994116306304932,
|
| 1437 |
+
"logits/rejected": -0.9730746746063232,
|
| 1438 |
+
"logps/chosen": -338.3999938964844,
|
| 1439 |
+
"logps/rejected": -304.99749755859375,
|
| 1440 |
+
"loss": 0.4387,
|
| 1441 |
+
"rewards/accuracies": 0.7875000238418579,
|
| 1442 |
+
"rewards/chosen": -0.7841222882270813,
|
| 1443 |
+
"rewards/margins": 1.0449267625808716,
|
| 1444 |
+
"rewards/rejected": -1.829746127128601,
|
| 1445 |
+
"step": 2125
|
| 1446 |
+
},
|
| 1447 |
+
{
|
| 1448 |
+
"epoch": 2.569292868889221,
|
| 1449 |
+
"grad_norm": 68.5,
|
| 1450 |
+
"learning_rate": 7.953035002215331e-07,
|
| 1451 |
+
"logits/chosen": -1.3298254013061523,
|
| 1452 |
+
"logits/rejected": -1.118627667427063,
|
| 1453 |
+
"logps/chosen": -309.739990234375,
|
| 1454 |
+
"logps/rejected": -308.24749755859375,
|
| 1455 |
+
"loss": 0.4449,
|
| 1456 |
+
"rewards/accuracies": 0.7774999737739563,
|
| 1457 |
+
"rewards/chosen": -0.8520336747169495,
|
| 1458 |
+
"rewards/margins": 0.9700658917427063,
|
| 1459 |
+
"rewards/rejected": -1.8218945264816284,
|
| 1460 |
+
"step": 2150
|
| 1461 |
+
},
|
| 1462 |
+
{
|
| 1463 |
+
"epoch": 2.5991927044401257,
|
| 1464 |
+
"grad_norm": 70.5,
|
| 1465 |
+
"learning_rate": 7.399202481169695e-07,
|
| 1466 |
+
"logits/chosen": -1.1831958293914795,
|
| 1467 |
+
"logits/rejected": NaN,
|
| 1468 |
+
"logps/chosen": -327.49249267578125,
|
| 1469 |
+
"logps/rejected": -289.5924987792969,
|
| 1470 |
+
"loss": 0.4473,
|
| 1471 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 1472 |
+
"rewards/chosen": -0.8408032059669495,
|
| 1473 |
+
"rewards/margins": 0.9420214891433716,
|
| 1474 |
+
"rewards/rejected": -1.7829101085662842,
|
| 1475 |
+
"step": 2175
|
| 1476 |
+
},
|
| 1477 |
+
{
|
| 1478 |
+
"epoch": 2.62909253999103,
|
| 1479 |
+
"grad_norm": 54.0,
|
| 1480 |
+
"learning_rate": 6.845369960124059e-07,
|
| 1481 |
+
"logits/chosen": -1.2656641006469727,
|
| 1482 |
+
"logits/rejected": -0.9782373309135437,
|
| 1483 |
+
"logps/chosen": -324.4200134277344,
|
| 1484 |
+
"logps/rejected": -290.0675048828125,
|
| 1485 |
+
"loss": 0.4419,
|
| 1486 |
+
"rewards/accuracies": 0.7825000286102295,
|
| 1487 |
+
"rewards/chosen": -0.9666149616241455,
|
| 1488 |
+
"rewards/margins": 1.0030114650726318,
|
| 1489 |
+
"rewards/rejected": -1.9694628715515137,
|
| 1490 |
+
"step": 2200
|
| 1491 |
+
},
|
| 1492 |
+
{
|
| 1493 |
+
"epoch": 2.62909253999103,
|
| 1494 |
+
"eval_logits/chosen": -1.1868830919265747,
|
| 1495 |
+
"eval_logits/rejected": -1.0399714708328247,
|
| 1496 |
+
"eval_logps/chosen": -327.6585998535156,
|
| 1497 |
+
"eval_logps/rejected": -306.9704284667969,
|
| 1498 |
+
"eval_loss": 0.5178263783454895,
|
| 1499 |
+
"eval_rewards/accuracies": 0.6993087530136108,
|
| 1500 |
+
"eval_rewards/chosen": -0.8778404593467712,
|
| 1501 |
+
"eval_rewards/margins": 0.7548588514328003,
|
| 1502 |
+
"eval_rewards/rejected": -1.6324502229690552,
|
| 1503 |
+
"eval_runtime": 876.3727,
|
| 1504 |
+
"eval_samples_per_second": 1.697,
|
| 1505 |
+
"eval_steps_per_second": 0.212,
|
| 1506 |
+
"step": 2200
|
| 1507 |
+
},
|
| 1508 |
+
{
|
| 1509 |
+
"epoch": 2.6589923755419345,
|
| 1510 |
+
"grad_norm": 67.5,
|
| 1511 |
+
"learning_rate": 6.291537439078423e-07,
|
| 1512 |
+
"logits/chosen": -1.2253618240356445,
|
| 1513 |
+
"logits/rejected": -1.0349105596542358,
|
| 1514 |
+
"logps/chosen": -336.12249755859375,
|
| 1515 |
+
"logps/rejected": -311.8275146484375,
|
| 1516 |
+
"loss": 0.4574,
|
| 1517 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 1518 |
+
"rewards/chosen": -0.8752642869949341,
|
| 1519 |
+
"rewards/margins": 0.9961340427398682,
|
| 1520 |
+
"rewards/rejected": -1.8713818788528442,
|
| 1521 |
+
"step": 2225
|
| 1522 |
+
},
|
| 1523 |
+
{
|
| 1524 |
+
"epoch": 2.688892211092839,
|
| 1525 |
+
"grad_norm": 100.0,
|
| 1526 |
+
"learning_rate": 5.737704918032787e-07,
|
| 1527 |
+
"logits/chosen": -1.2597771883010864,
|
| 1528 |
+
"logits/rejected": -0.9909564256668091,
|
| 1529 |
+
"logps/chosen": -326.6600036621094,
|
| 1530 |
+
"logps/rejected": -316.19000244140625,
|
| 1531 |
+
"loss": 0.4751,
|
| 1532 |
+
"rewards/accuracies": 0.7674999833106995,
|
| 1533 |
+
"rewards/chosen": -0.9248193502426147,
|
| 1534 |
+
"rewards/margins": 0.9592040777206421,
|
| 1535 |
+
"rewards/rejected": -1.8837096691131592,
|
| 1536 |
+
"step": 2250
|
| 1537 |
+
},
|
| 1538 |
+
{
|
| 1539 |
+
"epoch": 2.7187920466437436,
|
| 1540 |
+
"grad_norm": 76.0,
|
| 1541 |
+
"learning_rate": 5.183872396987152e-07,
|
| 1542 |
+
"logits/chosen": -1.2072705030441284,
|
| 1543 |
+
"logits/rejected": -0.9592925906181335,
|
| 1544 |
+
"logps/chosen": -322.36248779296875,
|
| 1545 |
+
"logps/rejected": -315.8374938964844,
|
| 1546 |
+
"loss": 0.391,
|
| 1547 |
+
"rewards/accuracies": 0.8274999856948853,
|
| 1548 |
+
"rewards/chosen": -0.7576141357421875,
|
| 1549 |
+
"rewards/margins": 1.160730004310608,
|
| 1550 |
+
"rewards/rejected": -1.9182031154632568,
|
| 1551 |
+
"step": 2275
|
| 1552 |
+
},
|
| 1553 |
+
{
|
| 1554 |
+
"epoch": 2.748691882194648,
|
| 1555 |
+
"grad_norm": 53.0,
|
| 1556 |
+
"learning_rate": 4.630039875941516e-07,
|
| 1557 |
+
"logits/chosen": -1.287199854850769,
|
| 1558 |
+
"logits/rejected": -0.9606054425239563,
|
| 1559 |
+
"logps/chosen": -344.7650146484375,
|
| 1560 |
+
"logps/rejected": -331.24749755859375,
|
| 1561 |
+
"loss": 0.4177,
|
| 1562 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1563 |
+
"rewards/chosen": -0.7748047113418579,
|
| 1564 |
+
"rewards/margins": 1.1645703315734863,
|
| 1565 |
+
"rewards/rejected": -1.9394140243530273,
|
| 1566 |
+
"step": 2300
|
| 1567 |
+
},
|
| 1568 |
+
{
|
| 1569 |
+
"epoch": 2.7785917177455524,
|
| 1570 |
+
"grad_norm": 87.0,
|
| 1571 |
+
"learning_rate": 4.07620735489588e-07,
|
| 1572 |
+
"logits/chosen": -1.2260528802871704,
|
| 1573 |
+
"logits/rejected": -1.0005972385406494,
|
| 1574 |
+
"logps/chosen": -312.9624938964844,
|
| 1575 |
+
"logps/rejected": -323.0400085449219,
|
| 1576 |
+
"loss": 0.3917,
|
| 1577 |
+
"rewards/accuracies": 0.8349999785423279,
|
| 1578 |
+
"rewards/chosen": -0.7925238013267517,
|
| 1579 |
+
"rewards/margins": 1.185449242591858,
|
| 1580 |
+
"rewards/rejected": -1.9780443906784058,
|
| 1581 |
+
"step": 2325
|
| 1582 |
+
},
|
| 1583 |
+
{
|
| 1584 |
+
"epoch": 2.8084915532964567,
|
| 1585 |
+
"grad_norm": 56.5,
|
| 1586 |
+
"learning_rate": 3.5223748338502434e-07,
|
| 1587 |
+
"logits/chosen": -1.2027392387390137,
|
| 1588 |
+
"logits/rejected": -0.989107608795166,
|
| 1589 |
+
"logps/chosen": -321.3762512207031,
|
| 1590 |
+
"logps/rejected": -318.11749267578125,
|
| 1591 |
+
"loss": 0.4052,
|
| 1592 |
+
"rewards/accuracies": 0.8174999952316284,
|
| 1593 |
+
"rewards/chosen": -0.8751891851425171,
|
| 1594 |
+
"rewards/margins": 1.1021533012390137,
|
| 1595 |
+
"rewards/rejected": -1.976718783378601,
|
| 1596 |
+
"step": 2350
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"epoch": 2.838391388847361,
|
| 1600 |
+
"grad_norm": 54.5,
|
| 1601 |
+
"learning_rate": 2.968542312804608e-07,
|
| 1602 |
+
"logits/chosen": -1.2425882816314697,
|
| 1603 |
+
"logits/rejected": -0.9340093731880188,
|
| 1604 |
+
"logps/chosen": -335.12249755859375,
|
| 1605 |
+
"logps/rejected": -320.2049865722656,
|
| 1606 |
+
"loss": 0.4115,
|
| 1607 |
+
"rewards/accuracies": 0.8224999904632568,
|
| 1608 |
+
"rewards/chosen": -0.8292675614356995,
|
| 1609 |
+
"rewards/margins": 1.1182934045791626,
|
| 1610 |
+
"rewards/rejected": -1.9483104944229126,
|
| 1611 |
+
"step": 2375
|
| 1612 |
+
},
|
| 1613 |
+
{
|
| 1614 |
+
"epoch": 2.868291224398266,
|
| 1615 |
+
"grad_norm": 87.0,
|
| 1616 |
+
"learning_rate": 2.4147097917589725e-07,
|
| 1617 |
+
"logits/chosen": -1.3012477159500122,
|
| 1618 |
+
"logits/rejected": -1.0664279460906982,
|
| 1619 |
+
"logps/chosen": -293.489990234375,
|
| 1620 |
+
"logps/rejected": -285.197509765625,
|
| 1621 |
+
"loss": 0.4277,
|
| 1622 |
+
"rewards/accuracies": 0.8025000095367432,
|
| 1623 |
+
"rewards/chosen": -0.8684576153755188,
|
| 1624 |
+
"rewards/margins": 1.069272518157959,
|
| 1625 |
+
"rewards/rejected": -1.9371191263198853,
|
| 1626 |
+
"step": 2400
|
| 1627 |
+
},
|
| 1628 |
+
{
|
| 1629 |
+
"epoch": 2.868291224398266,
|
| 1630 |
+
"eval_logits/chosen": -1.1853525638580322,
|
| 1631 |
+
"eval_logits/rejected": -1.0373817682266235,
|
| 1632 |
+
"eval_logps/chosen": -327.3817138671875,
|
| 1633 |
+
"eval_logps/rejected": -306.81451416015625,
|
| 1634 |
+
"eval_loss": 0.5165102481842041,
|
| 1635 |
+
"eval_rewards/accuracies": 0.7006528377532959,
|
| 1636 |
+
"eval_rewards/chosen": -0.8549529314041138,
|
| 1637 |
+
"eval_rewards/margins": 0.7583125829696655,
|
| 1638 |
+
"eval_rewards/rejected": -1.6133127212524414,
|
| 1639 |
+
"eval_runtime": 876.3322,
|
| 1640 |
+
"eval_samples_per_second": 1.697,
|
| 1641 |
+
"eval_steps_per_second": 0.212,
|
| 1642 |
+
"step": 2400
|
| 1643 |
+
},
|
| 1644 |
+
{
|
| 1645 |
+
"epoch": 2.8981910599491703,
|
| 1646 |
+
"grad_norm": 46.0,
|
| 1647 |
+
"learning_rate": 1.8608772707133363e-07,
|
| 1648 |
+
"logits/chosen": -1.356745958328247,
|
| 1649 |
+
"logits/rejected": -1.0496530532836914,
|
| 1650 |
+
"logps/chosen": -319.9649963378906,
|
| 1651 |
+
"logps/rejected": -309.7025146484375,
|
| 1652 |
+
"loss": 0.4037,
|
| 1653 |
+
"rewards/accuracies": 0.8025000095367432,
|
| 1654 |
+
"rewards/chosen": -0.8254479765892029,
|
| 1655 |
+
"rewards/margins": 1.1192578077316284,
|
| 1656 |
+
"rewards/rejected": -1.9445117712020874,
|
| 1657 |
+
"step": 2425
|
| 1658 |
+
},
|
| 1659 |
+
{
|
| 1660 |
+
"epoch": 2.9280908955000746,
|
| 1661 |
+
"grad_norm": 70.5,
|
| 1662 |
+
"learning_rate": 1.3070447496677006e-07,
|
| 1663 |
+
"logits/chosen": -1.2751880884170532,
|
| 1664 |
+
"logits/rejected": -1.0796799659729004,
|
| 1665 |
+
"logps/chosen": -316.9425048828125,
|
| 1666 |
+
"logps/rejected": -325.7550048828125,
|
| 1667 |
+
"loss": 0.4306,
|
| 1668 |
+
"rewards/accuracies": 0.7724999785423279,
|
| 1669 |
+
"rewards/chosen": -0.8079773187637329,
|
| 1670 |
+
"rewards/margins": 1.000207543373108,
|
| 1671 |
+
"rewards/rejected": -1.8083984851837158,
|
| 1672 |
+
"step": 2450
|
| 1673 |
+
},
|
| 1674 |
+
{
|
| 1675 |
+
"epoch": 2.9579907310509794,
|
| 1676 |
+
"grad_norm": 74.0,
|
| 1677 |
+
"learning_rate": 7.532122286220647e-08,
|
| 1678 |
+
"logits/chosen": -1.2595221996307373,
|
| 1679 |
+
"logits/rejected": -1.0140166282653809,
|
| 1680 |
+
"logps/chosen": -320.6000061035156,
|
| 1681 |
+
"logps/rejected": -318.6600036621094,
|
| 1682 |
+
"loss": 0.4808,
|
| 1683 |
+
"rewards/accuracies": 0.7749999761581421,
|
| 1684 |
+
"rewards/chosen": -1.000390648841858,
|
| 1685 |
+
"rewards/margins": 0.931530773639679,
|
| 1686 |
+
"rewards/rejected": -1.9319677352905273,
|
| 1687 |
+
"step": 2475
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 2.987890566601884,
|
| 1691 |
+
"grad_norm": 78.5,
|
| 1692 |
+
"learning_rate": 1.993797075764289e-08,
|
| 1693 |
+
"logits/chosen": -1.2403491735458374,
|
| 1694 |
+
"logits/rejected": -0.9544309973716736,
|
| 1695 |
+
"logps/chosen": -343.76251220703125,
|
| 1696 |
+
"logps/rejected": -336.38250732421875,
|
| 1697 |
+
"loss": 0.4225,
|
| 1698 |
+
"rewards/accuracies": 0.8149999976158142,
|
| 1699 |
+
"rewards/chosen": -0.7856341600418091,
|
| 1700 |
+
"rewards/margins": 1.0573632717132568,
|
| 1701 |
+
"rewards/rejected": -1.8428466320037842,
|
| 1702 |
+
"step": 2500
|
| 1703 |
+
}
|
| 1704 |
+
],
|
| 1705 |
+
"logging_steps": 25,
|
| 1706 |
+
"max_steps": 2508,
|
| 1707 |
+
"num_input_tokens_seen": 0,
|
| 1708 |
+
"num_train_epochs": 3,
|
| 1709 |
+
"save_steps": 500,
|
| 1710 |
+
"stateful_callbacks": {
|
| 1711 |
+
"TrainerControl": {
|
| 1712 |
+
"args": {
|
| 1713 |
+
"should_epoch_stop": false,
|
| 1714 |
+
"should_evaluate": false,
|
| 1715 |
+
"should_log": false,
|
| 1716 |
+
"should_save": true,
|
| 1717 |
+
"should_training_stop": true
|
| 1718 |
+
},
|
| 1719 |
+
"attributes": {}
|
| 1720 |
+
}
|
| 1721 |
+
},
|
| 1722 |
+
"total_flos": 0.0,
|
| 1723 |
+
"train_batch_size": 2,
|
| 1724 |
+
"trial_name": null,
|
| 1725 |
+
"trial_params": null
|
| 1726 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26fe0d38674121f3f33fb74b85ccfe78d08f9c5766a0e3ca44f2163d55e9851d
|
| 3 |
+
size 6609
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|