mjf-su committed
Commit 00407df · verified · 1 Parent(s): 257a90e

Add files using upload-large-folder tool

Files changed (50)
  1. grpo-nADE-format-RC/checkpoint-100/added_tokens.json +28 -0
  2. grpo-nADE-format-RC/checkpoint-100/chat_template.jinja +110 -0
  3. grpo-nADE-format-RC/checkpoint-100/config.json +68 -0
  4. grpo-nADE-format-RC/checkpoint-100/generation_config.json +12 -0
  5. grpo-nADE-format-RC/checkpoint-100/merges.txt +0 -0
  6. grpo-nADE-format-RC/checkpoint-100/model.safetensors.index.json +721 -0
  7. grpo-nADE-format-RC/checkpoint-100/special_tokens_map.json +31 -0
  8. grpo-nADE-format-RC/checkpoint-100/tokenizer_config.json +244 -0
  9. grpo-nADE-format-RC/checkpoint-100/trainer_state.json +344 -0
  10. grpo-nADE-format-RC/checkpoint-100/vocab.json +0 -0
  11. grpo-nADE-format-RC/checkpoint-200/added_tokens.json +28 -0
  12. grpo-nADE-format-RC/checkpoint-200/chat_template.jinja +110 -0
  13. grpo-nADE-format-RC/checkpoint-200/config.json +68 -0
  14. grpo-nADE-format-RC/checkpoint-200/generation_config.json +12 -0
  15. grpo-nADE-format-RC/checkpoint-200/merges.txt +0 -0
  16. grpo-nADE-format-RC/checkpoint-200/model.safetensors.index.json +721 -0
  17. grpo-nADE-format-RC/checkpoint-200/special_tokens_map.json +31 -0
  18. grpo-nADE-format-RC/checkpoint-200/tokenizer_config.json +244 -0
  19. grpo-nADE-format-RC/checkpoint-200/trainer_state.json +654 -0
  20. grpo-nADE-format-RC/checkpoint-300/added_tokens.json +28 -0
  21. grpo-nADE-format-RC/checkpoint-300/chat_template.jinja +110 -0
  22. grpo-nADE-format-RC/checkpoint-300/config.json +68 -0
  23. grpo-nADE-format-RC/checkpoint-300/generation_config.json +12 -0
  24. grpo-nADE-format-RC/checkpoint-300/model.safetensors.index.json +721 -0
  25. grpo-nADE-format-RC/checkpoint-300/tokenizer_config.json +244 -0
  26. grpo-nADE-format-RC/checkpoint-300/trainer_state.json +964 -0
  27. grpo-nADE-format-RC/checkpoint-300/vocab.json +0 -0
  28. grpo-nADE-format-RC/checkpoint-400/chat_template.jinja +110 -0
  29. grpo-nADE-format-RC/checkpoint-400/merges.txt +0 -0
  30. grpo-nADE-format-RC/checkpoint-400/tokenizer_config.json +244 -0
  31. grpo-nADE-format-RC/checkpoint-400/vocab.json +0 -0
  32. grpo-nADE-format-RC/checkpoint-417/added_tokens.json +28 -0
  33. grpo-nADE-format-RC/checkpoint-417/chat_template.jinja +110 -0
  34. grpo-nADE-format-RC/checkpoint-417/config.json +68 -0
  35. grpo-nADE-format-RC/checkpoint-417/generation_config.json +12 -0
  36. grpo-nADE-format-RC/checkpoint-417/merges.txt +0 -0
  37. grpo-nADE-format-RC/checkpoint-417/model.safetensors.index.json +721 -0
  38. grpo-nADE-format-RC/checkpoint-417/special_tokens_map.json +31 -0
  39. grpo-nADE-format-RC/checkpoint-417/tokenizer_config.json +244 -0
  40. grpo-nADE-format-RC/checkpoint-417/trainer_state.json +1305 -0
  41. grpo-nADE-format-RC/checkpoint-417/vocab.json +0 -0
  42. grpo-nADE-format-RC/config.json +68 -0
  43. grpo-nADE-format-RC/merges.txt +0 -0
  44. grpo-nADE-format-RC/model.safetensors.index.json +721 -0
  45. grpo-nADE-format-RC/preprocessor_config.json +39 -0
  46. grpo-nADE-format-RC/tokenizer_config.json +244 -0
  47. grpo-nADE-format-RC/vocab.json +0 -0
  48. grpo-nADE-format/README.md +68 -0
  49. grpo-nADE-format/checkpoint-417/merges.txt +0 -0
  50. grpo-nADE-format/checkpoint-417/vocab.json +0 -0
grpo-nADE-format-RC/checkpoint-100/added_tokens.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_response>": 151666,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
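added_tokens.json maps the checkpoint's special tokens (thinking, tool-call, vision, and chat-control markers) to fixed vocabulary IDs. A minimal verification sketch, assuming the checkpoint folder is available locally and a transformers release recent enough to load these tokenizer files:

# Sketch only: path and token selection are illustrative.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-100")

# If the tokenizer loaded correctly, these IDs should match added_tokens.json.
for token in ["<think>", "</think>", "<tool_call>", "<|im_start|>", "<|im_end|>"]:
    print(token, tok.convert_tokens_to_ids(token))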
grpo-nADE-format-RC/checkpoint-100/chat_template.jinja ADDED
@@ -0,0 +1,110 @@
+ {%- set image_count = namespace(value=0) %}
+ {%- set video_count = namespace(value=0) %}
+ {%- macro render_content(content, do_vision_count) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- else %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
+ <|vision_start|><|image_pad|><|vision_end|>
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
+ <|vision_start|><|video_pad|><|vision_end|>
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- endmacro %}
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- render_content(messages[0].content, false) + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+ {%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false) %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- for message in messages %}
+ {%- set content = render_content(message.content, True) %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
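The template above wraps each turn in <|im_start|>/<|im_end|>, injects a <tools> block into the system turn when tools are passed, replaces image/video items with vision pad tokens, and re-emits <think> reasoning only after the last user query. A minimal rendering sketch, assuming the checkpoint is available locally; the example messages are illustrative only:

# Sketch only: render the chat template without tokenizing.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-100")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is 2 + 2?"},
]

# add_generation_prompt=True appends '<|im_start|>assistant\n' per the final branch above.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)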
grpo-nADE-format-RC/checkpoint-100/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "architectures": [
+ "Qwen3VLForConditionalGeneration"
+ ],
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "image_token_id": 151655,
+ "model_type": "qwen3_vl",
+ "pad_token_id": 151643,
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.02,
+ "intermediate_size": 9728,
+ "max_position_embeddings": 262144,
+ "model_type": "qwen3_vl_text",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 24,
+ 20,
+ 20
+ ],
+ "rope_type": "default"
+ },
+ "rope_theta": 5000000,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 151936
+ },
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.6",
+ "use_cache": false,
+ "video_token_id": 151656,
+ "vision_config": {
+ "deepstack_visual_indexes": [
+ 5,
+ 11,
+ 17
+ ],
+ "depth": 24,
+ "dtype": "bfloat16",
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1024,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "model_type": "qwen3_vl",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 2560,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652
+ }
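This config describes a Qwen3-VL-style checkpoint: a 36-layer, hidden-size-2560 text backbone in bfloat16 plus a 24-block vision tower with deepstack taps at blocks 5, 11, and 17. A minimal inspection sketch, assuming a transformers release that registers the "qwen3_vl" model type (the file names 4.57.6):

# Sketch only: read the nested text/vision sub-configs.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("grpo-nADE-format-RC/checkpoint-100")

print(cfg.model_type)                     # "qwen3_vl"
print(cfg.text_config.num_hidden_layers)  # 36
print(cfg.text_config.hidden_size)        # 2560
print(cfg.vision_config.depth)            # 24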
grpo-nADE-format-RC/checkpoint-100/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "4.57.6"
+ }
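generation_config.json sets the checkpoint's default decoding behavior: sampling with top_k=20 and top_p=0.95, stopping on <|im_end|> or <|endoftext|>. A minimal generation sketch, assuming the checkpoint loads through AutoModelForImageTextToText, that accelerate/device_map is available, and that the hardware can hold the roughly 4.4B-parameter bf16 weights; path and prompt are illustrative:

# Sketch only: generate() picks up generation_config.json when no overrides are passed.
import torch
from transformers import AutoModelForImageTextToText, AutoTokenizer

path = "grpo-nADE-format-RC/checkpoint-100"
tok = AutoTokenizer.from_pretrained(path)
model = AutoModelForImageTextToText.from_pretrained(
    path, torch_dtype=torch.bfloat16, device_map="auto"
)

prompt = tok.apply_chat_template(
    [{"role": "user", "content": "Describe GRPO in one sentence."}],
    tokenize=False, add_generation_prompt=True,
)
inputs = tok(prompt, return_tensors="pt").to(model.device)

out = model.generate(**inputs, max_new_tokens=128)
print(tok.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))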
grpo-nADE-format-RC/checkpoint-100/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-100/model.safetensors.index.json ADDED
@@ -0,0 +1,721 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 4437815808,
4
+ "total_size": 8875631616
5
+ },
6
+ "weight_map": {
7
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
14
+ "model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
17
+ "model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
18
+ "model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
25
+ "model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
28
+ "model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
36
+ "model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
39
+ "model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
40
+ "model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
42
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
46
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
47
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
50
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
58
+ "model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
61
+ "model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
64
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
69
+ "model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
72
+ "model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
76
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
79
+ "model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
80
+ "model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
83
+ "model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
85
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
86
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
91
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
94
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
97
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
98
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
101
+ "model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
102
+ "model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
105
+ "model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
108
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
109
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
110
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
111
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
112
+ "model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
113
+ "model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
114
+ "model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
115
+ "model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
116
+ "model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
117
+ "model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
118
+ "model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
119
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
120
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
121
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
122
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
123
+ "model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
124
+ "model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
125
+ "model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
126
+ "model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
127
+ "model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
135
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
138
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
139
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
141
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
144
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
145
+ "model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
146
+ "model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
148
+ "model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
149
+ "model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
157
+ "model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
160
+ "model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
162
+ "model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
163
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
168
+ "model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
171
+ "model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
178
+ "model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
179
+ "model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
180
+ "model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
182
+ "model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
184
+ "model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
185
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
188
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
190
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
192
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
193
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
196
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
198
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
201
+ "model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
204
+ "model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
212
+ "model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
215
+ "model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
216
+ "model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
217
+ "model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
218
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
220
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
222
+ "model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
223
+ "model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
224
+ "model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
226
+ "model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
228
+ "model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
232
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
233
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
234
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
236
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
237
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
241
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
244
+ "model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
245
+ "model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
246
+ "model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
248
+ "model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
251
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
252
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
253
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
255
+ "model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
256
+ "model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
258
+ "model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
259
+ "model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
260
+ "model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
262
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
263
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
264
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
265
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
267
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
268
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
269
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
270
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
271
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
+ "model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
273
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
276
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
277
+ "model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
278
+ "model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
280
+ "model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
281
+ "model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
282
+ "model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
284
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
285
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
286
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
287
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
289
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
290
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
291
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
292
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
293
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
294
+ "model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
295
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
296
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
299
+ "model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
300
+ "model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
301
+ "model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
303
+ "model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
304
+ "model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
306
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
308
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
310
+ "model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
311
+ "model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
312
+ "model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
313
+ "model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
314
+ "model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
316
+ "model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
317
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
318
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
320
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
321
+ "model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
322
+ "model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
324
+ "model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
325
+ "model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
328
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
330
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
333
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
336
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
337
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
339
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
340
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
341
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
342
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
343
+ "model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
344
+ "model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
345
+ "model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
346
+ "model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
347
+ "model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
348
+ "model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
349
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
350
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
351
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
352
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
353
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
354
+ "model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
355
+ "model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
356
+ "model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
357
+ "model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
358
+ "model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
359
+ "model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
360
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
361
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
362
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
363
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
364
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
365
+ "model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
366
+ "model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
367
+ "model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
368
+ "model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
369
+ "model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
373
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
376
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
377
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
378
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
380
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
383
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
384
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
385
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
387
+ "model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
388
+ "model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
390
+ "model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
391
+ "model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
392
+ "model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
394
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
396
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
397
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
398
+ "model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
399
+ "model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
400
+ "model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
402
+ "model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
404
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors",
405
+ "model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
406
+ "model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
408
+ "model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
409
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
410
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
411
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
412
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
413
+ "model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
414
+ "model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
415
+ "model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
416
+ "model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
417
+ "model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
418
+ "model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
420
+ "model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
421
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
422
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
423
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
424
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
425
+ "model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
426
+ "model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
427
+ "model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
428
+ "model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
429
+ "model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
430
+ "model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
432
+ "model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
433
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
434
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
435
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
436
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
437
+ "model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
438
+ "model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
439
+ "model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
440
+ "model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
441
+ "model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
442
+ "model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
443
+ "model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
444
+ "model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
445
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
446
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
447
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
448
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
449
+ "model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
450
+ "model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
451
+ "model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
452
+ "model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
453
+ "model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
454
+ "model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
455
+ "model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
456
+ "model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
457
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
458
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
459
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
460
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
461
+ "model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
462
+ "model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
463
+ "model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
464
+ "model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
465
+ "model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
466
+ "model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
467
+ "model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
468
+ "model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
469
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
470
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
471
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
472
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
473
+ "model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
474
+ "model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
475
+ "model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
476
+ "model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
477
+ "model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
478
+ "model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
479
+ "model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
480
+ "model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
481
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
482
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
483
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
484
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
485
+ "model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
486
+ "model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
487
+ "model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
488
+ "model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
489
+ "model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
490
+ "model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
491
+ "model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
492
+ "model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
493
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
494
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
495
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
496
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
497
+ "model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
498
+ "model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
499
+ "model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
500
+ "model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
501
+ "model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
502
+ "model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
503
+ "model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
504
+ "model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
505
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
506
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
507
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
508
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
509
+ "model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
510
+ "model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
511
+ "model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
512
+ "model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
513
+ "model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
514
+ "model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
515
+ "model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
516
+ "model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
517
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
518
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
519
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
520
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
521
+ "model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
522
+ "model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
523
+ "model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
524
+ "model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
525
+ "model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
526
+ "model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
527
+ "model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
528
+ "model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
529
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
530
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
531
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
532
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
533
+ "model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
534
+ "model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
535
+ "model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
536
+ "model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
537
+ "model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
538
+ "model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
539
+ "model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
540
+ "model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
541
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
542
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
543
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
544
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
545
+ "model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
546
+ "model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
547
+ "model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
548
+ "model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
549
+ "model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
550
+ "model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
551
+ "model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
552
+ "model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
553
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
554
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
555
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
556
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
557
+ "model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
558
+ "model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
559
+ "model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
560
+ "model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
561
+ "model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
562
+ "model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
563
+ "model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
564
+ "model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
565
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
566
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
567
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
568
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
569
+ "model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
570
+ "model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
571
+ "model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
572
+ "model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
573
+ "model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
574
+ "model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
575
+ "model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
576
+ "model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
577
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
578
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
579
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
580
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
581
+ "model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
582
+ "model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
583
+ "model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
584
+ "model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
585
+ "model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
586
+ "model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
587
+ "model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
588
+ "model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
589
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
590
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
591
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
592
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
593
+ "model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
594
+ "model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
595
+ "model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
596
+ "model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
597
+ "model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
598
+ "model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
599
+ "model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
600
+ "model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
601
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
602
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
603
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
604
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
605
+ "model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
606
+ "model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
607
+ "model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
608
+ "model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
609
+ "model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
610
+ "model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
611
+ "model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
612
+ "model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
613
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
614
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
615
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
616
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
617
+ "model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
618
+ "model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
619
+ "model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
620
+ "model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
621
+ "model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
622
+ "model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
623
+ "model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
624
+ "model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
625
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
626
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
627
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
628
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
629
+ "model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
630
+ "model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
631
+ "model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
632
+ "model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
633
+ "model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
634
+ "model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
635
+ "model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
636
+ "model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
637
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
638
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
639
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
640
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
641
+ "model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
642
+ "model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
643
+ "model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
644
+ "model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
645
+ "model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
646
+ "model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
647
+ "model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
648
+ "model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
649
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
650
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
651
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
652
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
653
+ "model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
654
+ "model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
655
+ "model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
656
+ "model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
657
+ "model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
658
+ "model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
659
+ "model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
660
+ "model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
661
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
662
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
663
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
664
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
665
+ "model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
666
+ "model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
667
+ "model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
668
+ "model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
669
+ "model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
670
+ "model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
671
+ "model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
672
+ "model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
673
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
674
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
675
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
676
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
677
+ "model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
678
+ "model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
679
+ "model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
680
+ "model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
681
+ "model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
682
+ "model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
683
+ "model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
684
+ "model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
685
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
686
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
687
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
688
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
689
+ "model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
690
+ "model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
691
+ "model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
692
+ "model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
693
+ "model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
694
+ "model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
695
+ "model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
696
+ "model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
697
+ "model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
698
+ "model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
699
+ "model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
700
+ "model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
701
+ "model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
702
+ "model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
703
+ "model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
704
+ "model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
705
+ "model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
706
+ "model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
707
+ "model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
708
+ "model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
709
+ "model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
710
+ "model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
711
+ "model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
712
+ "model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
713
+ "model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
714
+ "model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
715
+ "model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
716
+ "model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
717
+ "model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
718
+ "model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
719
+ "model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
720
+ }
721
+ }
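
The index above maps every remaining vision-tower tensor to the first of the two shards. A minimal sketch of how such a sharded index can be used to locate and read a single tensor, assuming the shard files sit next to the index, the `safetensors` package is installed, and the local checkpoint path below is a placeholder:

```python
import json
from safetensors import safe_open

ckpt_dir = "grpo-nADE-format-RC/checkpoint-100"  # placeholder: local path to this checkpoint

# weight_map tells us which shard file holds each parameter.
with open(f"{ckpt_dir}/model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.visual.merger.linear_fc1.weight"
shard = index["weight_map"][name]  # "model-00001-of-00002.safetensors"

# Open only that shard and read the single tensor lazily.
with safe_open(f"{ckpt_dir}/{shard}", framework="pt") as f:
    tensor = f.get_tensor(name)

print(name, tuple(tensor.shape), tensor.dtype)
```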
grpo-nADE-format-RC/checkpoint-100/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
grpo-nADE-format-RC/checkpoint-100/tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": null,
235
+ "model_max_length": 262144,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "left",
240
+ "processor_class": "Qwen3VLProcessor",
241
+ "split_special_tokens": false,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "unk_token": null
244
+ }
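
Since the checkpoint ships the full tokenizer configuration (left padding, `<|im_end|>` as EOS, `<|endoftext|>` as pad, the added `<think>`/`</think>` tokens), it can be loaded directly with `transformers`. A small sketch, using the local path below as a placeholder:

```python
from transformers import AutoTokenizer

# Placeholder path; any directory containing this tokenizer_config.json works.
tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-100")

# Settings from tokenizer_config.json are applied automatically.
assert tok.padding_side == "left"
assert tok.eos_token == "<|im_end|>"
assert tok.pad_token == "<|endoftext|>"

# The reasoning delimiters are added as ordinary (non-special) tokens.
print(tok.convert_tokens_to_ids(["<think>", "</think>"]))  # [151667, 151668]
```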
grpo-nADE-format-RC/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,344 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.015001500150015001,
6
+ "eval_steps": 100,
7
+ "global_step": 100,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completions/clipped_ratio": 0.03750000149011612,
19
+ "completions/max_length": 1533.5,
20
+ "completions/max_terminated_length": 1427.6,
21
+ "completions/mean_length": 820.7687622070313,
22
+ "completions/mean_terminated_length": 795.4251159667969,
23
+ "completions/min_length": 119.1,
24
+ "completions/min_terminated_length": 119.1,
25
+ "entropy": 0.6458343416452408,
26
+ "epoch": 0.0015001500150015,
27
+ "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 1.0625,
29
+ "learning_rate": 9.784172661870503e-07,
30
+ "loss": 0.111,
31
+ "num_tokens": 1052641.0,
32
+ "reward": 0.15892810765653848,
33
+ "reward_std": 1.2357513666152955,
34
+ "rewards/ADEnReward/mean": 0.011186909227399155,
35
+ "rewards/ADEnReward/std": 0.03507473061326891,
36
+ "rewards/ReasoningConfidenceReward/mean": -0.19809213168919088,
37
+ "rewards/ReasoningConfidenceReward/std": 0.45118741542100904,
38
+ "rewards/StrictFormatReward/mean": 0.345833333209157,
39
+ "rewards/StrictFormatReward/std": 0.8626526802778244,
40
+ "step": 10,
41
+ "step_time": 66.8766707284376
42
+ },
43
+ {
44
+ "clip_ratio/high_max": 0.0,
45
+ "clip_ratio/high_mean": 0.0,
46
+ "clip_ratio/low_mean": 0.0,
47
+ "clip_ratio/low_min": 0.0,
48
+ "clip_ratio/region_mean": 0.0,
49
+ "completions/clipped_ratio": 0.01875000037252903,
50
+ "completions/max_length": 1258.4,
51
+ "completions/max_terminated_length": 1106.6,
52
+ "completions/mean_length": 743.8416809082031,
53
+ "completions/mean_terminated_length": 728.8905517578125,
54
+ "completions/min_length": 163.6,
55
+ "completions/min_terminated_length": 163.6,
56
+ "entropy": 0.6466913104057312,
57
+ "epoch": 0.003000300030003,
58
+ "frac_reward_zero_std": 0.0,
59
+ "grad_norm": 1.15625,
60
+ "learning_rate": 9.544364508393285e-07,
61
+ "loss": 0.0171,
62
+ "num_tokens": 2068565.0,
63
+ "reward": 0.8077859580516815,
64
+ "reward_std": 0.8083356320858002,
65
+ "rewards/ADEnReward/mean": 0.03084552166983485,
66
+ "rewards/ADEnReward/std": 0.07702018767595291,
67
+ "rewards/ReasoningConfidenceReward/mean": 0.018607060704380275,
68
+ "rewards/ReasoningConfidenceReward/std": 0.28907840102911,
69
+ "rewards/StrictFormatReward/mean": 0.7583333432674408,
70
+ "rewards/StrictFormatReward/std": 0.6329713940620423,
71
+ "step": 20,
72
+ "step_time": 52.03955397913232
73
+ },
74
+ {
75
+ "clip_ratio/high_max": 0.0,
76
+ "clip_ratio/high_mean": 0.0,
77
+ "clip_ratio/low_mean": 0.0,
78
+ "clip_ratio/low_min": 0.0,
79
+ "clip_ratio/region_mean": 0.0,
80
+ "completions/clipped_ratio": 0.002083333395421505,
81
+ "completions/max_length": 1179.2,
82
+ "completions/max_terminated_length": 1103.8,
83
+ "completions/mean_length": 735.7812744140625,
84
+ "completions/mean_terminated_length": 734.0838012695312,
85
+ "completions/min_length": 424.2,
86
+ "completions/min_terminated_length": 424.2,
87
+ "entropy": 0.6348762333393096,
88
+ "epoch": 0.004500450045004501,
89
+ "frac_reward_zero_std": 0.0,
90
+ "grad_norm": 1.0546875,
91
+ "learning_rate": 9.304556354916066e-07,
92
+ "loss": 0.0134,
93
+ "num_tokens": 3080012.0,
94
+ "reward": 0.9798760175704956,
95
+ "reward_std": 0.5248558193445205,
96
+ "rewards/ADEnReward/mean": 0.032167868409305814,
97
+ "rewards/ADEnReward/std": 0.0745716668665409,
98
+ "rewards/ReasoningConfidenceReward/mean": 0.06437477525323629,
99
+ "rewards/ReasoningConfidenceReward/std": 0.19101330041885375,
100
+ "rewards/StrictFormatReward/mean": 0.8833333373069763,
101
+ "rewards/StrictFormatReward/std": 0.45989986062049865,
102
+ "step": 30,
103
+ "step_time": 47.932181040663274
104
+ },
105
+ {
106
+ "clip_ratio/high_max": 0.0,
107
+ "clip_ratio/high_mean": 0.0,
108
+ "clip_ratio/low_mean": 0.0,
109
+ "clip_ratio/low_min": 0.0,
110
+ "clip_ratio/region_mean": 0.0,
111
+ "completions/clipped_ratio": 0.002083333395421505,
112
+ "completions/max_length": 1017.0,
113
+ "completions/max_terminated_length": 946.0,
114
+ "completions/mean_length": 739.5562744140625,
115
+ "completions/mean_terminated_length": 737.8339294433594,
116
+ "completions/min_length": 563.2,
117
+ "completions/min_terminated_length": 563.2,
118
+ "entropy": 0.6441138684749603,
119
+ "epoch": 0.006000600060006,
120
+ "frac_reward_zero_std": 0.0,
121
+ "grad_norm": 0.98046875,
122
+ "learning_rate": 9.064748201438849e-07,
123
+ "loss": 0.0049,
124
+ "num_tokens": 4093959.0,
125
+ "reward": 1.0465242981910705,
126
+ "reward_std": 0.3045470409095287,
127
+ "rewards/ADEnReward/mean": 0.03476252369582653,
128
+ "rewards/ADEnReward/std": 0.07538308277726173,
129
+ "rewards/ReasoningConfidenceReward/mean": 0.07009507827460766,
130
+ "rewards/ReasoningConfidenceReward/std": 0.11509535983204841,
131
+ "rewards/StrictFormatReward/mean": 0.9416666626930237,
132
+ "rewards/StrictFormatReward/std": 0.2242635190486908,
133
+ "step": 40,
134
+ "step_time": 40.8255105547607
135
+ },
136
+ {
137
+ "clip_ratio/high_max": 0.0,
138
+ "clip_ratio/high_mean": 0.0,
139
+ "clip_ratio/low_mean": 0.0,
140
+ "clip_ratio/low_min": 0.0,
141
+ "clip_ratio/region_mean": 0.0,
142
+ "completions/clipped_ratio": 0.0,
143
+ "completions/max_length": 866.4,
144
+ "completions/max_terminated_length": 866.4,
145
+ "completions/mean_length": 733.5812683105469,
146
+ "completions/mean_terminated_length": 733.5812683105469,
147
+ "completions/min_length": 616.2,
148
+ "completions/min_terminated_length": 616.2,
149
+ "entropy": 0.652310574054718,
150
+ "epoch": 0.007500750075007501,
151
+ "frac_reward_zero_std": 0.0,
152
+ "grad_norm": 1.078125,
153
+ "learning_rate": 8.82494004796163e-07,
154
+ "loss": -0.0002,
155
+ "num_tokens": 5104478.0,
156
+ "reward": 1.1200557351112366,
157
+ "reward_std": 0.18049246706068517,
158
+ "rewards/ADEnReward/mean": 0.032199547812342647,
159
+ "rewards/ADEnReward/std": 0.08372207283973694,
160
+ "rewards/ReasoningConfidenceReward/mean": 0.10868950486183167,
161
+ "rewards/ReasoningConfidenceReward/std": 0.09046642743051052,
162
+ "rewards/StrictFormatReward/mean": 0.9791666686534881,
163
+ "rewards/StrictFormatReward/std": 0.10964388847351074,
164
+ "step": 50,
165
+ "step_time": 34.72850414663553
166
+ },
167
+ {
168
+ "clip_ratio/high_max": 0.0,
169
+ "clip_ratio/high_mean": 0.0,
170
+ "clip_ratio/low_mean": 0.0,
171
+ "clip_ratio/low_min": 0.0,
172
+ "clip_ratio/region_mean": 0.0,
173
+ "completions/clipped_ratio": 0.0,
174
+ "completions/max_length": 820.2,
175
+ "completions/max_terminated_length": 820.2,
176
+ "completions/mean_length": 734.4875244140625,
177
+ "completions/mean_terminated_length": 734.4875244140625,
178
+ "completions/min_length": 597.2,
179
+ "completions/min_terminated_length": 597.2,
180
+ "entropy": 0.6447544604539871,
181
+ "epoch": 0.009000900090009001,
182
+ "frac_reward_zero_std": 0.0,
183
+ "grad_norm": 0.8671875,
184
+ "learning_rate": 8.585131894484412e-07,
185
+ "loss": -0.014,
186
+ "num_tokens": 6115528.0,
187
+ "reward": 1.0801176726818085,
188
+ "reward_std": 0.2073265790939331,
189
+ "rewards/ADEnReward/mean": 0.024589571449905635,
190
+ "rewards/ADEnReward/std": 0.05969331655651331,
191
+ "rewards/ReasoningConfidenceReward/mean": 0.09302806071937084,
192
+ "rewards/ReasoningConfidenceReward/std": 0.08853670731186866,
193
+ "rewards/StrictFormatReward/mean": 0.9624999940395356,
194
+ "rewards/StrictFormatReward/std": 0.22511394023895265,
195
+ "step": 60,
196
+ "step_time": 34.7001038627699
197
+ },
198
+ {
199
+ "clip_ratio/high_max": 0.0,
200
+ "clip_ratio/high_mean": 0.0,
201
+ "clip_ratio/low_mean": 0.0,
202
+ "clip_ratio/low_min": 0.0,
203
+ "clip_ratio/region_mean": 0.0,
204
+ "completions/clipped_ratio": 0.002083333395421505,
205
+ "completions/max_length": 887.8,
206
+ "completions/max_terminated_length": 880.9,
207
+ "completions/mean_length": 739.214599609375,
208
+ "completions/mean_terminated_length": 737.5825012207031,
209
+ "completions/min_length": 674.7,
210
+ "completions/min_terminated_length": 674.7,
211
+ "entropy": 0.648271444439888,
212
+ "epoch": 0.010501050105010502,
213
+ "frac_reward_zero_std": 0.0,
214
+ "grad_norm": 1.046875,
215
+ "learning_rate": 8.345323741007194e-07,
216
+ "loss": 0.0095,
217
+ "num_tokens": 7128591.0,
218
+ "reward": 1.117066776752472,
219
+ "reward_std": 0.14670775569975375,
220
+ "rewards/ADEnReward/mean": 0.030741326790302993,
221
+ "rewards/ADEnReward/std": 0.07235845774412156,
222
+ "rewards/ReasoningConfidenceReward/mean": 0.10299204997718334,
223
+ "rewards/ReasoningConfidenceReward/std": 0.07768557965755463,
224
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
225
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
226
+ "step": 70,
227
+ "step_time": 37.97550033703446
228
+ },
229
+ {
230
+ "clip_ratio/high_max": 0.0,
231
+ "clip_ratio/high_mean": 0.0,
232
+ "clip_ratio/low_mean": 0.0,
233
+ "clip_ratio/low_min": 0.0,
234
+ "clip_ratio/region_mean": 0.0,
235
+ "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 871.3,
237
+ "completions/max_terminated_length": 871.3,
238
+ "completions/mean_length": 735.6625244140625,
239
+ "completions/mean_terminated_length": 735.6625244140625,
240
+ "completions/min_length": 663.4,
241
+ "completions/min_terminated_length": 663.4,
242
+ "entropy": 0.6467163026332855,
243
+ "epoch": 0.012001200120012,
244
+ "frac_reward_zero_std": 0.0,
245
+ "grad_norm": 1.078125,
246
+ "learning_rate": 8.105515587529975e-07,
247
+ "loss": 0.0099,
248
+ "num_tokens": 8140093.0,
249
+ "reward": 1.124228584766388,
250
+ "reward_std": 0.1685192134231329,
251
+ "rewards/ADEnReward/mean": 0.03326874002814293,
252
+ "rewards/ADEnReward/std": 0.07868262981064618,
253
+ "rewards/ReasoningConfidenceReward/mean": 0.10762646868824959,
254
+ "rewards/ReasoningConfidenceReward/std": 0.08494675308465957,
255
+ "rewards/StrictFormatReward/mean": 0.9833333373069764,
256
+ "rewards/StrictFormatReward/std": 0.08077637553215027,
257
+ "step": 80,
258
+ "step_time": 35.69703020621091
259
+ },
260
+ {
261
+ "clip_ratio/high_max": 0.0,
262
+ "clip_ratio/high_mean": 0.0,
263
+ "clip_ratio/low_mean": 0.0,
264
+ "clip_ratio/low_min": 0.0,
265
+ "clip_ratio/region_mean": 0.0,
266
+ "completions/clipped_ratio": 0.0,
267
+ "completions/max_length": 799.3,
268
+ "completions/max_terminated_length": 799.3,
269
+ "completions/mean_length": 730.2937683105469,
270
+ "completions/mean_terminated_length": 730.2937683105469,
271
+ "completions/min_length": 623.3,
272
+ "completions/min_terminated_length": 623.3,
273
+ "entropy": 0.6420892357826233,
274
+ "epoch": 0.013501350135013501,
275
+ "frac_reward_zero_std": 0.0,
276
+ "grad_norm": 0.828125,
277
+ "learning_rate": 7.865707434052757e-07,
278
+ "loss": -0.0098,
279
+ "num_tokens": 9148426.0,
280
+ "reward": 1.122767400741577,
281
+ "reward_std": 0.154670562595129,
282
+ "rewards/ADEnReward/mean": 0.03107238719239831,
283
+ "rewards/ADEnReward/std": 0.07060995940119028,
284
+ "rewards/ReasoningConfidenceReward/mean": 0.10836165957152843,
285
+ "rewards/ReasoningConfidenceReward/std": 0.07862687073647975,
286
+ "rewards/StrictFormatReward/mean": 0.9833333253860473,
287
+ "rewards/StrictFormatReward/std": 0.1154700517654419,
288
+ "step": 90,
289
+ "step_time": 34.06732882745564
290
+ },
291
+ {
292
+ "clip_ratio/high_max": 0.0,
293
+ "clip_ratio/high_mean": 0.0,
294
+ "clip_ratio/low_mean": 0.0,
295
+ "clip_ratio/low_min": 0.0,
296
+ "clip_ratio/region_mean": 0.0,
297
+ "completions/clipped_ratio": 0.0,
298
+ "completions/max_length": 886.6,
299
+ "completions/max_terminated_length": 886.6,
300
+ "completions/mean_length": 732.9125305175781,
301
+ "completions/mean_terminated_length": 732.9125305175781,
302
+ "completions/min_length": 674.0,
303
+ "completions/min_terminated_length": 674.0,
304
+ "entropy": 0.6424726009368896,
305
+ "epoch": 0.015001500150015001,
306
+ "frac_reward_zero_std": 0.0,
307
+ "grad_norm": 0.921875,
308
+ "learning_rate": 7.62589928057554e-07,
309
+ "loss": 0.0087,
310
+ "num_tokens": 10158000.0,
311
+ "reward": 1.1287578463554382,
312
+ "reward_std": 0.16896428540349007,
313
+ "rewards/ADEnReward/mean": 0.042021069768816234,
314
+ "rewards/ADEnReward/std": 0.08718259073793888,
315
+ "rewards/ReasoningConfidenceReward/mean": 0.10757005885243416,
316
+ "rewards/ReasoningConfidenceReward/std": 0.0734778918325901,
317
+ "rewards/StrictFormatReward/mean": 0.9791666626930237,
318
+ "rewards/StrictFormatReward/std": 0.12699072659015656,
319
+ "step": 100,
320
+ "step_time": 35.849342082161456
321
+ }
322
+ ],
323
+ "logging_steps": 10,
324
+ "max_steps": 417,
325
+ "num_input_tokens_seen": 10158000,
326
+ "num_train_epochs": 1,
327
+ "save_steps": 100,
328
+ "stateful_callbacks": {
329
+ "TrainerControl": {
330
+ "args": {
331
+ "should_epoch_stop": false,
332
+ "should_evaluate": false,
333
+ "should_log": false,
334
+ "should_save": true,
335
+ "should_training_stop": false
336
+ },
337
+ "attributes": {}
338
+ }
339
+ },
340
+ "total_flos": 0.0,
341
+ "train_batch_size": 6,
342
+ "trial_name": null,
343
+ "trial_params": null
344
+ }
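
The `log_history` entries above record the GRPO reward components every 10 steps, so the training curve over the first 100 steps can be read straight back out of the JSON. A minimal plotting sketch, assuming the placeholder path below and matplotlib:

```python
import json

import matplotlib.pyplot as plt

path = "grpo-nADE-format-RC/checkpoint-100/trainer_state.json"  # placeholder path
with open(path) as f:
    state = json.load(f)

steps = [e["step"] for e in state["log_history"]]
reward = [e["reward"] for e in state["log_history"]]
fmt_mean = [e["rewards/StrictFormatReward/mean"] for e in state["log_history"]]

plt.plot(steps, reward, label="total reward")
plt.plot(steps, fmt_mean, label="StrictFormatReward mean")
plt.xlabel("global step")
plt.ylabel("reward")
plt.legend()
plt.savefig("grpo_reward_curve.png")
```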
grpo-nADE-format-RC/checkpoint-100/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-200/added_tokens.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_response>": 151666,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
grpo-nADE-format-RC/checkpoint-200/chat_template.jinja ADDED
@@ -0,0 +1,110 @@
+ {%- set image_count = namespace(value=0) %}
+ {%- set video_count = namespace(value=0) %}
+ {%- macro render_content(content, do_vision_count) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- else %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
+ <|vision_start|><|image_pad|><|vision_end|>
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
+ <|vision_start|><|video_pad|><|vision_end|>
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- endmacro %}
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- render_content(messages[0].content, false) + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+ {%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false) %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- for message in messages %}
+ {%- set content = render_content(message.content, True) %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
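
The template above wraps image and video items in `<|vision_start|>…<|vision_end|>` and re-inserts a `<think>` block for the final assistant turn. It is normally applied through the tokenizer rather than by hand; a short rendering sketch, assuming a recent `transformers` that loads the standalone `chat_template.jinja` and using placeholder paths:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-200")  # placeholder path

messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "scene.jpg"},  # placeholder image reference
            {"type": "text", "text": "Describe the driving scene."},
        ],
    }
]

# Render the prompt string with the Jinja template shipped in the checkpoint;
# actual pixel preprocessing would go through the image processor instead.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with "<|im_start|>assistant\n"
```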
grpo-nADE-format-RC/checkpoint-200/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "architectures": [
+ "Qwen3VLForConditionalGeneration"
+ ],
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "image_token_id": 151655,
+ "model_type": "qwen3_vl",
+ "pad_token_id": 151643,
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.02,
+ "intermediate_size": 9728,
+ "max_position_embeddings": 262144,
+ "model_type": "qwen3_vl_text",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 24,
+ 20,
+ 20
+ ],
+ "rope_type": "default"
+ },
+ "rope_theta": 5000000,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 151936
+ },
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.6",
+ "use_cache": false,
+ "video_token_id": 151656,
+ "vision_config": {
+ "deepstack_visual_indexes": [
+ 5,
+ 11,
+ 17
+ ],
+ "depth": 24,
+ "dtype": "bfloat16",
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1024,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "model_type": "qwen3_vl",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 2560,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652
+ }
grpo-nADE-format-RC/checkpoint-200/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "4.57.6"
+ }
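
config.json above describes the qwen3_vl architecture (a 36-layer, hidden-size-2560 text tower plus a 24-block vision tower, stored in bfloat16) and generation_config.json fixes the sampling defaults. A minimal text-only generation sketch, assuming a `transformers` release that exposes Qwen3-VL through `AutoModelForImageTextToText` and using a placeholder local path:

```python
import torch
from transformers import AutoModelForImageTextToText, AutoTokenizer

ckpt = "grpo-nADE-format-RC/checkpoint-200"  # placeholder path
tok = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForImageTextToText.from_pretrained(ckpt, torch_dtype=torch.bfloat16)

messages = [{"role": "user", "content": "Summarize the scene in one sentence."}]
input_ids = tok.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# do_sample=True, top_k=20 and top_p=0.95 are picked up from generation_config.json.
out = model.generate(input_ids, max_new_tokens=256)
print(tok.decode(out[0, input_ids.shape[-1]:], skip_special_tokens=True))
```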
grpo-nADE-format-RC/checkpoint-200/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-200/model.safetensors.index.json ADDED
@@ -0,0 +1,721 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 4437815808,
4
+ "total_size": 8875631616
5
+ },
6
+ "weight_map": {
7
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
14
+ "model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
17
+ "model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
18
+ "model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
25
+ "model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
28
+ "model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
36
+ "model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
39
+ "model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
40
+ "model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
42
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
46
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
47
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
50
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
58
+ "model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
61
+ "model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
64
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
69
+ "model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
72
+ "model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
76
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
79
+ "model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
80
+ "model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
83
+ "model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
85
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
86
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
91
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
94
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
97
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
98
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
101
+ "model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
102
+ "model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
105
+ "model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
108
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
109
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
110
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
111
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
112
+ "model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
113
+ "model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
114
+ "model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
115
+ "model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
116
+ "model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
117
+ "model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
118
+ "model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
119
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
120
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
121
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
122
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
123
+ "model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
124
+ "model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
125
+ "model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
126
+ "model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
127
+ "model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
135
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
138
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
139
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
141
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
144
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
145
+ "model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
146
+ "model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
148
+ "model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
149
+ "model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
157
+ "model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
160
+ "model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
162
+ "model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
163
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
168
+ "model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
171
+ "model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
178
+ "model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
179
+ "model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
180
+ "model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
182
+ "model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
184
+ "model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
185
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
188
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
190
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
192
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
193
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
196
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
198
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
201
+ "model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
204
+ "model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
212
+ "model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
215
+ "model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
216
+ "model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
217
+ "model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
218
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
220
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
222
+ "model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
223
+ "model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
224
+ "model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
226
+ "model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
228
+ "model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
232
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
233
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
234
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
236
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
237
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
241
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
244
+ "model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
245
+ "model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
246
+ "model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
248
+ "model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
251
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
252
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
253
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
255
+ "model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
256
+ "model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
258
+ "model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
259
+ "model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
260
+ "model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
262
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
263
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
264
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
265
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
267
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
268
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
269
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
270
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
271
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
+ "model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
273
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
276
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
277
+ "model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
278
+ "model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
280
+ "model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
281
+ "model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
282
+ "model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
284
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
285
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
286
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
287
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
289
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
290
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
291
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
292
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
293
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
294
+ "model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
295
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
296
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
299
+ "model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
300
+ "model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
301
+ "model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
303
+ "model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
304
+ "model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
306
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
308
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
310
+ "model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
311
+ "model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
312
+ "model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
313
+ "model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
314
+ "model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
316
+ "model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
317
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
318
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
320
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
321
+ "model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
322
+ "model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
324
+ "model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
325
+ "model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
328
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
330
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
333
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
336
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
337
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
339
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
340
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
341
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
342
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
343
+ "model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
344
+ "model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
345
+ "model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
346
+ "model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
347
+ "model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
348
+ "model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
349
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
350
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
351
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
352
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
353
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
354
+ "model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
355
+ "model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
356
+ "model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
357
+ "model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
358
+ "model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
359
+ "model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
360
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
361
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
362
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
363
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
364
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
365
+ "model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
366
+ "model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
367
+ "model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
368
+ "model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
369
+ "model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
373
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
376
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
377
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
378
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
380
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
383
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
384
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
385
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
387
+ "model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
388
+ "model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
390
+ "model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
391
+ "model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
392
+ "model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
394
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
396
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
397
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
398
+ "model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
399
+ "model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
400
+ "model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
402
+ "model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
404
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors",
405
+ "model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
406
+ "model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
408
+ "model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
409
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
410
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
411
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
412
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
413
+ "model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
414
+ "model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
415
+ "model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
416
+ "model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
417
+ "model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
418
+ "model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
420
+ "model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
421
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
422
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
423
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
424
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
425
+ "model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
426
+ "model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
427
+ "model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
428
+ "model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
429
+ "model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
430
+ "model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
432
+ "model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
433
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
434
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
435
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
436
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
437
+ "model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
438
+ "model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
439
+ "model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
440
+ "model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
441
+ "model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
442
+ "model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
443
+ "model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
444
+ "model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
445
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
446
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
447
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
448
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
449
+ "model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
450
+ "model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
451
+ "model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
452
+ "model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
453
+ "model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
454
+ "model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
455
+ "model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
456
+ "model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
457
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
458
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
459
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
460
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
461
+ "model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
462
+ "model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
463
+ "model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
464
+ "model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
465
+ "model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
466
+ "model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
467
+ "model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
468
+ "model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
469
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
470
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
471
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
472
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
473
+ "model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
474
+ "model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
475
+ "model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
476
+ "model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
477
+ "model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
478
+ "model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
479
+ "model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
480
+ "model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
481
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
482
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
483
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
484
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
485
+ "model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
486
+ "model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
487
+ "model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
488
+ "model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
489
+ "model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
490
+ "model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
491
+ "model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
492
+ "model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
493
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
494
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
495
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
496
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
497
+ "model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
498
+ "model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
499
+ "model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
500
+ "model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
501
+ "model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
502
+ "model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
503
+ "model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
504
+ "model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
505
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
506
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
507
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
508
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
509
+ "model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
510
+ "model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
511
+ "model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
512
+ "model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
513
+ "model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
514
+ "model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
515
+ "model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
516
+ "model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
517
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
518
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
519
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
520
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
521
+ "model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
522
+ "model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
523
+ "model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
524
+ "model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
525
+ "model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
526
+ "model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
527
+ "model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
528
+ "model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
529
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
530
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
531
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
532
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
533
+ "model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
534
+ "model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
535
+ "model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
536
+ "model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
537
+ "model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
538
+ "model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
539
+ "model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
540
+ "model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
541
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
542
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
543
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
544
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
545
+ "model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
546
+ "model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
547
+ "model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
548
+ "model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
549
+ "model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
550
+ "model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
551
+ "model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
552
+ "model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
553
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
554
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
555
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
556
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
557
+ "model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
558
+ "model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
559
+ "model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
560
+ "model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
561
+ "model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
562
+ "model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
563
+ "model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
564
+ "model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
565
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
566
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
567
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
568
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
569
+ "model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
570
+ "model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
571
+ "model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
572
+ "model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
573
+ "model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
574
+ "model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
575
+ "model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
576
+ "model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
577
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
578
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
579
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
580
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
581
+ "model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
582
+ "model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
583
+ "model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
584
+ "model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
585
+ "model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
586
+ "model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
587
+ "model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
588
+ "model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
589
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
590
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
591
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
592
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
593
+ "model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
594
+ "model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
595
+ "model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
596
+ "model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
597
+ "model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
598
+ "model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
599
+ "model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
600
+ "model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
601
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
602
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
603
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
604
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
605
+ "model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
606
+ "model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
607
+ "model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
608
+ "model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
609
+ "model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
610
+ "model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
611
+ "model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
612
+ "model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
613
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
614
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
615
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
616
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
617
+ "model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
618
+ "model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
619
+ "model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
620
+ "model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
621
+ "model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
622
+ "model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
623
+ "model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
624
+ "model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
625
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
626
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
627
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
628
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
629
+ "model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
630
+ "model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
631
+ "model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
632
+ "model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
633
+ "model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
634
+ "model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
635
+ "model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
636
+ "model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
637
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
638
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
639
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
640
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
641
+ "model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
642
+ "model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
643
+ "model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
644
+ "model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
645
+ "model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
646
+ "model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
647
+ "model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
648
+ "model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
649
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
650
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
651
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
652
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
653
+ "model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
654
+ "model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
655
+ "model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
656
+ "model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
657
+ "model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
658
+ "model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
659
+ "model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
660
+ "model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
661
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
662
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
663
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
664
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
665
+ "model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
666
+ "model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
667
+ "model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
668
+ "model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
669
+ "model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
670
+ "model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
671
+ "model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
672
+ "model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
673
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
674
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
675
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
676
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
677
+ "model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
678
+ "model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
679
+ "model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
680
+ "model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
681
+ "model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
682
+ "model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
683
+ "model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
684
+ "model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
685
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
686
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
687
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
688
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
689
+ "model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
690
+ "model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
691
+ "model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
692
+ "model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
693
+ "model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
694
+ "model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
695
+ "model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
696
+ "model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
697
+ "model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
698
+ "model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
699
+ "model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
700
+ "model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
701
+ "model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
702
+ "model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
703
+ "model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
704
+ "model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
705
+ "model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
706
+ "model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
707
+ "model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
708
+ "model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
709
+ "model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
710
+ "model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
711
+ "model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
712
+ "model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
713
+ "model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
714
+ "model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
715
+ "model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
716
+ "model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
717
+ "model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
718
+ "model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
719
+ "model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
720
+ }
721
+ }
grpo-nADE-format-RC/checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
grpo-nADE-format-RC/checkpoint-200/tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": null,
235
+ "model_max_length": 262144,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "left",
240
+ "processor_class": "Qwen3VLProcessor",
241
+ "split_special_tokens": false,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "unk_token": null
244
+ }
grpo-nADE-format-RC/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,654 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.030003000300030003,
6
+ "eval_steps": 100,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completions/clipped_ratio": 0.03750000149011612,
19
+ "completions/max_length": 1533.5,
20
+ "completions/max_terminated_length": 1427.6,
21
+ "completions/mean_length": 820.7687622070313,
22
+ "completions/mean_terminated_length": 795.4251159667969,
23
+ "completions/min_length": 119.1,
24
+ "completions/min_terminated_length": 119.1,
25
+ "entropy": 0.6458343416452408,
26
+ "epoch": 0.0015001500150015,
27
+ "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 1.0625,
29
+ "learning_rate": 9.784172661870503e-07,
30
+ "loss": 0.111,
31
+ "num_tokens": 1052641.0,
32
+ "reward": 0.15892810765653848,
33
+ "reward_std": 1.2357513666152955,
34
+ "rewards/ADEnReward/mean": 0.011186909227399155,
35
+ "rewards/ADEnReward/std": 0.03507473061326891,
36
+ "rewards/ReasoningConfidenceReward/mean": -0.19809213168919088,
37
+ "rewards/ReasoningConfidenceReward/std": 0.45118741542100904,
38
+ "rewards/StrictFormatReward/mean": 0.345833333209157,
39
+ "rewards/StrictFormatReward/std": 0.8626526802778244,
40
+ "step": 10,
41
+ "step_time": 66.8766707284376
42
+ },
43
+ {
44
+ "clip_ratio/high_max": 0.0,
45
+ "clip_ratio/high_mean": 0.0,
46
+ "clip_ratio/low_mean": 0.0,
47
+ "clip_ratio/low_min": 0.0,
48
+ "clip_ratio/region_mean": 0.0,
49
+ "completions/clipped_ratio": 0.01875000037252903,
50
+ "completions/max_length": 1258.4,
51
+ "completions/max_terminated_length": 1106.6,
52
+ "completions/mean_length": 743.8416809082031,
53
+ "completions/mean_terminated_length": 728.8905517578125,
54
+ "completions/min_length": 163.6,
55
+ "completions/min_terminated_length": 163.6,
56
+ "entropy": 0.6466913104057312,
57
+ "epoch": 0.003000300030003,
58
+ "frac_reward_zero_std": 0.0,
59
+ "grad_norm": 1.15625,
60
+ "learning_rate": 9.544364508393285e-07,
61
+ "loss": 0.0171,
62
+ "num_tokens": 2068565.0,
63
+ "reward": 0.8077859580516815,
64
+ "reward_std": 0.8083356320858002,
65
+ "rewards/ADEnReward/mean": 0.03084552166983485,
66
+ "rewards/ADEnReward/std": 0.07702018767595291,
67
+ "rewards/ReasoningConfidenceReward/mean": 0.018607060704380275,
68
+ "rewards/ReasoningConfidenceReward/std": 0.28907840102911,
69
+ "rewards/StrictFormatReward/mean": 0.7583333432674408,
70
+ "rewards/StrictFormatReward/std": 0.6329713940620423,
71
+ "step": 20,
72
+ "step_time": 52.03955397913232
73
+ },
74
+ {
75
+ "clip_ratio/high_max": 0.0,
76
+ "clip_ratio/high_mean": 0.0,
77
+ "clip_ratio/low_mean": 0.0,
78
+ "clip_ratio/low_min": 0.0,
79
+ "clip_ratio/region_mean": 0.0,
80
+ "completions/clipped_ratio": 0.002083333395421505,
81
+ "completions/max_length": 1179.2,
82
+ "completions/max_terminated_length": 1103.8,
83
+ "completions/mean_length": 735.7812744140625,
84
+ "completions/mean_terminated_length": 734.0838012695312,
85
+ "completions/min_length": 424.2,
86
+ "completions/min_terminated_length": 424.2,
87
+ "entropy": 0.6348762333393096,
88
+ "epoch": 0.004500450045004501,
89
+ "frac_reward_zero_std": 0.0,
90
+ "grad_norm": 1.0546875,
91
+ "learning_rate": 9.304556354916066e-07,
92
+ "loss": 0.0134,
93
+ "num_tokens": 3080012.0,
94
+ "reward": 0.9798760175704956,
95
+ "reward_std": 0.5248558193445205,
96
+ "rewards/ADEnReward/mean": 0.032167868409305814,
97
+ "rewards/ADEnReward/std": 0.0745716668665409,
98
+ "rewards/ReasoningConfidenceReward/mean": 0.06437477525323629,
99
+ "rewards/ReasoningConfidenceReward/std": 0.19101330041885375,
100
+ "rewards/StrictFormatReward/mean": 0.8833333373069763,
101
+ "rewards/StrictFormatReward/std": 0.45989986062049865,
102
+ "step": 30,
103
+ "step_time": 47.932181040663274
104
+ },
105
+ {
106
+ "clip_ratio/high_max": 0.0,
107
+ "clip_ratio/high_mean": 0.0,
108
+ "clip_ratio/low_mean": 0.0,
109
+ "clip_ratio/low_min": 0.0,
110
+ "clip_ratio/region_mean": 0.0,
111
+ "completions/clipped_ratio": 0.002083333395421505,
112
+ "completions/max_length": 1017.0,
113
+ "completions/max_terminated_length": 946.0,
114
+ "completions/mean_length": 739.5562744140625,
115
+ "completions/mean_terminated_length": 737.8339294433594,
116
+ "completions/min_length": 563.2,
117
+ "completions/min_terminated_length": 563.2,
118
+ "entropy": 0.6441138684749603,
119
+ "epoch": 0.006000600060006,
120
+ "frac_reward_zero_std": 0.0,
121
+ "grad_norm": 0.98046875,
122
+ "learning_rate": 9.064748201438849e-07,
123
+ "loss": 0.0049,
124
+ "num_tokens": 4093959.0,
125
+ "reward": 1.0465242981910705,
126
+ "reward_std": 0.3045470409095287,
127
+ "rewards/ADEnReward/mean": 0.03476252369582653,
128
+ "rewards/ADEnReward/std": 0.07538308277726173,
129
+ "rewards/ReasoningConfidenceReward/mean": 0.07009507827460766,
130
+ "rewards/ReasoningConfidenceReward/std": 0.11509535983204841,
131
+ "rewards/StrictFormatReward/mean": 0.9416666626930237,
132
+ "rewards/StrictFormatReward/std": 0.2242635190486908,
133
+ "step": 40,
134
+ "step_time": 40.8255105547607
135
+ },
136
+ {
137
+ "clip_ratio/high_max": 0.0,
138
+ "clip_ratio/high_mean": 0.0,
139
+ "clip_ratio/low_mean": 0.0,
140
+ "clip_ratio/low_min": 0.0,
141
+ "clip_ratio/region_mean": 0.0,
142
+ "completions/clipped_ratio": 0.0,
143
+ "completions/max_length": 866.4,
144
+ "completions/max_terminated_length": 866.4,
145
+ "completions/mean_length": 733.5812683105469,
146
+ "completions/mean_terminated_length": 733.5812683105469,
147
+ "completions/min_length": 616.2,
148
+ "completions/min_terminated_length": 616.2,
149
+ "entropy": 0.652310574054718,
150
+ "epoch": 0.007500750075007501,
151
+ "frac_reward_zero_std": 0.0,
152
+ "grad_norm": 1.078125,
153
+ "learning_rate": 8.82494004796163e-07,
154
+ "loss": -0.0002,
155
+ "num_tokens": 5104478.0,
156
+ "reward": 1.1200557351112366,
157
+ "reward_std": 0.18049246706068517,
158
+ "rewards/ADEnReward/mean": 0.032199547812342647,
159
+ "rewards/ADEnReward/std": 0.08372207283973694,
160
+ "rewards/ReasoningConfidenceReward/mean": 0.10868950486183167,
161
+ "rewards/ReasoningConfidenceReward/std": 0.09046642743051052,
162
+ "rewards/StrictFormatReward/mean": 0.9791666686534881,
163
+ "rewards/StrictFormatReward/std": 0.10964388847351074,
164
+ "step": 50,
165
+ "step_time": 34.72850414663553
166
+ },
167
+ {
168
+ "clip_ratio/high_max": 0.0,
169
+ "clip_ratio/high_mean": 0.0,
170
+ "clip_ratio/low_mean": 0.0,
171
+ "clip_ratio/low_min": 0.0,
172
+ "clip_ratio/region_mean": 0.0,
173
+ "completions/clipped_ratio": 0.0,
174
+ "completions/max_length": 820.2,
175
+ "completions/max_terminated_length": 820.2,
176
+ "completions/mean_length": 734.4875244140625,
177
+ "completions/mean_terminated_length": 734.4875244140625,
178
+ "completions/min_length": 597.2,
179
+ "completions/min_terminated_length": 597.2,
180
+ "entropy": 0.6447544604539871,
181
+ "epoch": 0.009000900090009001,
182
+ "frac_reward_zero_std": 0.0,
183
+ "grad_norm": 0.8671875,
184
+ "learning_rate": 8.585131894484412e-07,
185
+ "loss": -0.014,
186
+ "num_tokens": 6115528.0,
187
+ "reward": 1.0801176726818085,
188
+ "reward_std": 0.2073265790939331,
189
+ "rewards/ADEnReward/mean": 0.024589571449905635,
190
+ "rewards/ADEnReward/std": 0.05969331655651331,
191
+ "rewards/ReasoningConfidenceReward/mean": 0.09302806071937084,
192
+ "rewards/ReasoningConfidenceReward/std": 0.08853670731186866,
193
+ "rewards/StrictFormatReward/mean": 0.9624999940395356,
194
+ "rewards/StrictFormatReward/std": 0.22511394023895265,
195
+ "step": 60,
196
+ "step_time": 34.7001038627699
197
+ },
198
+ {
199
+ "clip_ratio/high_max": 0.0,
200
+ "clip_ratio/high_mean": 0.0,
201
+ "clip_ratio/low_mean": 0.0,
202
+ "clip_ratio/low_min": 0.0,
203
+ "clip_ratio/region_mean": 0.0,
204
+ "completions/clipped_ratio": 0.002083333395421505,
205
+ "completions/max_length": 887.8,
206
+ "completions/max_terminated_length": 880.9,
207
+ "completions/mean_length": 739.214599609375,
208
+ "completions/mean_terminated_length": 737.5825012207031,
209
+ "completions/min_length": 674.7,
210
+ "completions/min_terminated_length": 674.7,
211
+ "entropy": 0.648271444439888,
212
+ "epoch": 0.010501050105010502,
213
+ "frac_reward_zero_std": 0.0,
214
+ "grad_norm": 1.046875,
215
+ "learning_rate": 8.345323741007194e-07,
216
+ "loss": 0.0095,
217
+ "num_tokens": 7128591.0,
218
+ "reward": 1.117066776752472,
219
+ "reward_std": 0.14670775569975375,
220
+ "rewards/ADEnReward/mean": 0.030741326790302993,
221
+ "rewards/ADEnReward/std": 0.07235845774412156,
222
+ "rewards/ReasoningConfidenceReward/mean": 0.10299204997718334,
223
+ "rewards/ReasoningConfidenceReward/std": 0.07768557965755463,
224
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
225
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
226
+ "step": 70,
227
+ "step_time": 37.97550033703446
228
+ },
229
+ {
230
+ "clip_ratio/high_max": 0.0,
231
+ "clip_ratio/high_mean": 0.0,
232
+ "clip_ratio/low_mean": 0.0,
233
+ "clip_ratio/low_min": 0.0,
234
+ "clip_ratio/region_mean": 0.0,
235
+ "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 871.3,
237
+ "completions/max_terminated_length": 871.3,
238
+ "completions/mean_length": 735.6625244140625,
239
+ "completions/mean_terminated_length": 735.6625244140625,
240
+ "completions/min_length": 663.4,
241
+ "completions/min_terminated_length": 663.4,
242
+ "entropy": 0.6467163026332855,
243
+ "epoch": 0.012001200120012,
244
+ "frac_reward_zero_std": 0.0,
245
+ "grad_norm": 1.078125,
246
+ "learning_rate": 8.105515587529975e-07,
247
+ "loss": 0.0099,
248
+ "num_tokens": 8140093.0,
249
+ "reward": 1.124228584766388,
250
+ "reward_std": 0.1685192134231329,
251
+ "rewards/ADEnReward/mean": 0.03326874002814293,
252
+ "rewards/ADEnReward/std": 0.07868262981064618,
253
+ "rewards/ReasoningConfidenceReward/mean": 0.10762646868824959,
254
+ "rewards/ReasoningConfidenceReward/std": 0.08494675308465957,
255
+ "rewards/StrictFormatReward/mean": 0.9833333373069764,
256
+ "rewards/StrictFormatReward/std": 0.08077637553215027,
257
+ "step": 80,
258
+ "step_time": 35.69703020621091
259
+ },
260
+ {
261
+ "clip_ratio/high_max": 0.0,
262
+ "clip_ratio/high_mean": 0.0,
263
+ "clip_ratio/low_mean": 0.0,
264
+ "clip_ratio/low_min": 0.0,
265
+ "clip_ratio/region_mean": 0.0,
266
+ "completions/clipped_ratio": 0.0,
267
+ "completions/max_length": 799.3,
268
+ "completions/max_terminated_length": 799.3,
269
+ "completions/mean_length": 730.2937683105469,
270
+ "completions/mean_terminated_length": 730.2937683105469,
271
+ "completions/min_length": 623.3,
272
+ "completions/min_terminated_length": 623.3,
273
+ "entropy": 0.6420892357826233,
274
+ "epoch": 0.013501350135013501,
275
+ "frac_reward_zero_std": 0.0,
276
+ "grad_norm": 0.828125,
277
+ "learning_rate": 7.865707434052757e-07,
278
+ "loss": -0.0098,
279
+ "num_tokens": 9148426.0,
280
+ "reward": 1.122767400741577,
281
+ "reward_std": 0.154670562595129,
282
+ "rewards/ADEnReward/mean": 0.03107238719239831,
283
+ "rewards/ADEnReward/std": 0.07060995940119028,
284
+ "rewards/ReasoningConfidenceReward/mean": 0.10836165957152843,
285
+ "rewards/ReasoningConfidenceReward/std": 0.07862687073647975,
286
+ "rewards/StrictFormatReward/mean": 0.9833333253860473,
287
+ "rewards/StrictFormatReward/std": 0.1154700517654419,
288
+ "step": 90,
289
+ "step_time": 34.06732882745564
290
+ },
291
+ {
292
+ "clip_ratio/high_max": 0.0,
293
+ "clip_ratio/high_mean": 0.0,
294
+ "clip_ratio/low_mean": 0.0,
295
+ "clip_ratio/low_min": 0.0,
296
+ "clip_ratio/region_mean": 0.0,
297
+ "completions/clipped_ratio": 0.0,
298
+ "completions/max_length": 886.6,
299
+ "completions/max_terminated_length": 886.6,
300
+ "completions/mean_length": 732.9125305175781,
301
+ "completions/mean_terminated_length": 732.9125305175781,
302
+ "completions/min_length": 674.0,
303
+ "completions/min_terminated_length": 674.0,
304
+ "entropy": 0.6424726009368896,
305
+ "epoch": 0.015001500150015001,
306
+ "frac_reward_zero_std": 0.0,
307
+ "grad_norm": 0.921875,
308
+ "learning_rate": 7.62589928057554e-07,
309
+ "loss": 0.0087,
310
+ "num_tokens": 10158000.0,
311
+ "reward": 1.1287578463554382,
312
+ "reward_std": 0.16896428540349007,
313
+ "rewards/ADEnReward/mean": 0.042021069768816234,
314
+ "rewards/ADEnReward/std": 0.08718259073793888,
315
+ "rewards/ReasoningConfidenceReward/mean": 0.10757005885243416,
316
+ "rewards/ReasoningConfidenceReward/std": 0.0734778918325901,
317
+ "rewards/StrictFormatReward/mean": 0.9791666626930237,
318
+ "rewards/StrictFormatReward/std": 0.12699072659015656,
319
+ "step": 100,
320
+ "step_time": 35.849342082161456
321
+ },
322
+ {
323
+ "clip_ratio/high_max": 0.0,
324
+ "clip_ratio/high_mean": 0.0,
325
+ "clip_ratio/low_mean": 0.0,
326
+ "clip_ratio/low_min": 0.0,
327
+ "clip_ratio/region_mean": 0.0,
328
+ "completions/clipped_ratio": 0.002083333395421505,
329
+ "completions/max_length": 922.1,
330
+ "completions/max_terminated_length": 847.3,
331
+ "completions/mean_length": 736.8729309082031,
332
+ "completions/mean_terminated_length": 735.1981018066406,
333
+ "completions/min_length": 680.7,
334
+ "completions/min_terminated_length": 680.7,
335
+ "entropy": 0.6371437162160873,
336
+ "epoch": 0.0165016501650165,
337
+ "frac_reward_zero_std": 0.0,
338
+ "grad_norm": 0.84375,
339
+ "learning_rate": 7.386091127098321e-07,
340
+ "loss": 0.01,
341
+ "num_tokens": 11170099.0,
342
+ "reward": 1.1073094844818114,
343
+ "reward_std": 0.17044325098395346,
344
+ "rewards/ADEnReward/mean": 0.018601356376893818,
345
+ "rewards/ADEnReward/std": 0.05055182706564665,
346
+ "rewards/ReasoningConfidenceReward/mean": 0.11370811760425567,
347
+ "rewards/ReasoningConfidenceReward/std": 0.07379961647093296,
348
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
349
+ "rewards/StrictFormatReward/std": 0.15585823953151703,
350
+ "step": 110,
351
+ "step_time": 38.995268660690634
352
+ },
353
+ {
354
+ "clip_ratio/high_max": 0.0,
355
+ "clip_ratio/high_mean": 0.0,
356
+ "clip_ratio/low_mean": 0.0,
357
+ "clip_ratio/low_min": 0.0,
358
+ "clip_ratio/region_mean": 0.0,
359
+ "completions/clipped_ratio": 0.002083333395421505,
360
+ "completions/max_length": 956.2,
361
+ "completions/max_terminated_length": 884.7,
362
+ "completions/mean_length": 741.245849609375,
363
+ "completions/mean_terminated_length": 739.5868896484375,
364
+ "completions/min_length": 685.4,
365
+ "completions/min_terminated_length": 685.4,
366
+ "entropy": 0.6423951655626297,
367
+ "epoch": 0.018001800180018002,
368
+ "frac_reward_zero_std": 0.0,
369
+ "grad_norm": 1.03125,
370
+ "learning_rate": 7.146282973621102e-07,
371
+ "loss": 0.01,
372
+ "num_tokens": 12184361.0,
373
+ "reward": 1.127052104473114,
374
+ "reward_std": 0.1497463181614876,
375
+ "rewards/ADEnReward/mean": 0.029763074405491353,
376
+ "rewards/ADEnReward/std": 0.07583294808864594,
377
+ "rewards/ReasoningConfidenceReward/mean": 0.10978899747133256,
378
+ "rewards/ReasoningConfidenceReward/std": 0.08882112912833691,
379
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
380
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
381
+ "step": 120,
382
+ "step_time": 39.55284757846967
383
+ },
384
+ {
385
+ "clip_ratio/high_max": 0.0,
386
+ "clip_ratio/high_mean": 0.0,
387
+ "clip_ratio/low_mean": 0.0,
388
+ "clip_ratio/low_min": 0.0,
389
+ "clip_ratio/region_mean": 0.0,
390
+ "completions/clipped_ratio": 0.002083333395421505,
391
+ "completions/max_length": 889.2,
392
+ "completions/max_terminated_length": 814.3,
393
+ "completions/mean_length": 738.2166931152344,
394
+ "completions/mean_terminated_length": 736.5271484375,
395
+ "completions/min_length": 684.2,
396
+ "completions/min_terminated_length": 684.2,
397
+ "entropy": 0.6385834395885468,
398
+ "epoch": 0.0195019501950195,
399
+ "frac_reward_zero_std": 0.0,
400
+ "grad_norm": 1.109375,
401
+ "learning_rate": 6.906474820143885e-07,
402
+ "loss": 0.0074,
403
+ "num_tokens": 13197457.0,
404
+ "reward": 1.1243727207183838,
405
+ "reward_std": 0.1370793327689171,
406
+ "rewards/ADEnReward/mean": 0.026768459612503646,
407
+ "rewards/ADEnReward/std": 0.0662717854604125,
408
+ "rewards/ReasoningConfidenceReward/mean": 0.11010420471429824,
409
+ "rewards/ReasoningConfidenceReward/std": 0.07703434824943542,
410
+ "rewards/StrictFormatReward/mean": 0.9875,
411
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
412
+ "step": 130,
413
+ "step_time": 38.24893993083388
414
+ },
415
+ {
416
+ "clip_ratio/high_max": 0.0,
417
+ "clip_ratio/high_mean": 0.0,
418
+ "clip_ratio/low_mean": 0.0,
419
+ "clip_ratio/low_min": 0.0,
420
+ "clip_ratio/region_mean": 0.0,
421
+ "completions/clipped_ratio": 0.0,
422
+ "completions/max_length": 812.3,
423
+ "completions/max_terminated_length": 812.3,
424
+ "completions/mean_length": 736.0104370117188,
425
+ "completions/mean_terminated_length": 736.0104370117188,
426
+ "completions/min_length": 633.8,
427
+ "completions/min_terminated_length": 633.8,
428
+ "entropy": 0.6263688296079636,
429
+ "epoch": 0.021002100210021003,
430
+ "frac_reward_zero_std": 0.0,
431
+ "grad_norm": 0.94140625,
432
+ "learning_rate": 6.666666666666666e-07,
433
+ "loss": -0.0068,
434
+ "num_tokens": 14209414.0,
435
+ "reward": 1.1419667840003966,
436
+ "reward_std": 0.15764849670231343,
437
+ "rewards/ADEnReward/mean": 0.04068564581684768,
438
+ "rewards/ADEnReward/std": 0.08432210255414248,
439
+ "rewards/ReasoningConfidenceReward/mean": 0.11794776618480682,
440
+ "rewards/ReasoningConfidenceReward/std": 0.07809726595878601,
441
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
442
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
443
+ "step": 140,
444
+ "step_time": 34.79935124134645
445
+ },
446
+ {
447
+ "clip_ratio/high_max": 0.0,
448
+ "clip_ratio/high_mean": 0.0,
449
+ "clip_ratio/low_mean": 0.0,
450
+ "clip_ratio/low_min": 0.0,
451
+ "clip_ratio/region_mean": 0.0,
452
+ "completions/clipped_ratio": 0.0,
453
+ "completions/max_length": 809.8,
454
+ "completions/max_terminated_length": 809.8,
455
+ "completions/mean_length": 735.3583557128907,
456
+ "completions/mean_terminated_length": 735.3583557128907,
457
+ "completions/min_length": 675.4,
458
+ "completions/min_terminated_length": 675.4,
459
+ "entropy": 0.6429328173398972,
460
+ "epoch": 0.022502250225022502,
461
+ "frac_reward_zero_std": 0.0,
462
+ "grad_norm": 1.1875,
463
+ "learning_rate": 6.426858513189448e-07,
464
+ "loss": -0.0016,
465
+ "num_tokens": 15220674.0,
466
+ "reward": 1.1485553145408631,
467
+ "reward_std": 0.1378554403781891,
468
+ "rewards/ADEnReward/mean": 0.03246476505883038,
469
+ "rewards/ADEnReward/std": 0.07621528403833508,
470
+ "rewards/ReasoningConfidenceReward/mean": 0.1285905048251152,
471
+ "rewards/ReasoningConfidenceReward/std": 0.07357696481049061,
472
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
473
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
474
+ "step": 150,
475
+ "step_time": 34.40164418127388
476
+ },
477
+ {
478
+ "clip_ratio/high_max": 0.0,
479
+ "clip_ratio/high_mean": 0.0,
480
+ "clip_ratio/low_mean": 0.0,
481
+ "clip_ratio/low_min": 0.0,
482
+ "clip_ratio/region_mean": 0.0,
483
+ "completions/clipped_ratio": 0.0,
484
+ "completions/max_length": 862.8,
485
+ "completions/max_terminated_length": 862.8,
486
+ "completions/mean_length": 736.8479370117187,
487
+ "completions/mean_terminated_length": 736.8479370117187,
488
+ "completions/min_length": 685.0,
489
+ "completions/min_terminated_length": 685.0,
490
+ "entropy": 0.6302657306194306,
491
+ "epoch": 0.024002400240024,
492
+ "frac_reward_zero_std": 0.0,
493
+ "grad_norm": 0.85546875,
494
+ "learning_rate": 6.187050359712231e-07,
495
+ "loss": 0.0054,
496
+ "num_tokens": 16232265.0,
497
+ "reward": 1.1301079392433167,
498
+ "reward_std": 0.12346492633223534,
499
+ "rewards/ADEnReward/mean": 0.02919836761429906,
500
+ "rewards/ADEnReward/std": 0.06346954144537449,
501
+ "rewards/ReasoningConfidenceReward/mean": 0.10924286767840385,
502
+ "rewards/ReasoningConfidenceReward/std": 0.07436333447694779,
503
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
504
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
505
+ "step": 160,
506
+ "step_time": 35.0627255375497
507
+ },
508
+ {
509
+ "clip_ratio/high_max": 0.0,
510
+ "clip_ratio/high_mean": 0.0,
511
+ "clip_ratio/low_mean": 0.0,
512
+ "clip_ratio/low_min": 0.0,
513
+ "clip_ratio/region_mean": 0.0,
514
+ "completions/clipped_ratio": 0.0,
515
+ "completions/max_length": 872.9,
516
+ "completions/max_terminated_length": 872.9,
517
+ "completions/mean_length": 733.4729309082031,
518
+ "completions/mean_terminated_length": 733.4729309082031,
519
+ "completions/min_length": 674.1,
520
+ "completions/min_terminated_length": 674.1,
521
+ "entropy": 0.6332272559404373,
522
+ "epoch": 0.025502550255025503,
523
+ "frac_reward_zero_std": 0.0,
524
+ "grad_norm": 1.4453125,
525
+ "learning_rate": 5.947242206235011e-07,
526
+ "loss": 0.0041,
527
+ "num_tokens": 17241900.0,
528
+ "reward": 1.1381949663162232,
529
+ "reward_std": 0.12290547527372837,
530
+ "rewards/ADEnReward/mean": 0.034653707128018144,
531
+ "rewards/ADEnReward/std": 0.07986385971307755,
532
+ "rewards/ReasoningConfidenceReward/mean": 0.1118745468556881,
533
+ "rewards/ReasoningConfidenceReward/std": 0.07345958650112153,
534
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
535
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
536
+ "step": 170,
537
+ "step_time": 35.15750455642119
538
+ },
539
+ {
540
+ "clip_ratio/high_max": 0.0,
541
+ "clip_ratio/high_mean": 0.0,
542
+ "clip_ratio/low_mean": 0.0,
543
+ "clip_ratio/low_min": 0.0,
544
+ "clip_ratio/region_mean": 0.0,
545
+ "completions/clipped_ratio": 0.0,
546
+ "completions/max_length": 802.8,
547
+ "completions/max_terminated_length": 802.8,
548
+ "completions/mean_length": 735.7437683105469,
549
+ "completions/mean_terminated_length": 735.7437683105469,
550
+ "completions/min_length": 679.2,
551
+ "completions/min_terminated_length": 679.2,
552
+ "entropy": 0.6316021621227265,
553
+ "epoch": 0.027002700270027002,
554
+ "frac_reward_zero_std": 0.0,
555
+ "grad_norm": 0.9921875,
556
+ "learning_rate": 5.707434052757793e-07,
557
+ "loss": -0.0027,
558
+ "num_tokens": 18253441.0,
559
+ "reward": 1.1503783106803893,
560
+ "reward_std": 0.13333264142274856,
561
+ "rewards/ADEnReward/mean": 0.03611529269255698,
562
+ "rewards/ADEnReward/std": 0.08335062861442566,
563
+ "rewards/ReasoningConfidenceReward/mean": 0.12676299437880517,
564
+ "rewards/ReasoningConfidenceReward/std": 0.07276010811328888,
565
+ "rewards/StrictFormatReward/mean": 0.9875,
566
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
567
+ "step": 180,
568
+ "step_time": 34.76130234096199
569
+ },
570
+ {
571
+ "clip_ratio/high_max": 0.0,
572
+ "clip_ratio/high_mean": 0.0,
573
+ "clip_ratio/low_mean": 0.0,
574
+ "clip_ratio/low_min": 0.0,
575
+ "clip_ratio/region_mean": 0.0,
576
+ "completions/clipped_ratio": 0.0,
577
+ "completions/max_length": 826.4,
578
+ "completions/max_terminated_length": 826.4,
579
+ "completions/mean_length": 736.7250183105468,
580
+ "completions/mean_terminated_length": 736.7250183105468,
581
+ "completions/min_length": 682.4,
582
+ "completions/min_terminated_length": 682.4,
583
+ "entropy": 0.6254515618085861,
584
+ "epoch": 0.028502850285028504,
585
+ "frac_reward_zero_std": 0.0,
586
+ "grad_norm": 1.2734375,
587
+ "learning_rate": 5.467625899280576e-07,
588
+ "loss": -0.0044,
589
+ "num_tokens": 19265725.0,
590
+ "reward": 1.1711225748062133,
591
+ "reward_std": 0.0973996564745903,
592
+ "rewards/ADEnReward/mean": 0.03408026825636625,
593
+ "rewards/ADEnReward/std": 0.07690504901111125,
594
+ "rewards/ReasoningConfidenceReward/mean": 0.13704225420951843,
595
+ "rewards/ReasoningConfidenceReward/std": 0.06723648384213447,
596
+ "rewards/StrictFormatReward/mean": 1.0,
597
+ "rewards/StrictFormatReward/std": 0.0,
598
+ "step": 190,
599
+ "step_time": 34.57357950732112
600
+ },
601
+ {
602
+ "clip_ratio/high_max": 0.0,
603
+ "clip_ratio/high_mean": 0.0,
604
+ "clip_ratio/low_mean": 0.0,
605
+ "clip_ratio/low_min": 0.0,
606
+ "clip_ratio/region_mean": 0.0,
607
+ "completions/clipped_ratio": 0.0,
608
+ "completions/max_length": 805.0,
609
+ "completions/max_terminated_length": 805.0,
610
+ "completions/mean_length": 734.5750183105469,
611
+ "completions/mean_terminated_length": 734.5750183105469,
612
+ "completions/min_length": 678.8,
613
+ "completions/min_terminated_length": 678.8,
614
+ "entropy": 0.639057207107544,
615
+ "epoch": 0.030003000300030003,
616
+ "frac_reward_zero_std": 0.0,
617
+ "grad_norm": 1.4765625,
618
+ "learning_rate": 5.227817745803357e-07,
619
+ "loss": 0.0011,
620
+ "num_tokens": 20276305.0,
621
+ "reward": 1.1531866073608399,
622
+ "reward_std": 0.11019677557051182,
623
+ "rewards/ADEnReward/mean": 0.028415630990639328,
624
+ "rewards/ADEnReward/std": 0.0590023357886821,
625
+ "rewards/ReasoningConfidenceReward/mean": 0.13310426697134972,
626
+ "rewards/ReasoningConfidenceReward/std": 0.0671043038368225,
627
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
628
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
629
+ "step": 200,
630
+ "step_time": 33.87812012191862
631
+ }
632
+ ],
633
+ "logging_steps": 10,
634
+ "max_steps": 417,
635
+ "num_input_tokens_seen": 20276305,
636
+ "num_train_epochs": 1,
637
+ "save_steps": 100,
638
+ "stateful_callbacks": {
639
+ "TrainerControl": {
640
+ "args": {
641
+ "should_epoch_stop": false,
642
+ "should_evaluate": false,
643
+ "should_log": false,
644
+ "should_save": true,
645
+ "should_training_stop": false
646
+ },
647
+ "attributes": {}
648
+ }
649
+ },
650
+ "total_flos": 0.0,
651
+ "train_batch_size": 6,
652
+ "trial_name": null,
653
+ "trial_params": null
654
+ }
grpo-nADE-format-RC/checkpoint-300/added_tokens.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_response>": 151666,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
grpo-nADE-format-RC/checkpoint-300/chat_template.jinja ADDED
@@ -0,0 +1,110 @@
+ {%- set image_count = namespace(value=0) %}
+ {%- set video_count = namespace(value=0) %}
+ {%- macro render_content(content, do_vision_count) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- else %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
+ <|vision_start|><|image_pad|><|vision_end|>
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
+ <|vision_start|><|video_pad|><|vision_end|>
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- endmacro %}
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- render_content(messages[0].content, false) + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+ {%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false) %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- for message in messages %}
+ {%- set content = render_content(message.content, True) %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
grpo-nADE-format-RC/checkpoint-300/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "architectures": [
+ "Qwen3VLForConditionalGeneration"
+ ],
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "image_token_id": 151655,
+ "model_type": "qwen3_vl",
+ "pad_token_id": 151643,
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.02,
+ "intermediate_size": 9728,
+ "max_position_embeddings": 262144,
+ "model_type": "qwen3_vl_text",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 24,
+ 20,
+ 20
+ ],
+ "rope_type": "default"
+ },
+ "rope_theta": 5000000,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 151936
+ },
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.6",
+ "use_cache": false,
+ "video_token_id": 151656,
+ "vision_config": {
+ "deepstack_visual_indexes": [
+ 5,
+ 11,
+ 17
+ ],
+ "depth": 24,
+ "dtype": "bfloat16",
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1024,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "model_type": "qwen3_vl",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 2560,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652
+ }
grpo-nADE-format-RC/checkpoint-300/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "4.57.6"
+ }
grpo-nADE-format-RC/checkpoint-300/model.safetensors.index.json ADDED
@@ -0,0 +1,721 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 4437815808,
4
+ "total_size": 8875631616
5
+ },
6
+ "weight_map": {
7
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
14
+ "model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
17
+ "model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
18
+ "model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
25
+ "model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
28
+ "model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
36
+ "model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
39
+ "model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
40
+ "model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
42
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
46
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
47
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
50
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
58
+ "model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
61
+ "model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
64
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
69
+ "model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
72
+ "model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
76
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
79
+ "model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
80
+ "model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
83
+ "model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
85
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
86
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
91
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
94
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
97
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
98
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
101
+ "model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
102
+ "model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
105
+ "model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
108
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
109
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
110
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
111
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
112
+ "model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
113
+ "model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
114
+ "model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
115
+ "model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
116
+ "model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
117
+ "model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
118
+ "model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
119
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
120
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
121
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
122
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
123
+ "model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
124
+ "model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
125
+ "model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
126
+ "model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
127
+ "model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
135
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
138
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
139
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
141
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
144
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
145
+ "model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
146
+ "model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
148
+ "model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
149
+ "model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
157
+ "model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
160
+ "model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
162
+ "model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
163
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
168
+ "model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
171
+ "model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
178
+ "model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
179
+ "model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
180
+ "model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
182
+ "model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
184
+ "model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
185
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
188
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
190
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
192
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
193
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
196
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
198
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
201
+ "model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
204
+ "model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
212
+ "model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
215
+ "model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
216
+ "model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
217
+ "model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
218
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
220
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
222
+ "model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
223
+ "model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
224
+ "model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
226
+ "model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
228
+ "model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
232
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
233
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
234
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
236
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
237
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
241
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
244
+ "model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
245
+ "model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
246
+ "model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
248
+ "model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
251
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
252
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
253
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
255
+ "model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
256
+ "model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
258
+ "model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
259
+ "model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
260
+ "model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
262
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
263
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
264
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
265
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
267
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
268
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
269
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
270
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
271
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
+ "model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
273
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
276
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
277
+ "model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
278
+ "model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
280
+ "model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
281
+ "model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
282
+ "model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
284
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
285
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
286
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
287
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
289
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
290
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
291
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
292
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
293
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
294
+ "model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
295
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
296
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
299
+ "model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
300
+ "model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
301
+ "model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
303
+ "model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
304
+ "model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
306
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
308
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
310
+ "model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
311
+ "model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
312
+ "model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
313
+ "model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
314
+ "model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
316
+ "model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
317
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
318
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
320
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
321
+ "model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
322
+ "model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
324
+ "model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
325
+ "model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
328
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
330
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
333
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
336
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
337
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
339
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
340
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
341
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
342
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
343
+ "model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
344
+ "model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
345
+ "model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
346
+ "model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
347
+ "model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
348
+ "model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
349
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
350
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
351
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
352
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
353
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
354
+ "model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
355
+ "model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
356
+ "model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
357
+ "model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
358
+ "model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
359
+ "model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
360
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
361
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
362
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
363
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
364
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
365
+ "model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
366
+ "model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
367
+ "model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
368
+ "model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
369
+ "model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
373
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
376
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
377
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
378
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
380
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
383
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
384
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
385
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
387
+ "model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
388
+ "model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
390
+ "model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
391
+ "model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
392
+ "model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
394
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
396
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
397
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
398
+ "model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
399
+ "model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
400
+ "model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
402
+ "model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
404
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors",
405
+ "model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
406
+ "model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
408
+ "model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
409
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
410
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
411
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
412
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
413
+ "model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
414
+ "model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
415
+ "model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
416
+ "model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
417
+ "model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
418
+ "model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
420
+ "model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
421
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
422
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
423
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
424
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
425
+ "model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
426
+ "model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
427
+ "model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
428
+ "model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
429
+ "model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
430
+ "model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
432
+ "model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
433
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
434
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
435
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
436
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
437
+ "model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
438
+ "model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
439
+ "model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
440
+ "model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
441
+ "model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
442
+ "model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
443
+ "model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
444
+ "model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
445
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
446
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
447
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
448
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
449
+ "model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
450
+ "model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
451
+ "model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
452
+ "model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
453
+ "model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
454
+ "model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
455
+ "model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
456
+ "model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
457
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
458
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
459
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
460
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
461
+ "model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
462
+ "model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
463
+ "model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
464
+ "model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
465
+ "model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
466
+ "model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
467
+ "model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
468
+ "model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
469
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
470
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
471
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
472
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
473
+ "model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
474
+ "model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
475
+ "model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
476
+ "model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
477
+ "model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
478
+ "model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
479
+ "model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
480
+ "model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
481
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
482
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
483
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
484
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
485
+ "model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
486
+ "model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
487
+ "model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
488
+ "model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
489
+ "model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
490
+ "model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
491
+ "model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
492
+ "model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
493
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
494
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
495
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
496
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
497
+ "model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
498
+ "model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
499
+ "model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
500
+ "model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
501
+ "model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
502
+ "model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
503
+ "model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
504
+ "model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
505
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
506
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
507
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
508
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
509
+ "model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
510
+ "model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
511
+ "model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
512
+ "model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
513
+ "model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
514
+ "model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
515
+ "model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
516
+ "model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
517
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
518
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
519
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
520
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
521
+ "model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
522
+ "model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
523
+ "model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
524
+ "model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
525
+ "model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
526
+ "model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
527
+ "model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
528
+ "model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
529
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
530
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
531
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
532
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
533
+ "model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
534
+ "model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
535
+ "model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
536
+ "model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
537
+ "model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
538
+ "model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
539
+ "model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
540
+ "model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
541
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
542
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
543
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
544
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
545
+ "model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
546
+ "model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
547
+ "model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
548
+ "model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
549
+ "model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
550
+ "model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
551
+ "model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
552
+ "model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
553
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
554
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
555
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
556
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
557
+ "model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
558
+ "model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
559
+ "model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
560
+ "model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
561
+ "model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
562
+ "model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
563
+ "model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
564
+ "model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
565
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
566
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
567
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
568
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
569
+ "model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
570
+ "model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
571
+ "model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
572
+ "model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
573
+ "model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
574
+ "model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
575
+ "model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
576
+ "model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
577
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
578
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
579
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
580
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
581
+ "model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
582
+ "model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
583
+ "model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
584
+ "model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
585
+ "model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
586
+ "model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
587
+ "model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
588
+ "model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
589
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
590
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
591
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
592
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
593
+ "model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
594
+ "model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
595
+ "model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
596
+ "model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
597
+ "model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
598
+ "model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
599
+ "model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
600
+ "model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
601
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
602
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
603
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
604
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
605
+ "model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
606
+ "model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
607
+ "model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
608
+ "model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
609
+ "model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
610
+ "model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
611
+ "model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
612
+ "model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
613
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
614
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
615
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
616
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
617
+ "model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
618
+ "model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
619
+ "model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
620
+ "model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
621
+ "model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
622
+ "model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
623
+ "model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
624
+ "model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
625
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
626
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
627
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
628
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
629
+ "model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
630
+ "model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
631
+ "model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
632
+ "model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
633
+ "model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
634
+ "model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
635
+ "model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
636
+ "model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
637
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
638
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
639
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
640
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
641
+ "model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
642
+ "model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
643
+ "model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
644
+ "model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
645
+ "model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
646
+ "model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
647
+ "model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
648
+ "model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
649
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
650
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
651
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
652
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
653
+ "model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
654
+ "model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
655
+ "model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
656
+ "model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
657
+ "model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
658
+ "model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
659
+ "model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
660
+ "model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
661
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
662
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
663
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
664
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
665
+ "model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
666
+ "model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
667
+ "model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
668
+ "model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
669
+ "model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
670
+ "model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
671
+ "model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
672
+ "model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
673
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
674
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
675
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
676
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
677
+ "model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
678
+ "model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
679
+ "model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
680
+ "model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
681
+ "model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
682
+ "model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
683
+ "model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
684
+ "model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
685
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
686
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
687
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
688
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
689
+ "model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
690
+ "model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
691
+ "model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
692
+ "model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
693
+ "model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
694
+ "model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
695
+ "model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
696
+ "model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
697
+ "model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
698
+ "model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
699
+ "model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
700
+ "model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
701
+ "model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
702
+ "model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
703
+ "model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
704
+ "model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
705
+ "model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
706
+ "model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
707
+ "model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
708
+ "model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
709
+ "model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
710
+ "model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
711
+ "model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
712
+ "model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
713
+ "model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
714
+ "model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
715
+ "model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
716
+ "model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
717
+ "model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
718
+ "model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
719
+ "model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
720
+ }
721
+ }
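The tail of `model.safetensors.index.json` above is the `weight_map`, which tells loaders which shard holds each parameter (here the remaining vision-tower tensors, all in shard 1 of 2). Below is a minimal sketch of using that map to pull a single tensor out of the right shard; it assumes the checkpoint directory is available locally and that `safetensors` is installed, and the path and parameter name are only examples.

```python
# Minimal sketch (not part of the checkpoint): resolve one tensor from the
# sharded checkpoint via the index's weight_map. Paths are assumptions.
import json
from safetensors import safe_open

ckpt_dir = "grpo-nADE-format-RC/checkpoint-300"  # hypothetical local path

with open(f"{ckpt_dir}/model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.visual.merger.linear_fc1.weight"
shard = index["weight_map"][name]  # e.g. "model-00001-of-00002.safetensors"

with safe_open(f"{ckpt_dir}/{shard}", framework="pt", device="cpu") as f:
    tensor = f.get_tensor(name)
print(name, tuple(tensor.shape))
```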
grpo-nADE-format-RC/checkpoint-300/tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": null,
235
+ "model_max_length": 262144,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "left",
240
+ "processor_class": "Qwen3VLProcessor",
241
+ "split_special_tokens": false,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "unk_token": null
244
+ }
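The `tokenizer_config.json` added above registers the Qwen special tokens (vision pads, tool-call tags, `<think>`/`</think>`), sets `<|im_end|>` as EOS and `<|endoftext|>` as PAD, and uses left padding. A minimal sketch of loading it with `transformers` and sanity-checking those settings, assuming the checkpoint directory is local and a compatible `transformers` version is installed (the path is an assumption):

```python
# Minimal sketch: load the tokenizer from the checkpoint directory and check
# the settings declared in the config above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-300")  # hypothetical path

# Declared above: <|im_end|> as EOS, <|endoftext|> as PAD, left padding.
print(tok.eos_token, tok.pad_token, tok.padding_side)

# <think>/</think> are listed in added_tokens_decoder as 151667/151668.
print(tok.convert_tokens_to_ids(["<think>", "</think>"]))
```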
grpo-nADE-format-RC/checkpoint-300/trainer_state.json ADDED
@@ -0,0 +1,964 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.045004500450045004,
6
+ "eval_steps": 100,
7
+ "global_step": 300,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completions/clipped_ratio": 0.03750000149011612,
19
+ "completions/max_length": 1533.5,
20
+ "completions/max_terminated_length": 1427.6,
21
+ "completions/mean_length": 820.7687622070313,
22
+ "completions/mean_terminated_length": 795.4251159667969,
23
+ "completions/min_length": 119.1,
24
+ "completions/min_terminated_length": 119.1,
25
+ "entropy": 0.6458343416452408,
26
+ "epoch": 0.0015001500150015,
27
+ "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 1.0625,
29
+ "learning_rate": 9.784172661870503e-07,
30
+ "loss": 0.111,
31
+ "num_tokens": 1052641.0,
32
+ "reward": 0.15892810765653848,
33
+ "reward_std": 1.2357513666152955,
34
+ "rewards/ADEnReward/mean": 0.011186909227399155,
35
+ "rewards/ADEnReward/std": 0.03507473061326891,
36
+ "rewards/ReasoningConfidenceReward/mean": -0.19809213168919088,
37
+ "rewards/ReasoningConfidenceReward/std": 0.45118741542100904,
38
+ "rewards/StrictFormatReward/mean": 0.345833333209157,
39
+ "rewards/StrictFormatReward/std": 0.8626526802778244,
40
+ "step": 10,
41
+ "step_time": 66.8766707284376
42
+ },
43
+ {
44
+ "clip_ratio/high_max": 0.0,
45
+ "clip_ratio/high_mean": 0.0,
46
+ "clip_ratio/low_mean": 0.0,
47
+ "clip_ratio/low_min": 0.0,
48
+ "clip_ratio/region_mean": 0.0,
49
+ "completions/clipped_ratio": 0.01875000037252903,
50
+ "completions/max_length": 1258.4,
51
+ "completions/max_terminated_length": 1106.6,
52
+ "completions/mean_length": 743.8416809082031,
53
+ "completions/mean_terminated_length": 728.8905517578125,
54
+ "completions/min_length": 163.6,
55
+ "completions/min_terminated_length": 163.6,
56
+ "entropy": 0.6466913104057312,
57
+ "epoch": 0.003000300030003,
58
+ "frac_reward_zero_std": 0.0,
59
+ "grad_norm": 1.15625,
60
+ "learning_rate": 9.544364508393285e-07,
61
+ "loss": 0.0171,
62
+ "num_tokens": 2068565.0,
63
+ "reward": 0.8077859580516815,
64
+ "reward_std": 0.8083356320858002,
65
+ "rewards/ADEnReward/mean": 0.03084552166983485,
66
+ "rewards/ADEnReward/std": 0.07702018767595291,
67
+ "rewards/ReasoningConfidenceReward/mean": 0.018607060704380275,
68
+ "rewards/ReasoningConfidenceReward/std": 0.28907840102911,
69
+ "rewards/StrictFormatReward/mean": 0.7583333432674408,
70
+ "rewards/StrictFormatReward/std": 0.6329713940620423,
71
+ "step": 20,
72
+ "step_time": 52.03955397913232
73
+ },
74
+ {
75
+ "clip_ratio/high_max": 0.0,
76
+ "clip_ratio/high_mean": 0.0,
77
+ "clip_ratio/low_mean": 0.0,
78
+ "clip_ratio/low_min": 0.0,
79
+ "clip_ratio/region_mean": 0.0,
80
+ "completions/clipped_ratio": 0.002083333395421505,
81
+ "completions/max_length": 1179.2,
82
+ "completions/max_terminated_length": 1103.8,
83
+ "completions/mean_length": 735.7812744140625,
84
+ "completions/mean_terminated_length": 734.0838012695312,
85
+ "completions/min_length": 424.2,
86
+ "completions/min_terminated_length": 424.2,
87
+ "entropy": 0.6348762333393096,
88
+ "epoch": 0.004500450045004501,
89
+ "frac_reward_zero_std": 0.0,
90
+ "grad_norm": 1.0546875,
91
+ "learning_rate": 9.304556354916066e-07,
92
+ "loss": 0.0134,
93
+ "num_tokens": 3080012.0,
94
+ "reward": 0.9798760175704956,
95
+ "reward_std": 0.5248558193445205,
96
+ "rewards/ADEnReward/mean": 0.032167868409305814,
97
+ "rewards/ADEnReward/std": 0.0745716668665409,
98
+ "rewards/ReasoningConfidenceReward/mean": 0.06437477525323629,
99
+ "rewards/ReasoningConfidenceReward/std": 0.19101330041885375,
100
+ "rewards/StrictFormatReward/mean": 0.8833333373069763,
101
+ "rewards/StrictFormatReward/std": 0.45989986062049865,
102
+ "step": 30,
103
+ "step_time": 47.932181040663274
104
+ },
105
+ {
106
+ "clip_ratio/high_max": 0.0,
107
+ "clip_ratio/high_mean": 0.0,
108
+ "clip_ratio/low_mean": 0.0,
109
+ "clip_ratio/low_min": 0.0,
110
+ "clip_ratio/region_mean": 0.0,
111
+ "completions/clipped_ratio": 0.002083333395421505,
112
+ "completions/max_length": 1017.0,
113
+ "completions/max_terminated_length": 946.0,
114
+ "completions/mean_length": 739.5562744140625,
115
+ "completions/mean_terminated_length": 737.8339294433594,
116
+ "completions/min_length": 563.2,
117
+ "completions/min_terminated_length": 563.2,
118
+ "entropy": 0.6441138684749603,
119
+ "epoch": 0.006000600060006,
120
+ "frac_reward_zero_std": 0.0,
121
+ "grad_norm": 0.98046875,
122
+ "learning_rate": 9.064748201438849e-07,
123
+ "loss": 0.0049,
124
+ "num_tokens": 4093959.0,
125
+ "reward": 1.0465242981910705,
126
+ "reward_std": 0.3045470409095287,
127
+ "rewards/ADEnReward/mean": 0.03476252369582653,
128
+ "rewards/ADEnReward/std": 0.07538308277726173,
129
+ "rewards/ReasoningConfidenceReward/mean": 0.07009507827460766,
130
+ "rewards/ReasoningConfidenceReward/std": 0.11509535983204841,
131
+ "rewards/StrictFormatReward/mean": 0.9416666626930237,
132
+ "rewards/StrictFormatReward/std": 0.2242635190486908,
133
+ "step": 40,
134
+ "step_time": 40.8255105547607
135
+ },
136
+ {
137
+ "clip_ratio/high_max": 0.0,
138
+ "clip_ratio/high_mean": 0.0,
139
+ "clip_ratio/low_mean": 0.0,
140
+ "clip_ratio/low_min": 0.0,
141
+ "clip_ratio/region_mean": 0.0,
142
+ "completions/clipped_ratio": 0.0,
143
+ "completions/max_length": 866.4,
144
+ "completions/max_terminated_length": 866.4,
145
+ "completions/mean_length": 733.5812683105469,
146
+ "completions/mean_terminated_length": 733.5812683105469,
147
+ "completions/min_length": 616.2,
148
+ "completions/min_terminated_length": 616.2,
149
+ "entropy": 0.652310574054718,
150
+ "epoch": 0.007500750075007501,
151
+ "frac_reward_zero_std": 0.0,
152
+ "grad_norm": 1.078125,
153
+ "learning_rate": 8.82494004796163e-07,
154
+ "loss": -0.0002,
155
+ "num_tokens": 5104478.0,
156
+ "reward": 1.1200557351112366,
157
+ "reward_std": 0.18049246706068517,
158
+ "rewards/ADEnReward/mean": 0.032199547812342647,
159
+ "rewards/ADEnReward/std": 0.08372207283973694,
160
+ "rewards/ReasoningConfidenceReward/mean": 0.10868950486183167,
161
+ "rewards/ReasoningConfidenceReward/std": 0.09046642743051052,
162
+ "rewards/StrictFormatReward/mean": 0.9791666686534881,
163
+ "rewards/StrictFormatReward/std": 0.10964388847351074,
164
+ "step": 50,
165
+ "step_time": 34.72850414663553
166
+ },
167
+ {
168
+ "clip_ratio/high_max": 0.0,
169
+ "clip_ratio/high_mean": 0.0,
170
+ "clip_ratio/low_mean": 0.0,
171
+ "clip_ratio/low_min": 0.0,
172
+ "clip_ratio/region_mean": 0.0,
173
+ "completions/clipped_ratio": 0.0,
174
+ "completions/max_length": 820.2,
175
+ "completions/max_terminated_length": 820.2,
176
+ "completions/mean_length": 734.4875244140625,
177
+ "completions/mean_terminated_length": 734.4875244140625,
178
+ "completions/min_length": 597.2,
179
+ "completions/min_terminated_length": 597.2,
180
+ "entropy": 0.6447544604539871,
181
+ "epoch": 0.009000900090009001,
182
+ "frac_reward_zero_std": 0.0,
183
+ "grad_norm": 0.8671875,
184
+ "learning_rate": 8.585131894484412e-07,
185
+ "loss": -0.014,
186
+ "num_tokens": 6115528.0,
187
+ "reward": 1.0801176726818085,
188
+ "reward_std": 0.2073265790939331,
189
+ "rewards/ADEnReward/mean": 0.024589571449905635,
190
+ "rewards/ADEnReward/std": 0.05969331655651331,
191
+ "rewards/ReasoningConfidenceReward/mean": 0.09302806071937084,
192
+ "rewards/ReasoningConfidenceReward/std": 0.08853670731186866,
193
+ "rewards/StrictFormatReward/mean": 0.9624999940395356,
194
+ "rewards/StrictFormatReward/std": 0.22511394023895265,
195
+ "step": 60,
196
+ "step_time": 34.7001038627699
197
+ },
198
+ {
199
+ "clip_ratio/high_max": 0.0,
200
+ "clip_ratio/high_mean": 0.0,
201
+ "clip_ratio/low_mean": 0.0,
202
+ "clip_ratio/low_min": 0.0,
203
+ "clip_ratio/region_mean": 0.0,
204
+ "completions/clipped_ratio": 0.002083333395421505,
205
+ "completions/max_length": 887.8,
206
+ "completions/max_terminated_length": 880.9,
207
+ "completions/mean_length": 739.214599609375,
208
+ "completions/mean_terminated_length": 737.5825012207031,
209
+ "completions/min_length": 674.7,
210
+ "completions/min_terminated_length": 674.7,
211
+ "entropy": 0.648271444439888,
212
+ "epoch": 0.010501050105010502,
213
+ "frac_reward_zero_std": 0.0,
214
+ "grad_norm": 1.046875,
215
+ "learning_rate": 8.345323741007194e-07,
216
+ "loss": 0.0095,
217
+ "num_tokens": 7128591.0,
218
+ "reward": 1.117066776752472,
219
+ "reward_std": 0.14670775569975375,
220
+ "rewards/ADEnReward/mean": 0.030741326790302993,
221
+ "rewards/ADEnReward/std": 0.07235845774412156,
222
+ "rewards/ReasoningConfidenceReward/mean": 0.10299204997718334,
223
+ "rewards/ReasoningConfidenceReward/std": 0.07768557965755463,
224
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
225
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
226
+ "step": 70,
227
+ "step_time": 37.97550033703446
228
+ },
229
+ {
230
+ "clip_ratio/high_max": 0.0,
231
+ "clip_ratio/high_mean": 0.0,
232
+ "clip_ratio/low_mean": 0.0,
233
+ "clip_ratio/low_min": 0.0,
234
+ "clip_ratio/region_mean": 0.0,
235
+ "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 871.3,
237
+ "completions/max_terminated_length": 871.3,
238
+ "completions/mean_length": 735.6625244140625,
239
+ "completions/mean_terminated_length": 735.6625244140625,
240
+ "completions/min_length": 663.4,
241
+ "completions/min_terminated_length": 663.4,
242
+ "entropy": 0.6467163026332855,
243
+ "epoch": 0.012001200120012,
244
+ "frac_reward_zero_std": 0.0,
245
+ "grad_norm": 1.078125,
246
+ "learning_rate": 8.105515587529975e-07,
247
+ "loss": 0.0099,
248
+ "num_tokens": 8140093.0,
249
+ "reward": 1.124228584766388,
250
+ "reward_std": 0.1685192134231329,
251
+ "rewards/ADEnReward/mean": 0.03326874002814293,
252
+ "rewards/ADEnReward/std": 0.07868262981064618,
253
+ "rewards/ReasoningConfidenceReward/mean": 0.10762646868824959,
254
+ "rewards/ReasoningConfidenceReward/std": 0.08494675308465957,
255
+ "rewards/StrictFormatReward/mean": 0.9833333373069764,
256
+ "rewards/StrictFormatReward/std": 0.08077637553215027,
257
+ "step": 80,
258
+ "step_time": 35.69703020621091
259
+ },
260
+ {
261
+ "clip_ratio/high_max": 0.0,
262
+ "clip_ratio/high_mean": 0.0,
263
+ "clip_ratio/low_mean": 0.0,
264
+ "clip_ratio/low_min": 0.0,
265
+ "clip_ratio/region_mean": 0.0,
266
+ "completions/clipped_ratio": 0.0,
267
+ "completions/max_length": 799.3,
268
+ "completions/max_terminated_length": 799.3,
269
+ "completions/mean_length": 730.2937683105469,
270
+ "completions/mean_terminated_length": 730.2937683105469,
271
+ "completions/min_length": 623.3,
272
+ "completions/min_terminated_length": 623.3,
273
+ "entropy": 0.6420892357826233,
274
+ "epoch": 0.013501350135013501,
275
+ "frac_reward_zero_std": 0.0,
276
+ "grad_norm": 0.828125,
277
+ "learning_rate": 7.865707434052757e-07,
278
+ "loss": -0.0098,
279
+ "num_tokens": 9148426.0,
280
+ "reward": 1.122767400741577,
281
+ "reward_std": 0.154670562595129,
282
+ "rewards/ADEnReward/mean": 0.03107238719239831,
283
+ "rewards/ADEnReward/std": 0.07060995940119028,
284
+ "rewards/ReasoningConfidenceReward/mean": 0.10836165957152843,
285
+ "rewards/ReasoningConfidenceReward/std": 0.07862687073647975,
286
+ "rewards/StrictFormatReward/mean": 0.9833333253860473,
287
+ "rewards/StrictFormatReward/std": 0.1154700517654419,
288
+ "step": 90,
289
+ "step_time": 34.06732882745564
290
+ },
291
+ {
292
+ "clip_ratio/high_max": 0.0,
293
+ "clip_ratio/high_mean": 0.0,
294
+ "clip_ratio/low_mean": 0.0,
295
+ "clip_ratio/low_min": 0.0,
296
+ "clip_ratio/region_mean": 0.0,
297
+ "completions/clipped_ratio": 0.0,
298
+ "completions/max_length": 886.6,
299
+ "completions/max_terminated_length": 886.6,
300
+ "completions/mean_length": 732.9125305175781,
301
+ "completions/mean_terminated_length": 732.9125305175781,
302
+ "completions/min_length": 674.0,
303
+ "completions/min_terminated_length": 674.0,
304
+ "entropy": 0.6424726009368896,
305
+ "epoch": 0.015001500150015001,
306
+ "frac_reward_zero_std": 0.0,
307
+ "grad_norm": 0.921875,
308
+ "learning_rate": 7.62589928057554e-07,
309
+ "loss": 0.0087,
310
+ "num_tokens": 10158000.0,
311
+ "reward": 1.1287578463554382,
312
+ "reward_std": 0.16896428540349007,
313
+ "rewards/ADEnReward/mean": 0.042021069768816234,
314
+ "rewards/ADEnReward/std": 0.08718259073793888,
315
+ "rewards/ReasoningConfidenceReward/mean": 0.10757005885243416,
316
+ "rewards/ReasoningConfidenceReward/std": 0.0734778918325901,
317
+ "rewards/StrictFormatReward/mean": 0.9791666626930237,
318
+ "rewards/StrictFormatReward/std": 0.12699072659015656,
319
+ "step": 100,
320
+ "step_time": 35.849342082161456
321
+ },
322
+ {
323
+ "clip_ratio/high_max": 0.0,
324
+ "clip_ratio/high_mean": 0.0,
325
+ "clip_ratio/low_mean": 0.0,
326
+ "clip_ratio/low_min": 0.0,
327
+ "clip_ratio/region_mean": 0.0,
328
+ "completions/clipped_ratio": 0.002083333395421505,
329
+ "completions/max_length": 922.1,
330
+ "completions/max_terminated_length": 847.3,
331
+ "completions/mean_length": 736.8729309082031,
332
+ "completions/mean_terminated_length": 735.1981018066406,
333
+ "completions/min_length": 680.7,
334
+ "completions/min_terminated_length": 680.7,
335
+ "entropy": 0.6371437162160873,
336
+ "epoch": 0.0165016501650165,
337
+ "frac_reward_zero_std": 0.0,
338
+ "grad_norm": 0.84375,
339
+ "learning_rate": 7.386091127098321e-07,
340
+ "loss": 0.01,
341
+ "num_tokens": 11170099.0,
342
+ "reward": 1.1073094844818114,
343
+ "reward_std": 0.17044325098395346,
344
+ "rewards/ADEnReward/mean": 0.018601356376893818,
345
+ "rewards/ADEnReward/std": 0.05055182706564665,
346
+ "rewards/ReasoningConfidenceReward/mean": 0.11370811760425567,
347
+ "rewards/ReasoningConfidenceReward/std": 0.07379961647093296,
348
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
349
+ "rewards/StrictFormatReward/std": 0.15585823953151703,
350
+ "step": 110,
351
+ "step_time": 38.995268660690634
352
+ },
353
+ {
354
+ "clip_ratio/high_max": 0.0,
355
+ "clip_ratio/high_mean": 0.0,
356
+ "clip_ratio/low_mean": 0.0,
357
+ "clip_ratio/low_min": 0.0,
358
+ "clip_ratio/region_mean": 0.0,
359
+ "completions/clipped_ratio": 0.002083333395421505,
360
+ "completions/max_length": 956.2,
361
+ "completions/max_terminated_length": 884.7,
362
+ "completions/mean_length": 741.245849609375,
363
+ "completions/mean_terminated_length": 739.5868896484375,
364
+ "completions/min_length": 685.4,
365
+ "completions/min_terminated_length": 685.4,
366
+ "entropy": 0.6423951655626297,
367
+ "epoch": 0.018001800180018002,
368
+ "frac_reward_zero_std": 0.0,
369
+ "grad_norm": 1.03125,
370
+ "learning_rate": 7.146282973621102e-07,
371
+ "loss": 0.01,
372
+ "num_tokens": 12184361.0,
373
+ "reward": 1.127052104473114,
374
+ "reward_std": 0.1497463181614876,
375
+ "rewards/ADEnReward/mean": 0.029763074405491353,
376
+ "rewards/ADEnReward/std": 0.07583294808864594,
377
+ "rewards/ReasoningConfidenceReward/mean": 0.10978899747133256,
378
+ "rewards/ReasoningConfidenceReward/std": 0.08882112912833691,
379
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
380
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
381
+ "step": 120,
382
+ "step_time": 39.55284757846967
383
+ },
384
+ {
385
+ "clip_ratio/high_max": 0.0,
386
+ "clip_ratio/high_mean": 0.0,
387
+ "clip_ratio/low_mean": 0.0,
388
+ "clip_ratio/low_min": 0.0,
389
+ "clip_ratio/region_mean": 0.0,
390
+ "completions/clipped_ratio": 0.002083333395421505,
391
+ "completions/max_length": 889.2,
392
+ "completions/max_terminated_length": 814.3,
393
+ "completions/mean_length": 738.2166931152344,
394
+ "completions/mean_terminated_length": 736.5271484375,
395
+ "completions/min_length": 684.2,
396
+ "completions/min_terminated_length": 684.2,
397
+ "entropy": 0.6385834395885468,
398
+ "epoch": 0.0195019501950195,
399
+ "frac_reward_zero_std": 0.0,
400
+ "grad_norm": 1.109375,
401
+ "learning_rate": 6.906474820143885e-07,
402
+ "loss": 0.0074,
403
+ "num_tokens": 13197457.0,
404
+ "reward": 1.1243727207183838,
405
+ "reward_std": 0.1370793327689171,
406
+ "rewards/ADEnReward/mean": 0.026768459612503646,
407
+ "rewards/ADEnReward/std": 0.0662717854604125,
408
+ "rewards/ReasoningConfidenceReward/mean": 0.11010420471429824,
409
+ "rewards/ReasoningConfidenceReward/std": 0.07703434824943542,
410
+ "rewards/StrictFormatReward/mean": 0.9875,
411
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
412
+ "step": 130,
413
+ "step_time": 38.24893993083388
414
+ },
415
+ {
416
+ "clip_ratio/high_max": 0.0,
417
+ "clip_ratio/high_mean": 0.0,
418
+ "clip_ratio/low_mean": 0.0,
419
+ "clip_ratio/low_min": 0.0,
420
+ "clip_ratio/region_mean": 0.0,
421
+ "completions/clipped_ratio": 0.0,
422
+ "completions/max_length": 812.3,
423
+ "completions/max_terminated_length": 812.3,
424
+ "completions/mean_length": 736.0104370117188,
425
+ "completions/mean_terminated_length": 736.0104370117188,
426
+ "completions/min_length": 633.8,
427
+ "completions/min_terminated_length": 633.8,
428
+ "entropy": 0.6263688296079636,
429
+ "epoch": 0.021002100210021003,
430
+ "frac_reward_zero_std": 0.0,
431
+ "grad_norm": 0.94140625,
432
+ "learning_rate": 6.666666666666666e-07,
433
+ "loss": -0.0068,
434
+ "num_tokens": 14209414.0,
435
+ "reward": 1.1419667840003966,
436
+ "reward_std": 0.15764849670231343,
437
+ "rewards/ADEnReward/mean": 0.04068564581684768,
438
+ "rewards/ADEnReward/std": 0.08432210255414248,
439
+ "rewards/ReasoningConfidenceReward/mean": 0.11794776618480682,
440
+ "rewards/ReasoningConfidenceReward/std": 0.07809726595878601,
441
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
442
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
443
+ "step": 140,
444
+ "step_time": 34.79935124134645
445
+ },
446
+ {
447
+ "clip_ratio/high_max": 0.0,
448
+ "clip_ratio/high_mean": 0.0,
449
+ "clip_ratio/low_mean": 0.0,
450
+ "clip_ratio/low_min": 0.0,
451
+ "clip_ratio/region_mean": 0.0,
452
+ "completions/clipped_ratio": 0.0,
453
+ "completions/max_length": 809.8,
454
+ "completions/max_terminated_length": 809.8,
455
+ "completions/mean_length": 735.3583557128907,
456
+ "completions/mean_terminated_length": 735.3583557128907,
457
+ "completions/min_length": 675.4,
458
+ "completions/min_terminated_length": 675.4,
459
+ "entropy": 0.6429328173398972,
460
+ "epoch": 0.022502250225022502,
461
+ "frac_reward_zero_std": 0.0,
462
+ "grad_norm": 1.1875,
463
+ "learning_rate": 6.426858513189448e-07,
464
+ "loss": -0.0016,
465
+ "num_tokens": 15220674.0,
466
+ "reward": 1.1485553145408631,
467
+ "reward_std": 0.1378554403781891,
468
+ "rewards/ADEnReward/mean": 0.03246476505883038,
469
+ "rewards/ADEnReward/std": 0.07621528403833508,
470
+ "rewards/ReasoningConfidenceReward/mean": 0.1285905048251152,
471
+ "rewards/ReasoningConfidenceReward/std": 0.07357696481049061,
472
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
473
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
474
+ "step": 150,
475
+ "step_time": 34.40164418127388
476
+ },
477
+ {
478
+ "clip_ratio/high_max": 0.0,
479
+ "clip_ratio/high_mean": 0.0,
480
+ "clip_ratio/low_mean": 0.0,
481
+ "clip_ratio/low_min": 0.0,
482
+ "clip_ratio/region_mean": 0.0,
483
+ "completions/clipped_ratio": 0.0,
484
+ "completions/max_length": 862.8,
485
+ "completions/max_terminated_length": 862.8,
486
+ "completions/mean_length": 736.8479370117187,
487
+ "completions/mean_terminated_length": 736.8479370117187,
488
+ "completions/min_length": 685.0,
489
+ "completions/min_terminated_length": 685.0,
490
+ "entropy": 0.6302657306194306,
491
+ "epoch": 0.024002400240024,
492
+ "frac_reward_zero_std": 0.0,
493
+ "grad_norm": 0.85546875,
494
+ "learning_rate": 6.187050359712231e-07,
495
+ "loss": 0.0054,
496
+ "num_tokens": 16232265.0,
497
+ "reward": 1.1301079392433167,
498
+ "reward_std": 0.12346492633223534,
499
+ "rewards/ADEnReward/mean": 0.02919836761429906,
500
+ "rewards/ADEnReward/std": 0.06346954144537449,
501
+ "rewards/ReasoningConfidenceReward/mean": 0.10924286767840385,
502
+ "rewards/ReasoningConfidenceReward/std": 0.07436333447694779,
503
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
504
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
505
+ "step": 160,
506
+ "step_time": 35.0627255375497
507
+ },
508
+ {
509
+ "clip_ratio/high_max": 0.0,
510
+ "clip_ratio/high_mean": 0.0,
511
+ "clip_ratio/low_mean": 0.0,
512
+ "clip_ratio/low_min": 0.0,
513
+ "clip_ratio/region_mean": 0.0,
514
+ "completions/clipped_ratio": 0.0,
515
+ "completions/max_length": 872.9,
516
+ "completions/max_terminated_length": 872.9,
517
+ "completions/mean_length": 733.4729309082031,
518
+ "completions/mean_terminated_length": 733.4729309082031,
519
+ "completions/min_length": 674.1,
520
+ "completions/min_terminated_length": 674.1,
521
+ "entropy": 0.6332272559404373,
522
+ "epoch": 0.025502550255025503,
523
+ "frac_reward_zero_std": 0.0,
524
+ "grad_norm": 1.4453125,
525
+ "learning_rate": 5.947242206235011e-07,
526
+ "loss": 0.0041,
527
+ "num_tokens": 17241900.0,
528
+ "reward": 1.1381949663162232,
529
+ "reward_std": 0.12290547527372837,
530
+ "rewards/ADEnReward/mean": 0.034653707128018144,
531
+ "rewards/ADEnReward/std": 0.07986385971307755,
532
+ "rewards/ReasoningConfidenceReward/mean": 0.1118745468556881,
533
+ "rewards/ReasoningConfidenceReward/std": 0.07345958650112153,
534
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
535
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
536
+ "step": 170,
537
+ "step_time": 35.15750455642119
538
+ },
539
+ {
540
+ "clip_ratio/high_max": 0.0,
541
+ "clip_ratio/high_mean": 0.0,
542
+ "clip_ratio/low_mean": 0.0,
543
+ "clip_ratio/low_min": 0.0,
544
+ "clip_ratio/region_mean": 0.0,
545
+ "completions/clipped_ratio": 0.0,
546
+ "completions/max_length": 802.8,
547
+ "completions/max_terminated_length": 802.8,
548
+ "completions/mean_length": 735.7437683105469,
549
+ "completions/mean_terminated_length": 735.7437683105469,
550
+ "completions/min_length": 679.2,
551
+ "completions/min_terminated_length": 679.2,
552
+ "entropy": 0.6316021621227265,
553
+ "epoch": 0.027002700270027002,
554
+ "frac_reward_zero_std": 0.0,
555
+ "grad_norm": 0.9921875,
556
+ "learning_rate": 5.707434052757793e-07,
557
+ "loss": -0.0027,
558
+ "num_tokens": 18253441.0,
559
+ "reward": 1.1503783106803893,
560
+ "reward_std": 0.13333264142274856,
561
+ "rewards/ADEnReward/mean": 0.03611529269255698,
562
+ "rewards/ADEnReward/std": 0.08335062861442566,
563
+ "rewards/ReasoningConfidenceReward/mean": 0.12676299437880517,
564
+ "rewards/ReasoningConfidenceReward/std": 0.07276010811328888,
565
+ "rewards/StrictFormatReward/mean": 0.9875,
566
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
567
+ "step": 180,
568
+ "step_time": 34.76130234096199
569
+ },
570
+ {
571
+ "clip_ratio/high_max": 0.0,
572
+ "clip_ratio/high_mean": 0.0,
573
+ "clip_ratio/low_mean": 0.0,
574
+ "clip_ratio/low_min": 0.0,
575
+ "clip_ratio/region_mean": 0.0,
576
+ "completions/clipped_ratio": 0.0,
577
+ "completions/max_length": 826.4,
578
+ "completions/max_terminated_length": 826.4,
579
+ "completions/mean_length": 736.7250183105468,
580
+ "completions/mean_terminated_length": 736.7250183105468,
581
+ "completions/min_length": 682.4,
582
+ "completions/min_terminated_length": 682.4,
583
+ "entropy": 0.6254515618085861,
584
+ "epoch": 0.028502850285028504,
585
+ "frac_reward_zero_std": 0.0,
586
+ "grad_norm": 1.2734375,
587
+ "learning_rate": 5.467625899280576e-07,
588
+ "loss": -0.0044,
589
+ "num_tokens": 19265725.0,
590
+ "reward": 1.1711225748062133,
591
+ "reward_std": 0.0973996564745903,
592
+ "rewards/ADEnReward/mean": 0.03408026825636625,
593
+ "rewards/ADEnReward/std": 0.07690504901111125,
594
+ "rewards/ReasoningConfidenceReward/mean": 0.13704225420951843,
595
+ "rewards/ReasoningConfidenceReward/std": 0.06723648384213447,
596
+ "rewards/StrictFormatReward/mean": 1.0,
597
+ "rewards/StrictFormatReward/std": 0.0,
598
+ "step": 190,
599
+ "step_time": 34.57357950732112
600
+ },
601
+ {
602
+ "clip_ratio/high_max": 0.0,
603
+ "clip_ratio/high_mean": 0.0,
604
+ "clip_ratio/low_mean": 0.0,
605
+ "clip_ratio/low_min": 0.0,
606
+ "clip_ratio/region_mean": 0.0,
607
+ "completions/clipped_ratio": 0.0,
608
+ "completions/max_length": 805.0,
609
+ "completions/max_terminated_length": 805.0,
610
+ "completions/mean_length": 734.5750183105469,
611
+ "completions/mean_terminated_length": 734.5750183105469,
612
+ "completions/min_length": 678.8,
613
+ "completions/min_terminated_length": 678.8,
614
+ "entropy": 0.639057207107544,
615
+ "epoch": 0.030003000300030003,
616
+ "frac_reward_zero_std": 0.0,
617
+ "grad_norm": 1.4765625,
618
+ "learning_rate": 5.227817745803357e-07,
619
+ "loss": 0.0011,
620
+ "num_tokens": 20276305.0,
621
+ "reward": 1.1531866073608399,
622
+ "reward_std": 0.11019677557051182,
623
+ "rewards/ADEnReward/mean": 0.028415630990639328,
624
+ "rewards/ADEnReward/std": 0.0590023357886821,
625
+ "rewards/ReasoningConfidenceReward/mean": 0.13310426697134972,
626
+ "rewards/ReasoningConfidenceReward/std": 0.0671043038368225,
627
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
628
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
629
+ "step": 200,
630
+ "step_time": 33.87812012191862
631
+ },
632
+ {
633
+ "clip_ratio/high_max": 0.0,
634
+ "clip_ratio/high_mean": 0.0,
635
+ "clip_ratio/low_mean": 0.0,
636
+ "clip_ratio/low_min": 0.0,
637
+ "clip_ratio/region_mean": 0.0,
638
+ "completions/clipped_ratio": 0.0,
639
+ "completions/max_length": 808.3,
640
+ "completions/max_terminated_length": 808.3,
641
+ "completions/mean_length": 734.1604370117187,
642
+ "completions/mean_terminated_length": 734.1604370117187,
643
+ "completions/min_length": 666.7,
644
+ "completions/min_terminated_length": 666.7,
645
+ "entropy": 0.6130730360746384,
646
+ "epoch": 0.0315031503150315,
647
+ "frac_reward_zero_std": 0.0,
648
+ "grad_norm": 0.98828125,
649
+ "learning_rate": 4.988009592326139e-07,
650
+ "loss": -0.0026,
651
+ "num_tokens": 21287070.0,
652
+ "reward": 1.1174390077590943,
653
+ "reward_std": 0.17627520002424718,
654
+ "rewards/ADEnReward/mean": 0.02708783410489559,
655
+ "rewards/ADEnReward/std": 0.06303326403722168,
656
+ "rewards/ReasoningConfidenceReward/mean": 0.11535111740231514,
657
+ "rewards/ReasoningConfidenceReward/std": 0.07561756633222103,
658
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
659
+ "rewards/StrictFormatReward/std": 0.15585823953151703,
660
+ "step": 210,
661
+ "step_time": 33.49867916693911
662
+ },
663
+ {
664
+ "clip_ratio/high_max": 0.0,
665
+ "clip_ratio/high_mean": 0.0,
666
+ "clip_ratio/low_mean": 0.0,
667
+ "clip_ratio/low_min": 0.0,
668
+ "clip_ratio/region_mean": 0.0,
669
+ "completions/clipped_ratio": 0.0,
670
+ "completions/max_length": 807.7,
671
+ "completions/max_terminated_length": 807.7,
672
+ "completions/mean_length": 736.2333618164063,
673
+ "completions/mean_terminated_length": 736.2333618164063,
674
+ "completions/min_length": 672.9,
675
+ "completions/min_terminated_length": 672.9,
676
+ "entropy": 0.6331382483243942,
677
+ "epoch": 0.033003300330033,
678
+ "frac_reward_zero_std": 0.0,
679
+ "grad_norm": 1.59375,
680
+ "learning_rate": 4.7482014388489204e-07,
681
+ "loss": -0.002,
682
+ "num_tokens": 22299374.0,
683
+ "reward": 1.1317298769950868,
684
+ "reward_std": 0.13332342132925987,
685
+ "rewards/ADEnReward/mean": 0.02584674544632435,
686
+ "rewards/ADEnReward/std": 0.06565526202321052,
687
+ "rewards/ReasoningConfidenceReward/mean": 0.12254976853728294,
688
+ "rewards/ReasoningConfidenceReward/std": 0.06627246364951134,
689
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
690
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
691
+ "step": 220,
692
+ "step_time": 34.399103596247734
693
+ },
694
+ {
695
+ "clip_ratio/high_max": 0.0,
696
+ "clip_ratio/high_mean": 0.0,
697
+ "clip_ratio/low_mean": 0.0,
698
+ "clip_ratio/low_min": 0.0,
699
+ "clip_ratio/region_mean": 0.0,
700
+ "completions/clipped_ratio": 0.0,
701
+ "completions/max_length": 811.9,
702
+ "completions/max_terminated_length": 811.9,
703
+ "completions/mean_length": 735.714599609375,
704
+ "completions/mean_terminated_length": 735.714599609375,
705
+ "completions/min_length": 681.9,
706
+ "completions/min_terminated_length": 681.9,
707
+ "entropy": 0.6306542903184891,
708
+ "epoch": 0.034503450345034506,
709
+ "frac_reward_zero_std": 0.0,
710
+ "grad_norm": 1.5390625,
711
+ "learning_rate": 4.508393285371702e-07,
712
+ "loss": -0.0024,
713
+ "num_tokens": 23310965.0,
714
+ "reward": 1.1562806963920593,
715
+ "reward_std": 0.11753289476037025,
716
+ "rewards/ADEnReward/mean": 0.03307559220120311,
717
+ "rewards/ADEnReward/std": 0.07661229185760021,
718
+ "rewards/ReasoningConfidenceReward/mean": 0.13153844997286795,
719
+ "rewards/ReasoningConfidenceReward/std": 0.07286004684865474,
720
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
721
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
722
+ "step": 230,
723
+ "step_time": 34.26871528588235
724
+ },
725
+ {
726
+ "clip_ratio/high_max": 0.0,
727
+ "clip_ratio/high_mean": 0.0,
728
+ "clip_ratio/low_mean": 0.0,
729
+ "clip_ratio/low_min": 0.0,
730
+ "clip_ratio/region_mean": 0.0,
731
+ "completions/clipped_ratio": 0.0,
732
+ "completions/max_length": 811.3,
733
+ "completions/max_terminated_length": 811.3,
734
+ "completions/mean_length": 731.7271118164062,
735
+ "completions/mean_terminated_length": 731.7271118164062,
736
+ "completions/min_length": 618.4,
737
+ "completions/min_terminated_length": 618.4,
738
+ "entropy": 0.6345497578382492,
739
+ "epoch": 0.036003600360036005,
740
+ "frac_reward_zero_std": 0.0,
741
+ "grad_norm": 1.1328125,
742
+ "learning_rate": 4.2685851318944845e-07,
743
+ "loss": -0.009,
744
+ "num_tokens": 24320578.0,
745
+ "reward": 1.1457740783691406,
746
+ "reward_std": 0.12441672384738922,
747
+ "rewards/ADEnReward/mean": 0.024325233418494463,
748
+ "rewards/ADEnReward/std": 0.055920045264065264,
749
+ "rewards/ReasoningConfidenceReward/mean": 0.12978217378258705,
750
+ "rewards/ReasoningConfidenceReward/std": 0.0885798055678606,
751
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
752
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
753
+ "step": 240,
754
+ "step_time": 35.589772913791236
755
+ },
756
+ {
757
+ "clip_ratio/high_max": 0.0,
758
+ "clip_ratio/high_mean": 0.0,
759
+ "clip_ratio/low_mean": 0.0,
760
+ "clip_ratio/low_min": 0.0,
761
+ "clip_ratio/region_mean": 0.0,
762
+ "completions/clipped_ratio": 0.0,
763
+ "completions/max_length": 808.2,
764
+ "completions/max_terminated_length": 808.2,
765
+ "completions/mean_length": 732.7125183105469,
766
+ "completions/mean_terminated_length": 732.7125183105469,
767
+ "completions/min_length": 676.8,
768
+ "completions/min_terminated_length": 676.8,
769
+ "entropy": 0.6391450583934783,
770
+ "epoch": 0.0375037503750375,
771
+ "frac_reward_zero_std": 0.0,
772
+ "grad_norm": 1.015625,
773
+ "learning_rate": 4.028776978417266e-07,
774
+ "loss": -0.001,
775
+ "num_tokens": 25330312.0,
776
+ "reward": 1.1600376486778259,
777
+ "reward_std": 0.10150842666625977,
778
+ "rewards/ADEnReward/mean": 0.025944713107310235,
779
+ "rewards/ADEnReward/std": 0.0668052526190877,
780
+ "rewards/ReasoningConfidenceReward/mean": 0.13825955241918564,
781
+ "rewards/ReasoningConfidenceReward/std": 0.0673112541437149,
782
+ "rewards/StrictFormatReward/mean": 0.9958333313465119,
783
+ "rewards/StrictFormatReward/std": 0.028867512941360474,
784
+ "step": 250,
785
+ "step_time": 35.417141625192016
786
+ },
787
+ {
788
+ "clip_ratio/high_max": 0.0,
789
+ "clip_ratio/high_mean": 0.0,
790
+ "clip_ratio/low_mean": 0.0,
791
+ "clip_ratio/low_min": 0.0,
792
+ "clip_ratio/region_mean": 0.0,
793
+ "completions/clipped_ratio": 0.0,
794
+ "completions/max_length": 869.5,
795
+ "completions/max_terminated_length": 869.5,
796
+ "completions/mean_length": 734.552099609375,
797
+ "completions/mean_terminated_length": 734.552099609375,
798
+ "completions/min_length": 685.6,
799
+ "completions/min_terminated_length": 685.6,
800
+ "entropy": 0.6193063586950303,
801
+ "epoch": 0.039003900390039,
802
+ "frac_reward_zero_std": 0.0,
803
+ "grad_norm": 1.3359375,
804
+ "learning_rate": 3.7889688249400476e-07,
805
+ "loss": 0.0072,
806
+ "num_tokens": 26341233.0,
807
+ "reward": 1.141334629058838,
808
+ "reward_std": 0.15343550890684127,
809
+ "rewards/ADEnReward/mean": 0.036122958175837994,
810
+ "rewards/ADEnReward/std": 0.08206971623003483,
811
+ "rewards/ReasoningConfidenceReward/mean": 0.11771163120865821,
812
+ "rewards/ReasoningConfidenceReward/std": 0.07470999509096146,
813
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
814
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
815
+ "step": 260,
816
+ "step_time": 35.400501331407575
817
+ },
818
+ {
819
+ "clip_ratio/high_max": 0.0,
820
+ "clip_ratio/high_mean": 0.0,
821
+ "clip_ratio/low_mean": 0.0,
822
+ "clip_ratio/low_min": 0.0,
823
+ "clip_ratio/region_mean": 0.0,
824
+ "completions/clipped_ratio": 0.0,
825
+ "completions/max_length": 878.0,
826
+ "completions/max_terminated_length": 878.0,
827
+ "completions/mean_length": 735.9958557128906,
828
+ "completions/mean_terminated_length": 735.9958557128906,
829
+ "completions/min_length": 671.3,
830
+ "completions/min_terminated_length": 671.3,
831
+ "entropy": 0.6377118974924088,
832
+ "epoch": 0.0405040504050405,
833
+ "frac_reward_zero_std": 0.0,
834
+ "grad_norm": 0.9609375,
835
+ "learning_rate": 3.54916067146283e-07,
836
+ "loss": 0.0056,
837
+ "num_tokens": 27352527.0,
838
+ "reward": 1.1401844143867492,
839
+ "reward_std": 0.16229173466563224,
840
+ "rewards/ADEnReward/mean": 0.035310357715934514,
841
+ "rewards/ADEnReward/std": 0.07879957482218743,
842
+ "rewards/ReasoningConfidenceReward/mean": 0.12154066711664199,
843
+ "rewards/ReasoningConfidenceReward/std": 0.0928368739783764,
844
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
845
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
846
+ "step": 270,
847
+ "step_time": 37.65524366889149
848
+ },
849
+ {
850
+ "clip_ratio/high_max": 0.0,
851
+ "clip_ratio/high_mean": 0.0,
852
+ "clip_ratio/low_mean": 0.0,
853
+ "clip_ratio/low_min": 0.0,
854
+ "clip_ratio/region_mean": 0.0,
855
+ "completions/clipped_ratio": 0.0,
856
+ "completions/max_length": 806.4,
857
+ "completions/max_terminated_length": 806.4,
858
+ "completions/mean_length": 738.8979370117188,
859
+ "completions/mean_terminated_length": 738.8979370117188,
860
+ "completions/min_length": 683.0,
861
+ "completions/min_terminated_length": 683.0,
862
+ "entropy": 0.6446064680814743,
863
+ "epoch": 0.04200420042004201,
864
+ "frac_reward_zero_std": 0.0,
865
+ "grad_norm": 1.4609375,
866
+ "learning_rate": 3.309352517985611e-07,
867
+ "loss": -0.0014,
868
+ "num_tokens": 28365614.0,
869
+ "reward": 1.1542957901954651,
870
+ "reward_std": 0.12034987881779671,
871
+ "rewards/ADEnReward/mean": 0.027850225754082202,
872
+ "rewards/ADEnReward/std": 0.07261905167251825,
873
+ "rewards/ReasoningConfidenceReward/mean": 0.13477883711457253,
874
+ "rewards/ReasoningConfidenceReward/std": 0.07817486226558686,
875
+ "rewards/StrictFormatReward/mean": 0.9916666686534882,
876
+ "rewards/StrictFormatReward/std": 0.04038818776607513,
877
+ "step": 280,
878
+ "step_time": 35.198139012046155
879
+ },
880
+ {
881
+ "clip_ratio/high_max": 0.0,
882
+ "clip_ratio/high_mean": 0.0,
883
+ "clip_ratio/low_mean": 0.0,
884
+ "clip_ratio/low_min": 0.0,
885
+ "clip_ratio/region_mean": 0.0,
886
+ "completions/clipped_ratio": 0.0,
887
+ "completions/max_length": 809.5,
888
+ "completions/max_terminated_length": 809.5,
889
+ "completions/mean_length": 735.2833557128906,
890
+ "completions/mean_terminated_length": 735.2833557128906,
891
+ "completions/min_length": 665.6,
892
+ "completions/min_terminated_length": 665.6,
893
+ "entropy": 0.6377699196338653,
894
+ "epoch": 0.043504350435043505,
895
+ "frac_reward_zero_std": 0.0,
896
+ "grad_norm": 1.4609375,
897
+ "learning_rate": 3.069544364508393e-07,
898
+ "loss": -0.0057,
899
+ "num_tokens": 29376822.0,
900
+ "reward": 1.1390021562576294,
901
+ "reward_std": 0.18877924084663392,
902
+ "rewards/ADEnReward/mean": 0.03187443232163787,
903
+ "rewards/ADEnReward/std": 0.06868166662752628,
904
+ "rewards/ReasoningConfidenceReward/mean": 0.1362943433225155,
905
+ "rewards/ReasoningConfidenceReward/std": 0.06989900954067707,
906
+ "rewards/StrictFormatReward/mean": 0.9708333313465118,
907
+ "rewards/StrictFormatReward/std": 0.1673789143562317,
908
+ "step": 290,
909
+ "step_time": 35.80791048258543
910
+ },
911
+ {
912
+ "clip_ratio/high_max": 0.0,
913
+ "clip_ratio/high_mean": 0.0,
914
+ "clip_ratio/low_mean": 0.0,
915
+ "clip_ratio/low_min": 0.0,
916
+ "clip_ratio/region_mean": 0.0,
917
+ "completions/clipped_ratio": 0.0,
918
+ "completions/max_length": 868.4,
919
+ "completions/max_terminated_length": 868.4,
920
+ "completions/mean_length": 738.370849609375,
921
+ "completions/mean_terminated_length": 738.370849609375,
922
+ "completions/min_length": 684.6,
923
+ "completions/min_terminated_length": 684.6,
924
+ "entropy": 0.6240056931972504,
925
+ "epoch": 0.045004500450045004,
926
+ "frac_reward_zero_std": 0.0,
927
+ "grad_norm": 1.3671875,
928
+ "learning_rate": 2.8297362110311753e-07,
929
+ "loss": 0.0029,
930
+ "num_tokens": 30389624.0,
931
+ "reward": 1.1787766337394714,
932
+ "reward_std": 0.12015584260225295,
933
+ "rewards/ADEnReward/mean": 0.03684027479030192,
934
+ "rewards/ADEnReward/std": 0.07373644839972257,
935
+ "rewards/ReasoningConfidenceReward/mean": 0.14610298424959184,
936
+ "rewards/ReasoningConfidenceReward/std": 0.08906417265534401,
937
+ "rewards/StrictFormatReward/mean": 0.9958333313465119,
938
+ "rewards/StrictFormatReward/std": 0.028867512941360474,
939
+ "step": 300,
940
+ "step_time": 36.948212468624114
941
+ }
942
+ ],
943
+ "logging_steps": 10,
944
+ "max_steps": 417,
945
+ "num_input_tokens_seen": 30389624,
946
+ "num_train_epochs": 1,
947
+ "save_steps": 100,
948
+ "stateful_callbacks": {
949
+ "TrainerControl": {
950
+ "args": {
951
+ "should_epoch_stop": false,
952
+ "should_evaluate": false,
953
+ "should_log": false,
954
+ "should_save": true,
955
+ "should_training_stop": false
956
+ },
957
+ "attributes": {}
958
+ }
959
+ },
960
+ "total_flos": 0.0,
961
+ "train_batch_size": 6,
962
+ "trial_name": null,
963
+ "trial_params": null
964
+ }
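The `trainer_state.json` above logs GRPO training metrics every 10 steps: total reward, per-reward-function means and stds (ADEnReward, ReasoningConfidenceReward, StrictFormatReward), completion lengths, entropy, and learning rate. A minimal sketch of plotting the reward curves from `log_history`, assuming `matplotlib` is installed and the file is available locally (the path is an assumption):

```python
# Minimal sketch: plot the GRPO reward curves recorded in log_history.
import json
import matplotlib.pyplot as plt

with open("grpo-nADE-format-RC/checkpoint-300/trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

log = state["log_history"]
steps = [e["step"] for e in log]

plt.plot(steps, [e["reward"] for e in log], label="total reward")
plt.plot(steps, [e["rewards/StrictFormatReward/mean"] for e in log], label="StrictFormatReward")
plt.plot(steps, [e["rewards/ReasoningConfidenceReward/mean"] for e in log], label="ReasoningConfidenceReward")
plt.plot(steps, [e["rewards/ADEnReward/mean"] for e in log], label="ADEnReward")
plt.xlabel("step")
plt.ylabel("reward")
plt.legend()
plt.tight_layout()
plt.savefig("grpo_rewards.png")
```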
grpo-nADE-format-RC/checkpoint-300/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-400/chat_template.jinja ADDED
@@ -0,0 +1,110 @@
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- else %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if do_vision_count %}
10
+ {%- set image_count.value = image_count.value + 1 %}
11
+ {%- endif %}
12
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
13
+ <|vision_start|><|image_pad|><|vision_end|>
14
+ {%- elif 'video' in item or item.type == 'video' %}
15
+ {%- if do_vision_count %}
16
+ {%- set video_count.value = video_count.value + 1 %}
17
+ {%- endif %}
18
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
19
+ <|vision_start|><|video_pad|><|vision_end|>
20
+ {%- elif 'text' in item %}
21
+ {{- item.text }}
22
+ {%- endif %}
23
+ {%- endfor %}
24
+ {%- endif %}
25
+ {%- endmacro %}
26
+ {%- if tools %}
27
+ {{- '<|im_start|>system\n' }}
28
+ {%- if messages[0].role == 'system' %}
29
+ {{- render_content(messages[0].content, false) + '\n\n' }}
30
+ {%- endif %}
31
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
32
+ {%- for tool in tools %}
33
+ {{- "\n" }}
34
+ {{- tool | tojson }}
35
+ {%- endfor %}
36
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
37
+ {%- else %}
38
+ {%- if messages[0].role == 'system' %}
39
+ {{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
40
+ {%- endif %}
41
+ {%- endif %}
42
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
43
+ {%- for message in messages[::-1] %}
44
+ {%- set index = (messages|length - 1) - loop.index0 %}
45
+ {%- if ns.multi_step_tool and message.role == "user" %}
46
+ {%- set content = render_content(message.content, false) %}
47
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
48
+ {%- set ns.multi_step_tool = false %}
49
+ {%- set ns.last_query_index = index %}
50
+ {%- endif %}
51
+ {%- endif %}
52
+ {%- endfor %}
53
+ {%- for message in messages %}
54
+ {%- set content = render_content(message.content, True) %}
55
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
56
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
57
+ {%- elif message.role == "assistant" %}
58
+ {%- set reasoning_content = '' %}
59
+ {%- if message.reasoning_content is string %}
60
+ {%- set reasoning_content = message.reasoning_content %}
61
+ {%- else %}
62
+ {%- if '</think>' in content %}
63
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
64
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- if loop.index0 > ns.last_query_index %}
68
+ {%- if loop.last or (not loop.last and reasoning_content) %}
69
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
70
+ {%- else %}
71
+ {{- '<|im_start|>' + message.role + '\n' + content }}
72
+ {%- endif %}
73
+ {%- else %}
74
+ {{- '<|im_start|>' + message.role + '\n' + content }}
75
+ {%- endif %}
76
+ {%- if message.tool_calls %}
77
+ {%- for tool_call in message.tool_calls %}
78
+ {%- if (loop.first and content) or (not loop.first) %}
79
+ {{- '\n' }}
80
+ {%- endif %}
81
+ {%- if tool_call.function %}
82
+ {%- set tool_call = tool_call.function %}
83
+ {%- endif %}
84
+ {{- '<tool_call>\n{"name": "' }}
85
+ {{- tool_call.name }}
86
+ {{- '", "arguments": ' }}
87
+ {%- if tool_call.arguments is string %}
88
+ {{- tool_call.arguments }}
89
+ {%- else %}
90
+ {{- tool_call.arguments | tojson }}
91
+ {%- endif %}
92
+ {{- '}\n</tool_call>' }}
93
+ {%- endfor %}
94
+ {%- endif %}
95
+ {{- '<|im_end|>\n' }}
96
+ {%- elif message.role == "tool" %}
97
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
98
+ {{- '<|im_start|>user' }}
99
+ {%- endif %}
100
+ {{- '\n<tool_response>\n' }}
101
+ {{- content }}
102
+ {{- '\n</tool_response>' }}
103
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
104
+ {{- '<|im_end|>\n' }}
105
+ {%- endif %}
106
+ {%- endif %}
107
+ {%- endfor %}
108
+ {%- if add_generation_prompt %}
109
+ {{- '<|im_start|>assistant\n' }}
110
+ {%- endif %}
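The `chat_template.jinja` above renders ChatML-style turns, wraps image/video items in `<|vision_start|>…<|vision_end|>`, injects tool signatures inside `<tools>` tags, and re-emits `<think>…</think>` reasoning only for assistant turns after the last user query. A minimal sketch of rendering a prompt with it, assuming a local checkpoint directory and a `transformers` version that loads the template file; the messages are a made-up example:

```python
# Minimal sketch: render a multimodal prompt with the checkpoint's chat template.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-400")  # hypothetical path

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": [
        {"type": "image", "image": "example.jpg"},
        {"type": "text", "text": "Describe what is happening in this frame."},
    ]},
]

# add_generation_prompt=True appends the trailing '<|im_start|>assistant\n' from the
# template's final branch; the image item is rendered as
# <|vision_start|><|image_pad|><|vision_end|> in the prompt string.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```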
grpo-nADE-format-RC/checkpoint-400/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-400/tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
+ {
+ "add_bos_token": false,
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151646": {
+ "content": "<|object_ref_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151647": {
+ "content": "<|object_ref_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151648": {
+ "content": "<|box_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151649": {
+ "content": "<|box_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151650": {
+ "content": "<|quad_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151651": {
+ "content": "<|quad_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151652": {
+ "content": "<|vision_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151653": {
+ "content": "<|vision_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151654": {
+ "content": "<|vision_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151655": {
+ "content": "<|image_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151656": {
+ "content": "<|video_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151657": {
+ "content": "<tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151658": {
+ "content": "</tool_call>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151659": {
+ "content": "<|fim_prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151660": {
+ "content": "<|fim_middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151661": {
+ "content": "<|fim_suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151662": {
+ "content": "<|fim_pad|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151663": {
+ "content": "<|repo_name|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151664": {
+ "content": "<|file_sep|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151665": {
+ "content": "<tool_response>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151666": {
+ "content": "</tool_response>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151667": {
+ "content": "<think>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "151668": {
+ "content": "</think>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>",
+ "<|object_ref_start|>",
+ "<|object_ref_end|>",
+ "<|box_start|>",
+ "<|box_end|>",
+ "<|quad_start|>",
+ "<|quad_end|>",
+ "<|vision_start|>",
+ "<|vision_end|>",
+ "<|vision_pad|>",
+ "<|image_pad|>",
+ "<|video_pad|>"
+ ],
+ "bos_token": null,
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "errors": "replace",
+ "extra_special_tokens": {},
+ "max_length": null,
+ "model_max_length": 262144,
+ "pad_to_multiple_of": null,
+ "pad_token": "<|endoftext|>",
+ "pad_token_type_id": 0,
+ "padding_side": "left",
+ "processor_class": "Qwen3VLProcessor",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+ }
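This tokenizer_config pins left padding and `<|endoftext|>` as the pad token, which matters when batching prompts for generation. A minimal sketch of loading it from this checkpoint directory (the local path is an assumption):

```python
from transformers import AutoTokenizer

# Hypothetical local path to this checkpoint; adjust as needed.
tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-400")
print(tok.padding_side)              # "left", as set above
print(tok.pad_token, tok.eos_token)  # <|endoftext|> <|im_end|>

# Batched prompts are padded on the left so generation starts flush on the right.
batch = tok(["short prompt", "a somewhat longer prompt"],
            padding=True, return_tensors="pt")
print(batch["input_ids"].shape)
```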
grpo-nADE-format-RC/checkpoint-400/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-417/added_tokens.json ADDED
@@ -0,0 +1,28 @@
+ {
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_response>": 151666,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
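added_tokens.json maps each control token string to its fixed id in the 151643-151668 range. A quick, illustrative way to check that a loaded tokenizer agrees with this file (the checkpoint path is assumed):

```python
import json
from transformers import AutoTokenizer

ckpt = "grpo-nADE-format-RC/checkpoint-417"   # assumed local path
tok = AutoTokenizer.from_pretrained(ckpt)

with open(f"{ckpt}/added_tokens.json") as f:
    added = json.load(f)

# Every entry in added_tokens.json should round-trip through the tokenizer.
for token, token_id in added.items():
    assert tok.convert_tokens_to_ids(token) == token_id
print(f"checked {len(added)} added tokens")   # expected: checked 28 added tokens
```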
grpo-nADE-format-RC/checkpoint-417/chat_template.jinja ADDED
@@ -0,0 +1,110 @@
+ {%- set image_count = namespace(value=0) %}
+ {%- set video_count = namespace(value=0) %}
+ {%- macro render_content(content, do_vision_count) %}
+ {%- if content is string %}
+ {{- content }}
+ {%- else %}
+ {%- for item in content %}
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
+ {%- if do_vision_count %}
+ {%- set image_count.value = image_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
+ <|vision_start|><|image_pad|><|vision_end|>
+ {%- elif 'video' in item or item.type == 'video' %}
+ {%- if do_vision_count %}
+ {%- set video_count.value = video_count.value + 1 %}
+ {%- endif %}
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
+ <|vision_start|><|video_pad|><|vision_end|>
+ {%- elif 'text' in item %}
+ {{- item.text }}
+ {%- endif %}
+ {%- endfor %}
+ {%- endif %}
+ {%- endmacro %}
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- render_content(messages[0].content, false) + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + render_content(messages[0].content, false) + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+ {%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- if ns.multi_step_tool and message.role == "user" %}
+ {%- set content = render_content(message.content, false) %}
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- for message in messages %}
+ {%- set content = render_content(message.content, True) %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is string %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in content %}
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- endif %}
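This template handles both plain chat and tool calling: with a `tools` list it injects the `<tools>` block into the system turn, and assistant reasoning is wrapped in `<think>...</think>` only for turns after the last real user query. A minimal sketch of rendering a prompt with it; the checkpoint path, tool schema, and messages are made up for illustration:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-417")  # assumed path

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical tool
        "parameters": {"type": "object",
                       "properties": {"city": {"type": "string"}}},
    },
}]
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What's the weather in Boston?"},
]

prompt = tok.apply_chat_template(messages, tools=tools,
                                 add_generation_prompt=True, tokenize=False)
# Expect: a system block containing <tools>...</tools>, the user turn,
# then the trailing '<|im_start|>assistant\n' generation prompt.
print(prompt)
```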
grpo-nADE-format-RC/checkpoint-417/config.json ADDED
@@ -0,0 +1,68 @@
+ {
+ "architectures": [
+ "Qwen3VLForConditionalGeneration"
+ ],
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "image_token_id": 151655,
+ "model_type": "qwen3_vl",
+ "pad_token_id": 151643,
+ "text_config": {
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "dtype": "bfloat16",
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.02,
+ "intermediate_size": 9728,
+ "max_position_embeddings": 262144,
+ "model_type": "qwen3_vl_text",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": {
+ "mrope_interleaved": true,
+ "mrope_section": [
+ 24,
+ 20,
+ 20
+ ],
+ "rope_type": "default"
+ },
+ "rope_theta": 5000000,
+ "tie_word_embeddings": true,
+ "use_cache": true,
+ "vocab_size": 151936
+ },
+ "tie_word_embeddings": true,
+ "transformers_version": "4.57.6",
+ "use_cache": false,
+ "video_token_id": 151656,
+ "vision_config": {
+ "deepstack_visual_indexes": [
+ 5,
+ 11,
+ 17
+ ],
+ "depth": 24,
+ "dtype": "bfloat16",
+ "hidden_act": "gelu_pytorch_tanh",
+ "hidden_size": 1024,
+ "in_channels": 3,
+ "initializer_range": 0.02,
+ "intermediate_size": 4096,
+ "model_type": "qwen3_vl",
+ "num_heads": 16,
+ "num_position_embeddings": 2304,
+ "out_hidden_size": 2560,
+ "patch_size": 16,
+ "spatial_merge_size": 2,
+ "temporal_patch_size": 2
+ },
+ "vision_end_token_id": 151653,
+ "vision_start_token_id": 151652
+ }
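config.json describes a roughly 4.4B-parameter Qwen3-VL: a 36-layer, 2560-wide text decoder with grouped-query attention (32 query / 8 KV heads) and interleaved M-RoPE, plus a 24-block vision encoder with 16-pixel patches and 2x2 spatial merging. A rough sketch of back-of-the-envelope numbers implied by these fields; the image size is hypothetical and the arithmetic is illustrative only:

```python
# Illustrative arithmetic from the config values above.
patch_size, spatial_merge = 16, 2
h, w = 512, 512                                   # hypothetical input image size
patches = (h // patch_size) * (w // patch_size)   # 32 * 32 = 1024 ViT patches
visual_tokens = patches // (spatial_merge ** 2)   # 1024 / 4 = 256 tokens handed to the LM

num_kv_heads, head_dim = 8, 128
kv_width = num_kv_heads * head_dim                # 1024-dim K and V per layer under GQA
print(visual_tokens, kv_width)                    # 256 1024
```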
grpo-nADE-format-RC/checkpoint-417/generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "top_k": 20,
+ "top_p": 0.95,
+ "transformers_version": "4.57.6"
+ }
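generation_config.json bakes in sampling defaults (do_sample with top_k 20 and top_p 0.95) and lists both <|im_end|> (151645) and <|endoftext|> (151643) as stop ids. A minimal sketch of inspecting these defaults; the checkpoint path is assumed as above:

```python
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("grpo-nADE-format-RC/checkpoint-417")  # assumed path
print(gen_cfg.do_sample, gen_cfg.top_k, gen_cfg.top_p)   # True 20 0.95
print(gen_cfg.eos_token_id)                              # [151645, 151645, 151643]

# These act only as defaults; they can still be overridden per call, e.g.
# model.generate(**inputs, generation_config=gen_cfg, max_new_tokens=256)
```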
grpo-nADE-format-RC/checkpoint-417/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/checkpoint-417/model.safetensors.index.json ADDED
@@ -0,0 +1,721 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 4437815808,
4
+ "total_size": 8875631616
5
+ },
6
+ "weight_map": {
7
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
14
+ "model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
17
+ "model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
18
+ "model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
25
+ "model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
28
+ "model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
36
+ "model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
39
+ "model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
40
+ "model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
42
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
46
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
47
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
50
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
58
+ "model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
61
+ "model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
64
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
69
+ "model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
72
+ "model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
76
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
79
+ "model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
80
+ "model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
83
+ "model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
85
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
86
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
91
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
94
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
97
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
98
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
101
+ "model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
102
+ "model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
105
+ "model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
108
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
109
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
110
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
111
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
112
+ "model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
113
+ "model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
114
+ "model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
115
+ "model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
116
+ "model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
117
+ "model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
118
+ "model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
119
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
120
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
121
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
122
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
123
+ "model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
124
+ "model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
125
+ "model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
126
+ "model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
127
+ "model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
135
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
138
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
139
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
141
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
144
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
145
+ "model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
146
+ "model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
148
+ "model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
149
+ "model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
157
+ "model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
160
+ "model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
162
+ "model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
163
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
168
+ "model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
171
+ "model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
178
+ "model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
179
+ "model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
180
+ "model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
182
+ "model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
184
+ "model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
185
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
188
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
190
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
192
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
193
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
196
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
198
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
201
+ "model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
204
+ "model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
212
+ "model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
215
+ "model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
216
+ "model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
217
+ "model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
218
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
220
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
222
+ "model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
223
+ "model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
224
+ "model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
226
+ "model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
228
+ "model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
232
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
233
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
234
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
236
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
237
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
241
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
244
+ "model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
245
+ "model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
246
+ "model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
248
+ "model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
251
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
252
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
253
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
255
+ "model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
256
+ "model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
258
+ "model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
259
+ "model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
260
+ "model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
262
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
263
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
264
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
265
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
267
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
268
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
269
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
270
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
271
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
+ "model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
273
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
276
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
277
+ "model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
278
+ "model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
280
+ "model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
281
+ "model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
282
+ "model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
284
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
285
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
286
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
287
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
289
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
290
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
291
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
292
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
293
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
294
+ "model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
295
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
296
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
299
+ "model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
300
+ "model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
301
+ "model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
303
+ "model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
304
+ "model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
306
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
308
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
310
+ "model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
311
+ "model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
312
+ "model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
313
+ "model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
314
+ "model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
316
+ "model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
317
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
318
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
320
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
321
+ "model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
322
+ "model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
324
+ "model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
325
+ "model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
328
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
330
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
333
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
336
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
337
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
339
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
340
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
341
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
342
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
343
+ "model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
344
+ "model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
345
+ "model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
346
+ "model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
347
+ "model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
348
+ "model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
349
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
350
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
351
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
352
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
353
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
354
+ "model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
355
+ "model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
356
+ "model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
357
+ "model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
358
+ "model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
359
+ "model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
360
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
361
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
362
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
363
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
364
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
365
+ "model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
366
+ "model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
367
+ "model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
368
+ "model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
369
+ "model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
373
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
376
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
377
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
378
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
380
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
383
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
384
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
385
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
387
+ "model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
388
+ "model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
390
+ "model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
391
+ "model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
392
+ "model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
394
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
396
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
397
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
398
+ "model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
399
+ "model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
400
+ "model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
402
+ "model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
404
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors",
405
+ "model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
406
+ "model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
408
+ "model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
409
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
410
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
411
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
412
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
413
+ "model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
414
+ "model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
415
+ "model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
416
+ "model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
417
+ "model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
418
+ "model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
420
+ "model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
421
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
422
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
423
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
424
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
425
+ "model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
426
+ "model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
427
+ "model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
428
+ "model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
429
+ "model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
430
+ "model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
432
+ "model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
433
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
434
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
435
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
436
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
437
+ "model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
438
+ "model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
439
+ "model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
440
+ "model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
441
+ "model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
442
+ "model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
443
+ "model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
444
+ "model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
445
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
446
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
447
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
448
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
449
+ "model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
450
+ "model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
451
+ "model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
452
+ "model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
453
+ "model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
454
+ "model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
455
+ "model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
456
+ "model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
457
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
458
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
459
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
460
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
461
+ "model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
462
+ "model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
463
+ "model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
464
+ "model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
465
+ "model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
466
+ "model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
467
+ "model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
468
+ "model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
469
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
470
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
471
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
472
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
473
+ "model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
474
+ "model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
475
+ "model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
476
+ "model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
477
+ "model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
478
+ "model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
479
+ "model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
480
+ "model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
481
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
482
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
483
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
484
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
485
+ "model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
486
+ "model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
487
+ "model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
488
+ "model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
489
+ "model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
490
+ "model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
491
+ "model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
492
+ "model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
493
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
494
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
495
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
496
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
497
+ "model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
498
+ "model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
499
+ "model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
500
+ "model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
501
+ "model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
502
+ "model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
503
+ "model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
504
+ "model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
505
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
506
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
507
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
508
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
509
+ "model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
510
+ "model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
511
+ "model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
512
+ "model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
513
+ "model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
514
+ "model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
515
+ "model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
516
+ "model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
517
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
518
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
519
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
520
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
521
+ "model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
522
+ "model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
523
+ "model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
524
+ "model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
525
+ "model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
526
+ "model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
527
+ "model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
528
+ "model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
529
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
530
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
531
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
532
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
533
+ "model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
534
+ "model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
535
+ "model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
536
+ "model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
537
+ "model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
538
+ "model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
539
+ "model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
540
+ "model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
541
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
542
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
543
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
544
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
545
+ "model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
546
+ "model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
547
+ "model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
548
+ "model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
549
+ "model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
550
+ "model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
551
+ "model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
552
+ "model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
553
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
554
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
555
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
556
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
557
+ "model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
558
+ "model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
559
+ "model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
560
+ "model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
561
+ "model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
562
+ "model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
563
+ "model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
564
+ "model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
565
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
566
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
567
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
568
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
569
+ "model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
570
+ "model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
571
+ "model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
572
+ "model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
573
+ "model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
574
+ "model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
575
+ "model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
576
+ "model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
577
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
578
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
579
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
580
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
581
+ "model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
582
+ "model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
583
+ "model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
584
+ "model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
585
+ "model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
586
+ "model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
587
+ "model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
588
+ "model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
589
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
590
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
591
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
592
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
593
+ "model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
594
+ "model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
595
+ "model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
596
+ "model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
597
+ "model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
598
+ "model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
599
+ "model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
600
+ "model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
601
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
602
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
603
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
604
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
605
+ "model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
606
+ "model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
607
+ "model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
608
+ "model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
609
+ "model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
610
+ "model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
611
+ "model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
612
+ "model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
613
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
614
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
615
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
616
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
617
+ "model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
618
+ "model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
619
+ "model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
620
+ "model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
621
+ "model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
622
+ "model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
623
+ "model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
624
+ "model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
625
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
626
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
627
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
628
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
629
+ "model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
630
+ "model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
631
+ "model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
632
+ "model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
633
+ "model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
634
+ "model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
635
+ "model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
636
+ "model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
637
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
638
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
639
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
640
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
641
+ "model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
642
+ "model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
643
+ "model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
644
+ "model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
645
+ "model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
646
+ "model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
647
+ "model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
648
+ "model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
649
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
650
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
651
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
652
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
653
+ "model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
654
+ "model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
655
+ "model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
656
+ "model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
657
+ "model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
658
+ "model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
659
+ "model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
660
+ "model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
661
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
662
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
663
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
664
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
665
+ "model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
666
+ "model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
667
+ "model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
668
+ "model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
669
+ "model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
670
+ "model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
671
+ "model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
672
+ "model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
673
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
674
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
675
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
676
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
677
+ "model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
678
+ "model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
679
+ "model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
680
+ "model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
681
+ "model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
682
+ "model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
683
+ "model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
684
+ "model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
685
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
686
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
687
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
688
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
689
+ "model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
690
+ "model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
691
+ "model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
692
+ "model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
693
+ "model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
694
+ "model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
695
+ "model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
696
+ "model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
697
+ "model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
698
+ "model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
699
+ "model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
700
+ "model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
701
+ "model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
702
+ "model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
703
+ "model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
704
+ "model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
705
+ "model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
706
+ "model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
707
+ "model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
708
+ "model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
709
+ "model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
710
+ "model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
711
+ "model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
712
+ "model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
713
+ "model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
714
+ "model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
715
+ "model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
716
+ "model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
717
+ "model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
718
+ "model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
719
+ "model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
720
+ }
721
+ }
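The weight-map entries above route every parameter name to the shard file that stores it. Below is a minimal sketch of resolving a single tensor from the sharded checkpoint, assuming the checkpoint directory is available locally and follows the standard Hugging Face `weight_map` layout; the path and tensor name are only illustrative.

```python
import json
from pathlib import Path

from safetensors import safe_open  # pip install safetensors

# Hypothetical local path to the checkpoint directory shown in this diff.
ckpt_dir = Path("grpo-nADE-format-RC/checkpoint-417")

# weight_map maps each parameter name to the shard that stores it.
index = json.loads((ckpt_dir / "model.safetensors.index.json").read_text())
weight_map = index["weight_map"]

name = "model.visual.blocks.0.attn.qkv.weight"
shard = weight_map[name]  # e.g. "model-00001-of-00002.safetensors"

# Open only the shard that holds this tensor and read it lazily.
with safe_open(str(ckpt_dir / shard), framework="pt", device="cpu") as f:
    qkv = f.get_tensor(name)

print(name, tuple(qkv.shape), "from", shard)
```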
grpo-nADE-format-RC/checkpoint-417/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
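The map above pins `<|im_end|>` as the end-of-sequence token and `<|endoftext|>` as padding. A quick, hedged check that a loaded tokenizer reflects these settings, assuming the checkpoint directory is local (any checkpoint-* directory ships the same tokenizer files):

```python
from transformers import AutoTokenizer

# Hypothetical local path; every checkpoint-* directory carries the same tokenizer files.
tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-417")

# These values come straight from special_tokens_map.json.
print(tok.eos_token)                       # '<|im_end|>'
print(tok.pad_token)                       # '<|endoftext|>'
print(tok.additional_special_tokens[:3])   # ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>']
```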
grpo-nADE-format-RC/checkpoint-417/tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": null,
235
+ "model_max_length": 262144,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "left",
240
+ "processor_class": "Qwen3VLProcessor",
241
+ "split_special_tokens": false,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "unk_token": null
244
+ }
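Beyond the special-token map, this config registers the tool-call and `<think>`/`</think>` markers in `added_tokens_decoder`, sets left padding with a 262144-token context, and points at `Qwen3VLProcessor`. A small sketch, assuming the checkpoint is local, that checks those IDs and renders a text-only turn through the checkpoint's `chat_template.jinja`:

```python
from transformers import AutoTokenizer

# Hypothetical local path to the same checkpoint.
tok = AutoTokenizer.from_pretrained("grpo-nADE-format-RC/checkpoint-417")

# IDs registered in added_tokens_decoder above.
print(tok.convert_tokens_to_ids(["<think>", "</think>"]))  # [151667, 151668]
print(tok.padding_side, tok.model_max_length)              # left 262144

# Render one user turn through the checkpoint's chat_template.jinja.
messages = [{"role": "user", "content": "Describe the scene."}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```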
grpo-nADE-format-RC/checkpoint-417/trainer_state.json ADDED
@@ -0,0 +1,1305 @@
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.06255625562556255,
6
+ "eval_steps": 100,
7
+ "global_step": 417,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "clip_ratio/high_max": 0.0,
14
+ "clip_ratio/high_mean": 0.0,
15
+ "clip_ratio/low_mean": 0.0,
16
+ "clip_ratio/low_min": 0.0,
17
+ "clip_ratio/region_mean": 0.0,
18
+ "completions/clipped_ratio": 0.03750000149011612,
19
+ "completions/max_length": 1533.5,
20
+ "completions/max_terminated_length": 1427.6,
21
+ "completions/mean_length": 820.7687622070313,
22
+ "completions/mean_terminated_length": 795.4251159667969,
23
+ "completions/min_length": 119.1,
24
+ "completions/min_terminated_length": 119.1,
25
+ "entropy": 0.6458343416452408,
26
+ "epoch": 0.0015001500150015,
27
+ "frac_reward_zero_std": 0.0,
28
+ "grad_norm": 1.0625,
29
+ "learning_rate": 9.784172661870503e-07,
30
+ "loss": 0.111,
31
+ "num_tokens": 1052641.0,
32
+ "reward": 0.15892810765653848,
33
+ "reward_std": 1.2357513666152955,
34
+ "rewards/ADEnReward/mean": 0.011186909227399155,
35
+ "rewards/ADEnReward/std": 0.03507473061326891,
36
+ "rewards/ReasoningConfidenceReward/mean": -0.19809213168919088,
37
+ "rewards/ReasoningConfidenceReward/std": 0.45118741542100904,
38
+ "rewards/StrictFormatReward/mean": 0.345833333209157,
39
+ "rewards/StrictFormatReward/std": 0.8626526802778244,
40
+ "step": 10,
41
+ "step_time": 66.8766707284376
42
+ },
43
+ {
44
+ "clip_ratio/high_max": 0.0,
45
+ "clip_ratio/high_mean": 0.0,
46
+ "clip_ratio/low_mean": 0.0,
47
+ "clip_ratio/low_min": 0.0,
48
+ "clip_ratio/region_mean": 0.0,
49
+ "completions/clipped_ratio": 0.01875000037252903,
50
+ "completions/max_length": 1258.4,
51
+ "completions/max_terminated_length": 1106.6,
52
+ "completions/mean_length": 743.8416809082031,
53
+ "completions/mean_terminated_length": 728.8905517578125,
54
+ "completions/min_length": 163.6,
55
+ "completions/min_terminated_length": 163.6,
56
+ "entropy": 0.6466913104057312,
57
+ "epoch": 0.003000300030003,
58
+ "frac_reward_zero_std": 0.0,
59
+ "grad_norm": 1.15625,
60
+ "learning_rate": 9.544364508393285e-07,
61
+ "loss": 0.0171,
62
+ "num_tokens": 2068565.0,
63
+ "reward": 0.8077859580516815,
64
+ "reward_std": 0.8083356320858002,
65
+ "rewards/ADEnReward/mean": 0.03084552166983485,
66
+ "rewards/ADEnReward/std": 0.07702018767595291,
67
+ "rewards/ReasoningConfidenceReward/mean": 0.018607060704380275,
68
+ "rewards/ReasoningConfidenceReward/std": 0.28907840102911,
69
+ "rewards/StrictFormatReward/mean": 0.7583333432674408,
70
+ "rewards/StrictFormatReward/std": 0.6329713940620423,
71
+ "step": 20,
72
+ "step_time": 52.03955397913232
73
+ },
74
+ {
75
+ "clip_ratio/high_max": 0.0,
76
+ "clip_ratio/high_mean": 0.0,
77
+ "clip_ratio/low_mean": 0.0,
78
+ "clip_ratio/low_min": 0.0,
79
+ "clip_ratio/region_mean": 0.0,
80
+ "completions/clipped_ratio": 0.002083333395421505,
81
+ "completions/max_length": 1179.2,
82
+ "completions/max_terminated_length": 1103.8,
83
+ "completions/mean_length": 735.7812744140625,
84
+ "completions/mean_terminated_length": 734.0838012695312,
85
+ "completions/min_length": 424.2,
86
+ "completions/min_terminated_length": 424.2,
87
+ "entropy": 0.6348762333393096,
88
+ "epoch": 0.004500450045004501,
89
+ "frac_reward_zero_std": 0.0,
90
+ "grad_norm": 1.0546875,
91
+ "learning_rate": 9.304556354916066e-07,
92
+ "loss": 0.0134,
93
+ "num_tokens": 3080012.0,
94
+ "reward": 0.9798760175704956,
95
+ "reward_std": 0.5248558193445205,
96
+ "rewards/ADEnReward/mean": 0.032167868409305814,
97
+ "rewards/ADEnReward/std": 0.0745716668665409,
98
+ "rewards/ReasoningConfidenceReward/mean": 0.06437477525323629,
99
+ "rewards/ReasoningConfidenceReward/std": 0.19101330041885375,
100
+ "rewards/StrictFormatReward/mean": 0.8833333373069763,
101
+ "rewards/StrictFormatReward/std": 0.45989986062049865,
102
+ "step": 30,
103
+ "step_time": 47.932181040663274
104
+ },
105
+ {
106
+ "clip_ratio/high_max": 0.0,
107
+ "clip_ratio/high_mean": 0.0,
108
+ "clip_ratio/low_mean": 0.0,
109
+ "clip_ratio/low_min": 0.0,
110
+ "clip_ratio/region_mean": 0.0,
111
+ "completions/clipped_ratio": 0.002083333395421505,
112
+ "completions/max_length": 1017.0,
113
+ "completions/max_terminated_length": 946.0,
114
+ "completions/mean_length": 739.5562744140625,
115
+ "completions/mean_terminated_length": 737.8339294433594,
116
+ "completions/min_length": 563.2,
117
+ "completions/min_terminated_length": 563.2,
118
+ "entropy": 0.6441138684749603,
119
+ "epoch": 0.006000600060006,
120
+ "frac_reward_zero_std": 0.0,
121
+ "grad_norm": 0.98046875,
122
+ "learning_rate": 9.064748201438849e-07,
123
+ "loss": 0.0049,
124
+ "num_tokens": 4093959.0,
125
+ "reward": 1.0465242981910705,
126
+ "reward_std": 0.3045470409095287,
127
+ "rewards/ADEnReward/mean": 0.03476252369582653,
128
+ "rewards/ADEnReward/std": 0.07538308277726173,
129
+ "rewards/ReasoningConfidenceReward/mean": 0.07009507827460766,
130
+ "rewards/ReasoningConfidenceReward/std": 0.11509535983204841,
131
+ "rewards/StrictFormatReward/mean": 0.9416666626930237,
132
+ "rewards/StrictFormatReward/std": 0.2242635190486908,
133
+ "step": 40,
134
+ "step_time": 40.8255105547607
135
+ },
136
+ {
137
+ "clip_ratio/high_max": 0.0,
138
+ "clip_ratio/high_mean": 0.0,
139
+ "clip_ratio/low_mean": 0.0,
140
+ "clip_ratio/low_min": 0.0,
141
+ "clip_ratio/region_mean": 0.0,
142
+ "completions/clipped_ratio": 0.0,
143
+ "completions/max_length": 866.4,
144
+ "completions/max_terminated_length": 866.4,
145
+ "completions/mean_length": 733.5812683105469,
146
+ "completions/mean_terminated_length": 733.5812683105469,
147
+ "completions/min_length": 616.2,
148
+ "completions/min_terminated_length": 616.2,
149
+ "entropy": 0.652310574054718,
150
+ "epoch": 0.007500750075007501,
151
+ "frac_reward_zero_std": 0.0,
152
+ "grad_norm": 1.078125,
153
+ "learning_rate": 8.82494004796163e-07,
154
+ "loss": -0.0002,
155
+ "num_tokens": 5104478.0,
156
+ "reward": 1.1200557351112366,
157
+ "reward_std": 0.18049246706068517,
158
+ "rewards/ADEnReward/mean": 0.032199547812342647,
159
+ "rewards/ADEnReward/std": 0.08372207283973694,
160
+ "rewards/ReasoningConfidenceReward/mean": 0.10868950486183167,
161
+ "rewards/ReasoningConfidenceReward/std": 0.09046642743051052,
162
+ "rewards/StrictFormatReward/mean": 0.9791666686534881,
163
+ "rewards/StrictFormatReward/std": 0.10964388847351074,
164
+ "step": 50,
165
+ "step_time": 34.72850414663553
166
+ },
167
+ {
168
+ "clip_ratio/high_max": 0.0,
169
+ "clip_ratio/high_mean": 0.0,
170
+ "clip_ratio/low_mean": 0.0,
171
+ "clip_ratio/low_min": 0.0,
172
+ "clip_ratio/region_mean": 0.0,
173
+ "completions/clipped_ratio": 0.0,
174
+ "completions/max_length": 820.2,
175
+ "completions/max_terminated_length": 820.2,
176
+ "completions/mean_length": 734.4875244140625,
177
+ "completions/mean_terminated_length": 734.4875244140625,
178
+ "completions/min_length": 597.2,
179
+ "completions/min_terminated_length": 597.2,
180
+ "entropy": 0.6447544604539871,
181
+ "epoch": 0.009000900090009001,
182
+ "frac_reward_zero_std": 0.0,
183
+ "grad_norm": 0.8671875,
184
+ "learning_rate": 8.585131894484412e-07,
185
+ "loss": -0.014,
186
+ "num_tokens": 6115528.0,
187
+ "reward": 1.0801176726818085,
188
+ "reward_std": 0.2073265790939331,
189
+ "rewards/ADEnReward/mean": 0.024589571449905635,
190
+ "rewards/ADEnReward/std": 0.05969331655651331,
191
+ "rewards/ReasoningConfidenceReward/mean": 0.09302806071937084,
192
+ "rewards/ReasoningConfidenceReward/std": 0.08853670731186866,
193
+ "rewards/StrictFormatReward/mean": 0.9624999940395356,
194
+ "rewards/StrictFormatReward/std": 0.22511394023895265,
195
+ "step": 60,
196
+ "step_time": 34.7001038627699
197
+ },
198
+ {
199
+ "clip_ratio/high_max": 0.0,
200
+ "clip_ratio/high_mean": 0.0,
201
+ "clip_ratio/low_mean": 0.0,
202
+ "clip_ratio/low_min": 0.0,
203
+ "clip_ratio/region_mean": 0.0,
204
+ "completions/clipped_ratio": 0.002083333395421505,
205
+ "completions/max_length": 887.8,
206
+ "completions/max_terminated_length": 880.9,
207
+ "completions/mean_length": 739.214599609375,
208
+ "completions/mean_terminated_length": 737.5825012207031,
209
+ "completions/min_length": 674.7,
210
+ "completions/min_terminated_length": 674.7,
211
+ "entropy": 0.648271444439888,
212
+ "epoch": 0.010501050105010502,
213
+ "frac_reward_zero_std": 0.0,
214
+ "grad_norm": 1.046875,
215
+ "learning_rate": 8.345323741007194e-07,
216
+ "loss": 0.0095,
217
+ "num_tokens": 7128591.0,
218
+ "reward": 1.117066776752472,
219
+ "reward_std": 0.14670775569975375,
220
+ "rewards/ADEnReward/mean": 0.030741326790302993,
221
+ "rewards/ADEnReward/std": 0.07235845774412156,
222
+ "rewards/ReasoningConfidenceReward/mean": 0.10299204997718334,
223
+ "rewards/ReasoningConfidenceReward/std": 0.07768557965755463,
224
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
225
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
226
+ "step": 70,
227
+ "step_time": 37.97550033703446
228
+ },
229
+ {
230
+ "clip_ratio/high_max": 0.0,
231
+ "clip_ratio/high_mean": 0.0,
232
+ "clip_ratio/low_mean": 0.0,
233
+ "clip_ratio/low_min": 0.0,
234
+ "clip_ratio/region_mean": 0.0,
235
+ "completions/clipped_ratio": 0.0,
236
+ "completions/max_length": 871.3,
237
+ "completions/max_terminated_length": 871.3,
238
+ "completions/mean_length": 735.6625244140625,
239
+ "completions/mean_terminated_length": 735.6625244140625,
240
+ "completions/min_length": 663.4,
241
+ "completions/min_terminated_length": 663.4,
242
+ "entropy": 0.6467163026332855,
243
+ "epoch": 0.012001200120012,
244
+ "frac_reward_zero_std": 0.0,
245
+ "grad_norm": 1.078125,
246
+ "learning_rate": 8.105515587529975e-07,
247
+ "loss": 0.0099,
248
+ "num_tokens": 8140093.0,
249
+ "reward": 1.124228584766388,
250
+ "reward_std": 0.1685192134231329,
251
+ "rewards/ADEnReward/mean": 0.03326874002814293,
252
+ "rewards/ADEnReward/std": 0.07868262981064618,
253
+ "rewards/ReasoningConfidenceReward/mean": 0.10762646868824959,
254
+ "rewards/ReasoningConfidenceReward/std": 0.08494675308465957,
255
+ "rewards/StrictFormatReward/mean": 0.9833333373069764,
256
+ "rewards/StrictFormatReward/std": 0.08077637553215027,
257
+ "step": 80,
258
+ "step_time": 35.69703020621091
259
+ },
260
+ {
261
+ "clip_ratio/high_max": 0.0,
262
+ "clip_ratio/high_mean": 0.0,
263
+ "clip_ratio/low_mean": 0.0,
264
+ "clip_ratio/low_min": 0.0,
265
+ "clip_ratio/region_mean": 0.0,
266
+ "completions/clipped_ratio": 0.0,
267
+ "completions/max_length": 799.3,
268
+ "completions/max_terminated_length": 799.3,
269
+ "completions/mean_length": 730.2937683105469,
270
+ "completions/mean_terminated_length": 730.2937683105469,
271
+ "completions/min_length": 623.3,
272
+ "completions/min_terminated_length": 623.3,
273
+ "entropy": 0.6420892357826233,
274
+ "epoch": 0.013501350135013501,
275
+ "frac_reward_zero_std": 0.0,
276
+ "grad_norm": 0.828125,
277
+ "learning_rate": 7.865707434052757e-07,
278
+ "loss": -0.0098,
279
+ "num_tokens": 9148426.0,
280
+ "reward": 1.122767400741577,
281
+ "reward_std": 0.154670562595129,
282
+ "rewards/ADEnReward/mean": 0.03107238719239831,
283
+ "rewards/ADEnReward/std": 0.07060995940119028,
284
+ "rewards/ReasoningConfidenceReward/mean": 0.10836165957152843,
285
+ "rewards/ReasoningConfidenceReward/std": 0.07862687073647975,
286
+ "rewards/StrictFormatReward/mean": 0.9833333253860473,
287
+ "rewards/StrictFormatReward/std": 0.1154700517654419,
288
+ "step": 90,
289
+ "step_time": 34.06732882745564
290
+ },
291
+ {
292
+ "clip_ratio/high_max": 0.0,
293
+ "clip_ratio/high_mean": 0.0,
294
+ "clip_ratio/low_mean": 0.0,
295
+ "clip_ratio/low_min": 0.0,
296
+ "clip_ratio/region_mean": 0.0,
297
+ "completions/clipped_ratio": 0.0,
298
+ "completions/max_length": 886.6,
299
+ "completions/max_terminated_length": 886.6,
300
+ "completions/mean_length": 732.9125305175781,
301
+ "completions/mean_terminated_length": 732.9125305175781,
302
+ "completions/min_length": 674.0,
303
+ "completions/min_terminated_length": 674.0,
304
+ "entropy": 0.6424726009368896,
305
+ "epoch": 0.015001500150015001,
306
+ "frac_reward_zero_std": 0.0,
307
+ "grad_norm": 0.921875,
308
+ "learning_rate": 7.62589928057554e-07,
309
+ "loss": 0.0087,
310
+ "num_tokens": 10158000.0,
311
+ "reward": 1.1287578463554382,
312
+ "reward_std": 0.16896428540349007,
313
+ "rewards/ADEnReward/mean": 0.042021069768816234,
314
+ "rewards/ADEnReward/std": 0.08718259073793888,
315
+ "rewards/ReasoningConfidenceReward/mean": 0.10757005885243416,
316
+ "rewards/ReasoningConfidenceReward/std": 0.0734778918325901,
317
+ "rewards/StrictFormatReward/mean": 0.9791666626930237,
318
+ "rewards/StrictFormatReward/std": 0.12699072659015656,
319
+ "step": 100,
320
+ "step_time": 35.849342082161456
321
+ },
322
+ {
323
+ "clip_ratio/high_max": 0.0,
324
+ "clip_ratio/high_mean": 0.0,
325
+ "clip_ratio/low_mean": 0.0,
326
+ "clip_ratio/low_min": 0.0,
327
+ "clip_ratio/region_mean": 0.0,
328
+ "completions/clipped_ratio": 0.002083333395421505,
329
+ "completions/max_length": 922.1,
330
+ "completions/max_terminated_length": 847.3,
331
+ "completions/mean_length": 736.8729309082031,
332
+ "completions/mean_terminated_length": 735.1981018066406,
333
+ "completions/min_length": 680.7,
334
+ "completions/min_terminated_length": 680.7,
335
+ "entropy": 0.6371437162160873,
336
+ "epoch": 0.0165016501650165,
337
+ "frac_reward_zero_std": 0.0,
338
+ "grad_norm": 0.84375,
339
+ "learning_rate": 7.386091127098321e-07,
340
+ "loss": 0.01,
341
+ "num_tokens": 11170099.0,
342
+ "reward": 1.1073094844818114,
343
+ "reward_std": 0.17044325098395346,
344
+ "rewards/ADEnReward/mean": 0.018601356376893818,
345
+ "rewards/ADEnReward/std": 0.05055182706564665,
346
+ "rewards/ReasoningConfidenceReward/mean": 0.11370811760425567,
347
+ "rewards/ReasoningConfidenceReward/std": 0.07379961647093296,
348
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
349
+ "rewards/StrictFormatReward/std": 0.15585823953151703,
350
+ "step": 110,
351
+ "step_time": 38.995268660690634
352
+ },
353
+ {
354
+ "clip_ratio/high_max": 0.0,
355
+ "clip_ratio/high_mean": 0.0,
356
+ "clip_ratio/low_mean": 0.0,
357
+ "clip_ratio/low_min": 0.0,
358
+ "clip_ratio/region_mean": 0.0,
359
+ "completions/clipped_ratio": 0.002083333395421505,
360
+ "completions/max_length": 956.2,
361
+ "completions/max_terminated_length": 884.7,
362
+ "completions/mean_length": 741.245849609375,
363
+ "completions/mean_terminated_length": 739.5868896484375,
364
+ "completions/min_length": 685.4,
365
+ "completions/min_terminated_length": 685.4,
366
+ "entropy": 0.6423951655626297,
367
+ "epoch": 0.018001800180018002,
368
+ "frac_reward_zero_std": 0.0,
369
+ "grad_norm": 1.03125,
370
+ "learning_rate": 7.146282973621102e-07,
371
+ "loss": 0.01,
372
+ "num_tokens": 12184361.0,
373
+ "reward": 1.127052104473114,
374
+ "reward_std": 0.1497463181614876,
375
+ "rewards/ADEnReward/mean": 0.029763074405491353,
376
+ "rewards/ADEnReward/std": 0.07583294808864594,
377
+ "rewards/ReasoningConfidenceReward/mean": 0.10978899747133256,
378
+ "rewards/ReasoningConfidenceReward/std": 0.08882112912833691,
379
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
380
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
381
+ "step": 120,
382
+ "step_time": 39.55284757846967
383
+ },
384
+ {
385
+ "clip_ratio/high_max": 0.0,
386
+ "clip_ratio/high_mean": 0.0,
387
+ "clip_ratio/low_mean": 0.0,
388
+ "clip_ratio/low_min": 0.0,
389
+ "clip_ratio/region_mean": 0.0,
390
+ "completions/clipped_ratio": 0.002083333395421505,
391
+ "completions/max_length": 889.2,
392
+ "completions/max_terminated_length": 814.3,
393
+ "completions/mean_length": 738.2166931152344,
394
+ "completions/mean_terminated_length": 736.5271484375,
395
+ "completions/min_length": 684.2,
396
+ "completions/min_terminated_length": 684.2,
397
+ "entropy": 0.6385834395885468,
398
+ "epoch": 0.0195019501950195,
399
+ "frac_reward_zero_std": 0.0,
400
+ "grad_norm": 1.109375,
401
+ "learning_rate": 6.906474820143885e-07,
402
+ "loss": 0.0074,
403
+ "num_tokens": 13197457.0,
404
+ "reward": 1.1243727207183838,
405
+ "reward_std": 0.1370793327689171,
406
+ "rewards/ADEnReward/mean": 0.026768459612503646,
407
+ "rewards/ADEnReward/std": 0.0662717854604125,
408
+ "rewards/ReasoningConfidenceReward/mean": 0.11010420471429824,
409
+ "rewards/ReasoningConfidenceReward/std": 0.07703434824943542,
410
+ "rewards/StrictFormatReward/mean": 0.9875,
411
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
412
+ "step": 130,
413
+ "step_time": 38.24893993083388
414
+ },
415
+ {
416
+ "clip_ratio/high_max": 0.0,
417
+ "clip_ratio/high_mean": 0.0,
418
+ "clip_ratio/low_mean": 0.0,
419
+ "clip_ratio/low_min": 0.0,
420
+ "clip_ratio/region_mean": 0.0,
421
+ "completions/clipped_ratio": 0.0,
422
+ "completions/max_length": 812.3,
423
+ "completions/max_terminated_length": 812.3,
424
+ "completions/mean_length": 736.0104370117188,
425
+ "completions/mean_terminated_length": 736.0104370117188,
426
+ "completions/min_length": 633.8,
427
+ "completions/min_terminated_length": 633.8,
428
+ "entropy": 0.6263688296079636,
429
+ "epoch": 0.021002100210021003,
430
+ "frac_reward_zero_std": 0.0,
431
+ "grad_norm": 0.94140625,
432
+ "learning_rate": 6.666666666666666e-07,
433
+ "loss": -0.0068,
434
+ "num_tokens": 14209414.0,
435
+ "reward": 1.1419667840003966,
436
+ "reward_std": 0.15764849670231343,
437
+ "rewards/ADEnReward/mean": 0.04068564581684768,
438
+ "rewards/ADEnReward/std": 0.08432210255414248,
439
+ "rewards/ReasoningConfidenceReward/mean": 0.11794776618480682,
440
+ "rewards/ReasoningConfidenceReward/std": 0.07809726595878601,
441
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
442
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
443
+ "step": 140,
444
+ "step_time": 34.79935124134645
445
+ },
446
+ {
447
+ "clip_ratio/high_max": 0.0,
448
+ "clip_ratio/high_mean": 0.0,
449
+ "clip_ratio/low_mean": 0.0,
450
+ "clip_ratio/low_min": 0.0,
451
+ "clip_ratio/region_mean": 0.0,
452
+ "completions/clipped_ratio": 0.0,
453
+ "completions/max_length": 809.8,
454
+ "completions/max_terminated_length": 809.8,
455
+ "completions/mean_length": 735.3583557128907,
456
+ "completions/mean_terminated_length": 735.3583557128907,
457
+ "completions/min_length": 675.4,
458
+ "completions/min_terminated_length": 675.4,
459
+ "entropy": 0.6429328173398972,
460
+ "epoch": 0.022502250225022502,
461
+ "frac_reward_zero_std": 0.0,
462
+ "grad_norm": 1.1875,
463
+ "learning_rate": 6.426858513189448e-07,
464
+ "loss": -0.0016,
465
+ "num_tokens": 15220674.0,
466
+ "reward": 1.1485553145408631,
467
+ "reward_std": 0.1378554403781891,
468
+ "rewards/ADEnReward/mean": 0.03246476505883038,
469
+ "rewards/ADEnReward/std": 0.07621528403833508,
470
+ "rewards/ReasoningConfidenceReward/mean": 0.1285905048251152,
471
+ "rewards/ReasoningConfidenceReward/std": 0.07357696481049061,
472
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
473
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
474
+ "step": 150,
475
+ "step_time": 34.40164418127388
476
+ },
477
+ {
478
+ "clip_ratio/high_max": 0.0,
479
+ "clip_ratio/high_mean": 0.0,
480
+ "clip_ratio/low_mean": 0.0,
481
+ "clip_ratio/low_min": 0.0,
482
+ "clip_ratio/region_mean": 0.0,
483
+ "completions/clipped_ratio": 0.0,
484
+ "completions/max_length": 862.8,
485
+ "completions/max_terminated_length": 862.8,
486
+ "completions/mean_length": 736.8479370117187,
487
+ "completions/mean_terminated_length": 736.8479370117187,
488
+ "completions/min_length": 685.0,
489
+ "completions/min_terminated_length": 685.0,
490
+ "entropy": 0.6302657306194306,
491
+ "epoch": 0.024002400240024,
492
+ "frac_reward_zero_std": 0.0,
493
+ "grad_norm": 0.85546875,
494
+ "learning_rate": 6.187050359712231e-07,
495
+ "loss": 0.0054,
496
+ "num_tokens": 16232265.0,
497
+ "reward": 1.1301079392433167,
498
+ "reward_std": 0.12346492633223534,
499
+ "rewards/ADEnReward/mean": 0.02919836761429906,
500
+ "rewards/ADEnReward/std": 0.06346954144537449,
501
+ "rewards/ReasoningConfidenceReward/mean": 0.10924286767840385,
502
+ "rewards/ReasoningConfidenceReward/std": 0.07436333447694779,
503
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
504
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
505
+ "step": 160,
506
+ "step_time": 35.0627255375497
507
+ },
508
+ {
509
+ "clip_ratio/high_max": 0.0,
510
+ "clip_ratio/high_mean": 0.0,
511
+ "clip_ratio/low_mean": 0.0,
512
+ "clip_ratio/low_min": 0.0,
513
+ "clip_ratio/region_mean": 0.0,
514
+ "completions/clipped_ratio": 0.0,
515
+ "completions/max_length": 872.9,
516
+ "completions/max_terminated_length": 872.9,
517
+ "completions/mean_length": 733.4729309082031,
518
+ "completions/mean_terminated_length": 733.4729309082031,
519
+ "completions/min_length": 674.1,
520
+ "completions/min_terminated_length": 674.1,
521
+ "entropy": 0.6332272559404373,
522
+ "epoch": 0.025502550255025503,
523
+ "frac_reward_zero_std": 0.0,
524
+ "grad_norm": 1.4453125,
525
+ "learning_rate": 5.947242206235011e-07,
526
+ "loss": 0.0041,
527
+ "num_tokens": 17241900.0,
528
+ "reward": 1.1381949663162232,
529
+ "reward_std": 0.12290547527372837,
530
+ "rewards/ADEnReward/mean": 0.034653707128018144,
531
+ "rewards/ADEnReward/std": 0.07986385971307755,
532
+ "rewards/ReasoningConfidenceReward/mean": 0.1118745468556881,
533
+ "rewards/ReasoningConfidenceReward/std": 0.07345958650112153,
534
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
535
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
536
+ "step": 170,
537
+ "step_time": 35.15750455642119
538
+ },
539
+ {
540
+ "clip_ratio/high_max": 0.0,
541
+ "clip_ratio/high_mean": 0.0,
542
+ "clip_ratio/low_mean": 0.0,
543
+ "clip_ratio/low_min": 0.0,
544
+ "clip_ratio/region_mean": 0.0,
545
+ "completions/clipped_ratio": 0.0,
546
+ "completions/max_length": 802.8,
547
+ "completions/max_terminated_length": 802.8,
548
+ "completions/mean_length": 735.7437683105469,
549
+ "completions/mean_terminated_length": 735.7437683105469,
550
+ "completions/min_length": 679.2,
551
+ "completions/min_terminated_length": 679.2,
552
+ "entropy": 0.6316021621227265,
553
+ "epoch": 0.027002700270027002,
554
+ "frac_reward_zero_std": 0.0,
555
+ "grad_norm": 0.9921875,
556
+ "learning_rate": 5.707434052757793e-07,
557
+ "loss": -0.0027,
558
+ "num_tokens": 18253441.0,
559
+ "reward": 1.1503783106803893,
560
+ "reward_std": 0.13333264142274856,
561
+ "rewards/ADEnReward/mean": 0.03611529269255698,
562
+ "rewards/ADEnReward/std": 0.08335062861442566,
563
+ "rewards/ReasoningConfidenceReward/mean": 0.12676299437880517,
564
+ "rewards/ReasoningConfidenceReward/std": 0.07276010811328888,
565
+ "rewards/StrictFormatReward/mean": 0.9875,
566
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
567
+ "step": 180,
568
+ "step_time": 34.76130234096199
569
+ },
570
+ {
571
+ "clip_ratio/high_max": 0.0,
572
+ "clip_ratio/high_mean": 0.0,
573
+ "clip_ratio/low_mean": 0.0,
574
+ "clip_ratio/low_min": 0.0,
575
+ "clip_ratio/region_mean": 0.0,
576
+ "completions/clipped_ratio": 0.0,
577
+ "completions/max_length": 826.4,
578
+ "completions/max_terminated_length": 826.4,
579
+ "completions/mean_length": 736.7250183105468,
580
+ "completions/mean_terminated_length": 736.7250183105468,
581
+ "completions/min_length": 682.4,
582
+ "completions/min_terminated_length": 682.4,
583
+ "entropy": 0.6254515618085861,
584
+ "epoch": 0.028502850285028504,
585
+ "frac_reward_zero_std": 0.0,
586
+ "grad_norm": 1.2734375,
587
+ "learning_rate": 5.467625899280576e-07,
588
+ "loss": -0.0044,
589
+ "num_tokens": 19265725.0,
590
+ "reward": 1.1711225748062133,
591
+ "reward_std": 0.0973996564745903,
592
+ "rewards/ADEnReward/mean": 0.03408026825636625,
593
+ "rewards/ADEnReward/std": 0.07690504901111125,
594
+ "rewards/ReasoningConfidenceReward/mean": 0.13704225420951843,
595
+ "rewards/ReasoningConfidenceReward/std": 0.06723648384213447,
596
+ "rewards/StrictFormatReward/mean": 1.0,
597
+ "rewards/StrictFormatReward/std": 0.0,
598
+ "step": 190,
599
+ "step_time": 34.57357950732112
600
+ },
601
+ {
602
+ "clip_ratio/high_max": 0.0,
603
+ "clip_ratio/high_mean": 0.0,
604
+ "clip_ratio/low_mean": 0.0,
605
+ "clip_ratio/low_min": 0.0,
606
+ "clip_ratio/region_mean": 0.0,
607
+ "completions/clipped_ratio": 0.0,
608
+ "completions/max_length": 805.0,
609
+ "completions/max_terminated_length": 805.0,
610
+ "completions/mean_length": 734.5750183105469,
611
+ "completions/mean_terminated_length": 734.5750183105469,
612
+ "completions/min_length": 678.8,
613
+ "completions/min_terminated_length": 678.8,
614
+ "entropy": 0.639057207107544,
615
+ "epoch": 0.030003000300030003,
616
+ "frac_reward_zero_std": 0.0,
617
+ "grad_norm": 1.4765625,
618
+ "learning_rate": 5.227817745803357e-07,
619
+ "loss": 0.0011,
620
+ "num_tokens": 20276305.0,
621
+ "reward": 1.1531866073608399,
622
+ "reward_std": 0.11019677557051182,
623
+ "rewards/ADEnReward/mean": 0.028415630990639328,
624
+ "rewards/ADEnReward/std": 0.0590023357886821,
625
+ "rewards/ReasoningConfidenceReward/mean": 0.13310426697134972,
626
+ "rewards/ReasoningConfidenceReward/std": 0.0671043038368225,
627
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
628
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
629
+ "step": 200,
630
+ "step_time": 33.87812012191862
631
+ },
632
+ {
633
+ "clip_ratio/high_max": 0.0,
634
+ "clip_ratio/high_mean": 0.0,
635
+ "clip_ratio/low_mean": 0.0,
636
+ "clip_ratio/low_min": 0.0,
637
+ "clip_ratio/region_mean": 0.0,
638
+ "completions/clipped_ratio": 0.0,
639
+ "completions/max_length": 808.3,
640
+ "completions/max_terminated_length": 808.3,
641
+ "completions/mean_length": 734.1604370117187,
642
+ "completions/mean_terminated_length": 734.1604370117187,
643
+ "completions/min_length": 666.7,
644
+ "completions/min_terminated_length": 666.7,
645
+ "entropy": 0.6130730360746384,
646
+ "epoch": 0.0315031503150315,
647
+ "frac_reward_zero_std": 0.0,
648
+ "grad_norm": 0.98828125,
649
+ "learning_rate": 4.988009592326139e-07,
650
+ "loss": -0.0026,
651
+ "num_tokens": 21287070.0,
652
+ "reward": 1.1174390077590943,
653
+ "reward_std": 0.17627520002424718,
654
+ "rewards/ADEnReward/mean": 0.02708783410489559,
655
+ "rewards/ADEnReward/std": 0.06303326403722168,
656
+ "rewards/ReasoningConfidenceReward/mean": 0.11535111740231514,
657
+ "rewards/ReasoningConfidenceReward/std": 0.07561756633222103,
658
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
659
+ "rewards/StrictFormatReward/std": 0.15585823953151703,
660
+ "step": 210,
661
+ "step_time": 33.49867916693911
662
+ },
663
+ {
664
+ "clip_ratio/high_max": 0.0,
665
+ "clip_ratio/high_mean": 0.0,
666
+ "clip_ratio/low_mean": 0.0,
667
+ "clip_ratio/low_min": 0.0,
668
+ "clip_ratio/region_mean": 0.0,
669
+ "completions/clipped_ratio": 0.0,
670
+ "completions/max_length": 807.7,
671
+ "completions/max_terminated_length": 807.7,
672
+ "completions/mean_length": 736.2333618164063,
673
+ "completions/mean_terminated_length": 736.2333618164063,
674
+ "completions/min_length": 672.9,
675
+ "completions/min_terminated_length": 672.9,
676
+ "entropy": 0.6331382483243942,
677
+ "epoch": 0.033003300330033,
678
+ "frac_reward_zero_std": 0.0,
679
+ "grad_norm": 1.59375,
680
+ "learning_rate": 4.7482014388489204e-07,
681
+ "loss": -0.002,
682
+ "num_tokens": 22299374.0,
683
+ "reward": 1.1317298769950868,
684
+ "reward_std": 0.13332342132925987,
685
+ "rewards/ADEnReward/mean": 0.02584674544632435,
686
+ "rewards/ADEnReward/std": 0.06565526202321052,
687
+ "rewards/ReasoningConfidenceReward/mean": 0.12254976853728294,
688
+ "rewards/ReasoningConfidenceReward/std": 0.06627246364951134,
689
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
690
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
691
+ "step": 220,
692
+ "step_time": 34.399103596247734
693
+ },
694
+ {
695
+ "clip_ratio/high_max": 0.0,
696
+ "clip_ratio/high_mean": 0.0,
697
+ "clip_ratio/low_mean": 0.0,
698
+ "clip_ratio/low_min": 0.0,
699
+ "clip_ratio/region_mean": 0.0,
700
+ "completions/clipped_ratio": 0.0,
701
+ "completions/max_length": 811.9,
702
+ "completions/max_terminated_length": 811.9,
703
+ "completions/mean_length": 735.714599609375,
704
+ "completions/mean_terminated_length": 735.714599609375,
705
+ "completions/min_length": 681.9,
706
+ "completions/min_terminated_length": 681.9,
707
+ "entropy": 0.6306542903184891,
708
+ "epoch": 0.034503450345034506,
709
+ "frac_reward_zero_std": 0.0,
710
+ "grad_norm": 1.5390625,
711
+ "learning_rate": 4.508393285371702e-07,
712
+ "loss": -0.0024,
713
+ "num_tokens": 23310965.0,
714
+ "reward": 1.1562806963920593,
715
+ "reward_std": 0.11753289476037025,
716
+ "rewards/ADEnReward/mean": 0.03307559220120311,
717
+ "rewards/ADEnReward/std": 0.07661229185760021,
718
+ "rewards/ReasoningConfidenceReward/mean": 0.13153844997286795,
719
+ "rewards/ReasoningConfidenceReward/std": 0.07286004684865474,
720
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
721
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
722
+ "step": 230,
723
+ "step_time": 34.26871528588235
724
+ },
725
+ {
726
+ "clip_ratio/high_max": 0.0,
727
+ "clip_ratio/high_mean": 0.0,
728
+ "clip_ratio/low_mean": 0.0,
729
+ "clip_ratio/low_min": 0.0,
730
+ "clip_ratio/region_mean": 0.0,
731
+ "completions/clipped_ratio": 0.0,
732
+ "completions/max_length": 811.3,
733
+ "completions/max_terminated_length": 811.3,
734
+ "completions/mean_length": 731.7271118164062,
735
+ "completions/mean_terminated_length": 731.7271118164062,
736
+ "completions/min_length": 618.4,
737
+ "completions/min_terminated_length": 618.4,
738
+ "entropy": 0.6345497578382492,
739
+ "epoch": 0.036003600360036005,
740
+ "frac_reward_zero_std": 0.0,
741
+ "grad_norm": 1.1328125,
742
+ "learning_rate": 4.2685851318944845e-07,
743
+ "loss": -0.009,
744
+ "num_tokens": 24320578.0,
745
+ "reward": 1.1457740783691406,
746
+ "reward_std": 0.12441672384738922,
747
+ "rewards/ADEnReward/mean": 0.024325233418494463,
748
+ "rewards/ADEnReward/std": 0.055920045264065264,
749
+ "rewards/ReasoningConfidenceReward/mean": 0.12978217378258705,
750
+ "rewards/ReasoningConfidenceReward/std": 0.0885798055678606,
751
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
752
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
753
+ "step": 240,
754
+ "step_time": 35.589772913791236
755
+ },
756
+ {
757
+ "clip_ratio/high_max": 0.0,
758
+ "clip_ratio/high_mean": 0.0,
759
+ "clip_ratio/low_mean": 0.0,
760
+ "clip_ratio/low_min": 0.0,
761
+ "clip_ratio/region_mean": 0.0,
762
+ "completions/clipped_ratio": 0.0,
763
+ "completions/max_length": 808.2,
764
+ "completions/max_terminated_length": 808.2,
765
+ "completions/mean_length": 732.7125183105469,
766
+ "completions/mean_terminated_length": 732.7125183105469,
767
+ "completions/min_length": 676.8,
768
+ "completions/min_terminated_length": 676.8,
769
+ "entropy": 0.6391450583934783,
770
+ "epoch": 0.0375037503750375,
771
+ "frac_reward_zero_std": 0.0,
772
+ "grad_norm": 1.015625,
773
+ "learning_rate": 4.028776978417266e-07,
774
+ "loss": -0.001,
775
+ "num_tokens": 25330312.0,
776
+ "reward": 1.1600376486778259,
777
+ "reward_std": 0.10150842666625977,
778
+ "rewards/ADEnReward/mean": 0.025944713107310235,
779
+ "rewards/ADEnReward/std": 0.0668052526190877,
780
+ "rewards/ReasoningConfidenceReward/mean": 0.13825955241918564,
781
+ "rewards/ReasoningConfidenceReward/std": 0.0673112541437149,
782
+ "rewards/StrictFormatReward/mean": 0.9958333313465119,
783
+ "rewards/StrictFormatReward/std": 0.028867512941360474,
784
+ "step": 250,
785
+ "step_time": 35.417141625192016
786
+ },
787
+ {
788
+ "clip_ratio/high_max": 0.0,
789
+ "clip_ratio/high_mean": 0.0,
790
+ "clip_ratio/low_mean": 0.0,
791
+ "clip_ratio/low_min": 0.0,
792
+ "clip_ratio/region_mean": 0.0,
793
+ "completions/clipped_ratio": 0.0,
794
+ "completions/max_length": 869.5,
795
+ "completions/max_terminated_length": 869.5,
796
+ "completions/mean_length": 734.552099609375,
797
+ "completions/mean_terminated_length": 734.552099609375,
798
+ "completions/min_length": 685.6,
799
+ "completions/min_terminated_length": 685.6,
800
+ "entropy": 0.6193063586950303,
801
+ "epoch": 0.039003900390039,
802
+ "frac_reward_zero_std": 0.0,
803
+ "grad_norm": 1.3359375,
804
+ "learning_rate": 3.7889688249400476e-07,
805
+ "loss": 0.0072,
806
+ "num_tokens": 26341233.0,
807
+ "reward": 1.141334629058838,
808
+ "reward_std": 0.15343550890684127,
809
+ "rewards/ADEnReward/mean": 0.036122958175837994,
810
+ "rewards/ADEnReward/std": 0.08206971623003483,
811
+ "rewards/ReasoningConfidenceReward/mean": 0.11771163120865821,
812
+ "rewards/ReasoningConfidenceReward/std": 0.07470999509096146,
813
+ "rewards/StrictFormatReward/mean": 0.9874999940395355,
814
+ "rewards/StrictFormatReward/std": 0.08660253882408142,
815
+ "step": 260,
816
+ "step_time": 35.400501331407575
817
+ },
818
+ {
819
+ "clip_ratio/high_max": 0.0,
820
+ "clip_ratio/high_mean": 0.0,
821
+ "clip_ratio/low_mean": 0.0,
822
+ "clip_ratio/low_min": 0.0,
823
+ "clip_ratio/region_mean": 0.0,
824
+ "completions/clipped_ratio": 0.0,
825
+ "completions/max_length": 878.0,
826
+ "completions/max_terminated_length": 878.0,
827
+ "completions/mean_length": 735.9958557128906,
828
+ "completions/mean_terminated_length": 735.9958557128906,
829
+ "completions/min_length": 671.3,
830
+ "completions/min_terminated_length": 671.3,
831
+ "entropy": 0.6377118974924088,
832
+ "epoch": 0.0405040504050405,
833
+ "frac_reward_zero_std": 0.0,
834
+ "grad_norm": 0.9609375,
835
+ "learning_rate": 3.54916067146283e-07,
836
+ "loss": 0.0056,
837
+ "num_tokens": 27352527.0,
838
+ "reward": 1.1401844143867492,
839
+ "reward_std": 0.16229173466563224,
840
+ "rewards/ADEnReward/mean": 0.035310357715934514,
841
+ "rewards/ADEnReward/std": 0.07879957482218743,
842
+ "rewards/ReasoningConfidenceReward/mean": 0.12154066711664199,
843
+ "rewards/ReasoningConfidenceReward/std": 0.0928368739783764,
844
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
845
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
846
+ "step": 270,
847
+ "step_time": 37.65524366889149
848
+ },
849
+ {
850
+ "clip_ratio/high_max": 0.0,
851
+ "clip_ratio/high_mean": 0.0,
852
+ "clip_ratio/low_mean": 0.0,
853
+ "clip_ratio/low_min": 0.0,
854
+ "clip_ratio/region_mean": 0.0,
855
+ "completions/clipped_ratio": 0.0,
856
+ "completions/max_length": 806.4,
857
+ "completions/max_terminated_length": 806.4,
858
+ "completions/mean_length": 738.8979370117188,
859
+ "completions/mean_terminated_length": 738.8979370117188,
860
+ "completions/min_length": 683.0,
861
+ "completions/min_terminated_length": 683.0,
862
+ "entropy": 0.6446064680814743,
863
+ "epoch": 0.04200420042004201,
864
+ "frac_reward_zero_std": 0.0,
865
+ "grad_norm": 1.4609375,
866
+ "learning_rate": 3.309352517985611e-07,
867
+ "loss": -0.0014,
868
+ "num_tokens": 28365614.0,
869
+ "reward": 1.1542957901954651,
870
+ "reward_std": 0.12034987881779671,
871
+ "rewards/ADEnReward/mean": 0.027850225754082202,
872
+ "rewards/ADEnReward/std": 0.07261905167251825,
873
+ "rewards/ReasoningConfidenceReward/mean": 0.13477883711457253,
874
+ "rewards/ReasoningConfidenceReward/std": 0.07817486226558686,
875
+ "rewards/StrictFormatReward/mean": 0.9916666686534882,
876
+ "rewards/StrictFormatReward/std": 0.04038818776607513,
877
+ "step": 280,
878
+ "step_time": 35.198139012046155
879
+ },
880
+ {
881
+ "clip_ratio/high_max": 0.0,
882
+ "clip_ratio/high_mean": 0.0,
883
+ "clip_ratio/low_mean": 0.0,
884
+ "clip_ratio/low_min": 0.0,
885
+ "clip_ratio/region_mean": 0.0,
886
+ "completions/clipped_ratio": 0.0,
887
+ "completions/max_length": 809.5,
888
+ "completions/max_terminated_length": 809.5,
889
+ "completions/mean_length": 735.2833557128906,
890
+ "completions/mean_terminated_length": 735.2833557128906,
891
+ "completions/min_length": 665.6,
892
+ "completions/min_terminated_length": 665.6,
893
+ "entropy": 0.6377699196338653,
894
+ "epoch": 0.043504350435043505,
895
+ "frac_reward_zero_std": 0.0,
896
+ "grad_norm": 1.4609375,
897
+ "learning_rate": 3.069544364508393e-07,
898
+ "loss": -0.0057,
899
+ "num_tokens": 29376822.0,
900
+ "reward": 1.1390021562576294,
901
+ "reward_std": 0.18877924084663392,
902
+ "rewards/ADEnReward/mean": 0.03187443232163787,
903
+ "rewards/ADEnReward/std": 0.06868166662752628,
904
+ "rewards/ReasoningConfidenceReward/mean": 0.1362943433225155,
905
+ "rewards/ReasoningConfidenceReward/std": 0.06989900954067707,
906
+ "rewards/StrictFormatReward/mean": 0.9708333313465118,
907
+ "rewards/StrictFormatReward/std": 0.1673789143562317,
908
+ "step": 290,
909
+ "step_time": 35.80791048258543
910
+ },
911
+ {
912
+ "clip_ratio/high_max": 0.0,
913
+ "clip_ratio/high_mean": 0.0,
914
+ "clip_ratio/low_mean": 0.0,
915
+ "clip_ratio/low_min": 0.0,
916
+ "clip_ratio/region_mean": 0.0,
917
+ "completions/clipped_ratio": 0.0,
918
+ "completions/max_length": 868.4,
919
+ "completions/max_terminated_length": 868.4,
920
+ "completions/mean_length": 738.370849609375,
921
+ "completions/mean_terminated_length": 738.370849609375,
922
+ "completions/min_length": 684.6,
923
+ "completions/min_terminated_length": 684.6,
924
+ "entropy": 0.6240056931972504,
925
+ "epoch": 0.045004500450045004,
926
+ "frac_reward_zero_std": 0.0,
927
+ "grad_norm": 1.3671875,
928
+ "learning_rate": 2.8297362110311753e-07,
929
+ "loss": 0.0029,
930
+ "num_tokens": 30389624.0,
931
+ "reward": 1.1787766337394714,
932
+ "reward_std": 0.12015584260225295,
933
+ "rewards/ADEnReward/mean": 0.03684027479030192,
934
+ "rewards/ADEnReward/std": 0.07373644839972257,
935
+ "rewards/ReasoningConfidenceReward/mean": 0.14610298424959184,
936
+ "rewards/ReasoningConfidenceReward/std": 0.08906417265534401,
937
+ "rewards/StrictFormatReward/mean": 0.9958333313465119,
938
+ "rewards/StrictFormatReward/std": 0.028867512941360474,
939
+ "step": 300,
940
+ "step_time": 36.948212468624114
941
+ },
942
+ {
943
+ "clip_ratio/high_max": 0.0,
944
+ "clip_ratio/high_mean": 0.0,
945
+ "clip_ratio/low_mean": 0.0,
946
+ "clip_ratio/low_min": 0.0,
947
+ "clip_ratio/region_mean": 0.0,
948
+ "completions/clipped_ratio": 0.0,
949
+ "completions/max_length": 900.3,
950
+ "completions/max_terminated_length": 900.3,
951
+ "completions/mean_length": 738.145849609375,
952
+ "completions/mean_terminated_length": 738.145849609375,
953
+ "completions/min_length": 682.3,
954
+ "completions/min_terminated_length": 682.3,
955
+ "entropy": 0.6172331184148788,
956
+ "epoch": 0.0465046504650465,
957
+ "frac_reward_zero_std": 0.0,
958
+ "grad_norm": 1.1640625,
959
+ "learning_rate": 2.5899280575539566e-07,
960
+ "loss": 0.0048,
961
+ "num_tokens": 31402542.0,
962
+ "reward": 1.1431400060653687,
963
+ "reward_std": 0.12335308417677879,
964
+ "rewards/ADEnReward/mean": 0.022267039050348103,
965
+ "rewards/ADEnReward/std": 0.05063635241240263,
966
+ "rewards/ReasoningConfidenceReward/mean": 0.13337293565273284,
967
+ "rewards/ReasoningConfidenceReward/std": 0.083626439422369,
968
+ "rewards/StrictFormatReward/mean": 0.9875,
969
+ "rewards/StrictFormatReward/std": 0.06925570070743561,
970
+ "step": 310,
971
+ "step_time": 35.03951444858685
972
+ },
973
+ {
974
+ "clip_ratio/high_max": 0.0,
975
+ "clip_ratio/high_mean": 0.0,
976
+ "clip_ratio/low_mean": 0.0,
977
+ "clip_ratio/low_min": 0.0,
978
+ "clip_ratio/region_mean": 0.0,
979
+ "completions/clipped_ratio": 0.0,
980
+ "completions/max_length": 805.2,
981
+ "completions/max_terminated_length": 805.2,
982
+ "completions/mean_length": 736.4104431152343,
983
+ "completions/mean_terminated_length": 736.4104431152343,
984
+ "completions/min_length": 685.8,
985
+ "completions/min_terminated_length": 685.8,
986
+ "entropy": 0.6272884041070939,
987
+ "epoch": 0.048004800480048,
988
+ "frac_reward_zero_std": 0.0,
989
+ "grad_norm": 1.25,
990
+ "learning_rate": 2.3501199040767386e-07,
991
+ "loss": -0.0024,
992
+ "num_tokens": 32414083.0,
993
+ "reward": 1.164345955848694,
994
+ "reward_std": 0.11102337539196014,
995
+ "rewards/ADEnReward/mean": 0.0322182422503829,
996
+ "rewards/ADEnReward/std": 0.07476234342902899,
997
+ "rewards/ReasoningConfidenceReward/mean": 0.13629434630274773,
998
+ "rewards/ReasoningConfidenceReward/std": 0.07802207246422768,
999
+ "rewards/StrictFormatReward/mean": 0.9958333313465119,
1000
+ "rewards/StrictFormatReward/std": 0.028867512941360474,
1001
+ "step": 320,
1002
+ "step_time": 33.934950474463406
1003
+ },
1004
+ {
1005
+ "clip_ratio/high_max": 0.0,
1006
+ "clip_ratio/high_mean": 0.0,
1007
+ "clip_ratio/low_mean": 0.0,
1008
+ "clip_ratio/low_min": 0.0,
1009
+ "clip_ratio/region_mean": 0.0,
1010
+ "completions/clipped_ratio": 0.002083333395421505,
1011
+ "completions/max_length": 890.8,
1012
+ "completions/max_terminated_length": 820.7,
1013
+ "completions/mean_length": 739.639599609375,
1014
+ "completions/mean_terminated_length": 737.9682739257812,
1015
+ "completions/min_length": 678.7,
1016
+ "completions/min_terminated_length": 678.7,
1017
+ "entropy": 0.6308663070201874,
1018
+ "epoch": 0.04950495049504951,
1019
+ "frac_reward_zero_std": 0.0,
1020
+ "grad_norm": 0.90234375,
1021
+ "learning_rate": 2.1103117505995202e-07,
1022
+ "loss": 0.0072,
1023
+ "num_tokens": 33427510.0,
1024
+ "reward": 1.143249773979187,
1025
+ "reward_std": 0.17548604309558868,
1026
+ "rewards/ADEnReward/mean": 0.02304396389517933,
1027
+ "rewards/ADEnReward/std": 0.048843689542263743,
1028
+ "rewards/ReasoningConfidenceReward/mean": 0.1452057547867298,
1029
+ "rewards/ReasoningConfidenceReward/std": 0.08942365646362305,
1030
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
1031
+ "rewards/StrictFormatReward/std": 0.15585823953151703,
1032
+ "step": 330,
1033
+ "step_time": 35.96805807314813
1034
+ },
1035
+ {
1036
+ "clip_ratio/high_max": 0.0,
1037
+ "clip_ratio/high_mean": 0.0,
1038
+ "clip_ratio/low_mean": 0.0,
1039
+ "clip_ratio/low_min": 0.0,
1040
+ "clip_ratio/region_mean": 0.0,
1041
+ "completions/clipped_ratio": 0.002083333395421505,
1042
+ "completions/max_length": 928.9,
1043
+ "completions/max_terminated_length": 859.0,
1044
+ "completions/mean_length": 740.6625183105468,
1045
+ "completions/mean_terminated_length": 738.9875549316406,
1046
+ "completions/min_length": 639.8,
1047
+ "completions/min_terminated_length": 639.8,
1048
+ "entropy": 0.6350211173295974,
1049
+ "epoch": 0.051005100510051006,
1050
+ "frac_reward_zero_std": 0.0,
1051
+ "grad_norm": 1.046875,
1052
+ "learning_rate": 1.870503597122302e-07,
1053
+ "loss": 0.0087,
1054
+ "num_tokens": 34441572.0,
1055
+ "reward": 1.1206173300743103,
1056
+ "reward_std": 0.2239961326122284,
1057
+ "rewards/ADEnReward/mean": 0.025679756049066783,
1058
+ "rewards/ADEnReward/std": 0.06525715440511703,
1059
+ "rewards/ReasoningConfidenceReward/mean": 0.1282708764076233,
1060
+ "rewards/ReasoningConfidenceReward/std": 0.09580355063080788,
1061
+ "rewards/StrictFormatReward/mean": 0.9666666626930237,
1062
+ "rewards/StrictFormatReward/std": 0.19624642729759217,
1063
+ "step": 340,
1064
+ "step_time": 39.633191799465564
1065
+ },
1066
+ {
1067
+ "clip_ratio/high_max": 0.0,
1068
+ "clip_ratio/high_mean": 0.0,
1069
+ "clip_ratio/low_mean": 0.0,
1070
+ "clip_ratio/low_min": 0.0,
1071
+ "clip_ratio/region_mean": 0.0,
1072
+ "completions/clipped_ratio": 0.0,
1073
+ "completions/max_length": 880.0,
1074
+ "completions/max_terminated_length": 880.0,
1075
+ "completions/mean_length": 738.3625183105469,
1076
+ "completions/mean_terminated_length": 738.3625183105469,
1077
+ "completions/min_length": 679.3,
1078
+ "completions/min_terminated_length": 679.3,
1079
+ "entropy": 0.6254092067480087,
1080
+ "epoch": 0.052505250525052505,
1081
+ "frac_reward_zero_std": 0.0,
1082
+ "grad_norm": 1.2109375,
1083
+ "learning_rate": 1.630695443645084e-07,
1084
+ "loss": 0.0056,
1085
+ "num_tokens": 35454578.0,
1086
+ "reward": 1.147234356403351,
1087
+ "reward_std": 0.19355273023247718,
1088
+ "rewards/ADEnReward/mean": 0.04021202381700277,
1089
+ "rewards/ADEnReward/std": 0.09158026501536369,
1090
+ "rewards/ReasoningConfidenceReward/mean": 0.13202227056026458,
1091
+ "rewards/ReasoningConfidenceReward/std": 0.08882957771420479,
1092
+ "rewards/StrictFormatReward/mean": 0.9749999940395355,
1093
+ "rewards/StrictFormatReward/std": 0.13552714586257936,
1094
+ "step": 350,
1095
+ "step_time": 38.13350407043472
1096
+ },
1097
+ {
1098
+ "clip_ratio/high_max": 0.0,
1099
+ "clip_ratio/high_mean": 0.0,
1100
+ "clip_ratio/low_mean": 0.0,
1101
+ "clip_ratio/low_min": 0.0,
1102
+ "clip_ratio/region_mean": 0.0,
1103
+ "completions/clipped_ratio": 0.0,
1104
+ "completions/max_length": 870.3,
1105
+ "completions/max_terminated_length": 870.3,
1106
+ "completions/mean_length": 734.5250122070313,
1107
+ "completions/mean_terminated_length": 734.5250122070313,
1108
+ "completions/min_length": 626.3,
1109
+ "completions/min_terminated_length": 626.3,
1110
+ "entropy": 0.6399930000305176,
1111
+ "epoch": 0.054005400540054004,
1112
+ "frac_reward_zero_std": 0.0,
1113
+ "grad_norm": 0.98046875,
1114
+ "learning_rate": 1.3908872901678656e-07,
1115
+ "loss": -0.0011,
1116
+ "num_tokens": 36465134.0,
1117
+ "reward": 1.1478922486305236,
1118
+ "reward_std": 0.166877593845129,
1119
+ "rewards/ADEnReward/mean": 0.028004290629178287,
1120
+ "rewards/ADEnReward/std": 0.06561574544757605,
1121
+ "rewards/ReasoningConfidenceReward/mean": 0.1365545865148306,
1122
+ "rewards/ReasoningConfidenceReward/std": 0.09227995947003365,
1123
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
1124
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
1125
+ "step": 360,
1126
+ "step_time": 36.46239227745682
1127
+ },
1128
+ {
1129
+ "clip_ratio/high_max": 0.0,
1130
+ "clip_ratio/high_mean": 0.0,
1131
+ "clip_ratio/low_mean": 0.0,
1132
+ "clip_ratio/low_min": 0.0,
1133
+ "clip_ratio/region_mean": 0.0,
1134
+ "completions/clipped_ratio": 0.0,
1135
+ "completions/max_length": 876.1,
1136
+ "completions/max_terminated_length": 876.1,
1137
+ "completions/mean_length": 738.1521118164062,
1138
+ "completions/mean_terminated_length": 738.1521118164062,
1139
+ "completions/min_length": 686.4,
1140
+ "completions/min_terminated_length": 686.4,
1141
+ "entropy": 0.6230753421783447,
1142
+ "epoch": 0.0555055505550555,
1143
+ "frac_reward_zero_std": 0.0,
1144
+ "grad_norm": 0.96484375,
1145
+ "learning_rate": 1.1510791366906475e-07,
1146
+ "loss": 0.0051,
1147
+ "num_tokens": 37477991.0,
1148
+ "reward": 1.1488192439079286,
1149
+ "reward_std": 0.16836398094892502,
1150
+ "rewards/ADEnReward/mean": 0.03516276562586427,
1151
+ "rewards/ADEnReward/std": 0.08009659349918366,
1152
+ "rewards/ReasoningConfidenceReward/mean": 0.13448973298072814,
1153
+ "rewards/ReasoningConfidenceReward/std": 0.0842104259878397,
1154
+ "rewards/StrictFormatReward/mean": 0.9791666626930237,
1155
+ "rewards/StrictFormatReward/std": 0.12699072659015656,
1156
+ "step": 370,
1157
+ "step_time": 35.618351658433674
1158
+ },
1159
+ {
1160
+ "clip_ratio/high_max": 0.0,
1161
+ "clip_ratio/high_mean": 0.0,
1162
+ "clip_ratio/low_mean": 0.0,
1163
+ "clip_ratio/low_min": 0.0,
1164
+ "clip_ratio/region_mean": 0.0,
1165
+ "completions/clipped_ratio": 0.0,
1166
+ "completions/max_length": 807.5,
1167
+ "completions/max_terminated_length": 807.5,
1168
+ "completions/mean_length": 735.2291870117188,
1169
+ "completions/mean_terminated_length": 735.2291870117188,
1170
+ "completions/min_length": 676.8,
1171
+ "completions/min_terminated_length": 676.8,
1172
+ "entropy": 0.620813924074173,
1173
+ "epoch": 0.05700570057005701,
1174
+ "frac_reward_zero_std": 0.0,
1175
+ "grad_norm": 1.2890625,
1176
+ "learning_rate": 9.112709832134293e-08,
1177
+ "loss": -0.0018,
1178
+ "num_tokens": 38489269.0,
1179
+ "reward": 1.165612268447876,
1180
+ "reward_std": 0.1205348752439022,
1181
+ "rewards/ADEnReward/mean": 0.03250669157132506,
1182
+ "rewards/ADEnReward/std": 0.06946750096976757,
1183
+ "rewards/ReasoningConfidenceReward/mean": 0.1414388693869114,
1184
+ "rewards/ReasoningConfidenceReward/std": 0.06920381374657154,
1185
+ "rewards/StrictFormatReward/mean": 0.9916666626930237,
1186
+ "rewards/StrictFormatReward/std": 0.05773502588272095,
1187
+ "step": 380,
1188
+ "step_time": 34.560751888528465
1189
+ },
1190
+ {
1191
+ "clip_ratio/high_max": 0.0,
1192
+ "clip_ratio/high_mean": 0.0,
1193
+ "clip_ratio/low_mean": 0.0,
1194
+ "clip_ratio/low_min": 0.0,
1195
+ "clip_ratio/region_mean": 0.0,
1196
+ "completions/clipped_ratio": 0.0,
1197
+ "completions/max_length": 816.5,
1198
+ "completions/max_terminated_length": 816.5,
1199
+ "completions/mean_length": 736.0250122070313,
1200
+ "completions/mean_terminated_length": 736.0250122070313,
1201
+ "completions/min_length": 676.1,
1202
+ "completions/min_terminated_length": 676.1,
1203
+ "entropy": 0.6374829471111297,
1204
+ "epoch": 0.05850585058505851,
1205
+ "frac_reward_zero_std": 0.0,
1206
+ "grad_norm": 1.5703125,
1207
+ "learning_rate": 6.71462829736211e-08,
1208
+ "loss": -0.0012,
1209
+ "num_tokens": 39501137.0,
1210
+ "reward": 1.1812423110008239,
1211
+ "reward_std": 0.11223873198032379,
1212
+ "rewards/ADEnReward/mean": 0.037600951222702864,
1213
+ "rewards/ADEnReward/std": 0.07687103077769279,
1214
+ "rewards/ReasoningConfidenceReward/mean": 0.14780800566077232,
1215
+ "rewards/ReasoningConfidenceReward/std": 0.07224066257476806,
1216
+ "rewards/StrictFormatReward/mean": 0.9958333313465119,
1217
+ "rewards/StrictFormatReward/std": 0.028867512941360474,
1218
+ "step": 390,
1219
+ "step_time": 35.27680900348351
1220
+ },
1221
+ {
1222
+ "clip_ratio/high_max": 0.0,
1223
+ "clip_ratio/high_mean": 0.0,
1224
+ "clip_ratio/low_mean": 0.0,
1225
+ "clip_ratio/low_min": 0.0,
1226
+ "clip_ratio/region_mean": 0.0,
1227
+ "completions/clipped_ratio": 0.0,
1228
+ "completions/max_length": 806.4,
1229
+ "completions/max_terminated_length": 806.4,
1230
+ "completions/mean_length": 734.8396118164062,
1231
+ "completions/mean_terminated_length": 734.8396118164062,
1232
+ "completions/min_length": 674.3,
1233
+ "completions/min_terminated_length": 674.3,
1234
+ "entropy": 0.6304167330265045,
1235
+ "epoch": 0.060006000600060005,
1236
+ "frac_reward_zero_std": 0.0,
1237
+ "grad_norm": 1.53125,
1238
+ "learning_rate": 4.316546762589928e-08,
1239
+ "loss": -0.0016,
1240
+ "num_tokens": 40512484.0,
1241
+ "reward": 1.154755175113678,
1242
+ "reward_std": 0.14684431552886962,
1243
+ "rewards/ADEnReward/mean": 0.03186223171651363,
1244
+ "rewards/ADEnReward/std": 0.06947189681231976,
1245
+ "rewards/ReasoningConfidenceReward/mean": 0.13955960273742676,
1246
+ "rewards/ReasoningConfidenceReward/std": 0.07285604253411293,
1247
+ "rewards/StrictFormatReward/mean": 0.9833333253860473,
1248
+ "rewards/StrictFormatReward/std": 0.1154700517654419,
1249
+ "step": 400,
1250
+ "step_time": 34.50618078866974
1251
+ },
1252
+ {
1253
+ "clip_ratio/high_max": 0.0,
1254
+ "clip_ratio/high_mean": 0.0,
1255
+ "clip_ratio/low_mean": 0.0,
1256
+ "clip_ratio/low_min": 0.0,
1257
+ "clip_ratio/region_mean": 0.0,
1258
+ "completions/clipped_ratio": 0.0,
1259
+ "completions/max_length": 827.3,
1260
+ "completions/max_terminated_length": 827.3,
1261
+ "completions/mean_length": 734.5437683105469,
1262
+ "completions/mean_terminated_length": 734.5437683105469,
1263
+ "completions/min_length": 674.0,
1264
+ "completions/min_terminated_length": 674.0,
1265
+ "entropy": 0.6269477725028991,
1266
+ "epoch": 0.061506150615061504,
1267
+ "frac_reward_zero_std": 0.0,
1268
+ "grad_norm": 1.0546875,
1269
+ "learning_rate": 1.9184652278177456e-08,
1270
+ "loss": -0.0049,
1271
+ "num_tokens": 41523257.0,
1272
+ "reward": 1.1434462666511536,
1273
+ "reward_std": 0.14020287990570068,
1274
+ "rewards/ADEnReward/mean": 0.02198589243926108,
1275
+ "rewards/ADEnReward/std": 0.053791009541600945,
1276
+ "rewards/ReasoningConfidenceReward/mean": 0.13812703192234038,
1277
+ "rewards/ReasoningConfidenceReward/std": 0.07505685314536095,
1278
+ "rewards/StrictFormatReward/mean": 0.9833333313465118,
1279
+ "rewards/StrictFormatReward/std": 0.09812321364879609,
1280
+ "step": 410,
1281
+ "step_time": 34.98682966614142
1282
+ }
1283
+ ],
1284
+ "logging_steps": 10,
1285
+ "max_steps": 417,
1286
+ "num_input_tokens_seen": 42232644,
1287
+ "num_train_epochs": 1,
1288
+ "save_steps": 100,
1289
+ "stateful_callbacks": {
1290
+ "TrainerControl": {
1291
+ "args": {
1292
+ "should_epoch_stop": false,
1293
+ "should_evaluate": false,
1294
+ "should_log": false,
1295
+ "should_save": true,
1296
+ "should_training_stop": true
1297
+ },
1298
+ "attributes": {}
1299
+ }
1300
+ },
1301
+ "total_flos": 0.0,
1302
+ "train_batch_size": 6,
1303
+ "trial_name": null,
1304
+ "trial_params": null
1305
+ }
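Note: the trainer_state.json stored with each checkpoint keeps the full log_history (aggregate reward, reward_std, per-reward-function means such as StrictFormatReward, learning rate, step_time). A minimal sketch for pulling the reward curve back out with the standard library only; the local path below is an assumption about where the checkpoint folder was downloaded, not something recorded in this commit:

# Minimal sketch: extract the logged reward curve from a downloaded trainer_state.json.
# The path is illustrative; adjust it to wherever the checkpoint was fetched.
import json

with open("grpo-nADE-format-RC/checkpoint-417/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    # Skip summary entries that do not carry a per-step reward.
    if "reward" not in entry:
        continue
    print(entry["step"], entry["reward"], entry.get("rewards/StrictFormatReward/mean"))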
grpo-nADE-format-RC/checkpoint-417/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/config.json ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "architectures": [
3
+ "Qwen3VLForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "eos_token_id": 151645,
7
+ "image_token_id": 151655,
8
+ "model_type": "qwen3_vl",
9
+ "pad_token_id": 151643,
10
+ "text_config": {
11
+ "attention_bias": false,
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 151643,
14
+ "dtype": "bfloat16",
15
+ "eos_token_id": 151645,
16
+ "head_dim": 128,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 2560,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 9728,
21
+ "max_position_embeddings": 262144,
22
+ "model_type": "qwen3_vl_text",
23
+ "num_attention_heads": 32,
24
+ "num_hidden_layers": 36,
25
+ "num_key_value_heads": 8,
26
+ "rms_norm_eps": 1e-06,
27
+ "rope_scaling": {
28
+ "mrope_interleaved": true,
29
+ "mrope_section": [
30
+ 24,
31
+ 20,
32
+ 20
33
+ ],
34
+ "rope_type": "default"
35
+ },
36
+ "rope_theta": 5000000,
37
+ "tie_word_embeddings": true,
38
+ "use_cache": true,
39
+ "vocab_size": 151936
40
+ },
41
+ "tie_word_embeddings": true,
42
+ "transformers_version": "4.57.6",
43
+ "use_cache": false,
44
+ "video_token_id": 151656,
45
+ "vision_config": {
46
+ "deepstack_visual_indexes": [
47
+ 5,
48
+ 11,
49
+ 17
50
+ ],
51
+ "depth": 24,
52
+ "dtype": "bfloat16",
53
+ "hidden_act": "gelu_pytorch_tanh",
54
+ "hidden_size": 1024,
55
+ "in_channels": 3,
56
+ "initializer_range": 0.02,
57
+ "intermediate_size": 4096,
58
+ "model_type": "qwen3_vl",
59
+ "num_heads": 16,
60
+ "num_position_embeddings": 2304,
61
+ "out_hidden_size": 2560,
62
+ "patch_size": 16,
63
+ "spatial_merge_size": 2,
64
+ "temporal_patch_size": 2
65
+ },
66
+ "vision_end_token_id": 151653,
67
+ "vision_start_token_id": 151652
68
+ }
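The config.json above describes a Qwen3-VL conditional-generation model: a 2560-hidden, 36-layer text backbone with a 24-block vision tower. A minimal sketch for reading those fields straight from the raw file with the standard library; the local path is illustrative and assumes the repo has been downloaded:

# Minimal sketch: inspect the architecture fields recorded in config.json.
import json

with open("grpo-nADE-format-RC/config.json") as f:
    cfg = json.load(f)

text = cfg["text_config"]
print(cfg["architectures"][0])                         # Qwen3VLForConditionalGeneration
print(text["hidden_size"], text["num_hidden_layers"])  # 2560 36
print(cfg["vision_config"]["depth"])                   # 24 vision blocks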
grpo-nADE-format-RC/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format-RC/model.safetensors.index.json ADDED
@@ -0,0 +1,721 @@
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 4437815808,
4
+ "total_size": 8875631616
5
+ },
6
+ "weight_map": {
7
+ "model.language_model.embed_tokens.weight": "model-00001-of-00002.safetensors",
8
+ "model.language_model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
9
+ "model.language_model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
10
+ "model.language_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
11
+ "model.language_model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
12
+ "model.language_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
13
+ "model.language_model.layers.0.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
14
+ "model.language_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
15
+ "model.language_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
16
+ "model.language_model.layers.0.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
17
+ "model.language_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
18
+ "model.language_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
19
+ "model.language_model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
20
+ "model.language_model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
21
+ "model.language_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
22
+ "model.language_model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
23
+ "model.language_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
24
+ "model.language_model.layers.1.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
25
+ "model.language_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
26
+ "model.language_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
27
+ "model.language_model.layers.1.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
28
+ "model.language_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
29
+ "model.language_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
30
+ "model.language_model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
31
+ "model.language_model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
32
+ "model.language_model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
33
+ "model.language_model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
34
+ "model.language_model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
35
+ "model.language_model.layers.10.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
36
+ "model.language_model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
37
+ "model.language_model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
38
+ "model.language_model.layers.10.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
39
+ "model.language_model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
40
+ "model.language_model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
41
+ "model.language_model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
42
+ "model.language_model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
43
+ "model.language_model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
44
+ "model.language_model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
45
+ "model.language_model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
46
+ "model.language_model.layers.11.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
47
+ "model.language_model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
48
+ "model.language_model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
49
+ "model.language_model.layers.11.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
50
+ "model.language_model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
51
+ "model.language_model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
52
+ "model.language_model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
53
+ "model.language_model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
54
+ "model.language_model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
55
+ "model.language_model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
56
+ "model.language_model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
57
+ "model.language_model.layers.12.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
58
+ "model.language_model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
59
+ "model.language_model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
60
+ "model.language_model.layers.12.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
61
+ "model.language_model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
62
+ "model.language_model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
63
+ "model.language_model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
64
+ "model.language_model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
65
+ "model.language_model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
66
+ "model.language_model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
67
+ "model.language_model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
68
+ "model.language_model.layers.13.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
69
+ "model.language_model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
70
+ "model.language_model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
71
+ "model.language_model.layers.13.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
72
+ "model.language_model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
73
+ "model.language_model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
74
+ "model.language_model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
75
+ "model.language_model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
76
+ "model.language_model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
77
+ "model.language_model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
78
+ "model.language_model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
79
+ "model.language_model.layers.14.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
80
+ "model.language_model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
81
+ "model.language_model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
82
+ "model.language_model.layers.14.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
83
+ "model.language_model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
84
+ "model.language_model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
85
+ "model.language_model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
86
+ "model.language_model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
87
+ "model.language_model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
88
+ "model.language_model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
89
+ "model.language_model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
90
+ "model.language_model.layers.15.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
91
+ "model.language_model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
92
+ "model.language_model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
93
+ "model.language_model.layers.15.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
94
+ "model.language_model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
95
+ "model.language_model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
96
+ "model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00002.safetensors",
97
+ "model.language_model.layers.16.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
98
+ "model.language_model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
99
+ "model.language_model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
100
+ "model.language_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
101
+ "model.language_model.layers.16.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
102
+ "model.language_model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
103
+ "model.language_model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
104
+ "model.language_model.layers.16.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
105
+ "model.language_model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
106
+ "model.language_model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
107
+ "model.language_model.layers.17.input_layernorm.weight": "model-00002-of-00002.safetensors",
108
+ "model.language_model.layers.17.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
109
+ "model.language_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
110
+ "model.language_model.layers.17.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
111
+ "model.language_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
112
+ "model.language_model.layers.17.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
113
+ "model.language_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
114
+ "model.language_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
115
+ "model.language_model.layers.17.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
116
+ "model.language_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
117
+ "model.language_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
118
+ "model.language_model.layers.18.input_layernorm.weight": "model-00002-of-00002.safetensors",
119
+ "model.language_model.layers.18.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
120
+ "model.language_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
121
+ "model.language_model.layers.18.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
122
+ "model.language_model.layers.18.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
123
+ "model.language_model.layers.18.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
124
+ "model.language_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
125
+ "model.language_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
126
+ "model.language_model.layers.18.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
127
+ "model.language_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
128
+ "model.language_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
129
+ "model.language_model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
130
+ "model.language_model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
131
+ "model.language_model.layers.19.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
132
+ "model.language_model.layers.19.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
133
+ "model.language_model.layers.19.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
134
+ "model.language_model.layers.19.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
135
+ "model.language_model.layers.19.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
136
+ "model.language_model.layers.19.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
137
+ "model.language_model.layers.19.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
138
+ "model.language_model.layers.19.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
139
+ "model.language_model.layers.19.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
140
+ "model.language_model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
141
+ "model.language_model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
142
+ "model.language_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
143
+ "model.language_model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
144
+ "model.language_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
145
+ "model.language_model.layers.2.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
146
+ "model.language_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
147
+ "model.language_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
148
+ "model.language_model.layers.2.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
149
+ "model.language_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
150
+ "model.language_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
151
+ "model.language_model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
152
+ "model.language_model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
153
+ "model.language_model.layers.20.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
154
+ "model.language_model.layers.20.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
155
+ "model.language_model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
156
+ "model.language_model.layers.20.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
157
+ "model.language_model.layers.20.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
158
+ "model.language_model.layers.20.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
159
+ "model.language_model.layers.20.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
160
+ "model.language_model.layers.20.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
161
+ "model.language_model.layers.20.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
162
+ "model.language_model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
163
+ "model.language_model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
164
+ "model.language_model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
165
+ "model.language_model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
166
+ "model.language_model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
167
+ "model.language_model.layers.21.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
168
+ "model.language_model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
169
+ "model.language_model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
170
+ "model.language_model.layers.21.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
171
+ "model.language_model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
172
+ "model.language_model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
173
+ "model.language_model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
174
+ "model.language_model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
175
+ "model.language_model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
176
+ "model.language_model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
177
+ "model.language_model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
178
+ "model.language_model.layers.22.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
179
+ "model.language_model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
180
+ "model.language_model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
181
+ "model.language_model.layers.22.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
182
+ "model.language_model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
183
+ "model.language_model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
184
+ "model.language_model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
185
+ "model.language_model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
186
+ "model.language_model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
187
+ "model.language_model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
188
+ "model.language_model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
189
+ "model.language_model.layers.23.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
190
+ "model.language_model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
191
+ "model.language_model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
192
+ "model.language_model.layers.23.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
193
+ "model.language_model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
194
+ "model.language_model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
195
+ "model.language_model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
196
+ "model.language_model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
197
+ "model.language_model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
198
+ "model.language_model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
199
+ "model.language_model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
200
+ "model.language_model.layers.24.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
201
+ "model.language_model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
202
+ "model.language_model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
203
+ "model.language_model.layers.24.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
204
+ "model.language_model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
205
+ "model.language_model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
206
+ "model.language_model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
207
+ "model.language_model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
208
+ "model.language_model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
209
+ "model.language_model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
210
+ "model.language_model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
211
+ "model.language_model.layers.25.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
212
+ "model.language_model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
213
+ "model.language_model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
214
+ "model.language_model.layers.25.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
215
+ "model.language_model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
216
+ "model.language_model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
217
+ "model.language_model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
218
+ "model.language_model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
219
+ "model.language_model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
220
+ "model.language_model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
221
+ "model.language_model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
222
+ "model.language_model.layers.26.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
223
+ "model.language_model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
224
+ "model.language_model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
225
+ "model.language_model.layers.26.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
226
+ "model.language_model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
227
+ "model.language_model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
228
+ "model.language_model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
229
+ "model.language_model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
230
+ "model.language_model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
231
+ "model.language_model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
232
+ "model.language_model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
233
+ "model.language_model.layers.27.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
234
+ "model.language_model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
235
+ "model.language_model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
236
+ "model.language_model.layers.27.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
237
+ "model.language_model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
238
+ "model.language_model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
239
+ "model.language_model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
240
+ "model.language_model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
241
+ "model.language_model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
242
+ "model.language_model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
243
+ "model.language_model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
244
+ "model.language_model.layers.28.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
245
+ "model.language_model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
246
+ "model.language_model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
247
+ "model.language_model.layers.28.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
248
+ "model.language_model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
249
+ "model.language_model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
250
+ "model.language_model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
251
+ "model.language_model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
252
+ "model.language_model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
253
+ "model.language_model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
254
+ "model.language_model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
255
+ "model.language_model.layers.29.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
256
+ "model.language_model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
257
+ "model.language_model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
258
+ "model.language_model.layers.29.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
259
+ "model.language_model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
260
+ "model.language_model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
261
+ "model.language_model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
262
+ "model.language_model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
263
+ "model.language_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
264
+ "model.language_model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
265
+ "model.language_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
266
+ "model.language_model.layers.3.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
267
+ "model.language_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
268
+ "model.language_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
269
+ "model.language_model.layers.3.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
270
+ "model.language_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
271
+ "model.language_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
272
+ "model.language_model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
273
+ "model.language_model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
274
+ "model.language_model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
275
+ "model.language_model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
276
+ "model.language_model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
277
+ "model.language_model.layers.30.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
278
+ "model.language_model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
279
+ "model.language_model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
280
+ "model.language_model.layers.30.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
281
+ "model.language_model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
282
+ "model.language_model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
283
+ "model.language_model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
284
+ "model.language_model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
285
+ "model.language_model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
286
+ "model.language_model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
287
+ "model.language_model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
288
+ "model.language_model.layers.31.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
289
+ "model.language_model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
290
+ "model.language_model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
291
+ "model.language_model.layers.31.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
292
+ "model.language_model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
293
+ "model.language_model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
294
+ "model.language_model.layers.32.input_layernorm.weight": "model-00002-of-00002.safetensors",
295
+ "model.language_model.layers.32.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
296
+ "model.language_model.layers.32.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
297
+ "model.language_model.layers.32.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
298
+ "model.language_model.layers.32.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
299
+ "model.language_model.layers.32.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
300
+ "model.language_model.layers.32.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
301
+ "model.language_model.layers.32.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
302
+ "model.language_model.layers.32.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
303
+ "model.language_model.layers.32.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
304
+ "model.language_model.layers.32.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
305
+ "model.language_model.layers.33.input_layernorm.weight": "model-00002-of-00002.safetensors",
306
+ "model.language_model.layers.33.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
307
+ "model.language_model.layers.33.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
308
+ "model.language_model.layers.33.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
309
+ "model.language_model.layers.33.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
310
+ "model.language_model.layers.33.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
311
+ "model.language_model.layers.33.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
312
+ "model.language_model.layers.33.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
313
+ "model.language_model.layers.33.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
314
+ "model.language_model.layers.33.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
315
+ "model.language_model.layers.33.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
316
+ "model.language_model.layers.34.input_layernorm.weight": "model-00002-of-00002.safetensors",
317
+ "model.language_model.layers.34.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
318
+ "model.language_model.layers.34.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
319
+ "model.language_model.layers.34.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
320
+ "model.language_model.layers.34.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
321
+ "model.language_model.layers.34.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
322
+ "model.language_model.layers.34.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
323
+ "model.language_model.layers.34.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
324
+ "model.language_model.layers.34.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
325
+ "model.language_model.layers.34.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
326
+ "model.language_model.layers.34.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
327
+ "model.language_model.layers.35.input_layernorm.weight": "model-00002-of-00002.safetensors",
328
+ "model.language_model.layers.35.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
329
+ "model.language_model.layers.35.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
330
+ "model.language_model.layers.35.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
331
+ "model.language_model.layers.35.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
332
+ "model.language_model.layers.35.self_attn.k_norm.weight": "model-00002-of-00002.safetensors",
333
+ "model.language_model.layers.35.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
334
+ "model.language_model.layers.35.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
335
+ "model.language_model.layers.35.self_attn.q_norm.weight": "model-00002-of-00002.safetensors",
336
+ "model.language_model.layers.35.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
337
+ "model.language_model.layers.35.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
338
+ "model.language_model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
339
+ "model.language_model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
340
+ "model.language_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
341
+ "model.language_model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
342
+ "model.language_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
343
+ "model.language_model.layers.4.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
344
+ "model.language_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
345
+ "model.language_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
346
+ "model.language_model.layers.4.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
347
+ "model.language_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
348
+ "model.language_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
349
+ "model.language_model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
350
+ "model.language_model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
351
+ "model.language_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
352
+ "model.language_model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
353
+ "model.language_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
354
+ "model.language_model.layers.5.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
355
+ "model.language_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
356
+ "model.language_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
357
+ "model.language_model.layers.5.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
358
+ "model.language_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
359
+ "model.language_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
360
+ "model.language_model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
361
+ "model.language_model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
362
+ "model.language_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
363
+ "model.language_model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
364
+ "model.language_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
365
+ "model.language_model.layers.6.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
366
+ "model.language_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
367
+ "model.language_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
368
+ "model.language_model.layers.6.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
369
+ "model.language_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
370
+ "model.language_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
371
+ "model.language_model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
372
+ "model.language_model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
373
+ "model.language_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
374
+ "model.language_model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
375
+ "model.language_model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
376
+ "model.language_model.layers.7.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
377
+ "model.language_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
378
+ "model.language_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
379
+ "model.language_model.layers.7.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
380
+ "model.language_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
381
+ "model.language_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
382
+ "model.language_model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
383
+ "model.language_model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
384
+ "model.language_model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
385
+ "model.language_model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
386
+ "model.language_model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
387
+ "model.language_model.layers.8.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
388
+ "model.language_model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
389
+ "model.language_model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
390
+ "model.language_model.layers.8.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
391
+ "model.language_model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
392
+ "model.language_model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
393
+ "model.language_model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
394
+ "model.language_model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
395
+ "model.language_model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
396
+ "model.language_model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
397
+ "model.language_model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
398
+ "model.language_model.layers.9.self_attn.k_norm.weight": "model-00001-of-00002.safetensors",
399
+ "model.language_model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
400
+ "model.language_model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
401
+ "model.language_model.layers.9.self_attn.q_norm.weight": "model-00001-of-00002.safetensors",
402
+ "model.language_model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
403
+ "model.language_model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
404
+ "model.language_model.norm.weight": "model-00002-of-00002.safetensors",
405
+ "model.visual.blocks.0.attn.proj.bias": "model-00001-of-00002.safetensors",
406
+ "model.visual.blocks.0.attn.proj.weight": "model-00001-of-00002.safetensors",
407
+ "model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00002.safetensors",
408
+ "model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00002.safetensors",
409
+ "model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
410
+ "model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
411
+ "model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
412
+ "model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
413
+ "model.visual.blocks.0.norm1.bias": "model-00001-of-00002.safetensors",
414
+ "model.visual.blocks.0.norm1.weight": "model-00001-of-00002.safetensors",
415
+ "model.visual.blocks.0.norm2.bias": "model-00001-of-00002.safetensors",
416
+ "model.visual.blocks.0.norm2.weight": "model-00001-of-00002.safetensors",
417
+ "model.visual.blocks.1.attn.proj.bias": "model-00001-of-00002.safetensors",
418
+ "model.visual.blocks.1.attn.proj.weight": "model-00001-of-00002.safetensors",
419
+ "model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00002.safetensors",
420
+ "model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00002.safetensors",
421
+ "model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
422
+ "model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
423
+ "model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
424
+ "model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
425
+ "model.visual.blocks.1.norm1.bias": "model-00001-of-00002.safetensors",
426
+ "model.visual.blocks.1.norm1.weight": "model-00001-of-00002.safetensors",
427
+ "model.visual.blocks.1.norm2.bias": "model-00001-of-00002.safetensors",
428
+ "model.visual.blocks.1.norm2.weight": "model-00001-of-00002.safetensors",
429
+ "model.visual.blocks.10.attn.proj.bias": "model-00001-of-00002.safetensors",
430
+ "model.visual.blocks.10.attn.proj.weight": "model-00001-of-00002.safetensors",
431
+ "model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00002.safetensors",
432
+ "model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00002.safetensors",
433
+ "model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
434
+ "model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
435
+ "model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
436
+ "model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
437
+ "model.visual.blocks.10.norm1.bias": "model-00001-of-00002.safetensors",
438
+ "model.visual.blocks.10.norm1.weight": "model-00001-of-00002.safetensors",
439
+ "model.visual.blocks.10.norm2.bias": "model-00001-of-00002.safetensors",
440
+ "model.visual.blocks.10.norm2.weight": "model-00001-of-00002.safetensors",
441
+ "model.visual.blocks.11.attn.proj.bias": "model-00001-of-00002.safetensors",
442
+ "model.visual.blocks.11.attn.proj.weight": "model-00001-of-00002.safetensors",
443
+ "model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00002.safetensors",
444
+ "model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00002.safetensors",
445
+ "model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
446
+ "model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
447
+ "model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
448
+ "model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
449
+ "model.visual.blocks.11.norm1.bias": "model-00001-of-00002.safetensors",
450
+ "model.visual.blocks.11.norm1.weight": "model-00001-of-00002.safetensors",
451
+ "model.visual.blocks.11.norm2.bias": "model-00001-of-00002.safetensors",
452
+ "model.visual.blocks.11.norm2.weight": "model-00001-of-00002.safetensors",
453
+ "model.visual.blocks.12.attn.proj.bias": "model-00001-of-00002.safetensors",
454
+ "model.visual.blocks.12.attn.proj.weight": "model-00001-of-00002.safetensors",
455
+ "model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00002.safetensors",
456
+ "model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00002.safetensors",
457
+ "model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
458
+ "model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
459
+ "model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
460
+ "model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
461
+ "model.visual.blocks.12.norm1.bias": "model-00001-of-00002.safetensors",
462
+ "model.visual.blocks.12.norm1.weight": "model-00001-of-00002.safetensors",
463
+ "model.visual.blocks.12.norm2.bias": "model-00001-of-00002.safetensors",
464
+ "model.visual.blocks.12.norm2.weight": "model-00001-of-00002.safetensors",
465
+ "model.visual.blocks.13.attn.proj.bias": "model-00001-of-00002.safetensors",
466
+ "model.visual.blocks.13.attn.proj.weight": "model-00001-of-00002.safetensors",
467
+ "model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00002.safetensors",
468
+ "model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00002.safetensors",
469
+ "model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
470
+ "model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
471
+ "model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
472
+ "model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
473
+ "model.visual.blocks.13.norm1.bias": "model-00001-of-00002.safetensors",
474
+ "model.visual.blocks.13.norm1.weight": "model-00001-of-00002.safetensors",
475
+ "model.visual.blocks.13.norm2.bias": "model-00001-of-00002.safetensors",
476
+ "model.visual.blocks.13.norm2.weight": "model-00001-of-00002.safetensors",
477
+ "model.visual.blocks.14.attn.proj.bias": "model-00001-of-00002.safetensors",
478
+ "model.visual.blocks.14.attn.proj.weight": "model-00001-of-00002.safetensors",
479
+ "model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00002.safetensors",
480
+ "model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00002.safetensors",
481
+ "model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
482
+ "model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
483
+ "model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
484
+ "model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
485
+ "model.visual.blocks.14.norm1.bias": "model-00001-of-00002.safetensors",
486
+ "model.visual.blocks.14.norm1.weight": "model-00001-of-00002.safetensors",
487
+ "model.visual.blocks.14.norm2.bias": "model-00001-of-00002.safetensors",
488
+ "model.visual.blocks.14.norm2.weight": "model-00001-of-00002.safetensors",
489
+ "model.visual.blocks.15.attn.proj.bias": "model-00001-of-00002.safetensors",
490
+ "model.visual.blocks.15.attn.proj.weight": "model-00001-of-00002.safetensors",
491
+ "model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00002.safetensors",
492
+ "model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00002.safetensors",
493
+ "model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
494
+ "model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
495
+ "model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
496
+ "model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
497
+ "model.visual.blocks.15.norm1.bias": "model-00001-of-00002.safetensors",
498
+ "model.visual.blocks.15.norm1.weight": "model-00001-of-00002.safetensors",
499
+ "model.visual.blocks.15.norm2.bias": "model-00001-of-00002.safetensors",
500
+ "model.visual.blocks.15.norm2.weight": "model-00001-of-00002.safetensors",
501
+ "model.visual.blocks.16.attn.proj.bias": "model-00001-of-00002.safetensors",
502
+ "model.visual.blocks.16.attn.proj.weight": "model-00001-of-00002.safetensors",
503
+ "model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00002.safetensors",
504
+ "model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00002.safetensors",
505
+ "model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
506
+ "model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
507
+ "model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
508
+ "model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
509
+ "model.visual.blocks.16.norm1.bias": "model-00001-of-00002.safetensors",
510
+ "model.visual.blocks.16.norm1.weight": "model-00001-of-00002.safetensors",
511
+ "model.visual.blocks.16.norm2.bias": "model-00001-of-00002.safetensors",
512
+ "model.visual.blocks.16.norm2.weight": "model-00001-of-00002.safetensors",
513
+ "model.visual.blocks.17.attn.proj.bias": "model-00001-of-00002.safetensors",
514
+ "model.visual.blocks.17.attn.proj.weight": "model-00001-of-00002.safetensors",
515
+ "model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00002.safetensors",
516
+ "model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00002.safetensors",
517
+ "model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
518
+ "model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
519
+ "model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
520
+ "model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
521
+ "model.visual.blocks.17.norm1.bias": "model-00001-of-00002.safetensors",
522
+ "model.visual.blocks.17.norm1.weight": "model-00001-of-00002.safetensors",
523
+ "model.visual.blocks.17.norm2.bias": "model-00001-of-00002.safetensors",
524
+ "model.visual.blocks.17.norm2.weight": "model-00001-of-00002.safetensors",
525
+ "model.visual.blocks.18.attn.proj.bias": "model-00001-of-00002.safetensors",
526
+ "model.visual.blocks.18.attn.proj.weight": "model-00001-of-00002.safetensors",
527
+ "model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00002.safetensors",
528
+ "model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00002.safetensors",
529
+ "model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
530
+ "model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
531
+ "model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
532
+ "model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
533
+ "model.visual.blocks.18.norm1.bias": "model-00001-of-00002.safetensors",
534
+ "model.visual.blocks.18.norm1.weight": "model-00001-of-00002.safetensors",
535
+ "model.visual.blocks.18.norm2.bias": "model-00001-of-00002.safetensors",
536
+ "model.visual.blocks.18.norm2.weight": "model-00001-of-00002.safetensors",
537
+ "model.visual.blocks.19.attn.proj.bias": "model-00001-of-00002.safetensors",
538
+ "model.visual.blocks.19.attn.proj.weight": "model-00001-of-00002.safetensors",
539
+ "model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00002.safetensors",
540
+ "model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00002.safetensors",
541
+ "model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
542
+ "model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
543
+ "model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
544
+ "model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
545
+ "model.visual.blocks.19.norm1.bias": "model-00001-of-00002.safetensors",
546
+ "model.visual.blocks.19.norm1.weight": "model-00001-of-00002.safetensors",
547
+ "model.visual.blocks.19.norm2.bias": "model-00001-of-00002.safetensors",
548
+ "model.visual.blocks.19.norm2.weight": "model-00001-of-00002.safetensors",
549
+ "model.visual.blocks.2.attn.proj.bias": "model-00001-of-00002.safetensors",
550
+ "model.visual.blocks.2.attn.proj.weight": "model-00001-of-00002.safetensors",
551
+ "model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00002.safetensors",
552
+ "model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00002.safetensors",
553
+ "model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
554
+ "model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
555
+ "model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
556
+ "model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
557
+ "model.visual.blocks.2.norm1.bias": "model-00001-of-00002.safetensors",
558
+ "model.visual.blocks.2.norm1.weight": "model-00001-of-00002.safetensors",
559
+ "model.visual.blocks.2.norm2.bias": "model-00001-of-00002.safetensors",
560
+ "model.visual.blocks.2.norm2.weight": "model-00001-of-00002.safetensors",
561
+ "model.visual.blocks.20.attn.proj.bias": "model-00001-of-00002.safetensors",
562
+ "model.visual.blocks.20.attn.proj.weight": "model-00001-of-00002.safetensors",
563
+ "model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00002.safetensors",
564
+ "model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00002.safetensors",
565
+ "model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
566
+ "model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
567
+ "model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
568
+ "model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
569
+ "model.visual.blocks.20.norm1.bias": "model-00001-of-00002.safetensors",
570
+ "model.visual.blocks.20.norm1.weight": "model-00001-of-00002.safetensors",
571
+ "model.visual.blocks.20.norm2.bias": "model-00001-of-00002.safetensors",
572
+ "model.visual.blocks.20.norm2.weight": "model-00001-of-00002.safetensors",
573
+ "model.visual.blocks.21.attn.proj.bias": "model-00001-of-00002.safetensors",
574
+ "model.visual.blocks.21.attn.proj.weight": "model-00001-of-00002.safetensors",
575
+ "model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00002.safetensors",
576
+ "model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00002.safetensors",
577
+ "model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
578
+ "model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
579
+ "model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
580
+ "model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
581
+ "model.visual.blocks.21.norm1.bias": "model-00001-of-00002.safetensors",
582
+ "model.visual.blocks.21.norm1.weight": "model-00001-of-00002.safetensors",
583
+ "model.visual.blocks.21.norm2.bias": "model-00001-of-00002.safetensors",
584
+ "model.visual.blocks.21.norm2.weight": "model-00001-of-00002.safetensors",
585
+ "model.visual.blocks.22.attn.proj.bias": "model-00001-of-00002.safetensors",
586
+ "model.visual.blocks.22.attn.proj.weight": "model-00001-of-00002.safetensors",
587
+ "model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00002.safetensors",
588
+ "model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00002.safetensors",
589
+ "model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
590
+ "model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
591
+ "model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
592
+ "model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
593
+ "model.visual.blocks.22.norm1.bias": "model-00001-of-00002.safetensors",
594
+ "model.visual.blocks.22.norm1.weight": "model-00001-of-00002.safetensors",
595
+ "model.visual.blocks.22.norm2.bias": "model-00001-of-00002.safetensors",
596
+ "model.visual.blocks.22.norm2.weight": "model-00001-of-00002.safetensors",
597
+ "model.visual.blocks.23.attn.proj.bias": "model-00001-of-00002.safetensors",
598
+ "model.visual.blocks.23.attn.proj.weight": "model-00001-of-00002.safetensors",
599
+ "model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00002.safetensors",
600
+ "model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00002.safetensors",
601
+ "model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
602
+ "model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
603
+ "model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
604
+ "model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
605
+ "model.visual.blocks.23.norm1.bias": "model-00001-of-00002.safetensors",
606
+ "model.visual.blocks.23.norm1.weight": "model-00001-of-00002.safetensors",
607
+ "model.visual.blocks.23.norm2.bias": "model-00001-of-00002.safetensors",
608
+ "model.visual.blocks.23.norm2.weight": "model-00001-of-00002.safetensors",
609
+ "model.visual.blocks.3.attn.proj.bias": "model-00001-of-00002.safetensors",
610
+ "model.visual.blocks.3.attn.proj.weight": "model-00001-of-00002.safetensors",
611
+ "model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00002.safetensors",
612
+ "model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00002.safetensors",
613
+ "model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
614
+ "model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
615
+ "model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
616
+ "model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
617
+ "model.visual.blocks.3.norm1.bias": "model-00001-of-00002.safetensors",
618
+ "model.visual.blocks.3.norm1.weight": "model-00001-of-00002.safetensors",
619
+ "model.visual.blocks.3.norm2.bias": "model-00001-of-00002.safetensors",
620
+ "model.visual.blocks.3.norm2.weight": "model-00001-of-00002.safetensors",
621
+ "model.visual.blocks.4.attn.proj.bias": "model-00001-of-00002.safetensors",
622
+ "model.visual.blocks.4.attn.proj.weight": "model-00001-of-00002.safetensors",
623
+ "model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00002.safetensors",
624
+ "model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00002.safetensors",
625
+ "model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
626
+ "model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
627
+ "model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
628
+ "model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
629
+ "model.visual.blocks.4.norm1.bias": "model-00001-of-00002.safetensors",
630
+ "model.visual.blocks.4.norm1.weight": "model-00001-of-00002.safetensors",
631
+ "model.visual.blocks.4.norm2.bias": "model-00001-of-00002.safetensors",
632
+ "model.visual.blocks.4.norm2.weight": "model-00001-of-00002.safetensors",
633
+ "model.visual.blocks.5.attn.proj.bias": "model-00001-of-00002.safetensors",
634
+ "model.visual.blocks.5.attn.proj.weight": "model-00001-of-00002.safetensors",
635
+ "model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00002.safetensors",
636
+ "model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00002.safetensors",
637
+ "model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
638
+ "model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
639
+ "model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
640
+ "model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
641
+ "model.visual.blocks.5.norm1.bias": "model-00001-of-00002.safetensors",
642
+ "model.visual.blocks.5.norm1.weight": "model-00001-of-00002.safetensors",
643
+ "model.visual.blocks.5.norm2.bias": "model-00001-of-00002.safetensors",
644
+ "model.visual.blocks.5.norm2.weight": "model-00001-of-00002.safetensors",
645
+ "model.visual.blocks.6.attn.proj.bias": "model-00001-of-00002.safetensors",
646
+ "model.visual.blocks.6.attn.proj.weight": "model-00001-of-00002.safetensors",
647
+ "model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00002.safetensors",
648
+ "model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00002.safetensors",
649
+ "model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
650
+ "model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
651
+ "model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
652
+ "model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
653
+ "model.visual.blocks.6.norm1.bias": "model-00001-of-00002.safetensors",
654
+ "model.visual.blocks.6.norm1.weight": "model-00001-of-00002.safetensors",
655
+ "model.visual.blocks.6.norm2.bias": "model-00001-of-00002.safetensors",
656
+ "model.visual.blocks.6.norm2.weight": "model-00001-of-00002.safetensors",
657
+ "model.visual.blocks.7.attn.proj.bias": "model-00001-of-00002.safetensors",
658
+ "model.visual.blocks.7.attn.proj.weight": "model-00001-of-00002.safetensors",
659
+ "model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00002.safetensors",
660
+ "model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00002.safetensors",
661
+ "model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
662
+ "model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
663
+ "model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
664
+ "model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
665
+ "model.visual.blocks.7.norm1.bias": "model-00001-of-00002.safetensors",
666
+ "model.visual.blocks.7.norm1.weight": "model-00001-of-00002.safetensors",
667
+ "model.visual.blocks.7.norm2.bias": "model-00001-of-00002.safetensors",
668
+ "model.visual.blocks.7.norm2.weight": "model-00001-of-00002.safetensors",
669
+ "model.visual.blocks.8.attn.proj.bias": "model-00001-of-00002.safetensors",
670
+ "model.visual.blocks.8.attn.proj.weight": "model-00001-of-00002.safetensors",
671
+ "model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00002.safetensors",
672
+ "model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00002.safetensors",
673
+ "model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
674
+ "model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
675
+ "model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
676
+ "model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
677
+ "model.visual.blocks.8.norm1.bias": "model-00001-of-00002.safetensors",
678
+ "model.visual.blocks.8.norm1.weight": "model-00001-of-00002.safetensors",
679
+ "model.visual.blocks.8.norm2.bias": "model-00001-of-00002.safetensors",
680
+ "model.visual.blocks.8.norm2.weight": "model-00001-of-00002.safetensors",
681
+ "model.visual.blocks.9.attn.proj.bias": "model-00001-of-00002.safetensors",
682
+ "model.visual.blocks.9.attn.proj.weight": "model-00001-of-00002.safetensors",
683
+ "model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00002.safetensors",
684
+ "model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00002.safetensors",
685
+ "model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00002.safetensors",
686
+ "model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00002.safetensors",
687
+ "model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00002.safetensors",
688
+ "model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00002.safetensors",
689
+ "model.visual.blocks.9.norm1.bias": "model-00001-of-00002.safetensors",
690
+ "model.visual.blocks.9.norm1.weight": "model-00001-of-00002.safetensors",
691
+ "model.visual.blocks.9.norm2.bias": "model-00001-of-00002.safetensors",
692
+ "model.visual.blocks.9.norm2.weight": "model-00001-of-00002.safetensors",
693
+ "model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00002.safetensors",
694
+ "model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00002.safetensors",
695
+ "model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00002.safetensors",
696
+ "model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00002.safetensors",
697
+ "model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00002.safetensors",
698
+ "model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00002.safetensors",
699
+ "model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00002.safetensors",
700
+ "model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00002.safetensors",
701
+ "model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00002.safetensors",
702
+ "model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00002.safetensors",
703
+ "model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00002.safetensors",
704
+ "model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00002.safetensors",
705
+ "model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00002.safetensors",
706
+ "model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00002.safetensors",
707
+ "model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00002.safetensors",
708
+ "model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00002.safetensors",
709
+ "model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00002.safetensors",
710
+ "model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00002.safetensors",
711
+ "model.visual.merger.linear_fc1.bias": "model-00001-of-00002.safetensors",
712
+ "model.visual.merger.linear_fc1.weight": "model-00001-of-00002.safetensors",
713
+ "model.visual.merger.linear_fc2.bias": "model-00001-of-00002.safetensors",
714
+ "model.visual.merger.linear_fc2.weight": "model-00001-of-00002.safetensors",
715
+ "model.visual.merger.norm.bias": "model-00001-of-00002.safetensors",
716
+ "model.visual.merger.norm.weight": "model-00001-of-00002.safetensors",
717
+ "model.visual.patch_embed.proj.bias": "model-00001-of-00002.safetensors",
718
+ "model.visual.patch_embed.proj.weight": "model-00001-of-00002.safetensors",
719
+ "model.visual.pos_embed.weight": "model-00001-of-00002.safetensors"
720
+ }
721
+ }
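The `weight_map` above assigns every tensor to one of the two safetensors shards. As a quick check (a minimal sketch, assuming the `grpo-nADE-format-RC` folder from this commit has been downloaded locally), the index file can be read directly to find which shard holds a given tensor:

```python
import json

# Look up the shard that stores a given tensor, using the index added in this commit.
# The relative path assumes the "grpo-nADE-format-RC" folder is available locally.
with open("grpo-nADE-format-RC/model.safetensors.index.json") as f:
    index = json.load(f)

shard = index["weight_map"]["model.language_model.norm.weight"]
print(shard)  # "model-00002-of-00002.safetensors", matching the entry above
```

`transformers` performs this resolution automatically when loading a sharded checkpoint with `from_pretrained`; reading the index by hand is mainly useful for inspecting or partially loading the weights.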
grpo-nADE-format-RC/preprocessor_config.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "disable_grouping": null,
7
+ "do_center_crop": null,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": null,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_mean": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "image_processor_type": "Qwen2VLImageProcessorFast",
19
+ "image_std": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "input_data_format": null,
25
+ "max_pixels": null,
26
+ "merge_size": 2,
27
+ "min_pixels": null,
28
+ "pad_size": null,
29
+ "patch_size": 16,
30
+ "processor_class": "Qwen3VLProcessor",
31
+ "resample": 3,
32
+ "rescale_factor": 0.00392156862745098,
33
+ "return_tensors": null,
34
+ "size": {
35
+ "longest_edge": 16777216,
36
+ "shortest_edge": 65536
37
+ },
38
+ "temporal_patch_size": 2
39
+ }
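The preprocessor config above sets up a `Qwen2VLImageProcessorFast` (16×16 patches, temporal patch size 2, merge size 2, mean/std of 0.5) for use with a `Qwen3VLProcessor`. A minimal sketch of exercising it, assuming the checkpoint folder from this commit is available locally:

```python
from PIL import Image
from transformers import AutoImageProcessor

# Load the image processor defined by the preprocessor_config.json above.
# The local path is an assumption; point it at wherever the checkpoint was downloaded.
image_processor = AutoImageProcessor.from_pretrained("grpo-nADE-format-RC")

dummy = Image.new("RGB", (640, 480), color=(128, 128, 128))  # placeholder image
features = image_processor(images=dummy, return_tensors="pt")

# Qwen2-VL style processors return flattened patches plus their (t, h, w) grid sizes.
print(features["pixel_values"].shape)
print(features["image_grid_thw"])
```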
grpo-nADE-format-RC/tokenizer_config.json ADDED
@@ -0,0 +1,244 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "max_length": null,
235
+ "model_max_length": 262144,
236
+ "pad_to_multiple_of": null,
237
+ "pad_token": "<|endoftext|>",
238
+ "pad_token_type_id": 0,
239
+ "padding_side": "left",
240
+ "processor_class": "Qwen3VLProcessor",
241
+ "split_special_tokens": false,
242
+ "tokenizer_class": "Qwen2Tokenizer",
243
+ "unk_token": null
244
+ }
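Correspondingly, a hedged sketch of loading the tokenizer this config describes and rendering one chat turn (the local path is an assumption; the chat template itself is supplied by the chat_template.jinja file added alongside it):

```python
from transformers import AutoTokenizer

# Load the Qwen2Tokenizer configured above: eos "<|im_end|>", pad "<|endoftext|>",
# left padding, and a 262144-token model_max_length.
tokenizer = AutoTokenizer.from_pretrained("grpo-nADE-format-RC")  # assumed local path

messages = [{"role": "user", "content": "Describe the scene ahead."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

print(prompt)               # ends with the assistant header defined by the chat template
print(tokenizer.eos_token)  # "<|im_end|>"
```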
grpo-nADE-format-RC/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format/README.md ADDED
@@ -0,0 +1,68 @@
1
+ ---
2
+ base_model: mjf-su/PhysicalAI-reason-VLA-MetaAction-1e
3
+ library_name: transformers
4
+ model_name: grpo-nADE-format
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - grpo
9
+ licence: license
10
+ ---
11
+
12
+ # Model Card for grpo-nADE-format
13
+
14
+ This model is a fine-tuned version of [mjf-su/PhysicalAI-reason-VLA-MetaAction-1e](https://huggingface.co/mjf-su/PhysicalAI-reason-VLA-MetaAction-1e).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="mjf-su/grpo-nADE-format", device="cuda")  # hub id is assumed; the auto-generated card left model="None"
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/mjf-su-stanford-university/GRPO-faithfulness/runs/2qw6ji2w)
31
+
32
+
33
+ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.26.1
38
+ - Transformers: 4.57.6
39
+ - Pytorch: 2.10.0
40
+ - Datasets: 4.4.1
41
+ - Tokenizers: 0.22.1
42
+
43
+ ## Citations
44
+
45
+ Cite GRPO as:
46
+
47
+ ```bibtex
48
+ @article{shao2024deepseekmath,
49
+ title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
50
+ author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
51
+ year = 2024,
52
+ eprint = {arXiv:2402.03300},
53
+ }
54
+
55
+ ```
56
+
57
+ Cite TRL as:
58
+
59
+ ```bibtex
60
+ @misc{vonwerra2022trl,
61
+ title = {{TRL: Transformer Reinforcement Learning}},
62
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
63
+ year = 2020,
64
+ journal = {GitHub repository},
65
+ publisher = {GitHub},
66
+ howpublished = {\url{https://github.com/huggingface/trl}}
67
+ }
68
+ ```
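The README above notes that the model was trained with TRL's GRPO, but the training script itself is not part of this commit. For orientation only, a minimal and hypothetical sketch of such a setup (the reward function and dataset below are placeholders, not the ones actually used):

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

def format_reward(completions, **kwargs):
    # Toy reward: favor completions that close their reasoning with </think>.
    return [1.0 if "</think>" in completion else 0.0 for completion in completions]

train_dataset = load_dataset("trl-lib/tldr", split="train")  # placeholder dataset

training_args = GRPOConfig(output_dir="grpo-nADE-format", num_generations=8)
trainer = GRPOTrainer(
    model="mjf-su/PhysicalAI-reason-VLA-MetaAction-1e",  # base model named in the card
    reward_funcs=format_reward,
    args=training_args,
    train_dataset=train_dataset,
)
trainer.train()
```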
grpo-nADE-format/checkpoint-417/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-nADE-format/checkpoint-417/vocab.json ADDED
The diff for this file is too large to render. See raw diff