QwenQKing committed on
Commit
e9444ea
·
verified ·
1 Parent(s): 29698b8

Upload 34 files

Browse files
Files changed (35) hide show
  1. .gitattributes +1 -0
  2. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_0.pt +3 -0
  3. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_1.pt +3 -0
  4. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_2.pt +3 -0
  5. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_3.pt +3 -0
  6. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_4.pt +3 -0
  7. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_5.pt +3 -0
  8. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_6.pt +3 -0
  9. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_7.pt +3 -0
  10. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/added_tokens.json +28 -0
  11. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/chat_template.jinja +54 -0
  12. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/config.json +68 -0
  13. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/merges.txt +0 -0
  14. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/special_tokens_map.json +31 -0
  15. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/tokenizer.json +3 -0
  16. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/tokenizer_config.json +239 -0
  17. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/vocab.json +0 -0
  18. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_0.pt +3 -0
  19. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_1.pt +3 -0
  20. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_2.pt +3 -0
  21. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_3.pt +3 -0
  22. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_4.pt +3 -0
  23. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_5.pt +3 -0
  24. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_6.pt +3 -0
  25. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_7.pt +3 -0
  26. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_0.pt +3 -0
  27. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_1.pt +3 -0
  28. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_2.pt +3 -0
  29. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_3.pt +3 -0
  30. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_4.pt +3 -0
  31. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_5.pt +3 -0
  32. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_6.pt +3 -0
  33. grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_7.pt +3 -0
  34. grpo-qwen3-4b-gpt-4o-mini/global_step_320/data.pt +3 -0
  35. grpo-qwen3-4b-gpt-4o-mini/latest_checkpointed_iteration.txt +1 -0
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Prompt-R1-gpt-oss/grpo-qwen3-4b-gpt-oss-20b/global_step_320/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  Prompt-R1-gpt-oss/grpo-qwen3-4b-gpt-oss-20b/global_step_320/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/tokenizer.json filter=lfs diff=lfs merge=lfs -text
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72b51bf1b0d4b0128df5dee5bee1c15833e4860694eb76b49eb6eb0926daf3d5
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff608a732eab60c3366f023a7d4420aa340473fef199653ac6e6f3b2f8b856a
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cd19a699e313878fe80d3763bb8e0d829812e32cdd4a2f153b09af90537c8e
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a260a1d92ac1ff26bd749b3c31a5a2a42e553e1bcb94c0dd18ec8350bdcc9944
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b60d94742a405087a6228635181850e1f562ccfce273a27f1869c49198a716
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:532d4dd4db32406de8567628f3da6b28ac9182dd8cc55ca5d42cd490dec47b8d
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcddadcd27b294f2b43c593caa6a7bcc2e2aa8e9fc2213772e8c897dd4d722a9
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/extra_state_world_size_8_rank_7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe1510c1aa9a10a55f15d0e2bd28e6f306f8a8b0a9472b0fe34020a149fd636
3
+ size 14632
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a great assistant. ' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou can call one or more powerful Large Language Models to answer the user's questions. But you MUST provide the tool with an explanation and analysis of the problem, as well as your thought process.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <interaction_prompt></interaction_prompt> XML tags:\n<interaction_prompt>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</interaction_prompt><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<interaction_prompt>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</interaction_prompt>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<interaction_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</interaction_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/config.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "dtype": "float32",
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 9728,
14
+ "layer_types": [
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention"
51
+ ],
52
+ "max_position_embeddings": 40960,
53
+ "max_window_layers": 36,
54
+ "model_type": "qwen3",
55
+ "num_attention_heads": 32,
56
+ "num_hidden_layers": 36,
57
+ "num_key_value_heads": 8,
58
+ "pad_token_id": 151643,
59
+ "rms_norm_eps": 1e-06,
60
+ "rope_scaling": null,
61
+ "rope_theta": 1000000,
62
+ "sliding_window": null,
63
+ "tie_word_embeddings": true,
64
+ "transformers_version": "4.56.2",
65
+ "use_cache": true,
66
+ "use_sliding_window": false,
67
+ "vocab_size": 151936
68
+ }
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/huggingface/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28173c76762f9f40cac47fa0836e903e43d66f639f1955737aaf2ba9f1fc634e
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf52b3e17fe0a20ff62f225961b777ffb8bc7d1c3c9a544417e34a1a8b60f1ff
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcfb039dfcf48c1f9616ece434be7066b81e4ed6a3e394784fc3d5b243a66621
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2854913d7dac2ed424ae703abc2e4523dbd1eeb3df6b47ea46271c199942086e
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fcff8e8dca569acdf60f5c3f8aeeb69cb43762d3de282d06a80633f412a939f
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c41e83d72ccd834c18569a4afb58b0cfc25b89f410f87298e162b4605d4884
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82983b780fd8adde91f5cabd402207efccfd5fe87ef390790604de6bffbc26e4
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/model_world_size_8_rank_7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca25f42d3dd9ae9b312a108734b2f2021c91c2488bf630c026710545bd6786b2
3
+ size 2205943162
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3760dfbf4c1ac88ac3239e1665c5a972f6b4a3a80338227d773474a12d0601d4
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:670600914f093c38f0640b2a50967920a7d87805aa61c2c2359bd3c49d6301bf
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d831645b9233de05dbf8ecf9083527507fa1a3df1e7ed027af360955e6c2c797
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:530cc0a45af3054e8dc14d05b6694cca391e6aac414eb552502e140b89673b84
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69b2fb27540df94fc1ab2dea44f8c019ac317022b93a3cbaef193079351d23f2
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9211a7abf8020003f414420b41ce27a45d10e74b24a034b1700dcc5dce007312
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7dabdaba41600b92416662d47a85d249ea2fea7efc538f53595a9e2f4f5cb82
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/actor/optim_world_size_8_rank_7.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35490e63dda35b9145d1746d9cd1fe83a396926f3d9372d45d931bc71bb83dce
3
+ size 4022501290
grpo-qwen3-4b-gpt-4o-mini/global_step_320/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3a51225c1ab27330c7d330a8210191d5b14c9270684562d704182bcfc5312d2
3
+ size 1492
grpo-qwen3-4b-gpt-4o-mini/latest_checkpointed_iteration.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 320