yuntian-deng committed on
Commit
60f07dd
·
verified ·
1 Parent(s): 9f723ea

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ compiler/tokenizer.json filter=lfs diff=lfs merge=lfs -text
compiler/chat_template.jinja ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- for message in messages %}
18
+ {%- if message.content is string %}
19
+ {%- set content = message.content %}
20
+ {%- else %}
21
+ {%- set content = '' %}
22
+ {%- endif %}
23
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
24
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
25
+ {%- elif message.role == "assistant" %}
26
+ {{- '<|im_start|>' + message.role + '\n' + content }}
27
+ {%- if message.tool_calls %}
28
+ {%- for tool_call in message.tool_calls %}
29
+ {%- if (loop.first and content) or (not loop.first) %}
30
+ {{- '\n' }}
31
+ {%- endif %}
32
+ {%- if tool_call.function %}
33
+ {%- set tool_call = tool_call.function %}
34
+ {%- endif %}
35
+ {{- '<tool_call>\n{"name": "' }}
36
+ {{- tool_call.name }}
37
+ {{- '", "arguments": ' }}
38
+ {%- if tool_call.arguments is string %}
39
+ {{- tool_call.arguments }}
40
+ {%- else %}
41
+ {{- tool_call.arguments | tojson }}
42
+ {%- endif %}
43
+ {{- '}\n</tool_call>' }}
44
+ {%- endfor %}
45
+ {%- endif %}
46
+ {{- '<|im_end|>\n' }}
47
+ {%- elif message.role == "tool" %}
48
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
49
+ {{- '<|im_start|>user' }}
50
+ {%- endif %}
51
+ {{- '\n<tool_response>\n' }}
52
+ {{- content }}
53
+ {{- '\n</tool_response>' }}
54
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
55
+ {{- '<|im_end|>\n' }}
56
+ {%- endif %}
57
+ {%- endif %}
58
+ {%- endfor %}
59
+ {%- if add_generation_prompt %}
60
+ {{- '<|im_start|>assistant\n' }}
61
+ {%- endif %}
compiler/config.json ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2560,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 9728,
15
+ "layer_types": [
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 262144,
54
+ "max_window_layers": 36,
55
+ "model_type": "qwen3",
56
+ "num_attention_heads": 32,
57
+ "num_hidden_layers": 36,
58
+ "num_key_value_heads": 8,
59
+ "pad_token_id": null,
60
+ "rms_norm_eps": 1e-06,
61
+ "rope_parameters": {
62
+ "rope_theta": 5000000,
63
+ "rope_type": "default"
64
+ },
65
+ "sliding_window": null,
66
+ "tie_word_embeddings": true,
67
+ "transformers_version": "5.3.0.dev0",
68
+ "use_cache": true,
69
+ "use_sliding_window": false,
70
+ "vocab_size": 151733,
71
+ "rope_theta": 5000000
72
+ }
compiler/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.7,
10
+ "top_k": 20,
11
+ "top_p": 0.8,
12
+ "transformers_version": "5.3.0.dev0"
13
+ }
compiler/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aef7e47beb5a80ea34fe32b48b28ef4ae1a3de83d6544b6ed0ffd69f9d2b3e5
3
+ size 8043942720
compiler/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd8bc35d4523ff0a96c59767f45ad105b60a3df4823ea2d111c704d1c08e3e12
3
+ size 11434673
compiler/tokenizer_config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": {
9
+ "<prefix_1>": "<prefix_1>",
10
+ "<prefix_2>": "<prefix_2>",
11
+ "<prefix_3>": "<prefix_3>",
12
+ "<prefix_4>": "<prefix_4>",
13
+ "<prefix_5>": "<prefix_5>",
14
+ "<prefix_6>": "<prefix_6>",
15
+ "<prefix_7>": "<prefix_7>",
16
+ "<prefix_8>": "<prefix_8>",
17
+ "<prefix_9>": "<prefix_9>",
18
+ "<prefix_10>": "<prefix_10>",
19
+ "<prefix_11>": "<prefix_11>",
20
+ "<prefix_12>": "<prefix_12>",
21
+ "<prefix_13>": "<prefix_13>",
22
+ "<prefix_14>": "<prefix_14>",
23
+ "<prefix_15>": "<prefix_15>",
24
+ "<prefix_16>": "<prefix_16>",
25
+ "<prefix_17>": "<prefix_17>",
26
+ "<prefix_18>": "<prefix_18>",
27
+ "<prefix_19>": "<prefix_19>",
28
+ "<prefix_20>": "<prefix_20>",
29
+ "<prefix_21>": "<prefix_21>",
30
+ "<prefix_22>": "<prefix_22>",
31
+ "<prefix_23>": "<prefix_23>",
32
+ "<prefix_24>": "<prefix_24>",
33
+ "<prefix_25>": "<prefix_25>",
34
+ "<prefix_26>": "<prefix_26>",
35
+ "<prefix_27>": "<prefix_27>",
36
+ "<prefix_28>": "<prefix_28>",
37
+ "<prefix_29>": "<prefix_29>",
38
+ "<prefix_30>": "<prefix_30>",
39
+ "<prefix_31>": "<prefix_31>",
40
+ "<prefix_32>": "<prefix_32>",
41
+ "<prefix_33>": "<prefix_33>",
42
+ "<prefix_34>": "<prefix_34>",
43
+ "<prefix_35>": "<prefix_35>",
44
+ "<prefix_36>": "<prefix_36>",
45
+ "<prefix_37>": "<prefix_37>",
46
+ "<prefix_38>": "<prefix_38>",
47
+ "<prefix_39>": "<prefix_39>",
48
+ "<prefix_40>": "<prefix_40>",
49
+ "<prefix_41>": "<prefix_41>",
50
+ "<prefix_42>": "<prefix_42>",
51
+ "<prefix_43>": "<prefix_43>",
52
+ "<prefix_44>": "<prefix_44>",
53
+ "<prefix_45>": "<prefix_45>",
54
+ "<prefix_46>": "<prefix_46>",
55
+ "<prefix_47>": "<prefix_47>",
56
+ "<prefix_48>": "<prefix_48>",
57
+ "<prefix_49>": "<prefix_49>",
58
+ "<prefix_50>": "<prefix_50>",
59
+ "<prefix_51>": "<prefix_51>",
60
+ "<prefix_52>": "<prefix_52>",
61
+ "<prefix_53>": "<prefix_53>",
62
+ "<prefix_54>": "<prefix_54>",
63
+ "<prefix_55>": "<prefix_55>",
64
+ "<prefix_56>": "<prefix_56>",
65
+ "<prefix_57>": "<prefix_57>",
66
+ "<prefix_58>": "<prefix_58>",
67
+ "<prefix_59>": "<prefix_59>",
68
+ "<prefix_60>": "<prefix_60>",
69
+ "<prefix_61>": "<prefix_61>",
70
+ "<prefix_62>": "<prefix_62>",
71
+ "<prefix_63>": "<prefix_63>",
72
+ "<prefix_64>": "<prefix_64>"
73
+ },
74
+ "is_local": true,
75
+ "model_max_length": 1010000,
76
+ "pad_token": "<|endoftext|>",
77
+ "split_special_tokens": false,
78
+ "tokenizer_class": "Qwen2Tokenizer",
79
+ "unk_token": null
80
+ }
lora_mapper.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc618ad1aab05bf826aca4bbfd5e842abdd6750d66f112918330c6faa239a8b5
3
+ size 290495303
meta.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "opt_step": 210751,
3
+ "timestamp": 1775400448.2125497,
4
+ "compiler_model": "train_runs/big_run_gpt2.latest/compiler",
5
+ "interpreter_model": "gpt2",
6
+ "freeze_interpreter": true,
7
+ "freeze_compiler": true,
8
+ "reward_type": "gt_logprob",
9
+ "prefix_steps": 64,
10
+ "prefix_loss_coef": 1.0,
11
+ "compiler_rollout_reg_coef": 0.0,
12
+ "compiler_prompt_style": "minimal",
13
+ "num_rollouts": 1,
14
+ "debug_overfit_one": false,
15
+ "debug_example_idx": 0,
16
+ "debug_overfit_n": 0,
17
+ "lora_rank": 64,
18
+ "lora_alpha": 16.0,
19
+ "lora_target_modules": [
20
+ "c_attn",
21
+ "c_proj",
22
+ "c_fc"
23
+ ],
24
+ "lora_num_bases": 64
25
+ }