AsherYang committed on
Commit
00a678d
·
verified ·
1 Parent(s): 06504e5

upload hf_global_step_111

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {%- if messages[0].content is string %}
5
+ {{- messages[0].content }}
6
+ {%- else %}
7
+ {%- for content in messages[0].content %}
8
+ {%- if 'text' in content %}
9
+ {{- content.text }}
10
+ {%- endif %}
11
+ {%- endfor %}
12
+ {%- endif %}
13
+ {{- '\n\n' }}
14
+ {%- endif %}
15
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
16
+ {%- for tool in tools %}
17
+ {{- "\n" }}
18
+ {{- tool | tojson }}
19
+ {%- endfor %}
20
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
21
+ {%- else %}
22
+ {%- if messages[0].role == 'system' %}
23
+ {{- '<|im_start|>system\n' }}
24
+ {%- if messages[0].content is string %}
25
+ {{- messages[0].content }}
26
+ {%- else %}
27
+ {%- for content in messages[0].content %}
28
+ {%- if 'text' in content %}
29
+ {{- content.text }}
30
+ {%- endif %}
31
+ {%- endfor %}
32
+ {%- endif %}
33
+ {{- '<|im_end|>\n' }}
34
+ {%- endif %}
35
+ {%- endif %}
36
+ {%- set image_count = namespace(value=0) %}
37
+ {%- set video_count = namespace(value=0) %}
38
+ {%- for message in messages %}
39
+ {%- if message.role == "user" %}
40
+ {{- '<|im_start|>' + message.role + '\n' }}
41
+ {%- if message.content is string %}
42
+ {{- message.content }}
43
+ {%- else %}
44
+ {%- for content in message.content %}
45
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
46
+ {%- set image_count.value = image_count.value + 1 %}
47
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
48
+ <|vision_start|><|image_pad|><|vision_end|>
49
+ {%- elif content.type == 'video' or 'video' in content %}
50
+ {%- set video_count.value = video_count.value + 1 %}
51
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
52
+ <|vision_start|><|video_pad|><|vision_end|>
53
+ {%- elif 'text' in content %}
54
+ {{- content.text }}
55
+ {%- endif %}
56
+ {%- endfor %}
57
+ {%- endif %}
58
+ {{- '<|im_end|>\n' }}
59
+ {%- elif message.role == "assistant" %}
60
+ {{- '<|im_start|>' + message.role + '\n' }}
61
+ {%- if message.content is string %}
62
+ {{- message.content }}
63
+ {%- else %}
64
+ {%- for content_item in message.content %}
65
+ {%- if 'text' in content_item %}
66
+ {{- content_item.text }}
67
+ {%- endif %}
68
+ {%- endfor %}
69
+ {%- endif %}
70
+ {%- if message.tool_calls %}
71
+ {%- for tool_call in message.tool_calls %}
72
+ {%- if (loop.first and message.content) or (not loop.first) %}
73
+ {{- '\n' }}
74
+ {%- endif %}
75
+ {%- if tool_call.function %}
76
+ {%- set tool_call = tool_call.function %}
77
+ {%- endif %}
78
+ {{- '<tool_call>\n{"name": "' }}
79
+ {{- tool_call.name }}
80
+ {{- '", "arguments": ' }}
81
+ {%- if tool_call.arguments is string %}
82
+ {{- tool_call.arguments }}
83
+ {%- else %}
84
+ {{- tool_call.arguments | tojson }}
85
+ {%- endif %}
86
+ {{- '}\n</tool_call>' }}
87
+ {%- endfor %}
88
+ {%- endif %}
89
+ {{- '<|im_end|>\n' }}
90
+ {%- elif message.role == "tool" %}
91
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
92
+ {{- '<|im_start|>user' }}
93
+ {%- endif %}
94
+ {{- '\n<tool_response>\n' }}
95
+ {%- if message.content is string %}
96
+ {{- message.content }}
97
+ {%- else %}
98
+ {%- for content in message.content %}
99
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
100
+ {%- set image_count.value = image_count.value + 1 %}
101
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
102
+ <|vision_start|><|image_pad|><|vision_end|>
103
+ {%- elif content.type == 'video' or 'video' in content %}
104
+ {%- set video_count.value = video_count.value + 1 %}
105
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
106
+ <|vision_start|><|video_pad|><|vision_end|>
107
+ {%- elif 'text' in content %}
108
+ {{- content.text }}
109
+ {%- endif %}
110
+ {%- endfor %}
111
+ {%- endif %}
112
+ {{- '\n</tool_response>' }}
113
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
114
+ {{- '<|im_end|>\n' }}
115
+ {%- endif %}
116
+ {%- endif %}
117
+ {%- endfor %}
118
+ {%- if add_generation_prompt %}
119
+ {{- '<|im_start|>assistant\n' }}
120
+ {%- endif %}
chat_template.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n{%- for message in messages %}\n {%- if message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ 
video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content_item in message.content %}\n {%- if 'text' in content_item %}\n {{- content_item.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and message.content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- 
'\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n"
3
+ }
4
+
config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "AxiomSForConditionalGeneration"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_axiom.AxiomSConfig",
7
+ "AutoImageProcessor": "processing_axiom.AxiomSImageProcessor",
8
+ "AutoModel": "modeling_axiom.AxiomSModel",
9
+ "AutoModelForImageTextToText": "modeling_axiom.AxiomSForConditionalGeneration",
10
+ "AutoProcessor": "processing_axiom.AxiomSProcessor",
11
+ "AutoVideoProcessor": "processing_axiom.AxiomSVideoProcessor"
12
+ },
13
+ "bos_token_id": null,
14
+ "dtype": "bfloat16",
15
+ "eos_token_id": 151645,
16
+ "hidden_size": 2560,
17
+ "image_token_id": 151655,
18
+ "model_type": "axiom_s",
19
+ "pad_token_id": 151643,
20
+ "text_config": {
21
+ "attention_bias": false,
22
+ "attention_dropout": 0.0,
23
+ "bos_token_id": 151643,
24
+ "dtype": "bfloat16",
25
+ "eos_token_id": 151645,
26
+ "head_dim": 128,
27
+ "hidden_act": "silu",
28
+ "hidden_size": 2560,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 9728,
31
+ "max_position_embeddings": 262144,
32
+ "model_type": "qwen3_vl_text",
33
+ "num_attention_heads": 32,
34
+ "num_hidden_layers": 36,
35
+ "num_key_value_heads": 8,
36
+ "pad_token_id": null,
37
+ "rms_norm_eps": 1e-06,
38
+ "rope_parameters": {
39
+ "mrope_interleaved": true,
40
+ "mrope_section": [
41
+ 24,
42
+ 20,
43
+ 20
44
+ ],
45
+ "rope_theta": 5000000,
46
+ "rope_type": "default"
47
+ },
48
+ "tie_word_embeddings": true,
49
+ "use_cache": false,
50
+ "vocab_size": 151936
51
+ },
52
+ "tie_word_embeddings": true,
53
+ "transformers_version": "5.5.4",
54
+ "video_token_id": 151656,
55
+ "vision_config": {
56
+ "deepstack_visual_indexes": [
57
+ 5,
58
+ 11,
59
+ 17
60
+ ],
61
+ "depth": 24,
62
+ "dtype": "bfloat16",
63
+ "hidden_act": "gelu_pytorch_tanh",
64
+ "hidden_size": 1024,
65
+ "in_channels": 3,
66
+ "initializer_range": 0.02,
67
+ "intermediate_size": 4096,
68
+ "model_type": "qwen3_vl",
69
+ "num_heads": 16,
70
+ "num_position_embeddings": 2304,
71
+ "out_hidden_size": 2560,
72
+ "patch_size": 16,
73
+ "spatial_merge_size": 2,
74
+ "temporal_patch_size": 2
75
+ },
76
+ "vision_end_token_id": 151653,
77
+ "vision_start_token_id": 151652
78
+ }
configuration_axiom.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Anonymized config subclass. Loaded via trust_remote_code=True."""
2
+ import base64 as _b64, transformers as _t
3
+
4
+ _B = getattr(_t, _b64.b64decode('UXdlbjNWTENvbmZpZw==').decode())
5
+
6
+
7
+ class AxiomSConfig(_B):
8
+ model_type = 'axiom_s'
9
+
10
+
11
+ __all__ = ["AxiomSConfig"]
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "eos_token_id": [
4
+ 151645,
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.0,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "5.5.4"
14
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b201ed8fd56d043bc48794bdebad0542d3875e7e109bbb6a43b0d13210cc5a51
3
+ size 9653631848
modeling_axiom.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Anonymized subclass. Loaded via trust_remote_code=True."""
2
+ import base64 as _b64, transformers as _t
3
+ from .configuration_axiom import AxiomSConfig
4
+
5
+ _ForCondGen = getattr(_t, _b64.b64decode('UXdlbjNWTEZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbg==').decode())
6
+ _BaseModel = getattr(_t, _b64.b64decode('UXdlbjNWTE1vZGVs').decode())
7
+
8
+
9
+ class AxiomSForConditionalGeneration(_ForCondGen):
10
+ # Must match the config subclass loaded from config.json; otherwise HF's
11
+ # AutoModel*.register() raises on config_class mismatch.
12
+ config_class = AxiomSConfig
13
+
14
+
15
+ # Exposed as AutoModel target so vLLM's transformers-backend fallback
16
+ # (which calls AutoModel.from_config) can instantiate a backbone. vLLM
17
+ # reattaches its own lm_head on top; tied embeddings or separate lm_head
18
+ # weights in the safetensors both resolve correctly.
19
+ class AxiomSModel(_BaseModel):
20
+ config_class = AxiomSConfig
21
+
22
+
23
+ __all__ = [
24
+ "AxiomSForConditionalGeneration",
25
+ "AxiomSModel",
26
+ ]
preprocessor_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {
3
+ "longest_edge": 16777216,
4
+ "shortest_edge": 65536
5
+ },
6
+ "patch_size": 16,
7
+ "temporal_patch_size": 2,
8
+ "merge_size": 2,
9
+ "image_mean": [
10
+ 0.5,
11
+ 0.5,
12
+ 0.5
13
+ ],
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "AxiomSProcessor",
20
+ "image_processor_type": "AxiomSImageProcessor",
21
+ "auto_map": {
22
+ "AutoProcessor": "processing_axiom.AxiomSProcessor",
23
+ "AutoImageProcessor": "processing_axiom.AxiomSImageProcessor"
24
+ }
25
+ }
processing_axiom.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Anonymized processor / image-processor / video-processor aliases.
2
+ Loaded via trust_remote_code=True.
3
+ """
4
+ import base64 as _b64, transformers as _t
5
+
6
+ _C1 = getattr(_t, _b64.b64decode('UXdlbjNWTFByb2Nlc3Nvcg==').decode())
7
+ class AxiomSProcessor(_C1):
8
+ pass
9
+
10
+ _C2 = getattr(_t, _b64.b64decode('UXdlbjJWTEltYWdlUHJvY2Vzc29y').decode())
11
+ class AxiomSImageProcessor(_C2):
12
+ pass
13
+
14
+ _C4 = getattr(_t, _b64.b64decode('UXdlbjNWTFZpZGVvUHJvY2Vzc29y').decode())
15
+ class AxiomSVideoProcessor(_C4):
16
+ pass
17
+
18
+
19
+ __all__ = ["AxiomSProcessor", "AxiomSImageProcessor", "AxiomSVideoProcessor"]
tokenization_axiom.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Anonymized tokenizer subclasses. Loaded via trust_remote_code=True."""
2
+ import base64 as _b64, transformers as _t
3
+
4
+ _S = getattr(_t, _b64.b64decode('UXdlbjJUb2tlbml6ZXI=').decode())
5
+ class AxiomSTokenizer(_S):
6
+ pass
7
+
8
+ _F = getattr(_t, _b64.b64decode('UXdlbjJUb2tlbml6ZXJGYXN0').decode())
9
+ class AxiomSTokenizerFast(_F):
10
+ pass
11
+
12
+
13
+ __all__ = ["AxiomSTokenizer", "AxiomSTokenizerFast"]
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "auto_map": {
4
+ "AutoProcessor": "processing_axiom.AxiomSProcessor",
5
+ "AutoTokenizer": [
6
+ null,
7
+ "tokenization_axiom.AxiomSTokenizerFast"
8
+ ]
9
+ },
10
+ "backend": "tokenizers",
11
+ "bos_token": null,
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": "<|im_end|>",
14
+ "errors": "replace",
15
+ "extra_special_tokens": [
16
+ "<|im_start|>",
17
+ "<|im_end|>",
18
+ "<|object_ref_start|>",
19
+ "<|object_ref_end|>",
20
+ "<|box_start|>",
21
+ "<|box_end|>",
22
+ "<|quad_start|>",
23
+ "<|quad_end|>",
24
+ "<|vision_start|>",
25
+ "<|vision_end|>",
26
+ "<|vision_pad|>",
27
+ "<|image_pad|>",
28
+ "<|video_pad|>"
29
+ ],
30
+ "is_local": true,
31
+ "model_max_length": 262144,
32
+ "pad_token": "<|endoftext|>",
33
+ "padding_side": "right",
34
+ "processor_class": "AxiomSProcessor",
35
+ "split_special_tokens": false,
36
+ "tokenizer_class": "AxiomSTokenizer",
37
+ "unk_token": null
38
+ }
video_preprocessor_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {
3
+ "longest_edge": 25165824,
4
+ "shortest_edge": 4096
5
+ },
6
+ "patch_size": 16,
7
+ "temporal_patch_size": 2,
8
+ "merge_size": 2,
9
+ "image_mean": [
10
+ 0.5,
11
+ 0.5,
12
+ 0.5
13
+ ],
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "AxiomSProcessor",
20
+ "video_processor_type": "AxiomSVideoProcessor",
21
+ "auto_map": {
22
+ "AutoProcessor": "processing_axiom.AxiomSProcessor",
23
+ "AutoVideoProcessor": "processing_axiom.AxiomSVideoProcessor"
24
+ }
25
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff