AntonV HF Staff committed on
Commit
0bba43f
·
verified ·
1 Parent(s): 89d8d60

Upload processor

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {%- set image_count = namespace(value=0) -%}
3
+ {%- set video_count = namespace(value=0) -%}
4
+ {{- '<|begin_of_sentence|>' }}
5
+ {%- for message in messages -%}
6
+ {%- if message.role in ['system', 'user'] -%}
7
+ {%- if message.role == 'user' -%}
8
+ {{- 'User: ' -}}
9
+ {%- endif -%}
10
+ {%- if message.content is string -%}
11
+ {{- message.content -}}
12
+ {%- else -%}
13
+ {%- for content_item in message.content -%}
14
+ {%- if content_item.type == 'text' -%}
15
+ {{- content_item.text -}}
16
+ {%- elif content_item.type in ['image_url', 'image'] -%}
17
+ {%- set image_count.value = image_count.value + 1 -%}
18
+ Picture {{ image_count.value }}:<|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>
19
+ {%- elif content_item.type in ['video_url', 'video'] -%}
20
+ {%- set video_count.value = video_count.value + 1 -%}
21
+ Video {{ video_count.value }}:<|VIDEO_START|><|VIDEO_PLACEHOLDER|><|VIDEO_END|>
22
+ {%- endif -%}
23
+ {%- endfor -%}
24
+ {%- endif -%}
25
+ {%- if message.role == 'system' -%}
26
+ {{- '
27
+ ' -}}
28
+ {%- endif -%}
29
+ {%- elif message.role == 'assistant' -%}
30
+ {%- macro extract_text_content(content_field) -%}
31
+ {%- if content_field is string -%}
32
+ {{- content_field -}}
33
+ {%- elif content_field is iterable and content_field is not string -%}
34
+ {%- set ns = namespace(text_parts=[]) -%}
35
+ {%- set text_parts = [] -%}
36
+ {%- for item in content_field -%}
37
+ {%- if item.type == 'text' -%}
38
+ {%- set ns.text_parts = ns.text_parts + [item.text] -%}
39
+ {%- endif -%}
40
+ {%- endfor -%}
41
+ {{- ns.text_parts | join("") -}}
42
+ {%- else -%}
43
+ {{- '' -}}
44
+ {%- endif -%}
45
+ {%- endmacro -%}
46
+ {%- set reasoning_content = extract_text_content(message.reasoning_content) -%}
47
+ {%- set content = extract_text_content(message.content) -%}
48
+ {%- if '</think>' in content %}
49
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('
50
+ ').split('<think>')[-1].lstrip('
51
+ ') %}
52
+ {%- set content = content.split('</think>')[-1].lstrip('
53
+ ') %}
54
+ {%- endif %}
55
+ {%- if reasoning_content %}
56
+ {{- '
57
+ ' + 'Assistant: ' + '<think>
58
+ ' + reasoning_content.strip('
59
+ ') + '
60
+ </think>
61
+ ' + content.lstrip('
62
+ ') }}
63
+ {%- else %}
64
+ {{- '
65
+ ' + 'Assistant: ' + content }}
66
+ {%- endif %}
67
+ {{- '<|end_of_sentence|>' }}
68
+ {%- endif -%}
69
+ {%- endfor -%}
70
+ {%- if add_generation_prompt is not defined or add_generation_prompt is true %}
71
+ {{- '
72
+ Assistant: ' -}}
73
+ {%- if (enable_thinking is defined and enable_thinking is false) or enable_thinking is not defined %}
74
+ {{- '<think>
75
+
76
+ </think>
77
+
78
+ ' }}
79
+ {%- endif %}
80
+ {%- if enable_thinking is defined and enable_thinking is true %}{{- '<think>' }}{%- endif %}
81
+ {%- endif %}
processor_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "data_format": "channels_first",
4
+ "do_convert_rgb": true,
5
+ "do_normalize": true,
6
+ "do_rescale": true,
7
+ "do_resize": true,
8
+ "image_mean": [
9
+ 0.48145466,
10
+ 0.4578275,
11
+ 0.40821073
12
+ ],
13
+ "image_processor_type": "Ernie4_5_VL_MoeImageProcessorFast",
14
+ "image_std": [
15
+ 0.26862954,
16
+ 0.26130258,
17
+ 0.27577711
18
+ ],
19
+ "merge_size": 2,
20
+ "patch_size": 14,
21
+ "processor_class": "Ernie4_5_VL_MoeProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "longest_edge": 4842768,
26
+ "shortest_edge": 3136
27
+ }
28
+ },
29
+ "processor_class": "Ernie4_5_VL_MoeProcessor",
30
+ "video_processor": {
31
+ "data_format": "channels_first",
32
+ "default_to_square": true,
33
+ "do_convert_rgb": true,
34
+ "do_normalize": true,
35
+ "do_rescale": true,
36
+ "do_resize": true,
37
+ "do_sample_frames": true,
38
+ "draw_on_frames": true,
39
+ "font": "Roboto-Regular.ttf",
40
+ "image_mean": [
41
+ 0.48145466,
42
+ 0.4578275,
43
+ 0.40821073
44
+ ],
45
+ "image_std": [
46
+ 0.26862954,
47
+ 0.26130258,
48
+ 0.27577711
49
+ ],
50
+ "max_frames": 180,
51
+ "merge_size": 2,
52
+ "min_frames": 16,
53
+ "patch_size": 14,
54
+ "processor_class": "Ernie4_5_VL_MoeProcessor",
55
+ "resample": 3,
56
+ "rescale_factor": 0.00392156862745098,
57
+ "return_metadata": false,
58
+ "size": {
59
+ "longest_edge": 937664,
60
+ "shortest_edge": 234416
61
+ },
62
+ "temporal_patch_size": 2,
63
+ "video_processor_type": "Ernie4_5_VL_MoeVideoProcessor"
64
+ }
65
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94718bd0ed3c1f3591a1cf2c517d6364283457f2fd7e95ae4d201138e922079d
3
+ size 11590466
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "cls_token": "<|begin_of_sentence|>",
7
+ "eos_token": "</s>",
8
+ "extra_special_tokens": [
9
+ "<|IMAGE_PLACEHOLDER|>",
10
+ "<|IMAGE_START|>",
11
+ "<|IMAGE_END|>",
12
+ "<|VIDEO_PLACEHOLDER|>",
13
+ "<|VIDEO_START|>",
14
+ "<|VIDEO_END|>",
15
+ "<think>",
16
+ "</think>"
17
+ ],
18
+ "header_end_token": "<mask:7>",
19
+ "header_start_token": "<mask:6>",
20
+ "image_end_token": "<|IMAGE_END|>",
21
+ "image_start_token": "<|IMAGE_START|>",
22
+ "image_token": "<|IMAGE_PLACEHOLDER|>",
23
+ "is_local": true,
24
+ "mask_token": "<mask:1>",
25
+ "model_max_length": 131072,
26
+ "model_specific_special_tokens": {
27
+ "header_end_token": "<mask:7>",
28
+ "header_start_token": "<mask:6>",
29
+ "image_end_token": "<|IMAGE_END|>",
30
+ "image_start_token": "<|IMAGE_START|>",
31
+ "image_token": "<|IMAGE_PLACEHOLDER|>",
32
+ "sys_end_token": "<mask:5>",
33
+ "sys_start_token": "<mask:4>",
34
+ "video_end_token": "<|VIDEO_END|>",
35
+ "video_start_token": "<|VIDEO_START|>",
36
+ "video_token": "<|VIDEO_PLACEHOLDER|>"
37
+ },
38
+ "pad_token": "<unk>",
39
+ "processor_class": "Ernie4_5_VL_MoeProcessor",
40
+ "sep_token": "<|end_of_sentence|>",
41
+ "sys_end_token": "<mask:5>",
42
+ "sys_start_token": "<mask:4>",
43
+ "tokenizer_class": "LlamaTokenizer",
44
+ "unk_token": "<unk>",
45
+ "use_default_system_prompt": false,
46
+ "video_end_token": "<|VIDEO_END|>",
47
+ "video_start_token": "<|VIDEO_START|>",
48
+ "video_token": "<|VIDEO_PLACEHOLDER|>"
49
+ }