File size: 5,893 Bytes
2842319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
{#--- Token sequence written into the prompt wherever an image input is rendered ---#}
{%- set image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>" -%}
{#---
  Main rendering loop: emits one ChatML turn per entry of `messages`, in order.

  Context variables read here:
    messages  - sequence of mappings with 'role' and 'content'.
    template  - optional; when truthy the user turn is rendered as an extraction
                request ("# Template:" ... "# Context:") and the default system
                prompt names NuExtract.
    examples  - optional; only consulted when `template` is truthy. Each example
                has 'input' (text, the literal '<image>', or an image mapping)
                and 'output'.
    image_placeholder - defined earlier in this file.

  Roles handled: system, user, assistant. Any other role renders nothing.

  Accepted 'content' shapes: a string, a single part mapping, or a list of parts
  (mappings with 'type', or bare strings). Anything else renders as empty.
---#}
{%- for message in messages -%}
    {%- if message['role'] == 'system' -%}
        {{- '<|im_start|>system\n' -}}
        {%- if message['content'] is string -%}
            {{- message['content'] | trim -}}
        {%- elif message['content'] is iterable and message['content'] is not mapping -%}
            {#--- List of parts: render the text parts, same as the assistant branch, instead of dropping them ---#}
            {%- for content in message['content'] -%}
                {%- if content is mapping and content.get('type') == 'text' -%}
                    {{- content['text'] | trim -}}
                {%- elif content is string -%}
                    {{- content | trim -}}
                {%- endif -%}
            {%- endfor -%}
        {%- endif -%}
        {{- '<|im_end|>\n' -}}
    {%- elif message['role'] == 'user' -%}
        {%- if loop.first -%}
            {#--- No explicit system message came first: inject a default one ---#}
            {{- '<|im_start|>system\n' -}}
            {%- if template -%}
                {#--- If template, extraction task ---#}
                {{- 'You are NuExtract, an information extraction tool created by NuMind.' -}}
            {%- else -%}
                {#--- Else, template generation task ---#}
                {{- 'You are a helpful assistant.' -}}
            {%- endif -%}
            {{- '<|im_end|>\n' -}}
        {%- endif -%}
        {{- '<|im_start|>' + message['role'] + '\n' -}}
        {%- if template -%}
            {#--- Template section ---#}
            {{- '# Template:\n' -}}
            {{- template -}}
            {{- '\n' -}}
            {%- if examples -%}
                {#--- Examples can only exist in the extraction task ---#}
                {{- '# Examples:\n' -}}
                {%- for example in examples -%}
                    {{- '## Input:\n' -}}
                    {%- if (example['input'] is mapping and example['input']['type'] in ['image', 'image_url']) or example['input'] == '<image>' -%}
                        {#--- Image example; the literal '<image>' is kept for backward compatibility ---#}
                        {{- image_placeholder | trim -}}
                    {%- else -%}
                        {#--- Text input example ---#}
                        {{- example['input'] -}}
                    {%- endif -%}
                    {{- '\n' -}}
                    {{- '## Output:\n' -}}
                    {{- example['output'] -}}
                    {{- '\n' -}}
                {%- endfor -%}
            {%- endif -%}
            {{- '# Context:\n' -}}
        {%- endif -%}

        {%- set user_content = message['content'] -%}
        {%- if user_content is string -%}
            {#--- Simple string content ---#}
            {{- user_content | trim -}}
        {%- else -%}
            {#--- Normalise to a list of parts: a single mapping becomes a one-item list,
                  and a non-iterable value (e.g. none) becomes an empty list so the loops
                  below neither crash nor walk over a mapping's keys ---#}
            {%- set content_items = [user_content] if user_content is mapping else (user_content if user_content is iterable else []) -%}
            {%- set ns = namespace(has_text_input=false, text_content='', found_image=false) -%}

            {#--- Pass 1: look for real text input. The literal '<image>' is treated as an
                  image marker, not as text. If several text parts exist, the last one wins ---#}
            {%- for content in content_items -%}
                {%- if content is mapping and content.get('type') == 'text' -%}
                    {%- if content.get('text') != '<image>' -%}
                        {%- set ns.has_text_input = true -%}
                        {%- set ns.text_content = content['text'] -%}
                    {%- endif -%}
                {%- elif content is string -%}
                    {%- if content != '<image>' -%}
                        {%- set ns.has_text_input = true -%}
                        {%- set ns.text_content = content -%}
                    {%- endif -%}
                {%- endif -%}
            {%- endfor -%}

            {%- if ns.has_text_input -%}
                {#--- Main input is text: any image parts in the list are not rendered here ---#}
                {{- ns.text_content | trim -}}
            {%- else -%}
                {#--- Main input is an image: emit exactly one placeholder, for the first
                      image part or '<image>' marker found ---#}
                {%- for content in content_items -%}
                    {%- if not ns.found_image and (
                            (content is mapping and (
                                content.get('type') in ['image', 'image_url']
                                or (content.get('type') == 'text' and content.get('text') == '<image>')
                            ))
                            or (content is string and content == '<image>')
                        ) -%}
                        {{- image_placeholder | trim -}}
                        {%- set ns.found_image = true -%}
                    {%- endif -%}
                {%- endfor -%}
            {%- endif -%}
        {%- endif -%}
        {{- '<|im_end|>\n' -}}
    {%- elif message['role'] == 'assistant' -%}
        {{- '<|im_start|>assistant\n' -}}
        {%- if message['content'] is string -%}
            {{- message['content'] | trim -}}
        {%- elif message['content'] is iterable and message['content'] is not string -%}
            {#--- List of parts: concatenate the trimmed text parts, ignore everything else ---#}
            {%- for content in message['content'] -%}
                {%- if content is mapping and content.get('type') == 'text' -%}
                    {{- content['text'] | trim -}}
                {%- elif content is string -%}
                    {{- content | trim -}}
                {%- endif -%}
            {%- endfor -%}
        {%- endif -%}
        {{- '<|im_end|>\n' -}}
    {%- endif -%}
{%- endfor -%}
{#--- When the caller sets add_generation_prompt, open an assistant turn for the model to complete ---#}
{{- '<|im_start|>assistant\n' if add_generation_prompt else '' -}}