{#--- Chat template: renders `messages` as <|im_start|>role ... <|im_end|> turns. Besides `messages` it reads the render variables `template`, `examples` and `add_generation_prompt`. ---#}
{#--- Token sequence emitted wherever an image occupies an input slot ---#}
{%- set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{{- '<|im_start|>system
' -}}
{%- if message['content'] is string -%}
    {#--- Plain string system prompt ---#}
    {{- message['content'] | trim -}}
{%- elif message['content'] is iterable and message['content'] is not mapping -%}
    {#--- List of content items: emit each text part trimmed, the same way the assistant branch does. Mappings are excluded because iterating one would walk its keys. ---#}
    {%- for content in message['content'] -%}
        {%- if content is mapping and content.get('type') == 'text' -%}
            {{- content['text'] | trim -}}
        {%- elif content is string -%}
            {{- content | trim -}}
        {%- endif -%}
    {%- endfor -%}
{%- endif -%}
{{- '<|im_end|>
' -}}
{%- elif message['role'] == 'user' -%}
{%- if loop.first -%}
    {#--- The conversation opens with a user turn, so prepend a default system turn. The wording depends on the task: extraction when `template` is set, template generation otherwise. ---#}
    {%- set default_system_prompt = 'You are NuExtract, an information extraction tool created by NuMind.' if template else 'You are a helpful assistant.' -%}
    {{- '<|im_start|>system
' -}}
    {{- default_system_prompt -}}
    {{- '<|im_end|>
' -}}
{%- endif -%}
{{- '<|im_start|>' + message['role'] + '
' -}}
{%- if template -%}
    {#--- Extraction task: emit the template, then any in-context examples, then the context header ---#}
    {{- '# Template:
' -}}
    {{- template -}}
    {{- '
' -}}
    {%- if examples -%}
        {#--- Examples are only rendered when a template is present ---#}
        {{- '# Examples:
' -}}
        {%- for example in examples -%}
            {%- set example_input = example['input'] -%}
            {#--- An image example is a mapping typed image/image_url, or the legacy '<image>' string ---#}
            {%- set is_image_example = (example_input is mapping and example_input['type'] in ['image', 'image_url']) or example_input == '<image>' -%}
            {{- '## Input:
' -}}
            {%- if is_image_example -%}
                {{- image_placeholder | trim -}}
            {%- else -%}
                {#--- Text example: printed as given ---#}
                {{- example_input -}}
            {%- endif -%}
            {{- '
## Output:
' -}}
            {{- example['output'] -}}
            {{- '
' -}}
        {%- endfor -%}
    {%- endif -%}
    {{- '# Context:
' -}}
{%- endif -%}
{%- if message['content'] is string -%}
    {#--- Simple string content ---#}
    {{- message['content'] | trim -}}
{%- elif message['content'] is mapping -%}
    {#--- A single content item. It is handled here so it never reaches the list branch, where iterating the mapping would walk its keys and print a key name instead of the content. ---#}
    {%- if message['content']['type'] == 'image' or message['content']['type'] == 'image_url' -%}
        {{- image_placeholder | trim -}}
    {%- elif message['content']['type'] == 'text' -%}
        {%- if message['content']['text'] == '<image>' -%}
            {#--- Keep compatibility with <image> for now ---#}
            {{- image_placeholder | trim -}}
        {%- else -%}
            {{- message['content']['text'] | trim -}}
        {%- endif -%}
    {%- endif -%}
{%- elif message['content'] is iterable -%}
    {#--- List of content items (mixed text/images). One pass records the actual text input (the last text item wins) and whether any image, or legacy '<image>' marker, is present. Non-iterable content such as none renders nothing. ---#}
    {%- set ns = namespace(has_text_input=false, found_image=false, text_content='') -%}
    {%- for content in message['content'] -%}
        {%- if content is mapping -%}
            {%- if content.get('type') == 'text' -%}
                {%- if content.get('text') != '<image>' -%}
                    {%- set ns.has_text_input = true -%}
                    {%- set ns.text_content = content['text'] -%}
                {%- else -%}
                    {#--- Keep compatibility with <image> for now ---#}
                    {%- set ns.found_image = true -%}
                {%- endif -%}
            {%- elif content.get('type') == 'image' or content.get('type') == 'image_url' -%}
                {%- set ns.found_image = true -%}
            {%- endif -%}
        {%- elif content is string -%}
            {%- if content != '<image>' -%}
                {%- set ns.has_text_input = true -%}
                {%- set ns.text_content = content -%}
            {%- else -%}
                {#--- Keep compatibility with <image> for now ---#}
                {%- set ns.found_image = true -%}
            {%- endif -%}
        {%- endif -%}
    {%- endfor -%}
    {#--- Text input takes precedence; otherwise a single placeholder stands in for the image input ---#}
    {%- if ns.has_text_input -%}
        {{- ns.text_content | trim -}}
    {%- elif ns.found_image -%}
        {{- image_placeholder | trim -}}
    {%- endif -%}
{%- endif -%}
{{- '<|im_end|>
'}}
{%- elif message['role'] == 'assistant' -%}
{{- '<|im_start|>assistant
' -}}
{#--- A lone content mapping is wrapped in a list; iterating the mapping itself would walk its keys and print them instead of the text ---#}
{%- set assistant_parts = [message['content']] if message['content'] is mapping else message['content'] -%}
{%- if assistant_parts is string -%}
    {#--- Plain string reply ---#}
    {{- assistant_parts | trim -}}
{%- elif assistant_parts is iterable -%}
    {#--- List of content items: emit each text part trimmed, with no separator; other item types are skipped ---#}
    {%- for content in assistant_parts -%}
        {%- if content is mapping and content.get('type') == 'text' -%}
            {{- content['text'] | trim -}}
        {%- elif content is string -%}
            {{- content | trim -}}
        {%- endif -%}
    {%- endfor -%}
{%- endif -%}
{{- '<|im_end|>
' -}}
{%- endif -%}
{%- endfor -%}
{#--- When add_generation_prompt is set, end the prompt with an opening assistant turn header ---#}
{%- if add_generation_prompt -%}
    {{- '<|im_start|>assistant
' -}}
{%- endif -%}