Instructions to use numind/NuExtract3-mlx-4bits with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- MLX
How to use numind/NuExtract3-mlx-4bits with MLX:
```bash
# Download the model from the Hub
pip install huggingface_hub[hf_xet]
huggingface-cli download --local-dir NuExtract3-mlx-4bits numind/NuExtract3-mlx-4bits
```
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- LM Studio
File size: 6,752 Bytes
29c3826 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 | {%- if not messages %}
{#- Guard: abort rendering when `messages` is falsy (empty or missing).
    NOTE(review): the text before the first tag on the line above looks like file-viewer residue
    (revision id and line-number gutter) rather than template content; confirm and strip it before
    using this file as a chat template. -#}
{{- raise_exception('No messages provided.') }}
{%- endif %}
{#- Shared state: `image_count.value` is the running image counter (incremented inside
    `render_content`); `image_placeholder` holds the vision placeholder string. -#}
{%- set image_count = namespace(value=0) -%}
{%- set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' -%}
{#- Resolve the task mode: a supplied `template` always selects 'structured'; otherwise `mode`
    defaults to 'content', and asking for 'structured' without a template is an error. -#}
{%- if template -%}
{%- set mode = 'structured' -%}
{%- else -%}
{%- set mode = mode | default('content') -%}
{%- if mode == 'structured' -%}
{{- raise_exception('`structured` mode specified but no `template` provided.') }}
{%- endif -%}
{%- endif -%}
{#- 'markdown' and every unrecognised mode are both rendered as 'content'. -#}
{%- if mode not in ['structured', 'content', 'template-generation', 'document-detection'] -%}
{%- set mode = 'content' -%}
{%- endif -%}
{#- Thinking may only be enabled for the 'structured' and 'content' modes. -#}
{%- set enable_thinking = enable_thinking | default(False) -%}
{%- if enable_thinking and mode not in ['structured', 'content'] -%}
{{- raise_exception('`enable_thinking` can only be `True` for `structured` and `content` modes.') }}
{%- endif -%}
{#- `has_examples.flag` records whether the examples section has been opened; outside
    'structured' mode the name is rebound to a plain `false`. -#}
{%- set has_examples = namespace(flag=false) -%}
{%- if mode != 'structured' -%}
{%- set has_examples = false -%}
{%- endif -%}
{# render_content(content, do_vision_count, is_system_content=false)
   Renders one message `content` value as text.
   - string: emitted unchanged.
   - iterable that is not a mapping: each image item (has an `image` or `image_url` key, or
     `type == 'image'`) becomes the vision placeholder followed by a newline, prefixed with
     'Picture N: ' when `add_vision_id` is set; each item with a `text` key becomes that text
     followed by a newline; any other item raises.
   - none / undefined: renders as an empty string.
   - anything else raises.
   `do_vision_count` decides whether `image_count.value` is incremented for each image;
   `is_system_content` makes any image item raise. #}
{%- macro render_content(content, do_vision_count, is_system_content=false) -%}
{%- if content is string -%}
{{- content -}}
{%- elif content is iterable and content is not mapping -%}
{%- for part in content -%}
{%- set part_is_image = 'image' in part or 'image_url' in part or part.type == 'image' -%}
{%- if part_is_image -%}
{%- if is_system_content -%}
{{- raise_exception('System message cannot contain images.') }}
{%- endif -%}
{%- if do_vision_count -%}
{%- set image_count.value = image_count.value + 1 -%}
{%- endif -%}
{%- if add_vision_id -%}
{{- 'Picture ' ~ image_count.value ~ ': ' -}}
{%- endif -%}
{{- '<|vision_start|><|image_pad|><|vision_end|>\n' -}}
{%- elif 'text' in part -%}
{{- part.text + '\n' -}}
{%- else -%}
{{- raise_exception('Unexpected item type in content.') }}
{%- endif -%}
{%- endfor -%}
{%- elif content is none or content is undefined -%}
{{- '' -}}
{%- else -%}
{{- raise_exception('Unexpected content type.') }}
{%- endif -%}
{%- endmacro %}
{# SYSTEM PROMPT: emitted only when the very first message has the 'system' role.
   Its content is rendered without image counting, and images in it raise. #}
{%- set first_message = messages[0] -%}
{%- if first_message.role == 'system' -%}
{{- '<|im_start|>system\n' + render_content(first_message.content, false, true)|trim + '<|im_end|>\n' }}
{%- endif %}
{# USER MESSAGE: opens the single user turn and states the task (the mode with '-' replaced by ' '). #}
{{- '<|im_start|>user\n' -}}
{{- '【task】' + mode|replace("-", " ") + '\n' -}}
{# Structured-task section: template, optional instructions, optional examples, optional previous output. #}
{%- if mode == 'structured' -%}
{{- '【template_start】' + template + '【template_end】\n' -}}
{#- Instructions are emitted only when a truthy `instructions` value is supplied. -#}
{%- if instructions -%}
{{- '【instructions_start】' + instructions + '【instructions_end】\n'-}}
{%- endif -%}
{#- Examples: every 'developer' message with a `content` key is one example. All content items
    but the last are the example input; the last item is the example output.
    NOTE(review): this slicing assumes `content` is a list of items; a plain string would be
    sliced per character. Confirm callers always pass a list. -#}
{%- for message in messages -%}
{%- if message.role == 'developer' and 'content' in message -%}
{%- set example_inputs = message.content[:-1] -%}
{%- set example_output_part = message.content[-1] -%}
{#- An example needs at least one input item besides its output; otherwise it is skipped. -#}
{%- if example_inputs|length > 0 -%}
{#- Open the examples section lazily, on the first valid example. -#}
{%- if not has_examples.flag -%}
{{- '【examples_start】\n' -}}
{%- set has_examples.flag = true -%}
{%- endif -%}
{{- '【example_input_start】' + render_content(example_inputs, true)|trim + '【example_input_end】\n' -}}
{#- Example output: the last item itself when it is a string, else its `text` field when
    defined, else an empty string. -#}
{%- set output_text = '' -%}
{%- if example_output_part is string -%}
{%- set output_text = example_output_part -%}
{%- elif example_output_part.text is defined -%}
{%- set output_text = example_output_part.text -%}
{%- endif -%}
{{- '【example_output_start】' + output_text|trim + '【example_output_end】\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{#- Close the examples section once, after all messages have been scanned. Doing this inside
    the loop under `loop.last` only worked when a valid developer example was the final
    message; with a user message after the examples the section was never closed. -#}
{%- if has_examples.flag -%}
{{- '【examples_end】\n' -}}
{%- endif -%}
{#- Previous output is emitted only when a truthy `previous_output` value is supplied. -#}
{%- if previous_output -%}
{{- '【previous_output_start】' + previous_output + '【previous_output_end】\n' -}}
{%- endif -%}
{%- endif -%}
{{- '【document_start】\n' -}}
{#- Fold every user message whose `name` is not "example" into the single user turn: each one
    is rendered, trimmed and followed by a newline. A system message anywhere but first is
    rejected. Assistant turns in the middle of the conversation are deliberately ignored: the
    former 'Assistant message must be at the end.' check is disabled because llama.cpp renders a
    synthetic init example with an assistant turn in the middle, and valid NuExtract prompts
    must keep rendering unchanged. -#}
{%- for entry in messages -%}
{%- if entry.role == 'user' and entry.name != "example" -%}
{{- render_content(entry.content, true)|trim + '\n' -}}
{%- elif entry.role == "system" and not loop.first -%}
{{- raise_exception('System message must be at the beginning.') }}
{%- endif -%}
{%- endfor -%}
{{- '【document_end】<|im_end|>\n' -}}
{# ASSISTANT MESSAGE: when the last message has the 'assistant' role, render it as a completed
   assistant turn with its reasoning wrapped in <think> tags.
   NOTE(review): `generation` / `endgeneration` are not core Jinja tags; presumably the rendering
   environment provides them to mark the assistant span. Confirm against the target renderer. #}
{%- if messages[-1].role == 'assistant' %}
{#- A generation prompt and an already-provided assistant turn are mutually exclusive. -#}
{%- if add_generation_prompt -%}
{{- raise_exception('`add_generation_prompt` can only be `True` when no assistant message is provided.') }}
{%- endif %}
{%- set content = render_content(messages[-1].content, true)|trim %}
{%- set reasoning_content = '' %}
{#- Reasoning source: an explicit string `reasoning_content` field wins. Otherwise, if the
    rendered content contains '</think>', the reasoning is the text after the last '<think>'
    within the part preceding the first '</think>', and the content is reduced to what follows
    the last '</think>' (surrounding newlines stripped). -#}
{%- if messages[-1].reasoning_content is string %}
{%- set reasoning_content = messages[-1].reasoning_content %}
{%- else %}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{%- set reasoning_content = reasoning_content|trim %}
{% generation %}
{{- '<|im_start|>assistant\n<think>\n' + reasoning_content + '\n</think>\n\n' + content + '<|im_end|>\n' -}}
{% endgeneration %}
{%- endif -%}
{# GENERATION PROMPT: opens the assistant turn. With thinking enabled the <think> block is left
   open; otherwise an empty, already-closed <think> block is emitted. #}
{%- if add_generation_prompt -%}
{%- set assistant_header = '<|im_start|>assistant\n' -%}
{%- if enable_thinking -%}
{{- assistant_header + '<think>\n' -}}
{%- else -%}
{{- assistant_header + '<think>\n\n</think>\n\n' -}}
{%- endif -%}
{%- endif -%}
|