Instructions to use tencent/Hunyuan-1.8B-Instruct-AWQ-Int4 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use tencent/Hunyuan-1.8B-Instruct-AWQ-Int4 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="tencent/Hunyuan-1.8B-Instruct-AWQ-Int4")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-1.8B-Instruct-AWQ-Int4")
model = AutoModelForCausalLM.from_pretrained("tencent/Hunyuan-1.8B-Instruct-AWQ-Int4")
messages = [
    {"role": "user", "content": "Who are you?"},
]
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use tencent/Hunyuan-1.8B-Instruct-AWQ-Int4 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "tencent/Hunyuan-1.8B-Instruct-AWQ-Int4"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "tencent/Hunyuan-1.8B-Instruct-AWQ-Int4",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker
docker model run hf.co/tencent/Hunyuan-1.8B-Instruct-AWQ-Int4
- SGLang
How to use tencent/Hunyuan-1.8B-Instruct-AWQ-Int4 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "tencent/Hunyuan-1.8B-Instruct-AWQ-Int4" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "tencent/Hunyuan-1.8B-Instruct-AWQ-Int4",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "tencent/Hunyuan-1.8B-Instruct-AWQ-Int4" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "tencent/Hunyuan-1.8B-Instruct-AWQ-Int4",
        "messages": [
            {
                "role": "user",
                "content": "What is the capital of France?"
            }
        ]
    }'
- Docker Model Runner
How to use tencent/Hunyuan-1.8B-Instruct-AWQ-Int4 with Docker Model Runner:
docker model run hf.co/tencent/Hunyuan-1.8B-Instruct-AWQ-Int4
Upload tokenizer_config.json with huggingface_hub
Browse files
- tokenizer_config.json +3 -3
tokenizer_config.json
CHANGED
|
@@ -6548,8 +6548,8 @@
|
|
| 6548 |
"bos_token": "<|hy_begin▁of▁sentence|>",
|
| 6549 |
"clean_up_tokenization_spaces": true,
|
| 6550 |
"eos_token": "<|hy_place▁holder▁no▁2|>",
|
| 6551 |
-
"
|
| 6552 |
-
"model_max_length": 1000000000000000019884624838656,
|
| 6553 |
"pad_token": "<|hy_▁pad▁|>",
|
| 6554 |
-
"tokenizer_class": "
|
|
|
|
| 6555 |
}
|
|
|
|
| 6548 |
"bos_token": "<|hy_begin▁of▁sentence|>",
|
| 6549 |
"clean_up_tokenization_spaces": true,
|
| 6550 |
"eos_token": "<|hy_place▁holder▁no▁2|>",
|
| 6551 |
+
"model_max_length": 262144,
|
|
|
|
| 6552 |
"pad_token": "<|hy_▁pad▁|>",
|
| 6553 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 6554 |
+
"chat_template": "{%- if not add_generation_prompt is defined %}\n {%- set add_generation_prompt = false %}\n{%- endif %}\n{%- set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_first_user=true, is_last_user=false) %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {%- if ns.is_first_sp %}\n {%- set ns.system_prompt = ns.system_prompt + message['content'] %}\n {%- set ns.is_first_sp = false %}\n {%- else %}\n {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{{- bos_token }}\n{{- ns.system_prompt }}\n{%- if tools %}\n {%- if ns.system_prompt != '' %}\n {{- '\n\n# Tools\n\nYou may call one or more functions to assist with the user query.' }}\n {%- else %}\n {{- '# Tools\n\nYou may call one or more functions to assist with the user query.' }}\n {%- endif %}\n {{- '\n\nYou are provided with function signatures within <tools></tools> XML tags:' }}\n {{- '\n<tools>\n' }}\n {%- for tool in tools %}\n {%- if loop.index0 > 1 %}\n {{- '\n' }}\n {%- endif %}\n {{- tool | tojson }}\n {%- endfor %}\n {{- '\n</tools>\n\n' }}\n {{- 'For function call returns, you should first print <tool_calls>' }}\n {{- 'For each function call, you should return object like:\n' }}\n {{- '<tool_call>function_name\n```json\nfunction_arguments_in_json_format\n```</tool_call>' }}\n {{- 'At the end of function call returns, you should print </tool_calls>' }}\n{%- endif %}\n{%- if ns.system_prompt != '' or tools %}\n {{- '<|hy_place▁holder▁no▁3|>' }}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'user' %}\n {%- set ns.is_tool = false %}\n {%- set ns.is_first = false %}\n {%- set ns.is_last_user = true %}\n {{- '<|hy_User|>' + message['content'] + '<|hy_Assistant|>' }}\n {%- endif %}\n {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}\n {%- set 
ns.is_last_user = false %}\n {%- if ns.is_tool %}\n {{- '</tool_responses>' + '<|hy_Assistant|>' }}\n {%- endif %}\n {%- set ns.is_first = false %}\n {%- set ns.is_tool = false %}\n {%- set ns.is_output_first = true %}\n {%- for tool in message['tool_calls'] %}\n {%- set arguments = tool['function']['arguments'] %}\n {%- if arguments is not string %}\n {%- set arguments = arguments | tojson %}\n {%- endif %}\n {%- if not ns.is_first %}\n {%- if message['content'] is none %}\n {{- '<tool_calls><tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n {%- else %}\n {{- message['content'] + '<tool_calls><tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n {%- endif %}\n {%- set ns.is_first = true %}\n {%- else %}\n {{- '\n' + '<tool_call>' + tool['function']['name'] + '\n' + '```json' + '\n' + arguments + '\n' + '```' + '</tool_call>' }}\n {%- endif %}\n {%- endfor %}\n {{- '</tool_calls>' + eos_token }}\n {%- endif %}\n {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}\n {%- set content = message['content'] %}\n {%- if '<answer>' in content and not loop.last %}\n {%- set content = content.split('<answer>')[-1].strip('</answer>').strip() %}\n {%- endif %}\n {%- set ns.is_last_user = false %}\n {%- if ns.is_tool %}\n {{- '</tool_responses>' + '<|hy_Assistant|>' + content + eos_token }}\n {%- set ns.is_tool = false %}\n {%- else %}\n {{- content + eos_token }}\n {%- endif %}\n {%- endif %}\n {%- if message['role'] == 'tool' %}\n {%- set ns.is_last_user = false %}\n {%- set ns.is_tool = true %}\n {%- if ns.is_output_first %}\n {{- '<|hy_User|>' + '<tool_responses><tool_response>' + message['content'] + '</tool_response>' }}\n {%- set ns.is_output_first = false %}\n {%- else %}\n {{- '\n<tool_response>' + message['content'] + '</tool_response>' }}\n {%- endif %}\n {%- endif %}\n{%- endfor 
%}\n{%- if ns.is_tool %}\n {{- '</tool_responses>' + '<|hy_Assistant|>' }}\n{%- endif %}\n{%- if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}\n {{- '<|hy_Assistant|>' }}\n{%- endif %}\n{%- if enable_thinking is defined and not enable_thinking %}\n {{- '<think>\n\n</think>\n' }}\n{%- endif %}"
|
| 6555 |
}
|