Image-Text-to-Text
Transformers
Safetensors
kimi_k25
feature-extraction
kimi
fp4
nvfp4
vllm
llm-compressor
compressed-tensors
conversational
custom_code
Instructions to use RedHatAI/Kimi-K2.6-NVFP4 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use RedHatAI/Kimi-K2.6-NVFP4 with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="RedHatAI/Kimi-K2.6-NVFP4", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoProcessor, AutoModel

processor = AutoProcessor.from_pretrained("RedHatAI/Kimi-K2.6-NVFP4", trust_remote_code=True)
model = AutoModel.from_pretrained("RedHatAI/Kimi-K2.6-NVFP4", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
inputs = processor.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:]))
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use RedHatAI/Kimi-K2.6-NVFP4 with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "RedHatAI/Kimi-K2.6-NVFP4"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "RedHatAI/Kimi-K2.6-NVFP4",
        "messages": [
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": "Describe this image in one sentence." },
                    { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/RedHatAI/Kimi-K2.6-NVFP4
- SGLang
How to use RedHatAI/Kimi-K2.6-NVFP4 with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "RedHatAI/Kimi-K2.6-NVFP4" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "RedHatAI/Kimi-K2.6-NVFP4",
        "messages": [
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": "Describe this image in one sentence." },
                    { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "RedHatAI/Kimi-K2.6-NVFP4" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "RedHatAI/Kimi-K2.6-NVFP4",
        "messages": [
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": "Describe this image in one sentence." },
                    { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use RedHatAI/Kimi-K2.6-NVFP4 with Docker Model Runner:
docker model run hf.co/RedHatAI/Kimi-K2.6-NVFP4
| { | |
| "schema_version": "0.2.2", | |
| "evaluation_id": "swebench_lite_dev/RedHatAI/Kimi-K2.6-NVFP4/1782913700", | |
| "retrieved_timestamp": "1782913700", | |
| "source_metadata": { | |
| "source_name": "mini-swe-agent", | |
| "source_type": "evaluation_run", | |
| "source_organization_name": "RedHatAI", | |
| "evaluator_relationship": "third_party", | |
| "additional_details": { | |
| "note": "submission-rate aggregation across reruns" | |
| } | |
| }, | |
| "eval_library": { | |
| "name": "mini-swe-agent", | |
| "version": "2.4.3" | |
| }, | |
| "model_info": { | |
| "name": "RedHatAI/Kimi-K2.6-NVFP4", | |
| "id": "RedHatAI/Kimi-K2.6-NVFP4", | |
| "developer": "RedHatAI", | |
| "inference_engine": { | |
| "name": "vllm", | |
| "version": "0.22.1" | |
| }, | |
| "additional_details": { | |
| "profile": "kimi_nvfp4", | |
| "subset": "lite", | |
| "split": "dev" | |
| } | |
| }, | |
| "evaluation_results": [ | |
| { | |
| "evaluation_name": "swebench_lite_dev/submission_rate", | |
| "source_data": { | |
| "dataset_name": "SWE-bench Lite dev", | |
| "source_type": "hf_dataset", | |
| "hf_repo": "princeton-nlp/SWE-Bench_Lite", | |
| "hf_split": "dev", | |
| "samples_number": 23, | |
| "sample_ids": [ | |
| "marshmallow-code__marshmallow-1343", | |
| "marshmallow-code__marshmallow-1359", | |
| "pvlib__pvlib-python-1072", | |
| "pvlib__pvlib-python-1154", | |
| "pvlib__pvlib-python-1606", | |
| "pvlib__pvlib-python-1707", | |
| "pvlib__pvlib-python-1854", | |
| "pydicom__pydicom-1139", | |
| "pydicom__pydicom-1256", | |
| "pydicom__pydicom-1413", | |
| "pydicom__pydicom-1694", | |
| "pydicom__pydicom-901", | |
| "pylint-dev__astroid-1196", | |
| "pylint-dev__astroid-1268", | |
| "pylint-dev__astroid-1333", | |
| "pylint-dev__astroid-1866", | |
| "pylint-dev__astroid-1978", | |
| "pyvista__pyvista-4315", | |
| "sqlfluff__sqlfluff-1517", | |
| "sqlfluff__sqlfluff-1625", | |
| "sqlfluff__sqlfluff-1733", | |
| "sqlfluff__sqlfluff-1763", | |
| "sqlfluff__sqlfluff-2419" | |
| ] | |
| }, | |
| "metric_config": { | |
| "evaluation_description": "Submission rate (Submitted / total instances).", | |
| "lower_is_better": false, | |
| "score_type": "continuous", | |
| "min_score": 0.0, | |
| "max_score": 1.0 | |
| }, | |
| "score_details": { | |
| "score": 0.9130434782608695, | |
| "details": { | |
| "total_instances": "23", | |
| "submitted_instances": "21", | |
| "unsubmitted_instances": "2", | |
| "status_counts": "{\"RepeatedFormatError\": 2, \"Submitted\": 21}", | |
| "unsubmitted_ids": "[\"sqlfluff__sqlfluff-1625\", \"sqlfluff__sqlfluff-1733\"]", | |
| "source_exit_status_files": "[\"/home/shubhra/kimik2.6_evals/runs/swebench/20260629T171225Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782760165.6381476.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T135435Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782827902.5985954.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T141107Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782830121.1086848.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T151900Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782833877.9161725.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T190648Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782846629.8118455.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T192452Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782848479.344266.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T195651Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782850223.0775344.yaml\"]" | |
| }, | |
| "completed_ids": [ | |
| "marshmallow-code__marshmallow-1343", | |
| "marshmallow-code__marshmallow-1359", | |
| "pvlib__pvlib-python-1072", | |
| "pvlib__pvlib-python-1154", | |
| "pvlib__pvlib-python-1606", | |
| "pvlib__pvlib-python-1707", | |
| "pvlib__pvlib-python-1854", | |
| "pydicom__pydicom-1139", | |
| "pydicom__pydicom-1256", | |
| "pydicom__pydicom-1413", | |
| "pydicom__pydicom-1694", | |
| "pydicom__pydicom-901", | |
| "pylint-dev__astroid-1196", | |
| "pylint-dev__astroid-1268", | |
| "pylint-dev__astroid-1333", | |
| "pylint-dev__astroid-1866", | |
| "pylint-dev__astroid-1978", | |
| "pyvista__pyvista-4315", | |
| "sqlfluff__sqlfluff-1517", | |
| "sqlfluff__sqlfluff-1625", | |
| "sqlfluff__sqlfluff-1733", | |
| "sqlfluff__sqlfluff-1763", | |
| "sqlfluff__sqlfluff-2419" | |
| ], | |
| "submitted_ids": [ | |
| "marshmallow-code__marshmallow-1343", | |
| "marshmallow-code__marshmallow-1359", | |
| "pvlib__pvlib-python-1072", | |
| "pvlib__pvlib-python-1154", | |
| "pvlib__pvlib-python-1606", | |
| "pvlib__pvlib-python-1707", | |
| "pvlib__pvlib-python-1854", | |
| "pydicom__pydicom-1139", | |
| "pydicom__pydicom-1256", | |
| "pydicom__pydicom-1413", | |
| "pydicom__pydicom-1694", | |
| "pydicom__pydicom-901", | |
| "pylint-dev__astroid-1196", | |
| "pylint-dev__astroid-1268", | |
| "pylint-dev__astroid-1333", | |
| "pylint-dev__astroid-1866", | |
| "pylint-dev__astroid-1978", | |
| "pyvista__pyvista-4315", | |
| "sqlfluff__sqlfluff-1517", | |
| "sqlfluff__sqlfluff-1763", | |
| "sqlfluff__sqlfluff-2419" | |
| ] | |
| }, | |
| "generation_config": { | |
| "generation_args": { | |
| "agentic_eval_config": { | |
| "available_tools": [ | |
| { | |
| "name": "bash" | |
| } | |
| ] | |
| }, | |
| "max_attempts": 1 | |
| } | |
| } | |
| } | |
| ] | |
| } |