Upload folder using huggingface_hub
Browse files
triton/Qwen2-VL-7B-Instruct/1/model.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model":"Qwen/Qwen2-VL-7B-Instruct",
|
| 3 |
+
"disable_log_requests": true,
|
| 4 |
+
"gpu_memory_utilization": 0.95
|
| 5 |
+
}
|
triton/Qwen2-VL-7B-Instruct/config.pbtxt
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
| 2 |
+
#
|
| 3 |
+
# Redistribution and use in source and binary forms, with or without
|
| 4 |
+
# modification, are permitted provided that the following conditions
|
| 5 |
+
# are met:
|
| 6 |
+
# * Redistributions of source code must retain the above copyright
|
| 7 |
+
# notice, this list of conditions and the following disclaimer.
|
| 8 |
+
# * Redistributions in binary form must reproduce the above copyright
|
| 9 |
+
# notice, this list of conditions and the following disclaimer in the
|
| 10 |
+
# documentation and/or other materials provided with the distribution.
|
| 11 |
+
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
| 12 |
+
# contributors may be used to endorse or promote products derived
|
| 13 |
+
# from this software without specific prior written permission.
|
| 14 |
+
#
|
| 15 |
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
| 16 |
+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| 17 |
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
| 18 |
+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
| 19 |
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
| 20 |
+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
| 21 |
+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
| 22 |
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
| 23 |
+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 24 |
+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 25 |
+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 26 |
+
|
| 27 |
+
# Note: You do not need to change any fields in this configuration; vLLM engine arguments are set in 1/model.json.
|
| 28 |
+
|
| 29 |
+
backend: "vllm"
|
| 30 |
+
|
| 31 |
+
# vLLM Health Check - https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/vllm_backend/docs/health_check.html
|
| 32 |
+
parameters: {
|
| 33 |
+
key: "ENABLE_VLLM_HEALTH_CHECK"
|
| 34 |
+
value: { string_value: "true" }
|
| 35 |
+
}
|
| 36 |
+
|
| 37 |
+
# Device selection and placement are deferred to the vLLM engine, so the instance group uses KIND_MODEL.
|
| 38 |
+
instance_group [
|
| 39 |
+
{
|
| 40 |
+
count: 1
|
| 41 |
+
kind: KIND_MODEL
|
| 42 |
+
}
|
| 43 |
+
]
|