Commit · 66a1268
1 Parent(s): 20a3158
Delete vllm_config.json
Browse files
- vllm_config.json +0 -14
vllm_config.json
DELETED
|
@@ -1,14 +0,0 @@
-{
-"model_type": "llama",
-"quantization": "fp8",
-"max_model_len": 8192,
-"gpu_memory_utilization": 0.9,
-"tensor_parallel_size": 6,
-"pipeline_parallel_size": 1,
-"trust_remote_code": true,
-"dtype": "half",
-"enforce_eager": false,
-"max_num_seqs": 256,
-"max_num_batched_tokens": 8192,
-"enable_prefix_caching": true
-}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|