Update deploy.json
Browse files - deploy.json +8 -10
deploy.json
CHANGED
|
@@ -1,13 +1,11 @@
|
|
| 1 |
{
|
| 2 |
"parameters": {
|
| 3 |
-
"max_total_tokens": 2048,
|
| 4 |
-
"max_input_length": 1024,
|
| 5 |
-
"max_batch_total_tokens": 8192,
|
| 6 |
-
"max_concurrent_requests": 1,
|
| 7 |
-
"
|
| 8 |
-
"
|
| 9 |
-
"max_batch_size": 1,
|
| 10 |
-
"waiting_served_ratio": 1.2
|
| 11 |
},
|
| 12 |
"hardware": {
|
| 13 |
"task_type": "text-generation",
|
|
@@ -17,7 +15,7 @@
|
|
| 17 |
"distributed_setup": false
|
| 18 |
},
|
| 19 |
"framework_type": "pytorch",
|
| 20 |
-
"torch_compile":
|
| 21 |
"trust_remote_code": true,
|
| 22 |
-
"disable_custom_kernels":
|
| 23 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"parameters": {
|
| 3 |
+
"max_total_tokens": 4096, // Increase from 2048
|
| 4 |
+
"max_input_length": 2048, // Increase from 1024
|
| 5 |
+
"max_batch_total_tokens": 16384, // Increase from 8192
|
| 6 |
+
"max_concurrent_requests": 2, // Increase from 1
|
| 7 |
+
"max_batch_size": 2, // Increase from 1
|
| 8 |
+
"waiting_served_ratio": 0.8 // Decrease from 1.2
|
|
|
|
|
|
|
| 9 |
},
|
| 10 |
"hardware": {
|
| 11 |
"task_type": "text-generation",
|
|
|
|
| 15 |
"distributed_setup": false
|
| 16 |
},
|
| 17 |
"framework_type": "pytorch",
|
| 18 |
+
"torch_compile": true,
|
| 19 |
"trust_remote_code": true,
|
| 20 |
+
"disable_custom_kernels": false
|
| 21 |
}
|