essayevaluator / deploy.json
DanielHafezi's picture
Update deploy.json
28d1b45 verified
raw
history blame contribute delete
663 Bytes
{
  "parameters": {
    "max_total_tokens": 4096,
    "max_input_length": 2048,
    "max_batch_total_tokens": 16384,
    "max_concurrent_requests": 2,
    "max_batch_size": 2,
    "waiting_served_ratio": 0.8
  },
  "hardware": {
    "task_type": "text-generation",
    "accelerator": "gpu",
    "num_gpus": 1,
    "gpu_memory_gb": 24,
    "distributed_setup": false
  },
  "framework_type": "pytorch",
  "torch_compile": true,
  "trust_remote_code": true,
  "disable_custom_kernels": false
}