{
  "parameters": {
    "max_total_tokens": 4096,
    "max_input_length": 2048,
    "max_batch_total_tokens": 16384,
    "max_concurrent_requests": 2,
    "max_batch_size": 2,
    "waiting_served_ratio": 0.8
  },
  "hardware": {
    "task_type": "text-generation",
    "accelerator": "gpu",
    "num_gpus": 1,
    "gpu_memory_gb": 24,
    "distributed_setup": false
  },
  "framework_type": "pytorch",
  "torch_compile": true,
  "trust_remote_code": true,
  "disable_custom_kernels": false
}