{
  "compute": {
    "accelerator": "gpu",
    "instanceSize": "medium",
    "instanceType": "nvidia-a10g",
    "scaling": {
      "minReplica": 0,
      "maxReplica": 1
    }
  },
  "model": {
    "framework": "pytorch",
    "task": "text-generation",
    "image": {
      "huggingface": {}
    }
  },
  "env": {
    "PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
    "CUDA_LAUNCH_BLOCKING": "1",
    "TORCH_USE_CUDA_DSA": "1",
    "TRANSFORMERS_OFFLINE": "0",
    "HF_HUB_ENABLE_HF_TRANSFER": "1",
    "MODEL_LOAD_TIMEOUT": "600"
  }
}