tfrere's picture
tfrere HF Staff
add model factory
b0bf659
raw
history blame contribute delete
579 Bytes
from huggingface_hub import create_inference_endpoint
# Custom container configuration handed to the endpoint: the image to run,
# the route used for health checks, and environment variables for the container.
image_spec = {
    "health_route": "/health",
    "env": {"MAX_MODEL_LEN": "8192"},
    "url": "philschmi/vllm-hf-inference-endpoints",
}

# Keyword options for the endpoint: which model repository to serve and
# the cloud vendor / region / hardware it should run on.
endpoint_options = {
    "repository": "meta-llama/Meta-Llama-3-8B-Instruct",
    "framework": "pytorch",
    "task": "custom",
    "accelerator": "gpu",
    "vendor": "aws",
    "region": "us-east-1",
    "type": "protected",
    "instance_type": "g5.2xlarge",
    "instance_size": "medium",
    "custom_image": image_spec,
}

# Request the endpoint under a fixed name.
endpoint = create_inference_endpoint(
    "vllm-meta-llama-3-8b-instruct",
    **endpoint_options,
)

# NOTE(review): per the huggingface_hub docs, wait() blocks until the endpoint
# is deployed; no timeout is passed here, so this can wait indefinitely.
endpoint.wait()

# Report the endpoint's status once the wait returns.
print(endpoint.status)