File size: 579 Bytes
b0bf659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from huggingface_hub import create_inference_endpoint
 
# Name under which the Inference Endpoint is created.
ENDPOINT_NAME = "vllm-meta-llama-3-8b-instruct"

# Custom container specification forwarded as `custom_image`.
# NOTE(review): presumably "env" entries become container environment
# variables and "health_route" is the path polled for readiness — confirm
# against the Inference Endpoints custom-image documentation.
CUSTOM_IMAGE = {
    "health_route": "/health",
    "env": {"MAX_MODEL_LEN": "8192"},
    "url": "philschmi/vllm-hf-inference-endpoints",
}

# Keyword settings for the endpoint: model repository, task/framework,
# hardware selection, cloud placement, and access level.
ENDPOINT_SETTINGS = dict(
    repository="meta-llama/Meta-Llama-3-8B-Instruct",
    framework="pytorch",
    task="custom",
    accelerator="gpu",
    vendor="aws",
    region="us-east-1",
    type="protected",
    instance_type="g5.2xlarge",
    instance_size="medium",
    custom_image=CUSTOM_IMAGE,
)

# Request creation of the endpoint with the settings above.
endpoint = create_inference_endpoint(ENDPOINT_NAME, **ENDPOINT_SETTINGS)

# Block (no timeout given) until the wait call returns, then report the
# endpoint's status.
endpoint.wait()
print(endpoint.status)