Text Generation
Transformers
PyTorch
codegen
File size: 1,983 Bytes
7364b6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# Install required packages
%pip install azure-ai-ml azure-identity --upgrade --quiet

import os
import time
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment
from azure.identity import DefaultAzureCredential

# Azure workspace coordinates (replace each placeholder with your own value).
# They are stored in os.environ so the rest of the script reads them by name.
# Follow setup steps at: https://huggingface.co/docs/microsoft-azure/guides/configure-azure-ml-microsoft-foundry
os.environ["SUBSCRIPTION_ID"] = "<YOUR_SUBSCRIPTION_ID>"
os.environ["RESOURCE_GROUP"] = "<YOUR_RESOURCE_GROUP>"
os.environ["WORKSPACE_NAME"] = "<YOUR_WORKSPACE_NAME>"

# Fail fast, before any Azure call is attempted, if a template placeholder
# above was left unedited; otherwise the literal "<YOUR_...>" text would be
# handed to MLClient as if it were a real identifier.
unset_settings = [
    key
    for key in ("SUBSCRIPTION_ID", "RESOURCE_GROUP", "WORKSPACE_NAME")
    if os.environ[key].startswith("<YOUR_")
]
if unset_settings:
    raise ValueError(
        "Replace the placeholder value(s) before running: " + ", ".join(unset_settings)
    )

# Generate unique names for endpoint and deployment.
# The current Unix time (whole seconds) is used as the suffix for both names.
timestamp = str(int(time.time()))
os.environ["ENDPOINT_NAME"] = f"hf-ep-{timestamp}"
os.environ["DEPLOYMENT_NAME"] = f"hf-deploy-{timestamp}"

# Create Azure ML Client for Microsoft Foundry (classic)
client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=os.getenv("SUBSCRIPTION_ID"),
    resource_group_name=os.getenv("RESOURCE_GROUP"),
    workspace_name=os.getenv("WORKSPACE_NAME"),
)

# Build model URI for Azure registry (a fixed string, so no f-string is needed)
model_uri = "azureml://registries/HuggingFace/models/salesforce-codegen-350m-multi/labels/latest"

# Describe the endpoint and the deployment that will be attached to it.
# Nothing is sent to Azure until the begin_create_or_update calls below.
endpoint = ManagedOnlineEndpoint(name=os.getenv("ENDPOINT_NAME"))

deployment = ManagedOnlineDeployment(
    name=os.getenv("DEPLOYMENT_NAME"),
    endpoint_name=os.getenv("ENDPOINT_NAME"),
    model=model_uri,
    # Check https://huggingface.co/docs/microsoft-azure/foundry/hardware to see the available instances
    instance_type="Standard_NC40ads_H100_v5",
    instance_count=1,
)

# Deploy endpoint and deployment (this may take 10-15 minutes).
# The endpoint is submitted first, then the deployment that names it;
# each call is waited on before the script moves to the next step.
client.begin_create_or_update(endpoint).wait()
client.online_deployments.begin_create_or_update(deployment).wait()

print(f"Endpoint '{os.getenv('ENDPOINT_NAME')}' deployed successfully!")
print("You can now send requests to your endpoint via Microsoft Foundry or Azure Machine Learning.")