| | |
| | %pip install azure-ai-ml azure-identity --upgrade --quiet |
| |
|
| | import os |
| | import time |
| | from azure.ai.ml import MLClient |
| | from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment |
| | from azure.identity import DefaultAzureCredential |
| |
|
| | |
| | |
# Azure ML workspace coordinates. Each value is a placeholder to be replaced
# with a real one before running; they are stored in the process environment
# and read back from there later in the script.
os.environ.update(
    {
        "SUBSCRIPTION_ID": "<YOUR_SUBSCRIPTION_ID>",
        "RESOURCE_GROUP": "<YOUR_RESOURCE_GROUP>",
        "WORKSPACE_NAME": "<YOUR_WORKSPACE_NAME>",
    }
)
| |
|
| | |
# Suffix both resource names with the current Unix time in whole seconds, so
# runs started in different seconds get different names.
timestamp = f"{int(time.time())}"
os.environ.update(
    ENDPOINT_NAME="hf-ep-" + timestamp,
    DEPLOYMENT_NAME="hf-deploy-" + timestamp,
)
| |
|
| | |
# Build the client from the workspace coordinates held in the environment.
# A variable that is missing from the environment is passed through as None.
_workspace_scope = {
    "subscription_id": os.environ.get("SUBSCRIPTION_ID"),
    "resource_group_name": os.environ.get("RESOURCE_GROUP"),
    "workspace_name": os.environ.get("WORKSPACE_NAME"),
}
client = MLClient(credential=DefaultAzureCredential(), **_workspace_scope)
| |
|
| | |
# Asset URI of the model in the "HuggingFace" registry. Nothing is
# interpolated into it, so it is a plain string literal rather than an f-string.
# NOTE(review): the "labels/latest" suffix presumably resolves to the newest
# registered version at deploy time — confirm if a pinned version is required.
model_uri = "azureml://registries/HuggingFace/models/salesforce-codegen-350m-multi/labels/latest"
| |
|
| | |
# Both entities refer to the same endpoint name, so read it once.
_endpoint_name = os.environ.get("ENDPOINT_NAME")

# Endpoint entity; only its name is specified here.
endpoint = ManagedOnlineEndpoint(name=_endpoint_name)

# Deployment entity that ties the model URI to that endpoint.
deployment = ManagedOnlineDeployment(
    name=os.environ.get("DEPLOYMENT_NAME"),
    endpoint_name=_endpoint_name,
    model=model_uri,
    instance_type="Standard_NC40ads_H100_v5",  # VM size string, passed as-is
    instance_count=1,
)
| |
|
| | |
# Submit the endpoint first and block until that operation has finished,
# then do the same for the deployment that targets it.
_endpoint_poller = client.begin_create_or_update(endpoint)
_endpoint_poller.wait()

_deployment_poller = client.online_deployments.begin_create_or_update(deployment)
_deployment_poller.wait()

# NOTE(review): nothing in this script assigns endpoint traffic to the new
# deployment — confirm whether requests must target the deployment by name.
_deployed_name = os.getenv("ENDPOINT_NAME")
print(f"Endpoint '{_deployed_name}' deployed successfully!")
print("You can now send requests to your endpoint via Microsoft Foundry or Azure Machine Learning.")