Spaces:
Build error
Build error
| import enum | |
| from typing import Optional | |
| from loguru import logger | |
| try: | |
| import boto3 | |
| from sagemaker.enums import EndpointType | |
| from sagemaker.huggingface import HuggingFaceModel | |
| except ModuleNotFoundError: | |
| logger.warning("Couldn't load AWS or SageMaker imports. Run 'poetry install --with aws' to support AWS.") | |
| from llm_engineering.domain.inference import DeploymentStrategy | |
| from llm_engineering.settings import settings | |
class SagemakerHuggingfaceStrategy(DeploymentStrategy):
    """Deployment strategy that forwards HuggingFace/SageMaker deployments to a service object."""

    def __init__(self, deployment_service) -> None:
        """
        Stores the service that carries out the deployment.

        :param deployment_service: Object whose ``deploy`` method receives every deployment request.
        """
        self.deployment_service = deployment_service

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Logs the deployment parameters and delegates the deployment to the service.

        All arguments are passed through unchanged, by keyword, to
        ``self.deployment_service.deploy``.

        :param role_arn: AWS role ARN with permissions for SageMaker deployment.
        :param llm_image: URI for the HuggingFace model Docker image.
        :param config: Configuration settings for the model environment.
        :param endpoint_name: Name of the SageMaker endpoint.
        :param endpoint_config_name: Name of the SageMaker endpoint configuration.
        :param gpu_instance_type: Instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints)
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
            or EndpointType.INFERENCE_COMPONENT (with inference component)
        :raises Exception: Any error raised by the service is logged and re-raised unchanged.
        """
        logger.info("Starting deployment using Sagemaker Huggingface Strategy...")

        # NOTE: the values reported here are read from the global settings; the
        # instance type shown is not the ``gpu_instance_type`` argument.
        parameters_summary = f"Deployment parameters: nb of replicas: {settings.COPIES}, nb of gpus:{settings.GPUS}, instance_type:{settings.GPU_INSTANCE_TYPE}"
        logger.info(parameters_summary)

        # Collected once so the hand-off to the service is a single keyword call.
        request = {
            "role_arn": role_arn,
            "llm_image": llm_image,
            "config": config,
            "endpoint_name": endpoint_name,
            "endpoint_config_name": endpoint_config_name,
            "gpu_instance_type": gpu_instance_type,
            "resources": resources,
            "endpoint_type": endpoint_type,
        }

        try:
            self.deployment_service.deploy(**request)
            logger.info("Deployment completed successfully.")
        except Exception as exc:
            logger.error(f"Error during deployment: {exc}")
            raise
class DeploymentService:
    """Deploys HuggingFace models to SageMaker endpoints."""

    def __init__(self, resource_manager):
        """
        Initializes the DeploymentService with its dependencies.

        Creates a boto3 SageMaker client from the region and credentials found in
        the project settings (``AWS_REGION``, ``AWS_ACCESS_KEY``, ``AWS_SECRET_KEY``).

        :param resource_manager: Object queried through ``endpoint_config_exists``
            before a deployment is started.
        """
        self.sagemaker_client = boto3.client(
            "sagemaker",
            region_name=settings.AWS_REGION,
            aws_access_key_id=settings.AWS_ACCESS_KEY,
            aws_secret_access_key=settings.AWS_SECRET_KEY,
        )
        self.resource_manager = resource_manager

    def deploy(
        self,
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        endpoint_config_name: str,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Handles the deployment of a model to SageMaker: reports whether the endpoint
        configuration already exists, then deploys the model.

        :param role_arn: The ARN of the IAM role for SageMaker to access resources.
        :param llm_image: URI of the Docker image in ECR for the HuggingFace model.
        :param config: Configuration dictionary for the environment variables of the model.
        :param endpoint_name: The name for the SageMaker endpoint.
        :param endpoint_config_name: The name for the SageMaker endpoint configuration.
            Only used here for the existence check and its log messages.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints)
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
            or EndpointType.INFERENCE_COMPONENT (with inference component)
        :raises Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            # Check if the endpoint configuration exists
            if self.resource_manager.endpoint_config_exists(endpoint_config_name=endpoint_config_name):
                logger.info(f"Endpoint configuration {endpoint_config_name} exists. Using existing configuration...")
            else:
                logger.info(f"Endpoint configuration {endpoint_config_name} does not exist.")

            # NOTE(review): the deployment runs for both outcomes of the check above,
            # as the "Using existing configuration..." message implies - confirm
            # against the original indentation.
            # Prepare and deploy the HuggingFace model
            self.prepare_and_deploy_model(
                role_arn=role_arn,
                llm_image=llm_image,
                config=config,
                endpoint_name=endpoint_name,
                update_endpoint=False,
                resources=resources,
                endpoint_type=endpoint_type,
                gpu_instance_type=gpu_instance_type,
            )

            logger.info(f"Successfully deployed/updated model to endpoint {endpoint_name}.")
        except Exception as e:
            logger.error(f"Failed to deploy model to SageMaker: {e}")
            raise

    # Declared static because the signature takes no ``self``: without the decorator,
    # ``self.prepare_and_deploy_model(role_arn=...)`` would bind the instance to
    # ``role_arn`` and fail with "got multiple values for argument 'role_arn'".
    @staticmethod
    def prepare_and_deploy_model(
        role_arn: str,
        llm_image: str,
        config: dict,
        endpoint_name: str,
        update_endpoint: bool,
        gpu_instance_type: str,
        resources: Optional[dict] = None,
        endpoint_type: enum.Enum = EndpointType.MODEL_BASED,
    ) -> None:
        """
        Prepares and deploys/updates the HuggingFace model on SageMaker.

        :param role_arn: The ARN of the IAM role.
        :param llm_image: The Docker image URI for the HuggingFace model.
        :param config: Configuration settings for the model, passed as its environment.
        :param endpoint_name: The name of the endpoint.
        :param update_endpoint: Boolean flag forwarded to ``HuggingFaceModel.deploy``
            to update an existing endpoint.
        :param gpu_instance_type: The instance type for the SageMaker endpoint.
        :param resources: Optional resources for the model deployment (used for multi model endpoints)
        :param endpoint_type: can be EndpointType.MODEL_BASED (without inference component)
            or EndpointType.INFERENCE_COMPONENT (with inference component)
        """
        huggingface_model = HuggingFaceModel(
            role=role_arn,
            image_uri=llm_image,
            env=config,
        )

        # Whether this creates or updates the endpoint is decided by the caller
        # through ``update_endpoint``; a single instance is always requested.
        huggingface_model.deploy(
            instance_type=gpu_instance_type,
            initial_instance_count=1,
            endpoint_name=endpoint_name,
            update_endpoint=update_endpoint,
            resources=resources,
            tags=[{"Key": "task", "Value": "model_task"}],
            endpoint_type=endpoint_type,
            # Generous startup health-check window (seconds, per the SageMaker SDK).
            container_startup_health_check_timeout=900,
        )