File size: 2,611 Bytes
0d9979c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
"""
Amazon SageMaker Deployment Script for Legion Coder 8M

This script demonstrates how to deploy the Legion Coder model to Amazon SageMaker
for production inference.

Requirements:
    pip install sagemaker boto3

Usage:
    python sagemaker_deploy.py
"""

import sagemaker
from sagemaker.huggingface import HuggingFaceModel
import boto3

# Configuration
# IAM role ARN that SageMaker assumes to create and run the endpoint.
# Replace the placeholders with your AWS account ID and role name.
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
# Hugging Face Hub model ID to deploy.
MODEL_ID = "dineth554/legion-coder-8m"
# Instance type and count backing the real-time endpoint.
INSTANCE_TYPE = "ml.m5.large"
INSTANCE_COUNT = 1


def deploy_to_sagemaker():
    """
    Deploy Legion Coder 8M to Amazon SageMaker.

    Creates a real-time SageMaker endpoint backed by the Hugging Face
    inference container; the container downloads the model from the
    Hugging Face Hub at startup via the ``HF_MODEL_ID`` environment
    variable.

    Returns:
        The predictor bound to the newly created endpoint.
    """
    endpoint_name = "legion-coder-8m-endpoint"

    # Initialize SageMaker session (region/credentials come from the
    # standard AWS environment).
    sess = sagemaker.Session()

    # FIX: the original passed model_data as an https:// URL to a raw
    # .safetensors file. SageMaker requires model_data to be an s3:// URI
    # pointing at a model.tar.gz archive, so that deploy would fail — and
    # it conflicts with HF_MODEL_ID, which already tells the container to
    # pull the model from the Hub. Rely on HF_MODEL_ID alone.
    # FIX: also dropped SAGEMAKER_PROGRAM=inference.py — no custom
    # inference script is bundled (no entry_point/source_dir), so pointing
    # the container at one would break startup.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,  # model pulled from the Hub at startup
            "HF_TASK": "text-generation",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",  # logging.INFO
        },
    )

    # Deploy to a real-time endpoint.
    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name=endpoint_name,
    )

    print("Model deployed successfully!")
    print(f"Endpoint name: {endpoint_name}")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor


def test_endpoint(predictor):
    """
    Test the deployed endpoint with a sample prompt.
    """
    test_payload = {
        "inputs": "Write a Python function to calculate fibonacci numbers:",
        "parameters": {
            "temperature": 0.8,
            "top_p": 0.95,
            "top_k": 50,
            "max_new_tokens": 200
        }
    }
    
    response = predictor.predict(test_payload)
    print("Test response:", response)
    return response


def cleanup_endpoint(predictor):
    """
    Delete the SageMaker endpoint behind *predictor*.

    Call this once the endpoint is no longer needed so the backing
    instance stops accruing charges.

    Args:
        predictor: Predictor whose endpoint should be deleted; must
            expose a ``delete_endpoint()`` method.
    """
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")


if __name__ == "__main__":
    # Deploy the model and obtain a predictor for the new endpoint.
    print("Deploying Legion Coder 8M to SageMaker...")
    predictor = deploy_to_sagemaker()
    
    # Smoke-test the endpoint with a sample generation prompt.
    print("\nTesting endpoint...")
    test_endpoint(predictor)
    
    # Uncomment to delete the endpoint when done (the endpoint keeps
    # incurring instance charges until it is deleted).
    # cleanup_endpoint(predictor)