"""
Amazon SageMaker deployment script for Legion Coder 8M.

Deploys the Legion Coder model to an Amazon SageMaker real-time endpoint
for production inference, runs a smoke test against it, and shows how to
tear the endpoint down.

Requirements:
    pip install sagemaker boto3

Usage:
    python sagemaker_deploy.py
"""
| |
|
| | import sagemaker |
| | from sagemaker.huggingface import HuggingFaceModel |
| | import boto3 |
| |
|
| | |
# IAM role that SageMaker assumes to pull the container and serve the model.
# Replace the account ID and role name before running — TODO confirm with your AWS setup.
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
# Hugging Face Hub model ID the endpoint will serve.
MODEL_ID = "dineth554/legion-coder-8m"
# CPU instance type for the endpoint (small model, CPU inference presumably sufficient).
INSTANCE_TYPE = "ml.m5.large"
# Number of instances behind the endpoint.
INSTANCE_COUNT = 1
| |
|
| |
|
def deploy_to_sagemaker():
    """
    Deploy Legion Coder 8M to an Amazon SageMaker real-time endpoint.

    Returns:
        sagemaker.Predictor: a predictor bound to the newly created endpoint,
        ready for `predict()` calls.
    """
    sess = sagemaker.Session()

    # NOTE: we deliberately do NOT pass `model_data` here. `model_data` must be
    # an S3 URI pointing at a model.tar.gz archive; an HTTPS Hub URL to a raw
    # safetensors file is invalid. Instead, the Hugging Face inference container
    # downloads the model directly from the Hub via the HF_MODEL_ID env var.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,            # container pulls weights from the Hub
            "HF_TASK": "text-generation",       # selects the text-generation pipeline
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",  # logging.INFO
            # SAGEMAKER_PROGRAM removed: it pointed at a custom inference.py that
            # is not shipped (no entry_point/source_dir was configured), which
            # would break container startup. The default HF handler is used.
        },
    )

    endpoint_name = "legion-coder-8m-endpoint"

    # Blocks until the endpoint is InService (several minutes).
    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name=endpoint_name,
    )

    print("Model deployed successfully!")
    print(f"Endpoint name: {endpoint_name}")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor
| |
|
| |
|
def test_endpoint(predictor):
    """
    Send a sample code-generation prompt to the deployed endpoint.

    Args:
        predictor: the SageMaker predictor returned by `deploy_to_sagemaker`.

    Returns:
        The raw response payload from the endpoint.
    """
    sampling_params = {
        "temperature": 0.8,
        "top_p": 0.95,
        "top_k": 50,
        "max_new_tokens": 200,
    }
    payload = {
        "inputs": "Write a Python function to calculate fibonacci numbers:",
        "parameters": sampling_params,
    }

    result = predictor.predict(payload)
    print("Test response:", result)
    return result
| |
|
| |
|
def cleanup_endpoint(predictor):
    """
    Tear down the SageMaker endpoint to stop incurring charges.

    Args:
        predictor: the SageMaker predictor whose endpoint should be deleted.
    """
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")
| |
|
| |
|
if __name__ == "__main__":
    # Deploy first (blocks until the endpoint is live), then smoke-test it.
    print("Deploying Legion Coder 8M to SageMaker...")
    endpoint_predictor = deploy_to_sagemaker()

    print("\nTesting endpoint...")
    test_endpoint(endpoint_predictor)
| | |
| | |
| | |
| |
|