"""
Amazon SageMaker Deployment Script for Legion Coder 8M

This script demonstrates how to deploy the Legion Coder model
to Amazon SageMaker for production inference.

Requirements:
    pip install sagemaker boto3

Usage:
    python sagemaker_deploy.py
"""

import sagemaker
from sagemaker.huggingface import HuggingFaceModel
import boto3

# Configuration
ROLE_ARN = "arn:aws:iam::YOUR_ACCOUNT_ID:role/YOUR_SAGEMAKER_ROLE"
MODEL_ID = "dineth554/legion-coder-8m"
INSTANCE_TYPE = "ml.m5.large"
INSTANCE_COUNT = 1
# Single source of truth for the endpoint name used when deploying and reporting.
ENDPOINT_NAME = "legion-coder-8m-endpoint"


def deploy_to_sagemaker(endpoint_name=ENDPOINT_NAME):
    """
    Deploy Legion Coder 8M to Amazon SageMaker.

    Builds a ``HuggingFaceModel`` configured through the ``HF_MODEL_ID`` and
    ``HF_TASK`` environment variables and deploys it to an endpoint using
    ``INSTANCE_COUNT`` instances of type ``INSTANCE_TYPE``.

    Args:
        endpoint_name: Name given to the created endpoint. Defaults to
            ``ENDPOINT_NAME``.

    Returns:
        The predictor object returned by ``HuggingFaceModel.deploy``.
    """
    # Initialize SageMaker session
    sess = sagemaker.Session()

    # Create Hugging Face Model.
    # NOTE: ``model_data`` is intentionally not passed. SageMaker expects it
    # to be an S3 location of a model archive, not a huggingface.co download
    # URL; the model is selected via HF_MODEL_ID in ``env`` instead.
    huggingface_model = HuggingFaceModel(
        transformers_version="4.36.0",
        pytorch_version="2.1.0",
        py_version="py310",
        role=ROLE_ARN,
        sagemaker_session=sess,
        env={
            "HF_MODEL_ID": MODEL_ID,
            "HF_TASK": "text-generation",
            "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
            "SAGEMAKER_PROGRAM": "inference.py",
        },
    )

    # Deploy to SageMaker
    predictor = huggingface_model.deploy(
        initial_instance_count=INSTANCE_COUNT,
        instance_type=INSTANCE_TYPE,
        endpoint_name=endpoint_name,
    )

    print("Model deployed successfully!")
    print(f"Endpoint name: {endpoint_name}")
    print(f"Instance type: {INSTANCE_TYPE}")

    return predictor


def test_endpoint(predictor):
    """
    Test the deployed endpoint with a sample prompt.

    Args:
        predictor: Predictor returned by ``deploy_to_sagemaker``.

    Returns:
        Whatever ``predictor.predict`` returns for the sample payload.
    """
    test_payload = {
        "inputs": "Write a Python function to calculate fibonacci numbers:",
        "parameters": {
            "temperature": 0.8,
            "top_p": 0.95,
            "top_k": 50,
            "max_new_tokens": 200,
        },
    }

    response = predictor.predict(test_payload)
    print("Test response:", response)

    return response


def cleanup_endpoint(predictor):
    """
    Clean up the SageMaker endpoint when done.

    Args:
        predictor: Predictor whose endpoint should be deleted.
    """
    predictor.delete_endpoint()
    print("Endpoint deleted successfully.")


if __name__ == "__main__":
    # Deploy the model
    print("Deploying Legion Coder 8M to SageMaker...")
    predictor = deploy_to_sagemaker()

    # Test the endpoint
    print("\nTesting endpoint...")
    test_endpoint(predictor)

    # The endpoint keeps running (and incurring cost) until it is deleted.
    # Uncomment to clean up
    # cleanup_endpoint(predictor)