Instructions to use my-ai-stack/Stack-2-9-finetuned with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use my-ai-stack/Stack-2-9-finetuned with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

# Build a text-generation pipeline backed by this model repository.
pipe = pipeline("text-generation", model="my-ai-stack/Stack-2-9-finetuned")
# Chat-style input: a list of messages, each with a "role" and a "content".
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Run the pipeline on the conversation.
pipe(messages)

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and the causal-LM weights from the same repository.
tokenizer = AutoTokenizer.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
model = AutoModelForCausalLM.from_pretrained("my-ai-stack/Stack-2-9-finetuned")
# Chat-style input: a list of messages, each with a "role" and a "content".
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Render the conversation with the tokenizer's chat template, tokenize it into
# PyTorch tensors ("pt"), and move the result to the device the model is on.
inputs = tokenizer.apply_chat_template(
	messages,
	add_generation_prompt=True,
	tokenize=True,
	return_dict=True,
	return_tensors="pt",
).to(model.device)

# Generate at most 40 new tokens.
outputs = model.generate(**inputs, max_new_tokens=40)
# Skip the first input_ids-length tokens of the output (the prompt) so that
# only the newly generated tokens are decoded and printed.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:]))

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use my-ai-stack/Stack-2-9-finetuned with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server (the curl call below expects it on localhost:8000):
vllm serve "my-ai-stack/Stack-2-9-finetuned"
# Call the server using curl (OpenAI-compatible API).
# Sends a single-turn chat request as JSON to /v1/chat/completions:
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/my-ai-stack/Stack-2-9-finetuned

SGLang

How to use my-ai-stack/Stack-2-9-finetuned with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server, listening on all interfaces (0.0.0.0) on port 30000:
python3 -m sglang.launch_server \
    --model-path "my-ai-stack/Stack-2-9-finetuned" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API).
# Sends a single-turn chat request as JSON to port 30000, as configured above:
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

# Run the SGLang server from the lmsysorg/sglang:latest image.
# Publishes container port 30000 on the host, mounts the local Hugging Face
# cache into the container, and passes HF_TOKEN through the environment;
# replace <secret> with your own token before running.
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "my-ai-stack/Stack-2-9-finetuned" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API).
# Sends a single-turn chat request as JSON to the published port 30000:
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "my-ai-stack/Stack-2-9-finetuned",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use my-ai-stack/Stack-2-9-finetuned with Docker Model Runner:
```
docker model run hf.co/my-ai-stack/Stack-2-9-finetuned
```

Stack-2-9-finetuned/stack/deploy/local_deploy.sh

walidsobhie-code

refactor: Squeeze folders further - cleaner structure

65888d5 about 2 months ago

raw

history blame

6.55 kB

	#!/bin/bash

	# Stack 2.9 Local Deployment Script
	#
	# Checks for Docker and Docker Compose, writes a .env file, optionally
	# downloads the model, brings the Compose stack up, and probes the vLLM
	# health endpoint on http://localhost:8000/health.
	#
	# Usage: ./local_deploy.sh [options]
	# See show_usage for the supported options and environment variables.

	# Abort the script as soon as any unchecked command exits non-zero.
	set -e

	# ANSI color codes for log prefixes. The values are kept as literal
	# backslash sequences; the print_* helpers turn them into real escape
	# characters at print time.
	RED="\033[0;31m"
	GREEN="\033[0;32m"
	YELLOW="\033[1;33m"
	BLUE="\033[0;34m"
	NC="\033[0m" # No Color (resets attributes)

	# Default configuration
	#
	# MODEL_PATH, MODEL_NAME, MODEL_FORMAT, GPU_MEMORY_UTILIZATION and LOG_LEVEL
	# are listed by show_usage as environment variables, so a non-empty value
	# already present in the environment takes precedence; the literal after
	# ":-" is only the fallback. COMPOSE_FILE is not advertised as configurable
	# and stays fixed.
	COMPOSE_FILE="docker-compose.yml"
	MODEL_PATH="${MODEL_PATH:-./models}"
	MODEL_NAME="${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct}" # Will be replaced with Stack 2.9
	MODEL_FORMAT="${MODEL_FORMAT:-hf}"
	GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
	LOG_LEVEL="${LOG_LEVEL:-INFO}"

	# Log an informational message to stdout.
	# Arguments: $1 - message text (backslash escapes in it are interpreted)
	# Outputs:   "[INFO] <message>" with the tag wrapped in BLUE ... NC
	print_status() {
	  printf '%b\n' "${BLUE}[INFO]${NC} $1"
	}

	# Log a success message to stdout.
	# Arguments: $1 - message text (backslash escapes in it are interpreted)
	# Outputs:   "[SUCCESS] <message>" with the tag wrapped in GREEN ... NC
	print_success() {
	  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
	}

	# Log a warning message to stdout.
	# Arguments: $1 - message text (backslash escapes in it are interpreted)
	# Outputs:   "[WARNING] <message>" with the tag wrapped in YELLOW ... NC
	print_warning() {
	  printf '%b\n' "${YELLOW}[WARNING]${NC} $1"
	}

	# Log an error message to stdout.
	# Arguments: $1 - message text (backslash escapes in it are interpreted)
	# Outputs:   "[ERROR] <message>" with the tag wrapped in RED ... NC
	print_error() {
	  printf '%b\n' "${RED}[ERROR]${NC} $1"
	}

	# Function to check prerequisites
	#
	# Verifies that Docker and Docker Compose are available and records which
	# Compose front-end to use.
	# Globals:  COMPOSE_CMD (written) - "docker compose" (v2 plugin) when
	#           available, otherwise "docker-compose" (standalone v1).
	# Outputs:  progress, warning and error messages via the print_* helpers.
	# Exits:    1 when Docker or Docker Compose cannot be found. A missing
	#           NVIDIA runtime only produces warnings.
	check_prerequisites() {
	  print_status "Checking prerequisites..."

	  # Check Docker
	  if ! command -v docker &> /dev/null; then
	    print_error "Docker is not installed or not in PATH"
	    exit 1
	  fi

	  # Check Docker Compose (v1 or v2)
	  if docker compose version &> /dev/null; then
	    COMPOSE_CMD="docker compose"
	  elif command -v docker-compose &> /dev/null; then
	    COMPOSE_CMD="docker-compose"
	  else
	    print_error "Docker Compose is not installed or not in PATH"
	    exit 1
	  fi

	  # Check NVIDIA Docker support: look for "nvidia" anywhere in the output of
	  # `docker info`. This has to be a real pipe; an escaped "\|" would be
	  # handed to docker as a literal argument and grep would never run.
	  if ! docker info 2> /dev/null | grep -q "nvidia"; then
	    print_warning "NVIDIA Docker support not detected. GPU acceleration may not work."
	    print_warning "Ensure nvidia-docker2 is installed and configured."
	  fi

	  print_success "Prerequisites check passed"
	}

	# Function to setup environment
	#
	# Creates the working directories and writes the .env file in the current
	# directory.
	# Globals:  MODEL_PATH, MODEL_NAME, MODEL_FORMAT, GPU_MEMORY_UTILIZATION,
	#           LOG_LEVEL (all read).
	# Outputs:  ./models and ./logs (mode 755) and ./.env, which is overwritten.
	setup_environment() {
	  print_status "Setting up environment..."

	  # Create directories
	  mkdir -p models logs
	  chmod 755 models logs

	  # Create .env file.
	  # printf is used instead of a here-document: with plain "<<" an indented
	  # terminator line is never recognized, and indented here-document lines
	  # would put leading whitespace in front of every KEY=value entry.
	  printf '%s\n' \
	    "MODEL_PATH=${MODEL_PATH}" \
	    "MODEL_NAME=${MODEL_NAME}" \
	    "MODEL_FORMAT=${MODEL_FORMAT}" \
	    "GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION}" \
	    "LOG_LEVEL=${LOG_LEVEL}" \
	    > .env

	  print_success "Environment setup complete"
	}

	# Function to download model
	#
	# Fetches MODEL_NAME from the Hugging Face Hub into models/<last path
	# component of MODEL_NAME>. huggingface-cli is preferred; git with git-lfs
	# is the fallback.
	# Globals:  MODEL_NAME (read); FORCE_DOWNLOAD (read, treated as "false"
	#           when unset).
	# Exits:    1 when neither huggingface-cli nor git is available.
	download_model() {
	  local model_basename="${MODEL_NAME##*/}"
	  local model_dir="models/${model_basename}"
	  local -a hf_args

	  print_status "Downloading model (this may take a while)..."

	  # An existing directory short-circuits the download unless the caller
	  # explicitly asked for a forced re-download.
	  if [[ -d "${model_dir}" && "${FORCE_DOWNLOAD:-false}" != "true" ]]; then
	    print_warning "Model already exists, skipping download"
	    return 0
	  fi

	  print_status "Downloading ${MODEL_NAME}..."

	  # Use HuggingFace Hub to download model
	  if command -v huggingface-cli &> /dev/null; then
	    # Download into the same directory the existence check looks at, so a
	    # later run can detect the model and skip the download.
	    hf_args=(download "${MODEL_NAME}" --local-dir "${model_dir}")
	    if [[ "${FORCE_DOWNLOAD:-false}" == "true" ]]; then
	      hf_args+=(--force-download)
	    fi
	    huggingface-cli "${hf_args[@]}"
	  elif command -v git &> /dev/null; then
	    # git clone refuses to write into an existing non-empty directory, so
	    # a forced re-download has to remove the old copy first. ":?" aborts
	    # instead of deleting all of models/ if the basename is empty.
	    if [[ -d "${model_dir}" ]]; then
	      rm -rf -- "models/${model_basename:?}"
	    fi
	    git lfs install
	    git clone "https://huggingface.co/${MODEL_NAME}" "${model_dir}"
	  else
	    print_error "Neither huggingface-cli nor git is available for model download"
	    exit 1
	  fi

	  print_success "Model downloaded successfully"
	}

	# Function to start services
	#
	# Brings the Compose stack up in detached mode, waits 30 seconds, then
	# checks that `ps` reports at least one line containing "Up".
	# Globals:  COMPOSE_CMD, COMPOSE_FILE (read).
	# Exits:    1 (after dumping the Compose logs) when nothing is "Up".
	start_services() {
	  print_status "Starting services..."

	  # COMPOSE_CMD is deliberately unquoted: it may hold the two words
	  # "docker compose" and has to be split into command and subcommand.
	  # shellcheck disable=SC2086
	  ${COMPOSE_CMD} -f "${COMPOSE_FILE}" up -d

	  print_status "Waiting for services to be ready..."
	  sleep 30

	  # Check if services are running. This has to be a real pipe; an escaped
	  # "\|" would be passed to Compose as an argument and grep would never
	  # inspect the output.
	  # shellcheck disable=SC2086
	  if ${COMPOSE_CMD} -f "${COMPOSE_FILE}" ps | grep -q "Up"; then
	    print_success "Services started successfully"
	  else
	    print_error "Failed to start services"
	    # shellcheck disable=SC2086
	    ${COMPOSE_CMD} -f "${COMPOSE_FILE}" logs
	    exit 1
	  fi
	}

	# Function to check status
	#
	# Prints the Compose service list, then probes the vLLM health endpoint
	# with a one-line Python script and reports the result. A failed probe
	# only produces a warning.
	# Globals:  COMPOSE_CMD, COMPOSE_FILE (read).
	check_status() {
	  local health_probe="import urllib.request; urllib.request.urlopen('http://localhost:8000/health').read()"

	  print_status "Checking service status..."

	  ${COMPOSE_CMD} -f ${COMPOSE_FILE} ps

	  print_status "Health check..."
	  # The probe's own output and errors are discarded; only its exit status
	  # decides which message is shown.
	  if ! python3 -c "${health_probe}" &> /dev/null; then
	    print_warning "vLLM server health check failed"
	    return
	  fi
	  print_success "vLLM server is healthy"
	}

	# Function to show usage
	#
	# Prints the help text to stdout: the invocation line (using $0), the
	# supported options, and the environment variables the script refers to.
	show_usage() {
	  # One printf call; every argument becomes one output line.
	  printf '%s\n' \
	    "Usage: $0 [OPTIONS]" \
	    "" \
	    "Options:" \
	    " -h, --help Show this help message" \
	    " --no-model Skip model download" \
	    " --force-download Force download even if model exists" \
	    " --clean Clean up before deployment" \
	    "" \
	    "Environment variables:" \
	    " MODEL_PATH Path to model directory" \
	    " MODEL_NAME HuggingFace model name" \
	    " MODEL_FORMAT Model format (hf, safetensors, etc.)" \
	    " GPU_MEMORY_UTILIZATION GPU memory utilization (0.0-1.0)" \
	    " LOG_LEVEL Log level (DEBUG, INFO, WARNING, ERROR)"
	}

	# Parse command line arguments
	#
	# Flags default to "false" and are switched to "true" by the matching
	# option. They are globals because the clean-up step and main read them.
	NO_MODEL=false
	FORCE_DOWNLOAD=false
	CLEAN=false

	# Consume the given arguments and update NO_MODEL / FORCE_DOWNLOAD / CLEAN.
	# Arguments: the script's command-line options.
	# Exits:     0 after printing usage for -h/--help; 1 on an unknown option.
	parse_args() {
	  while [[ $# -gt 0 ]]; do
	    case "$1" in
	      # Two alternatives separated by an unescaped "|"; an escaped "\|"
	      # would only match the literal text "-h|--help".
	      -h | --help)
	        show_usage
	        exit 0
	        ;;
	      --no-model)
	        NO_MODEL=true
	        shift
	        ;;
	      --force-download)
	        FORCE_DOWNLOAD=true
	        shift
	        ;;
	      --clean)
	        CLEAN=true
	        shift
	        ;;
	      *)
	        print_error "Unknown option: $1"
	        show_usage
	        exit 1
	        ;;
	    esac
	  done
	}

	parse_args "$@"

	# Clean up if requested
	#
	# Tears down the Compose stack (including its volumes) and removes the
	# local models/ and logs/ directories.
	# Globals:  COMPOSE_FILE (read); COMPOSE_CMD (read, written when empty).
	# Exits:    1 when no Docker Compose front-end can be found.
	clean_deployment() {
	  print_status "Cleaning up existing deployment..."

	  # This step runs before main, i.e. before check_prerequisites has had a
	  # chance to set COMPOSE_CMD, so resolve it here when it is still empty.
	  # Otherwise the line below would try to execute "-f" as a command.
	  if [[ -z "${COMPOSE_CMD:-}" ]]; then
	    if docker compose version &> /dev/null; then
	      COMPOSE_CMD="docker compose"
	    elif command -v docker-compose &> /dev/null; then
	      COMPOSE_CMD="docker-compose"
	    else
	      print_error "Docker Compose is not installed or not in PATH"
	      exit 1
	    fi
	  fi

	  # COMPOSE_CMD is deliberately unquoted so "docker compose" splits into
	  # command and subcommand.
	  # shellcheck disable=SC2086
	  ${COMPOSE_CMD} -f "${COMPOSE_FILE}" down -v
	  rm -rf models logs
	}

	if [[ "${CLEAN:-false}" == "true" ]]; then
	  clean_deployment
	fi

	# Main deployment process
	#
	# Runs the deployment steps in order: prerequisite check, environment
	# setup, optional model download, service start-up and status check, then
	# prints the service URLs and follow-up commands.
	# Globals:  NO_MODEL, FORCE_DOWNLOAD, MODEL_NAME, COMPOSE_FILE (read);
	#           COMPOSE_CMD (read after check_prerequisites has set it).
	# Arguments: none are read.
	main() {
	  print_status "Starting Stack 2.9 local deployment..."
	  echo "==================================="

	  # Check prerequisites
	  check_prerequisites

	  # Setup environment
	  setup_environment

	  # Download model if not skipped
	  if [[ "${NO_MODEL}" == "false" ]]; then
	    # Download when forced, or when the model directory is missing. Both
	    # tests live in one [[ ]] joined by a real "||"; an escaped "\|\|"
	    # after "]]" is a syntax error.
	    if [[ "${FORCE_DOWNLOAD}" == "true" || ! -d "models/${MODEL_NAME##*/}" ]]; then
	      download_model
	    else
	      print_warning "Model exists and --force-download not specified, skipping download"
	    fi
	  else
	    print_warning "Model download skipped as requested"
	  fi

	  # Start services
	  start_services

	  # Check status
	  check_status

	  print_success "Stack 2.9 deployment completed successfully!"
	  echo ""
	  echo "Service URLs:"
	  echo " vLLM API: http://localhost:8000"
	  echo " Prometheus: http://localhost:9090"
	  echo " Grafana: http://localhost:3000"
	  echo " Traefik Dashboard: http://localhost:8080"
	  echo ""
	  echo "Health check: http://localhost:8000/health"
	  echo ""
	  echo "To stop services: ${COMPOSE_CMD} -f ${COMPOSE_FILE} down"
	  echo "To view logs: ${COMPOSE_CMD} -f ${COMPOSE_FILE} logs -f"
	}

	# Run main function
	# Entry point of the deployment. The script's positional parameters are
	# passed along, although main itself does not read any arguments.
	main "$@"