# Earlier variant of this Dockerfile, kept commented out for reference:
# # Use the openeuler/vllm-cpu base (includes Python, pip, and vLLM pre-installed)
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
# # Target the CPU backend (VLLM_TARGET_DEVICE selects the device vLLM runs on)
# ENV VLLM_TARGET_DEVICE=cpu
# ENV PYTHONUNBUFFERED=1
# # Set working directory
# WORKDIR /workspace
# # Upgrade pip and install CPU-only PyTorch, Transformers, Accelerate, Unsloth, etc.
# # Use the official PyTorch CPU wheel index for better performance on CPU.
# # RUN pip3 install --upgrade pip \
# #     && pip3 install torch --index-url https://download.pytorch.org/whl/cpu \
# #     && pip3 install transformers accelerate unsloth
# # (Optional) Install unsloth_zoo or other utilities if needed:
# # RUN pip3 install unsloth-zoo
# # Copy an example inference script into the container
# # (This script should load the model and do a sample generation; see the sketch below.)
# # COPY inference.py /workspace/inference.py
# # Default command: run the inference script to verify setup
# CMD ["python3", "/workspace/inference.py"]

# Use the openeuler/vllm-cpu base image (Python, pip, and vLLM preinstalled).
# Pinning a tag is more reproducible than :latest:
# FROM openeuler/vllm-cpu:0.8.5-oe2403lts
FROM openeuler/vllm-cpu:latest

# Run vLLM on the CPU backend; unbuffered stdout makes container logs immediate.
ENV VLLM_TARGET_DEVICE=cpu
ENV PYTHONUNBUFFERED=1

WORKDIR /workspace
# Install system packages (compiler and build tools for native extensions)
RUN yum install -y \
    gcc \
    gcc-c++ \
    cmake \
    python3-pip \
    python3-devel \
    ninja-build \
    numactl-devel \
    git \
    && yum clean all

# Install Python packages
RUN pip3 install --upgrade pip \
    && pip3 install numpy

# Build-time diagnostics: confirm the preinstalled vLLM version and package set
RUN pip show vllm
RUN pip list
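
# (Optional) A container healthcheck could poll the server's /health endpoint
# once it is up; hypothetical sketch, assuming curl is available in the image:
# HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \
#   CMD curl -fs http://localhost:7860/health || exit 1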

# Start the vLLM OpenAI-compatible API server serving the Unsloth Llama 3.2 model
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--served-model-name", "llama-3.2-3b-instruct", \
     "--model", "unsloth/Llama-3.2-3B-Instruct", \
     "--trust-remote-code", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--max-model-len", "4096", \
     "--enforce-eager", \
     "--dtype", "float32"]