# Source: Hugging Face repo by MightyOctopus — "Update Dockerfile", commit b3bb53a (verified), 952 bytes.
# (Web-page chrome from the original listing converted to this comment so the file parses as a Dockerfile.)
# syntax=docker/dockerfile:1
# Image: vLLM OpenAI-compatible server with a model baked in at build time,
# fronted by a Gradio app (app.py); both are launched by entrypoint.sh.
# NOTE(review): :latest is not reproducible — pin a specific vllm-openai
# tag (or digest) before shipping this to production.
FROM vllm/vllm-openai:latest

WORKDIR /app

# Python deps first: this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ===== Model to bake into the image =====
ARG MODEL_REPO=Qwen/Qwen1.5-4B-Chat-AWQ
ENV MODEL_DIR=/app/models/model
# Name vLLM serves the model under (defaults to the source repo id).
ENV SERVED_MODEL_NAME=${MODEL_REPO}

# Faster downloads via the hf_transfer backend.
# NOTE(review): this requires the `hf_transfer` package at download time —
# confirm it is present in requirements.txt or the base image.
ENV HF_HUB_ENABLE_HF_TRANSFER=1

# Hugging Face token — supplied as a BuildKit secret so it never lands in an
# image layer, in `docker history`, or in the runtime environment:
#   docker build --secret id=hf_token,env=HF_TOKEN .
# The legacy --build-arg HF_TOKEN=... path still works as a fallback, but an
# ARG value is visible in `docker history`; prefer the secret mount.
# (The previous ENV HUGGINGFACE_HUB_TOKEN=... baked the token into the image,
#  and `huggingface-cli login --add-to-git-credential` wrote it into a layer —
#  both removed. `hf download` reads HF_TOKEN from the environment directly,
#  so no login step is needed.)
ARG HF_TOKEN

# Download the model into the image so the container starts without network.
RUN --mount=type=secret,id=hf_token \
    mkdir -p "${MODEL_DIR}" && \
    if [ -f /run/secrets/hf_token ]; then \
        export HF_TOKEN="$(cat /run/secrets/hf_token)"; \
    fi && \
    hf download "${MODEL_REPO}" --local-dir "${MODEL_DIR}" && \
    ls -lh "${MODEL_DIR}"

# App code + entrypoint (--chmod avoids a separate layer-doubling RUN chmod).
COPY app.py .
COPY --chmod=755 entrypoint.sh .

# Documentation only (ports are published at `docker run`):
# 8000 = vLLM OpenAI API, 7860 = Gradio UI.
EXPOSE 8000 7860

# entrypoint.sh starts vLLM in the background and Gradio as PID 1.
# NOTE(review): the wrapper must end with `exec` so Gradio receives SIGTERM.
ENTRYPOINT ["/app/entrypoint.sh"]