Spaces:

Suguru1846
/

TalkToMe

Sleeping

TalkToMe / Dockerfile

Update Dockerfile

7e11a8d verified about 1 year ago

1.25 kB

	# Base image: NVIDIA CUDA 11.8 + cuDNN 8 development image on Ubuntu 22.04 (GPU support)
	FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 AS app

	# Build-time only: keeps apt/dpkg from prompting during RUN steps.
	# Declared as ARG (not ENV) so it does not leak into the running container.
	ARG DEBIAN_FRONTEND=noninteractive

	# Runtime environment.
	# - HF_HOME / TRANSFORMERS_CACHE / TORCH_HOME all point at the same cache
	#   directory, /tmp/hf_cache.
	#   NOTE(review): newer transformers releases prefer HF_HOME over
	#   TRANSFORMERS_CACHE; kept for compatibility with older versions - confirm.
	# - The remaining values are feature flags consumed by the ML libraries;
	#   judging by their names they disable Triton, select the torch backend and
	#   silence the bitsandbytes welcome banner - verify against the library docs.
	ENV HF_HOME=/tmp/hf_cache \
	    TRANSFORMERS_CACHE=/tmp/hf_cache \
	    TORCH_HOME=/tmp/hf_cache \
	    TRITON_DISABLE="1" \
	    BNB_DISABLE_TRITON="1" \
	    USE_TORCH="1" \
	    BITSANDBYTES_NOWELCOME="1"

	# Install system dependencies.
	# --no-install-recommends keeps optional packages out of the image;
	# ca-certificates is listed explicitly so HTTPS downloads (curl, wget, git, pip)
	# do not rely on it being pulled in as a recommended package.
	# The apt lists are removed in the same layer so they never reach the image.
	RUN apt-get update && apt-get install -y --no-install-recommends \
	        ca-certificates \
	        curl \
	        git \
	        python3 \
	        python3-pip \
	        python3-venv \
	        wget \
	    && rm -rf /var/lib/apt/lists/*

	# Cache locations are made world-writable so the server process can write to
	# them whatever UID it ends up running as.
	RUN mkdir -p /tmp/hf_cache /.triton \
	    && chmod -R 777 /tmp/hf_cache \
	    && chmod 777 /.triton

	# Everything from here on runs relative to /app
	WORKDIR /app

	# Python packages go straight into the system interpreter (no virtualenv).
	# Each group is its own layer so a change to one group does not force the
	# others to be reinstalled.

	# 1) Packaging tooling
	RUN python3 -m pip install --no-cache-dir --upgrade \
	        pip \
	        setuptools \
	        wheel

	# 2) PyTorch stack, taken from the CUDA 11.8 wheel index
	RUN python3 -m pip install --no-cache-dir \
	        torch \
	        torchvision \
	        torchaudio \
	        --index-url https://download.pytorch.org/whl/cu118

	# 3) Model-serving libraries
	RUN python3 -m pip install --no-cache-dir \
	        transformers \
	        accelerate \
	        fastapi \
	        uvicorn \
	        huggingface_hub \
	        protobuf

	# 4) Quantization support
	RUN python3 -m pip install --no-cache-dir \
	        scipy \
	        bitsandbytes

	# Run as an unprivileged user instead of root. UID 1000 is the user ID that
	# Hugging Face Docker Spaces run containers under, so giving it a passwd
	# entry and a home directory avoids "unknown user" / unwritable-HOME issues.
	# /app is handed to that user so the application can write next to its code.
	RUN useradd --create-home --uid 1000 user \
	    && chown user:user /app
	USER user

	# Port the API listens on (documentation only; it does not publish the port)
	EXPOSE 7860

	# Copy the API script last so code edits do not invalidate the dependency layers
	COPY --chown=user:user app.py /app/

	# Start the FastAPI app with uvicorn; exec form so the server receives
	# stop signals directly rather than through a shell.
	CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]