# Hugging Face Spaces Dockerfile (Space status at capture time: Runtime error)
# Base: CUDA 12.8 + cuDNN devel image on Ubuntu 24.04 — ships nvcc and the CUDA
# headers needed to compile llama.cpp with GGML_CUDA below.
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04

ENV DEBIAN_FRONTEND=noninteractive

# System dependencies: VCS tools, the C/C++ toolchain and the *-dev headers that
# pyenv needs to compile CPython from source, plus ffmpeg and libcurl for the app.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends --fix-missing \
    git \
    git-lfs \
    wget \
    curl \
    cmake \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    ffmpeg \
    libcurl4-openssl-dev \
    nvidia-driver-570 && \
    rm -rf /var/lib/apt/lists/*
# NOTE(review): installing a kernel driver package (nvidia-driver-570) inside a
# container is normally unnecessary — the host driver is injected by the NVIDIA
# container runtime. Kept to preserve existing behavior; TODO confirm it is needed.
# The apt list cleanup above shrinks the layer; it does not change what is installed.
# Run as the non-root UID 1000 that Hugging Face Spaces expects; create the user
# only if the base image does not already ship one with that UID (Ubuntu 24.04
# images include an "ubuntu" user at UID 1000).
# Fix: the original used the bash-only `&>/dev/null` redirection. RUN executes
# under /bin/sh (dash on Ubuntu), which parses `cmd &>file` as "background cmd"
# plus a separate redirection, so the `|| useradd` fallback could never run as
# intended. `>/dev/null 2>&1` is the POSIX-portable equivalent.
RUN id -u 1000 >/dev/null 2>&1 || useradd -m -u 1000 user
USER 1000

# Home lives in /app; put user-local pip entry points on PATH.
ENV HOME=/app \
    PATH=/app/.local/bin:${PATH}

WORKDIR /app
# Install pyenv into /app/.pyenv (user-writable, since we already run as UID 1000).
# Fix: use `curl -fsSL` so an HTTP error or partial download aborts the build
# instead of piping an error page into bash (-f fail on HTTP errors, -L follow
# redirects, -sS quiet but still print errors).
RUN curl -fsSL https://pyenv.run | PYENV_ROOT=/app/.pyenv bash
ENV PYENV_ROOT=/app/.pyenv \
    PATH=/app/.pyenv/shims:/app/.pyenv/bin:${PATH}

# Python version is overridable at build time: --build-arg PYTHON_VERSION=3.12
ARG PYTHON_VERSION=3.11
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install --no-cache-dir "huggingface-hub" "hf-transfer" "gradio[oauth]>=4.28.0" "gradio_huggingfacehub_search==0.0.8" "APScheduler"
# Clone llama.cpp and install its Python requirements (GGUF conversion scripts).
# Only HEAD is built, so a shallow clone suffices and keeps the layer small.
# NOTE(review): the clone is unpinned, so builds are not reproducible — consider
# pinning a release tag or commit.
RUN git clone --depth=1 https://github.com/ggerganov/llama.cpp /app/llama.cpp
RUN pip install --no-cache-dir -r /app/llama.cpp/requirements.txt

# Build the llama.cpp tools used by the app (quantize / gguf-split / imatrix).
# Pass --build-arg RUN_LOCALLY=true to disable the CUDA backend, e.g. for a
# local build on a machine without the CUDA toolchain.
ARG RUN_LOCALLY=false
ENV GGML_CUDA=ON
RUN if [ "$RUN_LOCALLY" = "true" ]; then export GGML_CUDA=OFF; fi && \
    echo "GGML_CUDA=$GGML_CUDA" && \
    cd /app/llama.cpp && \
    cmake -B build -DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=${GGML_CUDA} && \
    cmake --build build --config Release -j2 --target llama-quantize llama-gguf-split llama-imatrix && \
    cp ./build/bin/llama-* . && \
    rm -rf build
# Application code, owned by the runtime UID so it stays writable.
COPY --chown=1000 . /app
# Calibration data that llama-imatrix expects next to the llama.cpp binaries.
COPY groups_merged.txt /app/llama.cpp/

# Runtime environment.
# Fix: the original appended ":${LD_LIBRARY_PATH}" unconditionally; when the
# inherited value is empty this leaves a trailing ":", which makes the dynamic
# linker also search the current working directory. `${VAR:+:${VAR}}` only
# appends ":<value>" when VAR is non-empty.
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} \
    PATH=/usr/local/nvidia/bin:${PATH}

# Launch the Gradio app.
CMD ["/bin/bash", "start.sh"]