# Base: CUDA 12.8 + cuDNN developer image, pinned to an explicit tag.
FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04

# Build-time only: keeps apt prompts non-interactive during RUN steps without
# baking DEBIAN_FRONTEND into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# SECURITY NOTE(review): passing HF_TOKEN as a build arg and re-exporting it via
# ENV bakes the secret into the image layers and makes it visible in
# `docker history`. Prefer a BuildKit secret mount
# (`RUN --mount=type=secret,id=hf_token ...`) or injecting the token at
# container start. Kept as-is because downstream tooling presumably reads
# HF_TOKEN from the environment at runtime — TODO confirm before removing.
ARG HF_TOKEN
ENV HF_TOKEN=$HF_TOKEN
# Collapse /usr/local onto /usr: remove the stock /usr/local/bin and lib dirs
# and replace them with symlinks so anything installed under /usr is also
# reachable through the conventional /usr/local paths (and vice versa).
# NOTE(review): this is an aggressive filesystem layout hack — presumably
# required by a downstream consumer of this image; verify before changing.
RUN rm -rf /usr/local/bin /usr/local/lib* || true
RUN ln -s /usr/bin /usr/local/bin && \
    ln -s /usr/lib /usr/local/lib && \
    ln -s /usr/lib /usr/local/lib64
# OS build/runtime dependencies, installed in a single layer:
#  - `apt-get update` and `install` are combined so the package index can never
#    be stale relative to the install (hadolint DL3009);
#  - the original empty `apt-get install -y` layer and the blanket
#    `apt-get upgrade` layer are dropped (DL3005: bump the base-image tag
#    instead of upgrading inside a layer);
#  - duplicate entries from the original list (python3, curl) are deduplicated
#    and the list is sorted for diffability;
#  - apt lists are removed in the same layer to keep the image small (DL3015).
# NOTE(review): installing nvidia-driver-570 inside a container is unusual —
# the driver is normally supplied by the host/runtime; kept to preserve
# existing behavior, verify it is actually needed.
RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
        build-essential \
        cmake \
        curl \
        ffmpeg \
        git \
        git-lfs \
        golang-go \
        grep \
        libbz2-dev \
        libffi-dev \
        liblzma-dev \
        libncursesw5-dev \
        libreadline-dev \
        libsqlite3-dev \
        libssl-dev \
        libxml2-dev \
        libxmlsec1-dev \
        nvidia-driver-570 \
        original-awk \
        python3 \
        python3-pip \
        sed \
        tk-dev \
        unzip \
        wget \
        xz-utils \
        zlib1g-dev \
        zstd \
    && rm -rf /var/lib/apt/lists/*
|
|
# WORKDIR creates /app if missing, so the original `mkdir /app -p` that ran
# AFTER the COPY was a no-op and has been dropped. Files are owned by uid 1000
# (the conventional HF Spaces user).
WORKDIR /app
COPY --chown=1000 . /app
# NOTE(review): 777 is overly permissive — kept because the runtime user
# (uid 1000) presumably writes under /app; consider tightening to 755 once the
# write paths are confirmed.
RUN chmod 777 /app
|
|
|
|
# Install ollama. The pipeline runs under bash with pipefail so a failed
# download cannot silently feed a truncated/empty script to `sh`
# (hadolint DL4006 — the default /bin/sh masks upstream pipe failures).
# NOTE(review): `curl | sh` executes an unpinned remote script — consider
# downloading a pinned release and verifying its checksum instead.
RUN ["/bin/bash", "-o", "pipefail", "-c", "curl -fsSL https://ollama.com/install.sh | sh"]
|
|
|
|
# (The original repeated `WORKDIR /app` here; the working directory was already
# /app, so the redundant instruction has been dropped.)

# Python tooling. `--no-cache-dir` keeps pip's download cache out of the image
# layers (hadolint DL3042); `--break-system-packages` is required on
# Ubuntu 24.04 (PEP 668) to install into the system interpreter.
RUN pip install --no-cache-dir -U pip setuptools wheel --break-system-packages --ignore-installed
RUN pip install --no-cache-dir "huggingface-hub" "hf-transfer" "gradio[oauth]>=6.5.1" "APScheduler" "protobuf>=4.21.0,<5.0.0" "sentencepiece>=0.1.98,<0.3.0" "numpy~=1.26.4" "gguf>=0.1.0" "fastapi" --break-system-packages --ignore-installed

# Torch installed in its own layer so this (very large) download caches
# independently of the other dependencies.
RUN pip install --no-cache-dir "torch>=2.8.0" --break-system-packages --ignore-installed
# NOTE(review): installing transformers from git HEAD is unpinned and makes the
# build non-reproducible — consider pinning to a commit SHA.
RUN pip install --no-cache-dir git+https://github.com/huggingface/transformers.git --break-system-packages --ignore-installed
|
|
|
|
|
|
# Fetch prebuilt llama.cpp binaries from the HF Hub, expose the HF->GGUF
# converter script at /app/convert_hf_to_gguf.py, and move the binaries onto
# PATH. Improvements over the original: `mkdir -p` is idempotent, and the
# staging directory is removed in the SAME layer (the `mv` glob skips dotfiles
# such as the hub's `.cache` dir, which previously persisted in the image).
# NOTE(review): `cp /tmp/llama/convert*` assumes exactly one matching file —
# TODO confirm the repo layout.
RUN mkdir -p /tmp/llama && \
    hf download lainlives/llama.cpp --local-dir /tmp/llama && \
    chmod +x /tmp/llama/* && \
    cp /tmp/llama/convert* /app/convert_hf_to_gguf.py && \
    mv /tmp/llama/* /usr/bin/ && \
    rm -rf /tmp/llama
|
|
# Runtime environment, grouped by concern. ${HOME}, ${LD_LIBRARY_PATH} and
# ${PATH} expand at build time from the environment the base image provides.
# NOTE(review): PYTHONPATH=${HOME}/app resolves against the build-time HOME
# (likely /root), not /app — verify this is intentional.
ENV PYTHONPATH=${HOME}/app \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1
# Gradio server configuration.
ENV GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_ANALYTICS_ENABLED=False
# Progress-bar / platform flags.
ENV TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces
# CUDA library and NVIDIA tool lookup paths.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
    PATH=/usr/local/nvidia/bin:${PATH}
|
|
|
|
|
|
|
|
# Gradio's default port (EXPOSE is documentation only; publish with -p/-P).
EXPOSE 7860
RUN cp /app/start.sh /usr/bin/start_space && chmod +x /usr/bin/start_space
# Exec (JSON-array) form so python3 runs as PID 1 and receives SIGTERM from
# `docker stop` — the original shell form wrapped it in `/bin/sh -c`, which
# swallows signals.
ENTRYPOINT ["python3", "/app/app.py"]