# Samba / Dockerfile
# Commit 05a2948 by Kaballas (verified): "Update Dockerfile"
# Stage `base`: installs the Python dependencies listed in requirements.txt
# as an unprivileged user on top of the stock python:3.9 image.
FROM python:3.9 AS base

# Create the unprivileged account (UID 1000) together with its home directory.
RUN useradd -m -u 1000 user

WORKDIR /app

# key=value form (the space-separated `ENV key value` form is deprecated).
# pip is run as `user` below and is expected to fall back to a per-user
# install; adding that script directory to PATH keeps installed console
# scripts runnable.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Everything from here on runs as the unprivileged user.
USER user

# Copy only the manifest first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt
# Stage `pytorch`: NVIDIA NGC PyTorch image, pinned to release 23.07
# (a fixed tag, not "latest").
FROM nvcr.io/nvidia/pytorch:23.07-py3 AS pytorch

# Every FROM starts from a fresh image, so accounts created in other stages
# do not exist here. Create the account that USER refers to, unless the base
# image already ships one with that name.
RUN id -u user >/dev/null 2>&1 || useradd -m -u 1000 user

# key=value form (the space-separated `ENV key value` form is deprecated).
ENV HOME=/home/user

# Run as the unprivileged user from here on.
USER user
# Final (runtime) stage: NGC PyTorch 23.07 plus the model/training stack,
# serving the application with uvicorn on port 7860.
# NOTE(review): nothing is copied from the `base` stage, so the packages from
# requirements.txt installed there are NOT part of this image -- confirm
# whether they need to be installed here as well.
FROM nvcr.io/nvidia/pytorch:23.07-py3

# Installed image-wide (no --user): a per-user install performed as root ends
# up under root's home, which the unprivileged runtime user cannot rely on.
RUN pip install --no-cache-dir azureml-mlflow tensorboard
RUN pip install --no-cache-dir -U xformers --index-url https://download.pytorch.org/whl/cu121

# Requirement specifiers are quoted: unquoted, the shell parses
# `torch>=2.1.0dev` as "install torch, redirect stdout to a file named
# =2.1.0dev" (dropping the version constraint), and `pkg[extra]` is a glob.
RUN pip install --no-cache-dir \
        packaging \
        "torch>=2.1.0dev" \
        "lightning==2.1.2" \
        "lightning[app]"
RUN pip install --no-cache-dir \
        "jsonargparse[signatures]" \
        tokenizers \
        sentencepiece \
        wandb \
        "lightning[data]" \
        torchmetrics
RUN pip install --no-cache-dir \
        tensorboard \
        zstandard \
        pandas \
        pyarrow \
        huggingface_hub
RUN pip install --no-cache-dir -U flash-attn --no-build-isolation

# Build the extra flash-attention CUDA extensions from source. The checkout
# location and every WORKDIR are absolute so the steps do not depend on the
# base image's default working directory.
RUN git clone https://github.com/Dao-AILab/flash-attention /workspace/flash-attention
WORKDIR /workspace/flash-attention/csrc/rotary
RUN pip install --no-cache-dir .
WORKDIR /workspace/flash-attention/csrc/layer_norm
RUN pip install --no-cache-dir .
WORKDIR /workspace/flash-attention/csrc/xentropy
RUN pip install --no-cache-dir .

RUN pip install --no-cache-dir causal-conv1d==1.2.0.post2
RUN pip install --no-cache-dir mamba-ssm==1.2.0.post1
RUN pip install --no-cache-dir -U --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ triton-nightly
RUN pip install --no-cache-dir einops
RUN pip install --no-cache-dir opt_einsum
RUN pip install --no-cache-dir -U "git+https://github.com/sustcsonglin/flash-linear-attention"

# Create the unprivileged runtime account referenced by --chown/USER below
# (skipped if the base image already provides one with that name) and give it
# ownership of the application directory.
RUN (id -u user >/dev/null 2>&1 || useradd -m -u 1000 user) \
    && mkdir -p /app \
    && chown user /app
ENV HOME=/home/user

# Copy the application code and run from /app so that uvicorn can import the
# `app` module named in CMD.
WORKDIR /app
COPY --chown=user . /app

# Drop root privileges for the running container.
USER user

# Documentation only: the port uvicorn listens on.
EXPOSE 7860

# Set the command to run the application
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]