File size: 1,249 Bytes
1b6b568
75c3447
83f9645
75c3447
83f9645
7e11a8d
 
 
a936dda
 
 
 
83f9645
 
 
 
406821d
83f9645
 
406821d
 
7e11a8d
 
a936dda
83f9645
7e11a8d
c8859a4
7e11a8d
 
 
83f9645
1b6b568
 
83f9645
 
406821d
83f9645
 
c8859a4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Base image: NVIDIA CUDA 11.8.0 with cuDNN 8 ("devel" variant) on Ubuntu 22.04,
# chosen for GPU acceleration. The CUDA version here matches the cu118 PyTorch
# wheel index used by the pip install step further down.
# NOTE(review): the tag is not pinned by digest, so rebuilds may pick up a
# re-published image; pin with @sha256:... if reproducibility matters.
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 

# Build-time only: stops apt/debconf from prompting during `docker build`.
# Declared as ARG rather than ENV so it is visible to the RUN steps below but
# is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
# - HF_HOME / TRANSFORMERS_CACHE / TORCH_HOME all point at /tmp/hf_cache, the
#   world-writable directory created later in this file.
# - NOTE(review): TRITON_DISABLE, BNB_DISABLE_TRITON, USE_TORCH and
#   BITSANDBYTES_NOWELCOME are presumably read by triton / bitsandbytes /
#   transformers inside app.py's dependencies; confirm each is still honoured
#   by the installed versions.
ENV HF_HOME=/tmp/hf_cache \
    TRANSFORMERS_CACHE=/tmp/hf_cache \
    TORCH_HOME=/tmp/hf_cache \
    TRITON_DISABLE="1" \
    BNB_DISABLE_TRITON="1" \
    USE_TORCH="1" \
    BITSANDBYTES_NOWELCOME="1"

# Install OS-level tooling and the Python runtime.
# - --no-install-recommends keeps optional "Recommends" packages out of the image.
# - ca-certificates is listed explicitly so HTTPS keeps working for curl, git
#   and wget even though recommended packages are skipped.
# - update + install + list cleanup happen in one layer so the apt index is
#   never stale and never stored in the image.
# NOTE(review): if a future Python dependency has to compile from source, add
# python3-dev (and a compiler, if the base image lacks one) to this list.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-pip \
        python3-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Working directory for the remaining build steps and for the container's
# default command; app.py is copied here and uvicorn resolves "app:app" from it.
WORKDIR /app 

# Pre-create the writable scratch directories in a single layer:
#   /tmp/hf_cache - the path HF_HOME, TRANSFORMERS_CACHE and TORCH_HOME point to
#   /.triton      - NOTE(review): presumably Triton's cache location when the
#                   process has "/" as its home directory; confirm it is needed
# Mode 777 makes both writable regardless of which UID the container runs as.
RUN mkdir -p /tmp/hf_cache /.triton \
    && chmod -R 777 /tmp/hf_cache \
    && chmod 777 /.triton

# Python dependencies go straight into the system interpreter (no virtualenv).
# Each group is its own layer so a change to one group does not force the
# groups above it to be reinstalled.
# NOTE(review): no versions are pinned, so every rebuild may resolve to
# different releases; consider pinning once a known-good set is established.

# Packaging toolchain first, so the installs below use the upgraded pip.
RUN pip install --no-cache-dir --upgrade \
        pip \
        setuptools \
        wheel

# PyTorch stack from the CUDA 11.8 wheel index.
RUN pip install --no-cache-dir \
        torch \
        torchvision \
        torchaudio \
        --index-url https://download.pytorch.org/whl/cu118

# Model-serving and API libraries.
RUN pip install --no-cache-dir \
        transformers \
        accelerate \
        fastapi \
        uvicorn \
        huggingface_hub \
        protobuf

# Numerical and quantization extras.
RUN pip install --no-cache-dir \
        scipy \
        bitsandbytes

# Document the port the API listens on (must match --port in CMD below).
EXPOSE 7860

# Add the application module after all dependency layers, so editing app.py
# only rebuilds from this step onward.
COPY app.py /app/app.py

# Start uvicorn serving the `app` object from app.py on all interfaces.
# NOTE(review): no USER instruction is set, so this runs as root unless the
# hosting platform overrides the UID; confirm that is intended.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]