FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies for PDF processing, llama.cpp compilation, and other requirements
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    git \
    unzip \
    cmake \
    make \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Compile llama.cpp from source (for translation feature)
# This ensures compatibility with the container's architecture
# Disable CURL since we don't need it for local GGUF model inference
# Build statically linked to avoid shared library dependencies
RUN cd /tmp && \
    git clone --depth 1 --branch master https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    mkdir build && \
    cd build && \
    cmake .. -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=OFF -DBUILD_SHARED_LIBS=OFF && \
    # Build the llama-cli target (the command-line interface we need) \
    cmake --build . --config Release --target llama-cli -j$(nproc) && \
    # Find the binary (it might be in different locations) \
    if [ -f bin/llama-cli ]; then \
        cp bin/llama-cli /usr/local/bin/llama-main; \
    elif [ -f llama-cli ]; then \
        cp llama-cli /usr/local/bin/llama-main; \
    elif [ -f bin/main ]; then \
        cp bin/main /usr/local/bin/llama-main; \
    else \
        echo "Error: Could not find llama-cli binary. Available files in build/bin:"; \
        ls -la bin/ 2>/dev/null || echo "bin/ directory not found"; \
        echo "Available files in build/:"; \
        find . -name "*llama*" -o -name "main" 2>/dev/null | head -20; \
        exit 1; \
    fi && \
    chmod 755 /usr/local/bin/llama-main && \
    # Also copy any shared libraries if they exist (fallback) \
    if [ -f libllama.so ]; then \
        cp libllama.so /usr/local/lib/ && \
        ldconfig; \
    fi && \
    cd / && \
    rm -rf /tmp/llama.cpp && \
    echo "✅ llama.cpp compiled and installed to /usr/local/bin/llama-main"

# Create a user to avoid running as root
RUN useradd -m -u 1000 user
USER user
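# RUN instructions from here on execute as uid 1000; the COPY steps below use
# --chown=user so the files in /app stay writable by the app.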

# Set HOME, PATH, and the Hugging Face / PyTorch cache locations
ENV HOME=/home/user \
    PATH="/home/user/.local/bin:$PATH" \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface/transformers \
    TORCH_HOME=/home/user/.cache/torch
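# Note: newer transformers releases read HF_HOME and treat TRANSFORMERS_CACHE as
# deprecated; both are set so old and new versions resolve the same cache path.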

# Set environment variables for performance and stability: disable
# torch.compile/TorchDynamo, disable oneDNN custom ops and quiet TensorFlow
# logging, disable tokenizer parallelism, and pin OpenMP to a single thread
ENV TORCH_COMPILE_DISABLE=1 \
    TORCHDYNAMO_DISABLE=1 \
    TF_ENABLE_ONEDNN_OPTS=0 \
    TF_CPP_MIN_LOG_LEVEL=3 \
    TOKENIZERS_PARALLELISM=false \
    OMP_NUM_THREADS=1

# Create cache directories with proper permissions
RUN mkdir -p /home/user/.cache/huggingface/transformers \
    && mkdir -p /home/user/.cache/torch \
    && mkdir -p /tmp/uploads

# Copy requirements first for better Docker layer caching
COPY --chown=user requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --user -r requirements.txt
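# (--user installs under /home/user/.local, which the PATH set above already covers)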

# Copy the application code
COPY --chown=user . .

# Expose the port that HF Spaces expects
EXPOSE 7860

# Set the default command to run the Flask app
CMD ["python", "app.py"]