File size: 2,184 Bytes
fe7599d
 
 
a7a2ed7
fe7599d
72ef9d7
 
 
a7a2ed7
 
 
 
fe7599d
a7a2ed7
 
b01deb2
 
 
a7a2ed7
 
 
fe7599d
0d25138
b01deb2
 
 
 
fe7599d
0d25138
 
a7a2ed7
b01deb2
0d25138
 
a7a2ed7
b01deb2
a7a2ed7
 
 
 
 
fe7599d
b01deb2
 
 
a7a2ed7
 
 
 
 
 
 
 
 
b01deb2
 
a7a2ed7
 
 
 
 
 
0d25138
 
 
b01deb2
db807ff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

# Base image: Python 3.10.
FROM python:3.10

# API_KEY is accepted as a build argument and re-exported as an environment
# variable, so the value supplied at build time is stored in the image and is
# visible to every later build step and to the running container.
# NOTE(review): if API_KEY is a credential, it can be recovered from the image
# config/history. Consider providing it at runtime (or via a BuildKit secret
# mount) instead — confirm how the deployment is expected to supply it.
ARG API_KEY
ENV API_KEY=${API_KEY}

# Install git and git-lfs (needed by the later clone / LFS pull steps).
# --no-install-recommends keeps optional packages out of the image, and the
# apt package lists are removed in the same layer so they never persist.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        git-lfs && \
    rm -rf /var/lib/apt/lists/*

# Add the unprivileged account "user" (UID 1000, with a home directory) and
# make the application directory the working directory for later steps.
RUN useradd --create-home --uid 1000 user
WORKDIR /home/user/app

# Pre-create the model, data, cache and site-packages NLTK directories while
# still root, then hand ownership of them (and the whole home tree, which
# includes the WORKDIR above) to "user".
RUN mkdir -p \
        /home/user/model \
        /home/user/data \
        /home/user/.cache \
        /usr/local/lib/python3.10/site-packages/nltk_data \
    && chown -R user:user \
        /home/user \
        /usr/local/lib/python3.10/site-packages/nltk_data

# Runtime/build environment:
#   HF_ENDPOINT - Hugging Face endpoint, set to the hf-mirror.com mirror.
#   PATH        - prepend the user's ~/.local/bin so executables installed
#                 there are found first.
#   PYTHONPATH  - application directory only. The base image does not define
#                 PYTHONPATH, so appending ":$PYTHONPATH" would leave a
#                 trailing ":" (an empty entry, which Python treats as the
#                 current working directory).
#   NLTK_DATA   - location of the NLTK corpora fetched later in the build.
ENV HF_ENDPOINT=https://hf-mirror.com \
    PATH="/home/user/.local/bin:$PATH" \
    PYTHONPATH="/home/user/app" \
    NLTK_DATA="/home/user/nltk_data"

# Everything from here on runs as the unprivileged account.
USER user

# Bring in only the dependency manifest first, so the install layer below is
# reused from cache until requirements.txt itself changes.
COPY --chown=user requirements.txt ./

# Install the project requirements, then the ML / UI packages on top of them.
# --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --requirement requirements.txt \
    && pip install --no-cache-dir \
        transformers \
        torch \
        sentence-transformers \
        streamlit

# Download the sentence-embedding model from ModelScope into /home/user/model.
# --depth 1 fetches only the latest commit, so repository history is not
# stored in the image. The clone target is given explicitly and `git -C` is
# used instead of `cd`, so each command is independent of the shell's cwd.
# `git lfs install` enables the LFS filters for this user before `git lfs pull`
# replaces the LFS pointer files with the real model files.
RUN git clone --depth 1 \
        https://www.modelscope.cn/Ceceliachenen/paraphrase-multilingual-MiniLM-L12-v2.git \
        /home/user/model/paraphrase-multilingual-MiniLM-L12-v2 && \
    git -C /home/user/model/paraphrase-multilingual-MiniLM-L12-v2 lfs install && \
    git -C /home/user/model/paraphrase-multilingual-MiniLM-L12-v2 lfs pull

# Fetch NLTK data into /home/user/nltk_data (the NLTK_DATA location).
# Only the tip of the gh-pages branch is cloned (--depth 1) to avoid pulling
# the data repository's full history into the image; git creates the target
# directory itself. The contents of packages/ are then moved up to the
# top level of the data directory, and the punkt tokenizer and
# averaged-perceptron tagger archives are unpacked next to their zip files.
RUN git clone --depth 1 --branch gh-pages \
        https://gitee.com/yzy0612/nltk_data.git \
        /home/user/nltk_data && \
    mv /home/user/nltk_data/packages/* /home/user/nltk_data/ && \
    unzip -q /home/user/nltk_data/tokenizers/punkt.zip \
        -d /home/user/nltk_data/tokenizers && \
    unzip -q /home/user/nltk_data/taggers/averaged_perceptron_tagger.zip \
        -d /home/user/nltk_data/taggers

# Download demo data: clone the demos repository under /home/user/data and
# copy the kubeadm guide to the top of the data directory.
# --depth 1 is enough because only the current working tree is read; the
# explicit clone target replaces the previous `cd` into the data directory.
RUN mkdir -p /home/user/data && \
    git clone --depth 1 https://github.com/Kit-Hung/demos /home/user/data/demos && \
    cp /home/user/data/demos/k8s/1-deploy/3-k8s_install/2-kubeadm/kubeadm.md \
        /home/user/data/

# Add the application sources last (owned by "user"), so that source edits do
# not invalidate the dependency and download layers above.
COPY --chown=user . ./

# Document the TCP port the Streamlit server is expected to listen on.
EXPOSE 8501/tcp

# Default command: launch the Streamlit app (exec form, no shell wrapper).
CMD ["streamlit", "run", "app.py"]