# Dockerfile for a Hugging Face Docker Space that serves a Streamlit app (app.py).
# Read the docs: https://huggingface.co/docs/hub/spaces-sdks-docker
# They also contain guides on how best to write your Dockerfile.

FROM python:3.10

# NOTE(review): copying a build ARG into ENV persists the value in the image
# configuration/history. If API_KEY is a secret, prefer supplying it at
# runtime (or via a build secret mount) instead of baking it into the image.
# Kept as-is so code that reads API_KEY from the environment keeps working.
ARG API_KEY
ENV API_KEY=${API_KEY}

# Install git and git-lfs (model/data downloads) plus unzip, which the NLTK
# step below calls; installing it explicitly avoids relying on the base image
# happening to ship it. The apt lists are removed in the same layer to keep
# the layer small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends git git-lfs unzip && \
    rm -rf /var/lib/apt/lists/*

# Create the unprivileged user (uid 1000) and set the working directory.
# WORKDIR creates /home/user/app as root; the chown -R below hands it to `user`.
RUN useradd -m -u 1000 user
WORKDIR /home/user/app

# Create the directories used by later steps and give `user` ownership.
# NOTE(review): NLTK_DATA below points at /home/user/nltk_data, so the
# site-packages/nltk_data directory looks unused — confirm before removing.
RUN mkdir -p \
        /home/user/model \
        /home/user/data \
        /home/user/.cache \
        /usr/local/lib/python3.10/site-packages/nltk_data && \
    chown -R user:user \
        /home/user \
        /usr/local/lib/python3.10/site-packages/nltk_data

# Environment for the remaining build steps and for the running container.
# PYTHONPATH is set outright: no earlier instruction defines it, so appending
# ":$PYTHONPATH" would only leave a trailing ':' (an empty path entry).
ENV HF_ENDPOINT=https://hf-mirror.com
ENV PATH="/home/user/.local/bin:$PATH"
ENV PYTHONPATH="/home/user/app"
ENV NLTK_DATA="/home/user/nltk_data"

# Everything from here on runs as the unprivileged user, so pip installs into
# that user's home and all downloaded files are owned by `user`.
USER user

# Copy requirements.txt on its own first so the dependency layer can be reused
# from cache when only application code changes.
COPY --chown=user requirements.txt .

# Install Python dependencies.
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir transformers torch sentence-transformers streamlit

# Download the sentence-embedding model; `git lfs pull` fetches the LFS-tracked
# files of the cloned repository.
RUN cd /home/user/model && \
    git clone https://www.modelscope.cn/Ceceliachenen/paraphrase-multilingual-MiniLM-L12-v2.git && \
    cd /home/user/model/paraphrase-multilingual-MiniLM-L12-v2 && \
    git lfs install && \
    git lfs pull

# Download NLTK data: clone the mirror into /home/user/nltk_data, move the
# contents of packages/ up to the top level (the path NLTK_DATA points at),
# then unpack the punkt tokenizer and the averaged-perceptron tagger archives.
RUN mkdir -p /home/user/nltk_data && \
    cd /home/user && \
    git clone https://gitee.com/yzy0612/nltk_data.git --branch gh-pages && \
    cd /home/user/nltk_data && \
    mv /home/user/nltk_data/packages/* /home/user/nltk_data && \
    cd /home/user/nltk_data/tokenizers && \
    unzip punkt.zip && \
    cd /home/user/nltk_data/taggers && \
    unzip averaged_perceptron_tagger.zip

# Download demo data and copy the kubeadm guide to the top of /home/user/data.
RUN mkdir -p /home/user/data && \
    cd /home/user/data && \
    git clone https://github.com/Kit-Hung/demos && \
    cp /home/user/data/demos/k8s/1-deploy/3-k8s_install/2-kubeadm/kubeadm.md /home/user/data

# Copy the application files into the working directory, owned by `user`.
COPY --chown=user . .

# Expose the Streamlit port.
EXPOSE 8501

# Run the Streamlit app.
CMD ["streamlit", "run", "app.py"]