Finish-him committed on
Commit
315638f
verified
1 Parent(s): ef8f674

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -11
Dockerfile CHANGED
@@ -1,29 +1,34 @@
1
- # Usa uma imagem base mais recente e suportada
2
  FROM python:3.9-slim-bookworm
3
 
4
- # Define o diretório de trabalho
5
  WORKDIR /app
6
 
7
- # --- ETAPA 1: INSTALAR O GIT E CLONAR O SEU DATASET COMPLETO ---
8
  RUN apt-get update && apt-get install -y git git-lfs && git-lfs install
9
  RUN git clone https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados && cd dados && git lfs pull
10
 
11
- # --- ETAPA 2: CONFIGURAR O AMBIENTE PYTHON E PRÉ-AQUECER O CACHE ---
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
- # Define o caminho da cache
16
  ENV HF_HOME=/app/cache/huggingface
17
  ENV SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
18
- RUN mkdir -p $HF_HOME && mkdir -p $SENTENCE_TRANSFORMERS_HOME && chown -R 1000:1000 /app/cache
19
 
20
- # --- CORREÇÃO PRINCIPAL: PRÉ-AQUECIMENTO DO CACHE ---
21
- # Adiciona "import os;" ao início do comando python
22
  RUN python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder=os.environ.get('SENTENCE_TRANSFORMERS_HOME', '/app/cache/torch'))"
23
 
24
- # --- ETAPA 3: EXECUTAR O SCRIPT ---
25
- # Copia o script de treinamento
 
 
 
 
 
26
  COPY train.py .
27
 
28
- # Executa o script e, após o término, mantém o contêiner a correr.
 
29
  CMD ["sh", "-c", "python train.py && sleep infinity"]
 
1
+ # Use a more recent and supported base image
2
  FROM python:3.9-slim-bookworm
3
 
4
+ # Set the working directory
5
  WORKDIR /app
6
 
7
+ # --- STEP 1: INSTALL GIT AND CLONE YOUR DATASET ---
8
  RUN apt-get update && apt-get install -y git git-lfs && git-lfs install
9
  RUN git clone https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados && cd dados && git lfs pull
10
 
11
+ # --- STEP 2: SET UP PYTHON ENVIRONMENT & PRE-WARM CACHE ---
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
+ # Define cache path
16
  ENV HF_HOME=/app/cache/huggingface
17
  ENV SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
18
+ RUN mkdir -p $HF_HOME && mkdir -p $SENTENCE_TRANSFORMERS_HOME
19
 
20
+ # Pre-warm the cache with the e5-large model
 
21
  RUN python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder=os.environ.get('SENTENCE_TRANSFORMERS_HOME', '/app/cache/torch'))"
22
 
23
+ # --- STEP 3: PREPARE RUN DIRECTORIES & COPY SCRIPT ---
24
+
25
+ # *** THE FIX IS HERE ***
26
+ # Create the extraction directory and give the default user (1000) ownership
27
+ RUN mkdir -p /app/dados_extraidos && chown -R 1000:1000 /app/dados_extraidos && chown -R 1000:1000 /app/cache
28
+
29
+ # Copy the training script
30
  COPY train.py .
31
 
32
+ # --- STEP 4: RUN THE SCRIPT ---
33
+ # Execute the script and keep the container alive afterward
34
  CMD ["sh", "-c", "python train.py && sleep infinity"]