Finish-him committed on
Commit
4820f79
·
verified ·
1 Parent(s): 315638f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +15 -13
Dockerfile CHANGED
@@ -1,34 +1,36 @@
1
- # Use a more recent and supported base image
2
  FROM python:3.9-slim-bookworm
3
 
4
- # Set the working directory
5
  WORKDIR /app
6
 
7
- # --- STEP 1: INSTALL GIT AND CLONE YOUR DATASET ---
8
  RUN apt-get update && apt-get install -y git git-lfs && git-lfs install
9
  RUN git clone https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados && cd dados && git lfs pull
10
 
11
- # --- STEP 2: SET UP PYTHON ENVIRONMENT & PRE-WARM CACHE ---
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
- # Define cache path
16
  ENV HF_HOME=/app/cache/huggingface
17
  ENV SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
18
  RUN mkdir -p $HF_HOME && mkdir -p $SENTENCE_TRANSFORMERS_HOME
19
 
20
- # Pre-warm the cache with the e5-large model
21
  RUN python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder=os.environ.get('SENTENCE_TRANSFORMERS_HOME', '/app/cache/torch'))"
22
 
23
- # --- STEP 3: PREPARE RUN DIRECTORIES & COPY SCRIPT ---
24
 
25
- # *** THE FIX IS HERE ***
26
- # Create the extraction directory and give the default user (1000) ownership
27
- RUN mkdir -p /app/dados_extraidos && chown -R 1000:1000 /app/dados_extraidos && chown -R 1000:1000 /app/cache
 
 
28
 
29
- # Copy the training script
30
  COPY train.py .
31
 
32
- # --- STEP 4: RUN THE SCRIPT ---
33
- # Execute the script and keep the container alive afterward
34
  CMD ["sh", "-c", "python train.py && sleep infinity"]
 
1
+ # Usa uma imagem base mais recente e suportada
2
  FROM python:3.9-slim-bookworm
3
 
4
+ # Define o diretório de trabalho
5
  WORKDIR /app
6
 
7
+ # --- ETAPA 1: INSTALAR O GIT E CLONAR O SEU DATASET COMPLETO ---
8
  RUN apt-get update && apt-get install -y git git-lfs && git-lfs install
9
  RUN git clone https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados && cd dados && git lfs pull
10
 
11
+ # --- ETAPA 2: CONFIGURAR O AMBIENTE PYTHON E PRÉ-AQUECER O CACHE ---
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
+ # Define o caminho da cache
16
  ENV HF_HOME=/app/cache/huggingface
17
  ENV SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
18
  RUN mkdir -p $HF_HOME && mkdir -p $SENTENCE_TRANSFORMERS_HOME
19
 
20
+ # Pré-aquece o cache com o modelo e5-large
21
  RUN python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder=os.environ.get('SENTENCE_TRANSFORMERS_HOME', '/app/cache/torch'))"
22
 
23
+ # --- ETAPA 3: PREPARAR DIRETÓRIOS E COPIAR SCRIPT ---
24
 
25
+ # *** A CORREÇÃO ESTÁ AQUI ***
26
+ # Cria os diretórios necessários e permissão ao usuário padrão (1000)
27
+ RUN mkdir -p /app/dados_extraidos && \
28
+ mkdir -p /app/output && \
29
+ chown -R 1000:1000 /app/dados_extraidos /app/output /app/cache
30
 
31
+ # Copia o script de treinamento
32
  COPY train.py .
33
 
34
+ # --- ETAPA 4: EXECUTAR O SCRIPT ---
35
+ # Executa o script e, após o término, mantém o contêiner a correr.
36
  CMD ["sh", "-c", "python train.py && sleep infinity"]