Finish-him committed on
Commit
d55a481
·
verified ·
1 Parent(s): d54fcdc

fix: Dockerfile references app.py instead of missing train.py, add gradio

Browse files
Files changed (1) hide show
  1. Dockerfile +18 -20
Dockerfile CHANGED
@@ -1,36 +1,34 @@
1
- # Usa uma imagem base mais recente e suportada
2
  FROM python:3.9-slim-bookworm
3
 
4
- # Define o diretório de trabalho
5
  WORKDIR /app
6
 
7
- # --- ETAPA 1: INSTALAR O GIT E CLONAR O SEU DATASET COMPLETO ---
8
- RUN apt-get update && apt-get install -y git git-lfs && git-lfs install
 
 
9
  RUN git clone https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados && cd dados && git lfs pull
10
 
11
- # --- ETAPA 2: CONFIGURAR O AMBIENTE PYTHON E PRÉ-AQUECER O CACHE ---
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
- # Define o caminho da cache
16
  ENV HF_HOME=/app/cache/huggingface
17
  ENV SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
18
- RUN mkdir -p $HF_HOME && mkdir -p $SENTENCE_TRANSFORMERS_HOME
19
-
20
- # Pré-aquece o cache com o modelo e5-large
21
- RUN python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder=os.environ.get('SENTENCE_TRANSFORMERS_HOME', '/app/cache/torch'))"
22
 
23
- # --- ETAPA 3: PREPARAR DIRETÓRIOS E COPIAR SCRIPT ---
 
24
 
25
- # *** A CORREÇÃO ESTÁ AQUI ***
26
- # Cria os diretórios necessários e dá permissão ao usuário padrão (1000)
27
- RUN mkdir -p /app/dados_extraidos && \
28
- mkdir -p /app/output && \
29
  chown -R 1000:1000 /app/dados_extraidos /app/output /app/cache
30
 
31
- # Copia o script de treinamento
32
- COPY train.py .
 
 
 
 
33
 
34
- # --- ETAPA 4: EXECUTAR O SCRIPT ---
35
- # Executa o script e, após o término, mantém o contêiner a correr.
36
- CMD ["sh", "-c", "python train.py && sleep infinity"]
 
 
1
  FROM python:3.9-slim-bookworm
2
 
 
3
  WORKDIR /app
4
 
5
+ # Install git + LFS for data cloning; skip recommended packages and drop apt lists to keep the layer small
6
+ RUN apt-get update && apt-get install -y --no-install-recommends git git-lfs && git-lfs install && rm -rf /var/lib/apt/lists/*
7
+
8
+ # Clone data files from the space repo
9
  RUN git clone https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados && cd dados && git lfs pull
10
 
11
+ # Python deps
12
  COPY requirements.txt .
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
+ # Cache dir for models
16
  ENV HF_HOME=/app/cache/huggingface
17
  ENV SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
18
+ RUN mkdir -p $HF_HOME $SENTENCE_TRANSFORMERS_HOME
 
 
 
19
 
20
+ # Pre-download model
21
+ RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder='/app/cache/torch')"
22
 
23
+ # Create output dirs with proper permissions
24
+ RUN mkdir -p /app/dados_extraidos /app/output && \
 
 
25
  chown -R 1000:1000 /app/dados_extraidos /app/output /app/cache
26
 
27
+ # Copy the actual app file (was train.py, now app.py)
28
+ COPY app.py .
29
+
30
+ ENV GRADIO_SERVER_NAME="0.0.0.0"
31
+ ENV GRADIO_SERVER_PORT="7860"
32
+ EXPOSE 7860
33
 
34
+ CMD ["python", "app.py"]