File size: 1,119 Bytes
59e123e
102926c
dca8957
 
d55a481
 
 
 
92ff09a
b5c2272
d55a481
b5c2272
dca8957
 
d55a481
b5c2272
 
d55a481
9edd46d
d55a481
 
4662e37
d55a481
 
4662e37
 
d55a481
 
 
 
 
 
9edd46d
d55a481
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Base image: official Python 3.9, slim variant, on Debian bookworm.
FROM python:3.9-slim-bookworm

# Working directory for every following instruction; relative paths used by
# COPY and RUN below resolve against /app.
WORKDIR /app

# Install git + Git LFS, needed to clone the data repository at build time.
# --no-install-recommends keeps optional recommended packages out of the
# image; ca-certificates is listed explicitly because cloning over HTTPS
# depends on it. `git-lfs install` registers the LFS filters in git's config.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        git-lfs \
    && git-lfs install \
    && rm -rf /var/lib/apt/lists/*

# Clone the data files from the Space repository into /app/dados.
# --depth 1 fetches only the latest commit, so past revisions of the data are
# not stored in the image. `git -C` runs the LFS pull inside the clone without
# a `cd`; the pull ensures LFS-tracked files hold real content, not pointers.
RUN git clone --depth 1 https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados \
    && git -C ./dados lfs pull

# Python dependencies: only the manifest is copied here, so this layer's cache
# key depends on requirements.txt and not on the application source.
COPY requirements.txt ./
RUN pip install \
        --no-cache-dir \
        --upgrade \
        --requirement requirements.txt

# Model cache locations. Both variables are exported to the remaining build
# steps and to the running container; the directories are created up front.
ENV HF_HOME=/app/cache/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/app/cache/torch
RUN mkdir -p "${HF_HOME}" "${SENTENCE_TRANSFORMERS_HOME}"

# Pre-download the embedding model into /app/cache/torch, create the output
# directories, and hand the writable paths to UID/GID 1000.
# All three steps share ONE layer on purpose: a recursive chown issued in a
# later layer rewrites every file it touches into that new layer, which would
# store the downloaded model files in the image twice.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder='/app/cache/torch')" \
    && mkdir -p /app/dados_extraidos /app/output \
    && chown -R 1000:1000 /app/dados_extraidos /app/output /app/cache

# Application script (app.py; formerly named train.py), copied into the
# working directory.
COPY app.py ./

# Gradio server settings: listen on all interfaces, port 7860 (same port as
# the EXPOSE declaration below).
ENV GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT="7860"
EXPOSE 7860

# Run the application as an unprivileged user instead of root. UID/GID 1000 is
# the owner assigned to /app/dados_extraidos, /app/output and /app/cache by the
# chown earlier in this file, so those paths remain writable.
USER 1000:1000

CMD ["python", "app.py"]