# Image for the Gradio app in app.py.
# Build steps: install git + Git LFS, clone the data repository into
# /app/dados, install Python dependencies, pre-download the embedding model
# into /app/cache, then run app.py as an unprivileged user on port 7860.
FROM python:3.9-slim-bookworm

WORKDIR /app

# Install git + Git LFS for the data clone below.
# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly because git needs it for HTTPS once recommended packages are off.
# `git lfs install --system` writes the LFS filters to the system gitconfig so
# they apply to every user in the image, not only to root.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        git-lfs \
    && git lfs install --system \
    && rm -rf /var/lib/apt/lists/*

# Clone the data files from the Space repository.
# --depth 1 skips history that is not needed for the data files; `git -C`
# replaces `cd` so the step does not depend on shell working-directory state.
RUN git clone --depth 1 https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados \
    && git -C ./dados lfs pull

# Python dependencies. The manifest is copied on its own so this layer is
# reused from cache until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Cache locations for downloaded models.
ENV HF_HOME=/app/cache/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/app/cache/torch

# Create the unprivileged runtime user and the directories it must write to.
# The directories are still empty here, so the recursive chown is cheap; doing
# the chown after the model download would store the model files a second time
# in a new layer.
# The non-recursive chown of /app lets the runtime user create files in the
# working directory without taking ownership of the cloned data.
# NOTE(review): UID/GID 1000 matches the ownership this file has always
# granted; presumably the deployment platform runs the container as that
# UID - confirm.
RUN groupadd --gid 1000 user \
    && useradd --uid 1000 --gid 1000 --create-home user \
    && mkdir -p "$HF_HOME" "$SENTENCE_TRANSFORMERS_HOME" /app/dados_extraidos /app/output \
    && chown -R 1000:1000 /app/cache /app/dados_extraidos /app/output \
    && chown 1000:1000 /app

# Everything from here on (build steps and the running container) is non-root.
USER 1000:1000

# Pre-download the model at build time so the container does not have to fetch
# it on first start. Running this as the runtime user means the cached files
# are already owned by UID 1000.
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder='/app/cache/torch')"

# Copy the application entry point last (formerly train.py, now app.py) so
# code edits do not invalidate the dependency and model layers above.
COPY app.py .

# Gradio server settings: listen on all interfaces, port 7860.
ENV GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT="7860"

# Documentation only; the port still has to be published at run time.
EXPOSE 7860

CMD ["python", "app.py"]