# Image for the Gradio app in app.py: clones the data files of the
# prometheus-embedding-generator space, installs the Python dependencies and
# bakes the intfloat/multilingual-e5-large model into the image so that the
# container does not have to download it at start-up.
FROM python:3.9-slim-bookworm

WORKDIR /app

# Install git + LFS for data cloning.
# ca-certificates is listed explicitly because --no-install-recommends drops
# git's recommended packages and the clone below goes over HTTPS.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        git-lfs \
    && git lfs install \
    && rm -rf /var/lib/apt/lists/*

# Clone data files from the space repo into /app/dados.
# --depth 1: only the current working tree is needed, not the history.
# NOTE(review): the clone is not pinned to a revision, so a cached layer can
# hold stale data; rebuild with --no-cache (or pin a commit) to refresh it.
RUN git clone --depth 1 \
        https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator \
        ./dados \
    && git -C dados lfs pull

# Python deps. The manifest is copied on its own so this layer is rebuilt only
# when requirements.txt changes, not on every edit of app.py.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Cache dirs for models (read by huggingface_hub / sentence-transformers).
ENV HF_HOME=/app/cache/huggingface \
    SENTENCE_TRANSFORMERS_HOME=/app/cache/torch

# Create the unprivileged runtime user (UID 1000) together with every directory
# it must be able to write to. Ownership is set while the directories are still
# empty: a recursive chown after the model download would copy all model files
# into a second layer.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p "$HF_HOME" "$SENTENCE_TRANSFORMERS_HOME" \
        /app/dados_extraidos /app/output \
    && chown -R 1000:1000 /app/dados_extraidos /app/output /app/cache

# Everything below, including the running container, is executed as UID 1000.
USER user

# Pre-download model into the cache dir (as the runtime user, so the files are
# created with the right owner).
RUN python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder='/app/cache/torch')"

# Copy the actual app file (was train.py, now app.py). Done last because it is
# the file that changes most often.
COPY app.py .

# Make Gradio listen on all interfaces on the exposed port.
ENV GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT="7860"

EXPOSE 7860

CMD ["python", "app.py"]