Spaces:

Finish-him
/

prometheus-embedding-generator

Sleeping

fix: Dockerfile references app.py instead of missing train.py, add gradio

d55a481 verified 3 days ago

1.12 kB

	FROM python:3.9-slim-bookworm

	WORKDIR /app

	# Git and Git LFS are needed at build time to fetch the data repository.
	# --no-install-recommends keeps optional packages out of the image;
	# ca-certificates is listed explicitly because it would otherwise only
	# arrive as a recommended package and the clone goes over HTTPS.
	# The apt lists are removed in the same layer so they never reach the image.
	RUN apt-get update \
	    && apt-get install -y --no-install-recommends \
	        ca-certificates \
	        git \
	        git-lfs \
	    && git lfs install \
	    && rm -rf /var/lib/apt/lists/*

	# Fetch the Space repository, including its LFS-tracked files, into ./dados.
	# --depth 1 skips commit history that the image never uses, and `git -C`
	# runs the LFS pull inside the clone without a `cd`.
	# NOTE(review): dados/.git likely holds a second copy of every LFS object;
	# consider removing it in this same RUN if app.py never needs git metadata.
	RUN git clone --depth 1 https://huggingface.co/spaces/Finish-him/prometheus-embedding-generator ./dados \
	    && git -C ./dados lfs pull

	# Install Python requirements from the manifest alone, so this layer is
	# only rebuilt when requirements.txt changes.
	COPY requirements.txt ./
	RUN pip install --no-cache-dir --upgrade --requirement requirements.txt

	# Locations where Hugging Face and sentence-transformers store downloads.
	ENV HF_HOME=/app/cache/huggingface \
	    SENTENCE_TRANSFORMERS_HOME=/app/cache/torch

	# Create the cache and output directories, pre-download the embedding model,
	# and hand everything to UID/GID 1000, all in ONE layer.
	# Running chown in a later layer would copy every model file into that new
	# layer and grow the image by roughly the size of the model.
	RUN mkdir -p "$HF_HOME" "$SENTENCE_TRANSFORMERS_HOME" /app/dados_extraidos /app/output \
	    && python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('intfloat/multilingual-e5-large', cache_folder='/app/cache/torch')" \
	    && chown -R 1000:1000 /app/dados_extraidos /app/output /app/cache

	# Create an unprivileged account so the container does not run as root.
	# UID 1000 matches the owner already given to the cache and output dirs.
	# Created before the COPY so this layer stays cached when app.py changes.
	RUN useradd --create-home --uid 1000 user

	# Application entry point, owned by the runtime user.
	COPY --chown=1000:1000 app.py .

	# HOME points at the runtime user's home directory;
	# the Gradio settings make the server listen on all interfaces, port 7860.
	ENV HOME=/home/user \
	    GRADIO_SERVER_NAME="0.0.0.0" \
	    GRADIO_SERVER_PORT="7860"
	EXPOSE 7860

	# Drop root privileges only after every build step that needed them.
	USER user

	CMD ["python", "app.py"]