# Base image is parameterized so a build can pin an exact tag or digest, e.g.
#   docker build --build-arg BASE_IMAGE=huggingface/transformers-pytorch-gpu@sha256:<digest> .
# The default preserves the previous behavior of tracking the floating
# ":latest" tag, which is NOT reproducible across rebuilds; pin it for
# production images.
ARG BASE_IMAGE=huggingface/transformers-pytorch-gpu:latest
FROM ${BASE_IMAGE}

# Relative paths in the instructions below resolve against /app.
WORKDIR /app

# Install system dependencies.
# python-is-python3 provides the `python` -> `python3` symlink.
# DEBIAN_FRONTEND is set inline (rather than via ENV) so package configuration
# can never stall the build on an interactive prompt, without leaking the
# setting into the runtime environment.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        git \
        python-is-python3 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Sanity check: abort the build early if either interpreter name is missing.
# (The original note states that the NVIDIA entrypoint needs `python`.)
RUN set -e; \
    python --version; \
    python3 --version; \
    echo "✅ Python disponível: $(which python)"

# Install Python dependencies.
# requirements.txt is copied before the application scripts so that editing
# those scripts does not invalidate this layer.
COPY requirements.txt ./
RUN set -e; \
    python3 -m pip install --no-cache-dir -U pip; \
    python3 -m pip install --no-cache-dir --requirement requirements.txt

# Copy the training scripts into the application directory.
COPY train.py app.py /app/

# Create the logs directory.
RUN mkdir -p /app/logs

# Default environment variables (may be overridden, e.g. with `docker run -e`).
ENV MODEL_NAME=microsoft/Phi-3-mini-4k-instruct \
    DATASET_REPO=beAnalytic/eda-training-dataset \
    OUTPUT_REPO=beAnalytic/eda-llm-model \
    OMP_NUM_THREADS=1 \
    PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Default command: run /app/app.py (described by the original author as the
# training run).
# Uses 'python', the symlink to python3 provided by python-is-python3 above.
# NOTE(review): the original comment says the NVIDIA entrypoint expects
# 'python' to be available — not verifiable from this file; confirm against
# the base image.
CMD ["python", "/app/app.py"]