# Base image: Hugging Face's GPU image bundling transformers and PyTorch.
# It is exposed as a build argument so a specific tag or digest can be pinned
# for reproducible builds without editing this file, e.g.
#   docker build --build-arg BASE_IMAGE=huggingface/transformers-pytorch-gpu:<tag> .
# The default keeps the previous behaviour (the mutable `latest` tag).
ARG BASE_IMAGE=huggingface/transformers-pytorch-gpu:latest
FROM ${BASE_IMAGE}

# All relative paths in later instructions resolve against /app.
WORKDIR /app
# OS-level tooling: git, a Python 3 interpreter with pip, and the
# python-is-python3 package so that plain `python` is available.
# DEBIAN_FRONTEND is set inline (not via ENV) so that a package with a debconf
# prompt cannot stall the build, while the runtime environment stays untouched.
# The apt lists are removed in the same layer to keep the image small.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        git \
        python-is-python3 \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Build-time sanity check: the layer fails if either `python` or `python3`
# cannot report its version, and the build log records which `python` is on PATH.
RUN python --version \
    && python3 --version \
    && echo "✅ Python disponível: $(which python)"
# Python dependencies.
# requirements.txt is copied on its own so this layer is only rebuilt when the
# dependency list changes.
COPY requirements.txt .

# Steps, in order:
#   1. Write /tmp/constraints.txt pinning torch to the version already installed
#      in the image (any local "+suffix" is dropped from the version string).
#   2. Upgrade pip.
#   3. Install requirements.txt under that torch constraint.
#   4. Upgrade torchvision.
#      NOTE(review): this step does not pass the constraints file, so pip may
#      replace the pinned torch while resolving torchvision - confirm intended.
#   5. Best-effort install of nvidia-cuda-nvjitlink-cu12 (failure is ignored).
#   6. Best-effort removal of bitsandbytes (failure and stderr are ignored).
# Each best-effort step is parenthesised so that its `|| true` applies to that
# step only. Without the grouping, a trailing `|| true` covers the whole
# `&&` chain (shell `&&`/`||` have equal precedence, left-associative) and
# would turn a failed requirements install into a "successful" layer.
RUN python3 -c "import torch; open('/tmp/constraints.txt','w').write('torch==' + torch.__version__.split('+')[0] + '\n')" \
    && python3 -m pip install --no-cache-dir --upgrade pip \
    && python3 -m pip install --no-cache-dir -r requirements.txt -c /tmp/constraints.txt \
    && python3 -m pip install --no-cache-dir --upgrade torchvision \
    && (python3 -m pip install --no-cache-dir nvidia-cuda-nvjitlink-cu12 || true) \
    && (python3 -m pip uninstall -y bitsandbytes 2>/dev/null || true)
# Prepend the nvidia/cuda_nvjitlink library directories under dist-packages to
# the dynamic loader search path. Both the python3.10 and python3.11 locations
# are listed (each with and without the x86_64-linux-gnu subdirectory); entries
# that do not exist are simply skipped by the loader. Any LD_LIBRARY_PATH
# inherited from the base image is kept at the end.
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/dist-packages/nvidia/cuda_nvjitlink/lib:/usr/local/lib/python3.10/dist-packages/nvidia/cuda_nvjitlink/lib/x86_64-linux-gnu:/usr/local/lib/python3.11/dist-packages/nvidia/cuda_nvjitlink/lib:/usr/local/lib/python3.11/dist-packages/nvidia/cuda_nvjitlink/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
# Application sources. Only the files the image needs are copied (no `COPY . .`),
# and they come after the dependency layers so code edits do not trigger a
# dependency reinstall.
COPY training_env.py reporting.py space_lock.py train.py app.py /app/

# Benchmark sample, placed at /app/benchmarks/gold_sample.json.
COPY benchmarks/gold_sample.json /app/benchmarks/

# Directory for log output.
RUN mkdir -p /app/logs
# Default runtime configuration, baked into the image environment; every value
# can be overridden when the container is started (e.g. `docker run -e NAME=value`).
# Presumably these are read by app.py / train.py - verify against those files.

# Model and repository identifiers, plus CPU-thread and CUDA allocator settings.
ENV MODEL_NAME=Qwen/Qwen2.5-1.5B-Instruct \
    DATASET_REPO=beAnalytic/eda-training-dataset \
    OUTPUT_REPO=beAnalytic/eda-llm-qwen2.5-lora \
    OMP_NUM_THREADS=1 \
    PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Sequence-length, batch-size and gradient-accumulation defaults.
ENV MAX_SEQ_LENGTH=256 \
    PER_DEVICE_TRAIN_BATCH_SIZE=1 \
    PER_DEVICE_EVAL_BATCH_SIZE=1 \
    GRADIENT_ACCUMULATION_STEPS=8

# Container entry point (exec form, so the Python process receives signals directly).
CMD ["python", "/app/app.py"]