Spaces:

Rayugacodes
/

KernelX

Sleeping

Rayugacodes committed on Apr 25

Commit

7f52093

verified ·

1 Parent(s): 278a0ec

Skip GRPO: merge warm-start and push to HF

Files changed (1) hide show

Dockerfile CHANGED Viewed

@@ -10,8 +10,6 @@ ENV XDG_CACHE_HOME=/tmp/cache
 ENV HOME=/tmp/home
 ENV USER=user
 ENV PYTHONUNBUFFERED=1
-ENV NVIDIA_VISIBLE_DEVICES=all
-ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/cache /tmp/home && \
     chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/cache /tmp/home
@@ -27,13 +25,8 @@ RUN pip install --no-cache-dir \
     "accelerate>=0.34,<0.36" \
     huggingface_hub
-# Verify all imports work at build time
-RUN python3 -c "from trl import SFTTrainer, SFTConfig, GRPOConfig, GRPOTrainer; print('TRL OK')"
-RUN python3 -c "from peft import LoraConfig; print('PEFT OK')"
-RUN python3 -c "import torch; print('CUDA:', torch.cuda.is_available())"
 COPY train_on_hf.py .
 RUN chmod -R 777 /app
 EXPOSE 7860
-CMD ["sh", "-c", "python3 train_on_hf.py --hf-token $HF_TOKEN --skip-world-model"]

 ENV HOME=/tmp/home
 ENV USER=user
 ENV PYTHONUNBUFFERED=1
 RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/cache /tmp/home && \
     chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/cache /tmp/home
     "accelerate>=0.34,<0.36" \
     huggingface_hub
 COPY train_on_hf.py .
 RUN chmod -R 777 /app
 EXPOSE 7860
+CMD ["sh", "-c", "python3 train_on_hf.py --hf-token $HF_TOKEN --skip-world-model --skip-strategist"]