#!/bin/bash
# Launches the TTRLVR + AZR PPO training module on a single GPU.
#
# Environment:
#   GPU_ID      index of the GPU to expose to the job (default: 3)
#   PYTHONPATH  optional; the project roots below are appended to it

# Trace each command to stderr as it runs.
set -x

# --- GPU selection -----------------------------------------------------------
# Honour a caller-supplied GPU_ID, otherwise fall back to GPU 3.
GPU_ID=${GPU_ID:-3}
export CUDA_VISIBLE_DEVICES="$GPU_ID"

# --- Runtime environment -----------------------------------------------------
export VLLM_ATTENTION_BACKEND=FLASH_ATTN
# A refresh interval of 0 switches Ray's memory monitor off (see Ray docs).
export RAY_memory_monitor_refresh_ms=0
# NOTE(review): presumably raises Ray log verbosity - confirm Ray honours this name.
export RAY_LOGGING_LEVEL=DEBUG
export HYDRA_FULL_ERROR=1
# Append the project roots. ${VAR:+...} keeps the value from starting with ':'
# (an empty search-path entry) when PYTHONPATH is unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}/home/ubuntu/RLVR/verl:/home/ubuntu/RLVR/TestTime-RLVR-v2"
# --- Logging setup -----------------------------------------------------------
# Directory that collects one log file per launch; created on demand.
LOG_DIR="/home/ubuntu/RLVR/TestTime-RLVR-v2/logs"
mkdir -p -- "$LOG_DIR"

# Name the log after the launch time (YYYYmmdd_HHMMSS) so runs never collide
# unless started within the same second.
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="$LOG_DIR/ttrlvr_azr_training_${TIMESTAMP}.log"

echo "Logging to: $LOG_FILE"
# --- Training data -----------------------------------------------------------
# Hard-coded data directory; it is passed as both the training and the
# validation file set below.
TTRLVR_DATA_PATH="/home/ubuntu/RLVR/TestTime-RLVR-v2/tmp/batch_results/ttrlvr_azr_20250730_155352/mbpp/Mbpp_7/round_1/azr_training_data"

# --- Launch training ---------------------------------------------------------
# stdout and stderr of the Python process are merged and mirrored to both the
# console and $LOG_FILE via tee. The experiment name is tagged with the GPU id.
python -m absolute_zero_reasoner.main_azr_ppo \
  --config-path=/home/ubuntu/RLVR/TestTime-RLVR-v2/test/configs \
  --config-name=ttrlvr_azr_ppo \
  data.train_files="$TTRLVR_DATA_PATH" \
  data.val_files="$TTRLVR_DATA_PATH" \
  trainer.experiment_name="ttrlvr_azr_gpu${GPU_ID}" 2>&1 | tee "$LOG_FILE"

# Must immediately follow the pipeline: PIPESTATUS[0] is the Python process's
# exit status, whereas plain $? would report tee's.
EXITCODE=${PIPESTATUS[0]}
# --- Log footer --------------------------------------------------------------
# Append a short trailer (finish time and the training exit code) to the log
# file only; nothing here is printed to the console.
{
  echo ""
  echo "========================================"
  echo "Execution finished at: $(date)"
  echo "Exit code: $EXITCODE"
} >> "$LOG_FILE"
# --- Console summary ---------------------------------------------------------
# Report the outcome to the user. The success banner is shown only when the
# training process exited with status 0, so a failed run is not announced as
# completed.
echo ""
if [[ "$EXITCODE" == "0" ]]; then
  echo "✅ Training completed!"
else
  echo "❌ Training failed!"
fi
echo "Full log saved to: $LOG_FILE"
echo "Exit code: $EXITCODE"

# Propagate the training process's status to whoever invoked this script.
exit "$EXITCODE"