# Listing metadata (not script content) - file size: 7,459 bytes; revision: 24c2665
#!/bin/bash
# Preamble of the AZR 7B launcher: enables command tracing, pins the visible
# GPUs, prepares a timestamped log file and mirrors all later output into it.
set -x

# Expose only GPUs 4, 5 and 6 to CUDA.
# NOTE(review): the log/experiment names say "gpu567" while the devices are
# 4,5,6 - confirm which one is intended.
export CUDA_VISIBLE_DEVICES=4,5,6

# Log location: one file per run under /data/azr/logs, named by start time.
LOG_DIR="/data/azr/logs"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_FILE="${LOG_DIR}/7b_gpu567_${TIMESTAMP}.log"

# Make sure the log directory exists before anything is appended to the file.
mkdir -p "$LOG_DIR"

# Start-of-run banner, shown on the terminal and appended to the log file.
{
  echo "๐ AZR 7B GPU567 Training Started at $(date)"
  echo "๐ Log file: $LOG_FILE"
  echo "๐ฏ GPUs: 4,5,6"
  echo "========================================"
} | tee -a "$LOG_FILE"

# From here on, stdout and stderr both go to the terminal and the log file.
exec > >(tee -a "$LOG_FILE") 2>&1
# Runtime environment for the training process. Every variable below is
# exported so that the Python launcher and the processes it spawns inherit it.

# vLLM: select the FlashAttention backend, the "spawn" multiprocessing start
# method for workers, and bfloat16 as the dtype.
export \
  VLLM_ATTENTION_BACKEND=FLASH_ATTN \
  VLLM_USE_FLASH_ATTN_2=1 \
  VLLM_WORKER_MULTIPROC_METHOD=spawn \
  VLLM_TORCH_DTYPE=bfloat16

# Torch / Transformers dtype hints and the CUDA caching-allocator setting.
# NOTE(review): TORCH_DTYPE and TRANSFORMERS_CACHE_DTYPE are not variables
# this script itself reads - confirm a consumer exists for them.
export \
  TORCH_DTYPE=bfloat16 \
  TRANSFORMERS_CACHE_DTYPE=bfloat16 \
  PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512

# Ray: a refresh interval of 0 presumably turns the memory monitor off
# (confirm against the Ray docs); logging is set to DEBUG with log
# de-duplication switched off.
export \
  RAY_memory_monitor_refresh_ms=0 \
  RAY_LOGGING_LEVEL=DEBUG \
  RAY_DEDUP_LOGS=0

# Hydra: ask for complete error output.
export HYDRA_FULL_ERROR=1

# Append the verl checkout under the current directory to the module path.
PYTHONPATH="${PYTHONPATH}:$(pwd)/verl"
export PYTHONPATH

# Hugging Face cache root; TRANSFORMERS_CACHE is cleared so that only HF_HOME
# is set by this script.
export HF_HOME=/data/.cache/huggingface
unset TRANSFORMERS_CACHE
# Seed dataset files. Each path can be overridden from the environment;
# otherwise the defaults under /data/azr/data are used. The same path is
# passed both as the dataset to read and as the output path for that dataset.
OUTPUT_SEED_PATH=${OUTPUT_SEED_PATH:-/data/azr/data/7b_seed_io.jsonl}
OUTPUT_ERROR_SEED_PATH=${OUTPUT_ERROR_SEED_PATH:-/data/azr/data/7b_error_seed_io.jsonl}
OUTPUT_CODE_F_SEED_PATH=${OUTPUT_CODE_F_SEED_PATH:-/data/azr/data/7b_code_f_seed_io.jsonl}

# Hydra overrides for this run, one array element per override, in the order
# they are passed. Keeping them in an array means every element reaches Python
# as exactly one argument: values coming from variables are not word-split,
# and bracketed list values are quoted so the shell never treats them as glob
# patterns. A leading "+" adds a key that is not in the base config.
azr_overrides=(
  # --- data, checkpoint and output locations ---
  data.shuffle=True
  actor_rollout_ref.ref.include_ref=False
  algorithm.adv_estimator=reinforce_plus_plus
  data.train_files=data/code_reason/test_answer.parquet
  data.val_files=data/code_reason/test_answer.parquet
  +trainer.checkpoint_dir=/data/azr/checkpoints/7b_gpu567
  +trainer.save_dir=/data/azr/models/7b_gpu567
  hydra.run.dir=/data/azr/outputs/7b_gpu567

  # --- batch sizes and sequence lengths ---
  data.train_batch_size=24
  data.val_batch_size=492
  data.max_prompt_length=4096
  data.max_response_length=6144
  azr.data_selection_strategy.content_max_length=4000

  # --- model, dtype and actor optimisation ---
  actor_rollout_ref.model.path=Qwen/Qwen2.5-7B
  actor_rollout_ref.actor.optim.lr=1e-6
  actor_rollout_ref.model.use_remove_padding=True
  +actor_rollout_ref.model.torch_dtype=bfloat16
  +actor_rollout_ref.model.fsdp_config.model_dtype=bf16
  +actor_rollout_ref.actor.fsdp_config.model_dtype=bf16
  +actor_rollout_ref.ref.fsdp_config.model_dtype=bf16
  actor_rollout_ref.actor.ppo_mini_batch_size=48
  actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8
  actor_rollout_ref.actor.use_kl_loss=False
  actor_rollout_ref.actor.kl_loss_coef=0.0
  actor_rollout_ref.actor.kl_loss_type=low_var_kl
  actor_rollout_ref.actor.ulysses_sequence_parallel_size=1
  actor_rollout_ref.model.enable_gradient_checkpointing=True
  actor_rollout_ref.model.pretrained_tokenizer=True
  actor_rollout_ref.actor.fsdp_config.param_offload=True
  actor_rollout_ref.actor.fsdp_config.optimizer_offload=True

  # --- reference policy and vLLM rollout ---
  actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32
  actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32
  actor_rollout_ref.rollout.tensor_model_parallel_size=1
  actor_rollout_ref.rollout.name=vllm
  actor_rollout_ref.rollout.max_num_batched_tokens=32768
  actor_rollout_ref.rollout.gpu_memory_utilization=0.7
  actor_rollout_ref.rollout.enforce_eager=True
  actor_rollout_ref.rollout.free_cache_engine=True
  actor_rollout_ref.rollout.enable_chunked_prefill=False
  actor_rollout_ref.rollout.n=1
  actor_rollout_ref.rollout.temperature=1.0
  actor_rollout_ref.rollout.dtype=bfloat16
  actor_rollout_ref.ref.fsdp_config.param_offload=True
  algorithm.kl_ctrl.kl_coef=0.0

  # --- trainer: logging, topology, checkpoint and validation cadence ---
  trainer.critic_warmup=0
  'trainer.logger=[console,wandb]'
  trainer.project_name=azr
  trainer.experiment_name=azr_7b_gpu567
  # Three processes, matching the three GPUs in CUDA_VISIBLE_DEVICES.
  trainer.n_gpus_per_node=3
  trainer.nnodes=1
  trainer.save_freq=10
  trainer.remove_previous_ckpt_in_save=True
  trainer.del_local_ckpt_after_load=True
  trainer.test_freq=10
  +trainer.val_before_train=False
  reward_fn.extraction_type=answer_conditional
  reward_fn.math_metric=math_verify
  trainer.log_val_generations=0

  # --- AZR seed datasets and task setup ---
  azr.data_selection_strategy.update_iteration=1
  "azr.seed_dataset=${OUTPUT_SEED_PATH}"
  "azr.output_seed_path=${OUTPUT_SEED_PATH}"
  "azr.error_seed_dataset=${OUTPUT_ERROR_SEED_PATH}"
  "azr.output_error_seed_path=${OUTPUT_ERROR_SEED_PATH}"
  "azr.code_f_seed_dataset=${OUTPUT_CODE_F_SEED_PATH}"
  "azr.output_code_f_seed_path=${OUTPUT_CODE_F_SEED_PATH}"
  azr.pretrain_pred_steps=-1
  azr.executor=qwq
  azr.ast_check=True
  azr.reward.n_samples=4
  'azr.problem_types=[code_i,code_o,code_f]'
  'azr.data_selection_strategy.banned_keywords_for_errors_and_exceptions=[raise]'
  trainer.debug=False

  # --- generation reward terms (every term: coef 0.0, max 0.0, disabled) ---
  azr.reward.generation_reward_config.complexity_reward.coef=0.0
  azr.reward.generation_reward_config.complexity_reward.max=0.0
  azr.reward.generation_reward_config.complexity_reward.enabled=False
  azr.reward.generation_reward_config.mean_edit_distance_reward.coef=0.0
  azr.reward.generation_reward_config.mean_edit_distance_reward.max=0.0
  azr.reward.generation_reward_config.mean_edit_distance_reward.enabled=False
  azr.reward.generation_reward_config.halstead_reward.coef=0.0
  azr.reward.generation_reward_config.halstead_reward.max=0.0
  azr.reward.generation_reward_config.halstead_reward.enabled=False
  azr.reward.generation_reward_config.answer_diversity_reward.coef=0.0
  azr.reward.generation_reward_config.answer_diversity_reward.max=0.0
  azr.reward.generation_reward_config.answer_diversity_reward.enabled=False
  azr.reward.generation_reward_config.answer_diversity_reward.hierarchical=False

  # --- data selection / mixing strategy ---
  azr.pred_data_mix_strategy=max_new
  azr.data_selection_strategy.seed_batch_factor=4
  azr.data_selection_strategy.valid_program_filter=all
  azr.data_selection_strategy.max_programs=16384
  azr.data_selection_strategy.batched_estimate=False
  azr.reward.generation_reward_config.intrinsic_combine_method=sum
  azr.gen_data_probabilities_strategy=uniform
  trainer.resume_mode=auto
  azr.data_selection_strategy.composite_start_step=-1
  azr.data_selection_strategy.composite_chance=0.0
  azr.reward.generation_reward_config.remove_comments=False
  azr.reward.generation_reward_config.remove_after_return=False
  azr.reward.generation_reward_config.use_original_code_as_ref=True
  azr.reward.generation_reward_config.remove_print=False
  azr.data_selection_strategy.composite_function_n_min=0
  azr.data_selection_strategy.composite_function_n_max=0
  azr.reward.code_f_reward_type=binary
  trainer.wandb_run_id=null
  trainer.total_epochs=30

  # --- saving of generated programs ---
  azr.save_generated_data=True
  azr.save_data_path=/data/azr/generated/generated_programs_7b_gpu567
  azr.save_valid_data=True
  azr.save_invalid_data=True
  azr.save_frequency=10
  azr.save_final_datasets=True
)

# Launch training. Arguments given to this script are appended after the
# built-in overrides; "$@" keeps each caller argument intact (no re-splitting
# or globbing), so values containing spaces or brackets survive.
# This must remain the last command of the section: its exit status is the
# status of the training run.
python -m absolute_zero_reasoner.main_azr_ppo "${azr_overrides[@]}" "$@"
# End-of-run report.
#
# Prints the closing banner for the run and hands the training exit status
# back to the caller of this script.
#
# Output is written with plain echo: earlier in this script stdout and stderr
# are already redirected through `tee -a "$LOG_FILE"`, so piping these lines
# through tee again would append each of them to the log file twice.
#
# Globals:   LOG_FILE (read)
# Arguments: $1 - exit status of the training command
# Outputs:   banner lines on stdout; a failure notice on stderr when $1 != 0
# Returns:   $1, unchanged
report_completion() {
  local status=$1

  echo "========================================"
  if [ "$status" -eq 0 ]; then
    echo "๐ AZR 7B GPU567 Training Completed at $(date)"
  else
    # Do not claim completion when the training process failed.
    echo "AZR 7B GPU567 Training FAILED with exit code $status at $(date)" >&2
  fi
  echo "๐ Final log saved to: $LOG_FILE"

  return "$status"
}

# "$?" is the exit status of the command directly above this section (the
# training launch). This call is the last command of the script, so the
# script exits with the training status instead of always reporting success.
report_completion "$?"