File size: 7,459 Bytes
24c2665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#!/bin/bash
# Bootstrap section of the AZR 7B training launcher.
#
# Restricts CUDA to three devices, prepares a timestamped log file under
# /data/azr/logs, prints a start banner, and then mirrors all further
# stdout/stderr of this script to both the terminal and that log file.

# Trace each command as it is executed. The trace goes to stderr, so once the
# redirection at the end of this section is active it is captured in the log.
set -x

# Expose only GPUs 4, 5 and 6 to CUDA.
# NOTE(review): the log file and experiment names in this script say "gpu567"
# while the devices selected here are 4,5,6 — confirm which one is intended.
export CUDA_VISIBLE_DEVICES=4,5,6

# Log settings: everything is consolidated under /data/azr.
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
LOG_DIR="/data/azr/logs"
LOG_FILE="${LOG_DIR}/7b_gpu567_${TIMESTAMP}.log"

# Create the log directory if it does not exist yet.
mkdir -p -- "$LOG_DIR"

# Start banner: shown on the terminal and appended to the log file.
# NOTE(review): the symbols at the start of these messages look like
# mis-encoded emoji; they are runtime strings and are left untouched here.
{
  echo "๐Ÿš€ AZR 7B GPU567 Training Started at $(date)"
  echo "๐Ÿ“ Log file: $LOG_FILE"
  echo "๐ŸŽฏ GPUs: 4,5,6"
  echo "========================================"
} | tee -a "$LOG_FILE"

# From here on, send both stdout and stderr through tee so that all output is
# written to the log file and the terminal at the same time.
exec > >(tee -a "$LOG_FILE") 2>&1

# FlashAttention settings (original note: applies the workaround that was
# found for a FlashAttention problem).
# NOTE(review): VLLM_USE_FLASH_ATTN_2 and TRANSFORMERS_CACHE_DTYPE are not
# consumed anywhere in this script — confirm that vLLM / transformers (or the
# project code) actually read them.
export VLLM_ATTENTION_BACKEND=FLASH_ATTN
export VLLM_USE_FLASH_ATTN_2=1
export VLLM_WORKER_MULTIPROC_METHOD=spawn
export TRANSFORMERS_CACHE_DTYPE=bfloat16
export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512

# Additional settings for FlashAttention compatibility (bf16 everywhere),
# plus Ray / Hydra diagnostics.
# NOTE(review): TORCH_DTYPE and VLLM_TORCH_DTYPE are likewise not read in this
# script — confirm they have a consumer.
export TORCH_DTYPE=bfloat16
export VLLM_TORCH_DTYPE=bfloat16
# presumably turns off Ray's memory monitor (refresh interval 0) — confirm
export RAY_memory_monitor_refresh_ms=0
export RAY_LOGGING_LEVEL=DEBUG
export RAY_DEDUP_LOGS=0
export HYDRA_FULL_ERROR=1

# Make the bundled ./verl checkout importable. The ${VAR:+...} form only adds
# the ':' separator when PYTHONPATH already has content; a bare
# "${PYTHONPATH}:..." would leave a leading empty entry when PYTHONPATH is
# unset, which Python interprets as "current directory".
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$(pwd)/verl"

# Hugging Face cache location (original note: needed for FlashAttention
# compatibility). TRANSFORMERS_CACHE is cleared so that only HF_HOME is set.
export HF_HOME=/data/.cache/huggingface
unset TRANSFORMERS_CACHE

# Seed dataset files; each may be overridden from the caller's environment.
OUTPUT_SEED_PATH=${OUTPUT_SEED_PATH:-/data/azr/data/7b_seed_io.jsonl}
OUTPUT_ERROR_SEED_PATH=${OUTPUT_ERROR_SEED_PATH:-/data/azr/data/7b_error_seed_io.jsonl}
OUTPUT_CODE_F_SEED_PATH=${OUTPUT_CODE_F_SEED_PATH:-/data/azr/data/7b_code_f_seed_io.jsonl}

# Hydra overrides for absolute_zero_reasoner.main_azr_ppo, kept in the same
# order as they are passed on the command line.
#
# The overrides live in an array so that every element reaches Python as
# exactly one argument:
#   * list values such as [console,wandb] are quoted, because an unquoted
#     [...] is a shell glob pattern and could be replaced by a matching
#     file name in the current directory;
#   * the seed-path variables are quoted so a path containing whitespace is
#     not split into several arguments.
# NOTE(review): output paths and the experiment name say "gpu567" — confirm
# this matches the GPUs the run is actually pinned to.
train_args=(
  # Data, output locations and model
  data.shuffle=True
  actor_rollout_ref.ref.include_ref=False
  algorithm.adv_estimator=reinforce_plus_plus
  data.train_files=data/code_reason/test_answer.parquet
  data.val_files=data/code_reason/test_answer.parquet
  +trainer.checkpoint_dir=/data/azr/checkpoints/7b_gpu567
  +trainer.save_dir=/data/azr/models/7b_gpu567
  hydra.run.dir=/data/azr/outputs/7b_gpu567
  data.train_batch_size=24
  data.val_batch_size=492
  data.max_prompt_length=4096
  data.max_response_length=6144
  azr.data_selection_strategy.content_max_length=4000
  actor_rollout_ref.model.path=Qwen/Qwen2.5-7B
  actor_rollout_ref.actor.optim.lr=1e-6
  actor_rollout_ref.model.use_remove_padding=True
  +actor_rollout_ref.model.torch_dtype=bfloat16
  +actor_rollout_ref.model.fsdp_config.model_dtype=bf16
  +actor_rollout_ref.actor.fsdp_config.model_dtype=bf16
  +actor_rollout_ref.ref.fsdp_config.model_dtype=bf16

  # Actor / reference / rollout
  actor_rollout_ref.actor.ppo_mini_batch_size=48
  actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=8
  actor_rollout_ref.actor.use_kl_loss=False
  actor_rollout_ref.actor.kl_loss_coef=0.0
  actor_rollout_ref.actor.kl_loss_type=low_var_kl
  actor_rollout_ref.actor.ulysses_sequence_parallel_size=1
  actor_rollout_ref.model.enable_gradient_checkpointing=True
  actor_rollout_ref.model.pretrained_tokenizer=True
  actor_rollout_ref.actor.fsdp_config.param_offload=True
  actor_rollout_ref.actor.fsdp_config.optimizer_offload=True
  actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=32
  actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=32
  actor_rollout_ref.rollout.tensor_model_parallel_size=1
  actor_rollout_ref.rollout.name=vllm
  actor_rollout_ref.rollout.max_num_batched_tokens=32768
  actor_rollout_ref.rollout.gpu_memory_utilization=0.7
  actor_rollout_ref.rollout.enforce_eager=True
  actor_rollout_ref.rollout.free_cache_engine=True
  actor_rollout_ref.rollout.enable_chunked_prefill=False
  actor_rollout_ref.rollout.n=1
  actor_rollout_ref.rollout.temperature=1.0
  actor_rollout_ref.rollout.dtype=bfloat16
  actor_rollout_ref.ref.fsdp_config.param_offload=True
  algorithm.kl_ctrl.kl_coef=0.0

  # Trainer and reward function
  trainer.critic_warmup=0
  'trainer.logger=[console,wandb]'
  trainer.project_name=azr
  trainer.experiment_name=azr_7b_gpu567
  trainer.n_gpus_per_node=3
  trainer.nnodes=1
  trainer.save_freq=10
  trainer.remove_previous_ckpt_in_save=True
  trainer.del_local_ckpt_after_load=True
  trainer.test_freq=10
  +trainer.val_before_train=False
  reward_fn.extraction_type=answer_conditional
  reward_fn.math_metric=math_verify
  trainer.log_val_generations=0

  # AZR seed data: the same file is used as input dataset and output path.
  azr.data_selection_strategy.update_iteration=1
  "azr.seed_dataset=${OUTPUT_SEED_PATH}"
  "azr.output_seed_path=${OUTPUT_SEED_PATH}"
  "azr.error_seed_dataset=${OUTPUT_ERROR_SEED_PATH}"
  "azr.output_error_seed_path=${OUTPUT_ERROR_SEED_PATH}"
  "azr.code_f_seed_dataset=${OUTPUT_CODE_F_SEED_PATH}"
  "azr.output_code_f_seed_path=${OUTPUT_CODE_F_SEED_PATH}"
  azr.pretrain_pred_steps=-1
  azr.executor=qwq
  azr.ast_check=True
  azr.reward.n_samples=4
  'azr.problem_types=[code_i,code_o,code_f]'
  'azr.data_selection_strategy.banned_keywords_for_errors_and_exceptions=[raise]'
  trainer.debug=False

  # Generation-reward shaping terms: every term is disabled with coef/max 0.
  azr.reward.generation_reward_config.complexity_reward.coef=0.0
  azr.reward.generation_reward_config.complexity_reward.max=0.0
  azr.reward.generation_reward_config.complexity_reward.enabled=False
  azr.reward.generation_reward_config.mean_edit_distance_reward.coef=0.0
  azr.reward.generation_reward_config.mean_edit_distance_reward.max=0.0
  azr.reward.generation_reward_config.mean_edit_distance_reward.enabled=False
  azr.reward.generation_reward_config.halstead_reward.coef=0.0
  azr.reward.generation_reward_config.halstead_reward.max=0.0
  azr.reward.generation_reward_config.halstead_reward.enabled=False
  azr.reward.generation_reward_config.answer_diversity_reward.coef=0.0
  azr.reward.generation_reward_config.answer_diversity_reward.max=0.0
  azr.reward.generation_reward_config.answer_diversity_reward.enabled=False
  azr.reward.generation_reward_config.answer_diversity_reward.hierarchical=False

  # Data selection, resume behaviour and remaining reward options
  azr.pred_data_mix_strategy=max_new
  azr.data_selection_strategy.seed_batch_factor=4
  azr.data_selection_strategy.valid_program_filter=all
  azr.data_selection_strategy.max_programs=16384
  azr.data_selection_strategy.batched_estimate=False
  azr.reward.generation_reward_config.intrinsic_combine_method=sum
  azr.gen_data_probabilities_strategy=uniform
  trainer.resume_mode=auto
  azr.data_selection_strategy.composite_start_step=-1
  azr.data_selection_strategy.composite_chance=0.0
  azr.reward.generation_reward_config.remove_comments=False
  azr.reward.generation_reward_config.remove_after_return=False
  azr.reward.generation_reward_config.use_original_code_as_ref=True
  azr.reward.generation_reward_config.remove_print=False
  azr.data_selection_strategy.composite_function_n_min=0
  azr.data_selection_strategy.composite_function_n_max=0
  azr.reward.code_f_reward_type=binary
  trainer.wandb_run_id=null
  trainer.total_epochs=30

  # Persisting generated programs
  azr.save_generated_data=True
  azr.save_data_path=/data/azr/generated/generated_programs_7b_gpu567
  azr.save_valid_data=True
  azr.save_invalid_data=True
  azr.save_frequency=10
  azr.save_final_datasets=True
)

# Extra command-line arguments of this script are appended last. "$@" (quoted)
# forwards each of them as a single argument, without re-splitting or
# globbing.
python -m absolute_zero_reasoner.main_azr_ppo "${train_args[@]}" "$@"

# Run-completion log
# Remember the exit status of the command that ran immediately before this
# section (the training process) before any other command overwrites $?.
train_status=$?

# stdout and stderr are already mirrored into $LOG_FILE by the earlier
# `exec > >(tee -a ...) 2>&1`, so a plain echo is sufficient. Piping through
# another `tee -a "$LOG_FILE"` here would append every line to the log twice.
echo "========================================"
if [ "$train_status" -eq 0 ]; then
  echo "๐ŸŽ‰ AZR 7B GPU567 Training Completed at $(date)"
else
  # Do not announce success when the training process failed.
  echo "AZR 7B GPU567 Training FAILED (exit code $train_status) at $(date)" >&2
fi
echo "๐Ÿ“ Final log saved to: $LOG_FILE"

# Propagate the training result so callers (schedulers, wrapper scripts) can
# detect a failed run instead of always seeing the status of the last echo.
exit "$train_status"