#!/bin/bash


# export https_proxy=http://192.168.102.101:7890
# export http_proxy=http://192.168.102.101:7890
# # Mirror endpoint for HuggingFace (bypasses the proxy entirely)
# export HF_ENDPOINT=https://hf-mirror.com
# export no_proxy="hf-mirror.com"

###使用ds训练，需要修复 tf4.53的bug!
export WANDB_MODE=offline  ##不联网

export HF_HOME="./.cache"
export HF_DATASETS_CACHE="./.cache/datasets"
export TRANSFORMERS_CACHE="./.cache/transformers"
export TIMM_CACHE="./.cache/timm"

MODEL=LLaMA3_modify
MAX_LEN=8192
RUN_NAME=zigzag_mask0_a19_b64_1e-5
OUTPUT_DIR=experiments/$MODEL/$MAX_LEN/$RUN_NAME

mkdir -p $OUTPUT_DIR
cp train_8B.sh $OUTPUT_DIR/train.sh

#--master_port=29588 
export CUDA_VISIBLE_DEVICES="1"  #可能需要指定gpu

NCCL_P2P_DISABLE=0 NCCL_IB_DISABLE=0 WANDB_PROJECT="kvcache" torchrun  --nproc_per_node=1 --master_port=29591     train.py  \
        --t 2.0 \
        --alpha 19 \
        --ddp_find_unused_parameters=False \
        --model_name_or_path modify_llama3_zigzag \
        --output_dir $OUTPUT_DIR       \
        --cache_dir /inspire/hdd/project/heziweiproject/heziwei-25044/projects_lmlu/datasets \
        --model_max_length $MAX_LEN \
        --report_to wandb \
        --run_name $MODEL-$RUN_NAME-$MAX_LEN \
        --use_flash_attn True \
        --low_rank_training False \
        --num_train_epochs 1  \
        --save_strategy "steps"     \
        --save_steps 1000     \
        --save_total_limit 2     \
        --logging_steps 1     \
        --tf32 True \
        --max_steps 500 \
        \
        --per_device_train_batch_size 8 \
        --gradient_accumulation_steps 8 \
        --learning_rate 1e-5 \
        --weight_decay 0.1 \
        --adam_beta1 0.9 \
        --adam_beta2 0.95 \
        --bf16 True \
        \
        --deepspeed "ds_configs/stage2.json" \
| tee -a $OUTPUT_DIR/run.log