# Environment consumed by the `accelerate launch ... -m src.cms_main` runs in
# this script. SAMA_CONFIG is the YAML file handed to --config_path.
export SAMA_CONFIG=./config/sama_cms_lla1b7.yaml
export TOKENIZERS_PARALLELISM=true

# CUDA Include (/cuda.h): add the conda env's include directory to the C/C++
# header search paths.
# NOTE(review): presumably required by the inductor backend's JIT compile -- confirm.
CUDA_INCLUDE_PATH="/home/work/miniconda3/envs/allm/include"
# Use ${VAR:+$VAR:} so an unset or empty variable does not produce a leading
# ":" -- GCC treats an empty path element as "search the current directory".
export CPATH="${CPATH:+${CPATH}:}${CUDA_INCLUDE_PATH}"
export CPLUS_INCLUDE_PATH="${CPLUS_INCLUDE_PATH:+${CPLUS_INCLUDE_PATH}:}${CUDA_INCLUDE_PATH}"

# export WANDB_PROJECT="SAMA_CMS_Llama7BB"
export WANDB_PROJECT="SAMA_CMS_Llama7B1"

# Print a timestamp before the first run starts.
date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=1e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 0.5
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 0.7071
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 0.5
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 0.7071
# date +"%F %T"
#### wrong model name
#### NOTE(review): the marker does not say whether it flags the runs above or the runs below -- confirm.
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=1e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=8e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=3e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=1e-3 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=8e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=3e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# More learning rates: extend the sweep down to 2e-4 and 1e-4 for both adapter settings.
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=1e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=2e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=1e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1
# date +"%F %T"
# scaling
# check seeds: short 10-step run (STEP=1, max_steps 10, checkpoint saving and evaluation disabled)
# STEP=1
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end False --trainer_args.save_strategy '"no"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --trainer_args.max_steps 10 --trainer_args.eval_strategy '"no"'
# date +"%F %T"
# scaling: sweep --sama_adapter.scaling over 1.4142, 0.7071, 0.5 and 2 for both adapter settings
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 --sama_adapter.scaling 1.4142
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 --sama_adapter.scaling 0.7071
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 --sama_adapter.scaling 0.5
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=8e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 1.4142
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=8e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 0.7071
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=8e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 0.5
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 16 --sama_adapter.row_R 16 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 16 --sama_adapter.num_unique_blocks_R 16 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 --sama_adapter.scaling 2
# date +"%F %T"
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=8e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 \
# --sama_adapter.scaling 2
# date +"%F %T"
# bash scripts/cms_l1b7_merge_eval.sh
####
# STEP=300
# accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune --main_process_port 41353 -m src.cms_main \
# --config_path $SAMA_CONFIG --trainer_args.learning_rate=5e-4 --trainer_args.output_dir "./Llama7B" \
# --trainer_args.load_best_model_at_end True --trainer_args.save_strategy '"steps"' \
# --sama_adapter.col_L 4 --sama_adapter.row_R 4 \
# --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
# --trainer_args.save_steps $STEP --trainer_args.eval_steps $STEP --trainer_args.logging_steps $STEP \
# --sama_adapter.num_unique_blocks_L 4 --sama_adapter.num_unique_blocks_R 4 \
# --sama_adapter.target_modules '["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]' \
# --data.path ft_training_set/commonsense_147k.json --trainer_args.eval_delay 0 --sama_adapter.scaling 1.4142
# date +"%F %T"
# Seed-replication runs: three adapter settings, each trained with seeds 56
# and 57 (scaling fixed at 1.4142), followed by the merge/eval script.

# Interval, in steps, passed to save_steps, eval_steps and logging_steps.
STEP=300

# JSON lists passed verbatim to --sama_adapter.target_modules.
ATTN_MLP_MODULES='["q_proj", "v_proj", "k_proj", "up_proj","down_proj"]'
ATTN_ONLY_MODULES='["q_proj", "v_proj"]'

#######################################
# Launch one training run via `accelerate launch -m src.cms_main` and print a
# timestamp when it finishes.
# Globals:
#   SAMA_CONFIG (read) - config file passed to --config_path
#   STEP        (read) - save/eval/logging interval
# Arguments:
#   $1 - learning rate (e.g. 5e-4)
#   $2 - value used for col_L, row_R, num_unique_blocks_L and
#        num_unique_blocks_R (the runs here always set all four equal)
#   $3 - JSON list of target modules
#   $4 - seed; also used to build the --run_text tag "sd<seed>"
# Outputs:
#   Timestamp on stdout; a warning on stderr if the launch exits non-zero.
# Returns:
#   Exit status of `date`; a failed launch is reported but does not stop the
#   script, so the remaining runs still execute.
#######################################
launch_sama_run() {
  local lr="$1" block="$2" modules="$3" seed="$4"

  # NOTE(review): every run passes the same --trainer_args.output_dir;
  # presumably src.cms_main derives a per-run subdirectory -- confirm.
  accelerate launch --dynamo_backend=inductor --dynamo_mode=max-autotune \
    --main_process_port 41353 -m src.cms_main \
    --config_path "$SAMA_CONFIG" \
    "--trainer_args.learning_rate=${lr}" \
    --trainer_args.output_dir "./Llama7B" \
    --trainer_args.load_best_model_at_end True \
    --trainer_args.save_strategy '"steps"' \
    --sama_adapter.col_L "$block" --sama_adapter.row_R "$block" \
    --trainer_args.num_train_epochs 2 --trainer_args.report_to none \
    --trainer_args.save_steps "$STEP" --trainer_args.eval_steps "$STEP" \
    --trainer_args.logging_steps "$STEP" \
    --sama_adapter.num_unique_blocks_L "$block" \
    --sama_adapter.num_unique_blocks_R "$block" \
    --sama_adapter.target_modules "$modules" \
    --data.path ft_training_set/commonsense_147k.json \
    --trainer_args.eval_delay 0 \
    --sama_adapter.scaling 1.4142 --seed "$seed" --run_text "sd${seed}" ||
    printf 'warning: run lr=%s block=%s seed=%s exited with status %s\n' \
      "$lr" "$block" "$seed" "$?" >&2

  date +"%F %T"
}

for seed in 56 57; do
  launch_sama_run 5e-4 4 "$ATTN_MLP_MODULES" "$seed"
  launch_sama_run 5e-4 16 "$ATTN_MLP_MODULES" "$seed"
  launch_sama_run 8e-4 4 "$ATTN_ONLY_MODULES" "$seed"
done

# NOTE(review): presumably merges the trained adapters and evaluates them --
# confirm in the script itself.
bash scripts/cms_l1b7_merge_eval.sh