#!/usr/bin/env bash
# Experiment configuration: checkpoint location, access token and the model
# to run. The associative arrays declared further down require bash >= 4.

# Directory that holds the downloaded model checkpoints (relative to the
# current working directory).
CHECKPOINTS_DIR=../llmcal2/outputs/checkpoints

# Access token handed to `litgpt download --access_token`. It is read from
# hf_token.txt in the current working directory. A missing or unreadable file
# is reported on stderr but is not fatal: the token is only needed when a
# checkpoint actually has to be downloaded.
if [[ -r hf_token.txt ]]; then
  HF_TOKEN=$(<hf_token.txt)
else
  printf 'warning: hf_token.txt not found or not readable; HF_TOKEN is empty\n' >&2
  HF_TOKEN=""
fi

# Key into model2checkpoint that selects the model used by this run.
model="llama3.2-1b-instruct"
# Base seed value for the experiments.
# NOTE(review): how it is combined with the per-dataset seed counts below is
# not visible in this section of the script.
base_seed=2834

# Dataset name -> seed count.
# NOTE(review): presumably the number of random seeds run per dataset; confirm
# against the code that consumes this table.
declare -A dataset2nseeds=(
  ["sst2"]=9
  ["agnews"]=9
  ["dbpedia"]=5
  ["20newsgroups"]=5
  ["banking77"]=5
)
# Short model name -> repository id passed to `litgpt download`. The repository
# id is also used as the sub-directory of the checkpoints directory in which
# the checkpoint is expected to live.
declare -A model2checkpoint=(
  ["llama3.2-1b"]="meta-llama/Llama-3.2-1B"
  ["llama3.2-1b-instruct"]="meta-llama/Llama-3.2-1B-Instruct"
  ["qwen2.5-7b"]="Qwen/Qwen2.5-7B"
  ["qwen2.5-7b-instruct"]="Qwen/Qwen2.5-7B-Instruct"
)
#######################################
# Download one checkpoint with litgpt unless its directory already exists.
# Globals:   CHECKPOINTS_DIR (read), HF_TOKEN (read)
# Arguments: $1 - repository id, e.g. "meta-llama/Llama-3.2-1B"
# Returns:   0 if the directory already existed or litgpt succeeded,
#            otherwise litgpt's exit status.
#######################################
download_checkpoint_if_missing() {
  local repo_id=$1
  local ckpt_dir="${CHECKPOINTS_DIR}/${repo_id}"
  local rv=0

  if [[ -d "$ckpt_dir" ]]; then
    return 0
  fi

  # Build the command as an array so every argument stays a single word.
  local -a cmd=(litgpt download "$repo_id" --checkpoint_dir "$CHECKPOINTS_DIR")
  if [[ -n "${HF_TOKEN:-}" ]]; then
    # Only pass the flag when there is a token; an empty value would leave a
    # dangling `--access_token` on the command line.
    cmd+=(--access_token "$HF_TOKEN")
  fi
  "${cmd[@]}" || rv=$?

  # Drop the *.bin files from the checkpoint directory after the download
  # attempt. NOTE(review): presumably these are the original weight files that
  # are no longer needed once litgpt has written its own format - confirm.
  # The caller guarantees CHECKPOINTS_DIR and repo_id are non-empty, so this
  # glob can never expand relative to "/".
  rm -rf -- "$ckpt_dir"/*.bin

  if ((rv != 0)); then
    printf 'error: litgpt download failed for %s (exit %d)\n' "$repo_id" "$rv" >&2
  fi
  return "$rv"
}

#######################################
# Make sure the checkpoint for $model is present and, when model2checkpoint
# also has an entry named "${model}-instruct", that one as well.
# Globals:   CHECKPOINTS_DIR, HF_TOKEN, model, model2checkpoint (all read)
# Arguments: none
# Returns:   0 on success, non-zero if the configuration is unusable or a
#            download failed. Never exits the shell.
#######################################
ensure_model_checkpoints() {
  local key repo_id
  local rv=0

  if [[ -z "${CHECKPOINTS_DIR:-}" ]]; then
    printf 'error: CHECKPOINTS_DIR is empty; skipping checkpoint download\n' >&2
    return 1
  fi
  if [[ -z "${model:-}" ]]; then
    printf 'error: model is empty; skipping checkpoint download\n' >&2
    return 1
  fi
  mkdir -p -- "$CHECKPOINTS_DIR" || return 1

  for key in "$model" "${model}-instruct"; do
    repo_id=${model2checkpoint[$key]:-}
    if [[ -z "$repo_id" ]]; then
      # A missing "-instruct" sibling is expected for many models; a missing
      # entry for the selected model itself deserves a warning.
      if [[ "$key" == "$model" ]]; then
        printf 'warning: no checkpoint mapping for model "%s"\n' "$model" >&2
      fi
      continue
    fi
    download_checkpoint_if_missing "$repo_id" || rv=$?
  done

  return "$rv"
}

ensure_model_checkpoints
# Datasets covered by this run, in the order listed here. Every name also
# appears as a key in the per-dataset tables of this script.
declare -a DATASETS=(
  20newsgroups
  dbpedia
  sst2
  agnews
  banking77
)
# Numeric factors swept by the experiments.
# NOTE(review): what the factor scales (e.g. training-set size) is not visible
# in this section - confirm against the code that iterates over FACTORS.
declare -a FACTORS=(16 32 64 128 256)
# Dataset name -> test size.
# NOTE(review): presumably the number of test examples used per dataset;
# confirm against the code that consumes this table.
declare -A dataset2testsize=(
  ["sst2"]=400
  ["agnews"]=400
  ["dbpedia"]=700
  ["20newsgroups"]=800
  ["banking77"]=1000
)
# Sequence-length limits.
# NOTE(review): presumably max_seq_length applies to training and
# inference_max_seq_len to inference; the consumers are not visible here.
max_seq_length=2048
inference_max_seq_len=20000

# Expose only GPU index 1 to child processes. This overrides any value of
# CUDA_VISIBLE_DEVICES inherited from the calling environment.
export CUDA_VISIBLE_DEVICES=1