llmcal

File size: 1,602 Bytes

8cd1f2e

CHECKPOINTS_DIR=../llmcal2/outputs/checkpoints
HF_TOKEN=$(cat hf_token.txt)
model="llama3.2-1b-instruct"
# model="qwen2.5-7b-instruct"

# Reproducibility
base_seed=2834
declare -A dataset2nseeds=(
    ["sst2"]=9
    ["agnews"]=9
    ["dbpedia"]=5
    ["20newsgroups"]=5
    ["banking77"]=5
)
# num_seeds=9

# Supported models
declare -A model2checkpoint=(
    ["llama3.2-1b"]="meta-llama/Llama-3.2-1B"
    ["llama3.2-1b-instruct"]="meta-llama/Llama-3.2-1B-Instruct"
    ["qwen2.5-7b"]="Qwen/Qwen2.5-7B"
    ["qwen2.5-7b-instruct"]="Qwen/Qwen2.5-7B-Instruct"
)
mkdir -p $CHECKPOINTS_DIR
if [ ! -d $CHECKPOINTS_DIR/${model2checkpoint[$model]} ]; then
    litgpt download ${model2checkpoint[$model]} --checkpoint_dir $CHECKPOINTS_DIR --access_token $HF_TOKEN
    rm -rf $CHECKPOINTS_DIR/${model2checkpoint[$model]}/*.bin
fi
if [ ! -z ${model2checkpoint[${model}-instruct]} ]; then
    if [ ! -d $CHECKPOINTS_DIR/${model2checkpoint[${model}-instruct]} ]; then
        litgpt download ${model2checkpoint[${model}-instruct]} --checkpoint_dir $CHECKPOINTS_DIR --access_token $HF_TOKEN
        rm -rf $CHECKPOINTS_DIR/${model2checkpoint[${model}-instruct]}/*.bin
    fi
fi

# Datasets
declare -a DATASETS=(20newsgroups dbpedia sst2 agnews banking77)
# declare -a DATASETS=(sst2 agnews)

# Train sizes
# declare -a FACTORS=(8 16 32 64 128 256)
declare -a FACTORS=(16 32 64 128 256)

# Test sizes
declare -A dataset2testsize=(
    ["sst2"]=400
    ["agnews"]=400
    ["dbpedia"]=700
    ["20newsgroups"]=800
    ["banking77"]=1000
)

max_seq_length=2048
inference_max_seq_len=20000

export CUDA_VISIBLE_DEVICES=1