Qwen_save3 / perplexities /run_experiments.sh
Yaning1001's picture
Add files using upload-large-folder tool
7c5df0b verified
#!/bin/bash
# Parameter grid for the perplexity experiment sweep.
# No further initialisation (e.g. MODEL_NAME) is performed in this script.

# Longer list of perturbation names, currently disabled:
# perturbations=(
# "hop_control" "hop_tokens4" "hop_words4"
# "reverse_control" "reverse_full" "reverse_partial"
# "shuffle_control" "shuffle_deterministic21" "shuffle_deterministic57"
# "shuffle_deterministic84" "shuffle_even_odd" "shuffle_local3"
# "shuffle_local5" "shuffle_local10" "shuffle_nondeterministic"
# )

# Active perturbation pair. The alternatives below can be swapped in.
# NOTE(review): the trailing numbers are kept from the original notes; they
# look like checkpoint step counts -- confirm before relying on them.
perturbations=(
  "shuffle_even_odd"
  "shuffle_deterministic84"
) # 1290 1934
# perturbations=("reverse_control" "reverse_partial") # 1382 2072
# perturbations=("hop_words4" "hop_control") # 1122 1682

# Single-valued axes of the grid: dataset name, batch size and seed.
babylm_datasets=("10M")
batch_sizes=(6)
seeds=(0)

# Only one checkpoint is listed. To sweep checkpoint-100 .. checkpoint-2700 in
# steps of 100 instead, start from an empty array and enable this loop:
# checkpoint_paths=()
# for i in {100..2700..100}; do
# checkpoint_paths+=("checkpoint-${i}")
# done
checkpoint_paths=("checkpoint-1290")

# Print the checkpoint list so it can be checked by eye before the runs start.
printf 'Checkpoints generated: %s\n' "${checkpoint_paths[*]}"
# Run perplexities_qwen.py once for every combination of perturbation,
# dataset, batch size, seed and checkpoint taken from the arrays
# perturbations, babylm_datasets, batch_sizes, seeds and checkpoint_paths.

#######################################
# Launch perplexities_qwen.py for a single parameter combination.
# Arguments:
#   $1 - perturbation name
#   $2 - dataset name
#   $3 - checkpoint name
#   $4 - batch size
#   $5 - seed
# Outputs:
#   Writes the combination and the command line being run to stdout.
# Returns:
#   The exit status of the Python process.
#######################################
run_experiment() {
  local perturbation=$1 dataset=$2 checkpoint=$3 batch_size=$4 seed=$5
  # Keep the command as an argument array instead of a string passed to eval,
  # so each parameter reaches Python as exactly one argument and is never
  # re-parsed by the shell (no word splitting, globbing or command execution).
  local -a cmd=(
    python perplexities_qwen.py
    "$perturbation" "$dataset" "$checkpoint" "$batch_size" "$seed"
  )

  echo "Executing for $perturbation, dataset: $dataset, checkpoint: $checkpoint, batch size: $batch_size, seed: $seed"
  echo "Running command: CUDA_VISIBLE_DEVICES=0,1 ${cmd[*]}"
  # The assignment prefix sets CUDA_VISIBLE_DEVICES for this command only.
  CUDA_VISIBLE_DEVICES=0,1 "${cmd[@]}"
}

#######################################
# Iterate over the whole parameter grid, one run at a time.
# Globals:
#   perturbations, babylm_datasets, batch_sizes, seeds, checkpoint_paths (read)
# Outputs:
#   Progress messages on stdout; failure reports on stderr.
# Returns:
#   0 when every run succeeded, 1 when at least one run failed.
#######################################
main() {
  local perturbation dataset batch_size seed checkpoint
  local total_runs=0 failed_runs=0

  for perturbation in "${perturbations[@]}"; do
    for dataset in "${babylm_datasets[@]}"; do
      for batch_size in "${batch_sizes[@]}"; do
        for seed in "${seeds[@]}"; do
          for checkpoint in "${checkpoint_paths[@]}"; do
            total_runs=$((total_runs + 1))
            # A failing run is recorded but does not stop the sweep, so the
            # remaining combinations are still attempted.
            run_experiment "$perturbation" "$dataset" "$checkpoint" "$batch_size" "$seed" || {
              echo "Run failed (exit status $?) for $perturbation, dataset: $dataset, checkpoint: $checkpoint, batch size: $batch_size, seed: $seed" >&2
              failed_runs=$((failed_runs + 1))
            }
          done
        done
      done
    done
    echo "Completed experiments for perturbation '$perturbation'"
  done

  echo "All experiments completed."
  if ((failed_runs > 0)); then
    echo "$failed_runs of $total_runs experiment run(s) failed." >&2
    return 1
  fi
  return 0
}

main "$@"