# NOTE(review): the three lines below are Hugging Face upload-page metadata
# that leaked into the script; commented out so the file parses as shell.
# probejie's picture
# Upload folder using huggingface_hub
# 237451b verified
#!/bin/bash
# Grid Engine options (lines prefixed with #$)
# Runtime limit of 1 hour:
#$ -l h_rt=48:00:00
#
# Set working directory to the directory where the job is submitted from:
#$ -cwd
#
# Request one GPU in the gpu queue:
#$ -q gpu
#$ -pe gpu-a100 4
#$ -P is_courses
#-wd /exports/eddie/scratch/s2325053/
# Request 4 GB system RAM
# the total system RAM available to the job is the value specified here multiplied by
# the number of requested GPUs (above)
#$ -l h_vmem=300G
# -l rl9=false
# Initialise the environment modules and load CUDA version 11.0.2
. /etc/profile.d/modules.sh
module load anaconda/2024.02
conda activate distill
# Environment Configuration
nvidia-smi
# export CUDA_VISIBLE_DEVICES=0,12,3
export HF_DATASETS_CACHE="/exports/eddie/scratch/s2325053/cache"
export HF_HOME="/exports/eddie/scratch/s2325053/cache"
# Path Configuration
MODEL_PATH="/exports/eddie/scratch/s2325053/model"
OUTPUT_BASE_PATH="/exports/eddie/scratch/s2325053/feedback_qa/ex_part4/judge_retrieval_necessary_check_eddie/data_output"
DATASET_PATH="/exports/eddie/scratch/s2325053/feedback_qa/datasets/merged_qa_with_facts.json"
# Define retrieval methods
declare -a RETRIEVAL_METHODS=("contrieve")
# Model Configuration
declare -A MODEL_FAMILIES
# MODEL_FAMILIES=(
# ["llama"]="Llama-3.1-8B-Instruct Llama-3.1-70B-Instruct Llama-3.2-1B-Instruct Llama-3.2-3B-Instruct"
# ["qwen"]="Qwen2.5-1.5B-Instruct Qwen2.5-3B-Instruct Qwen2.5-7B-Instruct Qwen2.5-14B-Instruct Qwen2.5-32B-Instruct Qwen2.5-72B-Instruct"
# ["mistral"]="Mistral-7B-Instruct-v0.3 Ministral-8B-Instruct-2410 Mixtral-8x7B-Instruct-v0.1"
# ["gemma"]="gemma-2-2b-it gemma-2-9b-it gemma-2-27b-it"
# ["phi"]="Phi-3-mini-4k-instruct Phi-3-medium-4k-instruct Phi-3-small-8k-instruct"
# )
MODEL_FAMILIES=(
["qwen"]="Qwen2.5-72B-Instruct"
# ["phi"]="Phi-3-small-8k-instruct"
# ["mistral"]="Mixtral-8x7B-Instruct-v0.1"
["llama"]="Llama-3.1-70B-Instruct"
)
# Function to determine batch size based on model
#######################################
# Print the evaluation batch size for a model.
# Arguments:
#   $1 - model name (e.g. "Qwen2.5-72B-Instruct"); currently unused for
#        sizing because every tier resolved to the same value (see note).
#   $2 - (optional) GPU count to scale by; defaults to 1 to preserve the
#        original behavior, even though the job requests 4 GPUs above.
# Outputs:
#   batch size on stdout
#######################################
get_batch_size() {
  local model_name=$1
  local gpu_count=${2:-1}
  # NOTE(review): the original if/elif/else on "70B"/"72B"/"32B"/"27B"
  # returned 40000 in every branch, so the branching was dead code.
  # Collapsed to a single tunable per-GPU constant; reintroduce per-size
  # tiers here if large models need smaller batches.
  local base_batch=40000
  echo $(( base_batch * gpu_count ))
}
# Evaluation function
#######################################
# Run one model evaluation with a given retrieval method.
# Globals:
#   MODEL_PATH, OUTPUT_BASE_PATH, DATASET_PATH (read)
# Arguments:
#   $1 - model family/type (e.g. "qwen")
#   $2 - model directory name under MODEL_PATH
#   $3 - retrieval method name
# Outputs:
#   progress message on stdout; writes <output_dir>/<model>.json via python
# Returns:
#   exit status of the python run, or non-zero if setup fails
#######################################
evaluate_model() {
  local model_type=$1
  local model_name=$2
  local retrieval_method=$3
  local batch_size output_dir

  # Declaration split from assignment so a get_batch_size failure is not
  # masked by 'local' always succeeding (shellcheck SC2155).
  batch_size=$(get_batch_size "$model_name") || return

  # Each retrieval method gets its own output directory; bail out before
  # the expensive python run if it cannot be created.
  output_dir="${OUTPUT_BASE_PATH}/${retrieval_method}"
  mkdir -p "$output_dir" || return

  echo "Evaluating ${model_type} model: ${model_name} with ${retrieval_method} retrieval"
  python -u -m predict_tool_contrieve_large \
    --input_files "$DATASET_PATH" \
    --data_output "${output_dir}/${model_name}.json" \
    --model_name_or_path "${MODEL_PATH}/${model_name}" \
    --use_vllm \
    --max_seq_length 4096 \
    --use_flash_attn \
    --retrieval "$retrieval_method" \
    --per_device_eval_batch_size "$batch_size" \
    --model_type "$model_type"
}
# Main execution loop: run every configured model (per family) under every
# retrieval method. Failed runs are logged and skipped; successful runs are
# followed by a short pause.
for retrieval_method in "${RETRIEVAL_METHODS[@]}"; do
  echo "Starting evaluations with ${retrieval_method} retrieval method"
  for model_type in "${!MODEL_FAMILIES[@]}"; do
    # Family entries are space-separated model names — intentional word split.
    for model_name in ${MODEL_FAMILIES[$model_type]}; do
      echo "Starting evaluation for ${model_type}/${model_name} with ${retrieval_method}"
      if ! evaluate_model "$model_type" "$model_name" "$retrieval_method"; then
        echo "Failed to evaluate ${model_name} with ${retrieval_method}"
        echo "${model_name} with ${retrieval_method} failed"
        continue
      fi
      echo "Successfully evaluated ${model_name} with ${retrieval_method}"
      # Short delay between successful evaluations only (failures skip it).
      sleep 10
    done
  done
done