#!/bin/bash
# Grid Engine options (lines prefixed with #$)
#
# Runtime limit of 48 hours:
#$ -l h_rt=48:00:00
#
# Set working directory to the directory where the job is submitted from:
#$ -cwd
#
# Request four A100 GPUs in the gpu queue:
#$ -q gpu
#$ -pe gpu-a100 4
#$ -P is_courses
#-wd /exports/eddie/scratch/s2325053/
#
# Request system RAM.
# The total system RAM available to the job is the value specified here
# multiplied by the number of requested GPUs (above).
#$ -l h_vmem=300G
# -l rl9=false

# Initialise the environment modules, load Anaconda, and activate the
# conda environment used for evaluation.
. /etc/profile.d/modules.sh
module load anaconda/2024.02
conda activate distill

# Environment configuration: show visible GPUs and route all Hugging Face
# caches to scratch storage (home quotas are too small for model shards).
nvidia-smi
# export CUDA_VISIBLE_DEVICES=0,12,3
export HF_DATASETS_CACHE="/exports/eddie/scratch/s2325053/cache"
export HF_HOME="/exports/eddie/scratch/s2325053/cache"

# Path configuration
MODEL_PATH="/exports/eddie/scratch/s2325053/model"
OUTPUT_BASE_PATH="/exports/eddie/scratch/s2325053/feedback_qa/ex_part4/judge_retrieval_necessary_check_eddie/data_output"
DATASET_PATH="/exports/eddie/scratch/s2325053/feedback_qa/datasets/merged_qa_with_facts.json"

# Retrieval methods to evaluate.
declare -a RETRIEVAL_METHODS=("contrieve")

# Model configuration: family name -> space-separated list of model
# directory names under $MODEL_PATH.
declare -A MODEL_FAMILIES
# Full roster, kept for reference:
# MODEL_FAMILIES=(
#   ["llama"]="Llama-3.1-8B-Instruct Llama-3.1-70B-Instruct Llama-3.2-1B-Instruct Llama-3.2-3B-Instruct"
#   ["qwen"]="Qwen2.5-1.5B-Instruct Qwen2.5-3B-Instruct Qwen2.5-7B-Instruct Qwen2.5-14B-Instruct Qwen2.5-32B-Instruct Qwen2.5-72B-Instruct"
#   ["mistral"]="Mistral-7B-Instruct-v0.3 Ministral-8B-Instruct-2410 Mixtral-8x7B-Instruct-v0.1"
#   ["gemma"]="gemma-2-2b-it gemma-2-9b-it gemma-2-27b-it"
#   ["phi"]="Phi-3-mini-4k-instruct Phi-3-medium-4k-instruct Phi-3-small-8k-instruct"
# )
MODEL_FAMILIES=(
  ["qwen"]="Qwen2.5-72B-Instruct"
  # ["phi"]="Phi-3-small-8k-instruct"
  # ["mistral"]="Mixtral-8x7B-Instruct-v0.1"
  ["llama"]="Llama-3.1-70B-Instruct"
)

#######################################
# Determine the evaluation batch size for a model.
# Arguments: $1 - model directory name
# Outputs:   batch size to stdout
# NOTE(review): all three branches currently return the same value
# (40000 * gpu_count); the split is kept as tuning points for per-size
# batch sizes. gpu_count is hardcoded to 1 even though 4 GPUs are
# requested above (-pe gpu-a100 4) — confirm this is intentional.
#######################################
get_batch_size() {
  local model_name=$1
  local gpu_count=1  # Number of GPUs assumed for batch-size scaling

  if [[ $model_name == *"70B"* ]] || [[ $model_name == *"72B"* ]]; then
    echo $((40000 * gpu_count))  # largest models
  elif [[ $model_name == *"32B"* ]] || [[ $model_name == *"27B"* ]]; then
    echo $((40000 * gpu_count))  # large models
  else
    echo $((40000 * gpu_count))  # smaller models
  fi
}

#######################################
# Run one evaluation of a model with a given retrieval method.
# Globals:   MODEL_PATH, OUTPUT_BASE_PATH, DATASET_PATH (read)
# Arguments: $1 - model family/type, $2 - model name, $3 - retrieval method
# Outputs:   progress messages to stdout; predictions written under
#            ${OUTPUT_BASE_PATH}/<retrieval_method>/<model_name>.json
# Returns:   exit status of the python evaluation process
#######################################
evaluate_model() {
  local model_type=$1
  local model_name=$2
  local retrieval_method=$3

  # Get appropriate batch size (declaration split from assignment so the
  # command substitution's exit status is not masked by 'local').
  local batch_size
  batch_size=$(get_batch_size "$model_name")

  # Create specific output directory for each retrieval method
  local output_dir="${OUTPUT_BASE_PATH}/${retrieval_method}"
  mkdir -p "$output_dir"

  echo "Evaluating ${model_type} model: ${model_name} with ${retrieval_method} retrieval"

  python -u -m predict_tool_contrieve_large \
    --input_files "$DATASET_PATH" \
    --data_output "${output_dir}/${model_name}.json" \
    --model_name_or_path "${MODEL_PATH}/${model_name}" \
    --use_vllm \
    --max_seq_length 4096 \
    --use_flash_attn \
    --retrieval "$retrieval_method" \
    --per_device_eval_batch_size "$batch_size" \
    --model_type "$model_type"
}

# Main execution loop: every retrieval method x every configured model.
for retrieval_method in "${RETRIEVAL_METHODS[@]}"; do
  echo "Starting evaluations with ${retrieval_method} retrieval method"

  for model_type in "${!MODEL_FAMILIES[@]}"; do
    # Family values are space-separated lists; intentional word-splitting.
    for model_name in ${MODEL_FAMILIES[$model_type]}; do
      echo "Starting evaluation for ${model_type}/${model_name} with ${retrieval_method}"

      if evaluate_model "$model_type" "$model_name" "$retrieval_method"; then
        echo "Successfully evaluated ${model_name} with ${retrieval_method}"
      else
        echo "Failed to evaluate ${model_name} with ${retrieval_method}"
        echo "${model_name} with ${retrieval_method} failed"
        continue  # skip the post-run delay on failure
      fi

      # Optional: Add a short delay between evaluations
      sleep 10
    done
  done
done