probejie
/

feedback

Model card Files Files and versions

feedback / judge_retrieval_necessary_check /eval_tool_bm25.sh

probejie's picture

Upload folder using huggingface_hub

237451b verified over 1 year ago

history blame contribute delete

3.89 kB

	#!/bin/bash

	# Grid Engine options (lines prefixed with #$)
	# Runtime limit of 48 hours:
	#$ -l h_rt=48:00:00
	#
	# Set working directory to the directory where the job is submitted from:
	#$ -cwd
	#
	# Submit to the gpu queue, requesting the gpu-a100 parallel environment with
	# 4 slots (presumably 4 A100 GPUs -- confirm against the cluster docs), and
	# charge the job to the is_courses project:
	#$ -q gpu
	#$ -pe gpu-a100 4
	#$ -P is_courses
	# Inactive (no "#$" prefix): alternative fixed working directory.
	#-wd /exports/eddie/scratch/s2325053/
	# Request 300 GB system RAM
	# the total system RAM available to the job is the value specified here multiplied by
	# the number of requested GPUs (above)
	#$ -l h_vmem=300G
	# Inactive (no "#$" prefix): resource flag kept for reference.
	# -l rl9=false
	# Initialise the environment modules, load Anaconda 2024.02 and activate the
	# "distill" conda environment
	. /etc/profile.d/modules.sh
	module load anaconda/2024.02
	conda activate distill

	# Environment Configuration
	# Print the GPU status into the job log.
	nvidia-smi
	# Inactive: uncomment to restrict the job to specific GPU indices.
	# export CUDA_VISIBLE_DEVICES=0,1
	# Both Hugging Face cache variables point at the same scratch directory.
	export HF_DATASETS_CACHE="/exports/eddie/scratch/s2325053/cache"
	export HF_HOME="/exports/eddie/scratch/s2325053/cache"

	# ---------------------------------------------------------------------------
	# Filesystem locations (all on scratch storage)
	# ---------------------------------------------------------------------------
	# Parent directory of the model checkpoints; a model name is appended to it.
	MODEL_PATH='/exports/eddie/scratch/s2325053/model'
	# Root under which one sub-directory per retrieval method is written.
	OUTPUT_BASE_PATH='/exports/eddie/scratch/s2325053/feedback_qa/ex_part4/judge_retrieval_necessary_check_eddie/data_output'
	# Input dataset (single JSON file).
	DATASET_PATH='/exports/eddie/scratch/s2325053/feedback_qa/datasets/merged_qa_with_facts.json'

	# ---------------------------------------------------------------------------
	# Retrieval methods to evaluate
	# ---------------------------------------------------------------------------
	declare -a RETRIEVAL_METHODS=(bm25)

	# ---------------------------------------------------------------------------
	# Models to evaluate: family name -> space-separated list of model names
	# ---------------------------------------------------------------------------
	declare -A MODEL_FAMILIES=(
	  [llama]='Llama-3.1-70B-Instruct'
	  [qwen]='Qwen2.5-72B-Instruct'
	  [mistral]='Mixtral-8x7B-Instruct-v0.1'
	  # Currently disabled families:
	  # [gemma]='gemma-2-2b-it gemma-2-9b-it gemma-2-27b-it'
	  # [phi]='Phi-3-small-8k-instruct'
	)
	# Alternative single-model configuration (disabled):
	# MODEL_FAMILIES=(
	#   [mistral]='Ministral-8B-Instruct-2410'
	# )

	#######################################
	# Print the evaluation batch size to use for a model.
	#
	# The model is classified by a size tag found anywhere in its name
	# (e.g. "Llama-3.1-70B-Instruct" -> 70B tier); the tag is matched
	# case-insensitively so names such as "gemma-2-27b-it" are recognised too.
	#
	# NOTE: every tier currently yields the same per-GPU value (40000); the tiers
	# are kept separate so each can be tuned independently.
	#
	# Arguments: $1 - model name (may be empty; falls into the default tier)
	# Outputs:   the batch size as a decimal integer on stdout
	# Returns:   0
	#######################################
	get_batch_size() {
	  local model_name=${1:-}
	  # Multiplier applied to the per-GPU value.
	  # NOTE(review): stays at 1 although the job header requests 4 slots --
	  # confirm whether the batch size is meant to scale with the GPU count.
	  local gpu_count=1
	  local per_gpu

	  case "$model_name" in
	    *70[Bb]* | *72[Bb]*) per_gpu=40000 ;; # largest models
	    *32[Bb]* | *27[Bb]*) per_gpu=40000 ;; # large models
	    *) per_gpu=40000 ;;                   # everything else
	  esac

	  echo $((per_gpu * gpu_count))
	}

	#######################################
	# Run the predict_tool_bm25 Python module for one model and one retrieval
	# method, writing the result to
	# ${OUTPUT_BASE_PATH}/<retrieval_method>/<model_name>.json.
	#
	# Globals:   OUTPUT_BASE_PATH, DATASET_PATH, MODEL_PATH (read)
	# Arguments: $1 - model type (passed through as --model_type)
	#            $2 - model name (directory name under MODEL_PATH)
	#            $3 - retrieval method (passed through as --retrieval)
	# Outputs:   a progress line on stdout, plus whatever the Python module prints
	# Returns:   the exit status of the Python module; non-zero earlier if the
	#            batch size cannot be determined or the output directory cannot
	#            be created
	#######################################
	evaluate_model() {
	  local model_type=$1
	  local model_name=$2
	  local retrieval_method=$3

	  # Declared separately from the assignment so a failing get_batch_size is
	  # not hidden behind the (always successful) exit status of `local`.
	  local batch_size
	  batch_size=$(get_batch_size "$model_name") || return

	  # One output directory per retrieval method; stop here rather than launch
	  # the model run when the directory cannot be created.
	  local output_dir="${OUTPUT_BASE_PATH}/${retrieval_method}"
	  mkdir -p -- "$output_dir" || return

	  echo "Evaluating ${model_type} model: ${model_name} with ${retrieval_method} retrieval"

	  python -u -m predict_tool_bm25 \
	    --input_files "$DATASET_PATH" \
	    --data_output "${output_dir}/${model_name}.json" \
	    --model_name_or_path "${MODEL_PATH}/${model_name}" \
	    --use_vllm \
	    --max_seq_length 4096 \
	    --use_flash_attn \
	    --retrieval "$retrieval_method" \
	    --per_device_eval_batch_size "$batch_size" \
	    --model_type "$model_type"
	}

	# Driver: evaluate every configured model once per retrieval method.
	# A failed evaluation is reported and skipped; the remaining models still run.
	for retrieval_method in "${RETRIEVAL_METHODS[@]}"; do
	  echo "Starting evaluations with ${retrieval_method} retrieval method"

	  for model_type in "${!MODEL_FAMILIES[@]}"; do
	    # Deliberately unquoted: each family maps to a space-separated list of
	    # model names that must be split into separate words.
	    for model_name in ${MODEL_FAMILIES[$model_type]}; do
	      echo "Starting evaluation for ${model_type}/${model_name} with ${retrieval_method}"

	      # Guard clause: on failure, report and move straight on to the next
	      # model without pausing.
	      if ! evaluate_model "$model_type" "$model_name" "$retrieval_method"; then
	        echo "Failed to evaluate ${model_name} with ${retrieval_method}"
	        echo "${model_name} with ${retrieval_method} failed"
	        continue
	      fi

	      echo "Successfully evaluated ${model_name} with ${retrieval_method}"

	      # Short pause between successful evaluations.
	      sleep 10
	    done
	  done
	done