neural-mesh-v2 / evaluation /code_eval /scripts /run_evalplus.sh

Restore all essential files - code, configs, and MBPP/HumanEval data

24c2665 verified 4 months ago

2.14 kB

	#!/bin/bash

	# Restrict this run to GPU index 6.
	CUDA_VISIBLE_DEVICES=6

	# Local copies of the HumanEval+ and MBPP+ datasets.
	# NOTE(review): presumably evalplus reads these variables instead of
	# downloading the datasets - confirm against the installed evalplus version.
	HUMANEVAL_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/HumanEvalPlus.jsonl
	MBPP_OVERRIDE_PATH=/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/code_eval/data/MbppPlus.jsonl

	# Make all three settings visible to the child processes started below.
	export CUDA_VISIBLE_DEVICES HUMANEVAL_OVERRIDE_PATH MBPP_OVERRIDE_PATH

	# Positional arguments; each falls back to its default when the argument is
	# missing or empty:
	#   $1 DATASET    dataset name            (default: humaneval)
	#   $2 MODEL      model name or path      (default: andrewzh/Absolute_Zero_Reasoner-Coder-3b)
	#   $3 GREEDY     1 = greedy, 0 = sample  (default: 1)
	#   $4 TEMP       sampling temperature    (default: 0.8)
	#   $5 TOP_P      top-p value             (default: 0.9)
	#   $6 N_SAMPLES  number of samples       (default: 1)
	DATASET="${1:-humaneval}"
	MODEL="${2:-andrewzh/Absolute_Zero_Reasoner-Coder-3b}"
	GREEDY="${3:-1}"
	TEMP="${4:-0.8}"
	TOP_P="${5:-0.9}"
	N_SAMPLES="${6:-1}"

	# In greedy mode the sample count is always forced to 1, whatever was passed.
	if test "$GREEDY" -eq 1; then N_SAMPLES=1; fi

	# Print the effective settings for this run.
	printf 'Dataset: %s\n' "$DATASET"
	printf 'Model: %s\n' "$MODEL"
	printf 'Greedy: %s (1=yes, 0=no)\n' "$GREEDY"
	printf 'Temperature: %s\n' "$TEMP"
	printf 'Top-P: %s\n' "$TOP_P"
	printf 'Number of samples: %s\n' "$N_SAMPLES"

	# Last path component of the model name; used later to find the output file.
	MODEL_BASE="$(basename "$MODEL")"
	printf 'Model base: %s\n' "$MODEL_BASE"

	# Generate code samples with evalplus.codegen on the vLLM backend.
	#
	# Reads:  MODEL, DATASET, GREEDY, TEMP, TOP_P, N_SAMPLES
	# Writes: TEMP_VAL - temperature string used afterwards to locate the
	#         samples file ("0.0" in greedy mode, otherwise the value of TEMP)
	# Exits:  with evalplus.codegen's status if generation fails, so that the
	#         evaluation step never runs against missing or stale samples.
	#
	# The arguments are collected in an array so every value is passed as exactly
	# one word (no word splitting or globbing on unusual values).
	codegen_args=(--model "$MODEL" --dataset "$DATASET" --backend vllm)

	if [ "$GREEDY" -eq 1 ]; then
	  codegen_args+=(--trust_remote_code --greedy)
	  TEMP_VAL="0.0"
	else
	  codegen_args+=(
	    --temperature "$TEMP"
	    --top-p "$TOP_P"
	    --trust_remote_code
	    --n-samples "$N_SAMPLES"
	  )
	  TEMP_VAL="$TEMP"
	fi

	evalplus.codegen "${codegen_args[@]}" || {
	  codegen_status=$?
	  echo "Error: evalplus.codegen failed with status ${codegen_status}; skipping evaluation." >&2
	  exit "$codegen_status"
	}

	# Locate the samples file written by the generation step and evaluate it.
	#
	# Reads:  DATASET, MODEL_BASE, TEMP_VAL
	# Writes: OUTPUT_FILE - path of the samples file handed to evalplus.evaluate
	# Exits:  non-zero if the results directory or samples file is missing, or if
	#         evalplus.evaluate itself fails.
	echo "Waiting for output file to be generated..."
	sleep 2 # Give some time for the file to be created

	results_dir="evalplus_results/${DATASET}"
	if [ ! -d "$results_dir" ]; then
	  echo "Error: results directory '${results_dir}' does not exist." >&2
	  exit 1
	fi

	# Find the samples file with a glob pattern:
	#   - the leading '*' tolerates a prefix in front of the model base name
	#     (NOTE(review): presumably evalplus prefixes the organisation part of
	#     the model id - confirm against the actual file names);
	#   - '*.raw.jsonl' files are excluded;
	#   - the first match wins if several files qualify.
	OUTPUT_FILE=$(find "$results_dir" -type f \
	  -name "*${MODEL_BASE}_vllm_temp_${TEMP_VAL}.jsonl" \
	  ! -name "*.raw.jsonl" | head -n 1)

	if [ -z "$OUTPUT_FILE" ]; then
	  echo "Error: no samples file matching '*${MODEL_BASE}_vllm_temp_${TEMP_VAL}.jsonl' found in '${results_dir}'." >&2
	  exit 1
	fi
	echo "Using samples file: $OUTPUT_FILE"

	# Run evaluation with found file
	evalplus.evaluate --dataset "$DATASET" \
	  --samples "$OUTPUT_FILE" \
	  --min-time-limit 10.0 \
	  --gt-time-limit-factor 8.0 || {
	  evaluate_status=$?
	  echo "Error: evalplus.evaluate failed with status ${evaluate_status}." >&2
	  exit "$evaluate_status"
	}

	echo "Evaluation complete. Results saved to evalplus_results/${DATASET}/${MODEL_BASE}_eval_results.json"