zhou777
/

agent-l

Model card Files Files and versions

Metrics Training metrics Community

agent-l / scripts /run_full_inference_b_queue.sh

zhou777's picture

Add files using upload-large-folder tool

f0c9bfd verified 18 days ago

History Blame Contribute Delete

2.77 kB

	#!/usr/bin/env bash
	# Strict mode: abort on a failing command, on use of an unset variable, and
	# when any stage of a pipeline fails.
	set -o errexit -o nounset -o pipefail

	# Resolve the directory one level above this script and work from there, so
	# the relative runs/... paths defined below are resolved against it.
	PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
	cd "$PROJECT_ROOT"

	# Tunables. A non-empty value already present in the environment wins;
	# otherwise the default given here is assigned.
	: "${MAX_NEW_TOKENS:=10000}"
	: "${VLLM_MAX_MODEL_LEN:=32768}"
	: "${MAX_BATCH_SIZE:=2}"
	# A GPU counts as free when its used memory (MiB) is below this value.
	: "${FREE_MEMORY_THRESHOLD_MIB:=1024}"
	# Seconds to sleep between GPU availability checks.
	: "${POLL_INTERVAL_SECONDS:=60}"
	# Suffix appended to both output directory names.
	: "${RUN_DATE:=20260530}"

	# "B attr" job: merged checkpoint, input dataset, output directory.
	B_ATTR_MODEL="runs/mid/qwen3_5_35b_a3b_exp_b_attr/v1-20260331-192847/checkpoint-145-merged"
	B_ATTR_DATASET="runs/swift_data/qwen3_5_35b_a3b_exp_b_attr_v2/full.jsonl"
	B_ATTR_OUTPUT="runs/full_inference/b_attr_tp2_nothink_10000_${RUN_DATE}"

	# "B lite" job: merged checkpoint, input dataset, output directory.
	B_LITE_MODEL="runs/mid/qwen3_5_35b_a3b_exp_b_lite/v0-20260504-214725/checkpoint-147-merged"
	B_LITE_DATASET="runs/swift_data/qwen3_5_35b_a3b_exp_b_lite_v1/full.jsonl"
	B_LITE_OUTPUT="runs/full_inference/b_lite_tp2_nothink_10000_${RUN_DATE}"

	#######################################
	# Print the indices of up to two GPUs whose used memory is below
	# FREE_MEMORY_THRESHOLD_MIB, joined by a comma on a single line. An empty
	# line is printed when no GPU qualifies.
	# Globals:  FREE_MEMORY_THRESHOLD_MIB (read)
	# Outputs:  "", "N" or "N,M" followed by a newline on stdout
	# Returns:  0 on success; 1 when nvidia-smi fails or the collected ids are
	#           not one or two comma-separated integers
	#######################################
	find_free_gpus() {
	  local smi_output gpu_ids

	  # Capture the query on its own so that an nvidia-smi failure is detected
	  # here regardless of whether the caller enabled pipefail. Its stderr is
	  # deliberately discarded; the caller reports the failure.
	  smi_output="$(nvidia-smi --query-gpu=index,memory.used \
	    --format=csv,noheader,nounits 2>/dev/null)" || return 1

	  # Each row is "index, used"; keep the index of rows under the threshold,
	  # take the first two, and join them with a comma.
	  gpu_ids="$(printf '%s\n' "$smi_output" |
	    awk -F ', *' -v threshold="$FREE_MEMORY_THRESHOLD_MIB" '$2 < threshold { print $1 }' |
	    head -n 2 |
	    paste -sd, -)" || return 1

	  # Anything other than "N" or "N,M" means the output was not in the
	  # expected shape; treat it as a failed query.
	  if [[ -n "$gpu_ids" && ! "$gpu_ids" =~ ^[0-9]+(,[0-9]+)?$ ]]; then
	    return 1
	  fi
	  printf '%s\n' "$gpu_ids"
	}

	#######################################
	# Block until find_free_gpus reports two GPU ids, then print them.
	# Polls every POLL_INTERVAL_SECONDS seconds; progress and query-failure
	# messages go to stderr so that stdout carries only the result.
	# Globals:  POLL_INTERVAL_SECONDS (read)
	# Outputs:  "N,M" followed by a newline on stdout
	# Returns:  0 once two ids are available (loops until then)
	#######################################
	wait_for_two_free_gpus() {
	  local gpu_ids
	  while true; do
	    if ! gpu_ids="$(find_free_gpus)"; then
	      printf '[%s] Unable to query GPU status. Retrying.\n' "$(date '+%F %T')" >&2
	      sleep "$POLL_INTERVAL_SECONDS"
	      continue
	    fi
	    # find_free_gpus yields "", "N" or "N,M"; a comma therefore means two
	    # ids are present. The pattern must be the glob *,* — a bare "," would
	    # only match the literal one-character string and never succeed.
	    if [[ "$gpu_ids" == *,* ]]; then
	      printf '%s\n' "$gpu_ids"
	      return
	    fi
	    printf '[%s] Waiting for two free GPUs. Currently available: %s\n' \
	      "$(date '+%F %T')" "${gpu_ids:-none}" >&2
	    sleep "$POLL_INTERVAL_SECONDS"
	  done
	}

	#######################################
	# Announce and launch one validation-inference run through
	# `conda run -n lsy-agent`, restricted to the given GPUs.
	# Globals:   MAX_BATCH_SIZE, MAX_NEW_TOKENS, VLLM_MAX_MODEL_LEN (read)
	# Arguments: $1 - value for CUDA_VISIBLE_DEVICES
	#            $2 - model path
	#            $3 - validation dataset path
	#            $4 - output directory
	# Outputs:   one "Starting inference" line on stdout, then whatever the
	#            launched command prints
	# Returns:   exit status of the launched command
	#######################################
	run_inference() {
	  local visible_gpus="$1"
	  local checkpoint="$2"
	  local val_set="$3"
	  local result_dir="$4"
	  local started_at

	  started_at="$(date '+%F %T')"
	  printf '[%s] Starting inference: output=%s GPUs=%s\n' \
	    "$started_at" "$result_dir" "$visible_gpus"

	  # Build the command as an array so every argument stays a single word.
	  local -a infer_cmd=(
	    conda run -n lsy-agent python -m pipelines.run_validation_inference
	    --model-path "$checkpoint"
	    --val-dataset-path "$val_set"
	    --output-dir "$result_dir"
	    --infer-backend vllm
	    --tensor-parallel-size 2
	    --max-batch-size "$MAX_BATCH_SIZE"
	    --max-new-tokens "$MAX_NEW_TOKENS"
	    --vllm-max-model-len "$VLLM_MAX_MODEL_LEN"
	    --template-type qwen3_nothinking
	  )

	  # The GPU restriction applies to this one command only.
	  CUDA_VISIBLE_DEVICES="$visible_gpus" "${infer_cmd[@]}"
	}

	# Ensure the output and log directories exist before any job starts.
	mkdir -p runs/full_inference runs/logs

	# Run the two jobs strictly one after the other. Each job first waits for
	# two free GPUs, then looks up its <PREFIX>_MODEL / _DATASET / _OUTPUT
	# settings by indirect expansion.
	for job_prefix in B_ATTR B_LITE; do
	  model_var="${job_prefix}_MODEL"
	  dataset_var="${job_prefix}_DATASET"
	  output_var="${job_prefix}_OUTPUT"

	  GPU_IDS="$(wait_for_two_free_gpus)"
	  run_inference "$GPU_IDS" "${!model_var}" "${!dataset_var}" "${!output_var}"
	done

	printf '[%s] B full and B-lite inference completed.\n' "$(date '+%F %T')"