agent-l / scripts /run_full_inference_b_queue.sh
zhou777's picture
Add files using upload-large-folder tool
f0c9bfd verified
Raw
History Blame Contribute Delete
2.77 kB
#!/usr/bin/env bash
#
# Queue runner for two full-dataset inference jobs ("B attr", then "B lite").
# Each job waits until two GPUs look idle and is then launched on them.
#
# Optional environment overrides (default in parentheses):
#   MAX_NEW_TOKENS            (10000)    passed as --max-new-tokens
#   VLLM_MAX_MODEL_LEN        (32768)    passed as --vllm-max-model-len
#   MAX_BATCH_SIZE            (2)        passed as --max-batch-size
#   FREE_MEMORY_THRESHOLD_MIB (1024)     a GPU using less than this is "free"
#   POLL_INTERVAL_SECONDS     (60)       pause between GPU polls
#   RUN_DATE                  (20260530) suffix of the output directory names
set -euo pipefail

# Run from the repository root (the parent of this script's directory) so the
# relative runs/... paths below resolve no matter where the script is invoked.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJECT_ROOT"

MAX_NEW_TOKENS="${MAX_NEW_TOKENS:-10000}"
VLLM_MAX_MODEL_LEN="${VLLM_MAX_MODEL_LEN:-32768}"
MAX_BATCH_SIZE="${MAX_BATCH_SIZE:-2}"
FREE_MEMORY_THRESHOLD_MIB="${FREE_MEMORY_THRESHOLD_MIB:-1024}"
POLL_INTERVAL_SECONDS="${POLL_INTERVAL_SECONDS:-60}"
RUN_DATE="${RUN_DATE:-20260530}"

#######################################
# Check that a setting is a non-negative decimal integer.
# Arguments: $1 - setting name (used in the message), $2 - value to check
# Outputs:   an error message on stderr when the value is rejected
# Returns:   0 when valid, 2 otherwise
#######################################
require_uint() {
  local name="$1"
  local value="$2"
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    printf 'error: %s must be a non-negative integer, got: %s\n' \
      "$name" "$value" >&2
    return 2
  fi
}

# Fail fast on malformed overrides instead of discovering them after a long
# wait for GPUs. Under `set -e` a non-zero return here aborts the script.
require_uint MAX_NEW_TOKENS "$MAX_NEW_TOKENS"
require_uint VLLM_MAX_MODEL_LEN "$VLLM_MAX_MODEL_LEN"
require_uint MAX_BATCH_SIZE "$MAX_BATCH_SIZE"
require_uint FREE_MEMORY_THRESHOLD_MIB "$FREE_MEMORY_THRESHOLD_MIB"

# The poll interval is handed to `sleep`. The wait loop runs inside a command
# substitution, where bash does not apply errexit, so a value that `sleep`
# rejects would make the loop retry without pausing. Accept a plain or
# fractional number with an optional s/m/h/d suffix.
if [[ ! "$POLL_INTERVAL_SECONDS" =~ ^[0-9]+([.][0-9]+)?[smhd]?$ ]]; then
  printf 'error: POLL_INTERVAL_SECONDS is not a valid sleep duration: %s\n' \
    "$POLL_INTERVAL_SECONDS" >&2
  exit 2
fi

# Model checkpoint, input dataset and output directory for each job. The
# output label embeds the token budget actually in use, so a run with an
# overridden MAX_NEW_TOKENS does not land in a directory labelled 10000.
B_ATTR_MODEL="runs/mid/qwen3_5_35b_a3b_exp_b_attr/v1-20260331-192847/checkpoint-145-merged"
B_ATTR_DATASET="runs/swift_data/qwen3_5_35b_a3b_exp_b_attr_v2/full.jsonl"
B_ATTR_OUTPUT="runs/full_inference/b_attr_tp2_nothink_${MAX_NEW_TOKENS}_${RUN_DATE}"
B_LITE_MODEL="runs/mid/qwen3_5_35b_a3b_exp_b_lite/v0-20260504-214725/checkpoint-147-merged"
B_LITE_DATASET="runs/swift_data/qwen3_5_35b_a3b_exp_b_lite_v1/full.jsonl"
B_LITE_OUTPUT="runs/full_inference/b_lite_tp2_nothink_${MAX_NEW_TOKENS}_${RUN_DATE}"
#######################################
# Print the indices of up to two GPUs whose used memory is below the
# configured threshold, in the order nvidia-smi reports them.
# Globals:   FREE_MEMORY_THRESHOLD_MIB (read)
# Arguments: none
# Outputs:   one line on stdout: empty, "N", or "N,M"
# Returns:   0 on success (including when no GPU qualifies); 1 when nvidia-smi
#            fails or its output cannot be parsed
#######################################
find_free_gpus() {
  local smi_rows gpu_ids

  # Capture the query output before parsing it. This way a failing nvidia-smi
  # is detected from its own exit status (no reliance on `pipefail`), and no
  # pipeline stage can be cut short by a downstream reader exiting early.
  # stderr is discarded on purpose: the caller logs its own retry message.
  smi_rows="$(nvidia-smi --query-gpu=index,memory.used \
    --format=csv,noheader,nounits 2>/dev/null)" || return 1

  # Each row is expected to look like "INDEX, USED". Select and join in one
  # awk pass; the comparison is forced numeric with "+ 0".
  gpu_ids="$(awk -F ', *' -v threshold="$FREE_MEMORY_THRESHOLD_MIB" '
    NF == 0 { next }                      # blank line (e.g. empty output)
    NF < 2  { malformed = 1; exit }       # not an "INDEX, USED" row
    $2 !~ /^[0-9]+[ \t]*$/ { next }       # usage not a number: not known free
    picked < 2 && ($2 + 0) < (threshold + 0) {
      ids = (picked == 0 ? $1 : ids "," $1)
      picked++
    }
    END {
      if (malformed) exit 1
      print ids
    }
  ' <<<"$smi_rows")" || return 1

  # Anything other than one or two numeric indices means the rows were not in
  # the expected format; report that as a query failure.
  if [[ -n "$gpu_ids" && ! "$gpu_ids" =~ ^[0-9]+(,[0-9]+)?$ ]]; then
    return 1
  fi

  printf '%s\n' "$gpu_ids"
}
#######################################
# Block until find_free_gpus reports two GPUs, polling at a fixed interval.
# Globals:   POLL_INTERVAL_SECONDS (read)
# Arguments: none
# Outputs:   the "N,M" GPU list on stdout; progress messages on stderr
# Returns:   0 once two GPUs are available (loops until then)
#######################################
wait_for_two_free_gpus() {
  local free_ids

  while :; do
    if free_ids="$(find_free_gpus)"; then
      case "$free_ids" in
        *,*)
          # A comma means two indices were reported: hand them to the caller.
          printf '%s\n' "$free_ids"
          return
          ;;
      esac
      printf '[%s] Waiting for two free GPUs. Currently available: %s\n' \
        "$(date '+%F %T')" "${free_ids:-none}" >&2
    else
      printf '[%s] Unable to query GPU status. Retrying.\n' "$(date '+%F %T')" >&2
    fi

    # Both the "query failed" and the "not enough GPUs" paths pause here.
    sleep "$POLL_INTERVAL_SECONDS"
  done
}
#######################################
# Run one validation-inference job on the given GPUs through the vLLM backend.
# Globals:   MAX_BATCH_SIZE, MAX_NEW_TOKENS, VLLM_MAX_MODEL_LEN (read)
# Arguments: $1 - comma-separated GPU indices, exported to the job as
#                 CUDA_VISIBLE_DEVICES
#            $2 - model checkpoint path
#            $3 - dataset path (passed as --val-dataset-path)
#            $4 - output directory
# Outputs:   a start message on stdout, followed by the job's own output
# Returns:   the exit status of `conda run`
#######################################
run_inference() {
  local gpu_ids="$1"
  local model_path="$2"
  local dataset_path="$3"
  local output_dir="$4"

  # Keep the argument list in an array so each value stays a single word.
  local -a infer_args=(
    --model-path "$model_path"
    --val-dataset-path "$dataset_path"
    --output-dir "$output_dir"
    --infer-backend vllm
    --tensor-parallel-size 2
    --max-batch-size "$MAX_BATCH_SIZE"
    --max-new-tokens "$MAX_NEW_TOKENS"
    --vllm-max-model-len "$VLLM_MAX_MODEL_LEN"
    --template-type qwen3_nothinking
  )

  printf '[%s] Starting inference: output=%s GPUs=%s\n' \
    "$(date '+%F %T')" "$output_dir" "$gpu_ids"

  # `conda run` captures the child's stdout/stderr by default, which hides all
  # progress of a long-running job until it exits. --no-capture-output lets
  # the job write straight through to this script's stdout/stderr.
  CUDA_VISIBLE_DEVICES="$gpu_ids" conda run --no-capture-output -n lsy-agent \
    python -m pipelines.run_validation_inference "${infer_args[@]}"
}
# Make sure the result and log directories exist. (Nothing in the visible
# part of this script writes to runs/logs itself.)
mkdir -p runs/full_inference runs/logs

# The job queue, as parallel arrays: entry 0 is the B attr job, entry 1 the
# B lite job. Jobs run strictly one after another; under `set -e` a failing
# job aborts the script before the next one starts.
queue_models=("$B_ATTR_MODEL" "$B_LITE_MODEL")
queue_datasets=("$B_ATTR_DATASET" "$B_LITE_DATASET")
queue_outputs=("$B_ATTR_OUTPUT" "$B_LITE_OUTPUT")

for job_idx in "${!queue_models[@]}"; do
  # Re-check GPU availability before every job rather than reusing the
  # previous pair.
  GPU_IDS="$(wait_for_two_free_gpus)"
  run_inference "$GPU_IDS" \
    "${queue_models[job_idx]}" \
    "${queue_datasets[job_idx]}" \
    "${queue_outputs[job_idx]}"
done

printf '[%s] B full and B-lite inference completed.\n' "$(date '+%F %T')"