#!/bin/bash
set -euo pipefail
###############################################################################
# Full Pipeline: vLLM Server → Inference → Testing → Summary
#
# Usage:
#   bash run_full_pipeline_v2.sh [--gpu GPU_ID] [--port PORT] [--model PATH] [--batch-size N]
#                                [--max-samples N] [--dtype DTYPE] [--classifier-api URL] [--support-api URL]
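#
# Example invocations (flag and env-var values shown are illustrative, not the defaults):
#   bash run_full_pipeline_v2.sh --gpu 0 --port 8001 --max-samples 50
#   MODEL_PATH=/path/to/another/checkpoint GPU_ID=0 bash run_full_pipeline_v2.sh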
#
# This script:
# 1. Starts a vLLM server for the converted RL model
# 2. Waits until the server is healthy
# 3. Runs batched inference (run_inference_vllm_server.py)
# 4. Runs classifier + subclaim threshold evaluation
# 5. Prints a final summary of all results
###############################################################################
# ─── Defaults (override via env vars or CLI flags) ───────────────────────────
MODEL_PATH="${MODEL_PATH:-/home/mshahidul/readctrl/code/RL_model/models/converted_model/v1}"
CONDA_ENV="${CONDA_ENV:-verl}"
GPU_ID="${GPU_ID:-1}"
PORT="${PORT:-8001}"
SERVED_MODEL_NAME="${SERVED_MODEL_NAME:-inference}"
DTYPE="${DTYPE:-bfloat16}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-16384}"
DATASET_PATH="${DATASET_PATH:-/home/mshahidul/readctrl/code/readctrl_rl_inference/verified_combined_0-80_clean200.json}"
INFERENCE_OUTPUT_DIR="${INFERENCE_OUTPUT_DIR:-/home/mshahidul/readctrl/code/RL_model/inference_data}"
BATCH_SIZE="${BATCH_SIZE:-64}"
MAX_TOKENS="${MAX_TOKENS:-1024}"
TEMPERATURE="${TEMPERATURE:-0.7}"
TOP_P="${TOP_P:-0.8}"
NUM_WORKERS="${NUM_WORKERS:-4}"
CLASSIFIER_API_BASE="${CLASSIFIER_API_BASE:-http://172.16.34.19:8040/v1}"
# Support API: FastAPI /check_support endpoint (NO /v1 suffix)
SUPPORT_API_BASE="${SUPPORT_API_BASE:-http://172.16.34.19:8090}"
SUPPORT_MODEL="${SUPPORT_MODEL:-sc}"
CLASSIFIER_MODEL_PATH="${CLASSIFIER_MODEL_PATH:-/home/mshahidul/readctrl/code/readctrl_rl_inference/model.json}"
REFERENCE_SUBCLAIMS="${REFERENCE_SUBCLAIMS:-/home/mshahidul/readctrl/code/text_classifier/data/verified_combined_0-80_clean200_with_subclaims.json}"
TEST_OUTPUT_DIR="${TEST_OUTPUT_DIR:-/home/mshahidul/readctrl/code/readctrl_rl_inference/test_result_v4}"
PROMPT_LOW="${PROMPT_LOW:-/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/prompt_low}"
PROMPT_INTERMEDIATE="${PROMPT_INTERMEDIATE:-/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/prompt_intermediate}"
PROMPT_PROFICIENT="${PROMPT_PROFICIENT:-/home/mshahidul/readctrl/code/RL_model/verl/verl_train/dataset/prompt_proficient}"
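# PROMPT_LOW / PROMPT_INTERMEDIATE / PROMPT_PROFICIENT are the per-reading-level prompt files
# handed to the inference script via the --prompt-*-path flags in STEP 3.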
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
INFERENCE_SCRIPT="${SCRIPT_DIR}/run_inference_vllm_server.py"
TEST_SCRIPT="${SCRIPT_DIR}/test_classifier_with_subclaim_thresholds.py"
SERVER_STARTUP_TIMEOUT=300 # seconds to wait for vLLM to become healthy
VLLM_PID=""
# ─── Parse CLI args ─────────────────────────────────────────────────────────
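# Flags parsed here override the env-var defaults above, since they are applied last.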
while [[ $# -gt 0 ]]; do
  case "$1" in
    --gpu) GPU_ID="$2"; shift 2 ;;
    --port) PORT="$2"; shift 2 ;;
    --model) MODEL_PATH="$2"; shift 2 ;;
    --batch-size) BATCH_SIZE="$2"; shift 2 ;;
    --max-samples) MAX_SAMPLES="$2"; shift 2 ;;
    --dtype) DTYPE="$2"; shift 2 ;;
    --classifier-api) CLASSIFIER_API_BASE="$2"; shift 2 ;;
    --support-api) SUPPORT_API_BASE="$2"; shift 2 ;;
    *) echo "[WARN] Unknown arg: $1"; shift ;;
  esac
done
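# Default to -1 when --max-samples was not given; the downstream scripts presumably treat -1 as "no limit".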
MAX_SAMPLES="${MAX_SAMPLES:--1}"
BASE_URL="http://127.0.0.1:${PORT}/v1"
# ─── Cleanup handler ────────────────────────────────────────────────────────
cleanup() {
  if [[ -n "${VLLM_PID}" ]] && kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo ""
    echo "================================================================"
    echo " Shutting down vLLM server (PID ${VLLM_PID}) ..."
    echo "================================================================"
    kill "${VLLM_PID}" 2>/dev/null || true
    wait "${VLLM_PID}" 2>/dev/null || true
    echo "[INFO] vLLM server stopped."
  fi
}
trap cleanup EXIT INT TERM
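# Because of set -euo pipefail, any failing step aborts the script; the EXIT trap above still
# fires, so the vLLM server is shut down either way.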
# ─── Activate conda ─────────────────────────────────────────────────────────
eval "$(conda shell.bash hook)"
conda activate "${CONDA_ENV}"
RUN_TS="$(date +%Y%m%d_%H%M%S)"
echo "╔══════════════════════════════════════════════════════════════════╗"
echo "β•‘ ReadCtrl Full Pipeline β€” ${RUN_TS} β•‘"
echo "╠══════════════════════════════════════════════════════════════════╣"
echo "β•‘ Model: ${MODEL_PATH}"
echo "β•‘ GPU: ${GPU_ID}"
echo "β•‘ Port: ${PORT}"
echo "β•‘ Dtype: ${DTYPE}"
echo "β•‘ Batch: ${BATCH_SIZE} (${NUM_WORKERS} concurrent workers)"
echo "β•‘ Conda env: ${CONDA_ENV}"
echo "β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•"
echo ""
###############################################################################
# STEP 1 β€” Start vLLM server
###############################################################################
echo "================================================================"
echo " STEP 1/4: Starting vLLM server on GPU ${GPU_ID}, port ${PORT}"
echo "================================================================"
VLLM_LOG="${INFERENCE_OUTPUT_DIR}/vllm_server_${RUN_TS}.log"
mkdir -p "${INFERENCE_OUTPUT_DIR}"
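# Notes on the vLLM flags below (standard vLLM server behavior, see its docs for details):
#   --gpu-memory-utilization 0.95 : let vLLM claim up to 95% of the GPU's memory
#   --max-num-seqs 256            : cap on sequences processed concurrently
#   --enable-prefix-caching       : reuse KV cache for requests sharing a prompt prefix
#   --disable-log-requests        : suppress per-request logging to keep the server log small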
CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES="${GPU_ID}" \
  python -m vllm.entrypoints.openai.api_server \
    --model "${MODEL_PATH}" \
    --served-model-name "${SERVED_MODEL_NAME}" \
    --dtype "${DTYPE}" \
    --port "${PORT}" \
    --max-model-len "${MAX_MODEL_LEN}" \
    --gpu-memory-utilization 0.95 \
    --max-num-seqs 256 \
    --enable-prefix-caching \
    --disable-log-requests \
    > "${VLLM_LOG}" 2>&1 &
VLLM_PID=$!
echo "[INFO] vLLM server PID: ${VLLM_PID}"
echo "[INFO] Server log: ${VLLM_LOG}"
###############################################################################
# STEP 2 β€” Wait for vLLM to become healthy
###############################################################################
echo ""
echo "================================================================"
echo " STEP 2/4: Waiting for vLLM server to be ready ..."
echo "================================================================"
ELAPSED=0
INTERVAL=5
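# Poll the OpenAI-compatible endpoint until it answers HTTP 200; the same check can be run by hand, e.g.:
#   curl -s http://127.0.0.1:${PORT}/v1/models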
while [[ ${ELAPSED} -lt ${SERVER_STARTUP_TIMEOUT} ]]; do
  if ! kill -0 "${VLLM_PID}" 2>/dev/null; then
    echo "[ERROR] vLLM server process died. Check log: ${VLLM_LOG}"
    tail -30 "${VLLM_LOG}"
    exit 1
  fi
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" "${BASE_URL}/models" 2>/dev/null || echo "000")
  if [[ "${HTTP_CODE}" == "200" ]]; then
    echo "[INFO] vLLM server is healthy (${ELAPSED}s elapsed)."
    break
  fi
  echo " ... waiting (${ELAPSED}s / ${SERVER_STARTUP_TIMEOUT}s, last HTTP=${HTTP_CODE})"
  sleep ${INTERVAL}
  ELAPSED=$((ELAPSED + INTERVAL))
done
if [[ ${ELAPSED} -ge ${SERVER_STARTUP_TIMEOUT} ]]; then
  echo "[ERROR] Server did not become healthy within ${SERVER_STARTUP_TIMEOUT}s."
  tail -30 "${VLLM_LOG}"
  exit 1
fi
echo ""
echo "[INFO] Available models on server:"
curl -s "${BASE_URL}/models" | python -m json.tool 2>/dev/null || true
echo ""
###############################################################################
# STEP 3 β€” Run inference
###############################################################################
echo "================================================================"
echo " STEP 3/4: Running batched inference"
echo "================================================================"
echo "[INFO] Dataset: ${DATASET_PATH}"
echo "[INFO] Output dir: ${INFERENCE_OUTPUT_DIR}"
echo ""
python "${INFERENCE_SCRIPT}" \
--model_path "${MODEL_PATH}" \
--dataset_path "${DATASET_PATH}" \
--prompt-low-path "${PROMPT_LOW}" \
--prompt-intermediate-path "${PROMPT_INTERMEDIATE}" \
--prompt-proficient-path "${PROMPT_PROFICIENT}" \
--output_dir "${INFERENCE_OUTPUT_DIR}" \
--base_url "${BASE_URL}" \
--served_model_name "${SERVED_MODEL_NAME}" \
--batch_size "${BATCH_SIZE}" \
--max_samples "${MAX_SAMPLES}" \
--max_tokens "${MAX_TOKENS}" \
--temperature "${TEMPERATURE}" \
--top_p "${TOP_P}" \
--num_workers "${NUM_WORKERS}"
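# Pick up the newest vllm_inference_*.jsonl produced by STEP 3; "|| true" stops set -e / pipefail
# from aborting before the friendly error below when nothing matches.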
INFERENCE_JSONL="$(ls -t "${INFERENCE_OUTPUT_DIR}"/vllm_inference_*.jsonl 2>/dev/null | head -1 || true)"
if [[ -z "${INFERENCE_JSONL}" ]]; then
  echo "[ERROR] No inference JSONL output found in ${INFERENCE_OUTPUT_DIR}"
  exit 1
fi
echo ""
echo "[INFO] Inference output: ${INFERENCE_JSONL}"
INFERENCE_LINE_COUNT="$(wc -l < "${INFERENCE_JSONL}")"
echo "[INFO] Total inference rows: ${INFERENCE_LINE_COUNT}"
###############################################################################
# STEP 4 β€” Run testing / evaluation
###############################################################################
echo ""
echo "================================================================"
echo " STEP 4/4: Running classifier + subclaim threshold evaluation"
echo "================================================================"
echo "[INFO] Input JSONL: ${INFERENCE_JSONL}"
echo "[INFO] Classifier API: ${CLASSIFIER_API_BASE}"
echo "[INFO] Support API: ${SUPPORT_API_BASE} (FastAPI /check_support, no /v1)"
echo "[INFO] Reference subclaims: ${REFERENCE_SUBCLAIMS}"
echo ""
python "${TEST_SCRIPT}" \
--model-path "${CLASSIFIER_MODEL_PATH}" \
--input-file "${INFERENCE_JSONL}" \
--reference-subclaims-file "${REFERENCE_SUBCLAIMS}" \
--classifier-api-base "${CLASSIFIER_API_BASE}" \
--support-api-base "${SUPPORT_API_BASE}" \
--output-dir "${TEST_OUTPUT_DIR}" \
--max-samples "${MAX_SAMPLES}" \
--provide-traceback
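# Newest evaluation outputs written by the test script; "|| true" again protects against
# set -e / pipefail when no matching files were produced.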
TEST_SUMMARY_JSON="$(ls -t "${TEST_OUTPUT_DIR}"/classifier_subclaim_threshold_eval_*.json 2>/dev/null | head -1 || true)"
TEST_DETAILS_JSONL="$(ls -t "${TEST_OUTPUT_DIR}"/classifier_subclaim_threshold_eval_*.jsonl 2>/dev/null | head -1 || true)"
###############################################################################
# FINAL SUMMARY
###############################################################################
echo ""
echo ""
echo "╔══════════════════════════════════════════════════════════════════╗"
echo "β•‘ PIPELINE COMPLETE β•‘"
echo "╠══════════════════════════════════════════════════════════════════╣"
echo "β•‘ Run timestamp: ${RUN_TS}"
echo "β•‘ Model: ${MODEL_PATH}"
echo "β•‘ GPU: ${GPU_ID}"
echo "β•‘ Samples inferred: ${INFERENCE_LINE_COUNT}"
echo "╠══════════════════════════════════════════════════════════════════╣"
echo "β•‘ OUTPUT FILES β•‘"
echo "╠══════════════════════════════════════════════════════════════════╣"
echo "β•‘ Inference JSONL: ${INFERENCE_JSONL}"
echo "β•‘ vLLM server log: ${VLLM_LOG}"
if [[ -n "${TEST_SUMMARY_JSON:-}" ]]; then
echo "β•‘ Test summary: ${TEST_SUMMARY_JSON}"
fi
if [[ -n "${TEST_DETAILS_JSONL:-}" ]]; then
echo "β•‘ Test details: ${TEST_DETAILS_JSONL}"
fi
echo "╠══════════════════════════════════════════════════════════════════╣"
echo "β•‘ EVALUATION RESULTS β•‘"
echo "╠══════════════════════════════════════════════════════════════════╣"
if [[ -n "${TEST_SUMMARY_JSON:-}" && -f "${TEST_SUMMARY_JSON}" ]]; then
python3 -c "
import json
with open('${TEST_SUMMARY_JSON}') as f:
s = json.load(f)
total = s.get('total_samples', 0)
cls_acc = s.get('classifier_only_accuracy', 0)
comp_pr = s.get('completeness_pass_rate', 0)
comp_mean = s.get('completeness_mean')
halluc_fail = s.get('hallucination_fail_rate', 0)
halluc_mean = s.get('hallucination_mean')
cls_comp = s.get('accuracy_cls_and_completeness', 0)
cls_comp_nh = s.get('accuracy_cls_comp_no_hallucination', 0)
comp_thresh = s.get('completeness_threshold', 0)
halluc_thresh= s.get('hallucination_threshold', 0)
print(f' Total evaluated samples: {total}')
print(f' Classifier-only accuracy: {cls_acc:.4f} ({cls_acc*100:.2f}%)')
print()
comp_str = f'{comp_mean:.4f}' if comp_mean is not None else 'N/A'
print(f' Completeness pass rate: {comp_pr:.4f} ({comp_pr*100:.2f}%)')
print(f' Completeness mean score: {comp_str}')
print(f' Completeness threshold: >= {comp_thresh}')
print()
halluc_str = f'{halluc_mean:.4f}' if halluc_mean is not None else 'N/A'
print(f' Hallucination fail rate: {halluc_fail:.4f} ({halluc_fail*100:.2f}%)')
print(f' Hallucination mean score: {halluc_str}')
print(f' Hallucination threshold: > {halluc_thresh}')
print()
print(f' Cls + Completeness: {cls_comp:.4f} ({cls_comp*100:.2f}%)')
print(f' Cls + Comp + No Hallucination: {cls_comp_nh:.4f} ({cls_comp_nh*100:.2f}%)')
"
else
echo " [WARN] No test summary JSON found."
fi
echo "β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•"
echo ""
echo "[DONE] Full pipeline finished at $(date '+%Y-%m-%d %H:%M:%S')"