#!/bin/bash
export CUDA_DEVICE_ORDER=PCI_BUS_ID
export CUDA_VISIBLE_DEVICES=5
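# Both services below share this single physical GPU; MPS lets their CUDA
# contexts run concurrently instead of time-slicing between processes.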
# Start NVIDIA MPS for efficient GPU sharing
nvidia-cuda-mps-control -d
echo "βœ… MPS started on GPU 2"
# ──────────────────────────────────────────────
# Service 1: vLLM β€” Llama-3.1-8B-Instruct
# ──────────────────────────────────────────────
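# --gpu-memory-utilization 0.40 presumably leaves the remaining memory on this
# GPU for the HHEM service started below, since both share the device via MPS.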
vllm serve meta-llama/Llama-3.1-8B-Instruct \
--port 8031 \
--served-model-name dspy \
--dtype bfloat16 \
--tensor-parallel-size 1 \
--max-model-len 16384 \
--gpu-memory-utilization 0.40 \
--enable-prefix-caching \
--max-num-seqs 256 &
VLLM_PID=$!
echo "⏳ Loading vLLM (Llama-3.1-8B)... PID: $VLLM_PID"
sleep 40 # wait for vLLM to fully load
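# A fixed sleep can be fragile; as a sketch, also poll vLLM's /health endpoint
# (exposed by the OpenAI-compatible server) until it responds, up to ~5 minutes.
for _ in $(seq 1 60); do
  curl -sf "http://localhost:8031/health" > /dev/null && break
  sleep 5
done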
# ──────────────────────────────────────────────
# Service 2: FastAPI β€” HHEM Support Claim API
# ──────────────────────────────────────────────
export SUPPORT_API_PORT=8030
export SUPPORT_API_HOST=0.0.0.0
export HHEM_MODEL_NAME=vectara/hallucination_evaluation_model
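# SUPPORT_API_* and HHEM_MODEL_NAME are presumably read by support_claim_api.py
# to choose the bind address/port and the Vectara HHEM checkpoint.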
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
echo "⏳ Starting Support Claim Checking API..."
cd "$SCRIPT_DIR"
python support_claim_api.py &
HHEM_PID=$!
echo "βœ… HHEM API started... PID: $HHEM_PID"
# ──────────────────────────────────────────────
echo ""
echo "========================================="
echo " Both services running on GPU 2"
echo " vLLM (dspy): http://0.0.0.0:8031"
echo " HHEM (support): http://0.0.0.0:8030"
echo "========================================="
echo ""
# Wait for both processes
wait $VLLM_PID $HHEM_PID