GradioDemo / scripts /start_reranker_service.sh
eigentom
Initial Update
90c099b
#!/bin/bash
# Start Reranker API Service on multiple GPUs
# Usage: ./scripts/start_reranker_service.sh [model_path] [num_gpus] [base_port]
#
# Positional arguments (all optional):
#   model_path  model handed to the reranker service
#               (default: OpenScholar/OpenScholar_Reranker)
#   num_gpus    number of GPUs, i.e. number of service instances (default: 8)
#   base_port   port of the first instance; instance i uses base_port + i
#               (default: 8008)
set -e
# Other model paths that have been used with this script:
# MODEL_PATH="BAAI/bge-reranker-base"
# MODEL_PATH="BAAI/bge-reranker-large"
MODEL_PATH="${1:-OpenScholar/OpenScholar_Reranker}"
NUM_GPUS="${2:-8}"
BASE_PORT="${3:-8008}"
# Validate the numeric arguments up front so a typo fails here with a clear
# message instead of as an arithmetic error in the middle of the launch loop.
# Leading zeros are rejected so the values are never read as octal.
if ! [[ "$NUM_GPUS" =~ ^[1-9][0-9]{0,3}$ ]]; then
  echo "Error: num_gpus must be a positive integer (got: $NUM_GPUS)" >&2
  exit 1
fi
if ! [[ "$BASE_PORT" =~ ^[1-9][0-9]{0,4}$ ]] || ((BASE_PORT + NUM_GPUS - 1 > 65535)); then
  echo "Error: base_port must be a positive integer with base_port + num_gpus - 1 <= 65535 (got: $BASE_PORT)" >&2
  exit 1
fi
echo "Starting Reranker API Service"
echo "Model: $MODEL_PATH"
echo "Number of GPUs: $NUM_GPUS"
echo "Base port: $BASE_PORT"
echo ""
# Locate the directory containing this script, take its parent as the
# project root, and run the rest of the script from there so the relative
# paths used below (.venv, logs/, shared/) resolve against the project.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$SCRIPT_DIR" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "${PROJECT_ROOT}"
# Use the project-local virtual environment when a .venv directory is present
[ ! -d ".venv" ] || source .venv/bin/activate
# Abort with an install hint when a required Python package cannot be
# imported by python3.
#   $1 - Python statement to try (its stderr is discarded)
#   $2 - message printed before exiting with status 1
require_python_import() {
  if ! python3 -c "$1" 2>/dev/null; then
    echo "$2"
    exit 1
  fi
}
# FastAPI must be importable
require_python_import "import fastapi" \
  "Error: FastAPI not installed. Install with: pip install fastapi uvicorn"
# FlagEmbedding must provide FlagReranker
require_python_import "from FlagEmbedding import FlagReranker" \
  "Error: FlagEmbedding not installed. Install with: pip install FlagEmbedding"
# Create the log directory. The service logs and the PID files below all
# live under logs/reranker, so that subdirectory itself must exist: creating
# only "logs" makes the later redirections into logs/reranker/ fail.
mkdir -p logs/reranker
# PID file for stopping services later
PID_FILE="logs/reranker/reranker_pids.txt"
# NOTE(review): presumably the PID file of a load-balancer process; nothing
# in this script reads or writes it - confirm against the stop script.
LB_PID_FILE="logs/reranker/reranker_lb_pid.txt"
# Start one service per GPU.
# PIDS collects the background process IDs and ENDPOINTS the matching URLs,
# in launch order.
PIDS=()
ENDPOINTS=()
# Service i runs on GPU i (0 .. NUM_GPUS-1) and listens on BASE_PORT + i
for ((i = 0; i < NUM_GPUS; i++)); do
  PORT=$((BASE_PORT + i))
  GPU_ID=$i
  echo "Starting reranker service on GPU $GPU_ID, port $PORT..."
  # Start service in background.
  # CUDA_VISIBLE_DEVICES is set for this one command only, so each service
  # sees a single GPU and the setting does not leak into the rest of the script.
  # Note: When CUDA_VISIBLE_DEVICES is set, cuda:0 refers to the visible GPU
  CUDA_VISIBLE_DEVICES="$GPU_ID" nohup python3 -m shared.utils.reranker_api_service \
    --model_path "$MODEL_PATH" \
    --host 0.0.0.0 \
    --port "$PORT" \
    --use_fp16 \
    --device "cuda:0" \
    > "logs/reranker/reranker_service_gpu${GPU_ID}_port${PORT}.log" 2>&1 &
  PID=$!
  PIDS+=("$PID")
  ENDPOINTS+=("http://localhost:${PORT}")
  echo " Started with PID: $PID"
  echo " Endpoint: http://localhost:${PORT}"
  sleep 2 # Give service time to start
  # A background job that dies does not stop this script, so probe the PID
  # (signal 0 only tests for existence) and warn about immediate failures.
  if ! kill -0 "$PID" 2>/dev/null; then
    echo "Warning: service on GPU $GPU_ID (PID $PID) exited right after start; see logs/reranker/reranker_service_gpu${GPU_ID}_port${PORT}.log" >&2
  fi
done
# List every URL collected in ENDPOINTS
printf '%s\n' "" "All reranker services started!" "" "Endpoints:"
for url in "${ENDPOINTS[@]}"; do
  printf ' - %s\n' "$url"
done
# Write the endpoint list, one URL per line, to the pool file
ENDPOINT_POOL_FILE="shared/configs/reranker_endpoint_pool.txt"
pool_dir="$(dirname "$ENDPOINT_POOL_FILE")"
mkdir -p "$pool_dir"
printf "%s\n" "${ENDPOINTS[@]}" > "$ENDPOINT_POOL_FILE"
printf '%s\n' "" "Endpoint pool file created: $ENDPOINT_POOL_FILE"
# Record the service PIDs, one per line
printf "%s\n" "${PIDS[@]}" > "$PID_FILE"
# Tell the operator where the PIDs are and how to stop / probe the services
printf '%s\n' \
  "" \
  "PIDs saved to: $PID_FILE" \
  "" \
  "To stop these specific reranker services, run:" \
  " ./scripts/stop_reranker_services.sh" \
  "" \
  "This will only kill the processes listed above, not other reranker services." \
  "" \
  "To check service status, run:"
for url in "${ENDPOINTS[@]}"; do
  printf 'curl %s/health\n' "$url"
done