dealflow-ai / scripts /start_vllm.sh
PeterBot22's picture
feat: DealFlow AI MVP — 3-agent CrewAI due diligence system on HF Spaces
8dcf472 verified
raw
history blame contribute delete
812 Bytes
#!/usr/bin/env bash
# DealFlow AI — Start the vLLM OpenAI-compatible API server on AMD MI300X.
#
# Requires: vllm installed, AMD ROCm drivers, MI300X GPU
# Usage:    ./scripts/start_vllm.sh
#
# Optional environment overrides:
#   VLLM_MODEL       model to load; also used as the served model name
#                    (default: Qwen/Qwen3-VL-32B-Instruct-FP8)
#   VLLM_PORT        value passed to --port (default: 8000)
#   GPU_MEMORY_UTIL  value passed to --gpu-memory-utilization (default: 0.90)
#   MAX_MODEL_LEN    value passed to --max-model-len (default: 8192)
#   VLLM_DTYPE       value passed to --dtype (default: float16)
set -euo pipefail

MODEL="${VLLM_MODEL:-Qwen/Qwen3-VL-32B-Instruct-FP8}"
PORT="${VLLM_PORT:-8000}"
GPU_MEMORY_UTIL="${GPU_MEMORY_UTIL:-0.90}"
MAX_MODEL_LEN="${MAX_MODEL_LEN:-8192}"
# Previously hard-coded; the default keeps the original behaviour.
# NOTE(review): the default model is an FP8 checkpoint — confirm float16 is the
# intended dtype for it.
DTYPE="${VLLM_DTYPE:-float16}"

# Startup banner: report the effective settings before handing over to vLLM.
printf '%s\n' \
  "==> Starting vLLM server" \
  " Model: $MODEL" \
  " Port: $PORT" \
  " GPU util: $GPU_MEMORY_UTIL" \
  " Max context: $MAX_MODEL_LEN"

# Build the argument list as an array so every value stays a single word
# regardless of its content.
# NOTE(review): --trust-remote-code lets the model repository supply code that
# runs at load time; only point VLLM_MODEL at repositories you trust.
server_args=(
  --model "$MODEL"
  --port "$PORT"
  --gpu-memory-utilization "$GPU_MEMORY_UTIL"
  --max-model-len "$MAX_MODEL_LEN"
  --dtype "$DTYPE"
  --trust-remote-code
  --served-model-name "$MODEL"
)

# AMD MI300X target. No backend-selection flag is passed here.
# NOTE(review): presumably the installed vLLM is a ROCm build — confirm.
#
# exec replaces this shell with the server process, so signals sent to the
# script's PID (Ctrl-C, SIGTERM from a supervisor/container runtime) reach
# vLLM directly and its exit status becomes the script's exit status.
exec python3 -m vllm.entrypoints.openai.api_server "${server_args[@]}"