Hanrui / sglang /.github /workflows /pr-test-rust.yml
Lekr0's picture
Add files using upload-large-folder tool
a227c91 verified
# CI workflow for the sgl-model-gateway subtree.
name: PR Test (SMG)

# Triggers: pushes and pull requests against main that touch
# sgl-model-gateway/**, plus manual dispatch.
on:
  push:
    branches:
      - main
    paths:
      - 'sgl-model-gateway/**'
  pull_request:
    branches:
      - main
    # "labeled" is listed so that adding a label to a PR starts a run;
    # the jobs in this file gate on the run-ci label.
    types:
      - opened
      - synchronize
      - reopened
      - labeled
    paths:
      - 'sgl-model-gateway/**'
  workflow_dispatch:

# At most one active run per ref; a newer run cancels the one in progress.
concurrency:
  group: gateway-tests-${{ github.ref }}
  cancel-in-progress: true

# sccache settings shared by every job in the workflow.
env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: 'true'

jobs:
# Builds the Python binding wheel with maturin, uploads it as the
# "smg-wheel" artifact for downstream jobs, and smoke-tests the install.
build-wheel:
  # Run for every non-PR event. For pull requests, run only when the
  # run-ci label is present: either already on the PR, or the label that
  # was just added by this "labeled" event.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  runs-on: 4-gpu-a10
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install rust dependencies
      run: |
        bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

    - name: Configure sccache
      uses: mozilla-actions/sccache-action@v0.0.9
      with:
        version: "v0.12.0"
        disable_annotations: true

    - name: Rust cache
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: sgl-model-gateway
        shared-key: "rust-cache"
        cache-all-crates: true
        cache-on-failure: true
        save-if: true

    - name: Build python binding
      run: |
        source "$HOME/.cargo/env"
        export RUSTC_WRAPPER=sccache
        cd sgl-model-gateway/bindings/python
        python3 -m pip install --upgrade pip maturin
        maturin build --profile ci --features vendored-openssl --out dist

    - name: List built wheel
      run: ls -lh sgl-model-gateway/bindings/python/dist/

    # Short retention: the wheel is only consumed by later jobs of this run.
    - name: Upload wheel artifact
      uses: actions/upload-artifact@v4
      with:
        name: smg-wheel
        path: sgl-model-gateway/bindings/python/dist/*.whl
        retention-days: 1

    - name: Test wheel install
      run: |
        # Remove any previously installed copy first (same approach as the
        # gateway-e2e job). pip skips a wheel whose version is already
        # installed, which would leave the checks below exercising a stale build.
        # NOTE(review): assumes the Python environment on this runner can
        # persist between runs - confirm.
        pip uninstall -y sglang-router || true
        pip install sgl-model-gateway/bindings/python/dist/*.whl
        python3 -c "import sglang_router; print('Python package: OK')"
        python3 -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
        python3 -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"
# Runs the Python binding's pytest suite against the wheel produced by
# build-wheel, enforcing a minimum coverage of 80%.
python-unit-tests:
  runs-on: ubuntu-latest
  needs: build-wheel
  steps:
    # Check out into a subdirectory so the gateway folder can be hoisted
    # to the workspace root in the next step.
    - uses: actions/checkout@v4
      with:
        path: sglang-repo

    - name: Move sgl-model-gateway folder to root
      run: |
        mv sglang-repo/sgl-model-gateway/* .
        rm -rf sglang-repo

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.13'

    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: smg-wheel
        path: dist/

    - name: Install wheel
      run: pip install dist/*.whl

    # Coverage is measured for sglang_router using the .coveragerc in
    # bindings/python; the run fails below 80%.
    - name: Run Python unit tests
      run: |
        cd bindings/python
        python3 -m pip install pytest pytest-cov pytest-xdist
        pytest -q tests --cov=sglang_router --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80
# Rust quality gate for sgl-model-gateway: clippy, rustfmt (nightly),
# golden-fixture generation and `cargo test`.
unit-tests:
  # Run for every non-PR event. For pull requests, run only when the
  # run-ci label is present: either already on the PR, or the label that
  # was just added by this "labeled" event.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

    - name: Configure sccache
      uses: mozilla-actions/sccache-action@v0.0.9
      with:
        version: "v0.12.0"
        disable_annotations: true

    - name: Rust cache
      uses: Swatinem/rust-cache@v2
      with:
        workspaces: sgl-model-gateway
        shared-key: "rust-cache"
        cache-all-crates: true
        cache-on-failure: true
        save-if: true

    # Warnings are promoted to errors (-D warnings).
    - name: Run lint
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        rustup component add clippy
        cargo clippy --all-targets --all-features -- -D warnings

    - name: Run fmt
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        # Install the nightly toolchain together with rustfmt in a single
        # command. A component cannot be added to a toolchain that is not
        # installed yet, so the toolchain install must not come second.
        rustup toolchain install nightly --profile minimal --component rustfmt
        cargo +nightly fmt -- --check

    # Produces the fixtures via scripts/generate_vision_golden.py before
    # the Rust tests run; uses the CPU-only PyTorch wheel index.
    - name: Generate vision golden fixtures
      run: |
        pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
        pip install transformers pillow numpy scipy
        cd sgl-model-gateway/
        python scripts/generate_vision_golden.py

    - name: Run Rust tests
      timeout-minutes: 20
      run: |
        source "$HOME/.cargo/env"
        cd sgl-model-gateway/
        cargo test

    # Reported even when an earlier step failed.
    - name: Show sccache stats
      if: always()
      run: sccache --show-stats
# End-to-end test matrix for the gateway. Each matrix entry selects the
# test directories, extra pip packages, environment prefix, rerun policy,
# parallelism flags and optional services (Oracle DB, Brave MCP server).
gateway-e2e:
  name: ${{ matrix.name }}
  needs: build-wheel
  # Run for every non-PR event. For pull requests, run only when the
  # run-ci label is present: either already on the PR, or the label that
  # was just added by this "labeled" event.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  strategy:
    # Let the remaining suites finish even if one of them fails.
    fail-fast: false
    matrix:
      include:
        - name: benchmarks
          timeout: 32
          test_dirs: "e2e_test/benchmarks"
          extra_deps: "genai-bench==0.0.3"
          env_vars: ""
          reruns: ""
          upload_benchmarks: true
          parallel_opts: ""  # No parallel for benchmarks (performance measurement)
        - name: responses
          timeout: 45
          test_dirs: "e2e_test/responses"
          extra_deps: ""
          env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
          reruns: "--reruns 2 --reruns-delay 5"
          setup_oracle: true
          setup_brave: true
          parallel_opts: ""  # Cloud backend tests not compatible with parallel execution
        - name: e2e
          timeout: 45
          test_dirs: "e2e_test/router e2e_test/embeddings"
          extra_deps: "pytest-parallel py"  # py is required for pytest-parallel with newer pytest
          env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
          reruns: "--reruns 2 --reruns-delay 5"
          parallel_opts: "--workers 1 --tests-per-worker 4"  # Thread-based parallelism
        - name: chat-completions
          timeout: 45
          test_dirs: "e2e_test/chat_completions"
          extra_deps: ""
          env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
          reruns: "--reruns 2 --reruns-delay 5"
          parallel_opts: ""
  runs-on: 4-gpu-a10
  timeout-minutes: ${{ matrix.timeout }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install SGLang dependencies
      run: |
        sudo --preserve-env=PATH bash scripts/ci/cuda/ci_install_dependency.sh

    # Downloads the client only when the unpacked directory is missing,
    # then exposes it to later steps through LD_LIBRARY_PATH.
    - name: Setup Oracle Instant Client
      if: matrix.setup_oracle
      run: |
        sudo apt-get install -y unzip
        INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
        INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"
        if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
          echo "Downloading Oracle Instant Client..."
          mkdir -p "$INSTANT_CLIENT_DIR"
          cd "$INSTANT_CLIENT_DIR"
          wget https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP
          unzip $INSTANT_CLIENT_ZIP
          rm $INSTANT_CLIENT_ZIP
        else
          echo "Oracle Instant Client already exists, skipping download"
        fi
        # Values written to GITHUB_ENV are stored literally, so the current
        # LD_LIBRARY_PATH must be expanded here; writing the text
        # "$LD_LIBRARY_PATH" would put that literal string on the search
        # path and drop whatever the runner had configured. The ":..."
        # suffix is emitted only when the variable is non-empty.
        echo "LD_LIBRARY_PATH=$INSTANT_CLIENT_DIR/instantclient_23_9${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

    - name: Start Oracle Database
      if: matrix.setup_oracle
      run: |
        docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
        echo "Starting Oracle DB..."
        # Export Oracle connection environment variables
        echo "ATP_USER=system" >> $GITHUB_ENV
        echo "ATP_PASSWORD=oracle" >> $GITHUB_ENV
        echo "ATP_DSN=localhost:1521/XEPDB1" >> $GITHUB_ENV

    # BRAVE_API_KEY is forwarded from the step's environment into the
    # container. The health probe is best-effort: it never fails the step.
    - name: Start Brave MCP Server
      if: matrix.setup_brave
      run: |
        docker run -d --rm \
          -p 8001:8080 \
          -e BRAVE_API_KEY \
          --name brave-search-server \
          shoofio/brave-search-mcp-sse:1.0.10
        echo "Starting Brave MCP Server..."
        sleep 2
        curl -f --max-time 1 http://localhost:8001/sse > /dev/null 2>&1 && echo "Brave MCP Server is healthy!" || echo "Brave MCP Server responded"

    - name: Download wheel artifact
      uses: actions/download-artifact@v4
      with:
        name: smg-wheel
        path: wheel/

    # Drop any previously installed router before installing the fresh wheel.
    - name: Install wheel
      run: |
        pip uninstall -y sglang-router || true
        pip install wheel/*.whl

    - name: Install e2e test dependencies
      run: |
        python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy
        if [ -n "${{ matrix.extra_deps }}" ]; then
          python3 -m pip --no-cache-dir install --upgrade ${{ matrix.extra_deps }}
        fi

    # matrix.env_vars expands to a VAR=value prefix for the pytest command.
    - name: Run E2E tests
      run: |
        bash scripts/killall_sglang.sh all
        cd sgl-model-gateway
        ${{ matrix.env_vars }} ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest ${{ matrix.reruns }} ${{ matrix.parallel_opts }} ${{ matrix.test_dirs }} -s -vv -o log_cli=true --log-cli-level=INFO

    - name: Upload benchmark results
      if: matrix.upload_benchmarks && success()
      uses: actions/upload-artifact@v4
      with:
        name: genai-bench-results-all-policies
        path: sgl-model-gateway/benchmark_**/

    # Cleanup steps run regardless of the test outcome.
    - name: Cleanup Brave MCP Server
      if: always() && matrix.setup_brave
      run: |
        docker stop brave-search-server || true
        docker rm brave-search-server || true

    - name: Cleanup Oracle Database
      if: always() && matrix.setup_oracle
      run: |
        docker stop oracle-db || true
        docker rm oracle-db || true
# Verifies that docker/gateway.Dockerfile still builds; the image is
# tagged locally and never pushed.
docker-build-test:
  runs-on: ubuntu-24.04
  # Run for every non-PR event. For pull requests, run only when the
  # run-ci label is present: either already on the PR, or the label that
  # was just added by this "labeled" event.
  if: |
    github.event_name != 'pull_request' ||
    (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
    (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    # Build layers are read from and written to the GitHub Actions cache.
    - name: Build Docker image (no push)
      uses: docker/build-push-action@v5
      with:
        context: .
        file: docker/gateway.Dockerfile
        tags: sgl-model-gateway:test
        push: false
        cache-from: type=gha
        cache-to: type=gha,mode=max
# Aggregation job: it has no work of its own and only runs once every
# job listed under `needs` has completed.
finish:
  runs-on: ubuntu-latest
  needs:
    - build-wheel
    - python-unit-tests
    - unit-tests
    - gateway-e2e
    - docker-build-test
  steps:
    - name: Finish
      run: echo "This is an empty step to ensure that all jobs are completed."
# Post-processes the benchmark artifact uploaded by the gateway-e2e
# "benchmarks" matrix entry, once the e2e matrix has succeeded.
summarize-benchmarks:
  runs-on: ubuntu-latest
  needs: gateway-e2e
  if: success()
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # No `path` is given, so the artifact is extracted into the workspace
    # root, which is the directory handed to the summary script below.
    - name: Download benchmark results
      uses: actions/download-artifact@v4
      with:
        name: genai-bench-results-all-policies

    - name: Create benchmark summary
      run: python3 sgl-model-gateway/e2e_test/benchmarks/summarize.py .