# CI for the sgl-model-gateway component: builds the Python wheel, runs Python
# and Rust unit tests, GPU-backed end-to-end suites, and a Docker build check.
name: PR Test (SMG)

on:
  push:
    branches: [main]
    paths:
      - "sgl-model-gateway/**"
  pull_request:
    branches: [main]
    # "labeled" is included so that adding the 'run-ci' label triggers a run.
    types: [opened, synchronize, reopened, labeled]
    paths:
      - "sgl-model-gateway/**"
  workflow_dispatch:

# Only one run per ref; a newer run cancels the one in progress.
concurrency:
  group: gateway-tests-${{ github.ref }}
  cancel-in-progress: true

env:
  RUSTC_WRAPPER: sccache
  SCCACHE_GHA_ENABLED: "true"

jobs:
  # Builds the Python binding wheel once and publishes it as the 'smg-wheel'
  # artifact consumed by python-unit-tests and gateway-e2e.
  build-wheel:
    # Gate shared by the heavy jobs in this workflow. The job runs when:
    #   - the event is not a pull request (push / workflow_dispatch), or
    #   - the PR already carries the 'run-ci' label (any action but 'labeled'), or
    #   - the label that was just added is 'run-ci'.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: 4-gpu-a10
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: |
          bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.12.0"
          disable_annotations: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: "rust-cache"
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          export RUSTC_WRAPPER=sccache
          cd sgl-model-gateway/bindings/python
          python3 -m pip install --upgrade pip maturin
          maturin build --profile ci --features vendored-openssl --out dist

      - name: List built wheel
        run: ls -lh sgl-model-gateway/bindings/python/dist/

      - name: Upload wheel artifact
        uses: actions/upload-artifact@v4
        with:
          name: smg-wheel
          path: sgl-model-gateway/bindings/python/dist/*.whl
          retention-days: 1

      # Smoke test: the package imports, the Rust extension loads, and the
      # launcher entry point answers --help.
      - name: Test wheel install
        run: |
          pip install sgl-model-gateway/bindings/python/dist/*.whl
          python3 -c "import sglang_router; print('Python package: OK')"
          python3 -c "from sglang_router.sglang_router_rs import Router; print('Rust extension: OK')"
          python3 -m sglang_router.launch_router --help > /dev/null && echo "Entry point: OK"

  # Runs the Python binding unit tests against the wheel built above.
  python-unit-tests:
    needs: build-wheel
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      # Flatten the checkout so the contents of sgl-model-gateway/ sit at the
      # workspace root, then drop the rest of the repository.
      - name: Move sgl-model-gateway folder to root
        run: |
          mv sglang-repo/sgl-model-gateway/* .
          rm -rf sglang-repo

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: smg-wheel
          path: dist/

      - name: Install wheel
        run: pip install dist/*.whl

      # Fails the job when coverage of sglang_router drops below 80%.
      - name: Run Python unit tests
        run: |
          cd bindings/python
          python3 -m pip install pytest pytest-cov pytest-xdist
          pytest -q tests --cov=sglang_router --cov-config=.coveragerc --cov-report=term-missing --cov-fail-under=80

  # Rust lint, format check and unit tests.
  unit-tests:
    # Same 'run-ci' gate as build-wheel.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/cuda/ci_install_gateway_dependencies.sh

      - name: Configure sccache
        uses: mozilla-actions/sccache-action@v0.0.9
        with:
          version: "v0.12.0"
          disable_annotations: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-model-gateway
          shared-key: "rust-cache"
          cache-all-crates: true
          cache-on-failure: true
          save-if: true

      - name: Run lint
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          rustup component add clippy
          cargo clippy --all-targets --all-features -- -D warnings

      - name: Run fmt
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          # Install the nightly toolchain first; a component can only be added
          # to a toolchain that is already installed.
          rustup toolchain install nightly --profile minimal
          rustup component add --toolchain nightly-x86_64-unknown-linux-gnu rustfmt
          cargo +nightly fmt -- --check

      - name: Generate vision golden fixtures
        run: |
          pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu
          pip install transformers pillow numpy scipy
          cd sgl-model-gateway/
          python scripts/generate_vision_golden.py

      - name: Run Rust tests
        timeout-minutes: 20
        run: |
          source "$HOME/.cargo/env"
          cd sgl-model-gateway/
          cargo test

      - name: Show sccache stats
        if: always()
        run: sccache --show-stats

  # End-to-end suites, one matrix entry per suite, run against the built wheel.
  gateway-e2e:
    name: ${{ matrix.name }}
    needs: build-wheel
    # Same 'run-ci' gate as build-wheel.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    strategy:
      fail-fast: false
      matrix:
        # Keys omitted from an entry (setup_oracle, setup_brave,
        # upload_benchmarks) evaluate as falsy in the step-level `if:` checks.
        include:
          - name: benchmarks
            timeout: 32
            test_dirs: "e2e_test/benchmarks"
            extra_deps: "genai-bench==0.0.3"
            env_vars: ""
            reruns: ""
            upload_benchmarks: true
            parallel_opts: ""  # No parallel for benchmarks (performance measurement)
          - name: responses
            timeout: 45
            test_dirs: "e2e_test/responses"
            extra_deps: ""
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            setup_oracle: true
            setup_brave: true
            parallel_opts: ""  # Cloud backend tests not compatible with parallel execution
          - name: e2e
            timeout: 45
            test_dirs: "e2e_test/router e2e_test/embeddings"
            extra_deps: "pytest-parallel py"  # py is required for pytest-parallel with newer pytest
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            parallel_opts: "--workers 1 --tests-per-worker 4"  # Thread-based parallelism
          - name: chat-completions
            timeout: 45
            test_dirs: "e2e_test/chat_completions"
            extra_deps: ""
            env_vars: "SHOW_WORKER_LOGS=0 SHOW_ROUTER_LOGS=1"
            reruns: "--reruns 2 --reruns-delay 5"
            parallel_opts: ""
    runs-on: 4-gpu-a10
    timeout-minutes: ${{ matrix.timeout }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install SGLang dependencies
        run: |
          sudo --preserve-env=PATH bash scripts/ci/cuda/ci_install_dependency.sh

      - name: Setup Oracle Instant Client
        if: matrix.setup_oracle
        run: |
          sudo apt-get install -y unzip
          INSTANT_CLIENT_DIR="/home/ubuntu/instant-client"
          INSTANT_CLIENT_ZIP="instantclient-basic-linux.x64-23.9.0.25.07.zip"
          # Reuse a previously unpacked client if the directory is present.
          if [ ! -d "$INSTANT_CLIENT_DIR/instantclient_23_9" ]; then
            echo "Downloading Oracle Instant Client..."
            mkdir -p "$INSTANT_CLIENT_DIR"
            cd "$INSTANT_CLIENT_DIR"
            wget https://download.oracle.com/otn_software/linux/instantclient/2390000/$INSTANT_CLIENT_ZIP
            unzip $INSTANT_CLIENT_ZIP
            rm $INSTANT_CLIENT_ZIP
          else
            echo "Oracle Instant Client already exists, skipping download"
          fi
          # GITHUB_ENV entries are stored verbatim (no shell expansion when they
          # are read back), so expand the current LD_LIBRARY_PATH here. The
          # ${VAR:+...} form avoids a trailing ':' when the variable is empty.
          echo "LD_LIBRARY_PATH=$INSTANT_CLIENT_DIR/instantclient_23_9${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"

      - name: Start Oracle Database
        if: matrix.setup_oracle
        run: |
          docker run -d -p 1521:1521 -e ORACLE_PASSWORD=oracle --name oracle-db gvenzl/oracle-xe:21-slim
          echo "Starting Oracle DB..."

          # Export Oracle connection environment variables
          echo "ATP_USER=system" >> "$GITHUB_ENV"
          echo "ATP_PASSWORD=oracle" >> "$GITHUB_ENV"
          echo "ATP_DSN=localhost:1521/XEPDB1" >> "$GITHUB_ENV"

      - name: Start Brave MCP Server
        if: matrix.setup_brave
        run: |
          docker run -d --rm \
            -p 8001:8080 \
            -e BRAVE_API_KEY \
            --name brave-search-server \
            shoofio/brave-search-mcp-sse:1.0.10
          echo "Starting Brave MCP Server..."
          sleep 2
          # Best-effort probe: either branch prints a message and the step
          # succeeds regardless of the curl result.
          curl -f --max-time 1 http://localhost:8001/sse > /dev/null 2>&1 && echo "Brave MCP Server is healthy!" || echo "Brave MCP Server responded"

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: smg-wheel
          path: wheel/

      - name: Install wheel
        run: |
          pip uninstall -y sglang-router || true
          pip install wheel/*.whl

      - name: Install e2e test dependencies
        run: |
          python3 -m pip install pytest pytest-rerunfailures httpx openai grpcio grpcio-health-checking numpy
          if [ -n "${{ matrix.extra_deps }}" ]; then
            python3 -m pip --no-cache-dir install --upgrade ${{ matrix.extra_deps }}
          fi

      - name: Run E2E tests
        run: |
          bash scripts/killall_sglang.sh all
          cd sgl-model-gateway
          ${{ matrix.env_vars }} ROUTER_LOCAL_MODEL_PATH="/home/ubuntu/models" pytest ${{ matrix.reruns }} ${{ matrix.parallel_opts }} ${{ matrix.test_dirs }} -s -vv -o log_cli=true --log-cli-level=INFO

      - name: Upload benchmark results
        if: matrix.upload_benchmarks && success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: sgl-model-gateway/benchmark_**/

      - name: Cleanup Brave MCP Server
        if: always() && matrix.setup_brave
        run: |
          docker stop brave-search-server || true
          docker rm brave-search-server || true

      - name: Cleanup Oracle Database
        if: always() && matrix.setup_oracle
        run: |
          docker stop oracle-db || true
          docker rm oracle-db || true

  # Verifies that the gateway Docker image still builds; nothing is pushed.
  docker-build-test:
    # Same 'run-ci' gate as build-wheel.
    if: |
      github.event_name != 'pull_request' ||
      (github.event.action != 'labeled' && contains(github.event.pull_request.labels.*.name, 'run-ci')) ||
      (github.event.action == 'labeled' && github.event.label.name == 'run-ci')
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build Docker image (no push)
        uses: docker/build-push-action@v5
        with:
          context: .
          file: docker/gateway.Dockerfile
          push: false
          tags: sgl-model-gateway:test
          cache-from: type=gha
          cache-to: type=gha,mode=max

  # Aggregation job that depends on every other test job.
  finish:
    needs: [build-wheel, python-unit-tests, unit-tests, gateway-e2e, docker-build-test]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."

  # Summarizes the benchmark artifact uploaded by the 'benchmarks' matrix entry.
  summarize-benchmarks:
    needs: gateway-e2e
    runs-on: ubuntu-latest
    if: success()
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: Create benchmark summary
        run: python3 sgl-model-gateway/e2e_test/benchmarks/summarize.py .