| name: PR Test |
| # Run title: "[<target_stage>] <pr_head_sha>" when both inputs are set, "[<target_stage>]" when only |
| # target_stage is set, and an empty string otherwise. |
| run-name: ${{ inputs.target_stage && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage)) || '' }} |
| # Triggers: a cron schedule, pull requests against main, manual dispatch, and reuse via workflow_call. |
| on: |
| schedule: |
| # Fires at minute 0 of every 6th hour. |
| - cron: '0 */6 * * *' |
| pull_request: |
| branches: [main] |
| # Manual / bot-initiated runs; the descriptions below mention the /rerun-stage command. |
| workflow_dispatch: |
| inputs: |
| version: |
| description: "FlashInfer version" |
| required: true |
| type: choice |
| default: "release" |
| options: |
| - "release" |
| - "nightly" |
| # When set, only the job whose name equals this value is run (see the per-job `if` conditions). |
| target_stage: |
| description: "Specific stage to run (optional, for quick testing)" |
| required: false |
| type: string |
| default: "" |
| force_continue_on_error: |
| description: "Force continue-on-error (test scheduled CI behavior)" |
| required: false |
| type: boolean |
| default: false |
| # Takes precedence over `ref` and github.sha in every checkout step of this workflow. |
| pr_head_sha: |
| description: "PR head SHA to checkout (for /rerun-stage on fork PRs)" |
| required: false |
| type: string |
| default: "" |
| test_parallel_dispatch: |
| description: "Test parallel dispatch behavior (simulates scheduled run)" |
| required: false |
| type: boolean |
| default: false |
| # Inputs accepted when another workflow calls this one as a reusable workflow. |
| workflow_call: |
| inputs: |
| ref: |
| description: 'Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch.' |
| required: false |
| type: string |
| default: '' |
| run_all_tests: |
| description: "Run all tests (for releasing or testing purpose)" |
| required: false |
| type: boolean |
| default: false |
|
|
| concurrency: |
| # The group key combines the triggering event name, the branch (head_ref, else ref_name, else |
| # 'default'), the requested PR head SHA (else 'current') and the target stage / ref (else 'all'), |
| # so runs that differ in any of these components land in different groups. |
| # NOTE(review): a comment in the check-changes job states that github.event_name is inherited |
| # from the caller under workflow_call; if so, the comparison below is always true and reusable |
| # invocations are cancellable as well - confirm that this is intended. |
| group: pr-test-${{ github.event_name }}-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.ref || 'all' }} |
| cancel-in-progress: ${{ github.event_name != 'workflow_call' }} |
|
|
| # Workflow-level environment variables. Their meaning is defined by the SGLang code and CI scripts, |
| # which are not part of this file. |
| env: |
| SGLANG_IS_IN_CI: true |
| SGLANG_CUDA_COREDUMP: "1" |
| SGLANG_JIT_DEEPGEMM_FAST_WARMUP: true |
|
|
| # Scopes granted to the workflow token. The steps in this file use the token to list workflow jobs |
| # and to query commits / pull requests through `gh`. |
| # NOTE(review): nothing visible in this part of the file needs actions: write - confirm it is required. |
| permissions: |
| actions: write |
| contents: read |
| pull-requests: read |
|
|
| jobs: |
| |
| # Detects which components changed and derives run-wide settings that the later jobs consume. |
| check-changes: |
| runs-on: ubuntu-latest |
| outputs: |
| # Detection outputs prefer the API-based step, then dorny/paths-filter, then the run-all flag. |
| main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |
| # sgl_kernel is forced to false whenever target_stage is set; the unmasked detection result is |
| # exposed separately as sgl_kernel_raw. Unlike the other components it has no run_all_tests |
| # fallback. |
| sgl_kernel: ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |
| # Unmasked sgl-kernel detection result. |
| sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |
| jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |
| multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |
| max_parallel: ${{ steps.set-parallel.outputs.max_parallel }} |
| b200_runner: ${{ steps.set-runner.outputs.b200_runner }} |
| enable_retry: ${{ steps.set-retry.outputs.enable_retry }} |
| continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }} |
| steps: |
| - name: Checkout code |
| uses: actions/checkout@v4 |
| with: |
| # Precedence: explicit PR head SHA, then the workflow_call ref, then the triggering commit. |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| - name: Determine run mode |
| id: run-mode |
| run: | |
| # Run every test group for scheduled runs, or when the caller sets the run_all_tests input. |
| # Note: github.event_name is inherited from the caller under workflow_call, so it cannot be used |
| # to recognise a reusable-workflow invocation; the explicit run_all_tests input is checked instead. |
| if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then |
| echo "run_all_tests=true" >> "$GITHUB_OUTPUT" |
| echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})" |
| else |
| echo "run_all_tests=false" >> "$GITHUB_OUTPUT" |
| echo "Run mode: FILTERED (triggered by ${{ github.event_name }})" |
| fi |
| |
| - name: Detect file changes |
| id: filter |
| uses: dorny/paths-filter@v3 |
| # Skipped when every test group runs anyway, and in target_stage mode (where the API-based |
| # detection step is used instead, provided a PR head SHA was supplied). |
| if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage |
| with: |
| filters: | |
| main_package: |
| - "python/sglang/!(multimodal_gen)/**" |
| - "python/pyproject.toml" |
| - "scripts/ci/cuda/*" |
| - "scripts/ci/utils/*" |
| - "test/**" |
| - ".github/workflows/pr-test.yml" |
| sgl_kernel: |
| - "sgl-kernel/**" |
| jit_kernel: |
| - "python/sglang/jit_kernel/**" |
| - "python/pyproject.toml" |
| - ".github/workflows/pr-test.yml" |
| multimodal_gen: |
| - "python/sglang/multimodal_gen/**" |
| - "python/sglang/jit_kernel/**" |
| - "python/sglang/cli/**" |
| - "python/pyproject.toml" |
| - ".github/workflows/pr-test.yml" |
| |
| |
| |
| |
| - name: Detect file changes via API (for target_stage) |
| id: filter-api |
| if: inputs.target_stage && inputs.pr_head_sha |
| env: |
| GH_TOKEN: ${{ github.token }} |
| # The dispatch-supplied SHA is passed through the environment rather than interpolated into |
| # the script text, so its contents can never be parsed as shell syntax. |
| PR_HEAD_SHA: ${{ inputs.pr_head_sha }} |
| run: | |
| echo "Detecting file changes via GitHub API for target_stage mode..." |
| echo "PR head SHA: ${PR_HEAD_SHA}" |
| |
| # List the files that differ between main and the PR head via the compare endpoint. |
| # Any API error is swallowed and treated as "no changed files" (best effort). |
| CHANGED_FILES=$(gh api "repos/${{ github.repository }}/compare/main...${PR_HEAD_SHA}" \ |
| --jq '[.files[].filename] | .[]' 2>/dev/null || echo "") |
|
|
| if [ -z "$CHANGED_FILES" ]; then |
| echo "Warning: Could not fetch changed files from API, assuming no changes" |
| echo "sgl_kernel=false" >> "$GITHUB_OUTPUT" |
| echo "main_package=false" >> "$GITHUB_OUTPUT" |
| echo "jit_kernel=false" >> "$GITHUB_OUTPUT" |
| echo "multimodal_gen=false" >> "$GITHUB_OUTPUT" |
| exit 0 |
| fi |
|
|
| echo "Changed files:" |
| echo "$CHANGED_FILES" | head -20 |
| echo "..." |
|
|
| # sgl_kernel: anything under sgl-kernel/ |
| if echo "$CHANGED_FILES" | grep -q "^sgl-kernel/"; then |
| echo "sgl_kernel=true" >> "$GITHUB_OUTPUT" |
| echo "Detected sgl-kernel changes" |
| else |
| echo "sgl_kernel=false" >> "$GITHUB_OUTPUT" |
| fi |
|
|
| # main_package: python/sglang/ except multimodal_gen, plus pyproject.toml, the CI scripts, |
| # test/ and this workflow file (mirrors the main_package paths-filter definition). |
| MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml)" | grep -v "^python/sglang/multimodal_gen/" || true) |
| if [ -n "$MAIN_PKG_FILES" ]; then |
| echo "main_package=true" >> "$GITHUB_OUTPUT" |
| echo "Detected main_package changes" |
| else |
| echo "main_package=false" >> "$GITHUB_OUTPUT" |
| fi |
|
|
| # jit_kernel: mirrors the jit_kernel paths-filter definition. |
| if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml)"; then |
| echo "jit_kernel=true" >> "$GITHUB_OUTPUT" |
| echo "Detected jit_kernel changes" |
| else |
| echo "jit_kernel=false" >> "$GITHUB_OUTPUT" |
| fi |
|
|
| # multimodal_gen: includes python/sglang/jit_kernel/ so the result agrees with the paths-filter definition. |
| if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/jit_kernel/|python/sglang/cli/|python/pyproject\.toml|\.github/workflows/pr-test\.yml)"; then |
| echo "multimodal_gen=true" >> "$GITHUB_OUTPUT" |
| echo "Detected multimodal_gen changes" |
| else |
| echo "multimodal_gen=false" >> "$GITHUB_OUTPUT" |
| fi |
|
|
| - name: Set max-parallel based on run type |
| id: set-parallel |
| env: |
| GH_TOKEN: ${{ github.token }} |
| # Dispatch inputs and the ref name are passed through the environment rather than being |
| # interpolated into the script text, so their contents can never be parsed as shell syntax. |
| TARGET_STAGE: ${{ inputs.target_stage }} |
| PR_HEAD_SHA: ${{ inputs.pr_head_sha }} |
| REF_NAME: ${{ github.ref_name }} |
| run: | |
| # Scheduled runs and high-priority PRs get full parallelism |
| if [[ "${{ github.event_name }}" == "schedule" ]]; then |
| echo "max_parallel=14" >> "$GITHUB_OUTPUT" |
| echo "Scheduled run detected, setting max_parallel to 14" |
| elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then |
| echo "max_parallel=14" >> "$GITHUB_OUTPUT" |
| echo "High priority PR detected, setting max_parallel to 14" |
| elif [[ -n "$TARGET_STAGE" ]]; then |
| # /rerun-stage (workflow_dispatch): query PR labels via GitHub API |
| # Try SHA lookup first (fork PRs), fallback to branch name (non-fork PRs) |
| LABELS="" |
| if [[ -n "$PR_HEAD_SHA" ]]; then |
| LABELS=$(gh api "repos/${{ github.repository }}/commits/${PR_HEAD_SHA}/pulls" \ |
| --jq '.[0].labels[].name' 2>/dev/null || true) |
| fi |
| if [[ -z "$LABELS" ]]; then |
| LABELS=$(gh pr list --head "$REF_NAME" --repo "${{ github.repository }}" \ |
| --json labels --jq '.[0].labels[].name' 2>/dev/null || true) |
| fi |
| echo "PR labels: ${LABELS:-"(none)"}" |
| # -F fixed string, -x whole-line match: the label must be exactly "high priority". |
| if echo "$LABELS" | grep -Fxq "high priority"; then |
| echo "max_parallel=14" >> "$GITHUB_OUTPUT" |
| echo "High priority PR detected via API (/rerun-stage), setting max_parallel to 14" |
| else |
| echo "max_parallel=3" >> "$GITHUB_OUTPUT" |
| echo "Using default max_parallel of 3 (/rerun-stage, no high priority label)" |
| fi |
| else |
| echo "max_parallel=3" >> "$GITHUB_OUTPUT" |
| echo "Using default max_parallel of 3" |
| fi |
| |
| - name: Set B200 runner tag |
| id: set-runner |
| env: |
| # Free-form dispatch input: read it from the environment instead of interpolating it into the |
| # script text, so its contents can never be parsed as shell syntax. |
| TARGET_STAGE: ${{ inputs.target_stage }} |
| run: | |
| # Use kernel-build runner only when sgl_kernel changes are detected AND we're not in target_stage mode |
| # (target_stage skips wheel builds, so we can't use custom kernels) |
| # Use API-based detection (filter-api) for target_stage mode, otherwise use dorny/paths-filter (filter) |
| # NOTE(review): the run_all_tests fallback below also selects the kernel runner for run-all runs, |
| # although the sgl_kernel job output has no such fallback - confirm this is intended. |
| sgl_kernel="${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel || steps.run-mode.outputs.run_all_tests }}" |
| target_stage="${TARGET_STAGE}" |
| if [[ "$sgl_kernel" == "true" && -z "$target_stage" ]]; then |
| echo "b200_runner=4-gpu-b200-kernel" >> "$GITHUB_OUTPUT" |
| else |
| echo "b200_runner=4-gpu-b200" >> "$GITHUB_OUTPUT" |
| fi |
| |
| # Unconditionally publishes enable_retry=true as a step output. |
| - name: Enable retry for CI |
| id: set-retry |
| run: | |
| echo "enable_retry=true" >> $GITHUB_OUTPUT |
| echo "Retry logic enabled for CI" |
| |
| # Publishes continue_on_error=true for run-all runs or when force_continue_on_error is set; the test |
| # jobs translate it into the --continue-on-error flag of run_suite.py. |
| - name: Set continue-on-error for full test runs |
| id: set-continue-on-error |
| run: | |
| if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then |
| echo "continue_on_error=true" >> $GITHUB_OUTPUT |
| echo "Full test run or force flag detected, enabling continue-on-error to run all tests" |
| else |
| echo "continue_on_error=false" >> $GITHUB_OUTPUT |
| echo "Filtered run, continue-on-error disabled" |
| fi |
| |
| - name: Validate target_stage with kernel changes |
| # Fails the job when a single stage was requested but sgl-kernel changes were detected. |
| if: inputs.target_stage && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true') |
| run: | |
| echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes." |
| echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode, but this PR modifies sgl-kernel/ files." |
| echo "::error::Please use /tag-and-rerun-ci to run the full workflow including kernel builds." |
| echo "" |
| echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes." |
| echo "" |
| echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels." |
| echo "The /rerun-stage command skips the wheel build job, so the test would run against" |
| echo "the wrong (PyPI) version of sgl-kernel instead of your changes." |
| echo "" |
| echo "To properly test your kernel changes, use one of these commands instead:" |
| echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds" |
| echo " /rerun-ci - Re-run the full workflow" |
| echo "" |
| exit 1 |
| |
| - name: Show filter results in summary (table) |
| env: |
| # Free-form dispatch input: expose it as an environment variable instead of interpolating it |
| # into the script text, so its contents can never be parsed as shell syntax. |
| TARGET_STAGE: ${{ inputs.target_stage }} |
| run: | |
| # Append a markdown table with every derived setting to the job summary. |
| { |
| echo "## Change Detection" |
| echo "" |
| echo "| Component | Changed |" |
| echo "|-------------------|---------|" |
| echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |" |
| echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |" |
| echo "| sgl_kernel (used) | ${{ !inputs.target_stage && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |" |
| echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |" |
| echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |" |
| echo "| target_stage | ${TARGET_STAGE:-(none)} |" |
| echo "| detection_method | ${{ inputs.target_stage && 'GitHub API' || 'dorny/paths-filter' }} |" |
| echo "| max_parallel | ${{ steps.set-parallel.outputs.max_parallel }} |" |
| echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |" |
| echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |" |
| echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |" |
| } >> "$GITHUB_STEP_SUMMARY" |
| |
| |
| |
| |
| |
|
|
| wait-for-stage-a: |
| needs: [check-changes, call-gate] |
| # Polling gate: blocks until the stage-a-test-1 job of this same run has finished. |
| # Only runs for pull_request events outside target_stage / test_parallel_dispatch mode, when |
| # main_package or sgl_kernel changed and the gate job succeeded or was skipped. |
| if: | |
| always() && |
| !cancelled() && |
| github.event_name == 'pull_request' && |
| !inputs.target_stage && |
| inputs.test_parallel_dispatch != true && |
| (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') && |
| (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped') |
| runs-on: ubuntu-latest |
| outputs: |
| # One of: success, skipped, failure, timeout. |
| stage_a_result: ${{ steps.wait.outputs.result }} |
| steps: |
| - name: Wait for stage-a-test-1 to complete |
| id: wait |
| uses: actions/github-script@v7 |
| with: |
| script: | |
| const maxWaitMinutes = 240; |
| const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls |
| const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds; |
| // Poll the job list of this workflow run until stage-a-test-1 reaches the completed state. |
| for (let attempt = 0; attempt < maxAttempts; attempt++) { |
| const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, { |
| owner: context.repo.owner, |
| repo: context.repo.repo, |
| run_id: context.runId, |
| per_page: 100, |
| }); |
|
|
| const stageAJob = jobs.find(job => job.name === 'stage-a-test-1'); |
|
|
| if (stageAJob) { |
| console.log(`stage-a-test-1 status: ${stageAJob.status}, conclusion: ${stageAJob.conclusion}`); |
|
|
| if (stageAJob.status === 'completed') { |
| // success / skipped are reported as-is; any other conclusion fails this job. |
| if (stageAJob.conclusion === 'success' || stageAJob.conclusion === 'skipped') { |
| core.setOutput('result', stageAJob.conclusion === 'success' ? 'success' : 'skipped'); |
| return; |
| } else { |
| core.setOutput('result', 'failure'); |
| core.setFailed(`stage-a-test-1 ${stageAJob.conclusion}`); |
| return; |
| } |
| } |
| } else { |
| console.log('stage-a-test-1 job not found yet'); |
| } |
|
|
| console.log(`Waiting ${pollIntervalSeconds}s... (attempt ${attempt + 1}/${maxAttempts})`); |
| await new Promise(resolve => setTimeout(resolve, pollIntervalSeconds * 1000)); |
| } |
|
|
| // All attempts used up without the job completing. |
| core.setFailed('Timeout waiting for stage-a-test-1'); |
| core.setOutput('result', 'timeout'); |
|
|
| wait-for-stage-b: |
| needs: [check-changes, call-gate, wait-for-stage-a] |
| # Polling gate: blocks until every expected stage-b job of this same run has finished. |
| # Same preconditions as wait-for-stage-a, plus wait-for-stage-a itself succeeded or was skipped. |
| if: | |
| always() && |
| !cancelled() && |
| github.event_name == 'pull_request' && |
| !inputs.target_stage && |
| inputs.test_parallel_dispatch != true && |
| (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') && |
| (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') && |
| (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped') |
| runs-on: ubuntu-latest |
| outputs: |
| # One of: success, failure, timeout. |
| stage_b_result: ${{ steps.wait.outputs.result }} |
| steps: |
| - name: Wait for stage-b jobs to complete |
| id: wait |
| uses: actions/github-script@v7 |
| with: |
| script: | |
| const maxWaitMinutes = 480; |
| const pollIntervalSeconds = 120; // 2 minutes to reduce GH API calls |
| const maxAttempts = (maxWaitMinutes * 60) / pollIntervalSeconds; |
| // NOTE(review): the expected counts below are hard-coded and presumably have to be kept in sync with the matrix sizes of the stage-b jobs. |
| // Stage-b jobs to wait for |
| const stageBJobs = [ |
| { prefix: 'stage-b-test-small-1-gpu', expectedCount: 8 }, // partitions 0-7 |
| { prefix: 'stage-b-test-large-1-gpu', expectedCount: 14 }, // partitions 0-13 |
| { prefix: 'stage-b-test-large-2-gpu', expectedCount: 4 }, // partitions 0-3 |
| { prefix: 'stage-b-test-4-gpu-b200', expectedCount: 1 }, |
| ]; |
| const totalExpectedJobs = stageBJobs.reduce((sum, j) => sum + j.expectedCount, 0); // 27 total |
|
|
| // Helper to match job names exactly (prefix alone or prefix + " (N)" for matrix jobs) |
| const matchesPrefix = (jobName, prefix) => { |
| return jobName === prefix || jobName.startsWith(prefix + ' ('); |
| }; |
|
|
| for (let attempt = 0; attempt < maxAttempts; attempt++) { |
| const jobs = await github.paginate(github.rest.actions.listJobsForWorkflowRun, { |
| owner: context.repo.owner, |
| repo: context.repo.repo, |
| run_id: context.runId, |
| per_page: 100, |
| }); |
|
|
| // Per-poll tallies, rebuilt from scratch on every attempt. |
| let allCompleted = true; |
| let anyFailed = false; |
| let failedJobs = []; |
| let completedCount = 0; |
| let totalCount = 0; |
|
|
| for (const { prefix, expectedCount } of stageBJobs) { |
| const matchingJobs = jobs.filter(job => matchesPrefix(job.name, prefix)); |
|
|
| // Check existing jobs for failures first (fail fast) |
| for (const job of matchingJobs) { |
| totalCount++; |
| console.log(`${job.name}: status=${job.status}, conclusion=${job.conclusion}`); |
|
|
| if (job.status !== 'completed') { |
| allCompleted = false; |
| } else { |
| completedCount++; |
| if (job.conclusion !== 'success' && job.conclusion !== 'skipped') { |
| anyFailed = true; |
| failedJobs.push(job.name); |
| } |
| } |
| } |
|
|
| // Fewer jobs listed than expected: keep waiting for the remaining ones to appear. |
| if (matchingJobs.length < expectedCount) { |
| console.log(`${prefix}: found ${matchingJobs.length}/${expectedCount} jobs (waiting for more)`); |
| allCompleted = false; |
| } |
| } |
|
|
| console.log(`Progress: ${completedCount}/${totalCount} jobs completed (expected ${totalExpectedJobs})`); |
|
|
| // Fail fast if any jobs failed (don't wait for all jobs to be created) |
| if (anyFailed) { |
| core.setOutput('result', 'failure'); |
| core.setFailed(`Stage-b jobs failed: ${failedJobs.join(', ')}`); |
| return; |
| } |
| // Succeed only once every listed job is completed and at least the expected number exists. |
| if (allCompleted && totalCount >= totalExpectedJobs) { |
| core.setOutput('result', 'success'); |
| return; |
| } |
|
|
| console.log(`Waiting ${pollIntervalSeconds}s... (attempt ${attempt + 1}/${maxAttempts})`); |
| await new Promise(resolve => setTimeout(resolve, pollIntervalSeconds * 1000)); |
| } |
|
|
| // All attempts used up without reaching a terminal result. |
| core.setFailed('Timeout waiting for stage-b jobs'); |
| core.setOutput('result', 'timeout'); |
|
|
| |
| call-gate: |
| needs: check-changes |
| # Invokes the reusable pr-gate workflow. Skipped for scheduled runs, test_parallel_dispatch and |
| # target_stage mode, and when none of the tracked components changed. |
| if: | |
| github.event_name != 'schedule' && |
| inputs.test_parallel_dispatch != true && |
| !inputs.target_stage && |
| ( |
| needs.check-changes.outputs.main_package == 'true' || |
| needs.check-changes.outputs.sgl_kernel == 'true' || |
| needs.check-changes.outputs.jit_kernel == 'true' || |
| needs.check-changes.outputs.multimodal_gen == 'true' |
| ) |
| uses: ./.github/workflows/pr-gate.yml |
| secrets: inherit |
|
|
| |
|
|
| sgl-kernel-build-wheels: |
| needs: [check-changes, call-gate] |
| # Builds the x86_64 sgl-kernel wheel; only runs when sgl-kernel changes were detected and the |
| # run is neither scheduled, a test_parallel_dispatch run, nor a target_stage run. |
| if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' |
| runs-on: x64-kernel-build-node |
| timeout-minutes: 240 |
| strategy: |
| matrix: |
| include: |
| - python-version: "3.10" |
| cuda-version: "12.9" |
| |
| |
| |
| name: Build Wheel |
| steps: |
| - name: Cleanup |
| run: | |
| # Guarded and quoted exactly like the ARM build job: if GITHUB_WORKSPACE were empty or unset, |
| # the unguarded, unquoted form would expand to "sudo rm -rf /*". |
| if [ -d "$GITHUB_WORKSPACE" ]; then |
| sudo rm -rf "$GITHUB_WORKSPACE"/* || true |
| else |
| echo "$GITHUB_WORKSPACE does not exist, nothing to clean" |
| fi |
| |
| - uses: actions/checkout@v4 |
| with: |
| submodules: "recursive" |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| - name: Set up Python ${{ matrix.python-version }} |
| uses: actions/setup-python@v5 |
| with: |
| python-version: ${{ matrix.python-version }} |
|
|
| - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} |
| run: | |
| cd sgl-kernel |
| ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" |
| env: |
| USE_CCACHE: 1 |
|
|
| - name: Verify wheel artifacts |
| run: | |
| ls -alh sgl-kernel/dist |
| ls -alh sgl-kernel/dist/*.whl |
| |
| # The artifact name is what the downstream jobs select with their download `pattern`. |
| - name: Upload artifacts |
| uses: actions/upload-artifact@v4 |
| with: |
| name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} |
| path: sgl-kernel/dist/* |
| if-no-files-found: error |
|
|
| sgl-kernel-build-wheels-arm: |
| needs: [check-changes, call-gate] |
| # ARM counterpart of sgl-kernel-build-wheels; same run condition, different runner and artifact name. |
| if: github.event_name != 'schedule' && inputs.test_parallel_dispatch != true && !inputs.target_stage && needs.check-changes.outputs.sgl_kernel == 'true' |
| runs-on: arm-kernel-build-node |
| timeout-minutes: 240 |
| strategy: |
| matrix: |
| include: |
| - python-version: "3.10" |
| cuda-version: "12.9" |
| name: Build Wheel Arm |
| steps: |
| - name: Cleanup |
| run: | |
| # Only wipe the workspace when the directory actually exists. |
| if [ -d "$GITHUB_WORKSPACE" ]; then |
| sudo rm -rf "$GITHUB_WORKSPACE"/* || true |
| else |
| echo "$GITHUB_WORKSPACE does not exist, nothing to clean" |
| fi |
| |
| - uses: actions/checkout@v4 |
| with: |
| submodules: "recursive" |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| - name: Set up Python ${{ matrix.python-version }} |
| uses: actions/setup-python@v5 |
| with: |
| python-version: ${{ matrix.python-version }} |
|
|
| - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} |
| run: | |
| cd sgl-kernel |
| ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" |
| env: |
| USE_CCACHE: 1 |
|
|
| - name: Verify wheel artifacts |
| run: | |
| ls -alh sgl-kernel/dist |
| ls -alh sgl-kernel/dist/*.whl |
| |
| # The -aarch64 suffix keeps this artifact distinct from the x86_64 wheel artifact. |
| - name: Upload artifacts |
| uses: actions/upload-artifact@v4 |
| with: |
| name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64 |
| path: sgl-kernel/dist/* |
| if-no-files-found: error |
|
|
| sgl-kernel-unit-test: |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] |
| # Runs the sgl-kernel pytest suite against the freshly built wheel on a single-GPU runner. |
| if: | |
| github.event_name != 'schedule' && |
| inputs.test_parallel_dispatch != true && |
| !inputs.target_stage && |
| needs.check-changes.outputs.sgl_kernel == 'true' |
| runs-on: 1-gpu-runner |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: 1-gpu-runner |
| steps: |
| - uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # Remove wheels left in sgl-kernel/dist before downloading the new artifact. |
| - name: Cleanup |
| run: | |
| ls -alh sgl-kernel/dist || true |
| rm -rf sgl-kernel/dist/* || true |
| |
| - name: Download artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion |
| |
| - name: Run test |
| timeout-minutes: 30 |
| run: | |
| cd sgl-kernel |
| pytest tests/ |
| |
| sgl-kernel-mla-test: |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] |
| # Runs test/registered/mla/test_mla_deepseek_v3.py against the freshly built sgl-kernel wheel. |
| if: | |
| github.event_name != 'schedule' && |
| inputs.test_parallel_dispatch != true && |
| !inputs.target_stage && |
| needs.check-changes.outputs.sgl_kernel == 'true' |
| runs-on: 1-gpu-runner |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: 1-gpu-runner |
| steps: |
| - uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # Remove wheels left in sgl-kernel/dist before downloading the new artifact. |
| - name: Cleanup |
| run: | |
| ls -alh sgl-kernel/dist || true |
| rm -rf sgl-kernel/dist/* || true |
| |
| - name: Download artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh |
| |
| - name: Run test |
| timeout-minutes: 30 |
| run: | |
| cd test/registered/mla |
| python3 test_mla_deepseek_v3.py |
| |
| sgl-kernel-benchmark-test: |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] |
| # Smoke-runs every sgl-kernel/benchmark/bench_*.py script against the freshly built wheel. |
| if: | |
| github.event_name != 'schedule' && |
| inputs.test_parallel_dispatch != true && |
| !inputs.target_stage && |
| needs.check-changes.outputs.sgl_kernel == 'true' |
| runs-on: 1-gpu-runner |
| timeout-minutes: 240 |
| env: |
| CI: true |
| RUNNER_LABELS: 1-gpu-runner |
| steps: |
| - uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # Remove wheels left in sgl-kernel/dist before downloading the new artifact. |
| - name: Cleanup |
| run: | |
| ls -alh sgl-kernel/dist || true |
| rm -rf sgl-kernel/dist/* || true |
| |
| - name: Download artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh |
| |
| - name: Run benchmark tests |
| timeout-minutes: 45 |
| run: | |
| cd sgl-kernel/benchmark |
| echo "Running sgl-kernel benchmark tests in CI mode..." |
| # Print the CI-related environment variables for debugging. |
| echo "CI environment variable: $CI" |
| echo "GITHUB_ACTIONS environment variable: $GITHUB_ACTIONS" |
|
|
| # Each script gets 60 seconds; a timeout or failure is only logged, so it never fails the step. |
| for bench_file in bench_*.py; do |
| echo "Testing $bench_file..." |
| timeout 60 python3 "$bench_file" || echo "Warning: $bench_file timed out or failed, continuing..." |
| echo "Completed $bench_file" |
| echo "---" |
| done |
|
|
| echo "All benchmark tests completed!" |
|
|
| sgl-kernel-b200-test: |
| needs: [check-changes, sgl-kernel-build-wheels] |
| # Runs the sgl-kernel pytest suite on the B200 runner chosen by check-changes. |
| # Unlike the sibling sgl-kernel jobs, call-gate is not listed in `needs`; it is still an indirect |
| # dependency because sgl-kernel-build-wheels needs it. |
| if: | |
| github.event_name != 'schedule' && |
| inputs.test_parallel_dispatch != true && |
| !inputs.target_stage && |
| needs.check-changes.outputs.sgl_kernel == 'true' |
| runs-on: ${{ needs.check-changes.outputs.b200_runner }} |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }} |
| steps: |
| - uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # Remove wheels left in sgl-kernel/dist before downloading the new artifact. |
| - name: Cleanup |
| run: | |
| ls -alh sgl-kernel/dist || true |
| rm -rf sgl-kernel/dist/* || true |
| |
| - name: Download artifacts |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh diffusion |
| |
| - name: Run sgl-kernel unit tests on B200 |
| timeout-minutes: 30 |
| run: | |
| cd sgl-kernel |
| pytest tests/ |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
|
|
| jit-kernel-unit-test: |
| needs: [check-changes, call-gate] |
| # Runs the pytest suite under python/sglang/jit_kernel when jit_kernel changes were detected |
| # (or the run-all flag is set), outside scheduled, test_parallel_dispatch and target_stage runs. |
| if: | |
| github.event_name != 'schedule' && |
| inputs.test_parallel_dispatch != true && |
| !inputs.target_stage && |
| needs.check-changes.outputs.jit_kernel == 'true' |
| runs-on: 1-gpu-runner |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: 1-gpu-runner |
| steps: |
| - uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| bash scripts/ci/cuda/ci_install_dependency.sh |
| |
| - name: Run test |
| timeout-minutes: 30 |
| run: | |
| cd python/sglang/jit_kernel |
| pytest tests/ |
| |
| |
|
|
| stage-a-test-1: |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] |
| # Runs when explicitly requested through target_stage. Otherwise (no target_stage) it requires |
| # main_package or sgl_kernel changes and either a scheduled / test_parallel_dispatch run (which |
| # ignores upstream failures) or no failed or cancelled dependency. |
| if: | |
| always() && |
| ( |
| (inputs.target_stage == 'stage-a-test-1') || |
| ( |
| !inputs.target_stage && |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) |
| ) |
| ) |
| runs-on: 1-gpu-runner |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: 1-gpu-runner |
| steps: |
| - name: Checkout code |
| uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # The custom wheel is only fetched when sgl-kernel changes were detected (and thus built). |
| - name: Download artifacts |
| if: needs.check-changes.outputs.sgl_kernel == 'true' |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh |
| |
| - name: Run test |
| timeout-minutes: 10 |
| run: | |
| cd test/ |
| CONTINUE_ON_ERROR_FLAG="" |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" |
| fi |
| python3 run_suite.py --hw cuda --suite stage-a-test-1 $CONTINUE_ON_ERROR_FLAG |
| # temporarily put backend-independent cpu tests here |
| python3 run_suite.py --hw cpu --suite default $CONTINUE_ON_ERROR_FLAG |
| |
| # Runs the repository-local coredump upload action even when earlier steps failed. |
| - uses: ./.github/actions/upload-cuda-coredumps |
| if: always() |
|
|
| stage-a-cpu-only: |
| needs: [check-changes, call-gate] |
| # Runs when explicitly requested through target_stage. Otherwise (no target_stage) it requires |
| # main_package changes and either a scheduled / test_parallel_dispatch run or no failed or |
| # cancelled dependency. |
| if: | |
| always() && |
| ( |
| (inputs.target_stage == 'stage-a-cpu-only') || |
| ( |
| !inputs.target_stage && |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && |
| (needs.check-changes.outputs.main_package == 'true') |
| ) |
| ) |
| runs-on: ubuntu-latest |
| timeout-minutes: 240 |
| steps: |
| # Delete large preinstalled toolchains from the runner, then print the remaining disk usage. |
| - name: Free disk space |
| run: | |
| sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc |
| df -h |
| |
| - name: Checkout code |
| uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| - name: Set up Python |
| uses: actions/setup-python@v5 |
| with: |
| python-version: '3.10' |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| pip install -e "python/[dev]" |
| |
| - name: Run test |
| timeout-minutes: 10 |
| run: | |
| cd test/ |
| CONTINUE_ON_ERROR_FLAG="" |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" |
| fi |
| python3 run_suite.py --hw cpu --suite stage-a-cpu-only $CONTINUE_ON_ERROR_FLAG |
| |
| |
| stage-b-test-small-1-gpu: |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] |
| # Runs when explicitly requested through target_stage. Otherwise (no target_stage) it requires |
| # main_package or sgl_kernel changes and either a scheduled / test_parallel_dispatch run or no |
| # failed or cancelled dependency. The suite is split over 8 matrix partitions. |
| if: | |
| always() && |
| ( |
| (inputs.target_stage == 'stage-b-test-small-1-gpu') || |
| ( |
| !inputs.target_stage && |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) |
| ) |
| ) |
| runs-on: 1-gpu-5090 |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: 1-gpu-5090 |
| IS_BLACKWELL: "1" |
| strategy: |
| fail-fast: false |
| max-parallel: 8 |
| matrix: |
| partition: [0, 1, 2, 3, 4, 5, 6, 7] |
| steps: |
| - name: Checkout code |
| uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # The custom wheel is only fetched when sgl-kernel changes were detected (and thus built). |
| - name: Download artifacts |
| if: needs.check-changes.outputs.sgl_kernel == 'true' |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| source /etc/profile.d/sglang-ci.sh |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh |
| git clone https://github.com/merrymercy/human-eval.git |
| cd human-eval |
| pip install -e . --no-build-isolation |
| |
| - name: Run test |
| timeout-minutes: 30 |
| run: | |
| source /etc/profile.d/sglang-ci.sh |
| cd test/ |
| CONTINUE_ON_ERROR_FLAG="" |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" |
| fi |
| python3 run_suite.py --hw cuda --suite stage-b-test-small-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG |
| |
| # Runs the repository-local coredump upload action even when earlier steps failed. |
| - uses: ./.github/actions/upload-cuda-coredumps |
| if: always() |
| with: |
| artifact-suffix: ${{ matrix.partition }} |
|
|
| |
| stage-b-test-large-1-gpu: |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] |
| # Runs when explicitly requested through target_stage. Otherwise (no target_stage) it requires |
| # main_package or sgl_kernel changes and either a scheduled / test_parallel_dispatch run or no |
| # failed or cancelled dependency. The suite is split over 14 matrix partitions. |
| if: | |
| always() && |
| ( |
| (inputs.target_stage == 'stage-b-test-large-1-gpu') || |
| ( |
| !inputs.target_stage && |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) |
| ) |
| ) |
| runs-on: 1-gpu-runner |
| timeout-minutes: 240 |
| env: |
| RUNNER_LABELS: 1-gpu-runner |
| strategy: |
| fail-fast: false |
| # Concurrency is decided by check-changes (14 for scheduled / high-priority runs, else 3). |
| max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }} |
| matrix: |
| partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] |
| steps: |
| - name: Checkout code |
| uses: actions/checkout@v4 |
| with: |
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }} |
|
|
| # The custom wheel is only fetched when sgl-kernel changes were detected (and thus built). |
| - name: Download artifacts |
| if: needs.check-changes.outputs.sgl_kernel == 'true' |
| uses: actions/download-artifact@v4 |
| with: |
| path: sgl-kernel/dist/ |
| merge-multiple: true |
| pattern: wheel-python3.10-cuda12.9 |
|
|
| - name: Install dependencies |
| timeout-minutes: 20 |
| run: | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh |
| |
| - name: Run test |
| timeout-minutes: 30 |
| run: | |
| cd test/ |
| CONTINUE_ON_ERROR_FLAG="" |
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then |
| CONTINUE_ON_ERROR_FLAG="--continue-on-error" |
| fi |
| python3 run_suite.py --hw cuda --suite stage-b-test-large-1-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG |
| |
| # Runs the repository-local coredump upload action even when earlier steps failed. |
| - uses: ./.github/actions/upload-cuda-coredumps |
| if: always() |
| with: |
| artifact-suffix: ${{ matrix.partition }} |
|
|
| # Job: runs the `stage-b-test-large-2-gpu` suite of test/run_suite.py on `2-gpu-runner`,
| # sharded across 4 matrix partitions.
| stage-b-test-large-2-gpu:
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-b-test-large-2-gpu') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: 2-gpu-runner
| timeout-minutes: 240
| env:
| RUNNER_LABELS: 2-gpu-runner
| strategy:
| # fail-fast disabled: a failing partition does not cancel the other partitions.
| fail-fast: false
| matrix:
| partition: [0, 1, 2, 3]
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/
| # (presumably produced by sgl-kernel-build-wheels; confirm against that job).
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # Installs CI dependencies, then clones human-eval and installs it in editable mode.
| # CUSTOM_BUILD_SGL_KERNEL carries the check-changes `sgl_kernel` flag into the install script.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
| git clone https://github.com/merrymercy/human-eval.git
| cd human-eval
| pip install -e . --no-build-isolation
| 
| # Runs this matrix entry's partition; `--continue-on-error` is appended only when check-changes
| # sets `continue_on_error` to 'true'. `--auto-partition-size 4` equals the matrix length above,
| # so update both together.
| - name: Run test
| timeout-minutes: 30
| run: |
| cd test/
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 run_suite.py --hw cuda --suite stage-b-test-large-2-gpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the partition index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.partition }}
|
|
| # Job: runs the `stage-b-test-4-gpu-b200` suite of test/run_suite.py plus the FA4 jit_kernel pytest
| # file, on the runner label chosen by check-changes (`b200_runner` output). Not partitioned.
| stage-b-test-4-gpu-b200:
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-b-test-4-gpu-b200') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: ${{ needs.check-changes.outputs.b200_runner }}
| timeout-minutes: 240
| env:
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
| # NOTE(review): no matrix is defined here, so `fail-fast` has nothing to act on; confirm whether a
| # matrix was intended or the strategy block can be dropped.
| strategy:
| fail-fast: false


| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| # NOTE(review): pinned to download-artifact@v6 while other jobs in this file use @v4; confirm intentional.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v6
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # IS_BLACKWELL=1 is set inline for the install script, alongside the check-changes `sgl_kernel` flag.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh
| 
| # Runs the whole suite in one go; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'.
| - name: Run test
| timeout-minutes: 30
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG
| 
| # Separate pytest invocation for the FlashAttention-4 test file; the path is given relative to the
| # step's starting directory (this step does not `cd`).
| - name: Run FA4 jit_kernel tests (SM100+)
| timeout-minutes: 10
| run: |
| IS_BLACKWELL=1 python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py
| 
| # Local composite action, run even after failures; no artifact suffix is passed for this job.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
|
|
| # Job: runs the `1-gpu` suite of python/sglang/multimodal_gen/test/run_suite.py on `1-gpu-runner`,
| # split into 2 matrix parts.
| multimodal-gen-test-1-gpu:
| needs: [check-changes, call-gate, sgl-kernel-build-wheels]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `multimodal_gen` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'multimodal-gen-test-1-gpu') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| needs.check-changes.outputs.multimodal_gen == 'true'
| )
| )
| runs-on: 1-gpu-runner
| timeout-minutes: 240
| strategy:
| # fail-fast disabled: a failing part does not cancel the other part.
| fail-fast: false
| matrix:
| part: [0, 1]
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/
| # (presumably produced by sgl-kernel-build-wheels; confirm against that job).
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # The install script is invoked with the `diffusion` argument for this job.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
| # Runs this matrix entry's part; `--total-partitions 2` equals the matrix length above, so update
| # both together. `--continue-on-error` is appended only when check-changes sets it to 'true'.
| # NOTE(review): the step timeout (240) equals the job timeout, so it adds no tighter bound; confirm.
| - name: Run diffusion server tests
| timeout-minutes: 240
| env:
| RUNAI_STREAMER_MEMORY_LIMIT: 0
| run: |
| cd python
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 sglang/multimodal_gen/test/run_suite.py \
| --suite 1-gpu \
| --partition-id ${{ matrix.part }} \
| --total-partitions 2 \
| $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the part index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.part }}
|
|
| # Job: runs the `2-gpu` suite of python/sglang/multimodal_gen/test/run_suite.py on `2-gpu-runner`,
| # split into 2 matrix parts.
| multimodal-gen-test-2-gpu:
| needs: [check-changes, call-gate, sgl-kernel-build-wheels]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `multimodal_gen` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'multimodal-gen-test-2-gpu') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| needs.check-changes.outputs.multimodal_gen == 'true'
| )
| )
| runs-on: 2-gpu-runner
| timeout-minutes: 240
| strategy:
| # fail-fast disabled: a failing part does not cancel the other part.
| fail-fast: false
| matrix:
| part: [0, 1]
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/
| # (presumably produced by sgl-kernel-build-wheels; confirm against that job).
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # The install script is invoked with the `diffusion` argument for this job.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh diffusion
| 
| # Runs this matrix entry's part; `--total-partitions 2` equals the matrix length above, so update
| # both together. `--continue-on-error` is appended only when check-changes sets it to 'true'.
| # NOTE(review): the step timeout (240) equals the job timeout, so it adds no tighter bound; confirm.
| - name: Run diffusion server tests
| timeout-minutes: 240
| env:
| RUNAI_STREAMER_MEMORY_LIMIT: 0
| run: |
| cd python
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 sglang/multimodal_gen/test/run_suite.py \
| --suite 2-gpu \
| --partition-id ${{ matrix.part }} \
| --total-partitions 2 \
| $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the part index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.part }}
|
|
| # Job: runs the `stage-c-test-4-gpu-h100` suite of test/run_suite.py on `4-gpu-h100`,
| # sharded across 3 matrix parts. Depends on wait-for-stage-b.
| stage-c-test-4-gpu-h100:
| needs: [check-changes, call-gate, wait-for-stage-b]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-c-test-4-gpu-h100') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: 4-gpu-h100
| timeout-minutes: 240
| env:
| RUNNER_LABELS: 4-gpu-h100
| strategy:
| # fail-fast disabled: a failing part does not cancel the other parts.
| fail-fast: false
| matrix:
| part: [0, 1, 2]
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # CUSTOM_BUILD_SGL_KERNEL carries the check-changes `sgl_kernel` flag into the install script.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
| 
| # Runs this matrix entry's part; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'. `--auto-partition-size 3` equals the matrix length above, so
| # update both together.
| - name: Run test
| timeout-minutes: 20
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the part index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.part }}
|
|
| # Job: runs the `stage-c-test-8-gpu-h200` suite of test/run_suite.py on `8-gpu-h200`, sharded
| # across 4 matrix parts, after two warmup steps. Depends on wait-for-stage-b.
| stage-c-test-8-gpu-h200:
| needs: [check-changes, call-gate, wait-for-stage-b]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-c-test-8-gpu-h200') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: 8-gpu-h200
| timeout-minutes: 240
| env:
| RUNNER_LABELS: 8-gpu-h200
| strategy:
| # fail-fast disabled: a failing part does not cancel the other parts.
| fail-fast: false
| matrix:
| part: [0, 1, 2, 3]
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # CUSTOM_BUILD_SGL_KERNEL carries the check-changes `sgl_kernel` flag into the install script.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh
| 
| # Warmup before the suite. Arguments are `model:N` pairs; N is presumably the GPU / tensor-parallel
| # count for that model (TODO confirm against warmup_deep_gemm.py).
| - name: Warmup DeepGEMM JIT Compilation
| timeout-minutes: 25
| run: |
| python3 scripts/ci/cuda/warmup_deep_gemm.py \
| deepseek-ai/DeepSeek-V3-0324:8 \
| deepseek-ai/DeepSeek-V3.2-Exp:8
| 
| # Second warmup, same `model:N` argument form. Note the model list differs from the DeepGEMM
| # warmup above (Ring-2.5-1T here instead of DeepSeek-V3.2-Exp).
| - name: Warmup Server CUDA Graphs
| timeout-minutes: 25
| run: |
| python3 scripts/ci/cuda/warmup_server.py \
| deepseek-ai/DeepSeek-V3-0324:8 \
| inclusionAI/Ring-2.5-1T:8
| 
| # Runs this matrix entry's part; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'. `--auto-partition-size 4` equals the matrix length above, so
| # update both together.
| - name: Run test
| timeout-minutes: 30
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the part index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.part }}
|
|
| # Job: runs the `stage-c-test-8-gpu-h20` suite of test/run_suite.py on `8-gpu-h20`,
| # sharded across 2 matrix parts. Depends on wait-for-stage-b.
| stage-c-test-8-gpu-h20:
| needs: [check-changes, call-gate, wait-for-stage-b]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-c-test-8-gpu-h20') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: 8-gpu-h20
| timeout-minutes: 240
| env:
| # Comma-separated device list exported to every step; presumably the RDMA NICs available on the
| # 8-gpu-h20 runners (TODO confirm against the consumer of this variable).
| SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4"
| RUNNER_LABELS: 8-gpu-h20
| strategy:
| # fail-fast disabled: a failing part does not cancel the other part.
| fail-fast: false
| matrix:
| part: [0, 1]
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # This job installs through ci_install_deepep.sh; CUSTOM_BUILD_SGL_KERNEL carries the
| # check-changes `sgl_kernel` flag into that script.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh
| 
| # Runs this matrix entry's part; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'. `--auto-partition-size 2` equals the matrix length above, so
| # update both together.
| - name: Run test
| timeout-minutes: 20
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the part index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.part }}
|
|
| # Job: runs the `stage-c-test-deepep-4-gpu` suite of test/run_suite.py on `4-gpu-h100` as a single
| # (non-matrix) job, after two warmup steps. Depends on wait-for-stage-b.
| stage-c-test-deepep-4-gpu:
| needs: [check-changes, call-gate, wait-for-stage-b]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-c-test-deepep-4-gpu') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: 4-gpu-h100
| timeout-minutes: 240
| env:
| RUNNER_LABELS: 4-gpu-h100
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # This job installs through ci_install_deepep.sh; CUSTOM_BUILD_SGL_KERNEL carries the
| # check-changes `sgl_kernel` flag into that script.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh
| 
| # Warmup before the suite. The argument is a `model:N` pair; N is presumably the GPU /
| # tensor-parallel count (TODO confirm against warmup_deep_gemm.py).
| - name: Warmup DeepGEMM JIT Compilation
| timeout-minutes: 25
| run: |
| python3 scripts/ci/cuda/warmup_deep_gemm.py \
| lmsys/sglang-ci-dsv3-test:4
| 
| # Second warmup with the same `model:N` argument.
| - name: Warmup Server CUDA Graphs
| timeout-minutes: 25
| run: |
| python3 scripts/ci/cuda/warmup_server.py \
| lmsys/sglang-ci-dsv3-test:4
| 
| # Runs the whole suite in one go; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'.
| - name: Run test
| timeout-minutes: 20
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; no artifact suffix is passed for this job.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
|
|
| # Job: runs the `stage-c-test-deepep-8-gpu-h200` suite of test/run_suite.py on `8-gpu-h200` as a
| # single (non-matrix) job, after two warmup steps. Depends on wait-for-stage-b.
| stage-c-test-deepep-8-gpu-h200:
| needs: [check-changes, call-gate, wait-for-stage-b]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: 8-gpu-h200
| timeout-minutes: 240
| env:
| RUNNER_LABELS: 8-gpu-h200
| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v4
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # This job installs through ci_install_deepep.sh; CUSTOM_BUILD_SGL_KERNEL carries the
| # check-changes `sgl_kernel` flag into that script.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh
| 
| # Warmup before the suite. Arguments are `model:N` pairs; N is presumably the GPU / tensor-parallel
| # count for that model (TODO confirm against warmup_deep_gemm.py).
| - name: Warmup DeepGEMM JIT Compilation
| timeout-minutes: 25
| run: |
| python3 scripts/ci/cuda/warmup_deep_gemm.py \
| deepseek-ai/DeepSeek-V3-0324:8 \
| deepseek-ai/DeepSeek-V3.2-Exp:8
| 
| # Second warmup; only DeepSeek-V3-0324 is passed here, unlike the two models in the step above.
| - name: Warmup Server CUDA Graphs
| timeout-minutes: 25
| run: |
| python3 scripts/ci/cuda/warmup_server.py \
| deepseek-ai/DeepSeek-V3-0324:8
| 
| # Runs the whole suite in one go; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'.
| - name: Run test
| timeout-minutes: 45
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; no artifact suffix is passed for this job.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
|
|
| # Job: runs the `stage-c-test-4-gpu-b200` suite of test/run_suite.py on the runner label chosen by
| # check-changes (`b200_runner` output), sharded across 3 matrix parts. Depends on wait-for-stage-b.
| stage-c-test-4-gpu-b200:
| needs: [check-changes, call-gate, wait-for-stage-b]
| # Run condition: either this stage was requested explicitly via `inputs.target_stage`, or no target
| # stage was given AND (the event is `schedule` / `test_parallel_dispatch` is set, or no needed job
| # failed or was cancelled) AND check-changes reported `main_package` or `sgl_kernel` as 'true'.
| # The leading `always()` keeps GitHub from auto-skipping the job when a needed job did not succeed.
| if: |
| always() &&
| (
| (inputs.target_stage == 'stage-c-test-4-gpu-b200') ||
| (
| !inputs.target_stage &&
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
| )
| )
| runs-on: ${{ needs.check-changes.outputs.b200_runner }}
| timeout-minutes: 240
| env:
| RUNNER_LABELS: ${{ needs.check-changes.outputs.b200_runner }}
| strategy:
| # fail-fast disabled: a failing part does not cancel the other parts.
| fail-fast: false
| matrix:
| part: [0, 1, 2]


| steps:
| # Ref precedence: explicit PR head SHA, then the caller-supplied ref, then the triggering commit.
| - name: Checkout code
| uses: actions/checkout@v4
| with:
| ref: ${{ inputs.pr_head_sha || inputs.ref || github.sha }}


| # Only when sgl-kernel changed: fetch artifacts matching the wheel pattern into sgl-kernel/dist/.
| # NOTE(review): pinned to download-artifact@v6 while other jobs in this file use @v4; confirm intentional.
| - name: Download artifacts
| if: needs.check-changes.outputs.sgl_kernel == 'true'
| uses: actions/download-artifact@v6
| with:
| path: sgl-kernel/dist/
| merge-multiple: true
| pattern: wheel-python3.10-cuda12.9


| # IS_BLACKWELL=1 is set inline for the install script, alongside the check-changes `sgl_kernel` flag.
| - name: Install dependencies
| timeout-minutes: 20
| run: |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/cuda/ci_install_dependency.sh
| 
| # Runs this matrix entry's part; `--continue-on-error` is appended only when check-changes sets
| # `continue_on_error` to 'true'. `--auto-partition-size 3` equals the matrix length above, so
| # update both together.
| - name: Run test
| timeout-minutes: 30
| run: |
| cd test
| CONTINUE_ON_ERROR_FLAG=""
| if [[ "${{ needs.check-changes.outputs.continue_on_error }}" == "true" ]]; then
| CONTINUE_ON_ERROR_FLAG="--continue-on-error"
| fi
| IS_BLACKWELL=1 python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG
| 
| # Local composite action, run even after failures; the part index is passed as the artifact suffix.
| - uses: ./.github/actions/upload-cuda-coredumps
| if: always()
| with:
| artifact-suffix: ${{ matrix.part }}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| # Aggregation gate: depends on every job listed in `needs` and fails when any of them ended in
| # `failure` or `cancelled`. Other results (e.g. `success`, `skipped`) are accepted.
| pr-test-finish:
| needs:
| [
| call-gate,
| check-changes,


| sgl-kernel-build-wheels,
| sgl-kernel-unit-test,
| sgl-kernel-mla-test,
| sgl-kernel-benchmark-test,
| sgl-kernel-b200-test,


| wait-for-stage-a,
| wait-for-stage-b,


| jit-kernel-unit-test,


| multimodal-gen-test-1-gpu,
| multimodal-gen-test-2-gpu,


| stage-a-test-1,
| stage-a-cpu-only,
| stage-b-test-small-1-gpu,
| stage-b-test-large-1-gpu,
| stage-b-test-large-2-gpu,
| stage-b-test-4-gpu-b200,
| stage-c-test-4-gpu-h100,
| stage-c-test-8-gpu-h20,
| stage-c-test-8-gpu-h200,
| stage-c-test-deepep-4-gpu,
| stage-c-test-deepep-8-gpu-h200,
| stage-c-test-4-gpu-b200,
| 
| ]
| # always(): evaluate this gate even when needed jobs failed, were cancelled, or were skipped.
| if: always()
| runs-on: ubuntu-latest
| steps:
| - name: Check all dependent job statuses
| env:
| # Pass the `needs` context through the environment rather than interpolating it into the script
| # text, so quotes or shell metacharacters inside job outputs cannot break or inject into the script.
| NEEDS_JSON: ${{ toJson(needs) }}
| run: |
| failed=0
| 
| # Job ids, in the order they appear in the JSON document
| job_names=$(jq -r 'keys_unsorted[]' <<< "$NEEDS_JSON")
| 
| for job in $job_names; do
| # Result string GitHub recorded for this job
| result=$(jq -r --arg j "$job" '.[$j].result' <<< "$NEEDS_JSON")
| 
| # Print every job's status before deciding, so the log lists all failures, not just the first
| echo "$job: $result"
| 
| if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
| failed=1
| fi
| done
| 
| if [[ "$failed" -ne 0 ]]; then
| echo "The above jobs failed."
| exit 1
| fi
| 
| echo "All jobs completed successfully"
| exit 0
|
|