import argparse import glob import sys from typing import List import tabulate from sglang.test.ci.ci_register import CIRegistry, HWBackend, collect_tests from sglang.test.ci.ci_utils import run_unittest_files HW_MAPPING = { "cpu": HWBackend.CPU, "cuda": HWBackend.CUDA, "amd": HWBackend.AMD, "npu": HWBackend.NPU, } # Per-commit test suites (run on every PR) PER_COMMIT_SUITES = { HWBackend.CPU: ["default", "stage-a-cpu-only"], HWBackend.AMD: [ "stage-a-test-1-amd", "stage-b-test-small-1-gpu-amd", "stage-b-test-small-1-gpu-amd-nondeterministic", "stage-b-test-small-1-gpu-amd-mi35x", "stage-b-test-large-8-gpu-35x-disaggregation-amd", "stage-b-test-large-1-gpu-amd", "stage-b-test-large-2-gpu-amd", "stage-c-test-large-8-gpu-amd", "stage-c-test-large-8-gpu-amd-mi35x", ], HWBackend.CUDA: [ "stage-a-test-1", "stage-b-test-small-1-gpu", "stage-b-test-large-1-gpu", "stage-b-test-large-2-gpu", "stage-c-test-4-gpu-h100", "stage-c-test-4-gpu-b200", "stage-c-test-4-gpu-gb200", "stage-c-test-deepep-4-gpu", "stage-c-test-8-gpu-h20", "stage-c-test-8-gpu-h200", "stage-c-test-8-gpu-b200", "stage-c-test-deepep-8-gpu-h200", ], HWBackend.NPU: [ "stage-a-test-1", "stage-b-test-1-npu-a2", "stage-b-test-2-npu-a2", "stage-b-test-4-npu-a3", "stage-b-test-16-npu-a3", ], } # Nightly test suites (run nightly, organized by GPU configuration) NIGHTLY_SUITES = { HWBackend.CUDA: [ "nightly-1-gpu", "nightly-2-gpu", "nightly-4-gpu", "nightly-4-gpu-b200", "nightly-8-gpu", "nightly-8-gpu-h200", "nightly-8-gpu-h20", "nightly-8-gpu-b200", "nightly-8-gpu-h200-basic", # Basic tests for large models on H200 "nightly-8-gpu-b200-basic", # Basic tests for large models on B200 "nightly-8-gpu-common", # Common tests that run on both H200 and B200 # Eval and perf suites (2-gpu) "nightly-eval-text-2-gpu", "nightly-eval-vlm-2-gpu", "nightly-perf-text-2-gpu", "nightly-perf-vlm-2-gpu", ], HWBackend.AMD: [ "nightly-amd", "nightly-amd-1-gpu", "nightly-amd-1-gpu-mi35x", "nightly-amd-1-gpu-zimage-turbo", "nightly-amd-8-gpu", "nightly-amd-vlm", # MI35x 8-GPU suite (different model configs) "nightly-amd-8-gpu-mi35x", ], HWBackend.CPU: [], HWBackend.NPU: [ "nightly-1-npu-a3", "nightly-2-npu-a3", "nightly-4-npu-a3", "nightly-8-npu-a3", "nightly-16-npu-a3", ], } def filter_tests( ci_tests: List[CIRegistry], hw: HWBackend, suite: str, nightly: bool = False ) -> List[CIRegistry]: ci_tests = [ t for t in ci_tests if t.backend == hw and t.suite == suite and t.nightly == nightly ] valid_suites = ( NIGHTLY_SUITES.get(hw, []) if nightly else PER_COMMIT_SUITES.get(hw, []) ) if suite not in valid_suites: print( f"Warning: Unknown suite {suite} for backend {hw.name}, nightly={nightly}" ) enabled_tests = [t for t in ci_tests if t.disabled is None] skipped_tests = [t for t in ci_tests if t.disabled is not None] return enabled_tests, skipped_tests def auto_partition(files: List[CIRegistry], rank, size): """ Partition files into size sublists with approximately equal sums of estimated times using a greedy algorithm (LPT heuristic), and return the partition for the specified rank. """ if not files or size <= 0: return [] # Sort files by estimated_time in descending order (LPT heuristic). # Use filename as tie-breaker to ensure deterministic partitioning # regardless of glob ordering. sorted_files = sorted(files, key=lambda f: (-f.est_time, f.filename)) partitions = [[] for _ in range(size)] partition_sums = [0.0] * size # Greedily assign each file to the partition with the smallest current total time for file in sorted_files: min_sum_idx = min(range(size), key=partition_sums.__getitem__) partitions[min_sum_idx].append(file) partition_sums[min_sum_idx] += file.est_time if rank < size: return partitions[rank] return [] def pretty_print_tests( args, ci_tests: List[CIRegistry], skipped_tests: List[CIRegistry] ): hw = HW_MAPPING[args.hw] suite = args.suite nightly = args.nightly if args.auto_partition_size: partition_info = ( f"{args.auto_partition_id + 1}/{args.auto_partition_size} " f"(0-based id={args.auto_partition_id})" ) else: partition_info = "full" headers = ["Hardware", "Suite", "Nightly", "Partition"] rows = [[hw.name, suite, str(nightly), partition_info]] msg = tabulate.tabulate(rows, headers=headers, tablefmt="psql") + "\n" if skipped_tests: msg += f"⚠️ Skipped {len(skipped_tests)} test(s):\n" for t in skipped_tests: reason = t.disabled or "disabled" msg += f" - {t.filename} (reason: {reason})\n" msg += "\n" if len(ci_tests) == 0: msg += f"No tests found for hw={hw.name}, suite={suite}, nightly={nightly}\n" msg += "This is expected during incremental migration. Skipping.\n" else: total_est_time = sum(t.est_time for t in ci_tests) msg += ( f"✅ Enabled {len(ci_tests)} test(s) (est total {total_est_time:.1f}s):\n" ) for t in ci_tests: msg += f" - {t.filename} (est_time={t.est_time})\n" print(msg, flush=True) def run_a_suite(args): hw = HW_MAPPING[args.hw] suite = args.suite nightly = args.nightly auto_partition_id = args.auto_partition_id auto_partition_size = args.auto_partition_size # All tests (per-commit and nightly) are now in registered/ files = [ f for f in glob.glob("registered/**/*.py", recursive=True) if not f.endswith("/conftest.py") and not f.endswith("/__init__.py") ] # Strict: all registered files must have proper registration sanity_check = True all_tests = collect_tests(files, sanity_check=sanity_check) ci_tests, skipped_tests = filter_tests(all_tests, hw, suite, nightly) if auto_partition_size: ci_tests = auto_partition(ci_tests, auto_partition_id, auto_partition_size) pretty_print_tests(args, ci_tests, skipped_tests) # Add extra timeout when retry is enabled timeout = args.timeout_per_file if args.enable_retry: timeout += args.retry_timeout_increase return run_unittest_files( ci_tests, timeout_per_file=timeout, continue_on_error=args.continue_on_error, enable_retry=args.enable_retry, max_attempts=args.max_attempts, retry_wait_seconds=args.retry_wait_seconds, ) def main(): parser = argparse.ArgumentParser( description="Run CI test suites from test/registered/" ) parser.add_argument( "--hw", type=str, choices=HW_MAPPING.keys(), required=True, help="Hardware backend to run tests on.", ) parser.add_argument("--suite", type=str, required=True, help="Test suite to run.") parser.add_argument( "--nightly", action="store_true", help="Run nightly tests instead of per-commit tests.", ) parser.add_argument( "--timeout-per-file", type=int, default=1200, help="The time limit for running one file in seconds (default: 1200).", ) parser.add_argument( "--continue-on-error", action="store_true", default=False, help="Continue running remaining tests even if one fails (default: False, useful for nightly tests).", ) parser.add_argument( "--auto-partition-id", type=int, help="Use auto load balancing. The part id.", ) parser.add_argument( "--auto-partition-size", type=int, help="Use auto load balancing. The number of parts.", ) parser.add_argument( "--enable-retry", action="store_true", default=False, help="Enable smart retry for accuracy/performance assertion failures (not code errors)", ) parser.add_argument( "--max-attempts", type=int, default=2, help="Maximum number of attempts per file including initial run (default: 2)", ) parser.add_argument( "--retry-wait-seconds", type=int, default=60, help="Seconds to wait between retries (default: 60)", ) parser.add_argument( "--retry-timeout-increase", type=int, default=600, help="Additional timeout in seconds when retry is enabled (default: 600)", ) args = parser.parse_args() # Validate auto-partition arguments if (args.auto_partition_id is not None) != (args.auto_partition_size is not None): parser.error( "--auto-partition-id and --auto-partition-size must be specified together." ) if args.auto_partition_size is not None: if args.auto_partition_size <= 0: parser.error("--auto-partition-size must be positive.") if not 0 <= args.auto_partition_id < args.auto_partition_size: parser.error( f"--auto-partition-id must be in range [0, {args.auto_partition_size}), " f"but got {args.auto_partition_id}" ) exit_code = run_a_suite(args) sys.exit(exit_code) if __name__ == "__main__": main()