| """Visual comparison figure tests — CI sanity check + human verification. |
| |
| This file serves two purposes: |
| 1. CI sanity check: ensures generate_comparison_figure() runs without errors |
| across various tensor scenarios (registered via register_cpu_ci). |
| 2. Human verification: all generated PNGs are copied to /tmp/comparator_manual_verify/ |
| so they can be pulled back to a local machine for visual inspection. |
| |
| Run: |
| python -m pytest test/registered/debug_utils/comparator/test_manually_verify.py -x -v |
| |
| Human verification: |
| After running, images are at /tmp/comparator_manual_verify/. |
| Each test's docstring describes the expected visual appearance. |
| """ |
|
|
| import shutil |
| import sys |
| from pathlib import Path |
|
|
| import pytest |
| import torch |
|
|
| from sglang.test.ci.ci_register import register_cpu_ci |
|
|
# Register this file with the CPU CI (see the module docstring).
# NOTE(review): est_time is presumably an estimated runtime in seconds —
# confirm against sglang.test.ci.ci_register.
register_cpu_ci(est_time=60, suite="default", nightly=True)


# Fixed directory that generated PNGs are copied into for human inspection.
_PUBLISH_DIR: Path = Path("/tmp/comparator_manual_verify")
# Leading bytes that _assert_valid_png compares against the start of a file.
_PNG_MAGIC: bytes = b"\x89PNG"
|
|
|
|
@pytest.fixture(scope="session")
def publish_dir() -> Path:
    """Return the fixed publish directory, recreated empty for this session.

    A directory left over from an earlier run is removed first, so the folder
    starts out empty before any test copies a PNG into it.
    """
    destination: Path = _PUBLISH_DIR
    if destination.exists():
        shutil.rmtree(destination)
    destination.mkdir(parents=True)
    return destination
|
|
|
|
def _assert_valid_png(path: Path) -> None:
    """Assert that *path* exists, is non-empty, and starts with the PNG magic bytes."""
    assert path.exists(), f"PNG not created: {path}"

    size_in_bytes: int = path.stat().st_size
    assert size_in_bytes > 0, f"PNG is empty: {path}"

    # Only the first four bytes are inspected; the image is not decoded.
    with path.open("rb") as stream:
        header: bytes = stream.read(4)
    assert header == _PNG_MAGIC, f"Not a valid PNG: {path}"
|
|
|
|
def _generate_and_publish(
    *,
    baseline: torch.Tensor,
    target: torch.Tensor,
    name: str,
    tmp_path: Path,
    publish_dir: Path,
) -> Path:
    """Render a comparison figure, validate it, and copy it to *publish_dir*.

    The figure is written to ``tmp_path / "<name>.png"``, checked with
    ``_assert_valid_png``, and then copied under the same file name into
    *publish_dir*.

    Returns:
        The path of the PNG inside *tmp_path* (not the published copy).
    """
    # Imported lazily, inside the helper, as in the rest of this file.
    from sglang.srt.debug_utils.comparator.visualizer import (
        generate_comparison_figure,
    )

    figure_path: Path = tmp_path / f"{name}.png"
    generate_comparison_figure(
        baseline=baseline,
        target=target,
        name=name,
        output_path=figure_path,
    )
    _assert_valid_png(figure_path)

    published_copy: Path = publish_dir / figure_path.name
    shutil.copy2(src=figure_path, dst=published_copy)
    return figure_path
|
|
|
|
@pytest.fixture(autouse=True)
def _skip_if_no_matplotlib() -> None:
    """Skip the current test when matplotlib cannot be imported."""
    # autouse=True applies this check to every test in the module.
    pytest.importorskip("matplotlib")
    return None
|
|
|
|
class TestBundleDetailsManualVerify:
    """Comparison-figure scenarios rendered via ``_generate_and_publish``.

    Every test that draws random data seeds torch first, so the PNGs published
    for human inspection are identical from run to run (the same convention
    used by ``TestPerTokenHeatmapManualVerify``).
    """

    def test_normal_small_diff(self, tmp_path: Path, publish_dir: Path) -> None:
        """Two nearly-identical tensors (randn + 0.01 noise).

        Expected: All 6 panel rows visible. Diff heatmap nearly uniform light color.
        Hist2d tightly clustered along the red diagonal line.
        """
        # Fixed seed keeps the published figure reproducible across runs.
        torch.manual_seed(42)
        baseline: torch.Tensor = torch.randn(32, 64)
        target: torch.Tensor = baseline + torch.randn(32, 64) * 0.01

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="normal_small_diff",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )

    def test_significant_diff(self, tmp_path: Path, publish_dir: Path) -> None:
        """Two tensors with larger differences (randn + 0.5 noise).

        Expected: All 6 panel rows visible. Diff heatmap shows noticeable structure.
        Hist2d scatter is broader, spread away from the diagonal.
        """
        # Fixed seed keeps the published figure reproducible across runs.
        torch.manual_seed(42)
        baseline: torch.Tensor = torch.randn(32, 64)
        target: torch.Tensor = baseline + torch.randn(32, 64) * 0.5

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="significant_diff",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )

    def test_shape_mismatch(self, tmp_path: Path, publish_dir: Path) -> None:
        """Baseline 32x64, target 16x32 — shapes do not match.

        Expected: Only 2 panel rows (baseline heatmap, target heatmap).
        No diff/histogram/hist2d/sampled panels since diff cannot be computed.
        """
        # Fixed seed keeps the published figure reproducible across runs.
        torch.manual_seed(42)
        baseline: torch.Tensor = torch.randn(32, 64)
        target: torch.Tensor = torch.randn(16, 32)

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="shape_mismatch",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )

    def test_large_tensor(self, tmp_path: Path, publish_dir: Path) -> None:
        """4000x4000 tensor — triggers internal downsampling.

        Expected: Figure renders normally without OOM. Downsampled panels
        should still look reasonable.
        """
        # Fixed seed keeps the published figure reproducible across runs.
        torch.manual_seed(42)
        baseline: torch.Tensor = torch.randn(4000, 4000)
        target: torch.Tensor = baseline + torch.randn(4000, 4000) * 0.001

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="large_tensor",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )

    def test_1d_tensor(self, tmp_path: Path, publish_dir: Path) -> None:
        """1D tensor (256,) — internally reshaped to 2D before plotting.

        Expected: All 6 panel rows visible. The heatmap shape reflects the
        reshaped 2D form, not the original 1D.
        """
        # Fixed seed keeps the published figure reproducible across runs.
        torch.manual_seed(42)
        baseline: torch.Tensor = torch.randn(256)
        # The target differs from the baseline by a constant offset, not noise.
        target: torch.Tensor = baseline + 0.01

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="1d_tensor",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )

    def test_constant_tensor(self, tmp_path: Path, publish_dir: Path) -> None:
        """All-zero baseline, tiny-valued target.

        Expected: Colorbar range is extremely small. Histogram concentrates in
        a single bin. No rendering errors from near-zero variance.
        """
        # Fully deterministic inputs: no random draw, so no seed is needed.
        baseline: torch.Tensor = torch.zeros(32, 64)
        target: torch.Tensor = torch.ones(32, 64) * 1e-8

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="constant_tensor",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )

    def test_extreme_values(self, tmp_path: Path, publish_dir: Path) -> None:
        """Tensor containing values spanning 1e-10 to 1e10.

        Expected: Log10 panels handle the wide range gracefully. No inf/nan
        artifacts in the rendered figure.
        """
        # Fixed seed keeps the published figure reproducible across runs.
        torch.manual_seed(42)
        baseline: torch.Tensor = torch.randn(32, 64).abs()
        # Plant the two extremes explicitly so the range is always present.
        baseline[0, 0] = 1e-10
        baseline[0, 1] = 1e10
        target: torch.Tensor = baseline + torch.randn(32, 64) * 0.01

        _generate_and_publish(
            baseline=baseline,
            target=target,
            name="extreme_values",
            tmp_path=tmp_path,
            publish_dir=publish_dir,
        )
|
|
|
|
class TestPerTokenHeatmapManualVerify:
    """Per-token heatmap scenarios built from synthetic comparison records."""

    def test_increasing_diff(self, tmp_path: Path, publish_dir: Path) -> None:
        """Per-token heatmap with linearly increasing diff across token positions.

        Expected: Heatmap shows a clear left-to-right gradient — dark/cold on
        the left (small diff), bright/hot on the right (large diff). Multiple
        rows for different tensor names. Colorbar shows log10 scale.
        """
        from sglang.srt.debug_utils.comparator.output_types import (
            ComparisonTensorRecord,
        )
        from sglang.srt.debug_utils.comparator.per_token_visualizer import (
            generate_per_token_heatmap,
        )
        from sglang.srt.debug_utils.comparator.tensor_comparator.comparator import (
            compare_tensor_pair,
        )

        torch.manual_seed(42)
        n_tokens: int = 64
        n_hidden: int = 128
        n_layers: int = 5

        # Noise amplitude per token position, as a column vector so it
        # broadcasts across the hidden dimension. linspace draws no random
        # numbers, so building it once here leaves the random stream unchanged.
        ramp: torch.Tensor = torch.linspace(1e-6, 0.5, steps=n_tokens).unsqueeze(1)

        records: list[ComparisonTensorRecord] = []
        for layer_idx in range(n_layers):
            reference: torch.Tensor = torch.randn(n_tokens, n_hidden)
            perturbed: torch.Tensor = reference + torch.randn_like(reference) * ramp

            comparison = compare_tensor_pair(
                x_baseline=reference,
                x_target=perturbed,
                name=f"layer_{layer_idx}_hidden_states",
                diff_threshold=1e-3,
                seq_dim=0,
            )
            records.append(ComparisonTensorRecord(**comparison.model_dump()))

        png_path: Path = tmp_path / "per_token_increasing_diff.png"
        heatmap = generate_per_token_heatmap(records=records, output_path=png_path)

        assert heatmap is not None
        _assert_valid_png(png_path)
        shutil.copy2(src=png_path, dst=publish_dir / png_path.name)

    def test_single_spike(self, tmp_path: Path, publish_dir: Path) -> None:
        """Per-token heatmap where only one token position has large diff.

        Expected: Heatmap shows one bright vertical stripe at the spike position,
        rest is dark/cold.
        """
        from sglang.srt.debug_utils.comparator.output_types import (
            ComparisonTensorRecord,
        )
        from sglang.srt.debug_utils.comparator.per_token_visualizer import (
            generate_per_token_heatmap,
        )
        from sglang.srt.debug_utils.comparator.tensor_comparator.comparator import (
            compare_tensor_pair,
        )

        torch.manual_seed(42)
        n_tokens: int = 64
        n_hidden: int = 128
        spike_at: int = 32
        n_layers: int = 4

        records: list[ComparisonTensorRecord] = []
        for layer_idx in range(n_layers):
            reference: torch.Tensor = torch.randn(n_tokens, n_hidden)
            # Identical to the reference except for one heavily perturbed row.
            perturbed: torch.Tensor = reference.clone()
            perturbed[spike_at, :] += torch.randn(n_hidden) * 5.0

            comparison = compare_tensor_pair(
                x_baseline=reference,
                x_target=perturbed,
                name=f"layer_{layer_idx}_attn_output",
                diff_threshold=1e-3,
                seq_dim=0,
            )
            records.append(ComparisonTensorRecord(**comparison.model_dump()))

        png_path: Path = tmp_path / "per_token_single_spike.png"
        heatmap = generate_per_token_heatmap(records=records, output_path=png_path)

        assert heatmap is not None
        _assert_valid_png(png_path)
        shutil.copy2(src=png_path, dst=publish_dir / png_path.name)
|
|
|
|
if __name__ == "__main__":
    # Run this file's tests directly and use pytest's result as the exit status.
    exit_status = pytest.main([__file__])
    sys.exit(exit_status)
|
|