File size: 3,859 Bytes
61ba51e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import glob
import os
import pickle
import tempfile
import time
import unittest

import requests

from sglang.srt.environ import envs
from sglang.srt.utils import kill_process_tree
from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
from sglang.test.test_utils import (
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
    popen_launch_server,
)

register_cuda_ci(est_time=40, suite="nightly-1-gpu", nightly=True)
register_amd_ci(est_time=40, suite="nightly-amd-1-gpu", nightly=True)


class TestCrashDump(CustomTestCase):
    crash_dump_folder = None
    MAX_NEW_TOKENS = 4
    NUM_REQUESTS_BEFORE_CRASH = 5

    @classmethod
    def setUpClass(cls):
        cls.crash_dump_folder = tempfile.mkdtemp(prefix="crash_dump_test_")

        with envs.SGLANG_TEST_CRASH_AFTER_STREAM_OUTPUTS.override(
            cls.NUM_REQUESTS_BEFORE_CRASH * cls.MAX_NEW_TOKENS + 10
        ):
            cls.process = popen_launch_server(
                "Qwen/Qwen3-0.6B",
                DEFAULT_URL_FOR_TEST,
                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                other_args=[
                    "--crash-dump-folder",
                    cls.crash_dump_folder,
                    "--skip-server-warmup",
                ],
            )

    @classmethod
    def tearDownClass(cls):
        kill_process_tree(cls.process.pid)

    def test_crash_dump_generated(self):
        """Test that crash dump file is generated after server crash."""
        # Send multiple requests to trigger the crash
        for i in range(self.NUM_REQUESTS_BEFORE_CRASH * 2):
            try:
                response = requests.post(
                    DEFAULT_URL_FOR_TEST + "/generate",
                    json={
                        "text": f"Hello, this is request {i}.",
                        "sampling_params": {
                            "max_new_tokens": self.MAX_NEW_TOKENS,
                            "temperature": 0,
                        },
                    },
                    timeout=30,
                )
            except requests.exceptions.RequestException:
                # Connection error expected after crash
                pass

        # Wait for crash dump to be written
        time.sleep(5)

        # Find the crash dump file
        dump_pattern = os.path.join(self.crash_dump_folder, "*", "crash_dump_*.pkl")
        dump_files = glob.glob(dump_pattern)

        # Check that a dump file was created
        self.assertTrue(
            len(dump_files) > 0,
            f"No crash dump file found in {self.crash_dump_folder}. "
            f"Pattern: {dump_pattern}",
        )

        # Read the dump file and verify contents
        dump_file = dump_files[0]
        with open(dump_file, "rb") as f:
            dump_data = pickle.load(f)

        # Verify the dump structure
        self.assertIn("server_args", dump_data)
        self.assertIn("requests", dump_data)

        # Check that there are more than 5 requests in the dump
        requests_list = dump_data["requests"]
        self.assertGreater(
            len(requests_list),
            self.NUM_REQUESTS_BEFORE_CRASH,
            f"Expected more than {self.NUM_REQUESTS_BEFORE_CRASH} requests in dump, but got {len(requests_list)}",
        )

        # Verify each request tuple has the expected structure (obj, out, created_time, finish_time)
        for i, req_tuple in enumerate(requests_list):
            self.assertIsInstance(
                req_tuple,
                tuple,
                f"Request {i} should be a tuple, got {type(req_tuple)}",
            )
            self.assertGreaterEqual(
                len(req_tuple),
                4,
                f"Request {i} tuple should have at least 4 elements",
            )


if __name__ == "__main__":
    unittest.main()