Segmentation fault during CUDA graph replay (cuGraphLaunch)
Hi, I was using your non-conservative config. After running for some time, the deployment died with a segmentation fault:
[2026-04-27 10:33:37] INFO: 10.26.97.18:52996 - "POST /v1/chat/completions HTTP/1.1" 200 OK
Fatal Python error: Segmentation fault
Thread 0x000074329ffff6c0 (most recent call first):
File "/opt/sglang/python/sglang/srt/utils/watchdog.py", line 147 in _watchdog_once
File "/opt/sglang/python/sglang/srt/utils/watchdog.py", line 127 in _watchdog_thread
File "/usr/lib/python3.12/threading.py", line 1010 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Thread 0x00007433cfffe6c0 (most recent call first):
File "/opt/venv/lib/python3.12/site-packages/torch/_inductor/compile_worker/subproc_pool.py", line 76 in _recv_msg
File "/opt/venv/lib/python3.12/site-packages/torch/_inductor/compile_worker/subproc_pool.py", line 271 in _read_thread
File "/usr/lib/python3.12/threading.py", line 1010 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Thread 0x00007433d3fff6c0 (most recent call first):
File "/usr/lib/python3.12/threading.py", line 359 in wait
File "/usr/lib/python3.12/threading.py", line 655 in wait
File "/opt/venv/lib/python3.12/site-packages/tqdm/_monitor.py", line 60 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Thread 0x00007437cffff6c0 (most recent call first):
File "/usr/lib/python3.12/threading.py", line 359 in wait
File "/usr/lib/python3.12/threading.py", line 655 in wait
File "/opt/venv/lib/python3.12/site-packages/tqdm/_monitor.py", line 60 in run
File "/usr/lib/python3.12/threading.py", line 1073 in _bootstrap_inner
File "/usr/lib/python3.12/threading.py", line 1030 in _bootstrap
Current thread 0x00007452b7937300 (most recent call first):
File "/opt/venv/lib/python3.12/site-packages/torch/cuda/graphs.py", line 139 in replay
File "/opt/sglang/python/sglang/srt/compilation/cuda_piecewise_backend.py", line 205 in call
File ".141", line 814 in forward
File "/opt/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790 in _call_impl
File "/opt/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779 in _wrapped_call_impl
File "/opt/venv/lib/python3.12/site-packages/torch/fx/graph_module.py", line 447 in call
File "/opt/venv/lib/python3.12/site-packages/torch/fx/graph_module.py", line 949 in call_wrapped
File "/opt/venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 1263 in _fn
File "/opt/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790 in _call_impl
File "/opt/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779 in _wrapped_call_impl
File "/opt/venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 472 in call
File "/opt/sglang/python/sglang/srt/models/minimax_m2.py", line 882 in forward
File "/opt/venv/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 1024 in compile_wrapper
File "/opt/sglang/python/sglang/srt/compilation/compile.py", line 195 in trampoline
File "/opt/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790 in _call_impl
File "/opt/venv/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779 in _wrapped_call_impl
File "/opt/sglang/python/sglang/srt/models/minimax_m2.py", line 1021 in forward
File "/opt/venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 124 in decorate_context
File "/opt/sglang/python/sglang/srt/model_executor/piecewise_cuda_graph_runner.py", line 779 in replay
File "/opt/sglang/python/sglang/srt/model_executor/model_runner.py", line 2624 in forward_extend
File "/opt/sglang/python/sglang/srt/model_executor/model_runner.py", line 2805 in _forward_raw
File "/opt/sglang/python/sglang/srt/model_executor/model_runner.py", line 2695 in forward
File "/opt/sglang/python/sglang/srt/managers/tp_worker.py", line 469 in forward_batch_generation
File "/opt/sglang/python/sglang/srt/managers/scheduler.py", line 2657 in run_batch
File "/opt/sglang/python/sglang/srt/managers/scheduler.py", line 1337 in event_loop_overlap
File "/opt/venv/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 124 in decorate_context
File "/opt/sglang/python/sglang/srt/managers/scheduler.py", line 3494 in dispatch_event_loop
File "/opt/sglang/python/sglang/srt/managers/scheduler.py", line 1276 in run_event_loop
File "/opt/sglang/python/sglang/srt/managers/scheduler.py", line 3613 in run_scheduler_process
File "/usr/lib/python3.12/multiprocessing/process.py", line 108 in run
File "/usr/lib/python3.12/multiprocessing/process.py", line 314 in _bootstrap
File "/usr/lib/python3.12/multiprocessing/spawn.py", line 135 in _main
File "/usr/lib/python3.12/multiprocessing/spawn.py", line 122 in spawn_main
File "", line 1 in
Extension modules: numpy._core._multiarray_umath, numpy.linalg._umath_linalg, pybase64._pybase64, charset_normalizer.md, charset_normalizer.cd, requests.packages.charset_normalizer.md, requests.packages.chardet.md, requests.packages.charset_normalizer.cd, requests.packages.chardet.cd, multidict._multidict, yarl._quoting_c, propcache._helpers_c, aiohttp._http_writer, aiohttp._http_parser, aiohttp._websocket.mask, aiohttp._websocket.reader_c, frozenlist._frozenlist, torch._C, torch._C._dynamo.autograd_compiler, torch._C._dynamo.eval_frame, torch._C._dynamo.guards, torch._C._dynamo.utils, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._nn, torch._C._sparse, torch._C._special, psutil._psutil_linux, PIL._imaging, PIL._imagingft, numpy.random._common, numpy.random.bit_generator, numpy.random._bounded_integers, numpy.random._pcg64, numpy.random._generator, numpy.random._mt19937, numpy.random._philox, numpy.random._sfc64, numpy.random.mtrand, cuda.bindings._bindings.cydriver, cuda.bindings.cydriver, cuda.bindings.driver, tvm_ffi.core, markupsafe._speedups, cuda.bindings._bindings.cyruntime_ptds, cuda.bindings._bindings.cyruntime, cuda.bindings.cyruntime, cuda.bindings.runtime, cuda.bindings._bindings.cynvrtc, cuda.bindings.cynvrtc, cuda.bindings.nvrtc, sentencepiece._sentencepiece, regex._regex, yaml._yaml, cuda_utils, _cffi_backend, _cyutility, scipy._cyutility, scipy._lib._ccallback_c, scipy.linalg._fblas, scipy.linalg._flapack, scipy.linalg.cython_lapack, scipy.linalg._cythonized_array_utils, scipy.linalg._solve_toeplitz, scipy.linalg._batched_linalg, scipy.linalg._decomp_lu_cython, scipy.linalg._matfuncs_schur_sqrtm, scipy.linalg._matfuncs_expm, scipy.linalg._linalg_pythran, scipy.linalg.cython_blas, scipy.linalg._decomp_update, scipy.sparse._sparsetools, _csparsetools, scipy.sparse._csparsetools, scipy.sparse.linalg._dsolve._superlu, scipy.sparse.linalg._eigen.arpack._arpacklib, scipy.sparse.linalg._propack, scipy.optimize._group_columns, 
scipy._lib.messagestream, scipy.optimize._trlib._trlib, scipy.optimize._lbfgsb, _moduleTNC, scipy.optimize._moduleTNC, scipy.optimize._slsqplib, scipy.optimize._minpack, scipy.optimize._lsq.givens_elimination, scipy.optimize._zeros, scipy._lib._uarray._uarray, scipy.special._ufuncs_cxx, scipy.special._ellip_harm_2, scipy.special._special_ufuncs, scipy.special._gufuncs, scipy.special._ufuncs, scipy.special._specfun, scipy.special._comb, scipy.linalg._decomp_interpolative, scipy.optimize._bglu_dense, scipy.optimize._lsap, scipy.spatial._ckdtree, scipy.spatial._qhull, scipy.spatial._voronoi, scipy.spatial._hausdorff, scipy.spatial._distance_wrap, scipy.spatial.transform._rotation_cy, scipy.spatial.transform._rigid_transform_cy, scipy.optimize._direct, sklearn.__check_build._check_build, scipy.integrate._odepack, scipy.integrate._quadpack, scipy.integrate._vode, scipy.integrate._dop, scipy.interpolate._fitpack, scipy.interpolate._dfitpack, scipy.interpolate._dierckx, scipy.interpolate._ppoly, scipy.interpolate._interpnd, scipy.interpolate._rbfinterp_pythran, scipy.interpolate._rgi_cython, scipy.special.cython_special, scipy.stats._stats, scipy.stats._biasedurn, scipy.stats._stats_pythran, scipy.stats._levy_stable.levyst, scipy.stats._ansari_swilk_statistics, scipy.sparse.csgraph._tools, scipy.sparse.csgraph._shortest_path, scipy.sparse.csgraph._traversal, scipy.sparse.csgraph._min_spanning_tree, scipy.sparse.csgraph._flow, scipy.sparse.csgraph._matching, scipy.sparse.csgraph._reordering, scipy.stats._sobol, scipy.stats._qmc_cy, scipy.stats._rcont.rcont, scipy.stats._qmvnt_cy, scipy.ndimage._nd_image, scipy.ndimage._rank_filter_1d, _ni_label, scipy.ndimage._ni_label, pandas._libs._cyutility, pandas._libs.tslibs.ccalendar, pandas._libs.tslibs.np_datetime, pandas._libs.tslibs.dtypes, pandas._libs.tslibs.base, pandas._libs.tslibs.nattype, pandas._libs.tslibs.timezones, pandas._libs.properties, pandas._libs.tslibs.fields, pandas._libs.tslibs.timedeltas, 
pandas._libs.tslibs.tzconversion, pandas._libs.tslibs.timestamps, pandas._libs.tslibs.offsets, pandas._libs.tslibs.strptime, pandas._libs.tslibs.parsing, pandas._libs.tslibs.conversion, pandas._libs.tslibs.period, pandas._libs.tslibs.vectorized, pandas._libs.ops_dispatch, pandas._libs.missing, pandas._libs.hashtable, pandas._libs.algos, pandas._libs.interval, pandas._libs.lib, pandas._libs.ops, pandas._libs.hashing, pandas._libs.arrays, pandas._libs.tslib, pandas._libs.sparse, pandas._libs.internals, pandas._libs.indexing, pandas._libs.index, pandas._libs.writers, pandas._libs.join, pandas._libs.window.aggregations, pandas._libs.window.indexers, pandas._libs.reshape, pandas._libs.groupby, pandas._libs.json, pandas._libs.parsers, pandas._libs.testing, sklearn._cyutility, sklearn.utils._isfinite, sklearn.utils.sparsefuncs_fast, sklearn.utils.murmurhash, sklearn.utils._openmp_helpers, sklearn.metrics.cluster._expected_mutual_info_fast, sklearn.metrics._dist_metrics, sklearn.metrics._pairwise_distances_reduction._datasets_pair, sklearn.utils._cython_blas, sklearn.metrics._pairwise_distances_reduction._base, sklearn.metrics._pairwise_distances_reduction._middle_term_computer, sklearn.utils._heap, sklearn.utils._sorting, sklearn.metrics._pairwise_distances_reduction._argkmin, sklearn.metrics._pairwise_distances_reduction._argkmin_classmode, sklearn.utils._vector_sentinel, sklearn.metrics._pairwise_distances_reduction._radius_neighbors, sklearn.metrics._pairwise_distances_reduction._radius_neighbors_classmode, sklearn.metrics._pairwise_fast, sklearn.preprocessing._csr_polynomial_expansion, sklearn.preprocessing._target_encoder_fast, zmq.backend.cython._zmq, setproctitle._setproctitle, msgspec._core, _triton_launcher, uvloop.loop (total: 207)
!!!!!!! Segfault encountered !!!!!!!
File "", line 0, in pthread_kill
File "", line 0, in gsignal
File "", line 0, in cuGraphLaunch
File "", line 0, in cudaGraphLaunch
File "", line 0, in at::cuda::CUDAGraph::replay()
File "", line 0, in torch::detail::wrap_pybind_function_impl<void (at::cuda::CUDAGraph::)(), 0ul, true>(void (at::cuda::CUDAGraph::&&)(), std::integer_sequence<unsigned long, 0ul>, std::integral_constant<bool, true>)::{lambda(at::cuda::CUDAGraph&)#1}::operator()(at::cuda::CUDAGraph&) const
File "", line 0, in pybind11::cpp_function::initialize<torch::detail::wrap_pybind_function_impl_<void (at::cuda::CUDAGraph::)(), 0ul, true>(void (at::cuda::CUDAGraph::&&)(), std::integer_sequence<unsigned long, 0ul>, std::integral_constant<bool, true>)::{lambda(at::cuda::CUDAGraph&)#1}, void, at::cuda::CUDAGraph&, pybind11::name, pybind11::is_method, pybind11::sibling>(void (at::cuda::CUDAGraph::&&)(), void ()(at::cuda::CUDAGraph&), pybind11::name const&, pybind11::is_method const&, pybind11::sibling const&)::{lambda(pybind11::detail::function_call&)#1}::_FUN(pybind11::detail::function_call&)
File "", line 0, in pybind11::cpp_function::dispatcher(_object*, _object*, _object*)
File "", line 0, in _PyObject_MakeTpCall
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in PyObject_Vectorcall
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyObject_Call_Prepend
File "", line 0, in _PyObject_MakeTpCall
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyObject_Call_Prepend
File "", line 0, in PyObject_Call
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyObject_Call_Prepend
File "", line 0, in _PyObject_MakeTpCall
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in dynamo_eval_custom_code
File "", line 0, in dynamo__custom_eval_frame::{lambda()#2}::operator()() const
File "", line 0, in dynamo__custom_eval_frame
File "", line 0, in PyObject_Call
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in PyObject_Call
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in PyObject_Call
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in _PyObject_Call_Prepend
File "", line 0, in _PyObject_MakeTpCall
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in PyObject_Call
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in PyObject_Call
File "", line 0, in _PyEval_EvalFrameDefault
File "", line 0, in PyEval_EvalCode
File "", line 0, in PyRun_StringFlags
File "", line 0, in PyRun_SimpleStringFlags
File "", line 0, in Py_RunMain
File "", line 0, in Py_BytesMain
File "", line 0, in _start
File "", line 0, in 0xffffffffffffffff
[rank1]:[W427 10:35:37.826628986 TCPStore.cpp:125] [c10d] recvValue failed on SocketImpl(fd=59, addr=[localhost]:55630, remote=[::ffff:0.0.0.0]:44215): Connection reset by peer
Exception raised from recvBytes at /pytorch/torch/csrc/distributed/c10d/Utils.hpp:679 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits, std::allocator >) + 0x9d (0x7d80a917305d in /opt/venv/lib/python3.12/site-packages/torch/lib/libc10.so)
frame #1: + 0x6a91532 (0x7d80126b2532 in /opt/venv/lib/python3.12/site-packages/torch/lib/libtorch_cpu.so)
frame #2: c10d::TCPStore::check(std::vector<std::__cxx11::basic_string<char, std::char_traits, std::allocator >, std::allocator<std::__cxx11::basic_string<char, std::char_traits, std::allocator > > > const&) + 0x273 (0x7d80126b0413 in /opt/venv/lib/python3.12/site-packages/torch/lib/libtorch_cpu.so)
frame #3: c10d::ProcessGroupNCCL::HeartbeatMonitor::runLoop() + 0x4a5 (0x7d7ff4e9c8c5 in /opt/venv/lib/python3.12/site-packages/torch/lib/libtorch_cuda.so)
frame #4: + 0xecdb4 (0x7d80d8e0edb4 in /lib/x86_64-linux-gnu/libstdc++.so.6)
frame #5: + 0x9caa4 (0x7d80db8a3aa4 in /lib/x86_64-linux-gnu/libc.so.6)
frame #6: __clone + 0x44 (0x7d80db930a64 in /lib/x86_64-linux-gnu/libc.so.6)
[rank1]:[W427 10:35:37.827783417 ProcessGroupNCCL.cpp:1826] [PG ID 0 PG GUID 0 Rank 1] Failed to check the "should dump" flag on TCPStore, (maybe TCPStore server has shut down too early), with error: Connection reset by peer
[2026-04-27 10:35:38] Subprocess scheduler_0 (pid=416) crashed with exit code -11. Triggering SIGQUIT for cleanup...
[2026-04-27 10:35:38] SIGQUIT received. signum=None, frame=None. It usually means one child failed.
[2026-04-27 10:35:38] ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1512, in uvloop.loop.Loop.run_until_complete
[2026-04-27 10:35:38] INFO: 10.26.97.18:56762 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
File "uvloop/loop.pyx", line 1505, in uvloop.loop.Loop.run_until_complete
File "uvloop/loop.pyx", line 1379, in uvloop.loop.Loop.run_forever
File "uvloop/loop.pyx", line 557, in uvloop.loop.Loop._run
File "uvloop/loop.pyx", line 476, in uvloop.loop.Loop._on_idle
File "uvloop/cbhandles.pyx", line 83, in uvloop.loop.Handle._run
File "uvloop/cbhandles.pyx", line 61, in uvloop.loop.Handle._run
File "/opt/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 2557, in running_phase_sigquit_handler
kill_process_tree(os.getpid())
File "/opt/sglang/python/sglang/srt/utils/common.py", line 1062, in kill_process_tree
sys.exit(0)
SystemExit: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/venv/lib/python3.12/site-packages/uvicorn/protocols/http/h11_impl.py", line 410, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in call
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/fastapi/applications.py", line 1163, in call
await super().call(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/applications.py", line 107, in call
await self.middleware_stack(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 164, in call
await self.app(scope, receive, _send)
File "/opt/venv/lib/python3.12/site-packages/starlette/middleware/cors.py", line 87, in call
await self.app(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 63, in call
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/opt/venv/lib/python3.12/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in call
await self.app(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 716, in call
await self.middleware_stack(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 736, in app
await route.handle(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 290, in handle
await self.app(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/fastapi/routing.py", line 134, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/opt/venv/lib/python3.12/site-packages/fastapi/routing.py", line 120, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/fastapi/routing.py", line 674, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/fastapi/routing.py", line 328, in run_endpoint_function
return await dependant.call(**values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/sglang/python/sglang/srt/entrypoints/http_server.py", line 1414, in openai_v1_chat_completions
return await raw_request.app.state.openai_serving_chat.handle_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/sglang/python/sglang/srt/entrypoints/openai/serving_base.py", line 107, in handle_request
return await self._handle_non_streaming_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/sglang/python/sglang/srt/entrypoints/openai/serving_chat.py", line 895, in _handle_non_streaming_request
ret = await self.tokenizer_manager.generate_request(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 517, in generate_request
async for response in self._wait_one_response(obj, state, request):
File "/opt/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 1122, in _wait_one_response
await asyncio.wait_for(
File "/usr/lib/python3.12/asyncio/tasks.py", line 520, in wait_for
return await fut
^^^^^^^^^
File "/usr/lib/python3.12/asyncio/locks.py", line 212, in wait
await fut
asyncio.exceptions.CancelledError
[2026-04-27 10:35:38] ERROR: Traceback (most recent call last):
File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1512, in uvloop.loop.Loop.run_until_complete
File "uvloop/loop.pyx", line 1505, in uvloop.loop.Loop.run_until_complete
File "uvloop/loop.pyx", line 1379, in uvloop.loop.Loop.run_forever
File "uvloop/loop.pyx", line 557, in uvloop.loop.Loop._run
File "uvloop/loop.pyx", line 476, in uvloop.loop.Loop._on_idle
File "uvloop/cbhandles.pyx", line 83, in uvloop.loop.Handle._run
File "uvloop/cbhandles.pyx", line 61, in uvloop.loop.Handle._run
File "/opt/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 2557, in running_phase_sigquit_handler
kill_process_tree(os.getpid())
File "/opt/sglang/python/sglang/srt/utils/common.py", line 1062, in kill_process_tree
sys.exit(0)
SystemExit: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 701, in lifespan
await receive()
File "/opt/venv/lib/python3.12/site-packages/uvicorn/lifespan/on.py", line 137, in receive
return await self.receive_queue.get()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/queues.py", line 158, in get
await getter
asyncio.exceptions.CancelledError
[2026-04-27 10:35:38] ERROR: Exception in ASGI application
Traceback (most recent call last):
File "/usr/lib/python3.12/asyncio/runners.py", line 194, in run
return runner.run(main)
^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
return self._loop.run_until_complete(task)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "uvloop/loop.pyx", line 1512, in uvloop.loop.Loop.run_until_complete
File "uvloop/loop.pyx", line 1505, in uvloop.loop.Loop.run_until_complete
File "uvloop/loop.pyx", line 1379, in uvloop.loop.Loop.run_forever
File "uvloop/loop.pyx", line 557, in uvloop.loop.Loop._run
File "uvloop/loop.pyx", line 476, in uvloop.loop.Loop._on_idle
File "uvloop/cbhandles.pyx", line 83, in uvloop.loop.Handle._run
File "uvloop/cbhandles.pyx", line 61, in uvloop.loop.Handle._run
File "/opt/sglang/python/sglang/srt/managers/tokenizer_manager.py", line 2557, in running_phase_sigquit_handler
kill_process_tree(os.getpid())
File "/opt/sglang/python/sglang/srt/utils/common.py", line 1062, in kill_process_tree
sys.exit(0)
SystemExit: 0
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/opt/venv/lib/python3.12/site-packages/uvicorn/protocols/http/h11_impl.py", line 410, in run_asgi
result = await app( # type: ignore[func-returns-value]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in call
return await self.app(scope, receive, send)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/fastapi/applications.py", line 1163, in call
await super().call(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/applications.py", line 107, in call
await self.middleware_stack(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/middleware/errors.py", line 164, in call
await self.app(scope, receive, _send)
File "/opt/venv/lib/python3.12/site-packages/starlette/middleware/cors.py", line 87, in call
await self.app(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/middleware/exceptions.py", line 63, in call
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/opt/venv/lib/python3.12/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in call
await self.app(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 716, in call
await self.middleware_stack(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 736, in app
await route.handle(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/routing.py", line 290, in handle
await self.app(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/fastapi/routing.py", line 134, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "/opt/venv/lib/python3.12/site-packages/fastapi/routing.py", line 121, in app
await response(scope, receive, send)
File "/opt/venv/lib/python3.12/site-packages/starlette/responses.py", line 270, in call
async with anyio.create_task_group() as task_group:
File "/opt/venv/lib/python3.12/site-packages/anyio/_backends/_asyncio.py", line 803, in aexit
raise exc_val
File "/opt/venv/lib/python3.12/site-packages/starlette/responses.py", line 277, in call
await wrap(partial(self.listen_for_disconnect, receive))
File "/opt/venv/lib/python3.12/site-packages/starlette/responses.py", line 273, in wrap
await func()
File "/opt/venv/lib/python3.12/site-packages/starlette/responses.py", line 241, in listen_for_disconnect
message = await receive()
^^^^^^^^^^^^^^^
File "/opt/venv/lib/python3.12/site-packages/uvicorn/protocols/http/h11_impl.py", line 538, in receive
await self.message_event.wait()
File "/usr/lib/python3.12/asyncio/locks.py", line 212, in wait
await fut
asyncio.exceptions.CancelledError
Run script (the `docker run` command used to launch the server):
docker run -d \
--name sglang-minimax-m27 \
--publish 80:8000 \
--gpus '"device=0,1"' \
--shm-size=32g \
--ipc=host \
--ulimit memlock=-1 \
--ulimit nofile=1048576 \
-e CUDA_DEVICE_ORDER=PCI_BUS_ID \
-e CUDA_VISIBLE_DEVICES=0,1 \
-e NCCL_IB_DISABLE=1 \
-e NCCL_P2P_LEVEL=PHB \
-e SGLANG_DISABLE_CUDNN_CHECK=1 \
-e OMP_NUM_THREADS=8 \
-e SAFETENSORS_FAST_GPU=1 \
-e HOME=/root \
-e HF_HOME=/root/.cache/huggingface \
-e HUGGINGFACE_HUB_CACHE=/root/.cache/huggingface/hub \
-e TRANSFORMERS_CACHE=/root/.cache/huggingface/hub \
-e XDG_CACHE_HOME=/root/.cache/sglang-generated \
-e TORCH_EXTENSIONS_DIR=/root/.cache/sglang-generated/torch_extensions \
-e TORCHINDUCTOR_CACHE_DIR=/root/.cache/sglang-generated/torchinductor \
-e TRITON_CACHE_DIR=/root/.cache/sglang-generated/triton \
-e SGLANG_CACHE_DIR=/root/.cache/sglang-generated/sglang \
-e SGLANG_DG_CACHE_DIR=/root/.cache/sglang-generated/deep_gemm \
-e CUDA_CACHE_PATH=/root/.cache/sglang-generated/nv \
-v "$HOME/.cache/huggingface:/root/.cache/huggingface" \
-v "$HOME/.cache/sglang-generated:/root/.cache/sglang-generated" \
-v /dev/shm:/dev/shm \
-v /models/model_weights/MiniMax-M2.7-NVFP4:/models/MiniMax-M2.7-NVFP4:ro \
--entrypoint python3 \
voipmonitor/sglang:cu130 \
-m sglang.launch_server \
--model-path /models/MiniMax-M2.7-NVFP4 \
--served-model-name minimax-m2.7 \
--trust-remote-code \
--tp 2 \
--ep 2 \
--quantization modelopt_fp4 \
--mem-fraction-static 0.90 \
--context-length 131072 \
--max-running-requests 16 \
--chunked-prefill-size 8192 \
--kv-cache-dtype fp8_e5m2 \
--attention-backend fa3 \
--moe-runner-backend flashinfer_cutlass \
--disable-custom-all-reduce \
--disable-piecewise-cuda-graph \
--enable-flashinfer-allreduce-fusion \
--tool-call-parser minimax-m2 \
--reasoning-parser minimax-append-think \
--host 0.0.0.0 \
--port 8000