Add files using upload-large-folder tool

a402b9b verified about 2 months ago

9.23 kB

	"""
	Usage:
	python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_with_reasoning
	python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_chat_completion_without_reasoning
	python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_with_reasoning
	python3 -m unittest openai_server.features.test_enable_thinking.TestEnableThinking.test_stream_chat_completion_without_reasoning
	"""

	import json
	import unittest

	import requests

	from sglang.srt.utils import kill_process_tree
	from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
	from sglang.test.test_utils import (
	DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST,
	DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
	DEFAULT_URL_FOR_TEST,
	CustomTestCase,
	popen_launch_server,
	)

	# Register this test file with the CUDA and AMD CI suites.
	# NOTE(review): est_time looks like an estimated runtime (presumably seconds)
	# and suite the CI stage name — confirm against sglang.test.ci.ci_register.
	register_cuda_ci(est_time=103, suite="stage-b-test-large-1-gpu")
	register_amd_ci(est_time=200, suite="stage-b-test-small-1-gpu-amd")


	class TestEnableThinking(CustomTestCase):
	    """Check the ``enable_thinking`` chat-template switch on /v1/chat/completions.

	    One server is launched per class with ``--reasoning-parser qwen3``. Every
	    test posts the same single-turn "Hello" prompt with ``separate_reasoning``
	    turned on and inspects ``reasoning_content`` in the reply, both for plain
	    and for streamed responses.

	    Subclasses can override ``setUpClass`` to launch a different model and set
	    ``additional_chat_kwargs`` to merge extra fields into every request body.
	    """

	    @classmethod
	    def setUpClass(cls):
	        """Launch the server under test and store its connection settings."""
	        cls.model = DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST
	        cls.base_url = DEFAULT_URL_FOR_TEST
	        cls.api_key = "sk-1234"
	        cls.process = popen_launch_server(
	            cls.model,
	            cls.base_url,
	            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
	            api_key=cls.api_key,
	            other_args=[
	                "--reasoning-parser",
	                "qwen3",
	            ],
	        )
	        # Extra request-body fields merged (last) into every chat request.
	        cls.additional_chat_kwargs = {}

	    @classmethod
	    def tearDownClass(cls):
	        """Kill the launched server process and its children."""
	        kill_process_tree(cls.process.pid)

	    def _post_chat_completion(self, enable_thinking: bool, stream: bool = False):
	        """POST the shared "Hello" chat request and return the raw response.

	        Args:
	            enable_thinking: Value sent as ``chat_template_kwargs.enable_thinking``.
	            stream: When true, ``"stream": True`` is added to the body and the
	                HTTP response is opened in streaming mode.

	        Returns:
	            The ``requests`` response object. For ``stream=True`` the caller is
	            responsible for closing it (use it as a context manager).
	        """
	        payload = {
	            "model": self.model,
	            "messages": [{"role": "user", "content": "Hello"}],
	            "temperature": 0,
	            "separate_reasoning": True,
	        }
	        if stream:
	            payload["stream"] = True
	        payload["chat_template_kwargs"] = {"enable_thinking": enable_thinking}
	        # Applied last so subclass-provided fields win over the defaults above.
	        payload.update(self.additional_chat_kwargs)

	        return requests.post(
	            f"{self.base_url}/v1/chat/completions",
	            headers={"Authorization": f"Bearer {self.api_key}"},
	            json=payload,
	            stream=stream,
	        )

	    def _assert_status_ok(self, response):
	        """Fail with the response body as the message unless the status is 200.

	        The body is only read on failure: touching ``response.text`` on a
	        streamed response downloads the whole body up front, which would
	        defeat incremental consumption via ``iter_lines``.
	        """
	        if response.status_code != 200:
	            self.assertEqual(
	                response.status_code, 200, f"Failed with: {response.text}"
	            )

	    def _read_first_message(self, response):
	        """Validate a non-streaming reply and return ``choices[0]["message"]``."""
	        self._assert_status_ok(response)
	        data = response.json()

	        self.assertIn("choices", data)
	        self.assertTrue(len(data["choices"]) > 0)
	        self.assertIn("message", data["choices"][0])
	        return data["choices"][0]["message"]

	    def _scan_stream(self, response):
	        """Consume an SSE stream and report which delta fields carried text.

	        Returns:
	            A ``(has_reasoning, has_content)`` pair of booleans: whether any
	            chunk had a non-empty ``delta.reasoning_content`` and whether any
	            chunk had a non-empty ``delta.content``.
	        """
	        has_reasoning = False
	        has_content = False

	        for raw_line in response.iter_lines():
	            if not raw_line:
	                continue
	            line = raw_line.decode("utf-8")
	            if not line.startswith("data:"):
	                continue
	            # The space after "data:" is optional in SSE, so strip the prefix
	            # by its real length instead of assuming "data: ".
	            event_payload = line[len("data:"):].strip()
	            if event_payload == "[DONE]":
	                continue

	            chunk = json.loads(event_payload)
	            choices = chunk.get("choices") or []
	            if not choices:
	                continue
	            delta = choices[0].get("delta") or {}

	            if delta.get("reasoning_content"):
	                has_reasoning = True
	            if delta.get("content"):
	                has_content = True

	        return has_reasoning, has_content

	    def test_chat_completion_with_reasoning(self):
	        # Non-streaming with "enable_thinking": True; reasoning_content must be present.
	        response = self._post_chat_completion(enable_thinking=True)
	        message = self._read_first_message(response)

	        self.assertIn("reasoning_content", message)
	        self.assertIsNotNone(message["reasoning_content"])

	    def test_chat_completion_without_reasoning(self):
	        # Non-streaming with "enable_thinking": False; reasoning_content must be
	        # absent or None.
	        response = self._post_chat_completion(enable_thinking=False)
	        message = self._read_first_message(response)

	        if "reasoning_content" in message:
	            self.assertIsNone(message["reasoning_content"])

	    def test_stream_chat_completion_with_reasoning(self):
	        # Streaming with "enable_thinking": True; both reasoning and normal
	        # content must appear in the deltas.
	        with self._post_chat_completion(
	            enable_thinking=True, stream=True
	        ) as response:
	            self._assert_status_ok(response)

	            print("\n=== Stream With Reasoning ===")
	            has_reasoning, has_content = self._scan_stream(response)

	        self.assertTrue(
	            has_reasoning,
	            "The reasoning content is not included in the stream response",
	        )
	        self.assertTrue(
	            has_content, "The stream response does not contain normal content"
	        )

	    def test_stream_chat_completion_without_reasoning(self):
	        # Streaming with "enable_thinking": False; only normal content may
	        # appear in the deltas.
	        with self._post_chat_completion(
	            enable_thinking=False, stream=True
	        ) as response:
	            self._assert_status_ok(response)

	            print("\n=== Stream Without Reasoning ===")
	            has_reasoning, has_content = self._scan_stream(response)

	        self.assertFalse(
	            has_reasoning,
	            "The reasoning content should not be included in the stream response",
	        )
	        self.assertTrue(
	            has_content, "The stream response does not contain normal content"
	        )


	# Skipped in CI tests.
	# class TestGLM45EnableThinking(TestEnableThinking):
	# @classmethod
	# def setUpClass(cls):
	# # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
	# cls.model = "THUDM/GLM-4.5"
	# cls.base_url = DEFAULT_URL_FOR_TEST
	# cls.api_key = "sk-1234"
	# cls.process = popen_launch_server(
	# cls.model,
	# cls.base_url,
	# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
	# api_key=cls.api_key,
	# other_args=[
	# "--tool-call-parser",
	# "glm45",
	# "--reasoning-parser",
	# "glm45",
	# "--tp-size",
	# "8"
	# ],
	# )

	# # Validate whether enable-thinking conflicts with tool_calls
	# cls.additional_chat_kwargs = {
	# "tools": [
	# {
	# "type": "function",
	# "function": {
	# "name": "add",
	# "description": "Compute the sum of two numbers",
	# "parameters": {
	# "type": "object",
	# "properties": {
	# "a": {
	# "type": "int",
	# "description": "A number",
	# },
	# "b": {
	# "type": "int",
	# "description": "A number",
	# },
	# },
	# "required": ["a", "b"],
	# },
	# },
	# }
	# ]
	# }

	# Run the test cases in this module when the file is executed as a script.
	if __name__ == "__main__":
	    unittest.main()