Hanrui / sglang /test /registered /lora /test_lora_radix_cache.py

Add files using upload-large-folder tool

61ba51e verified about 2 months ago

2.6 kB

	# Copyright 2023-2024 SGLang Team
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	# ==============================================================================

	import multiprocessing as mp
	import unittest

	import torch

	from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci
	from sglang.test.lora_utils import CI_MULTI_LORA_MODELS, run_lora_test_one_by_one
	from sglang.test.test_utils import CustomTestCase

	# Register this test file with the CI suites named "nightly-1-gpu" (CUDA)
	# and "nightly-amd-1-gpu" (AMD), both flagged as nightly.
	# NOTE(review): est_time is presumably the expected runtime in seconds —
	# confirm against sglang.test.ci.ci_register.
	# NOTE(review): these calls may be discovered by CI tooling; keep them as
	# plain top-level calls with literal arguments unless that is confirmed safe.
	register_cuda_ci(est_time=200, suite="nightly-1-gpu", nightly=True)
	register_amd_ci(est_time=200, suite="nightly-amd-1-gpu", nightly=True)

	# Prompts used by the LoRA radix-cache test below: one short single-line
	# prompt and one multi-line instruction/response/question prompt.
	_SHORT_PROMPT = "AI is a field of computer science focused on"
	_INSTRUCTION_PROMPT = """
	### Instruction:
	Tell me about llamas and alpacas
	### Response:
	Llamas are large, long-necked animals with a woolly coat. They have two toes on each foot instead of three like other camelids.
	### Question:
	What do you know about llamas?
	### Answer:
	"""
	PROMPTS = [_SHORT_PROMPT, _INSTRUCTION_PROMPT]


	class TestLoRARadixCache(CustomTestCase):
	    """Calls run_lora_test_one_by_one with the radix cache enabled, then disabled."""

	    def test_lora_radix_cache(self):
	        # A model case with multiple adapters is required to check the
	        # correctness of the radix cache.
	        lora_case = CI_MULTI_LORA_MODELS[0]

	        dtype = torch.float16
	        token_budget = 32

	        # Drop prompts of 1000+ characters when the model case asks to skip
	        # long prompts; otherwise use the full prompt list as-is.
	        if lora_case.skip_long_prompt:
	            prompts = [text for text in PROMPTS if len(text) < 1000]
	        else:
	            prompts = PROMPTS

	        # (disable_radix_cache, test_tag) per pass, in execution order:
	        # first with the radix cache, then without it.
	        passes = (
	            (False, "lora-with-radix-cache"),
	            (True, "lora-without-radix-cache"),
	        )
	        for radix_cache_off, tag in passes:
	            run_lora_test_one_by_one(
	                prompts,
	                lora_case,
	                dtype,
	                max_new_tokens=token_budget,
	                disable_radix_cache=radix_cache_off,
	                test_tag=tag,
	            )


	def _select_spawn_start_method():
	    """Request the "spawn" multiprocessing start method on a best-effort basis."""
	    try:
	        mp.set_start_method("spawn")
	    except RuntimeError:
	        # Raised when a start method has already been fixed for this
	        # process; the existing choice is kept.
	        return


	if __name__ == "__main__":
	    _select_spawn_start_method()

	    # Run the tests in this module with warnings suppressed.
	    unittest.main(warnings="ignore")