| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import multiprocessing as mp |
| import unittest |
|
|
| import torch |
|
|
| from sglang.test.ci.ci_register import register_amd_ci, register_cuda_ci |
| from sglang.test.lora_utils import CI_MULTI_LORA_MODELS, run_lora_test_one_by_one |
| from sglang.test.test_utils import CustomTestCase |
|
|
| register_cuda_ci(est_time=200, suite="nightly-1-gpu", nightly=True) |
| register_amd_ci(est_time=200, suite="nightly-amd-1-gpu", nightly=True) |
|
|
| PROMPTS = [ |
| "AI is a field of computer science focused on", |
| """ |
| ### Instruction: |
| Tell me about llamas and alpacas |
| ### Response: |
| Llamas are large, long-necked animals with a woolly coat. They have two toes on each foot instead of three like other camelids. |
| ### Question: |
| What do you know about llamas? |
| ### Answer: |
| """, |
| ] |
|
|
|
|
| class TestLoRARadixCache(CustomTestCase): |
|
|
| def test_lora_radix_cache(self): |
| |
| model_case = CI_MULTI_LORA_MODELS[0] |
|
|
| torch_dtype = torch.float16 |
| max_new_tokens = 32 |
| batch_prompts = ( |
| PROMPTS |
| if not model_case.skip_long_prompt |
| else [p for p in PROMPTS if len(p) < 1000] |
| ) |
|
|
| |
| run_lora_test_one_by_one( |
| batch_prompts, |
| model_case, |
| torch_dtype, |
| max_new_tokens=max_new_tokens, |
| disable_radix_cache=False, |
| test_tag="lora-with-radix-cache", |
| ) |
|
|
| |
| run_lora_test_one_by_one( |
| batch_prompts, |
| model_case, |
| torch_dtype, |
| max_new_tokens=max_new_tokens, |
| disable_radix_cache=True, |
| test_tag="lora-without-radix-cache", |
| ) |
|
|
|
|
| if __name__ == "__main__": |
| try: |
| mp.set_start_method("spawn") |
| except RuntimeError: |
| pass |
|
|
| unittest.main(warnings="ignore") |
|
|