| from transformers import AutoTokenizer
|
|
|
| from rllm.parser import (
|
| ChatTemplateParser,
|
| DeepseekQwenChatTemplateParser,
|
| LlamaChatTemplateParser,
|
| QwenChatTemplateParser,
|
| )
|
| from rllm.parser.utils import PARSER_TEST_MESSAGES
|
|
|
|
|
def test_qwen_chat_template_parser():
    """Exercise ``QwenChatTemplateParser`` with the ``Qwen/Qwen3-4B`` tokenizer.

    The test asserts that ``verify_equivalence`` returns a truthy value for the
    shared ``PARSER_TEST_MESSAGES`` and that ``parse`` (called with
    ``add_generation_prompt=True``) yields a non-empty ``str`` containing the
    parser's ``assistant_token``.

    The tokenizer is loaded by hub model id, so it presumably has to be
    downloadable or already present in the local cache.
    """
    parser = QwenChatTemplateParser(AutoTokenizer.from_pretrained("Qwen/Qwen3-4B"))

    # Equivalence check on the shared fixture conversation.
    equivalent = parser.verify_equivalence(PARSER_TEST_MESSAGES)
    assert equivalent

    # Rendering with a generation prompt: type, non-emptiness, assistant token.
    rendered = parser.parse(PARSER_TEST_MESSAGES, add_generation_prompt=True)
    assert isinstance(rendered, str)
    assert len(rendered) >= 1
    assert parser.assistant_token in rendered
|
|
|
|
|
def test_deepseek_qwen_chat_template_parser():
    """Exercise ``DeepseekQwenChatTemplateParser`` with a DeepSeek-R1 distill tokenizer.

    The test asserts that ``verify_equivalence`` returns a truthy value for the
    shared ``PARSER_TEST_MESSAGES`` and that ``parse`` (called with its default
    arguments) yields a non-empty ``str``.

    The tokenizer is loaded by hub model id, so it presumably has to be
    downloadable or already present in the local cache.
    """
    model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
    parser = DeepseekQwenChatTemplateParser(AutoTokenizer.from_pretrained(model_id))

    # Equivalence check on the shared fixture conversation.
    equivalent = parser.verify_equivalence(PARSER_TEST_MESSAGES)
    assert equivalent

    # Rendering with default options: only type and non-emptiness are checked.
    rendered = parser.parse(PARSER_TEST_MESSAGES)
    assert isinstance(rendered, str)
    assert len(rendered) >= 1
|
|
|
|
|
def test_llama_chat_template_parser():
    """Exercise ``LlamaChatTemplateParser`` with the TinyLlama chat tokenizer.

    The test asserts that ``verify_equivalence`` returns a truthy value for the
    shared ``PARSER_TEST_MESSAGES`` and that ``parse`` (called with its default
    arguments) yields a non-empty ``str`` containing the parser's
    ``assistant_token``.

    The tokenizer is loaded by hub model id, so it presumably has to be
    downloadable or already present in the local cache.
    """
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    parser = LlamaChatTemplateParser(AutoTokenizer.from_pretrained(model_id))

    # Equivalence check on the shared fixture conversation.
    equivalent = parser.verify_equivalence(PARSER_TEST_MESSAGES)
    assert equivalent

    # Rendering with default options: type, non-emptiness, assistant token.
    rendered = parser.parse(PARSER_TEST_MESSAGES)
    assert isinstance(rendered, str)
    assert len(rendered) >= 1
    assert parser.assistant_token in rendered
|
|
|
|
|
def test_parser_factory():
    """Check which parser class ``ChatTemplateParser.get_parser`` selects.

    For each (hub model id, expected parser class) pair the tokenizer is
    loaded, handed to ``get_parser``, and the returned object must be an
    instance of the expected class and must pass ``verify_equivalence`` on the
    shared ``PARSER_TEST_MESSAGES``.
    """
    # Order matters only for which failure surfaces first: Qwen, then DeepSeek.
    expectations = (
        ("Qwen/Qwen3-4B", QwenChatTemplateParser),
        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", DeepseekQwenChatTemplateParser),
    )

    for model_id, expected_cls in expectations:
        selected = ChatTemplateParser.get_parser(AutoTokenizer.from_pretrained(model_id))
        assert isinstance(selected, expected_cls)
        assert selected.verify_equivalence(PARSER_TEST_MESSAGES)
|
|
|
|
|
def test_parser_with_disable_thinking():
    """Exercise ``QwenChatTemplateParser`` built with ``disable_thinking=True``.

    The test asserts that the parser's ``assistant_token`` contains an empty
    think block marker and that ``verify_equivalence`` still returns a truthy
    value for the shared ``PARSER_TEST_MESSAGES``.
    """
    parser = QwenChatTemplateParser(
        AutoTokenizer.from_pretrained("Qwen/Qwen3-4B"),
        disable_thinking=True,
    )

    # The marker is written with escaped backslashes, i.e. it contains the
    # two-character sequence backslash + "n" rather than newline characters.
    # NOTE(review): confirm this is what the parser really puts in
    # assistant_token and not an accidental double escape.
    empty_think_marker = "<think>\\n\\n</think>\\n\\n"
    assert empty_think_marker in parser.assistant_token

    # Equivalence check on the shared fixture conversation.
    equivalent = parser.verify_equivalence(PARSER_TEST_MESSAGES)
    assert equivalent
|
|
|