from xtuner.engine.hooks import DatasetInfoHook

from ..dataset.utils import VPT_CONTEXT_TOKEN, VPT_START_TOKEN, VPT_END_TOKEN


class DatasetInfoHook_withSpecialTokens(DatasetInfoHook):
    """A ``DatasetInfoHook`` that adds extra special tokens to its tokenizer.

    Right after the parent initializer runs, ``VPT_CONTEXT_TOKEN`` is added
    to ``self.tokenizer`` as a special token.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Initialize the parent hook, then register the special tokens.

        Args:
            tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Forwarded unchanged to
                ``DatasetInfoHook.__init__``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)
        # ``self.tokenizer`` is not assigned in this class; it is expected to
        # have been set by the parent initializer above.
        self._add_special_tokens()

    def _add_special_tokens(self):
        """Add ``VPT_CONTEXT_TOKEN`` to ``self.tokenizer`` as a special token."""
        extra_tokens = [VPT_CONTEXT_TOKEN]
        # The value returned by ``add_tokens`` is intentionally not kept.
        # NOTE(review): if the tokenizer's vocabulary grows here, the model's
        # embedding table presumably has to be resized somewhere else --
        # confirm against the training setup.
        self.tokenizer.add_tokens(extra_tokens, special_tokens=True)