| from xtuner.engine.hooks import DatasetInfoHook | |
| from ..dataset.utils import VPT_CONTEXT_TOKEN, VPT_START_TOKEN, VPT_END_TOKEN | |
class DatasetInfoHook_withSpecialTokens(DatasetInfoHook):
    """``DatasetInfoHook`` variant that registers extra special tokens.

    On top of the parent hook's initialisation, construction adds
    ``VPT_CONTEXT_TOKEN`` to ``self.tokenizer`` as a special token.

    Args:
        tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
        is_intern_repo_dataset: Forwarded unchanged to
            ``DatasetInfoHook.__init__``. Defaults to ``False``.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        super().__init__(tokenizer, is_intern_repo_dataset)
        # Assumes the parent constructor has populated ``self.tokenizer``
        # by this point -- TODO confirm against DatasetInfoHook.
        self._add_special_tokens()

    def _add_special_tokens(self):
        """Add the VPT context token to ``self.tokenizer`` as a special token."""
        # NOTE(review): VPT_START_TOKEN / VPT_END_TOKEN are imported by this
        # module but are not registered here -- confirm this is intentional.
        special_tokens = [VPT_CONTEXT_TOKEN]
        # The result of add_tokens is captured but not used in the visible
        # code; presumably it is the number of newly added tokens -- verify
        # against the tokenizer's API.
        num_new_tokens = self.tokenizer.add_tokens(
            special_tokens,
            special_tokens=True,
        )