from xtuner.engine.hooks import DatasetInfoHook


class DatasetInfoHook_withSpecoalTokens(DatasetInfoHook):
    """``DatasetInfoHook`` variant that adds extra special tokens.

    After the parent initialiser has run, a fixed list of special tokens is
    added to ``self.tokenizer`` via ``add_tokens(..., special_tokens=True)``.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Initialise the parent hook, then register the special tokens.

        Args:
            tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Forwarded unchanged to
                ``DatasetInfoHook.__init__``. Defaults to ``False``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)

        # Special token for pixel grounding.
        segmentation_tokens = ['[SEG]']

        # Tokens for GCG.
        # NOTE(review): these two literals had been reduced to bare line
        # breaks (unterminated strings, i.e. a SyntaxError), which looks like
        # an HTML-stripping pass swallowing a '<p>' / '</p>' pair. Confirm
        # against the tokens the training data actually uses.
        phrase_tokens = ['<p>', '</p>']

        # Tokens for visual prompts.
        # NOTE(review): both literals had been reduced to empty strings,
        # presumably by the same markup stripping. '<region>' and '<mark>'
        # are a best-effort reconstruction -- TODO confirm before training.
        region_tokens = ['<region>']
        point_tokens = ['<mark>']

        special_tokens = (
            segmentation_tokens + phrase_tokens + region_tokens + point_tokens
        )
        # Relies on the parent initialiser having set ``self.tokenizer``.
        self.tokenizer.add_tokens(special_tokens, special_tokens=True)