from xtuner.engine.hooks import DatasetInfoHook
class DatasetInfoHook_withSpecoalTokens(DatasetInfoHook):
    """DatasetInfoHook variant that registers extra special tokens.

    After the parent hook has been initialised, the marker tokens used for
    pixel grounding, GCG phrases and visual prompts are added to
    ``self.tokenizer`` as special tokens.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Initialise the parent hook, then extend its tokenizer.

        Args:
            tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Forwarded unchanged to
                ``DatasetInfoHook.__init__``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)

        # NOTE(review): relies on the parent constructor having set
        # ``self.tokenizer`` to an object exposing ``add_tokens`` -- confirm
        # against the DatasetInfoHook implementation.
        extra_tokens = [
            '[SEG]',     # pixel grounding (segmentation)
            '<p>',       # GCG phrase start
            '</p>',      # GCG phrase end
            '<region>',  # visual prompt: region
            '<mark>',    # visual prompt: point
        ]
        self.tokenizer.add_tokens(extra_tokens, special_tokens=True)