DenseLabelDev / projects /omg_llava /engine /dataset_info_hook.py
zhouyik's picture
Upload folder using huggingface_hub
032e687 verified
from xtuner.engine.hooks import DatasetInfoHook
class DatasetInfoHook_withSpecoalTokens(DatasetInfoHook):
    """``DatasetInfoHook`` variant that registers extra special tokens.

    Once the parent initializer has run, these tokens are added to
    ``self.tokenizer`` in this exact order through
    ``add_tokens(..., special_tokens=True)``:
    ``[SEG]``, ``<p>``, ``</p>``, ``<region>``, ``<mark>``.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        """Run the parent initializer, then add the special tokens.

        Args:
            tokenizer: Passed through unchanged to
                ``DatasetInfoHook.__init__``.
            is_intern_repo_dataset: Passed through unchanged to
                ``DatasetInfoHook.__init__``.
        """
        super().__init__(tokenizer, is_intern_repo_dataset)

        # NOTE(review): relies on the parent initializer having set
        # ``self.tokenizer`` to an object exposing ``add_tokens`` — confirm
        # against the installed xtuner version.
        extra_tokens = [
            '[SEG]',     # pixel grounding (segmentation)
            '<p>',       # GCG phrase tag
            '</p>',      # GCG phrase tag
            '<region>',  # visual prompt: region
            '<mark>',    # visual prompt: point
        ]
        self.tokenizer.add_tokens(extra_tokens, special_tokens=True)