File size: 729 Bytes
032e687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
from xtuner.engine.hooks import DatasetInfoHook

class DatasetInfoHook_withSpecoalTokens(DatasetInfoHook):
    """A ``DatasetInfoHook`` that also registers extra special tokens.

    After the parent initializer has run, five additional tokens are added
    to ``self.tokenizer`` through ``add_tokens(..., special_tokens=True)``:
    ``[SEG]``, ``<p>``, ``</p>``, ``<region>`` and ``<mark>`` (in that order).

    Args:
        tokenizer: Forwarded unchanged to ``DatasetInfoHook.__init__``.
        is_intern_repo_dataset: Forwarded unchanged to
            ``DatasetInfoHook.__init__``. Defaults to ``False``.
    """

    def __init__(self, tokenizer, is_intern_repo_dataset=False):
        super().__init__(tokenizer, is_intern_repo_dataset)

        # NOTE(review): relies on the parent initializer having set
        # ``self.tokenizer`` to an object exposing ``add_tokens`` -- confirm
        # against DatasetInfoHook.
        extra_special_tokens = [
            '[SEG]',     # pixel grounding (segmentation)
            '<p>',       # GCG phrase start
            '</p>',      # GCG phrase end
            '<region>',  # visual prompt: region
            '<mark>',    # visual prompt: point
        ]
        self.tokenizer.add_tokens(extra_special_tokens, special_tokens=True)