Transformers
File size: 729 Bytes
c2d916f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from typing import Union

from transformers import CLIPProcessor, CLIPTokenizer, CLIPTokenizerFast


class LongCLIPProcessor(CLIPProcessor):
    """CLIP processor that additionally tokenizes an optional *short_text*.

    Behaves exactly like ``CLIPProcessor`` for ``text``/``images``; when
    ``short_text`` is given, its token ids and attention mask are added to
    the returned encoding under ``short_input_ids`` /
    ``short_attention_mask`` (used by Long-CLIP style training, where a
    long and a short caption are encoded per image).
    """

    tokenizer: Union[CLIPTokenizer, CLIPTokenizerFast]

    def __call__(
        self, text=None, short_text=None, images=None, return_tensors=None, **kwargs
    ):
        """Process inputs and optionally append short-text encodings.

        Args:
            text: Main (long) caption(s), forwarded to ``CLIPProcessor``.
            short_text: Optional short caption(s) tokenized separately.
            images: Image input(s), forwarded to ``CLIPProcessor``.
            return_tensors: Framework for returned tensors (e.g. ``"pt"``).
            **kwargs: Extra tokenizer/feature-extractor arguments, passed
                to both the parent call and the short-text tokenization.

        Returns:
            A ``BatchEncoding`` from the parent processor, extended with
            ``short_input_ids`` and ``short_attention_mask`` when
            ``short_text`` is provided.
        """
        # Pass by keyword: CLIPProcessor.__call__'s positional parameter
        # order differs between transformers versions (some put `images`
        # first), so positional forwarding can silently misbind arguments.
        encoding = super().__call__(
            text=text, images=images, return_tensors=return_tensors, **kwargs
        )
        if short_text is not None:
            short_text_encoding = self.tokenizer(
                short_text, return_tensors=return_tensors, **kwargs
            )
            encoding["short_input_ids"] = short_text_encoding.input_ids
            encoding["short_attention_mask"] = short_text_encoding.attention_mask

        return encoding