Transformers
LongCLIP-B / processing_longclip.py
shunk031's picture
Upload processor
c2d916f verified
raw
history blame contribute delete
729 Bytes
from typing import Union
from transformers import CLIPProcessor, CLIPTokenizer, CLIPTokenizerFast
class LongCLIPProcessor(CLIPProcessor):
    """CLIP processor that can also tokenize an extra ``short_text`` input.

    The regular ``text`` / ``images`` inputs are handled by the parent
    ``CLIPProcessor``. When ``short_text`` is supplied, it is tokenized
    with ``self.tokenizer`` and the result is attached to the returned
    encoding under the ``short_input_ids`` and ``short_attention_mask`` keys.
    """

    tokenizer: Union[CLIPTokenizer, CLIPTokenizerFast]

    def __call__(
        self, text=None, short_text=None, images=None, return_tensors=None, **kwargs
    ):
        """Encode ``text`` / ``images`` and, optionally, ``short_text``.

        Args:
            text: Forwarded to the parent processor as ``text``.
            short_text: Optional extra text. When not ``None`` it is
                tokenized separately with ``self.tokenizer``.
            images: Forwarded to the parent processor as ``images``.
            return_tensors: Forwarded both to the parent processor and to
                the tokenizer call made for ``short_text``.
            **kwargs: Forwarded unchanged both to the parent processor and
                to the tokenizer call made for ``short_text``.

        Returns:
            The encoding produced by the parent processor. If ``short_text``
            was given, it additionally holds ``short_input_ids`` and
            ``short_attention_mask``.
        """
        # Pass by keyword: the parent's positional parameter order is not
        # something this subclass controls (NOTE(review): newer transformers
        # releases appear to put ``images`` before ``text`` -- confirm), and
        # a positional call would then silently swap the inputs.
        encoding = super().__call__(
            text=text, images=images, return_tensors=return_tensors, **kwargs
        )

        if short_text is not None:
            short_text_encoding = self.tokenizer(
                short_text, return_tensors=return_tensors, **kwargs
            )
            encoding["short_input_ids"] = short_text_encoding.input_ids
            encoding["short_attention_mask"] = short_text_encoding.attention_mask

        return encoding