|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
|
Processor class for Bros. |
|
|
""" |
|
|
|
|
|
from ...processing_utils import ProcessingKwargs, ProcessorMixin |
|
|
|
|
|
|
|
|
class BrosProcessorKwargs(ProcessingKwargs, total=False): |
|
|
_defaults = { |
|
|
"text_kwargs": { |
|
|
"add_special_tokens": True, |
|
|
"padding": False, |
|
|
"stride": 0, |
|
|
"return_overflowing_tokens": False, |
|
|
"return_special_tokens_mask": False, |
|
|
"return_offsets_mapping": False, |
|
|
"return_length": False, |
|
|
"verbose": True, |
|
|
}, |
|
|
} |
|
|
|
|
|
|
|
|
class BrosProcessor(ProcessorMixin): |
|
|
r""" |
|
|
Constructs a Bros processor which wraps a BERT tokenizer. |
|
|
|
|
|
[`BrosProcessor`] offers all the functionalities of [`BertTokenizerFast`]. See the docstring of |
|
|
[`~BrosProcessor.__call__`] and [`~BrosProcessor.decode`] for more information. |
|
|
|
|
|
Args: |
|
|
tokenizer (`BertTokenizerFast`, *optional*): |
|
|
An instance of ['BertTokenizerFast`]. The tokenizer is a required input. |
|
|
""" |
|
|
|
|
|
attributes = ["tokenizer"] |
|
|
tokenizer_class = ("BertTokenizer", "BertTokenizerFast") |
|
|
valid_processor_kwargs = BrosProcessorKwargs |
|
|
|
|
|
def __init__(self, tokenizer=None, **kwargs): |
|
|
if tokenizer is None: |
|
|
raise ValueError("You need to specify a `tokenizer`.") |
|
|
|
|
|
super().__init__(tokenizer) |
|
|
|
|
|
|
|
|
__all__ = ["BrosProcessor"] |
|
|
|