|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
""" |
|
|
Image/Text processor class for ALIGN |
|
|
""" |
|
|
|
|
|
from ...processing_utils import ProcessingKwargs, ProcessorMixin |
|
|
|
|
|
|
|
|
class AlignProcessorKwargs(ProcessingKwargs, total=False):
    """Keyword-argument schema for `AlignProcessor` calls.

    Extends `ProcessingKwargs` with ALIGN-specific defaults: text inputs are
    padded to a fixed `max_length` of 64 tokens unless the caller overrides
    these values via `text_kwargs`.
    """

    _defaults = {
        "text_kwargs": {"padding": "max_length", "max_length": 64},
    }
|
|
|
|
|
|
|
|
class AlignProcessor(ProcessorMixin):
    r"""
    Constructs an ALIGN processor which wraps [`EfficientNetImageProcessor`] and
    [`BertTokenizer`]/[`BertTokenizerFast`] into a single processor that inherits both the image processor and
    tokenizer functionalities. See the [`~AlignProcessor.__call__`] and [`~AlignProcessor.decode`] for more
    information.
    The preferred way of passing kwargs is as a dictionary per modality, see usage example below.
    ```python
    from transformers import AlignProcessor
    from PIL import Image
    model_id = "kakaobrain/align-base"
    processor = AlignProcessor.from_pretrained(model_id)

    processor(
        images=your_pil_image,
        text=["What is that?"],
        images_kwargs = {"crop_size": {"height": 224, "width": 224}},
        text_kwargs = {"padding": "do_not_pad"},
        common_kwargs = {"return_tensors": "pt"},
    )
    ```

    Args:
        image_processor ([`EfficientNetImageProcessor`]):
            The image processor is a required input.
        tokenizer ([`BertTokenizer`, `BertTokenizerFast`]):
            The tokenizer is a required input.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "EfficientNetImageProcessor"
    tokenizer_class = ("BertTokenizer", "BertTokenizerFast")
    valid_processor_kwargs = AlignProcessorKwargs

    def __init__(self, image_processor, tokenizer):
        """Store the two required sub-processors via `ProcessorMixin`.

        Args:
            image_processor (`EfficientNetImageProcessor`):
                The image processor is a required input.
            tokenizer (`BertTokenizer` or `BertTokenizerFast`):
                The tokenizer is a required input.
        """
        super().__init__(image_processor, tokenizer)
|
|
|
|
|
|
|
|
__all__ = ["AlignProcessor"] |
|
|
|