Upload preprocessing_molmo.py with huggingface_hub
Browse files- preprocessing_molmo.py +20 -6
preprocessing_molmo.py
CHANGED
|
@@ -2,9 +2,11 @@
|
|
| 2 |
Processor class for Molmo.
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
from typing import
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
| 8 |
|
| 9 |
try:
|
| 10 |
from typing import Unpack
|
|
@@ -25,7 +27,7 @@ from transformers.tokenization_utils_base import TextInput
|
|
| 25 |
from transformers.utils import logging
|
| 26 |
|
| 27 |
from transformers import AutoTokenizer
|
| 28 |
-
from .image_preprocessing_molmo import MolmoImagesKwargs,
|
| 29 |
|
| 30 |
|
| 31 |
logger = logging.get_logger(__name__)
|
|
@@ -81,7 +83,7 @@ class MolmoProcessorKwargs(ProcessingKwargs, total=False):
|
|
| 81 |
class MolmoProcessor(ProcessorMixin):
|
| 82 |
attributes = ["image_processor", "tokenizer"]
|
| 83 |
image_processor_class = "AutoImageProcessor"
|
| 84 |
-
tokenizer_class = ("
|
| 85 |
|
| 86 |
def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
|
| 87 |
# self.image_processor = image_processor
|
|
@@ -131,8 +133,20 @@ class MolmoProcessor(ProcessorMixin):
|
|
| 131 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
| 132 |
|
| 133 |
if images is not None:
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
# For now only support inserting images at the start
|
| 137 |
image_idx = [-1]*len(images)
|
| 138 |
else:
|
|
|
|
| 2 |
Processor class for Molmo.
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
from typing import Optional
|
| 6 |
|
| 7 |
+
import PIL
|
| 8 |
+
from PIL import ImageOps
|
| 9 |
+
from PIL.Image import Image
|
| 10 |
|
| 11 |
try:
|
| 12 |
from typing import Unpack
|
|
|
|
| 27 |
from transformers.utils import logging
|
| 28 |
|
| 29 |
from transformers import AutoTokenizer
|
| 30 |
+
from .image_preprocessing_molmo import MolmoImagesKwargs, MolmoImageProcessor
|
| 31 |
|
| 32 |
|
| 33 |
logger = logging.get_logger(__name__)
|
|
|
|
| 83 |
class MolmoProcessor(ProcessorMixin):
|
| 84 |
attributes = ["image_processor", "tokenizer"]
|
| 85 |
image_processor_class = "AutoImageProcessor"
|
| 86 |
+
tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")
|
| 87 |
|
| 88 |
def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
|
| 89 |
# self.image_processor = image_processor
|
|
|
|
| 133 |
image_token_id = self.special_token_ids[IMAGE_PROMPT]
|
| 134 |
|
| 135 |
if images is not None:
|
| 136 |
+
if not isinstance(images, (list, tuple)):
|
| 137 |
+
images = [images]
|
| 138 |
+
image_arrays = []
|
| 139 |
+
for image in images:
|
| 140 |
+
if isinstance(image, Image):
|
| 141 |
+
image = image.convert("RGB")
|
| 142 |
+
# Handle images with EXIF orientation tags, which PIL will ignore by default
|
| 143 |
+
# https://github.com/python-pillow/Pillow/issues/4703
|
| 144 |
+
image = ImageOps.exif_transpose(image)  # rebind `image` so the orientation-corrected copy is the one converted to an array
|
| 145 |
+
image_arrays.append(np.array(image))
|
| 146 |
+
else:
|
| 147 |
+
assert len(image.shape) == 3 and image.shape[-1] == 3
|
| 148 |
+
image_arrays.append(image.astype(np.uint8))
|
| 149 |
+
images = image_arrays
|
| 150 |
# For now only support inserting images at the start
|
| 151 |
image_idx = [-1]*len(images)
|
| 152 |
else:
|