momergul commited on
Commit
2e3384c
·
verified ·
1 Parent(s): c3474d7

Upload processor_flamingo.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. processor_flamingo.py +16 -8
processor_flamingo.py CHANGED
@@ -1,6 +1,7 @@
1
  from transformers import ProcessorMixin, AutoProcessor
2
  from transformers.models.auto.processing_auto import AutoProcessor
3
  from transformers.processing_utils import ProcessorMixin
 
4
  import json
5
  import os
6
 
@@ -8,12 +9,12 @@ class FlamingoProcessor(ProcessorMixin):
8
  """
9
  Custom processor that combines a tokenizer and feature extractor.
10
  """
11
- attributes = ["feature_extractor", "tokenizer"]
12
- feature_extractor_class = "AutoImageProcessor"
13
  tokenizer_class = "AutoTokenizer"
14
 
15
def __init__(self, feature_extractor, tokenizer):
    """Wire the feature extractor and tokenizer into ProcessorMixin.

    Args:
        feature_extractor: Image feature extractor (loaded via AutoImageProcessor).
        tokenizer: Text tokenizer (loaded via AutoTokenizer).
    """
    super().__init__(feature_extractor, tokenizer)
 
18
  def __call__(self, text=None, images=None, **kwargs):
19
  """
@@ -22,7 +23,7 @@ class FlamingoProcessor(ProcessorMixin):
22
  Args:
23
  text: Text input(s) to tokenize
24
  images: Image input(s) to process
25
- **kwargs: Additional arguments passed to tokenizer/feature_extractor
26
 
27
  Returns:
28
  Dictionary with processed inputs
@@ -37,18 +38,25 @@ class FlamingoProcessor(ProcessorMixin):
37
  if type(text) == str:
38
  all_text = "<image> " + text
39
  else:
40
- all_text = ["<image> " + _text for _text in text]
 
 
 
41
  text_encoding = self.tokenizer(all_text, **kwargs)
 
 
 
 
42
  encoding.update(text_encoding)
43
 
44
  # Process images if provided
45
  if images is not None:
46
- image_encoding = self.feature_extractor(images, **kwargs)
47
  # Add prefix to avoid key conflicts
48
  for key, value in image_encoding.items():
49
  encoding[f"pixel_values" if key == "pixel_values" else f"image_{key}"] = value
50
 
51
- return encoding
52
 
53
  def batch_decode(self, *args, **kwargs):
54
  """
 
1
  from transformers import ProcessorMixin, AutoProcessor
2
  from transformers.models.auto.processing_auto import AutoProcessor
3
  from transformers.processing_utils import ProcessorMixin
4
+ from transformers.tokenization_utils_base import BatchEncoding
5
  import json
6
  import os
7
 
 
9
  """
10
  Custom processor that combines a tokenizer and feature extractor.
11
  """
12
# ProcessorMixin wiring: the sub-processor attribute names and the Auto*
# classes used to (de)serialize them with save_pretrained/from_pretrained.
attributes = ["image_processor", "tokenizer"]
image_processor_class = "AutoImageProcessor"
tokenizer_class = "AutoTokenizer"
15
 
16
def __init__(self, image_processor, tokenizer):
    """Wire the image processor and tokenizer into ProcessorMixin.

    Args:
        image_processor: Image processor (loaded via AutoImageProcessor).
        tokenizer: Text tokenizer (loaded via AutoTokenizer).
    """
    super().__init__(image_processor, tokenizer)
18
 
19
  def __call__(self, text=None, images=None, **kwargs):
20
  """
 
23
  Args:
24
  text: Text input(s) to tokenize
25
  images: Image input(s) to process
26
+ **kwargs: Additional arguments passed to tokenizer/image_processor
27
 
28
  Returns:
29
  Dictionary with processed inputs
 
38
  if type(text) == str:
39
  all_text = "<image> " + text
40
  else:
41
+ if type(text[0]) == str:
42
+ all_text = ["<image> " + _text for _text in text]
43
+ else:
44
+ all_text = ['<image> ' + " ".join(_text) for _text in text]
45
# Tokenize the prefixed text and merge the result into the shared encoding.
# NOTE: the leftover `import pdb; pdb.set_trace()` debugger breakpoint has
# been removed — it would suspend execution on every call to the processor.
text_encoding = self.tokenizer(all_text, **kwargs)
encoding.update(text_encoding)
51
 
52
  # Process images if provided
53
  if images is not None:
54
+ image_encoding = self.image_processor(images, **kwargs)
55
  # Add prefix to avoid key conflicts
56
  for key, value in image_encoding.items():
57
  encoding[f"pixel_values" if key == "pixel_values" else f"image_{key}"] = value
58
 
59
+ return BatchEncoding(encoding)
60
 
61
  def batch_decode(self, *args, **kwargs):
62
  """