Upload processor
Browse files - processing_taivisionlm.py +1 -29
processing_taivisionlm.py
CHANGED
|
@@ -285,32 +285,4 @@ class TaiVisionProcessor(ProcessorMixin):
|
|
| 285 |
def model_input_names(self):
|
| 286 |
tokenizer_input_names = self.tokenizer.model_input_names
|
| 287 |
image_processor_input_names = self.image_processor.model_input_names
|
| 288 |
-
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
# if __name__ == '__main__':
|
| 293 |
-
# from configuration_taivisionlm import TaiVisionLMConfig
|
| 294 |
-
# import transformers
|
| 295 |
-
# import torch
|
| 296 |
-
# config = TaiVisionLMConfig.from_pretrained("./")
|
| 297 |
-
# preprocessor = transformers.SiglipImageProcessor.from_pretrained("google/siglip-base-patch16-224")
|
| 298 |
-
# preprocessor.image_seq_length = config.num_image_tokens
|
| 299 |
-
# tokenizer = transformers.AutoTokenizer.from_pretrained("benchang1110/Taiwan-tinyllama-v1.0-chat")
|
| 300 |
-
# processor = TaiVisionProcessor(tokenizer=tokenizer, image_processor=preprocessor)
|
| 301 |
-
# processor.save_pretrained("./")
|
| 302 |
-
|
| 303 |
-
# from PIL import Image
|
| 304 |
-
# import requests
|
| 305 |
-
# processor = TaiVisionProcessor.from_pretrained("./")
|
| 306 |
-
# url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"
|
| 307 |
-
# image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
|
| 308 |
-
# text = "Hello< what is your name?"
|
| 309 |
-
# suffix = "I am fine, thank you."
|
| 310 |
-
# inputs = processor(text=text,suffix=suffix,images=image, return_tensors="pt",padding="max_length",max_length=512)
|
| 311 |
-
# print(inputs['attention_mask'].shape)
|
| 312 |
-
# print(inputs['input_ids'].shape)
|
| 313 |
-
# print(inputs['token_type_ids'].shape)
|
| 314 |
-
# # print number of 0 in token_type_ids
|
| 315 |
-
# print(torch.sum(inputs['token_type_ids']==0))
|
| 316 |
-
# print(inputs)
|
|
|
|
| 285 |
def model_input_names(self):
|
| 286 |
tokenizer_input_names = self.tokenizer.model_input_names
|
| 287 |
image_processor_input_names = self.image_processor.model_input_names
|
| 288 |
+
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|