HuggingFaceM4
/

VLM_WebSight_finetuned

Text Generation

Model card Files and versions

VictorSanh committed on Jan 8, 2024

Commit

b2fcf7c

·

1 Parent(s): f91fa43

cleaning

Files changed (1) hide show

processing_img2html.py +0 -64

processing_img2html.py CHANGED Viewed

@@ -32,70 +32,6 @@ if is_torch_available():
 IMAGE_TOKEN = "<image>"
-# copied from m4.training.packing
-def incremental_to_binary_attention_mask(incremental_mask, num_classes=-1):
-    # This function converts: [-1, 0, 1] => [[0, 0], [1, 0], [0, 1]]
-    # If any of images index are more than num_classes, set them to -1.
-    # Words after the max number of images allowed have been seen don't attend on anything
-    if num_classes != -1:
-        incremental_mask[incremental_mask >= num_classes] = -1
-    negatives = incremental_mask == -1
-    incremental_mask[negatives] = 0
-    attn_mask = torch.nn.functional.one_hot(incremental_mask, num_classes=num_classes)
-    attn_mask[negatives, :] = 0
-    return attn_mask
-# copied from m4.training.packing
-def image_attention_mask_for_packed_input_ids(input_ids, tokenizer):
-    image_attention_mask = torch.full_like(input_ids, fill_value=-1)
-    next_image_attention_mask = torch.full_like(input_ids, fill_value=-1)
-    image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
-    eod_token_id = tokenizer.eos_token_id
-    for batch_idx in range(input_ids.size(0)):
-        count = -1
-        seen_eod = False
-        for idx, token_id in enumerate(input_ids[batch_idx]):
-            if token_id == image_token_id:
-                count += 1
-                image_attention_mask[batch_idx][idx] = count
-                seen_eod = False
-            else:
-                image_attention_mask[batch_idx][idx] = count
-            if seen_eod:
-                image_attention_mask[batch_idx][idx] = -1
-            if token_id == eod_token_id:
-                seen_eod = True
-    for batch_idx in range(input_ids.size(0)):
-        count = -1
-        seen_eod = False
-        for idx in range(input_ids[batch_idx].size(0) - 1, -1, -1):
-            token_id = input_ids[batch_idx][idx]
-            if token_id == image_token_id:
-                count += 1
-                next_image_attention_mask[batch_idx][idx] = count
-                seen_eod = False
-            else:
-                next_image_attention_mask[batch_idx][idx] = count
-            if token_id == eod_token_id:
-                seen_eod = True
-            if seen_eod:
-                next_image_attention_mask[batch_idx][idx] = -1
-        non_negative_indices = next_image_attention_mask[batch_idx] != -1
-        next_image_attention_mask[batch_idx][non_negative_indices] -= count
-        next_image_attention_mask[batch_idx][non_negative_indices] *= -1
-    return image_attention_mask, next_image_attention_mask
 def is_url(string):
     """Checks if the passed string contains a valid url and nothing else. e.g. if space is included it's immediately
     invalidated the url"""

 IMAGE_TOKEN = "<image>"
 def is_url(string):
     """Checks if the passed string contains a valid url and nothing else. e.g. if space is included it's immediately
     invalidated the url"""