Commit
·
b2fcf7c
1
Parent(s):
f91fa43
cleaning
Browse files - processing_img2html.py +0 -64
processing_img2html.py
CHANGED
|
@@ -32,70 +32,6 @@ if is_torch_available():
|
|
| 32 |
# Literal placeholder string for an image in text; the packing helpers in this file look up its id via the tokenizer.
IMAGE_TOKEN = "<image>"
|
| 33 |
|
| 34 |
|
| 35 |
-
# copied from m4.training.packing
|
| 36 |
-
def incremental_to_binary_attention_mask(incremental_mask, num_classes=-1):
    """Convert a tensor of image indices into a one-hot attention mask.

    Each entry of ``incremental_mask`` is either ``-1`` (attend to no image)
    or a non-negative image index. The result gains a trailing dimension in
    which index ``k`` becomes a one-hot row with a 1 at position ``k`` and
    ``-1`` becomes an all-zero row, e.g. with two classes::

        [-1, 0, 1] => [[0, 0], [1, 0], [0, 1]]

    Args:
        incremental_mask: Integer tensor of image indices, with ``-1`` meaning
            "no image". It is not modified by this function.
        num_classes: Size of the trailing one-hot dimension. When it is not
            ``-1``, every index greater than or equal to ``num_classes`` is
            treated as ``-1``, so tokens seen after the maximum number of
            images attend to nothing. With the default ``-1`` the size is
            inferred by ``torch.nn.functional.one_hot``.

    Returns:
        The one-hot tensor produced by ``torch.nn.functional.one_hot`` with
        the rows belonging to ``-1`` entries zeroed out.
    """
    # Work on a copy: the index assignments below would otherwise overwrite
    # the caller's tensor as a hidden side effect.
    incremental_mask = incremental_mask.clone()

    if num_classes != -1:
        incremental_mask[incremental_mask >= num_classes] = -1

    # one_hot rejects negative class values, so map -1 to class 0 for the
    # conversion and clear those rows afterwards.
    negatives = incremental_mask == -1
    incremental_mask[negatives] = 0
    attn_mask = torch.nn.functional.one_hot(incremental_mask, num_classes=num_classes)
    attn_mask[negatives, :] = 0
    return attn_mask
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
# copied from m4.training.packing
|
| 52 |
-
def image_attention_mask_for_packed_input_ids(input_ids, tokenizer):
    """Compute per-token image indices for packed sequences, in both directions.

    Two tensors shaped like ``input_ids`` are produced, both initialised to -1:

    * ``image_attention_mask`` comes from a left-to-right scan. Each token gets
      the zero-based index of the latest image token at or before it, or -1 if
      none has been seen yet in the row. Tokens that follow an end-of-document
      token get -1 until the next image token; the end-of-document token itself
      keeps the current index.
    * ``next_image_attention_mask`` comes from a right-to-left scan. Each token
      gets the zero-based (left-to-right) index of the first image token at or
      after it, or -1 if there is none. An end-of-document token and the tokens
      to its left get -1 until an image token is reached.

    Args:
        input_ids: Token id tensor indexed as ``[batch, position]``.
        tokenizer: Object providing ``convert_tokens_to_ids`` and
            ``eos_token_id``; the latter is used as the end-of-document id.

    Returns:
        Tuple ``(image_attention_mask, next_image_attention_mask)``.
    """
    image_attention_mask = torch.full_like(input_ids, fill_value=-1)
    next_image_attention_mask = torch.full_like(input_ids, fill_value=-1)
    image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
    eod_token_id = tokenizer.eos_token_id
    num_rows = input_ids.size(0)

    # Left-to-right scan: index of the most recent image.
    for row in range(num_rows):
        images_seen = -1
        after_eod = False
        for pos, tok in enumerate(input_ids[row]):
            if tok == image_token_id:
                images_seen += 1
                after_eod = False
            image_attention_mask[row][pos] = -1 if after_eod else images_seen
            # The flag is raised only after writing, so the end-of-document
            # token itself keeps the current image index.
            if tok == eod_token_id:
                after_eod = True

    # Right-to-left scan: index of the next image.
    for row in range(num_rows):
        images_seen = -1
        after_eod = False
        row_tokens = input_ids[row]
        for pos in reversed(range(row_tokens.size(0))):
            tok = row_tokens[pos]
            if tok == image_token_id:
                images_seen += 1
                after_eod = False
            # Here the flag is raised before writing, so the end-of-document
            # token itself is masked out.
            if tok == eod_token_id:
                after_eod = True
            next_image_attention_mask[row][pos] = -1 if after_eod else images_seen

        # The scan counted images from the right; flip the valid entries so
        # they become left-to-right indices.
        row_mask = next_image_attention_mask[row]
        keep = row_mask != -1
        row_mask[keep] -= images_seen
        row_mask[keep] *= -1

    return image_attention_mask, next_image_attention_mask
|
| 97 |
-
|
| 98 |
-
|
| 99 |
def is_url(string):
|
| 100 |
"""Checks if the passed string contains a valid url and nothing else. e.g. if space is included it's immediately
|
| 101 |
invalidated the url"""
|
|
|
|
| 32 |
# Literal placeholder string for an image in text (presumably a special token in the tokenizer vocabulary; confirm).
IMAGE_TOKEN = "<image>"
|
| 33 |
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
def is_url(string):
|
| 36 |
"""Checks if the passed string contains a valid url and nothing else. e.g. if space is included it's immediately
|
| 37 |
invalidated the url"""
|