Update modeling_gpt2vision.py
Browse files- modeling_gpt2vision.py +18 -1
modeling_gpt2vision.py
CHANGED
|
@@ -7,13 +7,30 @@ from .vision_encoder import VisionEncoder
|
|
| 7 |
from .configuration_gpt2vision import GPT2VisionConfig
|
| 8 |
from .modeling_gpt2 import GPT2LMHeadModel
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
class GPT2Vision(PreTrainedModel):
|
| 11 |
config_class = GPT2VisionConfig
|
| 12 |
|
| 13 |
def __init__(self, config):
|
| 14 |
super().__init__(config)
|
| 15 |
self.vision_encoder = VisionEncoder()
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
if isinstance(config.gpt2_config, dict):
|
| 18 |
gpt2_config = GPT2Config(**config.gpt2_config)
|
| 19 |
else:
|
|
|
|
| 7 |
from .configuration_gpt2vision import GPT2VisionConfig
|
| 8 |
from .modeling_gpt2 import GPT2LMHeadModel
|
| 9 |
|
| 10 |
+
# Special-token string that resize_token_embeds() registers with the tokenizer
# as an additional special token.
IMAGE_TOKEN = "<image>"
|
| 11 |
+
# End-of-answer marker string. Not referenced in the visible part of this
# file -- presumably appended to / matched against generated answers; TODO confirm.
ANSWER_EOS = "<|endoftext|>"
|
| 12 |
+
|
| 13 |
+
def resize_token_embeds(model_name="openai-community/gpt2"):
    """Build a tokenizer for ``model_name`` extended with the image token.

    Despite its name, this function does not resize any embedding matrix:
    it only loads the pretrained tokenizer and registers ``IMAGE_TOKEN`` as
    an additional special token. Resizing a model's embeddings to match the
    enlarged vocabulary is left to the caller.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        The loaded tokenizer with ``IMAGE_TOKEN`` added to its special tokens.
    """
    extended_tokenizer = AutoTokenizer.from_pretrained(model_name)
    extended_tokenizer.add_special_tokens(
        {"additional_special_tokens": [IMAGE_TOKEN]}
    )
    return extended_tokenizer
|
| 20 |
+
|
| 21 |
+
# Tokenizer created by calling resize_token_embeds() with its default model
# name. NOTE(review): this appears to be a module-level statement, so the
# tokenizer would be loaded at import time and shared by every user of this
# module -- confirm against the un-mangled file.
tokenizer = resize_token_embeds()
|
| 22 |
+
|
| 23 |
class GPT2Vision(PreTrainedModel):
|
| 24 |
config_class = GPT2VisionConfig
|
| 25 |
|
| 26 |
def __init__(self, config):
|
| 27 |
super().__init__(config)
|
| 28 |
self.vision_encoder = VisionEncoder()
|
| 29 |
+
self.language_model.resize_token_embeddings(len(tokenizer))
|
| 30 |
+
self.tokenizer = tokenizer
|
| 31 |
+
tokenizer.pad_token = tokenizer.eos_token
|
| 32 |
+
self.image_token_id = self.tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
|
| 33 |
+
|
| 34 |
if isinstance(config.gpt2_config, dict):
|
| 35 |
gpt2_config = GPT2Config(**config.gpt2_config)
|
| 36 |
else:
|