feipengma committed on
Commit ·
ff26c9f
1
Parent(s): f1298e6
update wemm
Browse files- configuration_wemm.py +0 -5
- connector.py +1 -10
- image_processor.py +1 -1
- vision_model.py +0 -11
configuration_wemm.py
CHANGED
|
@@ -63,8 +63,3 @@ class WeMMConfig(PretrainedConfig):
|
|
| 63 |
self.tokenizer_config = tokenizer_config
|
| 64 |
|
| 65 |
super().__init__(**kwargs)
|
| 66 |
-
|
| 67 |
-
if __name__=="__main__":
|
| 68 |
-
wemm_config_path = "/mnt/csp/mmvision/home/feipengma/projects/wemm_evaluation/WeMM/config.json"
|
| 69 |
-
wemm_config = WeMMConfig.from_pretrained(wemm_config_path)
|
| 70 |
-
print(wemm_config.connector_config)
|
|
|
|
| 63 |
self.tokenizer_config = tokenizer_config
|
| 64 |
|
| 65 |
super().__init__(**kwargs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
connector.py
CHANGED
|
@@ -668,15 +668,6 @@ class Idefics2Qformer(nn.Module):
|
|
| 668 |
False,
|
| 669 |
False,
|
| 670 |
use_reentrant=True)
|
| 671 |
-
#layer_outputs = perceiver_layer(
|
| 672 |
-
# compressed_context,
|
| 673 |
-
# context,
|
| 674 |
-
# attention_mask=attention_mask,
|
| 675 |
-
# position_ids=None,
|
| 676 |
-
# past_key_value=None,
|
| 677 |
-
# output_attentions=False,
|
| 678 |
-
# use_cache=False,
|
| 679 |
-
#)
|
| 680 |
compressed_context = layer_outputs[0]
|
| 681 |
#all_latents.append(compressed_context)
|
| 682 |
|
|
@@ -710,7 +701,7 @@ class Idefics2Connector(PreTrainedModel):
|
|
| 710 |
return reshaped_image_hidden_states
|
| 711 |
|
| 712 |
@classmethod
|
| 713 |
-
def from_pretrained(self, config_path
|
| 714 |
config = Idefics2ConnectorConfig.from_pretrained(f'{config_path}/config.json')
|
| 715 |
cls = Idefics2Connector(config=config)
|
| 716 |
|
|
|
|
| 668 |
False,
|
| 669 |
False,
|
| 670 |
use_reentrant=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
compressed_context = layer_outputs[0]
|
| 672 |
#all_latents.append(compressed_context)
|
| 673 |
|
|
|
|
| 701 |
return reshaped_image_hidden_states
|
| 702 |
|
| 703 |
@classmethod
|
| 704 |
+
def from_pretrained(self, config_path):
|
| 705 |
config = Idefics2ConnectorConfig.from_pretrained(f'{config_path}/config.json')
|
| 706 |
cls = Idefics2Connector(config=config)
|
| 707 |
|
image_processor.py
CHANGED
|
@@ -636,7 +636,7 @@ class Idefics2ImageProcessor(BaseImageProcessor):
|
|
| 636 |
return BatchFeature(data=data, tensor_type=return_tensors)
|
| 637 |
|
| 638 |
@classmethod
|
| 639 |
-
def from_pretrained(self, config_path
|
| 640 |
with open(f'{config_path}/config.json', "r", encoding="utf-8") as f:
|
| 641 |
config = json.load(f)
|
| 642 |
|
|
|
|
| 636 |
return BatchFeature(data=data, tensor_type=return_tensors)
|
| 637 |
|
| 638 |
@classmethod
|
| 639 |
+
def from_pretrained(self, config_path):
|
| 640 |
with open(f'{config_path}/config.json', "r", encoding="utf-8") as f:
|
| 641 |
config = json.load(f)
|
| 642 |
|
vision_model.py
CHANGED
|
@@ -715,14 +715,3 @@ class Idefics2VisionTransformer(PreTrainedModel):
|
|
| 715 |
hidden_states=encoder_outputs.hidden_states,
|
| 716 |
attentions=encoder_outputs.attentions,
|
| 717 |
)
|
| 718 |
-
"""
|
| 719 |
-
@classmethod
|
| 720 |
-
def from_pretrained(self, config_path="/mnt/csp/mmvision/home/arrayyang/idefics2-8b/idefics2_vision_model"):
|
| 721 |
-
config = Idefics2VisionConfig.from_pretrained(f'{config_path}/config.json')
|
| 722 |
-
cls = Idefics2VisionTransformer(config=config)
|
| 723 |
-
|
| 724 |
-
state_dict = torch.load(f'{config_path}/vision_model.pth', map_location='cpu')
|
| 725 |
-
ret = cls.load_state_dict(state_dict, strict=False)
|
| 726 |
-
print("Loading idefics2 Vision Model: {}".format(config_path))
|
| 727 |
-
return cls
|
| 728 |
-
"""
|
|
|
|
| 715 |
hidden_states=encoder_outputs.hidden_states,
|
| 716 |
attentions=encoder_outputs.attentions,
|
| 717 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|