Commit · 142b2b1 · 1 Parent(s): 343d807
Update model
Browse files
- config.json +4 -0
- model.safetensors +1 -1
- modeling_vivqa.py +3 -3
config.json
CHANGED
|
@@ -5,6 +5,10 @@
|
|
| 5 |
"BEiT3ForVietnameseVisualQuestionAnswering"
|
| 6 |
],
|
| 7 |
"attention_dropout": 0.0,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"bert_init": false,
|
| 9 |
"checkpoint_activations": false,
|
| 10 |
"ddp_rank": 0,
|
|
|
|
| 5 |
"BEiT3ForVietnameseVisualQuestionAnswering"
|
| 6 |
],
|
| 7 |
"attention_dropout": 0.0,
|
| 8 |
+
"auto_map": {
|
| 9 |
+
"AutoConfig": "configuration_vivqa.ViVQAConfig",
|
| 10 |
+
"AutoModel": "modeling_vivqa.BEiT3ForVietnameseVisualQuestionAnswering"
|
| 11 |
+
},
|
| 12 |
"bert_init": false,
|
| 13 |
"checkpoint_activations": false,
|
| 14 |
"ddp_rank": 0,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4911309508
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f5d2c605437bfec5f62512ec5bd54851ff49d5f056bdc794cd5e5e4a45b11f4
|
| 3 |
size 4911309508
|
modeling_vivqa.py
CHANGED
|
@@ -37,8 +37,8 @@ class Blip2EfficientExtractor(nn.Module):
|
|
| 37 |
self.model_blip2.eval()
|
| 38 |
|
| 39 |
# Efficientnet
|
| 40 |
-
self.
|
| 41 |
-
self.
|
| 42 |
self.pooling1 = nn.AdaptiveAvgPool2d((1, 32))
|
| 43 |
self.pooling2 = nn.AdaptiveAvgPool2d((1, 768))
|
| 44 |
|
|
@@ -46,7 +46,7 @@ class Blip2EfficientExtractor(nn.Module):
|
|
| 46 |
|
| 47 |
global_features = self.model_blip2.extract_features(samples={"image": images}, mode="image").image_embeds
|
| 48 |
|
| 49 |
-
local_features = self.
|
| 50 |
local_features = self.pooling1(local_features)
|
| 51 |
local_features = local_features.permute(0, 3, 2, 1)
|
| 52 |
local_features = self.pooling2(local_features)
|
|
|
|
| 37 |
self.model_blip2.eval()
|
| 38 |
|
| 39 |
# Efficientnet
|
| 40 |
+
self.model_efficientnet = EfficientNet.from_pretrained('efficientnet-b7', advprop=True).to(self.device)
|
| 41 |
+
self.model_efficientnet.eval()
|
| 42 |
self.pooling1 = nn.AdaptiveAvgPool2d((1, 32))
|
| 43 |
self.pooling2 = nn.AdaptiveAvgPool2d((1, 768))
|
| 44 |
|
|
|
|
| 46 |
|
| 47 |
global_features = self.model_blip2.extract_features(samples={"image": images}, mode="image").image_embeds
|
| 48 |
|
| 49 |
+
local_features = self.model_efficientnet.extract_features(images)
|
| 50 |
local_features = self.pooling1(local_features)
|
| 51 |
local_features = local_features.permute(0, 3, 2, 1)
|
| 52 |
local_features = self.pooling2(local_features)
|