File size: 1,193 Bytes
032e687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Copyright (c) OpenMMLab. All rights reserved.
from transformers import PretrainedConfig

class ProjectorConfig_OMG_LLaVA(PretrainedConfig):
    """HF-style configuration for the OMG-LLaVA projector.

    Holds the hyper-parameters of the module that projects visual features
    into the LLM embedding space. Registered with ``AutoConfig`` under the
    ``projector`` model type.
    """

    model_type = 'projector'
    _auto_class = 'AutoConfig'

    def __init__(
        self,
        visual_hidden_size=4096,
        llm_hidden_size=4096,
        depth=2,
        hidden_act='gelu',
        bias=True,
        query_channels=256,
        feat_channels=1536,
        pixel_shuffle_ratio=None,
        additional_bg_tokens=10,
        visual_prompt_proj=False,
        add_cross_attn_layer=False,
        **kwargs,
    ):
        # Widths of the incoming visual features and of the target LLM.
        self.visual_hidden_size = visual_hidden_size
        self.llm_hidden_size = llm_hidden_size
        # Shape of the projection MLP: number of layers, activation, bias.
        self.depth = depth
        self.hidden_act = hidden_act
        self.bias = bias
        self.query_channels = query_channels
        # When a pixel-shuffle ratio is given, the effective feature width
        # grows by ratio**2 — presumably because pixel shuffle folds a
        # ratio x ratio spatial patch into the channel dim (TODO confirm
        # against the vision tower).
        if pixel_shuffle_ratio is not None:
            feat_channels = feat_channels * pixel_shuffle_ratio * pixel_shuffle_ratio
        self.feat_channels = feat_channels
        self.additional_bg_tokens = additional_bg_tokens
        self.visual_prompt_proj = visual_prompt_proj
        self.add_cross_attn_layer = add_cross_attn_layer
        super().__init__(**kwargs)