# qualityv-0606 / configuration_qualityv.py
# CyberBoyNull's picture
# Upload folder
# cb65f9f verified
from transformers.configuration_utils import PretrainedConfig
from transformers import AutoConfig
from transformers.activations import ACT2FN
class QualityLinearAdapterConfig(PretrainedConfig):
    """Configuration for the linear (MLP) adapter that projects encoder
    features into the language model's hidden space.

    Hyperparameters:
        in_hidden_size: width of the incoming encoder features.
        num_layers: number of adapter layers.
        intermediate_size: hidden width between adapter layers.
        out_hidden_size: target width (the LLM hidden size).
        act_fn: name of the activation function (looked up via ACT2FN).
    """

    model_type = "QualityvForCausalLM"
    adapter_type = "linear"

    def __init__(self,
                 in_hidden_size: int = 1024,
                 num_layers: int = 2,
                 intermediate_size: int = 2048,
                 # NOTE(review): 2028 looks like a typo for 2048 — confirm
                 # before changing; the default is part of serialized configs.
                 out_hidden_size: int = 2028,
                 act_fn: str = "gelu",
                 **kwargs,
                 ) -> None:
        super().__init__(**kwargs)
        # Record the adapter hyperparameters verbatim on the config.
        self.act_fn = act_fn
        self.out_hidden_size = out_hidden_size
        self.intermediate_size = intermediate_size
        self.num_layers = num_layers
        self.in_hidden_size = in_hidden_size
class QualityvConfig(PretrainedConfig):
    """Composite configuration for the Qualityv multimodal causal LM.

    Wraps an LLM config plus optional vision / audio encoder configs, and
    builds a :class:`QualityLinearAdapterConfig` for each enabled modality
    that projects encoder features into the LLM hidden size.

    Args:
        vision_model_name: HF model id/path of the vision encoder, or None.
        audio_model_name: HF model id/path of the audio encoder, or None.
        llm_model_name: HF model id/path of the language model, or None.
        image_token_id / video_token_id / audio_token_id: placeholder token
            ids used to splice modality features into the text sequence.
        adapter_type: adapter architecture name (only "linear" is built here).
        num_adapter_layers: number of layers in each modality adapter.

    Raises:
        ValueError: if a vision/audio encoder is requested without an LLM
            (the adapters need the LLM hidden size as their output width).
    """

    model_type = "QualityvForCausalLM"

    def __init__(self,
                 vision_model_name: str = None,
                 audio_model_name: str = None,
                 llm_model_name: str = None,
                 image_token_id: int = None,
                 video_token_id: int = None,
                 audio_token_id: int = None,
                 adapter_type: str = "linear",
                 num_adapter_layers: int = 2,
                 **kwargs,
                 ) -> None:
        super().__init__(**kwargs)
        self.vision_model_name = vision_model_name
        self.audio_model_name = audio_model_name
        self.llm_model_name = llm_model_name
        self.image_token_id = image_token_id
        self.video_token_id = video_token_id
        self.audio_token_id = audio_token_id
        self.adapter_type = adapter_type
        self.num_adapter_layers = num_adapter_layers

        # Always define every sub-config attribute so attribute access never
        # raises AttributeError for a disabled modality (the original code
        # left llm_config and the *_adapter_config attributes undefined).
        self.llm_config = None
        self.vision_config = None
        self.audio_config = None
        self.vision_adapter_config = None
        self.audio_adapter_config = None

        if llm_model_name is not None:
            self.llm_config = AutoConfig.from_pretrained(llm_model_name)
            # Flatten the LLM config onto this config so downstream code can
            # read e.g. self.hidden_size directly. Note this intentionally
            # overwrites any same-named attribute set above.
            for key, value in self.llm_config.to_dict().items():
                setattr(self, key, value)

        if vision_model_name is not None:
            if self.llm_config is None:
                # Fail fast with a clear message instead of an opaque
                # AttributeError on self.llm_config.hidden_size.
                raise ValueError(
                    "llm_model_name must be provided when vision_model_name is set"
                )
            self.vision_config = AutoConfig.from_pretrained(vision_model_name)
            self.vision_adapter_config = QualityLinearAdapterConfig(
                in_hidden_size=self.vision_config.hidden_size,
                intermediate_size=self.vision_config.hidden_size * 2,
                out_hidden_size=self.llm_config.hidden_size,
                num_layers=num_adapter_layers,
            )

        if audio_model_name is not None:
            if self.llm_config is None:
                raise ValueError(
                    "llm_model_name must be provided when audio_model_name is set"
                )
            self.audio_config = AutoConfig.from_pretrained(audio_model_name)
            self.audio_adapter_config = QualityLinearAdapterConfig(
                in_hidden_size=self.audio_config.hidden_size,
                intermediate_size=self.audio_config.hidden_size * 2,
                out_hidden_size=self.llm_config.hidden_size,
                num_layers=num_adapter_layers,
            )

    def get_vocab_size(self):
        """Return the vocabulary size of the underlying LLM config."""
        return self.llm_config.vocab_size

    def get_text_config(self, **kwargs):
        """Delegate to the LLM config's text config (transformers API hook)."""
        return self.llm_config.get_text_config(**kwargs)