|
|
from typing import Any |
|
|
|
|
|
from transformers import PretrainedConfig, Qwen3Config |
|
|
|
|
|
try: |
|
|
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig |
|
|
except ImportError: |
|
|
print('Please upgrade transformers to version 4.46.3 or higher') |
|
|
|
|
|
|
|
|
class POINTSGUIConfig(PretrainedConfig):
    """Configuration class for `POINTSGUI`.

    Composes a Qwen2-VL vision encoder configuration (``vision_config``)
    with a Qwen3 language-model configuration (``llm_config``) and mirrors
    the key LLM hyper-parameters onto this top-level config so downstream
    code can read them directly (e.g. ``config.hidden_size``).
    """

    model_type = "points_gui"
    is_composition = True

    def __init__(self,
                 **kwargs) -> None:
        """Build the composite configuration.

        Args:
            **kwargs: May contain ``vision_config`` and ``llm_config``,
                each either a plain ``dict`` or an already-built config
                object. All remaining entries are forwarded to
                ``PretrainedConfig``.
        """
        # Bare construction (no kwargs at all): just initialize the base
        # class with its defaults and stop.
        if not kwargs:
            super().__init__(**kwargs)
            return

        vision_config = kwargs.pop("vision_config", None)
        llm_config = kwargs.pop("llm_config", None)

        if isinstance(vision_config, dict):
            self.vision_config = Qwen2VLVisionConfig(**vision_config)
        else:
            self.vision_config = vision_config

        if isinstance(llm_config, dict):
            self.llm_config = Qwen3Config(**llm_config)
        else:
            self.llm_config = llm_config

        # Without an LLM config there are no hyper-parameters to mirror;
        # previously this path crashed with a TypeError on `None[...]`.
        if self.llm_config is None:
            super().__init__(**kwargs)
            return

        # Normalize to a plain dict so that both dict and config-object
        # inputs work (subscripting a Qwen3Config instance would raise),
        # and so that Qwen3Config defaults fill any missing keys. `to_dict`
        # deep-copies, so the caller's dict is never mutated below.
        llm = self.llm_config.to_dict()

        # Mirror the core LLM hyper-parameters onto this config.
        self.vocab_size = llm["vocab_size"]
        self.max_position_embeddings = llm["max_position_embeddings"]
        self.hidden_size = llm["hidden_size"]
        self.intermediate_size = llm["intermediate_size"]
        self.num_hidden_layers = llm["num_hidden_layers"]
        self.num_attention_heads = llm["num_attention_heads"]
        self.use_sliding_window = llm["use_sliding_window"]
        self.sliding_window = llm["sliding_window"]
        self.max_window_layers = llm["max_window_layers"]

        # MHA fallback: no explicit KV-head count means one KV head per
        # attention head (standard multi-head attention).
        if llm["num_key_value_heads"] is None:
            llm["num_key_value_heads"] = llm["num_attention_heads"]
        self.num_key_value_heads = llm["num_key_value_heads"]

        self.head_dim = llm["head_dim"]
        self.hidden_act = llm["hidden_act"]
        self.initializer_range = llm["initializer_range"]
        self.rms_norm_eps = llm["rms_norm_eps"]
        self.use_cache = llm["use_cache"]
        self.rope_theta = llm["rope_theta"]
        self.rope_scaling = llm["rope_scaling"]
        self.attention_bias = llm["attention_bias"]
        self.attention_dropout = llm["attention_dropout"]

        # "mrope" is handled by the multimodal model itself; the mirrored
        # text-side rope config falls back to default RoPE, and the newer
        # "rope_type" key is kept in sync with the legacy "type" key.
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            if self.rope_scaling["type"] == "mrope":
                self.rope_scaling["type"] = "default"
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]

        # Single base-class call (the original called super().__init__
        # twice; the net attribute state is identical with one call).
        super().__init__(
            tie_word_embeddings=llm["tie_word_embeddings"],
            **kwargs,
        )