# (removed non-Python extraction artifacts: file-size banner, commit hash, line-number listing)
from typing import Any
from transformers import PretrainedConfig, Qwen3Config
try:
    from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
except ImportError:
    # NOTE(review): this only prints and continues, so `Qwen2VLVisionConfig`
    # stays undefined on older transformers — a later
    # `POINTSGUIConfig(vision_config={...})` call then fails with a bare
    # NameError rather than this message. Consider re-raising here instead.
    print('Please upgrade transformers to version 4.46.3 or higher')
class POINTSGUIConfig(PretrainedConfig):
    """Configuration class for `POINTSGUI`.

    Composes a Qwen2-VL vision encoder config (``vision_config``) with a
    Qwen3 language-model config (``llm_config``) and mirrors the key LLM
    hyper-parameters onto this config so downstream code can read them
    directly (e.g. ``config.hidden_size``).
    """
    model_type = "points_gui"
    is_composition = True

    def __init__(self,
                 **kwargs) -> None:
        super().__init__(**kwargs)
        # When instantiated with no arguments (e.g. by serialization
        # machinery probing the class), leave the sub-configs unset.
        if not kwargs:
            return
        vision_config = kwargs.pop("vision_config", None)
        llm_config = kwargs.pop("llm_config", None)
        if isinstance(vision_config, dict):
            self.vision_config = Qwen2VLVisionConfig(**vision_config)
        else:
            self.vision_config = vision_config
        if isinstance(llm_config, dict):
            self.llm_config = Qwen3Config(**llm_config)
            llm = llm_config
        else:
            self.llm_config = llm_config
            if llm_config is None:
                # Previously this fell through to a bare TypeError on the
                # first subscript; fail with an actionable message instead.
                raise ValueError(
                    "`llm_config` must be provided when other kwargs are given")
            # Normalize a config object to a plain dict so the attribute
            # mirroring below works for both input forms (the original code
            # subscripted `llm_config` directly, which raised TypeError when
            # a config object was passed — a case the branch above supports).
            llm = self.llm_config.to_dict()
        self.vocab_size = llm["vocab_size"]
        self.max_position_embeddings = llm["max_position_embeddings"]
        self.hidden_size = llm["hidden_size"]
        self.intermediate_size = llm["intermediate_size"]
        self.num_hidden_layers = llm["num_hidden_layers"]
        self.num_attention_heads = llm["num_attention_heads"]
        self.use_sliding_window = llm["use_sliding_window"]
        self.sliding_window = llm["sliding_window"]  # we check `use_sliding_window` in the modeling code
        self.max_window_layers = llm["max_window_layers"]
        # for backward compatibility: fall back to MHA when the key is
        # absent or explicitly None (direct indexing used to KeyError here)
        if llm.get("num_key_value_heads") is None:
            llm["num_key_value_heads"] = llm["num_attention_heads"]
        self.num_key_value_heads = llm["num_key_value_heads"]
        self.head_dim = llm["head_dim"]
        self.hidden_act = llm["hidden_act"]
        self.initializer_range = llm["initializer_range"]
        self.rms_norm_eps = llm["rms_norm_eps"]
        self.use_cache = llm["use_cache"]
        self.rope_theta = llm["rope_theta"]
        self.rope_scaling = llm["rope_scaling"]
        self.attention_bias = llm["attention_bias"]
        self.attention_dropout = llm["attention_dropout"]
        # Validate the correctness of rotary position embeddings parameters
        # BC: if there is a 'type' field, move it to 'rope_type'
        # (rewriting 'mrope' to 'default' first, matching upstream Qwen2-VL).
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            if self.rope_scaling["type"] == "mrope":
                self.rope_scaling["type"] = "default"
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        # Second super().__init__ call intentionally re-runs base setup with
        # the sub-config kwargs popped and tie_word_embeddings propagated
        # from the LLM config.
        super().__init__(
            tie_word_embeddings=llm["tie_word_embeddings"],
            **kwargs,
        )