Update code for transformers 5.5.4

Pull request #2, opened by sjzhou
Files changed (1):
  1. configuration_moss_vl.py (+28 −4)
configuration_moss_vl.py CHANGED
@@ -69,6 +69,8 @@ class MossVLTextConfig(PretrainedConfig):
69
 
70
  model_type = "moss_vl_text"
71
  base_config_key = "text_config"
 
 
72
 
73
  def __init__(
74
  self,
@@ -86,9 +88,11 @@ class MossVLTextConfig(PretrainedConfig):
86
  use_cache=True,
87
  tie_word_embeddings=False,
88
  rope_theta=5000000.0,
 
89
  rope_scaling=None,
90
  attention_bias=False,
91
  attention_dropout=0.0,
 
92
  # Cross attention specific
93
  cross_attention_layers=None, # List of layer indices to insert cross attention
94
  **kwargs,
@@ -112,11 +116,31 @@ class MossVLTextConfig(PretrainedConfig):
112
  self.rms_norm_eps = rms_norm_eps
113
  self.use_cache = use_cache
114
  self.rope_theta = rope_theta
115
- self.rope_scaling = rope_scaling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  self.attention_bias = attention_bias
117
  self.attention_dropout = attention_dropout
118
-
119
- rope_config_validation(self, ignore_keys={"mrope_section", "mrope_interleaved"})
 
 
 
 
 
 
120
  self.cross_attention_layers = cross_attention_layers or [2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46]
121
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
122
 
@@ -161,4 +185,4 @@ class MossVLConfig(PretrainedConfig):
161
  super().__init__(**kwargs, tie_word_embeddings=tie_word_embeddings)
162
 
163
 
164
- __all__ = ["MossVLConfig", "MossVLTextConfig"]
 
69
 
70
  model_type = "moss_vl_text"
71
  base_config_key = "text_config"
72
+ default_theta = 5000000.0
73
+ ignore_keys_at_rope_validation = {"mrope_section", "mrope_interleaved"}
74
 
75
  def __init__(
76
  self,
 
88
  use_cache=True,
89
  tie_word_embeddings=False,
90
  rope_theta=5000000.0,
91
+ rope_parameters=None,
92
  rope_scaling=None,
93
  attention_bias=False,
94
  attention_dropout=0.0,
95
+ pad_token_id=None,
96
  # Cross attention specific
97
  cross_attention_layers=None, # List of layer indices to insert cross attention
98
  **kwargs,
 
116
  self.rms_norm_eps = rms_norm_eps
117
  self.use_cache = use_cache
118
  self.rope_theta = rope_theta
119
+ if rope_parameters is None:
120
+ if rope_scaling is not None:
121
+ rope_parameters = dict(rope_scaling)
122
+ else:
123
+ rope_parameters = {"rope_type": "default"}
124
+ else:
125
+ rope_parameters = dict(rope_parameters)
126
+
127
+ if "type" in rope_parameters and "rope_type" not in rope_parameters:
128
+ rope_parameters["rope_type"] = rope_parameters.pop("type")
129
+ rope_parameters.setdefault("rope_type", "default")
130
+ rope_parameters.setdefault("rope_theta", rope_theta)
131
+
132
+ self.rope_parameters = rope_parameters
133
+ self.rope_scaling = rope_scaling if rope_scaling is not None else dict(rope_parameters)
134
  self.attention_bias = attention_bias
135
  self.attention_dropout = attention_dropout
136
+ self.pad_token_id = pad_token_id
137
+
138
+ if hasattr(self, "standardize_rope_params"):
139
+ self.standardize_rope_params()
140
+ if hasattr(self, "validate_rope"):
141
+ self.validate_rope()
142
+ else:
143
+ rope_config_validation(self, ignore_keys=self.ignore_keys_at_rope_validation)
144
  self.cross_attention_layers = cross_attention_layers or [2, 6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46]
145
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
146
 
 
185
  super().__init__(**kwargs, tie_word_embeddings=tie_word_embeddings)
186
 
187
 
188
+ __all__ = ["MossVLConfig", "MossVLTextConfig"]