man2machine committed on
Commit
3a421cd
·
1 Parent(s): 274f8d5

Update working

Browse files
Files changed (2) hide show
  1. configuration_slimmoe.py +1 -1
  2. modeling_slimmoe.py +9 -1
configuration_slimmoe.py CHANGED
@@ -111,7 +111,7 @@ class PhiMoEConfig(PretrainedConfig):
111
  >>> configuration = model.config
112
  ```"""
113
 
114
- model_type = "phimoe"
115
  keys_to_ignore_at_inference = ["past_key_values"]
116
 
117
  def __init__(
 
111
  >>> configuration = model.config
112
  ```"""
113
 
114
+ model_type = "phimoe_slim" # renamed from "phimoe" to bypass transformers >=4.46 conversion_mapping
115
  keys_to_ignore_at_inference = ["past_key_values"]
116
 
117
  def __init__(
modeling_slimmoe.py CHANGED
@@ -330,9 +330,17 @@ class PhiMoEAttention(nn.Module):
330
  base=self.rope_theta,
331
  )
332
  else:
333
- scaling_type = self.config.rope_scaling["type"]
 
334
  if scaling_type == "longrope":
335
  self.rotary_emb = Phi3LongRoPEScaledRotaryEmbedding(self.head_dim, self.config)
 
 
 
 
 
 
 
336
  else:
337
  raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
338
 
 
330
  base=self.rope_theta,
331
  )
332
  else:
333
+ # "type" key was renamed to "rope_type" in transformers >=4.46; handle both
334
+ scaling_type = self.config.rope_scaling.get("type") or self.config.rope_scaling.get("rope_type", "")
335
  if scaling_type == "longrope":
336
  self.rotary_emb = Phi3LongRoPEScaledRotaryEmbedding(self.head_dim, self.config)
337
+ elif not scaling_type or scaling_type == "default":
338
+ # newer transformers injects {"rope_type": "default"} when rope_scaling is absent
339
+ self.rotary_emb = PhiMoERotaryEmbedding(
340
+ self.head_dim,
341
+ max_position_embeddings=self.max_position_embeddings,
342
+ base=self.rope_theta,
343
+ )
344
  else:
345
  raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
346