Commit: "Update modeling_neollm.py" (+6 −2)
File changed: modeling_neollm.py
Hunk: @@ -515,9 +515,13 @@ class NeoLLMRotaryEmbedding(nn.Module):
Before (old lines 515-523):

    515       self.max_seq_len_cached = config.max_position_embeddings
    516       self.original_max_seq_len = config.max_position_embeddings
    517
    518       if hasattr(config, "rope_scaling") and config.rope_scaling is not None and isinstance(config.rope_scaling, dict):
    519 -         [removed line — content lost in page extraction]
    520 -         [removed line — content lost in page extraction]
    521           inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
    522       else:
    523           self.rope_type = None
|
|
|
After (new lines 515-527; indentation inferred from Python syntax — the scrape did not preserve it, verify against the repository):

    515       self.max_seq_len_cached = config.max_position_embeddings
    516       self.original_max_seq_len = config.max_position_embeddings
    517
    518 +     rope_type = None
    519       if hasattr(config, "rope_scaling") and config.rope_scaling is not None and isinstance(config.rope_scaling, dict):
    520 +         rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
    521 +
    522 +         if rope_type and rope_type != "default" and rope_type in ROPE_INIT_FUNCTIONS:
    523 +             self.rope_type = rope_type
    524 +             self.rope_init_fn = ROPE_INIT_FUNCTIONS[rope_type]
    525           inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
    526       else:
    527           self.rope_type = None