Florian valade committed on
Commit
432ea6e
·
2 Parent(s): a781577 4794a4d

Merge remote 'space/main' (try fix qwen)

Browse files
Files changed (1) hide show
  1. src/model_adapters.py +5 -0
src/model_adapters.py CHANGED
@@ -127,6 +127,11 @@ class LlamaStyleAdapter(ModelAdapter):
127
  ) -> Optional[Tuple[Tensor, Tensor]]:
128
  if self._rotary is not None:
129
  cos, sin = self._rotary(hidden_states, position_ids)
 
 
 
 
 
130
  return (cos, sin)
131
  return None
132
 
 
127
  ) -> Optional[Tuple[Tensor, Tensor]]:
128
  if self._rotary is not None:
129
  cos, sin = self._rotary(hidden_states, position_ids)
130
+ # Unsqueeze to (batch, 1, seq_len, head_dim) to support broadcasting
131
+ # This matches LlamaModel behavior which prepares embeddings for layers
132
+ if cos.dim() == 3:
133
+ cos = cos.unsqueeze(1)
134
+ sin = sin.unsqueeze(1)
135
  return (cos, sin)
136
  return None
137