Commit: Merge remote 'space/main' (try fix qwen)
File changed: src/model_adapters.py (+5 −0)
@@ -127,6 +127,11 @@ class LlamaStyleAdapter(ModelAdapter):
     ) -> Optional[Tuple[Tensor, Tensor]]:
         if self._rotary is not None:
             cos, sin = self._rotary(hidden_states, position_ids)
+            # Unsqueeze to (batch, 1, seq_len, head_dim) to support broadcasting
+            # This matches LlamaModel behavior which prepares embeddings for layers
+            if cos.dim() == 3:
+                cos = cos.unsqueeze(1)
+                sin = sin.unsqueeze(1)
             return (cos, sin)
         return None