OliverPerrin committed on
Commit 2dcb4b5 · 1 Parent(s): f2e0099

Chore: Update pre-commit hooks and fix formatting

.pre-commit-config.yaml CHANGED
@@ -1,13 +1,13 @@
 repos:
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.1.11
+  rev: v0.14.7
   hooks:
   - id: ruff
     args: [ --fix ]
   - id: ruff-format

 - repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v1.8.0
+  rev: v1.19.0
   hooks:
   - id: mypy
     additional_dependencies: [types-requests, types-PyYAML]
src/models/encoder.py CHANGED
@@ -160,9 +160,9 @@ class TransformerEncoder(nn.Module):
         Build a 3D attention mask (batch, seq, seq) from input_ids and pad_token_id.
         True indicates valid positions; False indicates masked (pad).
         """
-        assert (
-            self.pad_token_id is not None
-        ), "pad_token_id must be set to build padding mask from ids."
+        assert self.pad_token_id is not None, (
+            "pad_token_id must be set to build padding mask from ids."
+        )
         # mask shape: (batch, seq) where True = token kept (non-pad)
         pad_mask = input_ids != self.pad_token_id
         # Convert to (batch, seq_q, seq_k) by outer product broadcasting
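For readers following the mask logic in this hunk: the (batch, seq) pad mask is broadcast into a (batch, seq_q, seq_k) attention mask. Below is a minimal standalone sketch of one plausible reading of the "outer product broadcasting" comment; the pad_token_id and input_ids values are made up for illustration, not taken from the repository.

import torch

# Assumed example inputs: batch of 2 sequences, pad_token_id taken to be 0.
pad_token_id = 0
input_ids = torch.tensor([[5, 7, 9, 0], [3, 0, 0, 0]])

# (batch, seq): True where the token is kept (non-pad).
pad_mask = input_ids != pad_token_id

# Outer-product broadcast to (batch, seq_q, seq_k): a (query, key) pair is
# valid only if both positions are non-pad.
attn_mask = pad_mask.unsqueeze(2) & pad_mask.unsqueeze(1)

print(attn_mask.shape)  # torch.Size([2, 4, 4])
print(attn_mask[1])     # only the top-left 1x1 block is True for the second sequence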
src/models/heads.py CHANGED
@@ -97,12 +97,12 @@ class LMHead(nn.Module):
 
         if tie_embedding is not None:
             # Validate sizes
-            assert (
-                tie_embedding.num_embeddings == vocab_size
-            ), "vocab size mismatch for weight tying"
-            assert (
-                tie_embedding.embedding_dim == d_model
-            ), "embedding dim must match d_model for weight tying"
+            assert tie_embedding.num_embeddings == vocab_size, (
+                "vocab size mismatch for weight tying"
+            )
+            assert tie_embedding.embedding_dim == d_model, (
+                "embedding dim must match d_model for weight tying"
+            )
             # Tie weights: point the projection weight to the embedding weight Tensor
             # Remove the existing projection parameter in favor of the embedding weight
             # This keeps the same Parameter object, so updates affect both modules.
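For context on the weight-tying comments in this hunk: assigning the embedding's weight to the output projection makes both modules share one Parameter object, so an update made through either module is seen by both. A minimal sketch with illustrative sizes and module names (vocab_size, d_model, and proj are placeholders here, not the repository's LMHead internals):

import torch.nn as nn

vocab_size, d_model = 100, 16  # illustrative sizes

embedding = nn.Embedding(vocab_size, d_model)
proj = nn.Linear(d_model, vocab_size, bias=False)

# Validate sizes before tying, mirroring the asserts in the diff.
assert embedding.num_embeddings == vocab_size
assert embedding.embedding_dim == d_model

# Tie weights: the projection's weight now is the embedding weight Parameter.
proj.weight = embedding.weight

# Same Parameter object, so gradients and optimizer updates affect both modules.
print(proj.weight is embedding.weight)  # True
print(proj.weight.shape)                # torch.Size([100, 16])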
tests/test_models/test_decoder.py CHANGED
@@ -64,9 +64,9 @@ def test_decoder_layer_causal_mask_blocks_future():
     B, H, Tq, Tk = self_attn.shape
     for i in range(Tq):
         for j in range(i + 1, Tk):
-            assert torch.allclose(
-                self_attn[:, :, i, j], torch.zeros(B, H)
-            ), f"Found nonzero attention to future position {j} from query {i}"
+            assert torch.allclose(self_attn[:, :, i, j], torch.zeros(B, H)), (
+                f"Found nonzero attention to future position {j} from query {i}"
+            )
 
 
 def test_decoder_stack_and_greedy_decode_shapes():
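The test in this hunk asserts that attention from any query position to any strictly later key position is exactly zero. A small self-contained sketch of that property using a toy masked softmax; the decoder's actual attention implementation lives in the repository and is not reproduced here.

import torch
import torch.nn.functional as F

B, H, T = 2, 4, 5  # toy batch size, head count, sequence length
scores = torch.randn(B, H, T, T)

# Causal mask: query position i may only attend to key positions j <= i.
causal = torch.tril(torch.ones(T, T)).bool()
scores = scores.masked_fill(~causal, float("-inf"))
attn = F.softmax(scores, dim=-1)

# Every entry above the diagonal is exactly zero after the softmax,
# which is the same property the test above checks on self_attn.
for i in range(T):
    for j in range(i + 1, T):
        assert torch.allclose(attn[:, :, i, j], torch.zeros(B, H))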