Commit
·
bdf9fdc
1
Parent(s):
bf53a99
Safe cache check
Browse files — modeling_chatglm.py (+3, -2)
modeling_chatglm.py
CHANGED
|
@@ -717,8 +717,9 @@ class SelfAttention(torch.nn.Module):
|
|
| 717 |
# adjust key and value for inference
|
| 718 |
if kv_cache is not None:
|
| 719 |
cache_k, cache_v = kv_cache
|
| 720 |
-
|
| 721 |
-
|
|
|
|
| 722 |
if use_cache:
|
| 723 |
if kv_cache is None:
|
| 724 |
kv_cache = torch.cat((key_layer.unsqueeze(0).unsqueeze(0), value_layer.unsqueeze(0).unsqueeze(0)),
|
|
|
|
| 717 |
# adjust key and value for inference
|
| 718 |
if kv_cache is not None:
|
| 719 |
cache_k, cache_v = kv_cache
|
| 720 |
+
if cache_k is not None and cache_v is not None:
|
| 721 |
+
key_layer = torch.cat((cache_k, key_layer), dim=2)
|
| 722 |
+
value_layer = torch.cat((cache_v, value_layer), dim=2)
|
| 723 |
if use_cache:
|
| 724 |
if kv_cache is None:
|
| 725 |
kv_cache = torch.cat((key_layer.unsqueeze(0).unsqueeze(0), value_layer.unsqueeze(0).unsqueeze(0)),
|