Commit: kv flush
audiocraft.py  +3 -3
@@ -66,7 +66,7 @@ class AudioGen(torch.nn.Module):
     ):
         torch.manual_seed(42) # https://github.com/facebookresearch/audiocraft/issues/111#issuecomment-1614732858
         n_draw = int(duration * 50 / (max_tokens * N_REPEAT)) + 1
-        print(f'{n_draw=} {duration=}seconds < {prompt=}')
+        print(f'{n_draw=} {duration=}seconds < {prompt=} | {max_tokens=}')
         with torch.autocast(device_type='cpu', dtype=torch.bfloat16):
             gen_tokens = self.lm.generate(
                 text_condition=[prompt] * N_REPEAT + [''] * N_REPEAT,#['dogs', 'dogs...!', '', '']
@@ -540,7 +540,7 @@ class LMModel(nn.Module):
 
         out_codes[:, :, [0, 1, 2, 3], torch.tensor([3, 2, 1, 0]) + offset + 1] = next_token
         # Sink Attn
-        if (offset > 0) and (offset %
+        if (offset > 0) and (offset % 71) == 0:
             n_preserve = 4
             self.transformer._flush(n_preserve=n_preserve)
             cache_position = n_preserve
@@ -640,7 +640,7 @@ class StreamingMultiheadAttention(nn.Module):
 
         k = self.k_history
         v = self.v_history
-
+
         # -> kv CACHE ONLY APPLIES if not self.cross_attention
 
         x = torch.nn.functional.scaled_dot_product_attention(
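For context on the change (not part of the commit): the `# Sink Attn` branch resets the streaming KV cache every 71 decoding steps, keeping only the first `n_preserve = 4` cached positions as attention-sink tokens, and rewinds `cache_position` so later steps write right after them. The diff does not show `_flush` itself; the snippet below is a minimal hypothetical sketch of that pattern, assuming `k_history`/`v_history` are `(batch, heads, cached_len, head_dim)` tensors as the `StreamingMultiheadAttention` hunk suggests (`SinkKVCacheSketch` and `append` are illustrative names, not repo API).

import torch

# Hypothetical sketch, not the repo's actual _flush: keep only the first
# n_preserve cached positions (the attention-sink tokens).
class SinkKVCacheSketch:
    def __init__(self, batch=1, heads=8, head_dim=64):
        # KV history as hinted by k_history / v_history in the diff:
        # shape (batch, heads, cached_len, head_dim)
        self.k_history = torch.zeros(batch, heads, 0, head_dim)
        self.v_history = torch.zeros(batch, heads, 0, head_dim)

    def append(self, k_step, v_step):
        # Append one decoding step's K/V, each (batch, heads, 1, head_dim).
        self.k_history = torch.cat([self.k_history, k_step], dim=2)
        self.v_history = torch.cat([self.v_history, v_step], dim=2)

    def _flush(self, n_preserve=4):
        # Truncate the cache to the sink tokens; generation continues from there.
        self.k_history = self.k_history[:, :, :n_preserve, :]
        self.v_history = self.v_history[:, :, :n_preserve, :]

cache = SinkKVCacheSketch()
for offset in range(150):
    kv = torch.randn(1, 8, 1, 64)
    cache.append(kv, kv.clone())
    if (offset > 0) and (offset % 71) == 0:  # same cadence as the diff
        cache._flush(n_preserve=4)
print(cache.k_history.shape[2])  # cache stays bounded instead of growing to 150

Given the `duration * 50` token rate in the first hunk, a flush every 71 steps would correspond to roughly 1.4 seconds of audio, trading a periodic loss of long-range context for a KV cache whose memory stays bounded during streaming generation.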