dev-mode-orpheus

Paused

Tomtom84 committed on Apr 21, 2025

Commit

7c37296

verified ·

1 Parent(s): 2a41e43

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,7 +15,7 @@ torch.backends.cuda.enable_flash_sdp(False)          # PyTorch‑2.2‑Bug
 # 1) Konstanten -------------------------------------------------------
 REPO           = "SebastianBodza/Kartoffel_Orpheus-3B_german_natural-v0.1"
-CHUNK_TOKENS   = 50
 START_TOKEN    = 128259
 NEW_BLOCK      = 128257
 EOS_TOKEN      = 128258
@@ -108,16 +108,16 @@ async def tts(ws: WebSocket):
         while True:
             next_cache_pos = torch.tensor([offset_len], device=device) if past is not None else None
             gen = model.generate(
                 input_ids       = ids if past is None else torch.tensor([[last_tok]], device=device),
                 attention_mask  = attn if past is None else None,
                 past_key_values = past,
-                cache_position  = next_cache_pos,     # **hier nur ab 2. Durchlauf**
                 max_new_tokens  = CHUNK_TOKENS,
                 logits_processor=[masker],
                 do_sample=True, temperature=0.7, top_p=0.95,
                 use_cache=True, return_dict_in_generate=True,
             )
             # neu erzeugte Tokens hinter dem bisherigen Ende

 # 1) Konstanten -------------------------------------------------------
 REPO           = "SebastianBodza/Kartoffel_Orpheus-3B_german_natural-v0.1"
+CHUNK_TOKENS   = 7
 START_TOKEN    = 128259
 NEW_BLOCK      = 128257
 EOS_TOKEN      = 128258
         while True:
             next_cache_pos = torch.tensor([offset_len], device=device) if past is not None else None
             gen = model.generate(
                 input_ids       = ids if past is None else torch.tensor([[last_tok]], device=device),
                 attention_mask  = attn if past is None else None,
                 past_key_values = past,
+                cache_position  = None if past is None else next_cache_pos,
                 max_new_tokens  = CHUNK_TOKENS,
                 logits_processor=[masker],
                 do_sample=True, temperature=0.7, top_p=0.95,
                 use_cache=True, return_dict_in_generate=True,
+                return_legacy_cache=False
             )
             # neu erzeugte Tokens hinter dem bisherigen Ende