Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,7 +15,7 @@ torch.backends.cuda.enable_flash_sdp(False) # PyTorch‑2.2‑Bug
|
|
| 15 |
|
| 16 |
# 1) Konstanten -------------------------------------------------------
|
| 17 |
REPO = "SebastianBodza/Kartoffel_Orpheus-3B_german_natural-v0.1"
|
| 18 |
-
CHUNK_TOKENS =
|
| 19 |
START_TOKEN = 128259
|
| 20 |
NEW_BLOCK = 128257
|
| 21 |
EOS_TOKEN = 128258
|
|
@@ -108,16 +108,16 @@ async def tts(ws: WebSocket):
|
|
| 108 |
|
| 109 |
while True:
|
| 110 |
next_cache_pos = torch.tensor([offset_len], device=device) if past is not None else None
|
| 111 |
-
|
| 112 |
gen = model.generate(
|
| 113 |
input_ids = ids if past is None else torch.tensor([[last_tok]], device=device),
|
| 114 |
attention_mask = attn if past is None else None,
|
| 115 |
past_key_values = past,
|
| 116 |
-
cache_position =
|
| 117 |
max_new_tokens = CHUNK_TOKENS,
|
| 118 |
logits_processor=[masker],
|
| 119 |
do_sample=True, temperature=0.7, top_p=0.95,
|
| 120 |
use_cache=True, return_dict_in_generate=True,
|
|
|
|
| 121 |
)
|
| 122 |
|
| 123 |
# neu erzeugte Tokens hinter dem bisherigen Ende
|
|
|
|
| 15 |
|
| 16 |
# 1) Konstanten -------------------------------------------------------
|
| 17 |
REPO = "SebastianBodza/Kartoffel_Orpheus-3B_german_natural-v0.1"
|
| 18 |
+
CHUNK_TOKENS = 7
|
| 19 |
START_TOKEN = 128259
|
| 20 |
NEW_BLOCK = 128257
|
| 21 |
EOS_TOKEN = 128258
|
|
|
|
| 108 |
|
| 109 |
while True:
|
| 110 |
next_cache_pos = torch.tensor([offset_len], device=device) if past is not None else None
|
|
|
|
| 111 |
gen = model.generate(
|
| 112 |
input_ids = ids if past is None else torch.tensor([[last_tok]], device=device),
|
| 113 |
attention_mask = attn if past is None else None,
|
| 114 |
past_key_values = past,
|
| 115 |
+
cache_position = None if past is None else next_cache_pos,
|
| 116 |
max_new_tokens = CHUNK_TOKENS,
|
| 117 |
logits_processor=[masker],
|
| 118 |
do_sample=True, temperature=0.7, top_p=0.95,
|
| 119 |
use_cache=True, return_dict_in_generate=True,
|
| 120 |
+
return_legacy_cache=False
|
| 121 |
)
|
| 122 |
|
| 123 |
# neu erzeugte Tokens hinter dem bisherigen Ende
|