Update orpheus-tts/kartoffel_decoder.py
Browse files
- orpheus-tts/kartoffel_decoder.py +12 -15
orpheus-tts/kartoffel_decoder.py
CHANGED
|
@@ -73,21 +73,16 @@ def convert_to_audio_kartoffel(audio_tensor):
|
|
| 73 |
return audio_numpy.tobytes()
|
| 74 |
|
| 75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
| 76 |
-
"""Extrahiert Audio-Token-IDs aus dem generierten Text"""
|
| 77 |
try:
|
| 78 |
-
print(f"DEBUG KARTOFFEL: Received token_text: {token_text}")
|
| 79 |
|
| 80 |
-
#
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
print(f"DEBUG KARTOFFEL: Parsed token_ids from string: {token_ids}")
|
| 85 |
-
else:
|
| 86 |
-
# Fallback: Text zu Token-IDs konvertieren (altes Format)
|
| 87 |
-
token_ids = tokenizer.encode(token_text)
|
| 88 |
-
print(f"DEBUG KARTOFFEL: Encoded token_ids: {token_ids}")
|
| 89 |
|
| 90 |
-
# Nach Start-Token suchen
|
| 91 |
start_idx = -1
|
| 92 |
for i, token_id in enumerate(token_ids):
|
| 93 |
if token_id == CODE_START_TOKEN_ID:
|
|
@@ -95,14 +90,16 @@ def extract_kartoffel_tokens(token_text, tokenizer):
|
|
| 95 |
break
|
| 96 |
|
| 97 |
if start_idx == -1:
|
| 98 |
-
print(f"DEBUG KARTOFFEL: No start token found ({CODE_START_TOKEN_ID})")
|
|
|
|
| 99 |
return []
|
| 100 |
|
| 101 |
-
print(f"DEBUG KARTOFFEL: Found start token at index {start_idx}")
|
| 102 |
|
| 103 |
# Audio-Tokens extrahieren (nach Start-Token)
|
| 104 |
potential_code_tokens = token_ids[start_idx + 1:]
|
| 105 |
-
print(f"DEBUG KARTOFFEL: Potential code tokens: {potential_code_tokens}")
|
|
|
|
| 106 |
|
| 107 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
| 108 |
valid_raw_codes = [
|
|
|
|
| 73 |
return audio_numpy.tobytes()
|
| 74 |
|
| 75 |
def extract_kartoffel_tokens(token_text, tokenizer):
|
| 76 |
+
"""Extrahiert Audio-Token-IDs aus dem von vLLM generierten Text"""
|
| 77 |
try:
|
| 78 |
+
print(f"DEBUG KARTOFFEL: Received token_text: {token_text[:100]}...")
|
| 79 |
|
| 80 |
+
# Text zu Token-IDs konvertieren (vLLM generiert Text, nicht numerische IDs)
|
| 81 |
+
token_ids = tokenizer.encode(token_text)
|
| 82 |
+
print(f"DEBUG KARTOFFEL: Encoded token_ids count: {len(token_ids)}")
|
| 83 |
+
print(f"DEBUG KARTOFFEL: First 20 token_ids: {token_ids[:20]}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
+
# Nach Audio-Start-Token suchen (128257)
|
| 86 |
start_idx = -1
|
| 87 |
for i, token_id in enumerate(token_ids):
|
| 88 |
if token_id == CODE_START_TOKEN_ID:
|
|
|
|
| 90 |
break
|
| 91 |
|
| 92 |
if start_idx == -1:
|
| 93 |
+
print(f"DEBUG KARTOFFEL: No audio start token found ({CODE_START_TOKEN_ID})")
|
| 94 |
+
print(f"DEBUG KARTOFFEL: Available unique tokens: {sorted(set(token_ids))}")
|
| 95 |
return []
|
| 96 |
|
| 97 |
+
print(f"DEBUG KARTOFFEL: Found audio start token at index {start_idx}")
|
| 98 |
|
| 99 |
# Audio-Tokens extrahieren (nach Start-Token)
|
| 100 |
potential_code_tokens = token_ids[start_idx + 1:]
|
| 101 |
+
print(f"DEBUG KARTOFFEL: Potential code tokens count: {len(potential_code_tokens)}")
|
| 102 |
+
print(f"DEBUG KARTOFFEL: First 10 potential codes: {potential_code_tokens[:10]}")
|
| 103 |
|
| 104 |
# Nur gültige Audio-Tokens (>= CODE_TOKEN_OFFSET, nicht REMOVE_TOKEN)
|
| 105 |
valid_raw_codes = [
|