alex-ht committed on
Commit ·
7b01696
1
Parent(s): f767f82
update
Browse files
- ultravox_processing.py +2 -1
ultravox_processing.py
CHANGED
|
@@ -171,6 +171,7 @@ class UltravoxProcessor(transformers.ProcessorMixin):
|
|
| 171 |
text, list
|
| 172 |
), "Text must be a list."
|
| 173 |
processed_text = []
|
|
|
|
| 174 |
for t in text:
|
| 175 |
if self.audio_placeholder in t:
|
| 176 |
if "audio_token_len" not in data:
|
|
@@ -184,7 +185,7 @@ class UltravoxProcessor(transformers.ProcessorMixin):
|
|
| 184 |
add_special_tokens=False,
|
| 185 |
)
|
| 186 |
)
|
| 187 |
-
data["audio_token_start_idx"]
|
| 188 |
|
| 189 |
# Replace the audio placeholder with the audio token.
|
| 190 |
# e.g. "Transcribe\n<|audio|>" -> "Transcribe </s></s></s></s></s></s></s></s>"
|
|
|
|
| 171 |
text, list
|
| 172 |
), "Text must be a list."
|
| 173 |
processed_text = []
|
| 174 |
+
data["audio_token_start_idx"] = []
|
| 175 |
for t in text:
|
| 176 |
if self.audio_placeholder in t:
|
| 177 |
if "audio_token_len" not in data:
|
|
|
|
| 185 |
add_special_tokens=False,
|
| 186 |
)
|
| 187 |
)
|
| 188 |
+
data["audio_token_start_idx"].append(start_idx)
|
| 189 |
|
| 190 |
# Replace the audio placeholder with the audio token.
|
| 191 |
# e.g. "Transcribe\n<|audio|>" -> "Transcribe </s></s></s></s></s></s></s></s>"
|