Fix HuggingFaceTokenizerAdapter to match working implementation
Browse files
- adapter.py +1 -0
adapter.py
CHANGED
|
@@ -538,6 +538,7 @@ class HuggingFaceTokenizerAdapter:
|
|
| 538 |
chars = [self.inv_vocab.get(int(t), '') for t in seq if self.inv_vocab.get(int(t), '') not in {'-', '*', '<', '>'} and self.inv_vocab.get(int(t), '') != '']
|
| 539 |
# Use res_to_seq for formatting, pass (sequence, length) tuple as in original code
|
| 540 |
# The length is not always available, so use len(chars) as fallback
|
|
|
|
| 541 |
formatted = res_to_seq([ ''.join(chars), len(chars) ], mode='restore')
|
| 542 |
decoded.append(formatted)
|
| 543 |
return decoded
|
|
|
|
| 538 |
chars = [self.inv_vocab.get(int(t), '') for t in seq if self.inv_vocab.get(int(t), '') not in {'-', '*', '<', '>'} and self.inv_vocab.get(int(t), '') != '']
|
| 539 |
# Use res_to_seq for formatting, pass (sequence, length) tuple as in original code
|
| 540 |
# The length is not always available, so use len(chars) as fallback
|
| 541 |
+
from extra_utils import res_to_seq
|
| 542 |
formatted = res_to_seq([ ''.join(chars), len(chars) ], mode='restore')
|
| 543 |
decoded.append(formatted)
|
| 544 |
return decoded
|