hemantn committed on
Commit
ea9d808
·
1 Parent(s): 0e5736d

Fix HuggingFaceTokenizerAdapter to match working implementation

Browse files
Files changed (1) hide show
  1. adapter.py +1 -0
adapter.py CHANGED
@@ -538,6 +538,7 @@ class HuggingFaceTokenizerAdapter:
538
  chars = [self.inv_vocab.get(int(t), '') for t in seq if self.inv_vocab.get(int(t), '') not in {'-', '*', '<', '>'} and self.inv_vocab.get(int(t), '') != '']
539
  # Use res_to_seq for formatting, pass (sequence, length) tuple as in original code
540
  # The length is not always available, so use len(chars) as fallback
 
541
  formatted = res_to_seq([ ''.join(chars), len(chars) ], mode='restore')
542
  decoded.append(formatted)
543
  return decoded
 
538
  chars = [self.inv_vocab.get(int(t), '') for t in seq if self.inv_vocab.get(int(t), '') not in {'-', '*', '<', '>'} and self.inv_vocab.get(int(t), '') != '']
539
  # Use res_to_seq for formatting, pass (sequence, length) tuple as in original code
540
  # The length is not always available, so use len(chars) as fallback
541
+ from extra_utils import res_to_seq
542
  formatted = res_to_seq([ ''.join(chars), len(chars) ], mode='restore')
543
  decoded.append(formatted)
544
  return decoded