dataflare
/

df-arc

@@ -213,7 +213,22 @@ class DFArcTokenizer(PreTrainedTokenizerFast):
             text = self.phrase_helper.merge_phrases(text)
         return super().encode(text, *args, **kwargs)
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
         """
         Converts a sequence of tokens (string) in a single string.
         """

             text = self.phrase_helper.merge_phrases(text)
         return super().encode(text, *args, **kwargs)
+    def decode(self, token_ids, skip_special_tokens=False, clean_up_tokenization_spaces=None, **kwargs):
+        """
+        Decode token ids to text through ``convert_tokens_to_string``.
+
+        Overrides the inherited ``decode`` so that the returned text is always
+        produced by this class's ``convert_tokens_to_string``.
+
+        Args:
+            token_ids: A single id, a sequence of ids, or an array/tensor-like
+                object exposing ``tolist()`` (e.g. a NumPy array or scalar).
+            skip_special_tokens: Forwarded to ``convert_ids_to_tokens``.
+            clean_up_tokenization_spaces: Accepted for signature compatibility
+                with the overridden method; not applied by this override.
+            **kwargs: Accepted for signature compatibility; ignored.
+
+        Returns:
+            The string built by ``convert_tokens_to_string`` from the tokens.
+        """
+        # Normalise array-likes to plain Python values first, so that a 0-d
+        # scalar ends up as an ``int`` and is caught by the check below.
+        if hasattr(token_ids, "tolist"):
+            token_ids = token_ids.tolist()
+        # Wrap a lone id so the rest of the method always sees a sequence.
+        if isinstance(token_ids, int):
+            token_ids = [token_ids]
+        tokens = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
+        # Build the final text with this class's own token-joining logic.
+        return self.convert_tokens_to_string(tokens)
     def convert_tokens_to_string(self, tokens: List[str]) -> str:
         """
         Converts a sequence of tokens (string) in a single string.
         """