Revert convert_tokens_to_string
Browse files — tokenization_qwen.py (+6 −5)
tokenization_qwen.py
CHANGED
|
@@ -198,15 +198,16 @@ class QWenTokenizer(PreTrainedTokenizer):
|
|
| 198 |
|
| 199 |
return tokens
|
| 200 |
|
| 201 |
-
def convert_tokens_to_string(self, tokens: List[
|
| 202 |
"""
|
| 203 |
Converts a sequence of tokens in a single string. The most simple way to do it is `" ".join(tokens)` but we
|
| 204 |
often want to remove sub-word tokenization artifacts at the same time.
|
| 205 |
"""
|
| 206 |
-
text =
|
| 207 |
-
for
|
| 208 |
-
|
| 209 |
-
|
|
|
|
| 210 |
|
| 211 |
@property
|
| 212 |
def vocab_size(self):
|
|
|
|
| 198 |
|
| 199 |
return tokens
|
| 200 |
|
| 201 |
def convert_tokens_to_string(self, tokens: List[str]) -> str:
    """
    Converts a sequence of tokens in a single string. The most simple way to do it is `" ".join(tokens)` but we
    often want to remove sub-word tokenization artifacts at the same time.
    """
    # Merge the byte-level token pieces into one surrogate string, then map
    # each character back to its raw byte via the tokenizer's byte decoder.
    merged = "".join(tokens)
    raw = bytearray(self.byte_decoder[ch] for ch in merged)
    # Decode the recovered bytes as UTF-8, using the configured error policy.
    return raw.decode("utf-8", errors=self.errors)
|
| 211 |
|
| 212 |
@property
|
| 213 |
def vocab_size(self):
|