Aixile committed on
Commit ·
50e1e74
1
Parent(s): 323bf0d
Support older versions of the transformers library
Browse files
- tokenization_qwen.py +7 -7
tokenization_qwen.py
CHANGED
|
@@ -18,7 +18,7 @@ from PIL import Image
|
|
| 18 |
from PIL import ImageFont
|
| 19 |
from PIL import ImageDraw
|
| 20 |
from transformers import PreTrainedTokenizer, AddedToken
|
| 21 |
-
from transformers.utils import try_to_load_from_cache
|
| 22 |
|
| 23 |
import matplotlib.colors as mcolors
|
| 24 |
from matplotlib.font_manager import FontProperties
|
|
@@ -27,12 +27,12 @@ logger = logging.getLogger(__name__)
|
|
| 27 |
|
| 28 |
|
| 29 |
VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
|
| 30 |
-
FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
|
| 31 |
-
if FONT_PATH is None:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
|
| 37 |
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
|
| 38 |
ENDOFTEXT = "<|endoftext|>"
|
|
|
|
| 18 |
from PIL import ImageFont
|
| 19 |
from PIL import ImageDraw
|
| 20 |
from transformers import PreTrainedTokenizer, AddedToken
|
| 21 |
+
# from transformers.utils import try_to_load_from_cache
|
| 22 |
|
| 23 |
import matplotlib.colors as mcolors
|
| 24 |
from matplotlib.font_manager import FontProperties
|
|
|
|
| 27 |
|
| 28 |
|
| 29 |
VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
|
| 30 |
+
# FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
|
| 31 |
+
# if FONT_PATH is None:
|
| 32 |
+
# if not os.path.exists("SimSun.ttf"):
|
| 33 |
+
# ttf = requests.get("https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/SimSun.ttf")
|
| 34 |
+
# open("SimSun.ttf", "wb").write(ttf.content)
|
| 35 |
+
# FONT_PATH = "SimSun.ttf"
|
| 36 |
|
| 37 |
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
|
| 38 |
ENDOFTEXT = "<|endoftext|>"
|