Spaces:
Sleeping
Sleeping
Sahil Seemant committed on
Commit ·
9ae233f
1
Parent(s): 1fd607f
Bypass TokenizersBackend bug by using Nemo tokenizer for Ministral
Browse files - chat_gui.py +5 -1
chat_gui.py
CHANGED
|
@@ -305,9 +305,13 @@ if st.session_state.messages and st.session_state.messages[-1]["role"] == "user"
|
|
| 305 |
# Setting use_fast=False to avoid "TokenizersBackend" errors on some environments
|
| 306 |
processor_class = AutoTokenizer
|
| 307 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 308 |
st.info(f"Loading {st.session_state.current_model} via Transformers...")
|
| 309 |
tokenizer = processor_class.from_pretrained(
|
| 310 |
-
|
| 311 |
token=hf_token,
|
| 312 |
trust_remote_code=True,
|
| 313 |
use_fast=False
|
|
|
|
| 305 |
# Setting use_fast=False to avoid "TokenizersBackend" errors on some environments
|
| 306 |
processor_class = AutoTokenizer
|
| 307 |
|
| 308 |
+
# The Ministral-3 config has an invalid tokenizer class ("TokenizersBackend")
|
| 309 |
+
# We load the exact same Tekken tokenizer from the Nemo repository to bypass this bug on HF Spaces.
|
| 310 |
+
tokenizer_path = "mistralai/Mistral-Nemo-Instruct-2407" if is_mistral else conf["path"]
|
| 311 |
+
|
| 312 |
st.info(f"Loading {st.session_state.current_model} via Transformers...")
|
| 313 |
tokenizer = processor_class.from_pretrained(
|
| 314 |
+
tokenizer_path,
|
| 315 |
token=hf_token,
|
| 316 |
trust_remote_code=True,
|
| 317 |
use_fast=False
|