Sahil Seemant committed on
Commit
9ae233f
·
1 Parent(s): 1fd607f

Bypass TokenizersBackend bug by using Nemo tokenizer for Ministral

Browse files
Files changed (1) hide show
  1. chat_gui.py +5 -1
chat_gui.py CHANGED
@@ -305,9 +305,13 @@ if st.session_state.messages and st.session_state.messages[-1]["role"] == "user"
305
  # Setting use_fast=False to avoid "TokenizersBackend" errors on some environments
306
  processor_class = AutoTokenizer
307
 
 
 
 
 
308
  st.info(f"Loading {st.session_state.current_model} via Transformers...")
309
  tokenizer = processor_class.from_pretrained(
310
- conf["path"],
311
  token=hf_token,
312
  trust_remote_code=True,
313
  use_fast=False
 
305
  # Setting use_fast=False to avoid "TokenizersBackend" errors on some environments
306
  processor_class = AutoTokenizer
307
 
308
+ # The Ministral-3 config declares an invalid tokenizer class ("TokenizersBackend").
309
+ # We load the exact same Tekken tokenizer from the Nemo repository to bypass this bug on HF Spaces.
310
+ tokenizer_path = "mistralai/Mistral-Nemo-Instruct-2407" if is_mistral else conf["path"]
311
+
312
  st.info(f"Loading {st.session_state.current_model} via Transformers...")
313
  tokenizer = processor_class.from_pretrained(
314
+ tokenizer_path,
315
  token=hf_token,
316
  trust_remote_code=True,
317
  use_fast=False