stmasson committed on
Commit
d520342
·
verified ·
1 Parent(s): da2c216

Upload train_ministral_n8n.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_ministral_n8n.py +3 -1
train_ministral_n8n.py CHANGED
@@ -37,7 +37,9 @@ MAX_SEQ_LENGTH = 4096 # n8n workflows can be long
37
  trackio.init(project="ministral-3b-n8n-sft")
38
 
39
  print(f"Loading tokenizer from {MODEL_NAME}...")
40
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
 
 
41
  if tokenizer.pad_token is None:
42
  tokenizer.pad_token = tokenizer.eos_token
43
 
 
37
  trackio.init(project="ministral-3b-n8n-sft")
38
 
39
  print(f"Loading tokenizer from {MODEL_NAME}...")
40
+ # Ministral uses a new tokenizer backend - load it through the fast tokenizer class directly
41
+ from transformers import MistralTokenizerFast
42
+ tokenizer = MistralTokenizerFast.from_pretrained(MODEL_NAME)
43
  if tokenizer.pad_token is None:
44
  tokenizer.pad_token = tokenizer.eos_token
45