stmasson committed on
Commit
8f69ba1
·
verified ·
1 Parent(s): d520342

Upload train_ministral_n8n.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_ministral_n8n.py +3 -3
train_ministral_n8n.py CHANGED
@@ -37,9 +37,9 @@ MAX_SEQ_LENGTH = 4096 # n8n workflows can be long
37
  trackio.init(project="ministral-3b-n8n-sft")
38
 
39
  print(f"Loading tokenizer from {MODEL_NAME}...")
40
- # Ministral uses a new tokenizer backend - load with use_fast=True
41
- from transformers import MistralTokenizerFast
42
- tokenizer = MistralTokenizerFast.from_pretrained(MODEL_NAME)
43
  if tokenizer.pad_token is None:
44
  tokenizer.pad_token = tokenizer.eos_token
45
 
 
37
  trackio.init(project="ministral-3b-n8n-sft")
38
 
39
  print(f"Loading tokenizer from {MODEL_NAME}...")
40
+ # Ministral uses tekken tokenizer - use PreTrainedTokenizerFast
41
+ from transformers import PreTrainedTokenizerFast
42
+ tokenizer = PreTrainedTokenizerFast.from_pretrained(MODEL_NAME)
43
  if tokenizer.pad_token is None:
44
  tokenizer.pad_token = tokenizer.eos_token
45