stmasson committed on
Commit
1af3bc8
·
verified ·
1 Parent(s): a4640ca

Upload train_ministral_n8n.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_ministral_n8n.py +6 -5
train_ministral_n8n.py CHANGED
@@ -30,17 +30,18 @@ from trl import SFTTrainer, SFTConfig
30
  import trackio
31
 
32
  # Configuration
33
- MODEL_NAME = "mistralai/Ministral-3-3B-Instruct-2512"
 
 
34
  DATASET_NAME = "stmasson/n8n-workflows-thinking"
35
- OUTPUT_MODEL = "stmasson/ministral-3b-n8n-workflows"
36
  MAX_SEQ_LENGTH = 4096 # n8n workflows can be long
37
 
38
  # Initialize Trackio for monitoring
39
- trackio.init(project="ministral-3b-n8n-sft")
40
 
41
  print(f"Loading tokenizer from {MODEL_NAME}...")
42
- # Ministral uses Tekken tokenizer - use AutoTokenizer with trust_remote_code
43
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
44
  if tokenizer.pad_token is None:
45
  tokenizer.pad_token = tokenizer.eos_token
46
 
 
30
  import trackio
31
 
32
  # Configuration
33
+ # Note: Ministral-3-3B-Instruct-2512 uses new TokenizersBackend not yet in transformers
34
+ # Using Mistral-7B-Instruct-v0.3 as compatible alternative
35
+ MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
36
  DATASET_NAME = "stmasson/n8n-workflows-thinking"
37
+ OUTPUT_MODEL = "stmasson/mistral-7b-n8n-workflows"
38
  MAX_SEQ_LENGTH = 4096 # n8n workflows can be long
39
 
40
  # Initialize Trackio for monitoring
41
+ trackio.init(project="mistral-7b-n8n-sft")
42
 
43
  print(f"Loading tokenizer from {MODEL_NAME}...")
44
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
45
  if tokenizer.pad_token is None:
46
  tokenizer.pad_token = tokenizer.eos_token
47