Upload train_ministral_n8n.py with huggingface_hub
Browse files
- train_ministral_n8n.py +6 -5
train_ministral_n8n.py
CHANGED
|
@@ -30,17 +30,18 @@ from trl import SFTTrainer, SFTConfig
|
|
| 30 |
import trackio
|
| 31 |
|
| 32 |
# Configuration
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
DATASET_NAME = "stmasson/n8n-workflows-thinking"
|
| 35 |
-
OUTPUT_MODEL = "stmasson/
|
| 36 |
MAX_SEQ_LENGTH = 4096 # n8n workflows can be long
|
| 37 |
|
| 38 |
# Initialize Trackio for monitoring
|
| 39 |
-
trackio.init(project="
|
| 40 |
|
| 41 |
print(f"Loading tokenizer from {MODEL_NAME}...")
|
| 42 |
-
|
| 43 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
|
| 44 |
if tokenizer.pad_token is None:
|
| 45 |
tokenizer.pad_token = tokenizer.eos_token
|
| 46 |
|
|
|
|
| 30 |
import trackio
|
| 31 |
|
| 32 |
# Configuration
|
| 33 |
+
# Note: Ministral-3-3B-Instruct-2512 uses new TokenizersBackend not yet in transformers
|
| 34 |
+
# Using Mistral-7B-Instruct-v0.3 as compatible alternative
|
| 35 |
+
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"
|
| 36 |
DATASET_NAME = "stmasson/n8n-workflows-thinking"
|
| 37 |
+
OUTPUT_MODEL = "stmasson/mistral-7b-n8n-workflows"
|
| 38 |
MAX_SEQ_LENGTH = 4096 # n8n workflows can be long
|
| 39 |
|
| 40 |
# Initialize Trackio for monitoring
|
| 41 |
+
trackio.init(project="mistral-7b-n8n-sft")
|
| 42 |
|
| 43 |
print(f"Loading tokenizer from {MODEL_NAME}...")
|
| 44 |
+
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
|
|
|
| 45 |
if tokenizer.pad_token is None:
|
| 46 |
tokenizer.pad_token = tokenizer.eos_token
|
| 47 |
|