Upload scripts/train_alizee_v2_stage1_sft.py with huggingface_hub
Browse files
scripts/train_alizee_v2_stage1_sft.py
CHANGED
|
@@ -39,9 +39,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
|
| 39 |
from trl import SFTTrainer, SFTConfig
|
| 40 |
|
| 41 |
# Configuration
|
| 42 |
-
# Use
|
| 43 |
-
#
|
| 44 |
-
BASE_MODEL = "
|
| 45 |
OUTPUT_REPO = "stmasson/alizee-coder-devstral-2-small-stage1"
|
| 46 |
FINAL_REPO = "stmasson/alizee-coder-devstral-2-small"
|
| 47 |
|
|
@@ -71,7 +71,6 @@ print("\n📝 Loading tokenizer...")
|
|
| 71 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 72 |
BASE_MODEL,
|
| 73 |
trust_remote_code=True,
|
| 74 |
-
use_fast=False, # Use slow tokenizer to avoid conversion issues
|
| 75 |
)
|
| 76 |
if tokenizer.pad_token is None:
|
| 77 |
tokenizer.pad_token = tokenizer.eos_token
|
|
|
|
| 39 |
from trl import SFTTrainer, SFTConfig
|
| 40 |
|
| 41 |
# Configuration
|
| 42 |
+
# Use unsloth's Devstral version, which has a HuggingFace-compatible tokenizer
|
| 43 |
+
# (the mistralai version uses the Tekken tokenizer, which is not HuggingFace-compatible)
|
| 44 |
+
BASE_MODEL = "unsloth/Devstral-Small-2505"
|
| 45 |
OUTPUT_REPO = "stmasson/alizee-coder-devstral-2-small-stage1"
|
| 46 |
FINAL_REPO = "stmasson/alizee-coder-devstral-2-small"
|
| 47 |
|
|
|
|
| 71 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 72 |
BASE_MODEL,
|
| 73 |
trust_remote_code=True,
|
|
|
|
| 74 |
)
|
| 75 |
if tokenizer.pad_token is None:
|
| 76 |
tokenizer.pad_token = tokenizer.eos_token
|