Fix SFTConfig: move max_seq_length to tokenizer.model_max_length
Browse files
Newer TRL removed max_seq_length from SFTConfig.__init__.
Set tokenizer.model_max_length = MAX_SEQ_LEN instead.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
notebooks/ch_trader_finetune.ipynb
CHANGED
|
@@ -285,13 +285,7 @@
|
|
| 285 |
"id": "load-tokenizer",
|
| 286 |
"metadata": {},
|
| 287 |
"outputs": [],
|
| 288 |
-
"source":
|
| 289 |
-
"print(f\"Loading tokenizer: {BASE_MODEL}\")\n",
|
| 290 |
-
"tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\n",
|
| 291 |
-
"tokenizer.pad_token = tokenizer.eos_token\n",
|
| 292 |
-
"tokenizer.padding_side = \"right\"\n",
|
| 293 |
-
"print(\"Tokenizer loaded\")"
|
| 294 |
-
]
|
| 295 |
},
|
| 296 |
{
|
| 297 |
"cell_type": "code",
|
|
@@ -404,47 +398,7 @@
|
|
| 404 |
"id": "train",
|
| 405 |
"metadata": {},
|
| 406 |
"outputs": [],
|
| 407 |
-
"source":
|
| 408 |
-
"sft_config = SFTConfig(\n",
|
| 409 |
-
" output_dir=OUTPUT_DIR,\n",
|
| 410 |
-
" num_train_epochs=NUM_EPOCHS,\n",
|
| 411 |
-
" per_device_train_batch_size=BATCH_SIZE,\n",
|
| 412 |
-
" per_device_eval_batch_size=BATCH_SIZE,\n",
|
| 413 |
-
" gradient_accumulation_steps=GRAD_ACCUM,\n",
|
| 414 |
-
" gradient_checkpointing=True,\n",
|
| 415 |
-
" optim=\"paged_adamw_32bit\",\n",
|
| 416 |
-
" learning_rate=LR,\n",
|
| 417 |
-
" lr_scheduler_type=\"cosine\",\n",
|
| 418 |
-
" warmup_ratio=0.05,\n",
|
| 419 |
-
" max_seq_length=MAX_SEQ_LEN,\n",
|
| 420 |
-
" fp16=not torch.cuda.is_bf16_supported(),\n",
|
| 421 |
-
" bf16=torch.cuda.is_bf16_supported(),\n",
|
| 422 |
-
" logging_steps=25,\n",
|
| 423 |
-
" eval_strategy=\"steps\",\n",
|
| 424 |
-
" eval_steps=100,\n",
|
| 425 |
-
" save_strategy=\"steps\",\n",
|
| 426 |
-
" save_steps=100,\n",
|
| 427 |
-
" load_best_model_at_end=True,\n",
|
| 428 |
-
" metric_for_best_model=\"eval_loss\",\n",
|
| 429 |
-
" greater_is_better=False,\n",
|
| 430 |
-
" report_to=\"none\",\n",
|
| 431 |
-
" dataset_text_field=\"text\",\n",
|
| 432 |
-
" packing=False,\n",
|
| 433 |
-
")\n",
|
| 434 |
-
"\n",
|
| 435 |
-
"trainer = SFTTrainer(\n",
|
| 436 |
-
" model=model,\n",
|
| 437 |
-
" args=sft_config,\n",
|
| 438 |
-
" train_dataset=train_dataset,\n",
|
| 439 |
-
" eval_dataset=val_dataset,\n",
|
| 440 |
-
" peft_config=lora_config,\n",
|
| 441 |
-
" processing_class=tokenizer,\n",
|
| 442 |
-
")\n",
|
| 443 |
-
"\n",
|
| 444 |
-
"print(\"Starting training...\")\n",
|
| 445 |
-
"trainer.train()\n",
|
| 446 |
-
"print(\"Training complete.\")"
|
| 447 |
-
]
|
| 448 |
},
|
| 449 |
{
|
| 450 |
"cell_type": "markdown",
|
|
|
|
| 285 |
"id": "load-tokenizer",
|
| 286 |
"metadata": {},
|
| 287 |
"outputs": [],
|
| 288 |
+
"source": "print(f\"Loading tokenizer: {BASE_MODEL}\")\ntokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)\ntokenizer.pad_token = tokenizer.eos_token\ntokenizer.padding_side = \"right\"\ntokenizer.model_max_length = MAX_SEQ_LEN # replaces max_seq_length in SFTConfig\nprint(\"Tokenizer loaded\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
},
|
| 290 |
{
|
| 291 |
"cell_type": "code",
|
|
|
|
| 398 |
"id": "train",
|
| 399 |
"metadata": {},
|
| 400 |
"outputs": [],
|
| 401 |
+
"source": "sft_config = SFTConfig(\n output_dir=OUTPUT_DIR,\n num_train_epochs=NUM_EPOCHS,\n per_device_train_batch_size=BATCH_SIZE,\n per_device_eval_batch_size=BATCH_SIZE,\n gradient_accumulation_steps=GRAD_ACCUM,\n gradient_checkpointing=True,\n optim=\"paged_adamw_32bit\",\n learning_rate=LR,\n lr_scheduler_type=\"cosine\",\n warmup_ratio=0.05,\n fp16=not torch.cuda.is_bf16_supported(),\n bf16=torch.cuda.is_bf16_supported(),\n logging_steps=25,\n eval_strategy=\"steps\",\n eval_steps=100,\n save_strategy=\"steps\",\n save_steps=100,\n load_best_model_at_end=True,\n metric_for_best_model=\"eval_loss\",\n greater_is_better=False,\n report_to=\"none\",\n dataset_text_field=\"text\",\n packing=False,\n)\n\ntrainer = SFTTrainer(\n model=model,\n args=sft_config,\n train_dataset=train_dataset,\n eval_dataset=val_dataset,\n peft_config=lora_config,\n processing_class=tokenizer,\n)\n\nprint(\"Starting training...\")\ntrainer.train()\nprint(\"Training complete.\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
},
|
| 403 |
{
|
| 404 |
"cell_type": "markdown",
|