Upload train_code_reasoning.py
Browse files
- train_code_reasoning.py +0 -6
train_code_reasoning.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
| 13 |
import os
|
| 14 |
import random
|
| 15 |
from datasets import load_dataset, concatenate_datasets
|
| 16 |
-
from transformers import AutoTokenizer
|
| 17 |
from trl import SFTTrainer, SFTConfig
|
| 18 |
import trackio
|
| 19 |
|
|
@@ -25,9 +24,6 @@ OUTPUT_DIR = "./code-reasoning-1.5b"
|
|
| 25 |
# Initialize Trackio
|
| 26 |
trackio.init(project="code-reasoning-ft", name="qwen2.5-coder-1.5b-code-reasoning")
|
| 27 |
|
| 28 |
-
# Load tokenizer
|
| 29 |
-
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
|
| 30 |
-
|
| 31 |
print("Loading and preparing datasets...")
|
| 32 |
|
| 33 |
all_datasets = []
|
|
@@ -188,8 +184,6 @@ trainer = SFTTrainer(
|
|
| 188 |
model=MODEL_ID,
|
| 189 |
train_dataset=train_dataset,
|
| 190 |
args=training_args,
|
| 191 |
-
processing_class=tokenizer,
|
| 192 |
-
max_seq_length=2048,
|
| 193 |
)
|
| 194 |
|
| 195 |
print("Starting training...")
|
|
|
|
| 13 |
import os
|
| 14 |
import random
|
| 15 |
from datasets import load_dataset, concatenate_datasets
|
|
|
|
| 16 |
from trl import SFTTrainer, SFTConfig
|
| 17 |
import trackio
|
| 18 |
|
|
|
|
| 24 |
# Initialize Trackio
|
| 25 |
trackio.init(project="code-reasoning-ft", name="qwen2.5-coder-1.5b-code-reasoning")
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
print("Loading and preparing datasets...")
|
| 28 |
|
| 29 |
all_datasets = []
|
|
|
|
| 184 |
model=MODEL_ID,
|
| 185 |
train_dataset=train_dataset,
|
| 186 |
args=training_args,
|
|
|
|
|
|
|
| 187 |
)
|
| 188 |
|
| 189 |
print("Starting training...")
|