|
|
|
|
|
|
|
|
|
|
|
|
|
|
from datasets import load_dataset
|
|
|
from peft import LoraConfig
|
|
|
from trl import SFTTrainer, SFTConfig
|
|
|
|
|
|
|
|
|
# Load the "solutions_py_decontaminated" config of open-r1/codeforces-cots
# and print a quick summary so the run log shows what is being trained on.
print("Loading dataset...")
dataset = load_dataset("open-r1/codeforces-cots", "solutions_py_decontaminated", split="train")
print(f"Dataset size: {len(dataset)} examples")
print(f"Columns: {dataset.column_names}")

# Index the first row once and reuse it for both diagnostic prints.
first_example = dataset[0]
print(f"First example keys: {first_example.keys()}")

if "messages" in dataset.column_names:
    # Slicing already copes with fewer than two messages, so no separate
    # length check is needed to print "up to the first two" turns.
    print(f"Messages sample: {first_example['messages'][:2]}")

    # NOTE(review): SFTTrainer infers the dataset format from the column
    # names it sees. Keeping only "messages" removes any ambiguity if other
    # recognised columns (for example a plain-string "prompt") are also
    # present -- confirm against the "Columns:" line printed above.
    dataset = dataset.select_columns(["messages"])
|
|
|
|
|
|
|
|
|
# Carve a small evaluation set out of the training data. The seed is fixed so
# repeated runs evaluate on the same examples.
EVAL_FRACTION = 0.05
SPLIT_SEED = 42

dataset_split = dataset.train_test_split(
    seed=SPLIT_SEED,
    test_size=EVAL_FRACTION,
)
print(f"Train: {len(dataset_split['train'])}, Eval: {len(dataset_split['test'])}")
|
|
|
|
|
|
|
|
|
# Names of the projection modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 32,
# dropout 0.05, bias mode "none".
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)
|
|
|
|
|
|
|
|
|
# The output directory and the tracked run share one label; evaluation and
# checkpointing share one step interval.
run_label = "qwen3-0.6b-codeforces-sft"
checkpoint_interval = 200

training_args = SFTConfig(
    # --- Naming and Hub upload ---
    output_dir=run_label,
    run_name=run_label,
    push_to_hub=True,
    hub_model_id="luiscosio/qwen3-0.6b-codeforces-sft",
    hub_strategy="every_save",
    # --- Schedule and optimisation ---
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    optim="adamw_torch_fused",
    max_grad_norm=1.0,
    bf16=True,
    # --- Evaluation, checkpointing and logging ---
    eval_strategy="steps",
    eval_steps=checkpoint_interval,
    save_strategy="steps",
    save_steps=checkpoint_interval,
    save_total_limit=3,
    logging_steps=10,
    report_to="trackio",
    # --- Sequence handling ---
    max_length=2048,
    dataset_text_field=None,
)
|
|
|
|
|
|
|
|
|
# Wire the model, both data splits, the LoRA config and the training
# arguments into a single SFTTrainer. The model is given by name rather than
# as an already-loaded object.
print("Initializing trainer...")

train_split = dataset_split["train"]
eval_split = dataset_split["test"]

trainer = SFTTrainer(
    model="Qwen/Qwen3-0.6B",
    args=training_args,
    peft_config=peft_config,
    train_dataset=train_split,
    eval_dataset=eval_split,
)
|
|
|
|
|
|
|
|
|
# Step 1: run fine-tuning with the trainer configured above.
print("Starting training...")
trainer.train()

# Step 2: explicit upload once training has returned.
print("Pushing to Hub...")
trainer.push_to_hub()

print("Training complete!")
|
|
|
|