kriyanshi's picture
Prepare Android Skill Router for Build Small hackathon submission.
6524169
Raw
History Blame Contribute Delete
3.02 kB
"""
Fine-tune Qwen2.5-3B-Instruct with Unsloth (4-bit QLoRA + SFT).

Installation (CUDA GPU required):

    pip install -r modal_apps/requirements-modal.txt

Or install manually:

    pip install torch torchvision
    pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
    pip install unsloth_zoo
    pip install --no-deps trl peft accelerate bitsandbytes
    pip install datasets transformers

Run:

    python scripts/train.py
"""
import unsloth # noqa: F401 — must import before trl/transformers/peft
from pathlib import Path
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer
from unsloth import FastLanguageModel, is_bf16_supported
from unsloth.chat_templates import get_chat_template
# Base model identifier handed to FastLanguageModel.from_pretrained in main().
MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

# Two directory levels above this file; data/ and trained_model/ are resolved
# relative to it so the script does not depend on the current working directory.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Kept as plain strings because they are passed straight to library calls.
DATA_PATH = str(PROJECT_ROOT.joinpath("data", "train.jsonl"))
OUTPUT_DIR = str(PROJECT_ROOT.joinpath("trained_model"))

# Sequence length shared by model loading, LoRA setup and the SFT config.
MAX_SEQ_LENGTH = 2048
def main() -> None:
    """Fine-tune ``MODEL_NAME`` on ``DATA_PATH`` with 4-bit QLoRA and save it.

    Steps, in order:

    1. Validate and load the JSONL dataset. This happens *before* the model
       is loaded so that a missing file or a wrong schema fails immediately
       instead of after the base model has been downloaded and loaded.
    2. Load the base model in 4-bit and attach LoRA adapters (r=16) to the
       attention and MLP projection modules.
    3. Render every example's ``messages`` list to a single ``text`` string
       with the "qwen-2.5" chat template.
    4. Run supervised fine-tuning for 3 epochs and write the model and
       tokenizer to ``OUTPUT_DIR`` via ``save_pretrained``.

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not point to a file.
        ValueError: If the dataset has no ``messages`` column.
    """
    # Fail fast on dataset problems: these checks are cheap, whereas loading
    # the model below is the expensive part of start-up.
    if not Path(DATA_PATH).is_file():
        raise FileNotFoundError(f"Training data not found: {DATA_PATH}")

    dataset = load_dataset("json", data_files=DATA_PATH, split="train")
    if "messages" not in dataset.column_names:
        raise ValueError(
            f"{DATA_PATH} must contain a 'messages' column; "
            f"found columns: {dataset.column_names}"
        )

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_NAME,
        max_seq_length=MAX_SEQ_LENGTH,
        dtype=None,
        load_in_4bit=True,
    )

    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        max_seq_length=MAX_SEQ_LENGTH,
    )

    tokenizer = get_chat_template(
        tokenizer,
        chat_template="qwen-2.5",
    )

    def formatting_prompts_func(examples):
        """Render a batch of ``messages`` lists into chat-template strings."""
        texts = [
            tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                # Training text, not an inference prompt: no generation prompt.
                add_generation_prompt=False,
            )
            for messages in examples["messages"]
        ]
        return {"text": texts}

    dataset = dataset.map(formatting_prompts_func, batched=True)

    # Query hardware support once; exactly one of fp16/bf16 is enabled.
    use_bf16 = is_bf16_supported()

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=SFTConfig(
            output_dir=OUTPUT_DIR,
            num_train_epochs=3,
            per_device_train_batch_size=8,
            gradient_accumulation_steps=1,
            warmup_steps=10,
            learning_rate=2e-4,
            fp16=not use_bf16,
            bf16=use_bf16,
            logging_steps=10,
            optim="adamw_8bit",
            seed=3407,
            report_to="none",
            max_seq_length=MAX_SEQ_LENGTH,
            dataset_text_field="text",
            packing=False,
        ),
    )
    trainer.train()

    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"Model saved to {OUTPUT_DIR}")
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()