"""Fine-tune Qwen2.5-3B-Instruct with Unsloth (4-bit QLoRA + SFT).

Installation (CUDA GPU required):
    pip install -r modal_apps/requirements-modal.txt

Or install manually:
    pip install torch torchvision
    pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
    pip install unsloth_zoo
    pip install --no-deps trl peft accelerate bitsandbytes
    pip install datasets transformers

Run:
    python scripts/train.py
"""

import unsloth  # noqa: F401 — must import before trl/transformers/peft

from pathlib import Path

from datasets import load_dataset
from trl import SFTConfig, SFTTrainer
from unsloth import FastLanguageModel, is_bf16_supported
from unsloth.chat_templates import get_chat_template

MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_PATH = str(PROJECT_ROOT / "data" / "train.jsonl")
OUTPUT_DIR = str(PROJECT_ROOT / "trained_model")
MAX_SEQ_LENGTH = 2048


def _load_model_and_tokenizer():
    """Load the 4-bit base model, attach LoRA adapters, and set the chat template."""
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_NAME,
        max_seq_length=MAX_SEQ_LENGTH,
        dtype=None,
        load_in_4bit=True,
    )

    # LoRA is attached to every attention and MLP projection listed below.
    model = FastLanguageModel.get_peft_model(
        model,
        r=16,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        max_seq_length=MAX_SEQ_LENGTH,
    )

    tokenizer = get_chat_template(
        tokenizer,
        chat_template="qwen-2.5",
    )
    return model, tokenizer


def _load_training_dataset(tokenizer):
    """Load DATA_PATH and add a ``text`` column rendered with the chat template."""

    def formatting_prompts_func(examples):
        # Each row's "messages" value is rendered to one training string;
        # no generation prompt is appended because the full conversation,
        # including the assistant turn, is the training target.
        texts = [
            tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=False,
            )
            for messages in examples["messages"]
        ]
        return {"text": texts}

    dataset = load_dataset("json", data_files=DATA_PATH, split="train")
    return dataset.map(formatting_prompts_func, batched=True)


def _build_trainer(model, tokenizer, dataset):
    """Create the SFTTrainer with this script's fixed hyperparameters."""
    # Evaluate the capability probe once; fp16 and bf16 must be complementary.
    use_bf16 = is_bf16_supported()

    return SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        args=SFTConfig(
            output_dir=OUTPUT_DIR,
            num_train_epochs=3,
            per_device_train_batch_size=8,
            gradient_accumulation_steps=1,
            warmup_steps=10,
            learning_rate=2e-4,
            fp16=not use_bf16,
            bf16=use_bf16,
            logging_steps=10,
            optim="adamw_8bit",
            seed=3407,
            report_to="none",
            max_seq_length=MAX_SEQ_LENGTH,
            dataset_text_field="text",
            packing=False,
        ),
    )


def main() -> None:
    """Fine-tune MODEL_NAME on DATA_PATH and save model and tokenizer to OUTPUT_DIR.

    Raises:
        FileNotFoundError: If DATA_PATH does not point to an existing file.
    """
    # Fail fast: verify the training data exists before the expensive
    # model download / load, instead of discovering it afterwards.
    if not Path(DATA_PATH).is_file():
        raise FileNotFoundError(f"Training data not found: {DATA_PATH}")

    model, tokenizer = _load_model_and_tokenizer()
    dataset = _load_training_dataset(tokenizer)
    trainer = _build_trainer(model, tokenizer, dataset)

    trainer.train()

    model.save_pretrained(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"Model saved to {OUTPUT_DIR}")


if __name__ == "__main__":
    main()