import os

# Set GPU environment variables before any CUDA-aware imports
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

# Unsloth should be imported before trl/transformers so its patches apply
from unsloth import FastLanguageModel
from unsloth.chat_templates import (
    get_chat_template,
    standardize_data_formats,
    train_on_responses_only,
)
import torch
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig

# 1. Configuration
model_name = "unsloth/Qwen3-4B-Instruct-2507"
max_seq_length = 8192
dataset_path = "/home/mshahidul/readctrl/data/finetuning_data/training_data_readability_data_generation.json"
output_dir = "/home/mshahidul/readctrl_model/RL_model/readability_sft_lora_model"

# 2. Load Model and Tokenizer (4-bit quantized to reduce GPU memory)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    load_in_4bit = True,
)

# 3. Add LoRA Adapters to all attention and MLP projections
model = FastLanguageModel.get_peft_model(
    model,
    r = 32,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

# 4. Data Preparation: apply the Qwen3 chat template to each conversation
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "qwen3-instruct",
)

dataset = load_dataset("json", data_files = dataset_path, split = "train")
dataset = standardize_data_formats(dataset)

def formatting_prompts_func(examples):
    convos = examples["conversations"]
    texts = [
        tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False)
        for convo in convos
    ]
    return {"text": texts}

dataset = dataset.map(formatting_prompts_func, batched = True)

# 5. Training Setup (effective batch size = 2 * 4 = 8)
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # max_steps = 60,  # Adjust as needed for your dataset size
        num_train_epochs = 3,
        learning_rate = 2e-4,
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

# Mask prompt tokens so loss is computed only on assistant responses
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|im_start|>user\n",
    response_part = "<|im_start|>assistant\n",
)

# 6. Train and Save (saves the LoRA adapter weights, not a merged model)
trainer.train()
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model saved to {output_dir}")
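
# --- Optional: quick inference sanity check ---
# A minimal sketch (left commented out; run separately after training) that
# reloads the saved adapter and generates once. The prompt below is a
# hypothetical placeholder; substitute a real example from your readability task.
#
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name = output_dir,           # path to the saved LoRA adapter
#     max_seq_length = max_seq_length,
#     load_in_4bit = True,
# )
# FastLanguageModel.for_inference(model)  # enable Unsloth's fast inference mode
# messages = [{"role": "user", "content": "Rewrite this text at a 5th-grade reading level: ..."}]
# input_ids = tokenizer.apply_chat_template(
#     messages, tokenize = True, add_generation_prompt = True, return_tensors = "pt"
# ).to(model.device)
# output_ids = model.generate(input_ids = input_ids, max_new_tokens = 256)
# print(tokenizer.decode(output_ids[0], skip_special_tokens = True))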