import os
import json

# Pin the GPU before importing unsloth/torch so only the chosen device is visible.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

from unsloth import FastLanguageModel
import torch

dataset_path = "/home/mshahidul/readctrl/data/finetuning_data/finetune_dataset_subclaim_support_v2_sft_prompt.json"
lora_save_path = "/home/mshahidul/readctrl_model/Mistral-Small-3.1-24B_subclaims-support-check-8b_ctx_v2-lora"
full_model_save_path = "/home/mshahidul/readctrl_model/full_model/Mistral-Small-3.1-24B_subclaims-support-check-8b_ctx_v2-bf16"
lora = False  # True: save only the LoRA adapter; False: save the merged bf16 model

# === Load base model ===
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Mistral-Small-3.1-24B-Instruct-2503",
    max_seq_length = 8192,
    load_in_4bit = False,
    load_in_8bit = False,
    full_finetuning = False,
    dtype = torch.bfloat16,
)

# === Prepare LoRA model ===
model = FastLanguageModel.get_peft_model(
    model,
    r = 32,
    target_modules = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 32,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

# === Load non-reasoning dataset (full dataset) ===
from datasets import Dataset
from unsloth.chat_templates import standardize_sharegpt

print("Loading dataset...")
with open(dataset_path) as f:
    data = json.load(f)

dataset = Dataset.from_list(data)

# Standardize to ShareGPT format and apply the model's chat template
dataset = standardize_sharegpt(dataset)
non_reasoning_conversations = [
    tokenizer.apply_chat_template(conv, tokenize=False)
    for conv in dataset["conversations"]
]

# === Prepare dataset for training ===
import pandas as pd

data = pd.Series(non_reasoning_conversations, name="text")
combined_dataset = Dataset.from_pandas(pd.DataFrame(data))
combined_dataset = combined_dataset.shuffle(seed=3407)

# === Training setup ===
from trl import SFTTrainer, SFTConfig

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=combined_dataset,
    eval_dataset=None,  # Optional
    args=SFTConfig(
        dataset_text_field="text",
        per_device_train_batch_size=16,
        gradient_accumulation_steps=8,
        warmup_steps=5,
        num_train_epochs=1,
        # max_steps=30,
        learning_rate=2e-4,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        report_to="none",
    ),
)

# === Train model ===
trainer_stats = trainer.train()

# === Save model ===
if lora:
    model.save_pretrained(lora_save_path)
    tokenizer.save_pretrained(lora_save_path)
else:
    model.save_pretrained_merged(
        full_model_save_path,
        tokenizer,
        save_method="merged_16bit",
    )
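
# === Optional: quick inference sanity check (a minimal sketch) ===
# This is not part of the training run above; it is a hedged sketch assuming the
# merged bf16 model was saved to full_model_save_path. The prompt below is a
# hypothetical placeholder, not taken from the training data. Run it in a fresh
# process so two copies of the 24B model are not held in GPU memory at once.
#
# if not lora:
#     infer_model, infer_tokenizer = FastLanguageModel.from_pretrained(
#         model_name = full_model_save_path,
#         max_seq_length = 8192,
#         load_in_4bit = False,
#         dtype = torch.bfloat16,
#     )
#     FastLanguageModel.for_inference(infer_model)  # enable Unsloth's fast inference mode
#
#     messages = [{"role": "user", "content": "Does the context support the subclaim? (placeholder prompt)"}]
#     input_ids = infer_tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     ).to(infer_model.device)
#     outputs = infer_model.generate(input_ids=input_ids, max_new_tokens=64)
#     print(infer_tokenizer.decode(outputs[0], skip_special_tokens=True))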