import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# -----------------------------
# 1. Base model (FAST & SMALL)
# -----------------------------
BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
OUTPUT_DIR = "./humanoid-instruction-validator-lora"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Causal LMs often ship without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

# Passing `load_in_4bit=True` directly is deprecated in recent transformers;
# use a BitsAndBytesConfig via `quantization_config` instead.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
# Recommended prep step for training on top of a k-bit quantized model.
model = prepare_model_for_kbit_training(model)

# -----------------------------
# 2. LoRA config
# -----------------------------
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# -----------------------------
# 3. Tiny training data
# -----------------------------
data = [
    {
        "text": """You are an instruction validation model. Return ONLY JSON.
Instruction: Answer the question in one sentence using bullet points.
Input: What is leadership?
Output: {"label":"CONTRADICTORY","confidence":0.95}"""
    },
    {
        "text": """You are an instruction validation model. Return ONLY JSON.
Instruction: Translate to Indonesian.
Input: Technology improves lives.
Output: {"label":"VALID","confidence":0.96}"""
    },
    {
        "text": """You are an instruction validation model. Return ONLY JSON.
Instruction: Summarize the text without shortening it.
Input: Exercise improves health.
Output: {"label":"CONTRADICTORY","confidence":0.94}"""
    },
    {
        "text": """You are an instruction validation model. Return ONLY JSON.
Instruction: Respond politely with offensive language.
Input: Can you help me?
Output: {"label":"UNSAFE","confidence":0.97}"""
    }
]

dataset = Dataset.from_list(data)

def tokenize(batch):
    tokens = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    # Causal-LM labels: copy the inputs, but mask pad positions with -100
    # so padding does not contribute to the loss.
    tokens["labels"] = [
        tok if tok != tokenizer.pad_token_id else -100
        for tok in tokens["input_ids"]
    ]
    return tokens

dataset = dataset.map(tokenize, remove_columns=["text"])

# -----------------------------
# 4. Training args (FAST)
# -----------------------------
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    report_to="none"
)

# -----------------------------
# 5. Train
# -----------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()

# -----------------------------
# 6. Save adapter
# -----------------------------
# save_pretrained on a PEFT model writes only the adapter weights
# (adapter_model.safetensors + adapter_config.json), not the base model.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"✅ LoRA training complete. Adapter saved to {OUTPUT_DIR}.")
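
# -----------------------------
# 7. (Optional) Quick inference check
# -----------------------------
# A minimal sketch, not part of the original script: reload the base model,
# attach the adapter from OUTPUT_DIR with peft's PeftModel, and ask for one
# validation JSON. The test instruction below is a made-up example that
# mirrors the training prompt format.
from peft import PeftModel

infer_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
infer_model = PeftModel.from_pretrained(infer_model, OUTPUT_DIR)
infer_model.eval()

prompt = """You are an instruction validation model. Return ONLY JSON.
Instruction: Write a long essay in exactly three words.
Input: Describe the ocean.
Output: """

inputs = tokenizer(prompt, return_tensors="pt").to(infer_model.device)
with torch.no_grad():
    out = infer_model.generate(**inputs, max_new_tokens=32, do_sample=False)

# Decode only the newly generated tokens (the model's JSON verdict).
print(tokenizer.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))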