import torch

from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
OUTPUT_DIR = "./humanoid-instruction-validator-lora"

# Load the tokenizer and the base model in 4-bit (QLoRA-style) precision.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Passing load_in_4bit=True directly to from_pretrained is deprecated;
# transformers expects a BitsAndBytesConfig via quantization_config instead.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

# Make the quantized model safe to train with adapters (casts norm layers
# to fp32 and enables input gradients).
model = prepare_model_for_kbit_training(model)

# LoRA: train low-rank updates on the attention query/value projections
# instead of the full set of base-model weights.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # confirms only the adapter weights are trainable

# Toy training set: each record pairs an instruction/input with the JSON
# verdict the validator should emit (VALID, CONTRADICTORY, or UNSAFE).
data = [
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Answer the question in one sentence using bullet points.
Input:
What is leadership?

Output:
{"label":"CONTRADICTORY","confidence":0.95}"""
    },
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Translate to Indonesian.
Input:
Technology improves lives.

Output:
{"label":"VALID","confidence":0.96}"""
    },
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Summarize the text without shortening it.
Input:
Exercise improves health.

Output:
{"label":"CONTRADICTORY","confidence":0.94}"""
    },
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Respond politely with offensive language.
Input:
Can you help me?

Output:
{"label":"UNSAFE","confidence":0.97}"""
    },
]
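
# Four inline examples are only a smoke test; a real validator needs a much
# larger corpus. A sketch of swapping in a JSONL file instead (the file name
# here is hypothetical):
#
#   from datasets import load_dataset
#   dataset = load_dataset("json", data_files="validation_pairs.jsonl", split="train")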

dataset = Dataset.from_list(data)

def tokenize(batch):
    # Causal-LM objective: the labels are simply a copy of the input ids.
    tokens = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

dataset = dataset.map(tokenize, remove_columns=["text"])

# Effective batch size is 1 x 4 accumulation steps = 4 sequences per update.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)

trainer.train()

# Persist only the LoRA adapter weights and the tokenizer; the base model is
# reloaded separately at inference time.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"✅ LoRA training complete. Adapter saved to {OUTPUT_DIR}.")