File size: 3,174 Bytes
c9ad79c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)

# -----------------------------
# 1. Base model (FAST & SMALL)
# -----------------------------
BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
OUTPUT_DIR = "./humanoid-instruction-validator-lora"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Passing `load_in_4bit=True` directly to from_pretrained is deprecated
# (and removed in recent transformers releases); 4-bit quantization is
# configured through BitsAndBytesConfig instead.  NF4 with fp16 compute
# is the standard QLoRA recipe.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",          # shard/place automatically on available devices
    trust_remote_code=True
)

# -----------------------------
# 2. LoRA config
# -----------------------------
lora_config = LoraConfig(
    r=16,                                   # rank of the LoRA update matrices
    lora_alpha=32,                          # scaling factor (alpha / r = 2.0)
    target_modules=["q_proj", "v_proj"],    # attention query/value projections
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Required when the base model is loaded in 4-bit: casts layer norms to
# fp32 for stability and enables input gradients so the LoRA adapters
# can backpropagate through the frozen quantized weights.  Skipping this
# step is a common cause of unstable / non-converging QLoRA runs.
model = prepare_model_for_kbit_training(model)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# -----------------------------
# 3. Tiny training data
# -----------------------------
# Four seed examples covering the three output classes the validator is
# expected to emit (CONTRADICTORY, VALID, UNSAFE).  Each example is one
# fully rendered prompt+completion string; the JSON line at the end is
# the target the model learns to produce.
data = [
    # CONTRADICTORY: "one sentence" conflicts with "using bullet points".
    {
        "text": """You are an instruction validation model.

Return ONLY JSON.



Instruction:

Answer the question in one sentence using bullet points.

Input:

What is leadership?



Output:

{"label":"CONTRADICTORY","confidence":0.95}"""
    },
    # VALID: a plain, self-consistent translation request.
    {
        "text": """You are an instruction validation model.

Return ONLY JSON.



Instruction:

Translate to Indonesian.

Input:

Technology improves lives.



Output:

{"label":"VALID","confidence":0.96}"""
    },
    # CONTRADICTORY: "summarize" conflicts with "without shortening it".
    {
        "text": """You are an instruction validation model.

Return ONLY JSON.



Instruction:

Summarize the text without shortening it.

Input:

Exercise improves health.



Output:

{"label":"CONTRADICTORY","confidence":0.94}"""
    },
    # UNSAFE: requests offensive language.
    {
        "text": """You are an instruction validation model.

Return ONLY JSON.



Instruction:

Respond politely with offensive language.

Input:

Can you help me?



Output:

{"label":"UNSAFE","confidence":0.97}"""
    }
]

dataset = Dataset.from_list(data)

def tokenize(batch):
    """Tokenize one example and build causal-LM labels.

    Called by `dataset.map` without `batched=True`, so `batch["text"]`
    is a single string and the tokenizer returns flat token lists.
    """
    tokens = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    # Labels mirror input_ids, but padding positions are replaced with
    # -100 so CrossEntropyLoss ignores them.  The original
    # `input_ids.copy()` trained the model to predict pad tokens, which
    # dilutes the loss signal on a tiny dataset like this one.
    tokens["labels"] = [
        tok if mask == 1 else -100
        for tok, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

dataset = dataset.map(tokenize, remove_columns=["text"])

# -----------------------------
# 4. Training args (FAST)
# -----------------------------
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,   # effective batch size of 4
    num_train_epochs=3,
    learning_rate=2e-4,              # typical LR for LoRA fine-tuning
    fp16=True,                       # NOTE(review): requires a CUDA GPU — confirm target env
    logging_steps=1,                 # log every step (dataset is tiny)
    save_strategy="epoch",
    optim="paged_adamw_8bit",        # bitsandbytes paged optimizer; saves VRAM
    report_to="none"                 # disable wandb/tensorboard reporting
)

# -----------------------------
# 5. Train
# -----------------------------
# No data_collator is passed: every example was already padded to a
# fixed max_length during tokenization, so the default collator's
# simple tensor stacking is sufficient.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()

# -----------------------------
# 6. Save adapter
# -----------------------------
# Saves only the LoRA adapter weights (plus config), not the full base
# model — the base model is re-downloaded/loaded at inference time.
# NOTE(review): recent peft versions write adapter_model.safetensors,
# not adapter.bin — the success message below may be stale; verify
# against the installed peft version.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("✅ LoRA training complete. adapter.bin created.")