Add instruction intent filter model
Initial release of a LoRA-based model that analyzes instruction intent and validity prior to execution for humanoid and agent systems.
- README.md +35 -0
- adapter.bin +3 -0
- adapter_config.json +9 -0
- config.json +37 -0
- train_lora.py +141 -0
README.md
ADDED
@@ -0,0 +1,35 @@
---
language:
- en
- id
license: mit
tags:
- humanoid
- instruction-intent
- pre-execution
- safety
- reasoning
- llm
---

# instruction-intent-filter

## Model Description
`instruction-intent-filter` is a language model designed to analyze the intent behind natural language instructions before execution.

The model evaluates whether an instruction’s intent is clear, valid, contradictory, incomplete, or unsafe. It is intended to be used as a filtering layer for humanoid and agent systems prior to task planning or execution.

## Intended Use
- Instruction intent filtering
- Pre-execution decision making
- Humanoid and agent safety layers
- Human–AI interaction control

## Output Format
The model outputs **JSON only** with the following structure:

```json
{
  "label": "VALID | AMBIGUOUS | CONTRADICTORY | INCOMPLETE | UNSAFE",
  "confidence": 0.0
}
```
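For illustration, a minimal inference sketch for this filter. The adapter repo id below is a placeholder and the prompt format is copied from `train_lora.py`; both are assumptions rather than a confirmed API of this release:

```python
# Sketch only: load the Qwen2.5 base plus the LoRA adapter and classify one
# instruction. "your-org/instruction-intent-filter" is a hypothetical repo id.
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"       # base used in train_lora.py
ADAPTER = "your-org/instruction-intent-filter"  # hypothetical adapter repo id

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
model = PeftModel.from_pretrained(model, ADAPTER)

prompt = """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Summarize the text without shortening it.
Input:
Exercise improves health.

Output:
"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=64, do_sample=False)
# Decode only the newly generated tokens, then parse the JSON verdict.
reply = tokenizer.decode(out[0][inputs["input_ids"].shape[1]:],
                         skip_special_tokens=True)
verdict = json.loads(reply.strip())
print(verdict["label"], verdict["confidence"])
```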
adapter.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aae32c89f7cffe64b8e03beb4cd3cc482d5edc2058f54aa55ce7ac4aa9d2f9e8
size 13
adapter_config.json
ADDED
@@ -0,0 +1,9 @@
{
  "peft_type": "LORA",
  "task_type": "CAUSAL_LM",
  "r": 16,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "bias": "none",
  "target_modules": ["q_proj", "v_proj"]
}
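These fields mirror the `LoraConfig` in `train_lora.py`. Note that `peft` resolves adapter weights from the files written by `save_pretrained` (`adapter_model.safetensors` / `adapter_model.bin`), so the `adapter.bin` name in this repo would not be picked up automatically. A minimal sketch of reading the config back, assuming a local checkout path:

```python
# Sketch: parse adapter_config.json via peft; the local path is an assumption.
from peft import PeftConfig

cfg = PeftConfig.from_pretrained("./instruction-intent-filter")
print(cfg.peft_type, cfg.task_type)               # LORA, CAUSAL_LM
print(cfg.r, cfg.lora_alpha, cfg.target_modules)  # 16, 32, q_proj/v_proj
```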
config.json
ADDED
@@ -0,0 +1,37 @@
{
  "model_type": "instruction_intent_filter",
  "task": "preexecution_instruction_intent_analysis",
  "languages": ["en", "id"],
  "output_format": "json",
  "output_labels": [
    "VALID",
    "AMBIGUOUS",
    "CONTRADICTORY",
    "INCOMPLETE",
    "UNSAFE"
  ],
  "confidence_range": [0.0, 1.0],
  "inference_settings": {
    "json_only": true,
    "temperature": 0.0,
    "max_tokens": 64
  },
  "intended_use": [
    "humanoid_instruction_intent_filtering",
    "agent_pre_execution_validation"
  ],
  "license": "mit"
}
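`instruction_intent_filter` is not a registered `transformers` architecture, so a downstream safety layer would read this file directly rather than through `AutoConfig`. A hypothetical sketch of enforcing the declared output contract:

```python
# Sketch: validate a model reply against the contract declared in config.json.
# check_verdict and the sample reply are illustrative, not part of this repo.
import json

with open("config.json") as f:
    cfg = json.load(f)

def check_verdict(raw_reply: str) -> dict:
    """Parse a reply and enforce the label set and confidence range."""
    verdict = json.loads(raw_reply)
    lo, hi = cfg["confidence_range"]
    if verdict["label"] not in cfg["output_labels"]:
        raise ValueError(f"unknown label: {verdict['label']}")
    if not lo <= verdict["confidence"] <= hi:
        raise ValueError("confidence out of range")
    return verdict

print(check_verdict('{"label":"UNSAFE","confidence":0.97}'))
```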
train_lora.py
ADDED
@@ -0,0 +1,141 @@
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# -----------------------------
# 1. Base model (FAST & SMALL)
# -----------------------------
BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
OUTPUT_DIR = "./humanoid-instruction-validator-lora"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
if tokenizer.pad_token is None:  # needed for padding="max_length" below
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    trust_remote_code=True
)
# Cast norms to fp32 and enable input grads so LoRA can train on the
# 4-bit base model.
model = prepare_model_for_kbit_training(model)

# -----------------------------
# 2. LoRA config
# -----------------------------
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# -----------------------------
# 3. Tiny training data
# -----------------------------
data = [
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Answer the question in one sentence using bullet points.
Input:
What is leadership?

Output:
{"label":"CONTRADICTORY","confidence":0.95}"""
    },
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Translate to Indonesian.
Input:
Technology improves lives.

Output:
{"label":"VALID","confidence":0.96}"""
    },
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Summarize the text without shortening it.
Input:
Exercise improves health.

Output:
{"label":"CONTRADICTORY","confidence":0.94}"""
    },
    {
        "text": """You are an instruction validation model.
Return ONLY JSON.

Instruction:
Respond politely with offensive language.
Input:
Can you help me?

Output:
{"label":"UNSAFE","confidence":0.97}"""
    }
]

dataset = Dataset.from_list(data)

def tokenize(batch):
    tokens = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=512
    )
    # Causal-LM objective: labels are a copy of the input ids.
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

dataset = dataset.map(tokenize, remove_columns=["text"])
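# NOTE (editorial sketch, not in the original script): copying input_ids into
# labels computes loss over the prompt and the max_length padding as well as
# the JSON answer. A common refinement is to mask at least the pad positions
# with -100, e.g.:
#
#   pad_id = tokenizer.pad_token_id
#   tokens["labels"] = [
#       -100 if tid == pad_id else tid for tid in tokens["input_ids"]
#   ]
#
# Masking the prompt itself additionally requires locating the "Output:"
# boundary in the token sequence.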

# -----------------------------
# 4. Training args (FAST)
# -----------------------------
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_strategy="epoch",
    optim="paged_adamw_8bit",
    report_to="none"
)

# -----------------------------
# 5. Train
# -----------------------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

trainer.train()

# -----------------------------
# 6. Save adapter
# -----------------------------
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("✅ LoRA training complete. Adapter saved to", OUTPUT_DIR)
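One optional follow-up step, sketched below: merging the trained LoRA weights into the base model so deployment does not need `peft` at runtime. The merged output directory is illustrative, and this snippet is not part of the committed script:

```python
# Sketch: fold the LoRA deltas into the base weights for standalone serving.
# Paths reuse the constants from train_lora.py; the merged dir name is made up.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
model = PeftModel.from_pretrained(base, "./humanoid-instruction-validator-lora")
merged = model.merge_and_unload()  # returns the base model with LoRA merged in
merged.save_pretrained("./instruction-intent-filter-merged")

tok = AutoTokenizer.from_pretrained("./humanoid-instruction-validator-lora")
tok.save_pretrained("./instruction-intent-filter-merged")
```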