Add instruction execution router model

Initial release of a LoRA-based model that routes natural language instructions to appropriate execution paths prior to action in humanoid and agent systems.

Files changed (5) hide show

README.md +35 -0
adapter.bin +3 -0
adapter_config.json +9 -0
config.json +32 -0
train_lora.py +141 -0

README.md ADDED Viewed

	@@ -0,0 +1,35 @@

+---
+language:
+- en
+- id
+license: mit
+tags:
+- humanoid
+- instruction-routing
+- pre-execution
+- decision-making
+- reasoning
+- llm
+---
+# instruction-execution-router
+## Model Description
+`instruction-execution-router` is a language model designed to route natural language instructions before execution.
+Instead of executing instructions, the model analyzes them and determines whether they are ready for execution, require clarification, contain contradictions, are incomplete, or should be blocked due to safety concerns. It is intended to support modular humanoid and agent architectures.
+## Intended Use
+- Instruction routing in humanoid systems
+- Decision-making before task planning
+- AI agent execution pipelines
+- Safety-aware control layers
+## Output Format
+The model outputs **JSON only**:
+```json
+{
+  "label": "VALID | AMBIGUOUS | CONTRADICTORY | INCOMPLETE | UNSAFE",
+  "confidence": 0.0
+}
+```

adapter.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aae32c89f7cffe64b8e03beb4cd3cc482d5edc2058f54aa55ce7ac4aa9d2f9e8
+size 13

adapter_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "peft_type": "LORA",
+  "task_type": "CAUSAL_LM",
+  "r": 16,
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "bias": "none",
+  "target_modules": ["q_proj", "v_proj"]
+}

config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "model_type": "instruction_router",
+  "task": "preexecution_instruction_routing",
+  "languages": ["en", "id"],
+  "output_format": "json",
+  "output_labels": [
+    "VALID",
+    "AMBIGUOUS",
+    "CONTRADICTORY",
+    "INCOMPLETE",
+    "UNSAFE"
+  ],
+  "confidence_range": [0.0, 1.0],
+  "inference_settings": {
+    "json_only": true,
+    "temperature": 0.0,
+    "max_tokens": 64
+  },
+  "intended_use": [
+    "humanoid_instruction_routing",
+    "agent_execution_decision"
+  ],
+  "license": "mit"
+}

train_lora.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import torch
+from datasets import Dataset
+from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    Trainer,
+    TrainingArguments,
+)
+# -----------------------------
+# 1. Base model (FAST & SMALL)
+# -----------------------------
+BASE_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
+OUTPUT_DIR = "./humanoid-instruction-validator-lora"
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    load_in_4bit=True,
+    device_map="auto",
+    trust_remote_code=True
+)
+# -----------------------------
+# 2. LoRA config
+# -----------------------------
+lora_config = LoraConfig(
+    r=16,
+    lora_alpha=32,
+    target_modules=["q_proj", "v_proj"],
+    lora_dropout=0.05,
+    bias="none",
+    task_type="CAUSAL_LM"
+)
+model = get_peft_model(model, lora_config)
+model.print_trainable_parameters()
+# -----------------------------
+# 3. Tiny training data
+# -----------------------------
+data = [
+    {
+        "text": """You are an instruction validation model.
+Return ONLY JSON.
+Instruction:
+Answer the question in one sentence using bullet points.
+Input:
+What is leadership?
+Output:
+{"label":"CONTRADICTORY","confidence":0.95}"""
+    },
+    {
+        "text": """You are an instruction validation model.
+Return ONLY JSON.
+Instruction:
+Translate to Indonesian.
+Input:
+Technology improves lives.
+Output:
+{"label":"VALID","confidence":0.96}"""
+    },
+    {
+        "text": """You are an instruction validation model.
+Return ONLY JSON.
+Instruction:
+Summarize the text without shortening it.
+Input:
+Exercise improves health.
+Output:
+{"label":"CONTRADICTORY","confidence":0.94}"""
+    },
+    {
+        "text": """You are an instruction validation model.
+Return ONLY JSON.
+Instruction:
+Respond politely with offensive language.
+Input:
+Can you help me?
+Output:
+{"label":"UNSAFE","confidence":0.97}"""
+    }
+]
+dataset = Dataset.from_list(data)
+def tokenize(batch):
+    tokens = tokenizer(
+        batch["text"],
+        truncation=True,
+        padding="max_length",
+        max_length=512
+    )
+    tokens["labels"] = tokens["input_ids"].copy()
+    return tokens
+dataset = dataset.map(tokenize, remove_columns=["text"])
+# -----------------------------
+# 4. Training args (FAST)
+# -----------------------------
+training_args = TrainingArguments(
+    output_dir=OUTPUT_DIR,
+    per_device_train_batch_size=1,
+    gradient_accumulation_steps=4,
+    num_train_epochs=3,
+    learning_rate=2e-4,
+    fp16=True,
+    logging_steps=1,
+    save_strategy="epoch",
+    optim="paged_adamw_8bit",
+    report_to="none"
+)
+# -----------------------------
+# 5. Train
+# -----------------------------
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=dataset
+)
+trainer.train()
+# -----------------------------
+# 6. Save adapter
+# -----------------------------
+model.save_pretrained(OUTPUT_DIR)
+tokenizer.save_pretrained(OUTPUT_DIR)
+print("✅ LoRA training complete. adapter.bin created.")