ssaraf1 commited on
Commit
1c623d5
·
verified ·
1 Parent(s): e1b8183

Upload lora_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. lora_config.yaml +88 -0
lora_config.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
# =============================================================
# MLX-LM LoRA Fine-Tuning Config
# SLM Workflow Planner — Qwen2.5-7B-Instruct
# =============================================================
#
# Optimized for:
# - Apple M4 Pro (48GB unified memory)
# - Policy classification task (structured output)
# - 554K instruction pairs from 89-workflow multi-topology corpus
#
# Training objective:
# Stage 1: decision_type classification (NEXT/FORK/JOIN/RETRY/META)
# Stage 2: node subset selection from eligible candidates
#
# Key tuning decisions:
# - LR 8e-5 (lower for 7B stability + structured output)
# - 8000 iters ≈ 6.4% epoch (sufficient for topology generalization)
# - num_layers 28/32 (planner reasoning in mid-upper stack)
# - dropout 0.02 (dataset large enough, avoid slow convergence)
# - warmup 400 (5% of 8000 iters)
# =============================================================

# --- Model ---
model: "Qwen/Qwen2.5-7B-Instruct"

# --- Training ---
train: true
fine_tune_type: "lora"
optimizer: "adam"

# --- Iterations ---
# Dataset: 554K instruction pairs → ~499K train
# At batch_size=4: 499K/4 = 124,750 steps per epoch
# 8000 iters ≈ 6.4% epoch — enough for policy + topology learning
# without overfit risk
iters: 8000

batch_size: 4  # 7B on 48GB — safe headroom
max_seq_length: 512  # Prompts avg ~65-115 tokens, 512 gives headroom

# --- Learning rate ---
# 8e-5 is in the safe zone for 7B LoRA on classification tasks
# (1.5e-4 was borderline high — risk of logit instability)
# cosine_decay(init, decay_steps, end)
learning_rate: 8.0e-5
# NOTE(review): warmup/warmup_init nested under lr_schedule per mlx-lm's
# schedule config convention — confirm against the mlx-lm version in use.
lr_schedule:
  name: "cosine_decay"
  arguments: [8.0e-5, 8000, 1.0e-6]
  warmup: 400  # 5% warmup (400/8000)
  warmup_init: 0.0

# --- LoRA parameters ---
# rank=16 sufficient for policy classification
# scale = alpha/rank = 32/16 = 2.0
# Qwen2.5-7B has 32 layers — LoRA on last 28 (87.5%)
# Planner reasoning lives in mid-upper stack
num_layers: 28
lora_parameters:
  rank: 16
  dropout: 0.02  # Lower dropout: 554K samples, avoid slow convergence
  scale: 2.0

# --- Prompt masking ---
# Critical: only train on assistant output (decision), not the prompt
mask_prompt: true

# --- Gradient ---
grad_checkpoint: true  # Essential for 7B on 48GB
grad_accumulation_steps: 2  # Effective batch = 4 × 2 = 8

# --- Logging & saving ---
steps_per_report: 50
steps_per_eval: 100  # More frequent eval for planner loss curves (jagged)
val_batches: 100  # 100 × 4 = 400 samples per eval (less noisy)
save_every: 50  # Frequent saves — crash-proof, resume from last checkpoint

# --- Data ---
data: "src_slm/training/data"

# --- Adapter output ---
adapter_path: "src_slm/training/adapters_7b"

# --- Evaluation ---
test: true
test_batches: 200  # Thorough test evaluation

# --- Reproducibility ---
seed: 42