Text Classification
Transformers
lora
fine-tuning
adaptive
research
nested-lora
synaptic-plasticity
rank-adaptation
Instructions to use Simo76/Unified-LoRA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Simo76/Unified-LoRA with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="Simo76/Unified-LoRA")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Simo76/Unified-LoRA", dtype="auto")

- Notebooks
- Google Colab
- Kaggle
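If the repository ships a LoRA adapter rather than a merged checkpoint, loading it directly as above may not apply the adapter weights; attaching it with peft is the usual route. A minimal sketch, assuming (unverified) a DistilBERT base model and a standard adapter_config.json in the repo:

# Hypothetical: attach the LoRA adapter to its assumed base model via peft
from transformers import AutoModelForSequenceClassification
from peft import PeftModel

base = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2
)
model = PeftModel.from_pretrained(base, "Simo76/Unified-LoRA")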
stable_task_test.py (6,098 bytes):
"""
Orbital LoRA – Stable Task Parity Test
MRPC only, 120 steps, 3 seeds.
Validates that the controller causes zero degradation on stable training.
Usage:
pip install transformers datasets evaluate
python stable_task_test.py
"""
import time, random, math, numpy as np, torch, torch.nn as nn
import torch.nn.functional as F, evaluate
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from nested_lora import NestedLoRALinear, inject_nested_lora
from orbital_controller import OrbitalController
from controller import set_rank
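# Local project modules (not on PyPI): inject_nested_lora is assumed to wrap
# the model's linear layers with rank-switchable LoRA adapters up to MAX_RANK,
# OrbitalController to propose a rank from the recent loss trajectory, and
# set_rank to switch the active rank of every adapter in place.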
# ── CONFIG ──────────────────────────────────────────
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL = "distilbert-base-uncased"
BATCH = 8
STEPS = 120
LR = 5e-5
SEEDS = [0, 1, 2]
MAX_RANK = 16
WARMUP = 15
STABLE_WINDOW = 8
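# Controller knobs: WARMUP is assumed to be the number of steps observed
# before any rank change, STABLE_WINDOW the window used to judge loss
# stability (semantics inferred from the controller's constructor below).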
# ── DATA ────────────────────────────────────────────
print("Loading data...")
tok = AutoTokenizer.from_pretrained(MODEL)
ds = load_dataset("glue", "mrpc")
def tok_fn(x):
    return tok(x["sentence1"], x["sentence2"],
               truncation=True, padding="max_length", max_length=128)
ds = ds.map(tok_fn, batched=True)
ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
train_loader = DataLoader(ds["train"], batch_size=BATCH, shuffle=True)
val_loader = DataLoader(ds["validation"], batch_size=BATCH)
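# MRPC has ~3.7k training pairs, so 120 steps at batch size 8 (960 examples)
# covers roughly a quarter of an epoch.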
metric = evaluate.load("glue", "mrpc")
# ── HELPERS ─────────────────────────────────────────
def build_model():
    base = AutoModelForSequenceClassification.from_pretrained(
        MODEL, num_labels=2, ignore_mismatched_sizes=True
    )
    return inject_nested_lora(base, MAX_RANK).to(DEVICE)
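# Validation F1 on MRPC, computed with the GLUE metric loaded above.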
def eval_model(model):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            x = batch["input_ids"].to(DEVICE)
            m = batch["attention_mask"].to(DEVICE)
            y = batch["label"].to(DEVICE)
            logits = model(input_ids=x, attention_mask=m).logits
            preds.extend(logits.argmax(dim=-1).cpu().numpy())
            labels.extend(y.cpu().numpy())
    return metric.compute(predictions=preds, references=labels)["f1"]
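# Usage-weighted mean rank: e.g. usage {4: 30, 8: 60, 16: 30} over 120 steps
# gives (4*30 + 8*60 + 16*30) / 120 = 9.0.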
def eff_rank(usage):
    tot = sum(usage.values())
    return sum(k * v for k, v in usage.items()) / tot if tot > 0 else 0
# ── TRAIN BASELINE ──────────────────────────────────
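# Fixed-rank baseline: every adapter pinned at rank 16 for the whole run.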
def train_baseline(model):
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    set_rank(model, 16)
    it = iter(train_loader)
    for step in range(STEPS):
        try:
            batch = next(it)
        except StopIteration:
            it = iter(train_loader); batch = next(it)
        x = batch["input_ids"].to(DEVICE)
        m = batch["attention_mask"].to(DEVICE)
        y = batch["label"].to(DEVICE)
        loss = model(input_ids=x, attention_mask=m, labels=y).loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        opt.zero_grad()
    return model
# ── TRAIN ORBITAL ───────────────────────────────────
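# Adaptive run: after each backward pass the controller picks a rank in
# {4, 8, 16} from the loss signal, and the usage histogram records it.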
def train_orbital(model):
    ctrl = OrbitalController(warmup=WARMUP, stable_window=STABLE_WINDOW)
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    usage = {4: 0, 8: 0, 16: 0}
    rank_trace = []
    it = iter(train_loader)
    for step in range(STEPS):
        try:
            batch = next(it)
        except StopIteration:
            it = iter(train_loader); batch = next(it)
        x = batch["input_ids"].to(DEVICE)
        m = batch["attention_mask"].to(DEVICE)
        y = batch["label"].to(DEVICE)
        loss = model(input_ids=x, attention_mask=m, labels=y).loss
        loss.backward()
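        # The controller proposes a rank from the scalar loss; clamp it to
        # [4, 16] (updating `usage` assumes proposals land on 4, 8, or 16).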
        new_rank = ctrl.step(loss.item())
        new_rank = max(4, min(16, new_rank))
        set_rank(model, new_rank)
        usage[new_rank] += 1
        rank_trace.append(new_rank)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        opt.zero_grad()
    return model, usage, rank_trace, ctrl
# ── RUN ─────────────────────────────────────────────
print(f"\nDevice: {DEVICE}")
print(f"Task: MRPC, {STEPS} steps")
print("=" * 55)
results = []
for seed in SEEDS:
    print(f"\n{'─' * 50}\n SEED {seed}\n{'─' * 50}")
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    base_model = build_model()
    base_model = train_baseline(base_model)
    f1_base = eval_model(base_model)
    del base_model; torch.cuda.empty_cache()
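    # Re-seed so the orbital run sees the same initialization and batch
    # order as the baseline run.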
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    uni_model = build_model()
    uni_model, usage, trace, ctrl = train_orbital(uni_model)
    f1_uni = eval_model(uni_model)
    er = eff_rank(usage)
    saving = 1 - er / 16
    transitions = sum(1 for i in range(1, len(trace)) if trace[i] != trace[i-1])
    print(f"\n BASELINE F1 = {f1_base:.3f} (rank=16 fixed)")
    print(f" ORBITAL F1 = {f1_uni:.3f} (eff_rank={er:.1f}, saving={saving*100:.0f}%)")
    print(f" delta F1 = {f1_uni - f1_base:+.3f}")
    print(f" Usage: r4={usage[4]} r8={usage[8]} r16={usage[16]} transitions={transitions}")
    results.append({
        'seed': seed, 'f1_base': f1_base, 'f1_uni': f1_uni,
        'delta': f1_uni - f1_base, 'eff_rank': er,
    })
    del uni_model; torch.cuda.empty_cache()
# ── SUMMARY ─────────────────────────────────────────
print(f"\n{'=' * 55}\n SUMMARY\n{'=' * 55}")
f1b = [r['f1_base'] for r in results]
f1u = [r['f1_uni'] for r in results]
print(f"\n Baseline F1: {np.mean(f1b):.3f} +/- {np.std(f1b):.3f}")
print(f" Orbital F1: {np.mean(f1u):.3f} +/- {np.std(f1u):.3f}")
print(f" delta F1: {np.mean([r['delta'] for r in results]):+.3f}")