Text Classification · Transformers

Tags: lora, fine-tuning, adaptive, research, nested-lora, synaptic-plasticity, rank-adaptation
Instructions to use Simo76/Unified-LoRA with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Simo76/Unified-LoRA with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="Simo76/Unified-LoRA")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Simo76/Unified-LoRA", dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
https://github.com/Sva76/Unified-LoRa
#1 — opened by Simo76
- .gitattributes +35 -0
- .gitignore +0 -56
- LICENSE +0 -201
- README.md +3 -76
- controller.py +0 -41
- docs/architecture.md +0 -171
- docs/experimental_results.md +0 -239
- experiments/stable_task_test.py +0 -226
- experiments/stress_test_task_switch.py +0 -214
- nested_lora.py +0 -130
- notebooks/mrpc_example.ipynb +0 -165
- orbital_controller.py +0 -291
- requirements.txt +0 -6
- unified_lora.py +0 -14
.gitattributes
ADDED
```diff
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
```
.gitignore
DELETED
```diff
@@ -1,56 +0,0 @@
-# ── PYTHON ─────────────────────────────────────────
-__pycache__/
-*.py[cod]
-*.so
-
-# ── ENV ────────────────────────────────────────────
-.env
-.venv/
-venv/
-env/
-
-# ── BUILD / DIST ───────────────────────────────────
-build/
-dist/
-*.egg-info/
-
-# ── NOTEBOOK ───────────────────────────────────────
-.ipynb_checkpoints
-
-# ── CACHE / TEST ───────────────────────────────────
-.pytest_cache/
-.mypy_cache/
-.coverage*
-htmlcov/
-
-# ── LOGS ───────────────────────────────────────────
-*.log
-
-# ── MODELS / CHECKPOINTS (CRITICAL) ────────────────
-*.pt
-*.bin
-*.ckpt
-*.safetensors
-
-# ── DATASETS ───────────────────────────────────────
-data/
-datasets/
-*.parquet
-*.csv
-
-# ── HF CACHE ───────────────────────────────────────
-.cache/
-huggingface/
-hf_cache/
-
-# ── EXPERIMENT OUTPUT ──────────────────────────────
-outputs/
-runs/
-wandb/
-
-# ── SYSTEM ─────────────────────────────────────────
-.DS_Store
-
-# ── EDITOR ─────────────────────────────────────────
-.vscode/
-.idea/
```
LICENSE
DELETED
```diff
@@ -1,201 +0,0 @@
-                              Apache License
-                        Version 2.0, January 2004
-                     http://www.apache.org/licenses/
```

(The deleted file was the standard, unmodified Apache License 2.0 text: Sections 1 through 9 plus the appendix boilerplate, removed in full.)
README.md
CHANGED
````diff
@@ -1,76 +1,3 @@
----
-license: apache-2.0
-tags:
-- lora
-- fine-tuning
-- adaptive
-- research
-- nested-lora
-- synaptic-plasticity
-- rank-adaptation
-library_name: transformers
-datasets:
-- nyu-mll/glue
-pipeline_tag: text-classification
----
-
-# Unified-LoRA
-
-**LoRA fine-tuning with synaptic plasticity: a neurobiologically-inspired controller that switches between qualitatively different operational modes based on training stress.**
-
-⚠️ **This is NOT a pretrained model.** Unified-LoRA is a training method/controller.
-
-👉 **Code**: [github.com/Sva76/Unified-LoRa](https://github.com/Sva76/Unified-LoRa)
-👉 **Demo**: [unified_lora_demo.ipynb](https://github.com/Sva76/Unified-LoRa/blob/main/notebooks/unified_lora_demo.ipynb)
-
-## What It Does
-
-A composite synaptic stress signal **φ(t) = f(Convergence, Entropy, Stress)** drives a 3-state FSM:
-
-| Mode | φ range | Rank | Behavior |
-|------|---------|------|----------|
-| SINGLE | φ < 0.3 | r=4 | Efficient cruise |
-| MULTI | 0.3 ≤ φ < 0.7 | r=8 | Active learning |
-| MIRROR | φ ≥ 0.7 | r=16 | Max capacity + weight snapshot for rollback |
-
-Rank transitions use **nested matrix slicing** (r4 ⊂ r8 ⊂ r16) — zero cold-start, zero re-allocation.
-
-Mirror mode saves a weight snapshot on entry. On exit, if weights drifted <5% (transient noise), the snapshot is restored. If drift was significant (real signal), the new weights are kept.
-
-## Results
-
-**GLUE (DistilBERT):** 3/4 tasks equal or better with 33–56% rank reduction.
-
-**Noise resilience:** +31 F1 at 50% label noise, 9× lower variance. No benefit on clean data. Confirmed at 67M–3B.
-
-**Stress-recovery cycle (Tinker/Llama-3.2-1B):** φ returns to pre-shock baseline (0.33 → 0.83 → 0.33), demonstrating fully reversible stress handling.
-
-## Quick Start
-
-```python
-from controller import setup_unified_lora
-
-adapters, ctrl = setup_unified_lora(model, target_modules=["q_proj", "v_proj"])
-
-for batch in dataloader:
-    loss = model(**batch).loss
-    loss.backward()
-    ctrl.step(loss=loss.item())  # φ(t) needs the loss for convergence signal
-    optimizer.step()
-    optimizer.zero_grad()
-```
-
-## Citation
-
-```bibtex
-@software{unified_lora_2025,
-  author = {Simona Vargiu},
-  title  = {Unified-LoRA: Synaptic Plasticity Controller for Adaptive LoRA Fine-Tuning},
-  year   = {2025},
-  url    = {https://github.com/Sva76/Unified-LoRa}
-}
-```
-
-## Contact
-
-Simona Vargiu (Independent Researcher) — simona.vargiu.malta@gmail.com
+---
+license: apache-2.0
+---
````
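The old README's mode table maps the stress signal φ to a rank via a 3-state FSM. A minimal sketch of that mapping, using the 0.3 and 0.7 thresholds from the table (the function name is illustrative, not the repo's API):

```python
def mode_for_phi(phi: float) -> tuple[str, int]:
    """Map a stress value phi to (mode, rank) per the old README's table:
    SINGLE (r=4) below 0.3, MULTI (r=8) up to 0.7, MIRROR (r=16) above."""
    if phi < 0.3:
        return ("SINGLE", 4)
    if phi < 0.7:
        return ("MULTI", 8)
    return ("MIRROR", 16)
```

Because r4 ⊂ r8 ⊂ r16 via nested slicing, any of these transitions reuses the already-trained weights.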
controller.py
DELETED
```diff
@@ -1,41 +0,0 @@
-"""
-Unified-LoRA Controller
-======================
-
-Convenience wrapper that exposes the full Unified-LoRA stack:
-
-- nested_lora.py        → execution engine (LoRA with dynamic rank slicing)
-- orbital_controller.py → control logic (stress-driven rank adaptation)
-
-Use this module for simple integration, or import submodules directly
-for fine-grained control.
-
-Author: Simona Vargiu
-License: Apache 2.0
-"""
-
-# ── ENGINE ──────────────────────────────────────────
-from nested_lora import (
-    NestedLoRALinear,
-    inject_nested_lora,
-    set_rank,
-    get_lora_params,
-    count_params,
-)
-
-# ── CONTROLLER ──────────────────────────────────────
-from orbital_controller import (
-    OrbitalController,
-    setup_unified_lora,
-)
-
-# ── EXPORT ──────────────────────────────────────────
-__all__ = [
-    "NestedLoRALinear",
-    "inject_nested_lora",
-    "set_rank",
-    "get_lora_params",
-    "count_params",
-    "OrbitalController",
-    "setup_unified_lora",
-]
```
docs/architecture.md
DELETED
@@ -1,171 +0,0 @@

```markdown
# Architecture — Nested Orbital LoRA

Core idea: dynamic rank control via stress-driven orbital transitions with weight persistence (no cold start).

## Problem: cold start on rank transitions

Standard multi-rank LoRA keeps separate adapters per rank:

    r=4, r=8, r=16 → independent weights

Switching rank causes partial cold restarts → performance drop.

## Solution: Nested LoRA (one adapter, multiple ranks)

Single adapter at max rank:

    A(16, d), B(d, 16)

Active rank is obtained by slicing:

    r=4  → A[:4, :], B[:, :4]
    r=8  → A[:8, :], B[:, :8]
    r=16 → full matrix

    r4 ⊂ r8 ⊂ r16

Lower ranks reuse trained weights → no cold start.

## Scaling

To keep output magnitude consistent:

    scale = max_rank / max(r, 1)
    scale = min(scale, 4.0)  # optional clamp

## Orbital Controller (no thresholds)

Dynamic trajectory instead of static FSM:

- Ascend  → stress detected → increase rank
- Hold    → oscillation → stay
- Descend → stable → decrease rank

Uses a stack to ensure symmetric return.

## Stress signal

    φ(t) = |loss − EMA(loss)| + 2.0 × max(0, loss − prev_loss)

Auto-calibrated thresholds:

    t_stress = μ + 0.7σ
    t_stable = max(μ − 0.3σ, 0)

Robust stats can be used to reduce noise.

## Why it matters

- avoids cold starts across rank changes
- adapts capacity in real-time
- works in black-box settings
- O(1) overhead

## Comparison

| Property        | Standard LoRA | AdaLoRA | Orbital LoRA |
|-----------------|---------------|---------|--------------|
| Rank control    | Fixed         | SVD     | Stress       |
| Control type    | None          | Open    | Closed-loop  |
| Transition cost | N/A           | High    | O(1)         |
| Architecture    | Single        | Pruned  | Nested       |
| Black-box       | Yes           | No      | Yes          |
```
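The nested-slicing and rescaling rules from the deleted architecture doc (A[:r, :], B[:, :r], scale = min(max_rank / r, 4)) can be sketched in a few lines of NumPy. This is an illustrative sketch, not the repo's `NestedLoRALinear` API; the class name and shapes are assumptions:

```python
import numpy as np

class NestedLoRASketch:
    """One adapter pair allocated at max_rank; lower ranks are slices
    of the same weights, so changing rank is O(1) with no re-allocation."""

    def __init__(self, d_in: int, d_out: int, max_rank: int = 16, seed: int = 0):
        rng = np.random.default_rng(seed)
        self.max_rank = max_rank
        self.A = rng.standard_normal((max_rank, d_in)) * 0.01  # down-projection
        self.B = np.zeros((d_out, max_rank))                   # up-projection, zero-init
        self.r = max_rank                                      # active rank

    def set_rank(self, r: int) -> None:
        # O(1) transition: only the active slice width changes;
        # the weights trained at higher ranks are reused (no cold start).
        self.r = r

    def delta(self, x: np.ndarray) -> np.ndarray:
        # LoRA update B[:, :r] @ A[:r, :] @ x, rescaled to keep output
        # magnitude consistent across ranks (clamped at 4.0).
        scale = min(self.max_rank / max(self.r, 1), 4.0)
        return scale * (self.B[:, :self.r] @ (self.A[:self.r, :] @ x))
```

Slicing at r=4 reuses exactly the first 4 rank components trained at r=16, which is the "no cold start" property the doc describes.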
docs/experimental_results.md
DELETED
|
@@ -1,239 +0,0 @@
|
|
| 1 |
-
Experimental Results
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
Core result: parity with baseline performance with ~15% rank reduction and dynamic shock response.
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
1. Stress Test — Task Switch
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
Setup
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
Model: DistilBERT-base-uncased + NestedLoRALinear (max_rank=16)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
Protocol: MRPC x 60 steps → SST-2 x 60 steps (shock at step 60)
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
Seeds: 0, 1, 2
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
Baseline: same architecture, fixed rank=16
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
Hardware: Colab T4
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
Results
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
Baseline (r=16)
|
| 40 |
-
Orbital LoRA
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
SST-2 Accuracy
|
| 46 |
-
0.736
|
| 47 |
-
0.740
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
MRPC F1 (retention)
|
| 51 |
-
0.526
|
| 52 |
-
0.515
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
Effective rank
|
| 56 |
-
16.0
|
| 57 |
-
13.6
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
Parity with ~15% rank saving

**Behavior**

Post-shock:

- detect → descend (r16 → r4)
- stabilize
- re-ascend (r4 → r16)

Baseline: no reaction (fixed r=16)

## 2. Stable Task — Parity

**Setup**

- Task: MRPC only (120 steps)
- Seeds: 0, 1, 2
- Baseline: fixed r=16

**Results**

| Seed | Baseline F1 | Orbital F1 |
|------|-------------|------------|
| 0    | 0.806       | 0.808      |
| 1    | 0.822       | 0.826      |
| 2    | 0.824       | 0.824      |
| Mean | 0.818       | 0.820      |

No degradation on stable training.

## 3. Rank Dynamics (Black-box — Tinker)

**Methods**

| Method        | Control     |
|---------------|-------------|
| Standard LoRA | Fixed rank  |
| AdaLoRA-like  | Open-loop   |
| Orbital LoRA  | Closed-loop |

**Disturbance response**

| Method       | Reaction  | Stability | Recovery  |
|--------------|-----------|-----------|-----------|
| Standard     | None      | Passive   | —         |
| AdaLoRA-like | Indirect  | Partial   | Limited   |
| Orbital LoRA | Immediate | Stable    | Immediate |

## 4. Architecture Insight

Root cause: cold start from separate adapters.

Fix: nested slicing → no cold start → parity restored.

## 5. Black-box compatibility

- Uses only the loss signal.
- No gradients required.
- O(1) overhead.

## Next

- 7B+ validation (ongoing)
- LR controller integration
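The detect → descend → stabilize → re-ascend cycle above can be sketched as a toy closed-loop controller driven only by the scalar training loss. This is an illustrative sketch, not the repository's OrbitalController: the `ToyRankController` name, the moving-average window, and the `shock_ratio` threshold are all hypothetical.

```python
# Toy closed-loop rank controller (hypothetical, for illustration only).
# A loss spike relative to a short moving average triggers a descent to the
# lowest nested rank; sustained stability re-ascends one level per step.

class ToyRankController:
    def __init__(self, ranks=(4, 8, 16), window=4, shock_ratio=1.5):
        self.ranks = list(ranks)        # nested rank levels, low to high
        self.idx = len(ranks) - 1       # start at max rank
        self.window = window            # moving-average window over the loss
        self.shock_ratio = shock_ratio  # loss jump that counts as a shock
        self.history = []

    def step(self, loss):
        self.history.append(loss)
        recent = self.history[-self.window:]
        avg = sum(recent) / len(recent)
        if loss > self.shock_ratio * avg and self.idx > 0:
            self.idx = 0                # shock detected: drop to lowest orbital
        elif loss <= avg and self.idx < len(self.ranks) - 1:
            self.idx += 1               # stable again: re-ascend one level
        return self.ranks[self.idx]
```

Feeding it a steady loss, then a spike, then recovery reproduces the r16 → r4 → r8 → r16 trajectory described above, using nothing but the loss value per step.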
experiments/stable_task_test.py
DELETED
@@ -1,226 +0,0 @@

```python
"""
Orbital LoRA — Stable Task Parity Test

MRPC only, 120 steps, 3 seeds.
Validates that the controller causes zero degradation on stable training.

Usage:
    pip install transformers datasets evaluate
    python stable_task_test.py
"""

import time, random, math, numpy as np, torch, torch.nn as nn
import torch.nn.functional as F, evaluate
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader

import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from nested_lora import NestedLoRALinear, inject_nested_lora
from orbital_controller import OrbitalController
from controller import set_rank

# ── CONFIG ──────────────────────────────────────────

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL = "distilbert-base-uncased"
BATCH = 8
STEPS = 120
LR = 5e-5
SEEDS = [0, 1, 2]

MAX_RANK = 16
WARMUP = 15
STABLE_WINDOW = 8

# ── DATA ────────────────────────────────────────────

print("Loading data...")
tok = AutoTokenizer.from_pretrained(MODEL)
ds = load_dataset("glue", "mrpc")

def tok_fn(x):
    return tok(x["sentence1"], x["sentence2"],
               truncation=True, padding="max_length", max_length=128)

ds = ds.map(tok_fn, batched=True)
ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
train_loader = DataLoader(ds["train"], batch_size=BATCH, shuffle=True)
val_loader = DataLoader(ds["validation"], batch_size=BATCH)
metric = evaluate.load("glue", "mrpc")

# ── HELPERS ─────────────────────────────────────────

def build_model():
    base = AutoModelForSequenceClassification.from_pretrained(
        MODEL, num_labels=2, ignore_mismatched_sizes=True
    )
    return inject_nested_lora(base, MAX_RANK).to(DEVICE)

def eval_model(model):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in val_loader:
            x = batch["input_ids"].to(DEVICE)
            m = batch["attention_mask"].to(DEVICE)
            y = batch["label"].to(DEVICE)
            logits = model(input_ids=x, attention_mask=m).logits
            preds.extend(logits.argmax(dim=-1).cpu().numpy())
            labels.extend(y.cpu().numpy())
    return metric.compute(predictions=preds, references=labels)["f1"]

def eff_rank(usage):
    tot = sum(usage.values())
    return sum(k * v for k, v in usage.items()) / tot if tot > 0 else 0

# ── TRAIN BASELINE ──────────────────────────────────

def train_baseline(model):
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    set_rank(model, 16)
    it = iter(train_loader)

    for step in range(STEPS):
        try:
            batch = next(it)
        except StopIteration:
            it = iter(train_loader); batch = next(it)

        x = batch["input_ids"].to(DEVICE)
        m = batch["attention_mask"].to(DEVICE)
        y = batch["label"].to(DEVICE)

        loss = model(input_ids=x, attention_mask=m, labels=y).loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        opt.zero_grad()

    return model

# ── TRAIN ORBITAL ───────────────────────────────────

def train_orbital(model):
    ctrl = OrbitalController(warmup=WARMUP, stable_window=STABLE_WINDOW)
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    usage = {4: 0, 8: 0, 16: 0}
    rank_trace = []
    it = iter(train_loader)

    for step in range(STEPS):
        try:
            batch = next(it)
        except StopIteration:
            it = iter(train_loader); batch = next(it)

        x = batch["input_ids"].to(DEVICE)
        m = batch["attention_mask"].to(DEVICE)
        y = batch["label"].to(DEVICE)

        loss = model(input_ids=x, attention_mask=m, labels=y).loss
        loss.backward()

        new_rank = ctrl.step(loss.item())
        new_rank = max(4, min(16, new_rank))
        set_rank(model, new_rank)

        usage[new_rank] += 1
        rank_trace.append(new_rank)

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        opt.zero_grad()

    return model, usage, rank_trace, ctrl

# ── RUN ─────────────────────────────────────────────

print(f"\nDevice: {DEVICE}")
print(f"Task: MRPC, {STEPS} steps")
print("=" * 55)

results = []

for seed in SEEDS:
    print(f"\n{'─' * 50}\n SEED {seed}\n{'─' * 50}")

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    base_model = build_model()
    base_model = train_baseline(base_model)
    f1_base = eval_model(base_model)
    del base_model; torch.cuda.empty_cache()

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    uni_model = build_model()
    uni_model, usage, trace, ctrl = train_orbital(uni_model)
    f1_uni = eval_model(uni_model)

    er = eff_rank(usage)
    saving = 1 - er / 16
    transitions = sum(1 for i in range(1, len(trace)) if trace[i] != trace[i-1])

    print(f"\n  BASELINE F1 = {f1_base:.3f}  (rank=16 fixed)")
    print(f"  ORBITAL  F1 = {f1_uni:.3f}  (eff_rank={er:.1f}, saving={saving*100:.0f}%)")
    print(f"  delta F1    = {f1_uni - f1_base:+.3f}")
    print(f"  Usage: r4={usage[4]} r8={usage[8]} r16={usage[16]} transitions={transitions}")

    results.append({
        'seed': seed, 'f1_base': f1_base, 'f1_uni': f1_uni,
        'delta': f1_uni - f1_base, 'eff_rank': er,
    })
    del uni_model; torch.cuda.empty_cache()

# ── SUMMARY ─────────────────────────────────────────

print(f"\n{'=' * 55}\n SUMMARY\n{'=' * 55}")
f1b = [r['f1_base'] for r in results]
f1u = [r['f1_uni'] for r in results]

print(f"\n  Baseline F1: {np.mean(f1b):.3f} +/- {np.std(f1b):.3f}")
print(f"  Orbital  F1: {np.mean(f1u):.3f} +/- {np.std(f1u):.3f}")
print(f"  delta F1:    {np.mean([r['delta'] for r in results]):+.3f}")
```
experiments/stress_test_task_switch.py
DELETED
@@ -1,214 +0,0 @@

```python
"""
Orbital LoRA — Stress Test: Task Switch

MRPC (60 steps) → SST-2 (60 steps)
Baseline (r=16 fixed) vs Orbital Controller
"""

import time, random, math, numpy as np, torch, torch.nn as nn
import torch.nn.functional as F, evaluate
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader

import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from nested_lora import NestedLoRALinear, inject_nested_lora
from orbital_controller import OrbitalController
from controller import set_rank

# ── CONFIG ──────────────────────────────────────────

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL = "distilbert-base-uncased"
BATCH = 8
LR = 5e-5
SEEDS = [0, 1, 2]

MAX_RANK = 16
WARMUP = 10
STABLE_WINDOW = 6

STEPS_TASK1 = 60
STEPS_TASK2 = 60
TOTAL_STEPS = STEPS_TASK1 + STEPS_TASK2

# ── DATA ────────────────────────────────────────────

print("Loading data...")
tok = AutoTokenizer.from_pretrained(MODEL)

ds_mrpc = load_dataset("glue", "mrpc")
def tok_mrpc(x):
    return tok(x["sentence1"], x["sentence2"],
               truncation=True, padding="max_length", max_length=128)
ds_mrpc = ds_mrpc.map(tok_mrpc, batched=True)
ds_mrpc.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
train_mrpc = DataLoader(ds_mrpc["train"], batch_size=BATCH, shuffle=True)
val_mrpc = DataLoader(ds_mrpc["validation"], batch_size=BATCH)

ds_sst2 = load_dataset("glue", "sst2")
def tok_sst2(x):
    return tok(x["sentence"], truncation=True, padding="max_length", max_length=128)
ds_sst2 = ds_sst2.map(tok_sst2, batched=True)
ds_sst2.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
train_sst2 = DataLoader(ds_sst2["train"], batch_size=BATCH, shuffle=True)
val_sst2 = DataLoader(ds_sst2["validation"], batch_size=BATCH)

metric_mrpc = evaluate.load("glue", "mrpc")
metric_sst2 = evaluate.load("glue", "sst2")

# ── HELPERS ─────────────────────────────────────────

def make_iter(loader):
    while True:
        for batch in loader:
            yield batch

def get_batch(it):
    batch = next(it)
    return (batch["input_ids"].to(DEVICE),
            batch["attention_mask"].to(DEVICE),
            batch["label"].to(DEVICE))

def build_model():
    base = AutoModelForSequenceClassification.from_pretrained(
        MODEL, num_labels=2, ignore_mismatched_sizes=True
    )
    return inject_nested_lora(base, MAX_RANK).to(DEVICE)

def eval_f1(model, loader, metric_fn):
    model.eval()
    preds, labels = [], []
    with torch.no_grad():
        for batch in loader:
            x = batch["input_ids"].to(DEVICE)
            m = batch["attention_mask"].to(DEVICE)
            y = batch["label"].to(DEVICE)
            logits = model(input_ids=x, attention_mask=m).logits
            preds.extend(logits.argmax(dim=-1).cpu().numpy())
            labels.extend(y.cpu().numpy())
    model.train()
    result = metric_fn.compute(predictions=preds, references=labels)
    return result.get("f1", result.get("accuracy", 0.0))

def eff_rank(usage):
    tot = sum(usage.values())
    return sum(k * v for k, v in usage.items()) / tot if tot > 0 else 0

# ── TRAIN BASELINE ──────────────────────────────────

def train_baseline(model):
    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    set_rank(model, 16)
    it_mrpc = make_iter(train_mrpc)
    it_sst2 = make_iter(train_sst2)

    for step in range(TOTAL_STEPS):
        x, m, y = get_batch(it_mrpc if step < STEPS_TASK1 else it_sst2)

        loss = model(input_ids=x, attention_mask=m, labels=y).loss
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        opt.zero_grad()

    return model

# ── TRAIN ORBITAL ───────────────────────────────────

def train_orbital(model):
    ctrl = OrbitalController(warmup=WARMUP, stable_window=STABLE_WINDOW)
    ctrl.rank = 4
    set_rank(model, 4)

    opt = torch.optim.AdamW(model.parameters(), lr=LR)
    usage = {4: 0, 8: 0, 16: 0}
    rank_trace = []
    it_mrpc = make_iter(train_mrpc)
    it_sst2 = make_iter(train_sst2)

    for step in range(TOTAL_STEPS):
        x, m, y = get_batch(it_mrpc if step < STEPS_TASK1 else it_sst2)

        loss = model(input_ids=x, attention_mask=m, labels=y).loss
        loss.backward()

        new_rank = ctrl.step(loss.item())
        new_rank = max(4, min(16, new_rank))
        set_rank(model, new_rank)

        usage[new_rank] += 1
        rank_trace.append(new_rank)

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        opt.zero_grad()

    return model, usage, rank_trace

# ── RUN ─────────────────────────────────────────────

print(f"\nDevice: {DEVICE}")
print(f"Plan: MRPC × {STEPS_TASK1} → SST-2 × {STEPS_TASK2}")
print(f"Shock at step {STEPS_TASK1}")
print("=" * 55)

results = []

for seed in SEEDS:
    print(f"\n{'─' * 55}\n SEED {seed}\n{'─' * 55}")

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    base_model = build_model()
    base_model = train_baseline(base_model)
    f1_mrpc_base = eval_f1(base_model, val_mrpc, metric_mrpc)
    f1_sst2_base = eval_f1(base_model, val_sst2, metric_sst2)
    del base_model; torch.cuda.empty_cache()

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)

    uni_model = build_model()
    uni_model, usage, rank_trace = train_orbital(uni_model)
    f1_mrpc_uni = eval_f1(uni_model, val_mrpc, metric_mrpc)
    f1_sst2_uni = eval_f1(uni_model, val_sst2, metric_sst2)

    er = eff_rank(usage)
    saving = 1 - er / 16
    transitions = sum(1 for i in range(1, len(rank_trace)) if rank_trace[i] != rank_trace[i-1])

    print(f"\n  {'':30s} {'BASELINE':>10s} {'ORBITAL':>10s}")
    print(f"  {'─' * 55}")
    print(f"  {'MRPC F1 (retention)':30s} {f1_mrpc_base:10.3f} {f1_mrpc_uni:10.3f}")
    print(f"  {'SST-2 Acc (new task)':30s} {f1_sst2_base:10.3f} {f1_sst2_uni:10.3f}")
    print(f"\n  Orbital: eff_rank={er:.1f} saving={saving*100:.0f}% transitions={transitions}")

    results.append({
        'f1_mrpc_base': f1_mrpc_base, 'f1_sst2_base': f1_sst2_base,
        'f1_mrpc_uni': f1_mrpc_uni, 'f1_sst2_uni': f1_sst2_uni,
        'eff_rank': er, 'saving': saving
    })
    del uni_model; torch.cuda.empty_cache()

# ── SUMMARY ─────────────────────────────────────────

print(f"\n{'=' * 55}\n SUMMARY\n{'=' * 55}")
mrpc_b = np.mean([r['f1_mrpc_base'] for r in results])
mrpc_u = np.mean([r['f1_mrpc_uni'] for r in results])
sst2_b = np.mean([r['f1_sst2_base'] for r in results])
sst2_u = np.mean([r['f1_sst2_uni'] for r in results])
er_avg = np.mean([r['eff_rank'] for r in results])
sv_avg = np.mean([r['saving'] for r in results])

print(f"\n  {'MRPC F1':20s} {mrpc_b:.3f} → {mrpc_u:.3f}")
print(f"  {'SST-2 Acc':20s} {sst2_b:.3f} → {sst2_u:.3f}")
print(f"  {'Eff rank':20s} 16.0 → {er_avg:.1f}")
print(f"  {'Saving':20s} 0% → {sv_avg*100:.0f}%")
```
nested_lora.py
DELETED
@@ -1,130 +0,0 @@

```python
"""
Nested LoRA — One Particle, Multiple Orbitals
===============================================

Single LoRA adapter pair with dynamic rank via slicing.
r4 ⊂ r8 ⊂ r16 — descending pauses dimensions, ascending resumes them.
Zero cold start on transitions.

This module is the "engine" — pure architecture, no control logic.
Pair with OrbitalController for adaptive rank decisions.

Author: Simona Vargiu
License: Apache 2.0
"""

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import List


class NestedLoRALinear(nn.Module):
    """
    Single LoRA adapter with dynamic rank via slicing.

    A single pair of matrices A(max_rank, in) and B(out, max_rank) is shared
    across all rank levels. The active rank is controlled by slicing:

        r=4  → A[:4, :],  B[:, :4]
        r=8  → A[:8, :],  B[:, :8]
        r=16 → A[:16, :], B[:, :16]

    When descending from r=16 to r=4, dimensions 0-3 retain all learned
    weights. Dimensions 4-15 are paused (no gradient), not destroyed.
    When ascending back, they resume exactly where they left off.

    Output is scaled by max_rank/active_rank to maintain consistent
    magnitude across rank changes (analogous to alpha/r in standard LoRA).

    Args:
        linear: Original nn.Linear layer to wrap
        max_rank: Maximum LoRA rank (default: 16)

    Example:
        >>> layer = NestedLoRALinear(original_linear, max_rank=16)
        >>> layer.set_rank(4)   # use 4 dimensions
        >>> out = layer(x)      # forward with r=4
        >>> layer.set_rank(16)  # expand to full rank
        >>> out = layer(x)      # forward with r=16, dimensions 0-3 unchanged
    """

    def __init__(self, linear: nn.Linear, max_rank: int = 16):
        super().__init__()
        self.linear = linear
        self.max_rank = max_rank
        self.active_rank = max_rank

        # Freeze original weights
        for p in self.linear.parameters():
            p.requires_grad = False

        # One particle: single A and B
        self.lora_A = nn.Parameter(torch.empty(max_rank, linear.in_features))
        self.lora_B = nn.Parameter(torch.zeros(linear.out_features, max_rank))

        # Standard LoRA init: A = kaiming, B = zeros → initial delta = 0
        nn.init.kaiming_uniform_(self.lora_A, a=math.sqrt(5))

    def set_rank(self, r: int):
        """Set the active orbital. Must be <= max_rank."""
        self.active_rank = min(r, self.max_rank)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        base = self.linear(x)
        r = self.active_rank

        h = F.linear(x, self.lora_A[:r, :])
        delta = F.linear(h, self.lora_B[:, :r])

        scale = self.max_rank / r
        return base + delta * scale


def inject_nested_lora(model: nn.Module, max_rank: int = 16) -> nn.Module:
    """
    Replace attention Linear layers with NestedLoRALinear.

    Targets any nn.Linear whose full name contains "attention".
    Original weights are frozen; only LoRA parameters are trainable.

    Args:
        model: PyTorch model
        max_rank: Maximum LoRA rank

    Returns:
        Model with NestedLoRA injected
    """
    for name, module in list(model.named_modules()):
        if isinstance(module, nn.Linear) and "attention" in name:
            parent = model
            *path, last = name.split(".")
            for p in path:
                parent = getattr(parent, p)
            setattr(parent, last, NestedLoRALinear(module, max_rank))
    return model


def set_rank(model: nn.Module, r: int):
    """Set active rank on all NestedLoRALinear modules in the model."""
    for m in model.modules():
        if isinstance(m, NestedLoRALinear):
            m.set_rank(r)


def get_lora_params(model: nn.Module) -> List[nn.Parameter]:
    """Get all LoRA parameters (for optimizer setup)."""
    params = []
    for m in model.modules():
        if isinstance(m, NestedLoRALinear):
            params.extend([m.lora_A, m.lora_B])
    return params


def count_params(model: nn.Module) -> dict:
    """Count total, trainable, and LoRA parameters."""
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    lora = sum(p.numel() for p in get_lora_params(model))
    return {"total": total, "trainable": trainable, "lora": lora}
```
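The nesting invariant described in the docstring, that the r=4 update is an exact sub-computation of the r=16 update, can be sanity-checked numerically. Below is a minimal standalone sketch with NumPy (the max_rank/active_rank scaling is omitted for clarity; the shapes mirror `lora_A` and `lora_B` above, with hypothetical feature sizes):

```python
import numpy as np

# Nested slicing: the r=4 delta is exactly the first 4 rank-1 terms of the
# r=16 delta, so descending pauses dimensions 4-15 rather than discarding them.
rng = np.random.default_rng(0)
A = rng.normal(size=(16, 32))   # lora_A: (max_rank, in_features)
B = rng.normal(size=(64, 16))   # lora_B: (out_features, max_rank)

delta_r16 = B @ A                # full-rank update
delta_r4 = B[:, :4] @ A[:4, :]   # sliced r=4 update
paused = B[:, 4:] @ A[4:, :]     # contribution of the paused dims 4-15

# full update = active slice + paused slice, term by term: ascending back
# to r=16 restores exactly the update that was paused, with no cold start
assert np.allclose(delta_r16, delta_r4 + paused)
```

This decomposition is why no re-initialization is needed on rank transitions: the paused dimensions' weights are untouched while inactive.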
notebooks/mrpc_example.ipynb
DELETED
@@ -1,165 +0,0 @@

```json
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Orbital LoRA - MRPC Benchmark Example\n",
    "\n",
    "**Expected:** performance parity with baseline + adaptive behavior\n"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "!pip install -q transformers datasets evaluate scikit-learn accelerate"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "import torch\n",
    "from datasets import load_dataset\n",
    "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
    "from torch.utils.data import DataLoader\n",
    "import evaluate\n",
    "\n",
    "import sys\n",
    "sys.path.append('..')\n",
    "\n",
    "from nested_lora import inject_nested_lora\n",
    "from orbital_controller import OrbitalController\n",
    "from controller import set_rank\n",
    "\n",
    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "print(device)"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "dataset = load_dataset('glue','mrpc')\n",
    "tokenizer = AutoTokenizer.from_pretrained('distilbert-base-uncased')\n",
    "\n",
    "def tok(x):\n",
    "    return tokenizer(x['sentence1'], x['sentence2'], truncation=True, padding='max_length', max_length=128)\n",
    "\n",
    "train = dataset['train'].map(tok, batched=True)\n",
    "val = dataset['validation'].map(tok, batched=True)\n",
    "\n",
    "train.set_format(type='torch', columns=['input_ids','attention_mask','label'])\n",
    "val.set_format(type='torch', columns=['input_ids','attention_mask','label'])\n",
    "\n",
    "train_loader = DataLoader(train, batch_size=16, shuffle=True)\n",
    "val_loader = DataLoader(val, batch_size=16)\n",
    "\n",
    "metric = evaluate.load('glue','mrpc')"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "def eval_model(model):\n",
    "    model.eval()\n",
    "    preds, labels = [], []\n",
    "    with torch.no_grad():\n",
    "        for b in val_loader:\n",
    "            x=b['input_ids'].to(device)\n",
    "            m=b['attention_mask'].to(device)\n",
    "            y=b['label'].to(device)\n",
    "            p=model(input_ids=x,attention_mask=m).logits.argmax(-1)\n",
    "            preds.extend(p.cpu().numpy()); labels.extend(y.cpu().numpy())\n",
    "    return metric.compute(predictions=preds,references=labels)['f1']"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# BASELINE\n",
    "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n",
    "model = inject_nested_lora(model,16).to(device)\n",
    "set_rank(model,16)\n",
    "\n",
    "opt = torch.optim.AdamW(model.parameters(), lr=5e-5)\n",
    "\n",
    "for step,b in enumerate(train_loader):\n",
    "    if step>200: break\n",
    "    x=b['input_ids'].to(device); m=b['attention_mask'].to(device); y=b['label'].to(device)\n",
    "    loss=model(input_ids=x,attention_mask=m,labels=y).loss\n",
    "    loss.backward(); opt.step(); opt.zero_grad()\n",
    "\n",
    "f1_base = eval_model(model)\n",
    "print('Baseline F1:', round(f1_base,3))"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "# ORBITAL\n",
    "model = AutoModelForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)\n",
    "model = inject_nested_lora(model,16).to(device)\n",
    "\n",
    "ctrl = OrbitalController(warmup=10, stable_window=6)\n",
    "set_rank(model,4)\n",
    "\n",
    "opt = torch.optim.AdamW(model.parameters(), lr=5e-5)\n",
    "\n",
    "for step,b in enumerate(train_loader):\n",
    "    if step>200: break\n",
    "    x=b['input_ids'].to(device); m=b['attention_mask'].to(device); y=b['label'].to(device)\n",
    "    loss=model(input_ids=x,attention_mask=m,labels=y).loss\n",
    "    loss.backward()\n",
    "\n",
    "    r = ctrl.step(loss.item())\n",
    "    r = max(4,min(16,r))\n",
    "    set_rank(model,r)\n",
    "\n",
    "    opt.step(); opt.zero_grad()\n",
    "\n",
    "f1_orb = eval_model(model)\n",
    "print('Orbital F1:', round(f1_orb,3))"
   ],
   "outputs": [],
   "execution_count": null
  },
  {
   "cell_type": "code",
   "metadata": {},
   "source": [
    "print('\\nBaseline:', round(f1_base,3))\n",
    "print('Orbital:', round(f1_orb,3))\n",
    "print('Delta:', round(f1_orb-f1_base,3))"
   ],
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
```
"language": "python",
|
| 157 |
-
"name": "python3"
|
| 158 |
-
},
|
| 159 |
-
"language_info": {
|
| 160 |
-
"name": "python"
|
| 161 |
-
}
|
| 162 |
-
},
|
| 163 |
-
"nbformat": 4,
|
| 164 |
-
"nbformat_minor": 4
|
| 165 |
-
}
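The notebook's `eval_model` reads the GLUE MRPC score out of `metric.compute(...)['f1']`. As a quick offline sanity check of what that number means, here is a hand-rolled binary F1 on a toy prediction set. This is a standalone sketch of the standard definition, not the `evaluate` library's implementation:

```python
def binary_f1(preds, labels):
    # Binary F1 = harmonic mean of precision and recall over the positive class.
    tp = sum(p == 1 and l == 1 for p, l in zip(preds, labels))
    fp = sum(p == 1 and l == 0 for p, l in zip(preds, labels))
    fn = sum(p == 0 and l == 1 for p, l in zip(preds, labels))
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

# One false positive, no false negatives: precision 3/4, recall 1 -> F1 = 6/7
print(round(binary_f1([1, 0, 1, 1, 0, 1], [1, 0, 0, 1, 0, 1]), 3))  # 0.857
```

On MRPC the positive class ("paraphrase") dominates, which is why the benchmark reports F1 alongside accuracy.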
orbital_controller.py
DELETED
|
@@ -1,291 +0,0 @@
"""
Orbital Controller — Trajectory Control with Memory
===================================================

Closed-loop rank controller that adapts model capacity based on
observed training stress. Works with any rank-adjustable system
(NestedLoRA, adaptive LR, or API-based training).

This module is the "intelligence" — pure control logic, no model code.
Pair with NestedLoRA for the complete Unified-LoRA system.

Author: Simona Vargiu
License: Apache 2.0
"""

import numpy as np
from typing import Dict, List, Optional


class OrbitalController:
    """
    Closed-loop trajectory controller for dynamic capacity adaptation.

    Unlike threshold-based controllers that map stress to rank statically,
    this implements orbital dynamics with memory:

        Ascend:  stress detected → jump to higher orbital, push delta
        Hold:    oscillating → stay, don't move
        Descend: confirmed stable → pop delta, symmetric return

    Each capacity increase is tracked on a stack and reversed only under
    confirmed stability. This prevents premature compression (returning
    too early) and oscillatory collapse (bouncing between ranks).

    The stress signal and thresholds are adaptive — they auto-calibrate
    to any model/task/loss scale without manual tuning.

    Args:
        ranks: Available capacity levels (default: [4, 8, 16])
        warmup: Steps at max capacity to build EMA baseline
        stable_window: Consecutive stable steps required for descent

    Example:
        >>> from nested_lora import inject_nested_lora, set_rank
        >>> from orbital_controller import OrbitalController
        >>>
        >>> model = inject_nested_lora(model, max_rank=16)
        >>> ctrl = OrbitalController()
        >>>
        >>> for step, batch in enumerate(loader):
        ...     loss = model(**batch).loss
        ...     new_rank = ctrl.step(loss.item())
        ...     set_rank(model, new_rank)
        ...     loss.backward()
        ...     optimizer.step()
    """

    def __init__(
        self,
        ranks: Optional[List[int]] = None,
        warmup: int = 10,
        stable_window: int = 6,
    ):
        self.RANKS = ranks or [4, 8, 16]
        self.warmup = warmup
        self.stable_window = stable_window
        self.reset()

    def reset(self):
        """Reset controller to initial state."""
        self.rank = self.RANKS[-1]
        self.orbit_stack = []
        self.loss_ema = 0.0
        self.prev_loss = None
        self.phi_hist = []
        self.stable_count = 0
        self.step_count = 0
        self.post_warmup = False

        self.history = {
            "rank": [],
            "phi": [],
            "stable_count": [],
        }

    # ── Stress signal ───────────────────────────────

    def _compute_phi(self, loss: float) -> float:
        """
        Stress signal from loss trajectory.

        φ = |loss - EMA| + 2.0 × max(0, loss - prev_loss)

        Combines deviation from trend (general instability)
        with spike detection (sudden deterioration).
        """
        self.loss_ema = 0.9 * self.loss_ema + 0.1 * loss
        delta = abs(loss - self.loss_ema)
        spike = max(0.0, loss - self.prev_loss) if self.prev_loss is not None else 0.0
        self.prev_loss = loss
        return delta + 2.0 * spike

    def _thresholds(self):
        """
        Adaptive thresholds from running statistics.

        t_stress = μ + 0.7σ  (above this → ascend)
        t_stable = μ - 0.3σ  (below this → stability confirmed)

        Auto-calibrates to loss scale. No manual tuning.
        """
        if len(self.phi_hist) < 10:
            return 0.15, 0.04
        recent = self.phi_hist[-40:]
        mu = np.mean(recent)
        sigma = np.std(recent) + 1e-8
        t_stress = mu + 0.7 * sigma
        t_stable = max(mu - 0.3 * sigma, 0.0)
        return t_stress, t_stable

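The adaptive thresholds in `_thresholds` can be checked in isolation. This standalone sketch mirrors the formula (t_stress = μ + 0.7σ, t_stable = max(μ − 0.3σ, 0)) using the stdlib `statistics` module in place of NumPy; `pstdev` matches `np.std`'s population convention:

```python
import statistics

def thresholds(phi_hist, window=40):
    # Standalone mirror of OrbitalController._thresholds, stdlib instead of NumPy.
    if len(phi_hist) < 10:
        return 0.15, 0.04                      # bootstrap defaults until enough history
    recent = phi_hist[-window:]
    mu = statistics.fmean(recent)
    sigma = statistics.pstdev(recent) + 1e-8   # population std, like np.std
    return mu + 0.7 * sigma, max(mu - 0.3 * sigma, 0.0)

# Calm training: a flat phi history has zero variance, so both thresholds
# collapse onto the mean and any deviation at all registers as stress.
t_stress, t_stable = thresholds([0.1] * 20)
print(round(t_stress, 3), round(t_stable, 3))  # 0.1 0.1
```

Because both thresholds are re-derived from the recent φ window each step, the controller needs no per-task tuning: the same code tracks a loss in the 0.3–0.6 range or in the hundreds.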
    # ── Core logic ──────────────────────────────────

    def _rank_index(self) -> int:
        return self.RANKS.index(self.rank)

    def step(self, loss: float) -> int:
        """
        Called once per training step. Returns the capacity level to use.

        Args:
            loss: Current step loss value

        Returns:
            int: Active rank (or capacity level) for next step
        """
        self.step_count += 1

        # First step: initialize EMA
        if self.prev_loss is None:
            self.loss_ema = loss
            self.prev_loss = loss
            self._log(0.0)
            return self.rank

        phi = self._compute_phi(loss)
        self.phi_hist.append(phi)

        # Warmup: build baseline at max capacity
        if self.step_count <= self.warmup:
            self._log(phi)
            return self.rank

        # Transition: warmup → ground state
        if not self.post_warmup:
            self.post_warmup = True
            self.rank = self.RANKS[0]
            self.orbit_stack = []
            self.stable_count = 0
            self._log(phi)
            return self.rank

        t_stress, t_stable = self._thresholds()

        # Stability counter
        if phi <= t_stable:
            self.stable_count += 1
        elif phi > t_stress:
            self.stable_count = 0
        else:
            self.stable_count = max(0, self.stable_count - 1)

        # ASCEND: stress → jump to higher orbital
        if phi > t_stress and self.rank < self.RANKS[-1]:
            idx = self._rank_index()
            new_idx = min(idx + 1, len(self.RANKS) - 1)
            new_rank = self.RANKS[new_idx]
            if new_rank != self.rank:
                self.orbit_stack.append(new_rank - self.rank)
                self.rank = new_rank
            self.stable_count = 0
            self._log(phi)
            return self.rank

        # DESCEND: confirmed stability → symmetric return
        if self.stable_count >= self.stable_window and self.orbit_stack:
            delta = self.orbit_stack.pop()
            target = self.rank - delta
            self.rank = min(self.RANKS, key=lambda r: abs(r - target))
            self.rank = max(self.rank, self.RANKS[0])
            self.stable_count = 0
            self._log(phi)
            return self.rank

        # HOLD: neutral → don't move
        self._log(phi)
        return self.rank

    # ── Introspection ───────────────────────────────

    def _log(self, phi: float):
        self.history["rank"].append(self.rank)
        self.history["phi"].append(phi)
        self.history["stable_count"].append(self.stable_count)

    def get_state(self) -> Dict:
        """Current controller state."""
        return {
            "rank": self.rank,
            "step": self.step_count,
            "orbit_stack": list(self.orbit_stack),
            "stable_count": self.stable_count,
            "phi": self.phi_hist[-1] if self.phi_hist else 0.0,
        }

    def get_history(self) -> Dict[str, list]:
        """Complete training history."""
        return self.history

    def __repr__(self) -> str:
        return (
            f"OrbitalController(step={self.step_count}, rank={self.rank}, "
            f"stack={self.orbit_stack}, stable={self.stable_count})"
        )


# ============================================================
# CONVENIENCE: setup helper
# ============================================================

def setup_unified_lora(model, max_rank=16, ranks=None, warmup=10, stable_window=6):
    """
    One-call setup: inject NestedLoRA + create OrbitalController.

    Args:
        model: PyTorch model
        max_rank: Maximum LoRA rank
        ranks: Available rank levels
        warmup: Controller warmup steps
        stable_window: Steps of stability before descent

    Returns:
        (model, controller) tuple

    Example:
        >>> from orbital_controller import setup_unified_lora
        >>> from nested_lora import set_rank
        >>>
        >>> model, ctrl = setup_unified_lora(model)
        >>> for step, batch in enumerate(loader):
        ...     loss = model(**batch).loss
        ...     set_rank(model, ctrl.step(loss.item()))
        ...     loss.backward(); optimizer.step(); optimizer.zero_grad()
    """
    from nested_lora import inject_nested_lora

    model = inject_nested_lora(model, max_rank)
    controller = OrbitalController(
        ranks=ranks or [4, 8, 16],
        warmup=warmup,
        stable_window=stable_window,
    )
    return model, controller


# ============================================================
# DEMO
# ============================================================

if __name__ == "__main__":
    print("Orbital Controller — Demo")
    print("=" * 50)
    print("Simulating: 30 stable → 10 shock → 30 recovery\n")

    ctrl = OrbitalController(warmup=8, stable_window=5)

    for step in range(70):
        if step < 30:
            loss = np.random.uniform(0.4, 0.6)
        elif step < 40:
            loss = np.random.uniform(1.5, 3.0)
        else:
            loss = np.random.uniform(0.3, 0.5)

        rank = ctrl.step(loss)

        if step % 5 == 0 or step == 30:
            s = ctrl.get_state()
            tag = "  <<<SHOCK" if step == 30 else ""
            print(f"  [{step:3d}] rank={rank:2d}  phi={s['phi']:.3f}  stack={s['orbit_stack']}{tag}")

    print(f"\nFinal: {ctrl}")
requirements.txt
DELETED
|
@@ -1,6 +0,0 @@
|
torch
transformers
datasets
evaluate
accelerate
scikit-learn
unified_lora.py
DELETED
|
@@ -1,14 +0,0 @@
|
"""
Legacy Adaptive LoRA (Deprecated)
=================================

Early gradient-based adaptive rank prototype.

Replaced by:
- NestedLoRA (shared orbital architecture)
- OrbitalController (stress-based closed-loop control)

This file is kept for reference only.

Status: deprecated
"""
|
|