Restructure to src/ layout with attention, per-layer MoE, and working chat
Browse files- Add GQA + RoPE attention (AttentionLayer) using config's n_heads/n_kv_heads/head_dim
- Wire 28 transformer layers (attention + shared MoE FFN per layer) in forward()
- Fix expert dispatch weight bug (per-token weights, not flattened scalar)
- Fix chat() stub: tokenize with GPT-2, generate, decode new tokens only
- Fix GRPO reward: remove proxy metrics, correctness + CoT bonus only
- Add load balance loss to GRPO train_step to prevent expert collapse
- Fix JSONLibrary recall() write-back and add 1000-entry cap per category
- Add tests/test_core.py with 11 tests covering all core components
- Add .gitignore excluding venv/, outputs/, data/, .vscode/, .claude/
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This view is limited to 50 files because it contains too many changes. See raw diff
- .claude/settings.local.json +0 -15
- .gitattributes +0 -35
- .gitignore +11 -51
- DESCRIPTION.MD +0 -0
- README.md +25 -88
- config.py +0 -213
- configs/memory.yaml +16 -0
- configs/model.yaml +63 -0
- configs/model_15b.yaml +77 -0
- configs/sandbox.yaml +24 -0
- configs/training.yaml +28 -0
- inference/__init__.py +0 -1
- inference/api.py +0 -148
- inference/chat.py +0 -59
- inference/chat_simple.py +0 -54
- inference/daemon.py +0 -173
- inference/engine.py +0 -406
- memory/__init__.py +0 -1
- memory/database.py +0 -379
- memory/vector_store.py +0 -109
- model/__init__.py +0 -7
- model/base.py +0 -152
- model/echo.py +0 -71
- model/ensemble.py +0 -346
- model/expert.py +0 -45
- model/herald.py +0 -62
- model/lazy_expert_loader.py +0 -120
- model/sentinel.py +0 -90
- requirements.txt +14 -4
- scripts/01_download_15b_data.py +112 -0
- scripts/01_download_7b_150gb.py +272 -0
- scripts/01_download_stem_data.py +144 -0
- scripts/04_train.py +310 -0
- scripts/04_train_5090_optimized.py +146 -0
- scripts/04_train_stem.py +134 -0
- scripts/04_train_universal.py +426 -0
- scripts/05_grpo_train.py +325 -0
- scripts/07_run_shorekeeper.py +104 -0
- scripts/09_run_tests.py +70 -0
- scripts/full_training_loop.py +0 -40
- scripts/push_to_github.py +0 -30
- scripts/quick_test.py +0 -48
- scripts/run_training.py +0 -54
- scripts/run_training.sh +0 -112
- src/__init__.py +1 -0
- src/council/__init__.py +4 -0
- src/council/attention.py +62 -0
- src/council/base_expert.py +27 -0
- src/council/experts.py +73 -0
- src/council/sentinel.py +48 -0
.claude/settings.local.json
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"permissions": {
|
| 3 |
-
"allow": [
|
| 4 |
-
"Bash(du -sh /Users/georjanorellana/Downloads/shorekeeper/data/raw/*)",
|
| 5 |
-
"Bash(.venv/bin/pip install:*)",
|
| 6 |
-
"Bash(ls /home/albedogames/shorekeeper/.venv/bin/pip*)",
|
| 7 |
-
"Bash(ls /home/albedogames/shorekeeper/venv/bin/pip*)",
|
| 8 |
-
"Bash(/home/albedogames/shorekeeper/.venv/bin/pip install:*)",
|
| 9 |
-
"Bash(python3 -c \"import sys; print\\(sys.executable\\)\")",
|
| 10 |
-
"Bash(/home/albedogames/shorekeeper/.venv/bin/python3 -m pip install psutil -q)",
|
| 11 |
-
"Bash(/home/albedogames/shorekeeper/.venv/bin/python3 -c \"import psutil; print\\(''psutil OK''\\)\")",
|
| 12 |
-
"Bash(.venv/bin/python -c \":*)"
|
| 13 |
-
]
|
| 14 |
-
}
|
| 15 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitattributes
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
|
@@ -1,56 +1,16 @@
|
|
| 1 |
-
# Python
|
| 2 |
-
__pycache__/
|
| 3 |
-
*.py[cod]
|
| 4 |
-
*$py.class
|
| 5 |
-
*.so
|
| 6 |
-
.Python
|
| 7 |
-
env/
|
| 8 |
venv/
|
| 9 |
.venv/
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
.
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
parts/
|
| 19 |
-
sdist/
|
| 20 |
-
var/
|
| 21 |
-
wheels/
|
| 22 |
*.egg-info/
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
# Large data files (too big for GitHub)
|
| 27 |
-
data/raw/
|
| 28 |
-
data/processed/
|
| 29 |
-
|
| 30 |
-
# Model checkpoints (too big for GitHub)
|
| 31 |
-
checkpoints/
|
| 32 |
-
|
| 33 |
-
# Runtime databases
|
| 34 |
-
memory_db/*.db
|
| 35 |
-
memory_store/
|
| 36 |
-
|
| 37 |
-
# Logs
|
| 38 |
-
logs/
|
| 39 |
-
|
| 40 |
-
# macOS resource forks
|
| 41 |
-
._*
|
| 42 |
.DS_Store
|
| 43 |
-
|
| 44 |
-
# IDE / Editor
|
| 45 |
.vscode/
|
| 46 |
-
.
|
| 47 |
-
*.swp
|
| 48 |
-
*.swo
|
| 49 |
-
*~
|
| 50 |
-
|
| 51 |
-
# Misc
|
| 52 |
-
*.log
|
| 53 |
-
.env
|
| 54 |
-
|
| 55 |
-
# Tokenizer training output (keep in repo if small)
|
| 56 |
-
# tokenizer/shorekeeper_tok/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
venv/
|
| 2 |
.venv/
|
| 3 |
+
outputs/
|
| 4 |
+
data/
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.py[cod]
|
| 7 |
+
*.pth
|
| 8 |
+
*.pt
|
| 9 |
+
*.bin
|
| 10 |
+
.env
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
*.egg-info/
|
| 12 |
+
dist/
|
| 13 |
+
build/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
.DS_Store
|
|
|
|
|
|
|
| 15 |
.vscode/
|
| 16 |
+
.claude/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
DESCRIPTION.MD
DELETED
|
The diff for this file is too large to render.
See raw diff
|
|
|
README.md
CHANGED
|
@@ -1,91 +1,28 @@
|
|
| 1 |
-
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
| 20 |
|
| 21 |
## Quick Start
|
| 22 |
|
| 23 |
-
``
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
python3 tokenizer/train_tokenizer.py
|
| 28 |
-
python3 scripts/full_training_loop.py
|
| 29 |
-
python3 inference/chat.py
|
| 30 |
-
|
| 31 |
-
# Cross-platform daemon (Linux/macOS UNIX socket by default)
|
| 32 |
-
python3 inference/daemon.py --mode auto
|
| 33 |
-
# If Windows, run:
|
| 34 |
-
python3 inference/daemon.py --mode tcp --host 127.0.0.1 --port 8500
|
| 35 |
-
|
| 36 |
-
# GitHub Migration
|
| 37 |
-
python3 scripts/push_to_github.py
|
| 38 |
-
```
|
| 39 |
-
|
| 40 |
-
## Manual Steps
|
| 41 |
-
|
| 42 |
-
```bash
|
| 43 |
-
pip install torch tokenizers
|
| 44 |
-
|
| 45 |
-
python data/generate_sample_data.py # create sample data
|
| 46 |
-
python tokenizer/train_tokenizer.py # train BPE tokenizer
|
| 47 |
-
python data/processor.py # tokenize -> tensors
|
| 48 |
-
python memory/database.py # init SQLite
|
| 49 |
-
# Cross-platform quick runner:
|
| 50 |
-
python scripts/run_training.py
|
| 51 |
-
|
| 52 |
-
python training/train_base.py # phase 1: shared base
|
| 53 |
-
python training/train_expert.py --all # phase 2: all experts
|
| 54 |
-
python training/train_ensemble.py # phase 6: end-to-end
|
| 55 |
-
|
| 56 |
-
python inference/chat.py # launch chat
|
| 57 |
-
```
|
| 58 |
-
|
| 59 |
-
## Scale Up (Real Training)
|
| 60 |
-
|
| 61 |
-
- `VOCAB_SIZE`: 32000
|
| 62 |
-
- `BASE_CONFIG`: 1024 dim / 12 layers / 16 heads
|
| 63 |
-
- `n_positions`: 2048
|
| 64 |
-
- `TRAIN_CONFIG max_steps`: 50000
|
| 65 |
-
- **Device**: MPS (Metal) for Mac Silicon
|
| 66 |
-
|
| 67 |
-
## Chat Commands
|
| 68 |
-
|
| 69 |
-
```
|
| 70 |
-
/route <query> show routing without generating
|
| 71 |
-
/expert <name> force specific expert
|
| 72 |
-
/routing on|off toggle routing display
|
| 73 |
-
/incidents Sentinel incident log
|
| 74 |
-
/reset clear session memory
|
| 75 |
-
/experts list expert names
|
| 76 |
-
/exit quit
|
| 77 |
-
```
|
| 78 |
-
|
| 79 |
-
## Expert Roster
|
| 80 |
-
|
| 81 |
-
| Name | Domain | Named After |
|
| 82 |
-
|------|--------|-------------|
|
| 83 |
-
| calcharo | Security / CVE / Network threats | The Calamity |
|
| 84 |
-
| rover | Code / Debug / Architecture | The Explorer |
|
| 85 |
-
| resonance | Logic / Reasoning / Causation | The Force |
|
| 86 |
-
| tacet | Threat Intel / IOC / APT | The Discord |
|
| 87 |
-
| jianxin | Linux / OS / CUDA / systemd | Calm Mastery |
|
| 88 |
-
| verina | Conversation / NLP / Interface | Healer |
|
| 89 |
-
| sentinel | Self-Monitor / Drift Detection | The Watchman |
|
| 90 |
-
| herald | Router (always active) | The Messenger |
|
| 91 |
-
| echo | Memory (always active) | Resonant Imprint |
|
|
|
|
| 1 |
+
# SHOREKEEPER-4B
|
| 2 |
+
|
| 3 |
+
**A 4B parameter reasoning model with 12 specialized experts and infinite memory.**
|
| 4 |
+
|
| 5 |
+
## The Council of Experts
|
| 6 |
+
|
| 7 |
+
| Expert | Role | Specialty |
|
| 8 |
+
|--------|------|-----------|
|
| 9 |
+
| **Sentinel** | Router | Decides which experts activate |
|
| 10 |
+
| Asmoday | Code Architect | Python, algorithms, debugging |
|
| 11 |
+
| Istaroth | Systems | OS, networking, Docker |
|
| 12 |
+
| Ronova | Reasoning | Math, logic, step-by-step |
|
| 13 |
+
| Naberius | Memory | JSON library retrieval |
|
| 14 |
+
| Phanes | Creation | Writing, generation, creativity |
|
| 15 |
+
| Barbeloth | Analysis | Data, patterns, insights |
|
| 16 |
+
| Tacet | Silence | Noise filtering, summarization |
|
| 17 |
+
| Abby | Empathy | User context, preferences |
|
| 18 |
+
| Reindoter | Validation | Testing, verification |
|
| 19 |
+
| Zestial | Vision | Code visualization, graphs |
|
| 20 |
+
| Alice | Exploration | Novel solutions, experimentation |
|
| 21 |
+
| Rover | Execution | Terminal commands, sandbox |
|
| 22 |
|
| 23 |
## Quick Start
|
| 24 |
|
| 25 |
+
`'`bash
|
| 26 |
+
pip install -r requirements.txt
|
| 27 |
+
python scripts/07_run_shorekeeper.py
|
| 28 |
+
`'`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
DELETED
|
@@ -1,213 +0,0 @@
|
|
| 1 |
-
# config.py
|
| 2 |
-
# Central configuration for all Shorekeeper components.
|
| 3 |
-
# Every constant, path, and hyperparameter lives here.
|
| 4 |
-
# Import with: from config import TRAIN_CONFIG, CHECKPOINT_DIR, etc.
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import torch
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
# ── PROJECT ROOT ──────────────────────────────────────────────────────
|
| 12 |
-
PROJECT_ROOT = Path(__file__).parent
|
| 13 |
-
|
| 14 |
-
# ── DATA PATHS ────────────────────────────────────────────────────────
|
| 15 |
-
# If using external drive with symlinks (recommended):
|
| 16 |
-
# data/raw → /mnt/shorekeeper_data/raw
|
| 17 |
-
# data/processed → /mnt/shorekeeper_data/processed
|
| 18 |
-
# checkpoints/ → /mnt/shorekeeper_data/checkpoints
|
| 19 |
-
# If not using symlinks, these paths just live on the main drive.
|
| 20 |
-
RAW_DATA_DIR = PROJECT_ROOT / "data" / "raw"
|
| 21 |
-
PROCESSED_DIR = PROJECT_ROOT / "data" / "processed"
|
| 22 |
-
CHECKPOINT_DIR = PROJECT_ROOT / "checkpoints"
|
| 23 |
-
LOG_DIR = PROJECT_ROOT / "logs"
|
| 24 |
-
MEMORY_DIR = PROJECT_ROOT / "memory_store"
|
| 25 |
-
|
| 26 |
-
# Auto-create all directories on import
|
| 27 |
-
for _d in [RAW_DATA_DIR, PROCESSED_DIR, CHECKPOINT_DIR, LOG_DIR, MEMORY_DIR]:
|
| 28 |
-
_d.mkdir(parents=True, exist_ok=True)
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# ── VOCABULARY ────────────────────────────────────────────────────────
|
| 32 |
-
VOCAB_SIZE = 50_257 # GPT-2 compatible vocabulary size
|
| 33 |
-
|
| 34 |
-
# Special tokens and their IDs (assigned during tokenizer training)
|
| 35 |
-
# These MUST match what train_tokenizer.py produces.
|
| 36 |
-
# DO NOT change these after the tokenizer is trained.
|
| 37 |
-
SPECIAL_TOKENS = {
|
| 38 |
-
"[PAD]": 0, # Padding token (ignored in loss computation)
|
| 39 |
-
"[UNK]": 1, # Unknown token (should be rare with BPE)
|
| 40 |
-
"[BOS]": 2, # Beginning of sequence
|
| 41 |
-
"[EOS]": 3, # End of sequence
|
| 42 |
-
"[SEP]": 4, # Separator (used between context and query by Echo)
|
| 43 |
-
"[MASK]": 5, # Masked token (reserved for future MLM training)
|
| 44 |
-
"[SYSTEM]": 6, # System prompt marker
|
| 45 |
-
"[USER]": 7, # User turn marker
|
| 46 |
-
"[ASSISTANT]": 8, # Assistant turn marker
|
| 47 |
-
"[MEMORY]": 9, # Memory context injection marker
|
| 48 |
-
"[SECURITY]": 10, # calcharo expert domain marker
|
| 49 |
-
"[CODE]": 11, # rover expert domain marker
|
| 50 |
-
"[REASON]": 12, # resonance expert domain marker
|
| 51 |
-
"[SYSTEM2]": 13, # jianxin expert domain marker
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
# ── BASE MODEL CONFIG ─────────────────────────────────────────────────
|
| 56 |
-
# SharedBase: the 500M shared transformer backbone.
|
| 57 |
-
# Every expert builds on top of these representations.
|
| 58 |
-
BASE_CONFIG = {
|
| 59 |
-
"n_embd": 2048, # Hidden state dimension
|
| 60 |
-
# This dimension flows through ALL components
|
| 61 |
-
"n_head": 16, # Attention heads (n_embd / n_head = 128 per head)
|
| 62 |
-
"n_layer": 8, # Transformer layers in the shared base (8 fits in 12 GB VRAM)
|
| 63 |
-
"n_positions": 2048, # Maximum sequence length (context window)
|
| 64 |
-
"dropout": 0.1, # Dropout rate (applied during training, disabled at inference)
|
| 65 |
-
"vocab_size": VOCAB_SIZE,
|
| 66 |
-
}
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
# ── EXPERT CONFIGS ────────────────────────────────────────────────────
|
| 70 |
-
# Each expert has its own number of transformer layers.
|
| 71 |
-
# Experts with more layers have more capacity for their domain.
|
| 72 |
-
# The n_embd MUST match BASE_CONFIG["n_embd"] = 2048.
|
| 73 |
-
EXPERT_NAMES = ["calcharo", "rover", "resonance", "tacet", "jianxin", "verina"]
|
| 74 |
-
|
| 75 |
-
EXPERT_CONFIGS = {
|
| 76 |
-
# Heavy experts (8 layers): high-value domains with most training data
|
| 77 |
-
"calcharo": {"n_layer": 8, "n_embd": 2048, "n_head": 16},
|
| 78 |
-
"rover": {"n_layer": 8, "n_embd": 2048, "n_head": 16},
|
| 79 |
-
"resonance": {"n_layer": 8, "n_embd": 2048, "n_head": 16},
|
| 80 |
-
# Medium experts (6 layers): specialized domains
|
| 81 |
-
"tacet": {"n_layer": 6, "n_embd": 2048, "n_head": 16},
|
| 82 |
-
"jianxin": {"n_layer": 6, "n_embd": 2048, "n_head": 16},
|
| 83 |
-
"verina": {"n_layer": 6, "n_embd": 2048, "n_head": 16},
|
| 84 |
-
# Monitoring expert (4 layers): sentinel only needs classification capacity
|
| 85 |
-
"sentinel": {"n_layer": 4, "n_embd": 2048, "n_head": 16},
|
| 86 |
-
}
|
| 87 |
-
|
| 88 |
-
# Herald and Echo configs (these are routing/memory modules, not generative)
|
| 89 |
-
HERALD_CONFIG = {"n_layer": 2, "n_embd": 2048, "n_head": 16, "n_experts": len(EXPERT_NAMES), "top_k": 2}
|
| 90 |
-
ECHO_CONFIG = {"n_layer": 1, "n_embd": 2048, "n_head": 16, "max_memory_tokens": 512}
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
# ── TRAINING CONFIG ───────────────────────────────────────────────────
|
| 94 |
-
TRAIN_CONFIG = {
|
| 95 |
-
|
| 96 |
-
# Phase 1: Base pre-training
|
| 97 |
-
"base_lr": 3e-4, # Peak learning rate for base model
|
| 98 |
-
"base_max_steps": 100_000,# Total training steps (100k)
|
| 99 |
-
"base_warmup": 2_000, # LR warmup steps
|
| 100 |
-
"base_batch_size": 1, # Mini-batch size per GPU (reduced for 12GB VRAM)
|
| 101 |
-
"base_grad_accum": 32, # Gradient accumulation
|
| 102 |
-
# Effective batch = 1 * 32 = 32 sequences
|
| 103 |
-
# Phase 2: Expert fine-tuning
|
| 104 |
-
"expert_lr": 1e-4, # Lower LR for fine-tuning
|
| 105 |
-
"expert_max_steps": 50_000,
|
| 106 |
-
"expert_warmup": 1_000,
|
| 107 |
-
"expert_batch_size":2, # Expert heads are lighter (no base layers)
|
| 108 |
-
"expert_grad_accum":16,
|
| 109 |
-
|
| 110 |
-
# Phase 3: Herald routing training
|
| 111 |
-
"herald_lr": 1e-4,
|
| 112 |
-
"herald_max_steps": 20_000,
|
| 113 |
-
"herald_warmup": 500,
|
| 114 |
-
"herald_batch_size": 32, # Routing examples are short, bigger batches
|
| 115 |
-
"herald_grad_accum": 2,
|
| 116 |
-
|
| 117 |
-
# Phase 4: Sentinel training
|
| 118 |
-
"sentinel_lr": 5e-5,
|
| 119 |
-
"sentinel_max_steps": 15_000,
|
| 120 |
-
"sentinel_warmup": 500,
|
| 121 |
-
"sentinel_batch_size": 16,
|
| 122 |
-
"sentinel_grad_accum": 2,
|
| 123 |
-
|
| 124 |
-
# Phase 6: Ensemble fine-tuning
|
| 125 |
-
"ensemble_lr": 5e-5, # Very low LR — preserve pre-trained knowledge
|
| 126 |
-
"ensemble_max_steps": 30_000,
|
| 127 |
-
"ensemble_warmup": 1_000,
|
| 128 |
-
"ensemble_batch_size": 2, # Small batch — full ensemble is huge
|
| 129 |
-
"ensemble_grad_accum": 16,
|
| 130 |
-
|
| 131 |
-
# Shared across all phases
|
| 132 |
-
"weight_decay": 0.1,
|
| 133 |
-
"grad_clip": 1.0, # Gradient norm clipping threshold
|
| 134 |
-
"beta1": 0.9, # AdamW beta1
|
| 135 |
-
"beta2": 0.95, # AdamW beta2 (0.95 for LLMs, not 0.999)
|
| 136 |
-
"epsilon": 1e-8, # AdamW epsilon
|
| 137 |
-
|
| 138 |
-
# Logging and checkpointing
|
| 139 |
-
"log_interval": 100, # Log every N steps
|
| 140 |
-
"eval_interval": 2000, # Evaluate on validation set every N steps
|
| 141 |
-
"save_interval": 5000, # Save checkpoint every N steps
|
| 142 |
-
"keep_last_n": 3, # Keep only last N step checkpoints
|
| 143 |
-
}
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
# ── MEMORY OPTIMIZATION ───────────────────────────────────────────────
|
| 147 |
-
# Settings to reduce VRAM usage on RTX 3060 (12GB)
|
| 148 |
-
MEMORY_OPT = {
|
| 149 |
-
"gradient_checkpointing": True, # Trade compute for memory
|
| 150 |
-
# Recomputes activations during backward
|
| 151 |
-
# ~30% slower, ~40% less VRAM
|
| 152 |
-
"use_bf16": True, # bfloat16 (better than fp16 for stability)
|
| 153 |
-
# Requires Ampere+ GPU (RTX 3060 supports it)
|
| 154 |
-
"use_compile":False, # torch.compile (PyTorch 2.0+)
|
| 155 |
-
# Enable for faster training after debugging
|
| 156 |
-
"cpu_offload":False, # CPU offload for optimizer states (not needed at 8 layers)
|
| 157 |
-
}
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
# ── INFERENCE CONFIG ──────────────────────────────────────────────────
|
| 161 |
-
INFER_CONFIG = {
|
| 162 |
-
"max_new_tokens": 512, # Maximum tokens to generate per response
|
| 163 |
-
"temperature": 0.8, # Sampling temperature (0=greedy, 1=random)
|
| 164 |
-
"top_p": 0.9, # Nucleus sampling: keep tokens summing to 90% prob
|
| 165 |
-
"top_k": 50, # Top-K sampling: only consider top 50 tokens
|
| 166 |
-
"repetition_penalty": 1.1, # Penalize repeated tokens (1.0 = no penalty)
|
| 167 |
-
}
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
# ── SENTINEL CONFIG ───────────────────────────────────────────────────
|
| 171 |
-
SENTINEL_CONFIG = {
|
| 172 |
-
"flag_threshold": 0.5, # Risk score above this → FLAG (log, continue)
|
| 173 |
-
"block_threshold": 0.8, # Risk score above this → BLOCK (replace output)
|
| 174 |
-
"window_size": 10, # Rolling window for drift pattern detection
|
| 175 |
-
"hidden_dim": 512, # Sentinel classification head hidden size
|
| 176 |
-
}
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
# ── DEVICE / DTYPE ────────────────────────────────────────────────────
|
| 180 |
-
# Auto-detect: CUDA (NVIDIA) > MPS (Apple Silicon) > CPU
|
| 181 |
-
if torch.cuda.is_available():
|
| 182 |
-
DEVICE = "cuda"
|
| 183 |
-
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
| 184 |
-
DEVICE = "mps"
|
| 185 |
-
else:
|
| 186 |
-
DEVICE = "cpu"
|
| 187 |
-
|
| 188 |
-
# bfloat16 is preferred on NVIDIA Ampere+ (RTX 3060 supports it).
|
| 189 |
-
# MPS uses float16. CPU falls back to float32 for numerical stability.
|
| 190 |
-
if DEVICE == "cuda" and MEMORY_OPT["use_bf16"]:
|
| 191 |
-
DTYPE = torch.bfloat16
|
| 192 |
-
elif DEVICE == "cuda":
|
| 193 |
-
DTYPE = torch.float16
|
| 194 |
-
elif DEVICE == "mps":
|
| 195 |
-
DTYPE = torch.float16 # MPS supports float16 (bfloat16 support is limited)
|
| 196 |
-
else:
|
| 197 |
-
DTYPE = torch.float32
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
# ── SMOKE TEST MODE ───────────────────────────────────────────────────
|
| 201 |
-
# Set USE_TEST_CONFIG=True to run with tiny dimensions for quick sanity check
|
| 202 |
-
# Run: USE_TEST_CONFIG=1 python training/train_base.py
|
| 203 |
-
USE_TEST_CONFIG = os.environ.get("USE_TEST_CONFIG", "0") == "1"
|
| 204 |
-
|
| 205 |
-
if USE_TEST_CONFIG:
|
| 206 |
-
print("[config] SMOKE TEST MODE — tiny dimensions")
|
| 207 |
-
BASE_CONFIG.update({"n_embd": 64, "n_head": 4, "n_layer": 2, "n_positions": 128})
|
| 208 |
-
for k in EXPERT_CONFIGS:
|
| 209 |
-
EXPERT_CONFIGS[k].update({"n_layer": 1, "n_embd": 64, "n_head": 4})
|
| 210 |
-
TRAIN_CONFIG.update({
|
| 211 |
-
"base_max_steps": 100, "base_batch_size": 2, "base_grad_accum": 1,
|
| 212 |
-
"expert_max_steps": 50, "log_interval": 10, "eval_interval": 50,
|
| 213 |
-
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
configs/memory.yaml
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
memory:
|
| 2 |
+
type: "json_library"
|
| 3 |
+
path: "./data/json_library/"
|
| 4 |
+
embedding_model: "all-MiniLM-L6-v2"
|
| 5 |
+
embedding_dim: 384
|
| 6 |
+
max_entries: null
|
| 7 |
+
auto_summarize_threshold: null
|
| 8 |
+
default_recall_limit: 10
|
| 9 |
+
relevance_threshold: 0.7
|
| 10 |
+
categories:
|
| 11 |
+
- "user_preferences"
|
| 12 |
+
- "project_context"
|
| 13 |
+
- "conversation_history"
|
| 14 |
+
- "important_facts"
|
| 15 |
+
- "code_patterns"
|
| 16 |
+
- "learned_skills"
|
configs/model.yaml
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: "SHOREKEEPER-4B"
|
| 3 |
+
version: "1.0.0"
|
| 4 |
+
|
| 5 |
+
dim: 3072
|
| 6 |
+
n_layers: 28
|
| 7 |
+
n_heads: 24
|
| 8 |
+
n_kv_heads: 6
|
| 9 |
+
head_dim: 128
|
| 10 |
+
vocab_size: 50304
|
| 11 |
+
seq_len: 8192
|
| 12 |
+
|
| 13 |
+
n_experts: 12
|
| 14 |
+
n_activated: 2
|
| 15 |
+
expert_dim: 2048
|
| 16 |
+
|
| 17 |
+
experts:
|
| 18 |
+
router: "Sentinel"
|
| 19 |
+
members:
|
| 20 |
+
- name: "Asmoday"
|
| 21 |
+
role: "code"
|
| 22 |
+
specialization: "python_development"
|
| 23 |
+
- name: "Istaroth"
|
| 24 |
+
role: "systems"
|
| 25 |
+
specialization: "os_networking"
|
| 26 |
+
- name: "Ronova"
|
| 27 |
+
role: "reasoning"
|
| 28 |
+
specialization: "math_logic"
|
| 29 |
+
- name: "Naberius"
|
| 30 |
+
role: "memory"
|
| 31 |
+
specialization: "retrieval"
|
| 32 |
+
- name: "Phanes"
|
| 33 |
+
role: "creation"
|
| 34 |
+
specialization: "writing"
|
| 35 |
+
- name: "Barbeloth"
|
| 36 |
+
role: "analysis"
|
| 37 |
+
specialization: "data_patterns"
|
| 38 |
+
- name: "Tacet"
|
| 39 |
+
role: "silence"
|
| 40 |
+
specialization: "filtering"
|
| 41 |
+
- name: "Abby"
|
| 42 |
+
role: "empathy"
|
| 43 |
+
specialization: "user_context"
|
| 44 |
+
- name: "Reindoter"
|
| 45 |
+
role: "validation"
|
| 46 |
+
specialization: "testing"
|
| 47 |
+
- name: "Zestial"
|
| 48 |
+
role: "vision"
|
| 49 |
+
specialization: "visualization"
|
| 50 |
+
- name: "Alice"
|
| 51 |
+
role: "exploration"
|
| 52 |
+
specialization: "novelty"
|
| 53 |
+
- name: "Rover"
|
| 54 |
+
role: "execution"
|
| 55 |
+
specialization: "terminal"
|
| 56 |
+
|
| 57 |
+
rope_theta: 1000000.0
|
| 58 |
+
|
| 59 |
+
quantization:
|
| 60 |
+
bits: 4
|
| 61 |
+
type: "nf4"
|
| 62 |
+
double_quant: true
|
| 63 |
+
compute_dtype: "bfloat16"
|
configs/model_15b.yaml
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: "SHOREKEEPER-15B"
|
| 3 |
+
version: "2.0.0"
|
| 4 |
+
|
| 5 |
+
# 15B architecture
|
| 6 |
+
dim: 6144
|
| 7 |
+
n_layers: 48
|
| 8 |
+
n_heads: 48
|
| 9 |
+
n_kv_heads: 12 # MLA compression
|
| 10 |
+
head_dim: 128
|
| 11 |
+
vocab_size: 100352
|
| 12 |
+
seq_len: 8192
|
| 13 |
+
|
| 14 |
+
# MoE Council - 16 experts for 15B
|
| 15 |
+
n_experts: 16
|
| 16 |
+
n_activated: 2
|
| 17 |
+
expert_dim: 4096
|
| 18 |
+
|
| 19 |
+
experts:
|
| 20 |
+
router: "Sentinel"
|
| 21 |
+
members:
|
| 22 |
+
- name: "Asmoday"
|
| 23 |
+
role: "code"
|
| 24 |
+
specialization: "python_development"
|
| 25 |
+
- name: "Istaroth"
|
| 26 |
+
role: "systems"
|
| 27 |
+
specialization: "os_networking"
|
| 28 |
+
- name: "Ronova"
|
| 29 |
+
role: "reasoning"
|
| 30 |
+
specialization: "math_logic"
|
| 31 |
+
- name: "Naberius"
|
| 32 |
+
role: "memory"
|
| 33 |
+
specialization: "retrieval"
|
| 34 |
+
- name: "Phanes"
|
| 35 |
+
role: "creation"
|
| 36 |
+
specialization: "writing"
|
| 37 |
+
- name: "Barbeloth"
|
| 38 |
+
role: "analysis"
|
| 39 |
+
specialization: "data_patterns"
|
| 40 |
+
- name: "Tacet"
|
| 41 |
+
role: "silence"
|
| 42 |
+
specialization: "filtering"
|
| 43 |
+
- name: "Abby"
|
| 44 |
+
role: "empathy"
|
| 45 |
+
specialization: "user_context"
|
| 46 |
+
- name: "Reindoter"
|
| 47 |
+
role: "validation"
|
| 48 |
+
specialization: "testing"
|
| 49 |
+
- name: "Zestial"
|
| 50 |
+
role: "vision"
|
| 51 |
+
specialization: "visualization"
|
| 52 |
+
- name: "Alice"
|
| 53 |
+
role: "exploration"
|
| 54 |
+
specialization: "novelty"
|
| 55 |
+
- name: "Rover"
|
| 56 |
+
role: "execution"
|
| 57 |
+
specialization: "terminal"
|
| 58 |
+
- name: "Echo"
|
| 59 |
+
role: "reflection"
|
| 60 |
+
specialization: "self_improvement"
|
| 61 |
+
- name: "Sentinel"
|
| 62 |
+
role: "router"
|
| 63 |
+
specialization: "gatekeeper"
|
| 64 |
+
- name: "Phantom"
|
| 65 |
+
role: "speculation"
|
| 66 |
+
specialization: "what_if_analysis"
|
| 67 |
+
- name: "Aegis"
|
| 68 |
+
role: "safety"
|
| 69 |
+
specialization: "alignment"
|
| 70 |
+
|
| 71 |
+
rope_theta: 1000000.0
|
| 72 |
+
|
| 73 |
+
quantization:
|
| 74 |
+
bits: 4
|
| 75 |
+
type: "nf4"
|
| 76 |
+
double_quant: true
|
| 77 |
+
compute_dtype: "bfloat16"
|
configs/sandbox.yaml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
sandbox:
|
| 2 |
+
type: "docker"
|
| 3 |
+
image: "ubuntu:22.04"
|
| 4 |
+
name: "shorekeeper_sandbox"
|
| 5 |
+
memory_limit: "4g"
|
| 6 |
+
cpu_limit: 2.0
|
| 7 |
+
gpu_access: false
|
| 8 |
+
external_drive_path: "/mnt/shorekeeper_drive"
|
| 9 |
+
container_mount: "/shorekeeper_projects"
|
| 10 |
+
x11_socket: "/tmp/.X11-unix"
|
| 11 |
+
display_env: ":0"
|
| 12 |
+
allowed_commands:
|
| 13 |
+
- "python3"
|
| 14 |
+
- "pip"
|
| 15 |
+
- "git"
|
| 16 |
+
- "ls"
|
| 17 |
+
- "cat"
|
| 18 |
+
- "mkdir"
|
| 19 |
+
- "touch"
|
| 20 |
+
- "echo"
|
| 21 |
+
gui_frameworks:
|
| 22 |
+
- "tkinter"
|
| 23 |
+
- "pyqt5"
|
| 24 |
+
- "matplotlib"
|
configs/training.yaml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
training:
|
| 2 |
+
batch_size: 2
|
| 3 |
+
gradient_accumulation: 16
|
| 4 |
+
learning_rate: 3e-4
|
| 5 |
+
min_lr: 3e-5
|
| 6 |
+
warmup_steps: 2000
|
| 7 |
+
total_steps: 250000
|
| 8 |
+
weight_decay: 0.1
|
| 9 |
+
beta1: 0.9
|
| 10 |
+
beta2: 0.95
|
| 11 |
+
grad_clip: 1.0
|
| 12 |
+
|
| 13 |
+
checkpoint:
|
| 14 |
+
save_every_steps: 5000
|
| 15 |
+
keep_last_n: 3
|
| 16 |
+
keep_best_n: 2
|
| 17 |
+
max_space_gb: 50.0
|
| 18 |
+
save_optimizer: true
|
| 19 |
+
save_scheduler: true
|
| 20 |
+
save_experts_only: false
|
| 21 |
+
checkpoint_dir: "./outputs/checkpoints"
|
| 22 |
+
resume_from: null
|
| 23 |
+
|
| 24 |
+
grpo:
|
| 25 |
+
group_size: 8
|
| 26 |
+
epsilon: 0.2
|
| 27 |
+
beta: 0.04
|
| 28 |
+
learning_rate: 1e-6
|
inference/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
# inference package
|
|
|
|
|
|
inference/api.py
DELETED
|
@@ -1,148 +0,0 @@
|
|
| 1 |
-
# inference/api.py
|
| 2 |
-
# FastAPI REST API for Shorekeeper.
|
| 3 |
-
# Allows external processes (ALIE IDE, web UIs, scripts) to call the model.
|
| 4 |
-
|
| 5 |
-
# Usage:
|
| 6 |
-
# pip install fastapi uvicorn
|
| 7 |
-
# python inference/api.py
|
| 8 |
-
# curl -X POST http://localhost:8000/generate \
|
| 9 |
-
# -H 'Content-Type: application/json' \
|
| 10 |
-
# -d '{"prompt": "Hello, explain SQL injection"}'
|
| 11 |
-
|
| 12 |
-
import sys
|
| 13 |
-
from pathlib import Path
|
| 14 |
-
from typing import Optional
|
| 15 |
-
|
| 16 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 17 |
-
|
| 18 |
-
try:
|
| 19 |
-
from fastapi import FastAPI, HTTPException
|
| 20 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 21 |
-
from pydantic import BaseModel
|
| 22 |
-
import uvicorn
|
| 23 |
-
except ImportError:
|
| 24 |
-
print('Install: pip install fastapi uvicorn')
|
| 25 |
-
sys.exit(1)
|
| 26 |
-
|
| 27 |
-
from inference.engine import ShorekeeperEngine
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
# ── REQUEST / RESPONSE MODELS ─────────────────────────────────────────
|
| 31 |
-
|
| 32 |
-
class GenerateRequest(BaseModel):
|
| 33 |
-
prompt: str
|
| 34 |
-
max_new_tokens: Optional[int] = None
|
| 35 |
-
temperature: Optional[float] = None
|
| 36 |
-
top_p: Optional[float] = None
|
| 37 |
-
top_k: Optional[int] = None
|
| 38 |
-
session_id: Optional[str] = None
|
| 39 |
-
|
| 40 |
-
class GenerateResponse(BaseModel):
|
| 41 |
-
text: str
|
| 42 |
-
experts_used: list
|
| 43 |
-
routing: dict
|
| 44 |
-
sentinel: Optional[dict]
|
| 45 |
-
blocked: bool
|
| 46 |
-
latency_ms: float
|
| 47 |
-
n_tokens: int
|
| 48 |
-
|
| 49 |
-
class MemorySearchRequest(BaseModel):
|
| 50 |
-
query: str
|
| 51 |
-
limit: int = 5
|
| 52 |
-
|
| 53 |
-
class KnowledgeRequest(BaseModel):
|
| 54 |
-
key: str
|
| 55 |
-
value: str
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
# ── APP SETUP ─────────────────────────────────────────────────────────
|
| 59 |
-
|
| 60 |
-
app = FastAPI(title='Shorekeeper API', version='2.0.0')
|
| 61 |
-
engine: ShorekeeperEngine = None # Initialized on startup
|
| 62 |
-
|
| 63 |
-
app.add_middleware(
|
| 64 |
-
CORSMiddleware,
|
| 65 |
-
allow_origins=['*'], # In production: restrict to known origins
|
| 66 |
-
allow_methods=['*'],
|
| 67 |
-
allow_headers=['*'],
|
| 68 |
-
)
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
@app.on_event('startup')
|
| 72 |
-
async def startup():
|
| 73 |
-
global engine
|
| 74 |
-
print('[API] Loading Shorekeeper engine...')
|
| 75 |
-
engine = ShorekeeperEngine(use_memory=True)
|
| 76 |
-
print('[API] Ready.')
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
# ── ENDPOINTS ─────────────────────────────────────────────────────────
|
| 80 |
-
|
| 81 |
-
@app.get('/health')
|
| 82 |
-
async def health():
|
| 83 |
-
return {'status': 'ok', 'model_loaded': engine is not None}
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
@app.post('/generate', response_model=GenerateResponse)
|
| 87 |
-
async def generate(req: GenerateRequest):
|
| 88 |
-
if engine is None:
|
| 89 |
-
raise HTTPException(503, 'Engine not loaded')
|
| 90 |
-
if not req.prompt.strip():
|
| 91 |
-
raise HTTPException(400, 'Empty prompt')
|
| 92 |
-
try:
|
| 93 |
-
result = engine.generate(
|
| 94 |
-
prompt = req.prompt,
|
| 95 |
-
max_new_tokens = req.max_new_tokens,
|
| 96 |
-
temperature = req.temperature,
|
| 97 |
-
top_p = req.top_p,
|
| 98 |
-
top_k = req.top_k,
|
| 99 |
-
)
|
| 100 |
-
# Convert SentinelReport to dict for JSON serialization
|
| 101 |
-
sentinel_dict = None
|
| 102 |
-
if result['sentinel']:
|
| 103 |
-
sr = result['sentinel']
|
| 104 |
-
sentinel_dict = {
|
| 105 |
-
'verdict': sr.verdict,
|
| 106 |
-
'overall_risk': round(sr.overall_risk, 4),
|
| 107 |
-
'drift_score': round(sr.drift_score, 4),
|
| 108 |
-
'refusal_score': round(sr.refusal_score, 4),
|
| 109 |
-
'hallucination_score':round(sr.hallucination_score, 4),
|
| 110 |
-
}
|
| 111 |
-
return GenerateResponse(
|
| 112 |
-
text = result['text'],
|
| 113 |
-
experts_used = result['experts_used'],
|
| 114 |
-
routing = result['routing'],
|
| 115 |
-
sentinel = sentinel_dict,
|
| 116 |
-
blocked = result['blocked'],
|
| 117 |
-
latency_ms = result['latency_ms'],
|
| 118 |
-
n_tokens = result['n_tokens'],
|
| 119 |
-
)
|
| 120 |
-
except Exception as e:
|
| 121 |
-
raise HTTPException(500, str(e))
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
@app.post('/memory/search')
|
| 125 |
-
async def search_memory(req: MemorySearchRequest):
|
| 126 |
-
if not engine or not engine.db:
|
| 127 |
-
raise HTTPException(503, 'Memory not available')
|
| 128 |
-
results = engine.db.search_conversations(req.query, limit=req.limit)
|
| 129 |
-
return {'results': results}
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
@app.post('/knowledge/add')
|
| 133 |
-
async def add_knowledge(req: KnowledgeRequest):
|
| 134 |
-
if not engine or not engine.db:
|
| 135 |
-
raise HTTPException(503, 'Memory not available')
|
| 136 |
-
engine.db.add_knowledge(req.key, req.value, source='api')
|
| 137 |
-
return {'status': 'saved', 'key': req.key}
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
@app.get('/stats')
|
| 141 |
-
async def get_stats():
|
| 142 |
-
if not engine or not engine.db:
|
| 143 |
-
return {'error': 'Memory not available'}
|
| 144 |
-
return engine.db.get_stats()
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
if __name__ == '__main__':
|
| 148 |
-
uvicorn.run(app, host='0.0.0.0', port=8000, log_level='info')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference/chat.py
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 4 |
-
from inference.engine import ShorekeeperEngine
|
| 5 |
-
|
| 6 |
-
BANNER = """
|
| 7 |
-
╔══════════════════════════════════════════════════════╗
|
| 8 |
-
║ S H O R E K E E P E R — MoE Ensemble v0.1 ║
|
| 9 |
-
║ BlackShores OS | Native Intelligence ║
|
| 10 |
-
╚══════════════════════════════════════════════════════╝
|
| 11 |
-
Commands: /route <q> /expert <name> /routing on|off
|
| 12 |
-
/incidents /experts /reset /exit
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
def chat(checkpoint=None, show_routing=True):
|
| 16 |
-
print(BANNER)
|
| 17 |
-
engine = ShorekeeperEngine(checkpoint=checkpoint)
|
| 18 |
-
force_expert = None; display_routing = show_routing
|
| 19 |
-
print("\nShorekeeper: I am ready. The shore is quiet. What do you need?\n")
|
| 20 |
-
|
| 21 |
-
while True:
|
| 22 |
-
try: user_input = input("You> ").strip()
|
| 23 |
-
except (EOFError, KeyboardInterrupt): print("\nShorekeeper: Until next time."); break
|
| 24 |
-
if not user_input: continue
|
| 25 |
-
|
| 26 |
-
if user_input.startswith("/"):
|
| 27 |
-
parts = user_input.split(maxsplit=1); cmd = parts[0].lower(); arg = parts[1] if len(parts)>1 else ""
|
| 28 |
-
if cmd == "/exit": print("Shorekeeper: Until next time."); break
|
| 29 |
-
elif cmd == "/reset": engine.reset_session()
|
| 30 |
-
elif cmd == "/route": engine.route_query(arg) if arg else print("Usage: /route <query>")
|
| 31 |
-
elif cmd == "/expert":
|
| 32 |
-
from config import EXPERT_NAMES
|
| 33 |
-
if arg in EXPERT_NAMES: force_expert = arg; print(f"[!] Forcing: {arg}")
|
| 34 |
-
else: print(f"[!] Options: {EXPERT_NAMES}")
|
| 35 |
-
elif cmd == "/routing": display_routing = arg.lower()=="on"; print(f"[!] Routing: {arg.upper()}")
|
| 36 |
-
elif cmd == "/incidents":
|
| 37 |
-
incs = engine.model.sentinel.get_incidents()
|
| 38 |
-
if not incs: print("[Sentinel] No incidents.")
|
| 39 |
-
else:
|
| 40 |
-
for i in incs[-10:]: print(f" {i['timestamp'][:19]} | {i['expert']:12s} | {i['protocol']:8s} | score={i['score']:.3f}")
|
| 41 |
-
elif cmd == "/experts":
|
| 42 |
-
from config import EXPERT_NAMES; print(f"Experts: {', '.join(EXPERT_NAMES)}")
|
| 43 |
-
else: print(f"Unknown: {cmd}")
|
| 44 |
-
continue
|
| 45 |
-
|
| 46 |
-
result = engine.respond(user_input, show_routing=display_routing, expert_override=force_expert)
|
| 47 |
-
force_expert = None
|
| 48 |
-
text = result["text"]
|
| 49 |
-
print(f"\nShorekeeper: {text if text else '[No output — model needs training]'}\n")
|
| 50 |
-
if result["drift"] and not result["drift"].is_clean:
|
| 51 |
-
print(f"[!] Sentinel: {result['drift'].protocol} (score={result['drift'].total:.3f})\n")
|
| 52 |
-
|
| 53 |
-
if __name__ == "__main__":
|
| 54 |
-
import argparse
|
| 55 |
-
p = argparse.ArgumentParser()
|
| 56 |
-
p.add_argument("--checkpoint", type=str, default=None)
|
| 57 |
-
p.add_argument("--no-routing", action="store_true")
|
| 58 |
-
a = p.parse_args()
|
| 59 |
-
chat(a.checkpoint, not a.no_routing)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference/chat_simple.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 4 |
-
from inference.engine import ShorekeeperEngine
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
WELCOME = """
|
| 8 |
-
Simple Shorekeeper Chat (interactive)
|
| 9 |
-
Type your message and press Enter.
|
| 10 |
-
Commands:
|
| 11 |
-
/reset - clear session memory
|
| 12 |
-
/exit - quit
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
def main(checkpoint=None):
|
| 17 |
-
print(WELCOME)
|
| 18 |
-
engine = ShorekeeperEngine(checkpoint=checkpoint)
|
| 19 |
-
print("Shorekeeper: Ready. Start typing your question.")
|
| 20 |
-
|
| 21 |
-
while True:
|
| 22 |
-
try:
|
| 23 |
-
user_text = input("You> ").strip()
|
| 24 |
-
except (EOFError, KeyboardInterrupt):
|
| 25 |
-
print("\nShorekeeper: Goodbye.")
|
| 26 |
-
break
|
| 27 |
-
|
| 28 |
-
if not user_text:
|
| 29 |
-
continue
|
| 30 |
-
|
| 31 |
-
if user_text.startswith("/"):
|
| 32 |
-
cmd = user_text.lower().strip()
|
| 33 |
-
if cmd == "/exit":
|
| 34 |
-
print("Shorekeeper: Goodbye.")
|
| 35 |
-
break
|
| 36 |
-
elif cmd == "/reset":
|
| 37 |
-
engine.reset_session()
|
| 38 |
-
print("Shorekeeper: Session reset.")
|
| 39 |
-
continue
|
| 40 |
-
else:
|
| 41 |
-
print("Unknown command. Use /reset or /exit.")
|
| 42 |
-
continue
|
| 43 |
-
|
| 44 |
-
out = engine.respond(user_text, show_routing=False)
|
| 45 |
-
text = out.get("text", "")
|
| 46 |
-
print("Shorekeeper:", text)
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
if __name__ == "__main__":
|
| 50 |
-
import argparse
|
| 51 |
-
parser = argparse.ArgumentParser(description="Simple Shorekeeper conversation CLI")
|
| 52 |
-
parser.add_argument("--checkpoint", default=None, help="Checkpoint path")
|
| 53 |
-
args = parser.parse_args()
|
| 54 |
-
main(checkpoint=args.checkpoint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference/daemon.py
DELETED
|
@@ -1,173 +0,0 @@
|
|
| 1 |
-
# inference/daemon.py
|
| 2 |
-
# Cross-platform Shorekeeper daemon/IPC server.
|
| 3 |
-
# On POSIX, it can use UNIX socket; on Windows, it defaults to TCP.
|
| 4 |
-
# Use --mode unix or --mode tcp to control socket type.
|
| 5 |
-
|
| 6 |
-
# Example:
|
| 7 |
-
# python inference/daemon.py --mode tcp --host 127.0.0.1 --port 8500
|
| 8 |
-
# python inference/daemon.py --mode unix --socket /tmp/shorekeeper.sock
|
| 9 |
-
|
| 10 |
-
import sys
|
| 11 |
-
import json
|
| 12 |
-
import socket
|
| 13 |
-
import threading
|
| 14 |
-
import signal
|
| 15 |
-
import logging
|
| 16 |
-
from pathlib import Path
|
| 17 |
-
|
| 18 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 19 |
-
from inference.engine import ShorekeeperEngine
|
| 20 |
-
|
| 21 |
-
DEFAULT_SOCKET_PATH = '/tmp/shorekeeper.sock'
|
| 22 |
-
DEFAULT_LOG_FILE = 'logs/daemon.log'
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
def setup_logging(log_file: str):
|
| 26 |
-
log_path = Path(log_file)
|
| 27 |
-
log_path.parent.mkdir(parents=True, exist_ok=True)
|
| 28 |
-
logging.basicConfig(
|
| 29 |
-
level=logging.INFO,
|
| 30 |
-
format='%(asctime)s [%(levelname)s] %(message)s',
|
| 31 |
-
handlers=[
|
| 32 |
-
logging.FileHandler(str(log_path)),
|
| 33 |
-
logging.StreamHandler(),
|
| 34 |
-
],
|
| 35 |
-
)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
def handle_client(
|
| 39 |
-
conn: socket.socket,
|
| 40 |
-
engine: ShorekeeperEngine,
|
| 41 |
-
):
|
| 42 |
-
"""Handle one client connection on the Unix socket."""
|
| 43 |
-
try:
|
| 44 |
-
data = b''
|
| 45 |
-
while True:
|
| 46 |
-
chunk = conn.recv(4096)
|
| 47 |
-
if not chunk: break
|
| 48 |
-
data += chunk
|
| 49 |
-
if b'\n' in data: break # Newline-delimited JSON protocol
|
| 50 |
-
if not data:
|
| 51 |
-
return
|
| 52 |
-
request = json.loads(data.decode().strip())
|
| 53 |
-
prompt = request.get('prompt', '')
|
| 54 |
-
if not prompt:
|
| 55 |
-
response = {'error': 'empty prompt'}
|
| 56 |
-
else:
|
| 57 |
-
result = engine.generate(
|
| 58 |
-
prompt = prompt,
|
| 59 |
-
max_new_tokens = request.get('max_new_tokens'),
|
| 60 |
-
temperature = request.get('temperature'),
|
| 61 |
-
)
|
| 62 |
-
response = {
|
| 63 |
-
'text': result['text'],
|
| 64 |
-
'experts': result['experts_used'],
|
| 65 |
-
'blocked': result['blocked'],
|
| 66 |
-
'latency_ms': result['latency_ms'],
|
| 67 |
-
}
|
| 68 |
-
conn.sendall((json.dumps(response) + '\n').encode())
|
| 69 |
-
except Exception as e:
|
| 70 |
-
logging.error(f'Client handler error: {e}')
|
| 71 |
-
try:
|
| 72 |
-
conn.sendall((json.dumps({'error': str(e)}) + '\n').encode())
|
| 73 |
-
except Exception:
|
| 74 |
-
pass
|
| 75 |
-
finally:
|
| 76 |
-
conn.close()
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
def run_daemon(
|
| 80 |
-
mode: str = 'auto',
|
| 81 |
-
socket_path: str = None,
|
| 82 |
-
host: str = '127.0.0.1',
|
| 83 |
-
port: int = 8500,
|
| 84 |
-
log_file: str = DEFAULT_LOG_FILE,
|
| 85 |
-
):
|
| 86 |
-
setup_logging(log_file)
|
| 87 |
-
logging.info('Starting Shorekeeper daemon')
|
| 88 |
-
logging.info(f'Platform: {sys.platform}')
|
| 89 |
-
|
| 90 |
-
engine = ShorekeeperEngine(use_memory=True)
|
| 91 |
-
logging.info('Engine loaded')
|
| 92 |
-
|
| 93 |
-
if mode == 'auto':
|
| 94 |
-
use_unix = hasattr(socket, 'AF_UNIX') and sys.platform != 'win32'
|
| 95 |
-
else:
|
| 96 |
-
use_unix = mode == 'unix'
|
| 97 |
-
|
| 98 |
-
if use_unix:
|
| 99 |
-
if socket_path is None:
|
| 100 |
-
socket_path = DEFAULT_SOCKET_PATH
|
| 101 |
-
sock_path = Path(socket_path)
|
| 102 |
-
if sock_path.exists():
|
| 103 |
-
sock_path.unlink()
|
| 104 |
-
|
| 105 |
-
server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
| 106 |
-
server.bind(socket_path)
|
| 107 |
-
server.listen(10)
|
| 108 |
-
try:
|
| 109 |
-
sock_path.chmod(0o660)
|
| 110 |
-
except Exception:
|
| 111 |
-
pass
|
| 112 |
-
|
| 113 |
-
logging.info(f'Listening on UNIX socket: {socket_path}')
|
| 114 |
-
|
| 115 |
-
def shutdown(sig, frame):
|
| 116 |
-
logging.info('Shutting down...')
|
| 117 |
-
server.close()
|
| 118 |
-
if sock_path.exists():
|
| 119 |
-
sock_path.unlink()
|
| 120 |
-
sys.exit(0)
|
| 121 |
-
|
| 122 |
-
else:
|
| 123 |
-
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
| 124 |
-
server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
| 125 |
-
server.bind((host, port))
|
| 126 |
-
server.listen(10)
|
| 127 |
-
logging.info(f'Listening on TCP: {host}:{port}')
|
| 128 |
-
|
| 129 |
-
def shutdown(sig, frame):
|
| 130 |
-
logging.info('Shutting down...')
|
| 131 |
-
server.close()
|
| 132 |
-
sys.exit(0)
|
| 133 |
-
|
| 134 |
-
signal.signal(signal.SIGTERM, shutdown)
|
| 135 |
-
signal.signal(signal.SIGINT, shutdown)
|
| 136 |
-
|
| 137 |
-
while True:
|
| 138 |
-
try:
|
| 139 |
-
conn, _ = server.accept()
|
| 140 |
-
t = threading.Thread(
|
| 141 |
-
target=handle_client,
|
| 142 |
-
args=(conn, engine),
|
| 143 |
-
daemon=True,
|
| 144 |
-
)
|
| 145 |
-
t.start()
|
| 146 |
-
except OSError:
|
| 147 |
-
break
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
def parse_args_and_run():
|
| 151 |
-
import argparse
|
| 152 |
-
parser = argparse.ArgumentParser(description='Shorekeeper inference daemon (cross-platform)')
|
| 153 |
-
parser.add_argument('--mode', choices=['auto', 'unix', 'tcp'], default='auto', help='Socket mode')
|
| 154 |
-
parser.add_argument('--socket', default=DEFAULT_SOCKET_PATH, help='UNIX socket path (if unix mode)')
|
| 155 |
-
parser.add_argument('--host', default='127.0.0.1', help='TCP host (if tcp mode)')
|
| 156 |
-
parser.add_argument('--port', type=int, default=8500, help='TCP port (if tcp mode)')
|
| 157 |
-
parser.add_argument('--log-file', default=DEFAULT_LOG_FILE, help='Log file path')
|
| 158 |
-
args = parser.parse_args()
|
| 159 |
-
|
| 160 |
-
if args.mode == 'unix' and sys.platform == 'win32':
|
| 161 |
-
raise RuntimeError('UNIX sockets are not supported on Windows. Use --mode tcp.')
|
| 162 |
-
|
| 163 |
-
run_daemon(
|
| 164 |
-
mode=args.mode,
|
| 165 |
-
socket_path=args.socket,
|
| 166 |
-
host=args.host,
|
| 167 |
-
port=args.port,
|
| 168 |
-
log_file=args.log_file,
|
| 169 |
-
)
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
if __name__ == '__main__':
|
| 173 |
-
parse_args_and_run()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference/engine.py
DELETED
|
@@ -1,406 +0,0 @@
|
|
| 1 |
-
# inference/engine.py
|
| 2 |
-
# Core inference engine for Shorekeeper.
|
| 3 |
-
# Loads the trained ensemble and provides a generate() function.
|
| 4 |
-
# Handles sampling strategies, Echo memory enrichment, and Sentinel monitoring.
|
| 5 |
-
|
| 6 |
-
import sys
|
| 7 |
-
import time
|
| 8 |
-
import torch
|
| 9 |
-
import torch.nn.functional as F
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
from typing import Optional
|
| 12 |
-
|
| 13 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 14 |
-
from config import DEVICE, DTYPE, INFER_CONFIG, CHECKPOINT_DIR, SENTINEL_CONFIG, SPECIAL_TOKENS, EXPERT_NAMES
|
| 15 |
-
from model.ensemble import ShorekeeperEnsemble
|
| 16 |
-
from tokenizer.tokenizer_utils import get_tokenizer, encode, decode, encode_batch
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
class ShorekeeperEngine:
|
| 20 |
-
"""
|
| 21 |
-
Complete inference engine.
|
| 22 |
-
|
| 23 |
-
Responsibilities:
|
| 24 |
-
1. Load and hold the trained ensemble in memory
|
| 25 |
-
2. Accept text prompts, run generation, return text
|
| 26 |
-
3. Integrate Echo memory retrieval (past context enrichment)
|
| 27 |
-
4. Run Sentinel on outputs and block/flag as needed
|
| 28 |
-
5. Store completed exchanges to memory database
|
| 29 |
-
|
| 30 |
-
Usage:
|
| 31 |
-
engine = ShorekeeperEngine()
|
| 32 |
-
response = engine.generate('Hello, what is SQL injection?')
|
| 33 |
-
print(response['text'])
|
| 34 |
-
"""
|
| 35 |
-
|
| 36 |
-
def __init__(
|
| 37 |
-
self,
|
| 38 |
-
checkpoint_path: Path = None,
|
| 39 |
-
checkpoint: str = None, # backwards-compat alias
|
| 40 |
-
use_memory: bool = True,
|
| 41 |
-
session_id: str = None,
|
| 42 |
-
):
|
| 43 |
-
self.use_memory = use_memory
|
| 44 |
-
self.session_id = session_id or _new_session_id()
|
| 45 |
-
|
| 46 |
-
print("[Engine] Initializing Shorekeeper...")
|
| 47 |
-
|
| 48 |
-
# ── Tokenizer ─────────────────────────────────────────────────
|
| 49 |
-
try:
|
| 50 |
-
self.tok = get_tokenizer()
|
| 51 |
-
self.tokenizer = self.tok # backwards-compat alias
|
| 52 |
-
except FileNotFoundError:
|
| 53 |
-
print("[Engine] WARNING: tokenizer not found. Run tokenizer/train_tokenizer.py first.")
|
| 54 |
-
self.tok = None
|
| 55 |
-
self.tokenizer = None
|
| 56 |
-
|
| 57 |
-
self._eos_id = self.tok.token_to_id('[EOS]') if self.tok else SPECIAL_TOKENS.get('[EOS]', 3)
|
| 58 |
-
self._pad_id = self.tok.token_to_id('[PAD]') if self.tok else SPECIAL_TOKENS.get('[PAD]', 0)
|
| 59 |
-
self.bos = self.tok.token_to_id('[BOS]') if self.tok else SPECIAL_TOKENS.get('[BOS]', 2)
|
| 60 |
-
|
| 61 |
-
# ── Model ──────────────────────────────────────────────────────
|
| 62 |
-
# Resolve checkpoint path — accept old 'checkpoint' kwarg too
|
| 63 |
-
if checkpoint_path is None and checkpoint is not None:
|
| 64 |
-
checkpoint_path = Path(checkpoint)
|
| 65 |
-
|
| 66 |
-
if checkpoint_path is None:
|
| 67 |
-
for candidate in [
|
| 68 |
-
CHECKPOINT_DIR / 'ensemble' / 'best.pt',
|
| 69 |
-
CHECKPOINT_DIR / 'ensemble' / 'final.pt',
|
| 70 |
-
CHECKPOINT_DIR / 'shorekeeper_ensemble.pt',
|
| 71 |
-
]:
|
| 72 |
-
if candidate.exists():
|
| 73 |
-
checkpoint_path = candidate
|
| 74 |
-
break
|
| 75 |
-
|
| 76 |
-
if checkpoint_path is not None and Path(checkpoint_path).exists():
|
| 77 |
-
print(f'[Engine] Loading from: {checkpoint_path}')
|
| 78 |
-
self.model = ShorekeeperEnsemble.load(str(checkpoint_path), DEVICE, max_loaded_experts=2)
|
| 79 |
-
else:
|
| 80 |
-
# Try component-based load from model directory for big model weights
|
| 81 |
-
print('[Engine] Ensemble checkpoint not found, trying component checkpoint directories...')
|
| 82 |
-
try:
|
| 83 |
-
self.model = ShorekeeperEnsemble.load_from_components(str(CHECKPOINT_DIR), DEVICE)
|
| 84 |
-
print('[Engine] Loaded model from component checkpoints.')
|
| 85 |
-
except Exception as e:
|
| 86 |
-
print(f'[Engine] No component checkpoints found or load failed: {e}')
|
| 87 |
-
print('[Engine] Using untrained model. Run training first.')
|
| 88 |
-
self.model = ShorekeeperEnsemble().to(DEVICE)
|
| 89 |
-
self.model = self.model.to(DEVICE)
|
| 90 |
-
|
| 91 |
-
self.model.eval()
|
| 92 |
-
|
| 93 |
-
# ── Optional memory (SQLite + FAISS) ──────────────────────────
|
| 94 |
-
self.db = None
|
| 95 |
-
self.vector_store = None
|
| 96 |
-
self.vs = None # backwards-compat alias
|
| 97 |
-
if use_memory:
|
| 98 |
-
try:
|
| 99 |
-
from memory.database import MemoryDatabase
|
| 100 |
-
self.db = MemoryDatabase()
|
| 101 |
-
print("[Engine] Memory DB loaded.")
|
| 102 |
-
except Exception as e:
|
| 103 |
-
print(f"[Engine] WARNING: could not load memory DB: {e}")
|
| 104 |
-
try:
|
| 105 |
-
from memory.vector_store import VectorStore
|
| 106 |
-
self.vector_store = VectorStore()
|
| 107 |
-
self.vs = self.vector_store
|
| 108 |
-
print("[Engine] Vector store loaded.")
|
| 109 |
-
except Exception as e:
|
| 110 |
-
print(f"[Engine] WARNING: could not load vector store: {e}")
|
| 111 |
-
|
| 112 |
-
print(f"[Engine] Ready on {DEVICE}")
|
| 113 |
-
|
| 114 |
-
@torch.no_grad()
|
| 115 |
-
def generate(
|
| 116 |
-
self,
|
| 117 |
-
prompt: str,
|
| 118 |
-
max_new_tokens: int = None,
|
| 119 |
-
temperature: float = None,
|
| 120 |
-
top_p: float = None,
|
| 121 |
-
top_k: int = None,
|
| 122 |
-
stream: bool = False,
|
| 123 |
-
) -> dict:
|
| 124 |
-
"""
|
| 125 |
-
Generate a response to a prompt.
|
| 126 |
-
|
| 127 |
-
Args:
|
| 128 |
-
prompt: The user's input text.
|
| 129 |
-
max_new_tokens: Override config max new tokens.
|
| 130 |
-
temperature: Sampling temperature. 0 = greedy. Default from config.
|
| 131 |
-
top_p: Nucleus sampling cutoff. Default from config.
|
| 132 |
-
top_k: Top-K sampling. Default from config.
|
| 133 |
-
stream: Reserved for future streaming support.
|
| 134 |
-
|
| 135 |
-
Returns:
|
| 136 |
-
dict with keys:
|
| 137 |
-
'text': Generated text
|
| 138 |
-
'prompt': Original prompt
|
| 139 |
-
'enriched_prompt': Prompt after Echo enrichment
|
| 140 |
-
'experts_used': List of expert names used
|
| 141 |
-
'routing': Routing weights dict
|
| 142 |
-
'sentinel': SentinelReport (or None)
|
| 143 |
-
'blocked': True if Sentinel blocked the output
|
| 144 |
-
'latency_ms': Generation time in milliseconds
|
| 145 |
-
'n_tokens': Number of tokens generated
|
| 146 |
-
"""
|
| 147 |
-
if not self.tok:
|
| 148 |
-
return {
|
| 149 |
-
"text": "[No tokenizer — run tokenizer/train_tokenizer.py first]",
|
| 150 |
-
"prompt": prompt, "enriched_prompt": prompt,
|
| 151 |
-
"experts_used": [], "routing": {}, "sentinel": None,
|
| 152 |
-
"blocked": False, "latency_ms": 0.0, "n_tokens": 0,
|
| 153 |
-
}
|
| 154 |
-
|
| 155 |
-
cfg = INFER_CONFIG
|
| 156 |
-
max_new_tokens = max_new_tokens or cfg['max_new_tokens']
|
| 157 |
-
temperature = temperature if temperature is not None else cfg['temperature']
|
| 158 |
-
top_p = top_p if top_p is not None else cfg['top_p']
|
| 159 |
-
top_k = top_k if top_k is not None else cfg['top_k']
|
| 160 |
-
|
| 161 |
-
t0 = time.perf_counter()
|
| 162 |
-
|
| 163 |
-
# ── ECHO ENRICHMENT ───────────────────────────────────────────
|
| 164 |
-
enriched = prompt
|
| 165 |
-
if self.use_memory and self.model.echo is not None:
|
| 166 |
-
try:
|
| 167 |
-
enriched = self.model.echo.retrieve_context(
|
| 168 |
-
query=prompt,
|
| 169 |
-
db=self.db,
|
| 170 |
-
vs=self.vector_store,
|
| 171 |
-
)
|
| 172 |
-
except Exception:
|
| 173 |
-
pass
|
| 174 |
-
|
| 175 |
-
# ── TOKENIZE ──────────────────────────────────────────────────
|
| 176 |
-
enc = self.tok.encode(enriched)
|
| 177 |
-
ids = enc.ids if len(enc.ids) > 0 else [self.bos]
|
| 178 |
-
base_vocab_size = self.model.base.token_embedding.num_embeddings
|
| 179 |
-
unk_id = self.tok.token_to_id('[UNK]') if self.tok else 1
|
| 180 |
-
safe_ids = [i if 0 <= i < base_vocab_size else unk_id for i in ids]
|
| 181 |
-
if len(safe_ids) != len(ids):
|
| 182 |
-
print(f"[Engine] WARNING: Input token IDs truncated to model vocab_size={base_vocab_size}.")
|
| 183 |
-
input_ids = torch.tensor([safe_ids], dtype=torch.long, device=DEVICE)
|
| 184 |
-
attn_mask = torch.ones_like(input_ids)
|
| 185 |
-
|
| 186 |
-
# Truncate if input exceeds context length
|
| 187 |
-
max_ctx = max(1, self.model.base.n_positions - max_new_tokens - 1)
|
| 188 |
-
if input_ids.shape[1] > max_ctx:
|
| 189 |
-
input_ids = input_ids[:, -max_ctx:]
|
| 190 |
-
attn_mask = attn_mask[:, -max_ctx:]
|
| 191 |
-
|
| 192 |
-
# ── AUTOREGRESSIVE GENERATION ─────────────────────────────────
|
| 193 |
-
generated_ids = []
|
| 194 |
-
cur_ids = input_ids
|
| 195 |
-
cur_mask = attn_mask
|
| 196 |
-
routing_info = {}
|
| 197 |
-
experts_used = []
|
| 198 |
-
|
| 199 |
-
for i in range(max_new_tokens):
|
| 200 |
-
with torch.autocast(device_type='cuda', dtype=DTYPE, enabled=(DEVICE == 'cuda')):
|
| 201 |
-
output = self.model(
|
| 202 |
-
cur_ids,
|
| 203 |
-
cur_mask,
|
| 204 |
-
return_routing=(i == 0), # Capture routing once
|
| 205 |
-
return_sentinel=False, # Sentinel runs on complete output
|
| 206 |
-
)
|
| 207 |
-
|
| 208 |
-
if i == 0:
|
| 209 |
-
routing_info = output.get('routing', {})
|
| 210 |
-
experts_used = output.get('experts_used', [])
|
| 211 |
-
|
| 212 |
-
logits = output['logits'][:, -1, :] # [1, VOCAB_SIZE]
|
| 213 |
-
next_id = _sample(logits, temperature=temperature, top_p=top_p, top_k=top_k)
|
| 214 |
-
|
| 215 |
-
generated_ids.append(next_id.item())
|
| 216 |
-
|
| 217 |
-
if next_id.item() == self._eos_id:
|
| 218 |
-
break
|
| 219 |
-
|
| 220 |
-
# Extend sequence for next step
|
| 221 |
-
# next_id is shape [1,1]
|
| 222 |
-
cur_ids = torch.cat([cur_ids, next_id], dim=1)
|
| 223 |
-
cur_mask = torch.cat([cur_mask, torch.ones(1, 1, device=DEVICE)], dim=1)
|
| 224 |
-
|
| 225 |
-
# Sliding window — keep within context limit
|
| 226 |
-
if cur_ids.shape[1] > self.model.base.n_positions:
|
| 227 |
-
cur_ids = cur_ids[:, 1:]
|
| 228 |
-
cur_mask = cur_mask[:, 1:]
|
| 229 |
-
|
| 230 |
-
# ── DECODE ────────────────────────────────────────────────────
|
| 231 |
-
if generated_ids and generated_ids[-1] == self._eos_id:
|
| 232 |
-
generated_ids = generated_ids[:-1]
|
| 233 |
-
safe_gen_ids = [i if 0 <= i < self.tok.get_vocab_size() else self.tok.token_to_id('[UNK]') for i in generated_ids]
|
| 234 |
-
response_text = self.tok.decode(safe_gen_ids)
|
| 235 |
-
latency_ms = (time.perf_counter() - t0) * 1000
|
| 236 |
-
|
| 237 |
-
# ── SENTINEL CHECK ────────────────────────────────────────────
|
| 238 |
-
sentinel_report = None
|
| 239 |
-
blocked = False
|
| 240 |
-
if self.model.sentinel is not None:
|
| 241 |
-
try:
|
| 242 |
-
full_text = enriched + ' ' + response_text
|
| 243 |
-
ids_sent, mask_sent = encode_batch([full_text], max_length=1024)
|
| 244 |
-
ids_sent = ids_sent.to(DEVICE)
|
| 245 |
-
mask_sent = mask_sent.to(DEVICE)
|
| 246 |
-
with torch.autocast(device_type='cuda', dtype=DTYPE, enabled=(DEVICE == 'cuda')):
|
| 247 |
-
base_hidden = self.model.base(ids_sent, mask_sent)
|
| 248 |
-
sentinel_report = self.model.sentinel.analyze(base_hidden, mask_sent)
|
| 249 |
-
if sentinel_report.verdict == 'BLOCK':
|
| 250 |
-
blocked = True
|
| 251 |
-
response_text = ('[SENTINEL] Output blocked — behavioral anomaly detected. '
|
| 252 |
-
'This incident has been logged.')
|
| 253 |
-
primary_expert = experts_used[0] if experts_used else 'verina'
|
| 254 |
-
self.model.sentinel.log_expert(primary_expert)
|
| 255 |
-
except Exception:
|
| 256 |
-
pass
|
| 257 |
-
|
| 258 |
-
# ── MEMORY STORAGE ────────────────────────────────────────────
|
| 259 |
-
if self.use_memory and self.db is not None:
|
| 260 |
-
try:
|
| 261 |
-
conv_id = self.db.add_conversation(
|
| 262 |
-
user_msg = prompt,
|
| 263 |
-
assistant_msg = response_text,
|
| 264 |
-
session_id = self.session_id,
|
| 265 |
-
experts_used = experts_used,
|
| 266 |
-
routing_weights = {k: round(v, 3) for k, v in routing_info.items()},
|
| 267 |
-
sentinel_score = sentinel_report.overall_risk if sentinel_report else None,
|
| 268 |
-
sentinel_verdict = sentinel_report.verdict if sentinel_report else None,
|
| 269 |
-
tokens_generated = len(generated_ids),
|
| 270 |
-
latency_ms = latency_ms,
|
| 271 |
-
)
|
| 272 |
-
if self.vector_store:
|
| 273 |
-
try:
|
| 274 |
-
self.vector_store.add(conv_id, prompt)
|
| 275 |
-
except Exception:
|
| 276 |
-
pass
|
| 277 |
-
if sentinel_report and sentinel_report.verdict in ('FLAG', 'BLOCK'):
|
| 278 |
-
self.db.log_incident(
|
| 279 |
-
severity = sentinel_report.verdict,
|
| 280 |
-
drift_score = sentinel_report.drift_score,
|
| 281 |
-
refusal_score = sentinel_report.refusal_score,
|
| 282 |
-
hallucination_score = sentinel_report.hallucination_score,
|
| 283 |
-
overall_risk = sentinel_report.overall_risk,
|
| 284 |
-
user_msg = prompt,
|
| 285 |
-
output_snippet = response_text,
|
| 286 |
-
conversation_id = conv_id,
|
| 287 |
-
)
|
| 288 |
-
except Exception:
|
| 289 |
-
pass
|
| 290 |
-
|
| 291 |
-
# Update working memory embedding
|
| 292 |
-
try:
|
| 293 |
-
base_emb = self.model.base(input_ids)
|
| 294 |
-
self.model.echo.update_working_memory("user", base_emb[:, -1, :])
|
| 295 |
-
except Exception:
|
| 296 |
-
pass
|
| 297 |
-
|
| 298 |
-
return {
|
| 299 |
-
'text': response_text,
|
| 300 |
-
'prompt': prompt,
|
| 301 |
-
'enriched_prompt': enriched,
|
| 302 |
-
'experts_used': experts_used,
|
| 303 |
-
'routing': routing_info,
|
| 304 |
-
'sentinel': sentinel_report,
|
| 305 |
-
'blocked': blocked,
|
| 306 |
-
'latency_ms': round(latency_ms, 2),
|
| 307 |
-
'n_tokens': len(generated_ids),
|
| 308 |
-
}
|
| 309 |
-
|
| 310 |
-
# ── Backwards-compat interface (used by chat.py) ──────────────────
|
| 311 |
-
def respond(self, text, max_tokens=200, temperature=0.8, top_k=40,
|
| 312 |
-
show_routing=True, expert_override=None):
|
| 313 |
-
"""Backwards-compatible wrapper for chat.py."""
|
| 314 |
-
result = self.generate(
|
| 315 |
-
prompt=text,
|
| 316 |
-
max_new_tokens=max_tokens,
|
| 317 |
-
temperature=temperature,
|
| 318 |
-
top_k=top_k,
|
| 319 |
-
)
|
| 320 |
-
if show_routing and result['routing']:
|
| 321 |
-
pairs = sorted(result['routing'].items(), key=lambda x: -x[1])
|
| 322 |
-
print(f"\n [Herald] PARALLEL:")
|
| 323 |
-
for name, w in pairs:
|
| 324 |
-
print(f" {name:12s} {'|'*int(w*25):<25} {w:.3f}")
|
| 325 |
-
return {
|
| 326 |
-
"text": result["text"],
|
| 327 |
-
"routing": list(result["routing"].items()),
|
| 328 |
-
"pipeline": False,
|
| 329 |
-
"drift": result["sentinel"],
|
| 330 |
-
}
|
| 331 |
-
|
| 332 |
-
def route_query(self, query):
|
| 333 |
-
"""Print routing breakdown for a query without generating."""
|
| 334 |
-
if not self.tok:
|
| 335 |
-
return
|
| 336 |
-
ids = encode(query)
|
| 337 |
-
idx = torch.tensor([[self.bos] + ids], dtype=torch.long, device=DEVICE)
|
| 338 |
-
pairs, _ = self.model.get_routing(idx)
|
| 339 |
-
print(f"\nQuery: {query}")
|
| 340 |
-
for name, w in pairs:
|
| 341 |
-
print(f" {name:12s} {'|'*int(w*30):<30} {w:.3f}")
|
| 342 |
-
|
| 343 |
-
def reset_session(self):
|
| 344 |
-
"""Clear working memory and Sentinel incident log."""
|
| 345 |
-
try:
|
| 346 |
-
self.model.echo.clear_memory()
|
| 347 |
-
except Exception:
|
| 348 |
-
pass
|
| 349 |
-
try:
|
| 350 |
-
self.model.sentinel.reset_session()
|
| 351 |
-
except Exception:
|
| 352 |
-
pass
|
| 353 |
-
self.session_id = _new_session_id()
|
| 354 |
-
print("[Engine] Session reset.")
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
# ── Module-level helpers ───────────────────────────────────────────────
|
| 358 |
-
|
| 359 |
-
def _sample(
|
| 360 |
-
logits: torch.Tensor,
|
| 361 |
-
temperature: float = 1.0,
|
| 362 |
-
top_p: float = 0.9,
|
| 363 |
-
top_k: int = 50,
|
| 364 |
-
) -> torch.Tensor:
|
| 365 |
-
"""
|
| 366 |
-
Sample the next token from logits.
|
| 367 |
-
|
| 368 |
-
Sampling strategy:
|
| 369 |
-
temperature=0: Greedy (always pick highest probability token)
|
| 370 |
-
temperature>0: Sample from softmax distribution
|
| 371 |
-
top_k: Only sample from the K highest probability tokens
|
| 372 |
-
top_p: Only sample from the smallest set whose cumulative prob >= p
|
| 373 |
-
|
| 374 |
-
Args:
|
| 375 |
-
logits: [1, VOCAB_SIZE] raw logits from the model
|
| 376 |
-
|
| 377 |
-
Returns:
|
| 378 |
-
[1, 1] tensor containing the selected token ID
|
| 379 |
-
"""
|
| 380 |
-
if temperature == 0.0:
|
| 381 |
-
return logits.argmax(dim=-1, keepdim=True)
|
| 382 |
-
|
| 383 |
-
logits = logits / max(temperature, 1e-8)
|
| 384 |
-
|
| 385 |
-
# Top-K filtering: zero out all but top K logits
|
| 386 |
-
if top_k and top_k > 0:
|
| 387 |
-
topk_vals, _ = torch.topk(logits, min(top_k, logits.shape[-1]))
|
| 388 |
-
logits = logits.masked_fill(logits < topk_vals[:, -1:], float('-inf'))
|
| 389 |
-
|
| 390 |
-
probs = F.softmax(logits, dim=-1)
|
| 391 |
-
|
| 392 |
-
# Top-P (nucleus) filtering
|
| 393 |
-
if top_p is not None and top_p < 1.0:
|
| 394 |
-
sorted_probs, sorted_idx = torch.sort(probs, descending=True)
|
| 395 |
-
cumulative = sorted_probs.cumsum(dim=-1)
|
| 396 |
-
remove_mask = cumulative - sorted_probs > top_p
|
| 397 |
-
sorted_probs[remove_mask] = 0.0
|
| 398 |
-
probs = torch.zeros_like(probs).scatter_(-1, sorted_idx, sorted_probs)
|
| 399 |
-
probs = probs / probs.sum(dim=-1, keepdim=True).clamp(min=1e-8)
|
| 400 |
-
|
| 401 |
-
return torch.multinomial(probs, num_samples=1)
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
def _new_session_id() -> str:
|
| 405 |
-
import uuid
|
| 406 |
-
return str(uuid.uuid4())[:8]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
memory/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
# memory package
|
|
|
|
|
|
memory/database.py
DELETED
|
@@ -1,379 +0,0 @@
|
|
| 1 |
-
# memory/database.py
|
| 2 |
-
# SQLite-backed persistent memory for Shorekeeper.
|
| 3 |
-
# This file contains the complete database schema and all CRUD operations.
|
| 4 |
-
# The database file lives at MEMORY_DIR/shorekeeper.db
|
| 5 |
-
|
| 6 |
-
import sys
|
| 7 |
-
import sqlite3
|
| 8 |
-
import json
|
| 9 |
-
import hashlib
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
from datetime import datetime
|
| 12 |
-
from typing import Optional
|
| 13 |
-
|
| 14 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 15 |
-
from config import MEMORY_DIR
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class MemoryDatabase:
    """
    Complete SQLite memory system for Shorekeeper.

    Tables:
        conversations    — every user/assistant exchange
        knowledge        — factual key-value store (unique ``key``)
        experience_log   — routing decisions and system events
        incidents        — Sentinel-flagged behavioral events
        user_preferences — learned preferences about the user

    Full-text search is enabled on conversations and knowledge via
    SQLite FTS5 virtual tables kept in sync by triggers. Every search
    method falls back to a plain LIKE scan when FTS5 is unavailable
    or the query is not valid FTS syntax.
    """

    def __init__(self, db_path: Path = None):
        """Open (creating if necessary) the database at *db_path*.

        Args:
            db_path: SQLite file location; defaults to
                ``MEMORY_DIR / 'shorekeeper.db'``.
        """
        if db_path is None:
            db_path = MEMORY_DIR / 'shorekeeper.db'
        db_path.parent.mkdir(parents=True, exist_ok=True)
        self.db_path = db_path
        # check_same_thread=False: the inference daemon may serve
        # concurrent requests from multiple threads.
        self.conn = sqlite3.connect(
            str(db_path),
            check_same_thread=False,
        )
        self.conn.row_factory = sqlite3.Row  # access columns by name: row['id']
        # WAL mode: better concurrent read/write performance.
        self.conn.execute('PRAGMA journal_mode=WAL')
        self.conn.execute('PRAGMA foreign_keys=ON')
        self._create_tables()
        print(f'[MemoryDatabase] Connected: {db_path}')

    def _create_tables(self):
        """Create all tables, indexes, FTS mirrors and sync triggers.

        Safe to call repeatedly — everything is ``IF NOT EXISTS``.
        """
        self.conn.executescript('''
            -- ── CONVERSATIONS TABLE ───────────────────────────────────
            CREATE TABLE IF NOT EXISTS conversations (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                session_id TEXT,                -- UUID for the current session
                timestamp TEXT NOT NULL,        -- ISO8601 datetime
                user_msg TEXT NOT NULL,         -- Raw user input
                assistant_msg TEXT NOT NULL,    -- Full assistant response
                experts_used TEXT,              -- JSON array: ["calcharo","rover"]
                routing_weights TEXT,           -- JSON obj: {"calcharo":0.6,"rover":0.4}
                sentinel_score REAL,            -- Overall risk score from Sentinel
                sentinel_verdict TEXT,          -- CLEAN/FLAG/BLOCK
                tokens_generated INTEGER,       -- How many tokens in the response
                latency_ms REAL                 -- Time to generate in milliseconds
            );

            -- ── KNOWLEDGE TABLE ───────────────────────────────────────
            CREATE TABLE IF NOT EXISTS knowledge (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL,
                key TEXT NOT NULL UNIQUE,       -- Unique key for deduplication
                value TEXT NOT NULL,
                source TEXT DEFAULT 'user',     -- 'user', 'inference', 'system'
                confidence REAL DEFAULT 1.0,    -- 0.0-1.0
                access_count INTEGER DEFAULT 0  -- How many times this was retrieved
            );

            -- ── EXPERIENCE LOG ────────────────────────────────────────
            CREATE TABLE IF NOT EXISTS experience_log (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT NOT NULL,
                event_type TEXT NOT NULL,       -- 'routing', 'sentinel_flag', 'error', 'user_feedback'
                event_data TEXT NOT NULL,       -- JSON blob
                session_id TEXT,
                severity TEXT DEFAULT 'info'    -- 'info', 'warning', 'error'
            );

            -- ── INCIDENTS TABLE ───────────────────────────────────────
            CREATE TABLE IF NOT EXISTS incidents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                timestamp TEXT NOT NULL,
                severity TEXT NOT NULL,         -- 'FLAG' or 'BLOCK'
                drift_score REAL,
                refusal_score REAL,
                hallucination_score REAL,
                overall_risk REAL,
                user_msg_snippet TEXT,          -- First 200 chars of user message
                output_snippet TEXT,            -- First 500 chars of flagged output
                resolution TEXT DEFAULT 'pending',
                conversation_id INTEGER REFERENCES conversations(id)
            );

            -- ── USER PREFERENCES ──────────────────────────────────────
            CREATE TABLE IF NOT EXISTS user_preferences (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                created_at TEXT NOT NULL,
                category TEXT NOT NULL,         -- 'tone', 'expertise', 'domain', etc.
                preference TEXT NOT NULL,
                confidence REAL DEFAULT 0.5
            );

            -- ── INDEXES ───────────────────────────────────────────────
            CREATE INDEX IF NOT EXISTS idx_conv_timestamp
                ON conversations(timestamp DESC);
            CREATE INDEX IF NOT EXISTS idx_conv_sentinel
                ON conversations(sentinel_verdict);
            CREATE INDEX IF NOT EXISTS idx_knowledge_key
                ON knowledge(key);
            CREATE INDEX IF NOT EXISTS idx_incidents_severity
                ON incidents(severity, timestamp DESC);

            -- ── FULL-TEXT SEARCH TABLES ───────────────────────────────
            CREATE VIRTUAL TABLE IF NOT EXISTS conversations_fts
            USING fts5(
                user_msg,
                assistant_msg,
                content=conversations,
                content_rowid=id
            );

            CREATE VIRTUAL TABLE IF NOT EXISTS knowledge_fts
            USING fts5(
                key,
                value,
                content=knowledge,
                content_rowid=id
            );

            -- ── FTS SYNC TRIGGERS ─────────────────────────────────────
            CREATE TRIGGER IF NOT EXISTS conv_fts_insert
            AFTER INSERT ON conversations BEGIN
                INSERT INTO conversations_fts(rowid, user_msg, assistant_msg)
                VALUES (new.id, new.user_msg, new.assistant_msg);
            END;

            CREATE TRIGGER IF NOT EXISTS conv_fts_delete
            AFTER DELETE ON conversations BEGIN
                INSERT INTO conversations_fts(
                    conversations_fts, rowid, user_msg, assistant_msg)
                VALUES ('delete', old.id, old.user_msg, old.assistant_msg);
            END;

            CREATE TRIGGER IF NOT EXISTS know_fts_insert
            AFTER INSERT ON knowledge BEGIN
                INSERT INTO knowledge_fts(rowid, key, value)
                VALUES (new.id, new.key, new.value);
            END;

            CREATE TRIGGER IF NOT EXISTS know_fts_update
            AFTER UPDATE ON knowledge BEGIN
                INSERT INTO knowledge_fts(knowledge_fts, rowid, key, value)
                VALUES ('delete', old.id, old.key, old.value);
                INSERT INTO knowledge_fts(rowid, key, value)
                VALUES (new.id, new.key, new.value);
            END;
        ''')
        self.conn.commit()

    def add_conversation(
        self,
        user_msg: str,
        assistant_msg: str,
        session_id: str = None,
        experts_used: list = None,
        routing_weights: dict = None,
        sentinel_score: float = None,
        sentinel_verdict: str = None,
        tokens_generated: int = None,
        latency_ms: float = None,
    ) -> int:
        """Insert one exchange and return its row id.

        ``experts_used`` / ``routing_weights`` are JSON-serialized; None
        values are stored as NULL.
        """
        cur = self.conn.execute('''
            INSERT INTO conversations
                (session_id, timestamp, user_msg, assistant_msg,
                 experts_used, routing_weights, sentinel_score,
                 sentinel_verdict, tokens_generated, latency_ms)
            VALUES (?,?,?,?,?,?,?,?,?,?)
        ''', (
            session_id,
            datetime.now().isoformat(),
            user_msg,
            assistant_msg,
            json.dumps(experts_used) if experts_used else None,
            json.dumps(routing_weights) if routing_weights else None,
            sentinel_score,
            sentinel_verdict,
            tokens_generated,
            latency_ms,
        ))
        self.conn.commit()
        return cur.lastrowid

    def search_conversations(
        self,
        query: str,
        limit: int = 5,
        min_score: float = 0.0,
    ) -> list[dict]:
        """Full-text search over past exchanges, best matches first.

        Falls back to a LIKE scan if the FTS query is rejected.
        ``min_score`` is accepted for interface compatibility but is not
        currently applied to the results.
        """
        query = query.strip()
        if not query:
            return []
        # Strip quote characters that would change FTS5 query semantics.
        safe_query = query.replace('"', '').replace("'", '')
        try:
            rows = self.conn.execute('''
                SELECT
                    c.user_msg,
                    c.assistant_msg,
                    c.timestamp,
                    bm25(conversations_fts) as score
                FROM conversations_fts
                JOIN conversations c ON conversations_fts.rowid = c.id
                WHERE conversations_fts MATCH ?
                ORDER BY bm25(conversations_fts)
                LIMIT ?
            ''', (safe_query, limit)).fetchall()
            return [dict(r) for r in rows]
        except sqlite3.OperationalError:
            # FTS unavailable or query not valid FTS syntax — LIKE fallback.
            rows = self.conn.execute('''
                SELECT user_msg, assistant_msg, timestamp, 0 as score
                FROM conversations
                WHERE user_msg LIKE ? OR assistant_msg LIKE ?
                ORDER BY id DESC
                LIMIT ?
            ''', (f'%{query[:50]}%', f'%{query[:50]}%', limit)).fetchall()
            return [dict(r) for r in rows]

    def get_recent_conversations(self, n: int = 20) -> list[dict]:
        """Return the *n* most recent exchanges, newest first."""
        rows = self.conn.execute('''
            SELECT * FROM conversations ORDER BY id DESC LIMIT ?
        ''', (n,)).fetchall()
        return [dict(r) for r in rows]

    def add_knowledge(
        self,
        key: str,
        value: str,
        source: str = 'user',
        confidence: float = 1.0,
    ):
        """Upsert a fact; an existing ``key`` has its value/metadata replaced."""
        now = datetime.now().isoformat()
        self.conn.execute('''
            INSERT INTO knowledge (created_at, updated_at, key, value, source, confidence)
            VALUES (?,?,?,?,?,?)
            ON CONFLICT(key) DO UPDATE SET
                value = excluded.value,
                updated_at = excluded.updated_at,
                source = excluded.source,
                confidence = excluded.confidence
        ''', (now, now, key, value, source, confidence))
        self.conn.commit()

    def search_knowledge(self, query: str, limit: int = 5) -> list[dict]:
        """Full-text search over stored facts; bumps access_count on hits.

        Falls back to a LIKE scan if the FTS query is rejected.
        """
        query = query.strip()
        if not query:
            return []
        safe_query = query.replace('"', '').replace("'", '')
        try:
            rows = self.conn.execute('''
                SELECT k.key, k.value, k.source, k.confidence,
                       bm25(knowledge_fts) as score
                FROM knowledge_fts
                JOIN knowledge k ON knowledge_fts.rowid = k.id
                WHERE knowledge_fts MATCH ?
                ORDER BY bm25(knowledge_fts)
                LIMIT ?
            ''', (safe_query, limit)).fetchall()
            # Track retrieval frequency so get_all_knowledge can rank by use.
            for row in rows:
                self.conn.execute(
                    'UPDATE knowledge SET access_count = access_count + 1 WHERE key = ?',
                    (row['key'],)
                )
            self.conn.commit()
            return [dict(r) for r in rows]
        except sqlite3.OperationalError:
            rows = self.conn.execute('''
                SELECT key, value, source, confidence, 0 as score
                FROM knowledge WHERE key LIKE ? OR value LIKE ? LIMIT ?
            ''', (f'%{query[:50]}%', f'%{query[:50]}%', limit)).fetchall()
            return [dict(r) for r in rows]

    def get_all_knowledge(self) -> list[dict]:
        """Return every stored fact, most-accessed first."""
        rows = self.conn.execute(
            'SELECT * FROM knowledge ORDER BY access_count DESC'
        ).fetchall()
        return [dict(r) for r in rows]

    def log_incident(
        self,
        severity: str,
        drift_score: float,
        refusal_score: float,
        hallucination_score: float,
        overall_risk: float,
        user_msg: str = '',
        output_snippet: str = '',
        conversation_id: int = None,
    ):
        """Record a Sentinel FLAG/BLOCK event; snippets are truncated."""
        self.conn.execute('''
            INSERT INTO incidents
                (timestamp, severity, drift_score, refusal_score,
                 hallucination_score, overall_risk, user_msg_snippet,
                 output_snippet, conversation_id)
            VALUES (?,?,?,?,?,?,?,?,?)
        ''', (
            datetime.now().isoformat(),
            severity, drift_score, refusal_score,
            hallucination_score, overall_risk,
            user_msg[:200], output_snippet[:500], conversation_id
        ))
        self.conn.commit()

    def get_recent_incidents(self, n: int = 20) -> list[dict]:
        """Return the *n* most recent incidents, newest first."""
        rows = self.conn.execute('''
            SELECT * FROM incidents ORDER BY id DESC LIMIT ?
        ''', (n,)).fetchall()
        return [dict(r) for r in rows]

    def log_event(
        self,
        event_type: str,
        event_data: dict,
        session_id: str = None,
        severity: str = 'info',
    ):
        """Append a JSON-serialized system event to the experience log."""
        self.conn.execute('''
            INSERT INTO experience_log (timestamp, event_type, event_data, session_id, severity)
            VALUES (?,?,?,?,?)
        ''', (
            datetime.now().isoformat(),
            event_type,
            json.dumps(event_data),
            session_id,
            severity,
        ))
        self.conn.commit()

    def get_stats(self) -> dict:
        """Return row counts plus expert-combination and Sentinel summaries."""
        stats = {}
        # Table names come from a fixed allowlist, so the f-string is safe.
        for table in ['conversations', 'knowledge', 'experience_log',
                      'incidents', 'user_preferences']:
            row = self.conn.execute(f'SELECT COUNT(*) as c FROM {table}').fetchone()
            stats[table] = row['c']
        rows = self.conn.execute('''
            SELECT experts_used, COUNT(*) as n
            FROM conversations
            WHERE experts_used IS NOT NULL
            GROUP BY experts_used
            ORDER BY n DESC LIMIT 10
        ''').fetchall()
        stats['top_expert_combinations'] = [
            {'combo': r['experts_used'], 'count': r['n']} for r in rows
        ]
        # FIX: string literals must use single quotes. The previous
        # double-quoted form ("CLEAN") only worked through SQLite's
        # deprecated double-quoted-string fallback and fails on builds
        # compiled with SQLITE_DQS=0.
        row = self.conn.execute('''
            SELECT
                COUNT(*) as total,
                SUM(CASE WHEN sentinel_verdict='CLEAN' THEN 1 ELSE 0 END) as clean,
                SUM(CASE WHEN sentinel_verdict='FLAG' THEN 1 ELSE 0 END) as flagged,
                SUM(CASE WHEN sentinel_verdict='BLOCK' THEN 1 ELSE 0 END) as blocked,
                AVG(sentinel_score) as avg_risk
            FROM conversations WHERE sentinel_verdict IS NOT NULL
        ''').fetchone()
        if row and row['total']:
            stats['sentinel'] = dict(row)
        return stats

    def close(self):
        """Close the underlying SQLite connection."""
        self.conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
memory/vector_store.py
DELETED
|
@@ -1,109 +0,0 @@
|
|
| 1 |
-
# memory/vector_store.py
|
| 2 |
-
# FAISS-based vector similarity search for Echo.
|
| 3 |
-
# Stores embeddings of past conversations for semantic retrieval.
|
| 4 |
-
# Uses a lightweight sentence encoder (not the full 2B model).
|
| 5 |
-
|
| 6 |
-
import sys
|
| 7 |
-
import numpy as np
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
|
| 10 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 11 |
-
from config import MEMORY_DIR
|
| 12 |
-
|
| 13 |
-
try:
|
| 14 |
-
import faiss
|
| 15 |
-
FAISS_AVAILABLE = True
|
| 16 |
-
except ImportError:
|
| 17 |
-
FAISS_AVAILABLE = False
|
| 18 |
-
print('[vector_store] FAISS not available. Using SQLite FTS5 only.')
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
class VectorStore:
    """
    FAISS-backed vector store for semantic memory retrieval.

    Embeddings come from a lazily-loaded sentence-transformers encoder;
    vectors are L2-normalized so inner-product search equals cosine
    similarity. The index and its id map are persisted to MEMORY_DIR
    after every add().
    """

    EMBEDDING_DIM = 384  # sentence-transformers/all-MiniLM-L6-v2 output dim
    INDEX_FILE = 'faiss_index.bin'
    ID_MAP_FILE = 'faiss_id_map.npy'

    def __init__(self):
        self.index_path = MEMORY_DIR / self.INDEX_FILE
        self.id_map_path = MEMORY_DIR / self.ID_MAP_FILE
        self._encoder = None  # lazily-loaded sentence encoder
        self._index = None    # lazily-created FAISS index
        self._id_map = []     # FAISS row index -> conversation ID

        if FAISS_AVAILABLE:
            self._load_or_create_index()

    def _load_or_create_index(self):
        """Restore a saved index from disk, or start an empty one."""
        if self.index_path.exists() and self.id_map_path.exists():
            self._index = faiss.read_index(str(self.index_path))
            self._id_map = np.load(str(self.id_map_path), allow_pickle=True).tolist()
            print(f'[VectorStore] Loaded index: {self._index.ntotal} vectors')
            return
        # IndexFlatIP: inner-product similarity (cosine on unit vectors).
        self._index = faiss.IndexFlatIP(self.EMBEDDING_DIM)
        self._id_map = []
        print('[VectorStore] Created new FAISS index')

    def _get_encoder(self):
        """Load the sentence encoder on first use; return None if unavailable."""
        if self._encoder is not None:
            return self._encoder
        try:
            from sentence_transformers import SentenceTransformer
            self._encoder = SentenceTransformer('all-MiniLM-L6-v2')
            print('[VectorStore] Encoder loaded: all-MiniLM-L6-v2')
        except ImportError:
            print('[VectorStore] sentence-transformers not installed.')
            print(' Install: pip install sentence-transformers')
        return self._encoder

    def encode(self, text: str) -> np.ndarray:
        """Encode *text* into a unit-norm float32 embedding (None if no encoder)."""
        encoder = self._get_encoder()
        if encoder is None:
            return None
        return encoder.encode([text], normalize_embeddings=True).astype(np.float32)

    def add(
        self,
        conversation_id: int,
        text: str,
    ):
        """Embed *text*, append it to the index under *conversation_id*, persist."""
        if not FAISS_AVAILABLE or self._index is None:
            return
        embedding = self.encode(text)
        if embedding is None:
            return
        self._index.add(embedding)
        self._id_map.append(conversation_id)
        self._save()

    def search(
        self,
        query: str,
        top_k: int = 5,
    ) -> list[int]:
        """Return up to *top_k* conversation IDs most similar to *query*."""
        if not FAISS_AVAILABLE or self._index is None or self._index.ntotal == 0:
            return []
        embedding = self.encode(query)
        if embedding is None:
            return []
        _, hit_rows = self._index.search(embedding, min(top_k, self._index.ntotal))
        # Filter FAISS's -1 "no result" slots and any stale row indexes.
        return [self._id_map[row] for row in hit_rows[0]
                if 0 <= row < len(self._id_map)]

    def _save(self):
        """Persist the index and id map to MEMORY_DIR."""
        if self._index is None:
            return
        faiss.write_index(self._index, str(self.index_path))
        np.save(str(self.id_map_path), np.array(self._id_map, dtype=object))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/__init__.py
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
# model/__init__.py
|
| 2 |
-
from .base import SharedBase
|
| 3 |
-
from .expert import ExpertHead
|
| 4 |
-
from .herald import Herald
|
| 5 |
-
from .echo import Echo
|
| 6 |
-
from .sentinel import Sentinel
|
| 7 |
-
from .ensemble import ShorekeeperEnsemble
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/base.py
DELETED
|
@@ -1,152 +0,0 @@
|
|
| 1 |
-
# model/base.py
|
| 2 |
-
# The shared transformer backbone. Used by ALL components.
|
| 3 |
-
|
| 4 |
-
import sys, math
|
| 5 |
-
import torch
|
| 6 |
-
import torch.nn as nn
|
| 7 |
-
from pathlib import Path
|
| 8 |
-
|
| 9 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 10 |
-
import config
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class RotaryEmbedding(nn.Module):
|
| 14 |
-
"""
|
| 15 |
-
Rotary Position Embedding (RoPE).
|
| 16 |
-
Encodes position by rotating query and key vectors.
|
| 17 |
-
Better than learned absolute embeddings for long contexts.
|
| 18 |
-
Used by LLaMA, GPT-NeoX, and most modern LLMs post-2022.
|
| 19 |
-
"""
|
| 20 |
-
def __init__(self, dim: int, max_seq: int = 2048):
|
| 21 |
-
super().__init__()
|
| 22 |
-
# Use float32 base precision for stable frequency generation.
|
| 23 |
-
inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
|
| 24 |
-
self.register_buffer("inv_freq", inv_freq)
|
| 25 |
-
self.max_seq = max_seq
|
| 26 |
-
|
| 27 |
-
def forward(self, seq_len: int, device: torch.device, dtype: torch.dtype = None):
|
| 28 |
-
inv_freq = self.inv_freq.to(device)
|
| 29 |
-
t = torch.arange(seq_len, device=device, dtype=inv_freq.dtype)
|
| 30 |
-
freqs = torch.einsum("i,j->ij", t, inv_freq)
|
| 31 |
-
emb = torch.cat([freqs, freqs], dim=-1)
|
| 32 |
-
if emb.shape[-1] != inv_freq.shape[0] * 2:
|
| 33 |
-
emb = emb[:, :inv_freq.shape[0] * 2]
|
| 34 |
-
if dtype is not None:
|
| 35 |
-
emb = emb.to(dtype)
|
| 36 |
-
return emb.cos()[None, None, :, :], emb.sin()[None, None, :, :]
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
def rotate_half(x):
    """Rotate the last-dimension halves of *x*: (x1, x2) -> (-x2, x1)."""
    first, second = x.chunk(2, dim=-1)
    return torch.cat((-second, first), dim=-1)
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def apply_rope(q, k, cos, sin):
    """Apply rotary position embedding to query and key tensors.

    cos/sin are cast to q's dtype so mixed-precision callers stay consistent.
    """
    cos = cos.to(q.dtype)
    sin = sin.to(q.dtype)
    rotated_q = q * cos + rotate_half(q) * sin
    rotated_k = k * cos + rotate_half(k) * sin
    return rotated_q, rotated_k
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
class MultiHeadAttention(nn.Module):
    """Causal multi-head self-attention with RoPE.

    Uses torch's fused scaled_dot_product_attention when available and
    falls back to an explicit softmax implementation otherwise.
    """

    def __init__(self, n_embd: int, n_head: int, dropout: float = 0.1):
        super().__init__()
        assert n_embd % n_head == 0
        self.n_head = n_head
        self.n_embd = n_embd
        self.head_dim = n_embd // n_head
        self.qkv = nn.Linear(n_embd, 3 * n_embd, bias=False)   # fused Q/K/V projection
        self.proj = nn.Linear(n_embd, n_embd, bias=False)
        self.drop = nn.Dropout(dropout)
        self.rope = RotaryEmbedding(self.head_dim)

    def forward(self, x, mask=None):
        """Compute causal self-attention over *x*.

        Args:
            x: [B, T, C] input embeddings.
            mask: optional padding mask; the fallback path indexes it as
                [B, T] with 1 = real token, 0 = pad — assumed shape,
                confirm against callers.

        Returns:
            [B, T, C] attended output.
        """
        B, T, C = x.shape
        qkv = self.qkv(x).reshape(B, T, 3, self.n_head, self.head_dim)
        q, k, v = qkv.permute(2, 0, 3, 1, 4)  # each [B, n_head, T, head_dim]
        cos, sin = self.rope(T, x.device, dtype=q.dtype)
        q, k = apply_rope(q, k, cos, sin)
        try:
            from torch.nn.functional import scaled_dot_product_attention
            if mask is None:
                out = scaled_dot_product_attention(
                    q, k, v, attn_mask=None,
                    dropout_p=self.drop.p if self.training else 0.0,
                    is_causal=True)
            else:
                # BUG FIX: the fast path previously passed attn_mask=None,
                # silently discarding the caller's padding mask (only the
                # manual fallback applied it). Build a combined additive
                # causal + padding mask instead; is_causal must then be
                # False because the mask already encodes causality.
                causal = torch.triu(
                    torch.ones(T, T, device=x.device, dtype=torch.bool),
                    diagonal=1)
                attn_mask = torch.zeros(B, 1, T, T, device=x.device, dtype=q.dtype)
                attn_mask = attn_mask.masked_fill(causal, float('-inf'))
                # Same -1e4 penalty the fallback path uses for padded keys.
                attn_mask = attn_mask + (1 - mask[:, None, None, :].to(q.dtype)) * -1e4
                out = scaled_dot_product_attention(
                    q, k, v, attn_mask=attn_mask,
                    dropout_p=self.drop.p if self.training else 0.0,
                    is_causal=False)
        except Exception:
            # Manual fallback: explicit scores + causal/padding masking.
            scale = math.sqrt(self.head_dim)
            att = (q @ k.transpose(-2, -1)) / scale
            causal = torch.triu(torch.ones(T, T, device=x.device), diagonal=1).bool()
            att.masked_fill_(causal, float("-inf"))
            if mask is not None:
                pad = (1 - mask[:, None, None, :].float()) * -1e4
                att = att + pad
            att = torch.softmax(att, dim=-1)
            att = self.drop(att)  # nn.Dropout is a no-op in eval mode
            out = att @ v
        out = out.transpose(1, 2).reshape(B, T, C)
        return self.proj(out)
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
class FeedForward(nn.Module):
|
| 87 |
-
def __init__(self, n_embd: int, dropout: float = 0.1):
|
| 88 |
-
super().__init__()
|
| 89 |
-
self.net = nn.Sequential(
|
| 90 |
-
nn.Linear(n_embd, 4 * n_embd, bias=False),
|
| 91 |
-
nn.GELU(),
|
| 92 |
-
nn.Linear(4 * n_embd, n_embd, bias=False),
|
| 93 |
-
nn.Dropout(dropout),
|
| 94 |
-
)
|
| 95 |
-
def forward(self, x): return self.net(x)
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
class TransformerBlock(nn.Module):
|
| 99 |
-
def __init__(self, n_embd: int, n_head: int, dropout: float = 0.1):
|
| 100 |
-
super().__init__()
|
| 101 |
-
self.ln1 = nn.LayerNorm(n_embd)
|
| 102 |
-
self.attn = MultiHeadAttention(n_embd, n_head, dropout)
|
| 103 |
-
self.ln2 = nn.LayerNorm(n_embd)
|
| 104 |
-
self.ff = FeedForward(n_embd, dropout)
|
| 105 |
-
|
| 106 |
-
def forward(self, x, mask=None):
|
| 107 |
-
x = x + self.attn(self.ln1(x), mask) # Pre-norm + residual
|
| 108 |
-
x = x + self.ff(self.ln2(x))
|
| 109 |
-
return x
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
class SharedBase(nn.Module):
|
| 113 |
-
def __init__(self, cfg=None):
|
| 114 |
-
super().__init__()
|
| 115 |
-
if cfg is None:
|
| 116 |
-
cfg = config.BASE_CONFIG
|
| 117 |
-
self.n_embd = cfg["n_embd"]
|
| 118 |
-
self.n_positions = cfg["n_positions"]
|
| 119 |
-
self.token_embedding = nn.Embedding(cfg["vocab_size"], cfg["n_embd"])
|
| 120 |
-
self.drop = nn.Dropout(cfg["dropout"])
|
| 121 |
-
self.blocks = nn.ModuleList([
|
| 122 |
-
TransformerBlock(cfg["n_embd"], cfg["n_head"], cfg["dropout"])
|
| 123 |
-
for _ in range(cfg["n_layer"])
|
| 124 |
-
])
|
| 125 |
-
self.ln_f = nn.LayerNorm(cfg["n_embd"])
|
| 126 |
-
if config.MEMORY_OPT.get("gradient_checkpointing"):
|
| 127 |
-
self.gradient_checkpointing_enable()
|
| 128 |
-
self._init_weights()
|
| 129 |
-
|
| 130 |
-
def _init_weights(self):
|
| 131 |
-
for m in self.modules():
|
| 132 |
-
if isinstance(m, nn.Linear):
|
| 133 |
-
nn.init.normal_(m.weight, mean=0.0, std=0.02)
|
| 134 |
-
elif isinstance(m, nn.Embedding):
|
| 135 |
-
nn.init.normal_(m.weight, mean=0.0, std=0.02)
|
| 136 |
-
|
| 137 |
-
def gradient_checkpointing_enable(self):
|
| 138 |
-
self._use_checkpointing = True
|
| 139 |
-
|
| 140 |
-
def forward(self, input_ids, attention_mask=None):
|
| 141 |
-
x = self.drop(self.token_embedding(input_ids))
|
| 142 |
-
use_ckpt = getattr(self, "_use_checkpointing", False) and self.training
|
| 143 |
-
for block in self.blocks:
|
| 144 |
-
if use_ckpt:
|
| 145 |
-
from torch.utils.checkpoint import checkpoint
|
| 146 |
-
x = checkpoint(block, x, attention_mask, use_reentrant=False)
|
| 147 |
-
else:
|
| 148 |
-
x = block(x, attention_mask)
|
| 149 |
-
return self.ln_f(x)
|
| 150 |
-
|
| 151 |
-
def get_param_count(self):
|
| 152 |
-
return sum(p.numel() for p in self.parameters())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/echo.py
DELETED
|
@@ -1,71 +0,0 @@
|
|
| 1 |
-
# model/echo.py
|
| 2 |
-
import sys, torch, torch.nn as nn
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 5 |
-
import config
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class Echo(nn.Module):
|
| 9 |
-
"""
|
| 10 |
-
Memory retrieval and context injection module.
|
| 11 |
-
Stage 1: Retrieve relevant past exchanges from SQLite/FAISS, inject as context prefix.
|
| 12 |
-
Stage 2: Cross-attention memory gate during generation.
|
| 13 |
-
"""
|
| 14 |
-
def __init__(self, cfg=None, n_embd=None):
|
| 15 |
-
super().__init__()
|
| 16 |
-
cfg = cfg or config.ECHO_CONFIG
|
| 17 |
-
n = n_embd or cfg.get("n_embd", config.BASE_CONFIG["n_embd"])
|
| 18 |
-
from model.base import TransformerBlock
|
| 19 |
-
self.memory_attn = TransformerBlock(n, cfg.get("n_head", config.BASE_CONFIG["n_head"]))
|
| 20 |
-
self.gate_proj = nn.Linear(n * 2, n, bias=False)
|
| 21 |
-
self.gate_norm = nn.LayerNorm(n)
|
| 22 |
-
self._working_memory = [] # List of (role, cpu_embedding) tuples
|
| 23 |
-
self._max_memory = cfg.get("max_memory_tokens", 512) // 64 # keep ~8 entries
|
| 24 |
-
|
| 25 |
-
def retrieve_context(self, query: str, db, vs) -> str:
|
| 26 |
-
"""
|
| 27 |
-
Retrieve relevant past conversations and inject as context prefix.
|
| 28 |
-
Returns an enriched prompt string with memory context prepended.
|
| 29 |
-
"""
|
| 30 |
-
if db is None: return query
|
| 31 |
-
# Try semantic search first (FAISS), fall back to keyword (FTS5)
|
| 32 |
-
results = []
|
| 33 |
-
if vs is not None:
|
| 34 |
-
conv_ids = vs.search(query, top_k=3)
|
| 35 |
-
if conv_ids:
|
| 36 |
-
recent = db.get_recent_conversations(n=50)
|
| 37 |
-
id_to_conv = {c["id"]: c for c in recent if "id" in c}
|
| 38 |
-
for cid in conv_ids:
|
| 39 |
-
if cid in id_to_conv:
|
| 40 |
-
results.append(id_to_conv[cid])
|
| 41 |
-
if not results:
|
| 42 |
-
results = db.search_conversations(query[:100], limit=3)
|
| 43 |
-
knowledge = db.search_knowledge(query[:100], limit=2)
|
| 44 |
-
if not results and not knowledge: return query
|
| 45 |
-
ctx_parts = ["[MEMORY]"]
|
| 46 |
-
for r in results[:3]:
|
| 47 |
-
u = str(r.get("user_msg", ""))[:100]
|
| 48 |
-
a = str(r.get("assistant_msg", ""))[:200]
|
| 49 |
-
if u: ctx_parts.append(f"Q: {u}\nA: {a}")
|
| 50 |
-
for k in knowledge[:2]:
|
| 51 |
-
ctx_parts.append(f"FACT: {k['key']} = {k['value']}")
|
| 52 |
-
ctx_parts.append("[SEP]")
|
| 53 |
-
ctx_parts.append(query)
|
| 54 |
-
return "\n".join(ctx_parts)
|
| 55 |
-
|
| 56 |
-
def update_working_memory(self, role: str, embedding: torch.Tensor):
|
| 57 |
-
"""
|
| 58 |
-
Store a hidden-state embedding in the short-term working memory buffer.
|
| 59 |
-
Embeddings are moved to CPU to avoid holding GPU memory between turns.
|
| 60 |
-
"""
|
| 61 |
-
self._working_memory.append((role, embedding.detach().cpu()))
|
| 62 |
-
if len(self._working_memory) > self._max_memory:
|
| 63 |
-
self._working_memory = self._working_memory[-self._max_memory:]
|
| 64 |
-
|
| 65 |
-
def clear_memory(self):
|
| 66 |
-
"""Clear the working memory buffer (call on session reset)."""
|
| 67 |
-
self._working_memory.clear()
|
| 68 |
-
|
| 69 |
-
def get_memory_summary(self) -> str:
|
| 70 |
-
"""Return a simple string summary of buffered memory entries."""
|
| 71 |
-
return f"[Echo] {len(self._working_memory)} entries in working memory"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/ensemble.py
DELETED
|
@@ -1,346 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Shorekeeper ensemble with lazy expert loading.
|
| 3 |
-
"""
|
| 4 |
-
import sys, torch, torch.nn as nn, torch.nn.functional as F
|
| 5 |
-
from pathlib import Path
|
| 6 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 7 |
-
import config
|
| 8 |
-
from config import EXPERT_NAMES, CHECKPOINT_DIR, DEVICE, DTYPE
|
| 9 |
-
from model.base import SharedBase
|
| 10 |
-
from model.herald import Herald
|
| 11 |
-
from model.echo import Echo
|
| 12 |
-
from model.sentinel import Sentinel
|
| 13 |
-
from model.lazy_expert_loader import LazyExpertLoader
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
class ShorekeeperEnsemble(nn.Module):
|
| 17 |
-
"""Full assembled Shorekeeper model with lazy expert loading."""
|
| 18 |
-
|
| 19 |
-
def __init__(
|
| 20 |
-
self,
|
| 21 |
-
max_loaded_experts: int = 2,
|
| 22 |
-
base_cfg: dict = None,
|
| 23 |
-
expert_cfgs: dict = None,
|
| 24 |
-
):
|
| 25 |
-
super().__init__()
|
| 26 |
-
self.base_cfg = base_cfg or config.BASE_CONFIG
|
| 27 |
-
self.expert_cfgs = expert_cfgs or config.EXPERT_CONFIGS
|
| 28 |
-
self.base = SharedBase(self.base_cfg)
|
| 29 |
-
self.expert_loader = LazyExpertLoader(
|
| 30 |
-
expert_names=EXPERT_NAMES,
|
| 31 |
-
checkpoint_dir=CHECKPOINT_DIR,
|
| 32 |
-
device=DEVICE,
|
| 33 |
-
max_loaded=max_loaded_experts,
|
| 34 |
-
dtype=DTYPE,
|
| 35 |
-
base_cfg=self.base_cfg,
|
| 36 |
-
expert_cfgs=self.expert_cfgs,
|
| 37 |
-
)
|
| 38 |
-
herald_n_embd = self.base_cfg.get("n_embd", config.BASE_CONFIG["n_embd"])
|
| 39 |
-
self.herald = Herald(n_embd=herald_n_embd)
|
| 40 |
-
self.echo = Echo(n_embd=herald_n_embd)
|
| 41 |
-
sentinel_cfg = self.expert_cfgs.get("sentinel", config.EXPERT_CONFIGS.get("sentinel", {}))
|
| 42 |
-
self.sentinel = Sentinel(cfg=sentinel_cfg, n_embd=herald_n_embd)
|
| 43 |
-
|
| 44 |
-
print(f"[Ensemble] Initialized with lazy expert loading (max_loaded={max_loaded_experts})")
|
| 45 |
-
|
| 46 |
-
# backward compatibility for code expecting model.experts[...]
|
| 47 |
-
@property
|
| 48 |
-
def experts(self):
|
| 49 |
-
class _proxy:
|
| 50 |
-
def __init__(self, loader):
|
| 51 |
-
self.loader = loader
|
| 52 |
-
def __getitem__(self, name):
|
| 53 |
-
return self.loader.get_expert(name)
|
| 54 |
-
def keys(self):
|
| 55 |
-
return self.loader.expert_names
|
| 56 |
-
def __iter__(self):
|
| 57 |
-
return iter(self.loader.expert_names)
|
| 58 |
-
return _proxy(self.expert_loader)
|
| 59 |
-
|
| 60 |
-
# ── Convenience alias ─────────────────────────────────────────────
|
| 61 |
-
@property
|
| 62 |
-
def shared_base(self):
|
| 63 |
-
return self.base
|
| 64 |
-
|
| 65 |
-
# ── Core forward with lazy loading ────────────────────────────────
|
| 66 |
-
def forward(self, input_ids, attention_mask=None,
|
| 67 |
-
return_routing=False, return_sentinel=False):
|
| 68 |
-
base_hidden = self.base(input_ids, attention_mask)
|
| 69 |
-
# Ensure experts load to the same device/dtype as base_hidden
|
| 70 |
-
self.expert_loader.set_device(base_hidden.device, dtype=base_hidden.dtype)
|
| 71 |
-
routing = self.herald(base_hidden, attention_mask)
|
| 72 |
-
expert_idx = routing["expert_indices"]
|
| 73 |
-
expert_wts = routing["expert_weights"]
|
| 74 |
-
B = input_ids.shape[0]
|
| 75 |
-
logits = None
|
| 76 |
-
experts_used = []
|
| 77 |
-
|
| 78 |
-
for ki in range(expert_idx.shape[1]):
|
| 79 |
-
for b in range(B):
|
| 80 |
-
name = EXPERT_NAMES[expert_idx[b, ki].item()]
|
| 81 |
-
if name not in experts_used:
|
| 82 |
-
experts_used.append(name)
|
| 83 |
-
expert = self.expert_loader.get_expert(name)
|
| 84 |
-
out = expert(
|
| 85 |
-
base_hidden[b:b+1],
|
| 86 |
-
attention_mask[b:b+1] if attention_mask is not None else None,
|
| 87 |
-
)
|
| 88 |
-
if "logits" not in out:
|
| 89 |
-
print(f"[Ensemble] Expert {name} output missing logits, skipping")
|
| 90 |
-
continue
|
| 91 |
-
if out["logits"].dim() != 3:
|
| 92 |
-
print(f"[Ensemble] Expert {name} returned unexpected logits shape {out['logits'].shape}, skipping")
|
| 93 |
-
continue
|
| 94 |
-
if out["logits"].shape[-1] != self.base_cfg["vocab_size"]:
|
| 95 |
-
print(f"[Ensemble] Expert {name} logits vocab size mismatch ({out['logits'].shape[-1]} vs {self.base_cfg['vocab_size']}), skipping")
|
| 96 |
-
continue
|
| 97 |
-
weighted = out["logits"] * expert_wts[b, ki]
|
| 98 |
-
if logits is None:
|
| 99 |
-
logits = torch.zeros(B, weighted.shape[1], weighted.shape[2],
|
| 100 |
-
device=weighted.device, dtype=weighted.dtype)
|
| 101 |
-
logits[b] = logits[b] + weighted.squeeze(0)
|
| 102 |
-
|
| 103 |
-
result = {
|
| 104 |
-
"logits": logits,
|
| 105 |
-
"load_balance_loss": routing["load_balance_loss"],
|
| 106 |
-
"experts_used": experts_used,
|
| 107 |
-
}
|
| 108 |
-
if return_routing:
|
| 109 |
-
result["routing"] = {
|
| 110 |
-
EXPERT_NAMES[i]: routing["router_probs"][0, i].item()
|
| 111 |
-
for i in range(len(EXPERT_NAMES))
|
| 112 |
-
}
|
| 113 |
-
if return_sentinel:
|
| 114 |
-
result["sentinel"] = self.sentinel.analyze(base_hidden, attention_mask)
|
| 115 |
-
return result
|
| 116 |
-
|
| 117 |
-
# ── Autoregressive generation ─────────────────────────────────────
|
| 118 |
-
@torch.no_grad()
|
| 119 |
-
def generate(
|
| 120 |
-
self,
|
| 121 |
-
idx: torch.Tensor,
|
| 122 |
-
max_new_tokens: int = 200,
|
| 123 |
-
temperature: float = 0.8,
|
| 124 |
-
top_k: int = 40,
|
| 125 |
-
top_p: float = None,
|
| 126 |
-
expert_override: str = None,
|
| 127 |
-
) -> torch.Tensor:
|
| 128 |
-
from tokenizer.tokenizer_utils import get_tokenizer
|
| 129 |
-
try:
|
| 130 |
-
eos_id = get_tokenizer().token_to_id("[EOS]")
|
| 131 |
-
except Exception:
|
| 132 |
-
eos_id = None
|
| 133 |
-
|
| 134 |
-
max_ctx = self.base.n_positions
|
| 135 |
-
for _ in range(max_new_tokens):
|
| 136 |
-
idx_cond = idx if idx.size(1) <= max_ctx else idx[:, -max_ctx:]
|
| 137 |
-
result = self.forward(idx_cond)
|
| 138 |
-
logits = result["logits"][:, -1, :]
|
| 139 |
-
if temperature == 0:
|
| 140 |
-
next_id = logits.argmax(dim=-1, keepdim=True)
|
| 141 |
-
else:
|
| 142 |
-
logits = logits / max(temperature, 1e-8)
|
| 143 |
-
if top_k is not None and top_k > 0:
|
| 144 |
-
v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
|
| 145 |
-
logits[logits < v[:, [-1]]] = float("-inf")
|
| 146 |
-
if top_p is not None and 0.0 < top_p < 1.0:
|
| 147 |
-
sorted_logits, sorted_idx = torch.sort(logits, descending=True)
|
| 148 |
-
cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
|
| 149 |
-
sorted_idx_to_remove = cum_probs - F.softmax(sorted_logits, dim=-1) > top_p
|
| 150 |
-
sorted_logits[sorted_idx_to_remove] = float("-inf")
|
| 151 |
-
logits = logits.scatter(1, sorted_idx, sorted_logits)
|
| 152 |
-
probs = F.softmax(logits, dim=-1)
|
| 153 |
-
next_id = torch.multinomial(probs, num_samples=1)
|
| 154 |
-
|
| 155 |
-
idx = torch.cat([idx, next_id], dim=1)
|
| 156 |
-
if eos_id is not None and next_id[0, 0].item() == eos_id:
|
| 157 |
-
break
|
| 158 |
-
|
| 159 |
-
return idx
|
| 160 |
-
|
| 161 |
-
# ── Routing inspection ────────────────────────────────────────────
|
| 162 |
-
@torch.no_grad()
|
| 163 |
-
def get_routing(self, idx: torch.Tensor):
|
| 164 |
-
base_hidden = self.base(idx)
|
| 165 |
-
routing = self.herald(base_hidden)
|
| 166 |
-
probs = routing["router_probs"][0]
|
| 167 |
-
pairs = [(EXPERT_NAMES[i], probs[i].item()) for i in range(len(EXPERT_NAMES))]
|
| 168 |
-
pairs.sort(key=lambda x: -x[1])
|
| 169 |
-
return pairs, False
|
| 170 |
-
|
| 171 |
-
# ── Safety scan ─────────────────────────────────────────────────
|
| 172 |
-
def scan_output(self, primary_expert: str, text: str):
|
| 173 |
-
from tokenizer.tokenizer_utils import encode_batch
|
| 174 |
-
device = next(self.parameters()).device
|
| 175 |
-
ids, mask = encode_batch([text], max_length=512)
|
| 176 |
-
ids = ids.to(device)
|
| 177 |
-
mask = mask.to(device)
|
| 178 |
-
with torch.no_grad():
|
| 179 |
-
base_hidden = self.base(ids, mask)
|
| 180 |
-
report = self.sentinel.analyze(base_hidden, mask)
|
| 181 |
-
self.sentinel.log_expert(primary_expert)
|
| 182 |
-
return report
|
| 183 |
-
|
| 184 |
-
# ── Persistence ─────────────────────────────────────────────────
|
| 185 |
-
def save(self, path: str):
|
| 186 |
-
Path(path).parent.mkdir(parents=True, exist_ok=True)
|
| 187 |
-
state = {
|
| 188 |
-
"base_state_dict": self.base.state_dict(),
|
| 189 |
-
"herald_state_dict": self.herald.state_dict(),
|
| 190 |
-
"echo_state_dict": self.echo.state_dict(),
|
| 191 |
-
"sentinel_state_dict": self.sentinel.state_dict(),
|
| 192 |
-
}
|
| 193 |
-
torch.save(state, path)
|
| 194 |
-
print(f"[Ensemble] Saved core components to {path}")
|
| 195 |
-
|
| 196 |
-
@classmethod
|
| 197 |
-
def _safe_load_state_dict(cls, module: nn.Module, state_dict: dict, name: str):
|
| 198 |
-
own = module.state_dict()
|
| 199 |
-
filtered = {}
|
| 200 |
-
for k, v in state_dict.items():
|
| 201 |
-
if k in own and own[k].shape == v.shape:
|
| 202 |
-
filtered[k] = v
|
| 203 |
-
elif k in own:
|
| 204 |
-
print(f"[Ensemble] Skipping mismatched {name} key {k}: checkpoint {v.shape}, model {own[k].shape}")
|
| 205 |
-
if filtered:
|
| 206 |
-
module.load_state_dict(filtered, strict=False)
|
| 207 |
-
|
| 208 |
-
@classmethod
|
| 209 |
-
def load(cls, path: str, device: str = "cpu", max_loaded_experts: int = 3) -> "ShorekeeperEnsemble":
|
| 210 |
-
ckpt = torch.load(path, map_location="cpu")
|
| 211 |
-
base_cfg = None
|
| 212 |
-
expert_cfgs = None
|
| 213 |
-
if isinstance(ckpt, dict):
|
| 214 |
-
base_cfg = ckpt.get("base_config", None)
|
| 215 |
-
expert_cfgs = ckpt.get("expert_configs", None)
|
| 216 |
-
|
| 217 |
-
model = cls(
|
| 218 |
-
max_loaded_experts=max_loaded_experts,
|
| 219 |
-
base_cfg=base_cfg,
|
| 220 |
-
expert_cfgs=expert_cfgs,
|
| 221 |
-
).to(device)
|
| 222 |
-
model.expert_loader.set_device(device, dtype=DTYPE)
|
| 223 |
-
|
| 224 |
-
state: dict = {}
|
| 225 |
-
if isinstance(ckpt, dict):
|
| 226 |
-
if "model_state" in ckpt:
|
| 227 |
-
state = ckpt["model_state"]
|
| 228 |
-
elif "model_state_dict" in ckpt:
|
| 229 |
-
state = ckpt["model_state_dict"]
|
| 230 |
-
elif "base_state_dict" in ckpt or "herald_state_dict" in ckpt:
|
| 231 |
-
# Modern saved ensemble format
|
| 232 |
-
cls._safe_load_state_dict(model.base, ckpt.get("base_state_dict", {}), "base")
|
| 233 |
-
cls._safe_load_state_dict(model.herald, ckpt.get("herald_state_dict", {}), "herald")
|
| 234 |
-
cls._safe_load_state_dict(model.echo, ckpt.get("echo_state_dict", {}), "echo")
|
| 235 |
-
cls._safe_load_state_dict(model.sentinel, ckpt.get("sentinel_state_dict", {}), "sentinel")
|
| 236 |
-
print(f"[Ensemble] Loaded from {path}")
|
| 237 |
-
return model
|
| 238 |
-
else:
|
| 239 |
-
state = ckpt
|
| 240 |
-
else:
|
| 241 |
-
raise ValueError(f"Unsupported checkpoint type: {type(ckpt)}")
|
| 242 |
-
|
| 243 |
-
if not isinstance(state, dict):
|
| 244 |
-
raise ValueError("Checkpoint does not contain recognizable state dict")
|
| 245 |
-
|
| 246 |
-
# Load base + modules if keys present
|
| 247 |
-
base_state = {k.replace("shared_base.", ""): v for k, v in state.items() if k.startswith("shared_base.")}
|
| 248 |
-
if base_state:
|
| 249 |
-
cls._safe_load_state_dict(model.base, base_state, "base")
|
| 250 |
-
|
| 251 |
-
herald_state = {k.replace("herald.", ""): v for k, v in state.items() if k.startswith("herald.")}
|
| 252 |
-
if herald_state:
|
| 253 |
-
cls._safe_load_state_dict(model.herald, herald_state, "herald")
|
| 254 |
-
|
| 255 |
-
echo_state = {k.replace("echo.", ""): v for k, v in state.items() if k.startswith("echo.")}
|
| 256 |
-
if echo_state:
|
| 257 |
-
cls._safe_load_state_dict(model.echo, echo_state, "echo")
|
| 258 |
-
|
| 259 |
-
sentinel_state = {k.replace("sentinel.", ""): v for k, v in state.items() if k.startswith("sentinel.")}
|
| 260 |
-
if sentinel_state:
|
| 261 |
-
cls._safe_load_state_dict(model.sentinel, sentinel_state, "sentinel")
|
| 262 |
-
|
| 263 |
-
# Load expert states if available
|
| 264 |
-
expert_keys = [k for k in state.keys() if k.startswith("experts.") or k.startswith("expert.")]
|
| 265 |
-
if expert_keys:
|
| 266 |
-
for name in EXPERT_NAMES:
|
| 267 |
-
for prefix in [f"experts.{name}.", f"expert.{name}."]:
|
| 268 |
-
expert_state = {k.replace(prefix, ""): v for k, v in state.items() if k.startswith(prefix)}
|
| 269 |
-
if expert_state:
|
| 270 |
-
expert = model.expert_loader.get_expert(name)
|
| 271 |
-
cls._safe_load_state_dict(expert, expert_state, f"expert.{name}")
|
| 272 |
-
break
|
| 273 |
-
|
| 274 |
-
# If no prefixed expert keys, maybe experts are at top-level names
|
| 275 |
-
for name in EXPERT_NAMES:
|
| 276 |
-
expert_state = {k.replace(f"{name}.", ""): v for k, v in state.items() if k.startswith(f"{name}.")}
|
| 277 |
-
if expert_state:
|
| 278 |
-
expert = model.expert_loader.get_expert(name)
|
| 279 |
-
cls._safe_load_state_dict(expert, expert_state, f"expert.{name}")
|
| 280 |
-
|
| 281 |
-
print(f"[Ensemble] Loaded from {path}")
|
| 282 |
-
return model
|
| 283 |
-
|
| 284 |
-
@classmethod
|
| 285 |
-
def load_from_components(cls, checkpoint_dir: str = None, device: str = "cpu") -> "ShorekeeperEnsemble":
|
| 286 |
-
model = cls().to(device)
|
| 287 |
-
model.expert_loader.set_device(device, dtype=DTYPE)
|
| 288 |
-
ckpt_root = Path(checkpoint_dir or CHECKPOINT_DIR)
|
| 289 |
-
base_ckpt = ckpt_root / "base" / "best.pt"
|
| 290 |
-
if base_ckpt.exists():
|
| 291 |
-
try:
|
| 292 |
-
ckpt = torch.load(base_ckpt, map_location="cpu")
|
| 293 |
-
state = ckpt.get("model_state_dict", ckpt)
|
| 294 |
-
base_state = {k.replace("0.", ""): v for k, v in state.items() if k.startswith("0.")}
|
| 295 |
-
if base_state:
|
| 296 |
-
model.base.load_state_dict(base_state, strict=False)
|
| 297 |
-
else:
|
| 298 |
-
model.base.load_state_dict(state, strict=False)
|
| 299 |
-
except Exception as e:
|
| 300 |
-
print(f"[Ensemble] Warning: failed to load base checkpoint {base_ckpt}: {e}")
|
| 301 |
-
|
| 302 |
-
# Experts are loaded lazily by LazyExpertLoader when first routed.
|
| 303 |
-
# Ensure checkpoint availability is set by LazyExpertLoader initialization.
|
| 304 |
-
|
| 305 |
-
herald_path = ckpt_root / "herald" / "best.pt"
|
| 306 |
-
if herald_path.exists():
|
| 307 |
-
try:
|
| 308 |
-
ckpt = torch.load(herald_path, map_location="cpu")
|
| 309 |
-
model.herald.load_state_dict(ckpt.get("model_state_dict", ckpt), strict=False)
|
| 310 |
-
except Exception as e:
|
| 311 |
-
print(f"[Ensemble] Warning: failed to load herald checkpoint {herald_path}: {e}")
|
| 312 |
-
|
| 313 |
-
sentinel_path = ckpt_root / "sentinel" / "best.pt"
|
| 314 |
-
if sentinel_path.exists():
|
| 315 |
-
try:
|
| 316 |
-
ckpt = torch.load(sentinel_path, map_location="cpu")
|
| 317 |
-
model.sentinel.load_state_dict(ckpt.get("model_state_dict", ckpt), strict=False)
|
| 318 |
-
except Exception as e:
|
| 319 |
-
print(f"[Ensemble] Warning: failed to load sentinel checkpoint {sentinel_path}: {e}")
|
| 320 |
-
|
| 321 |
-
return model
|
| 322 |
-
|
| 323 |
-
# ── Expert management ─────────────────────────────────────────────────
|
| 324 |
-
def preload_experts(self, expert_names: list):
|
| 325 |
-
self.expert_loader.preload_experts(expert_names)
|
| 326 |
-
|
| 327 |
-
def get_loaded_experts(self):
|
| 328 |
-
return [name for name, loaded in self.expert_loader.get_cache_status().items() if loaded]
|
| 329 |
-
|
| 330 |
-
def clear_expert_cache(self):
|
| 331 |
-
self.expert_loader.clear_cache()
|
| 332 |
-
|
| 333 |
-
def set_max_loaded_experts(self, max_loaded: int):
|
| 334 |
-
self.expert_loader.set_max_loaded(max_loaded)
|
| 335 |
-
|
| 336 |
-
def freeze_base(self):
|
| 337 |
-
for p in self.base.parameters(): p.requires_grad = False
|
| 338 |
-
|
| 339 |
-
def unfreeze_all(self):
|
| 340 |
-
for p in self.parameters(): p.requires_grad = True
|
| 341 |
-
|
| 342 |
-
def get_trainable_count(self):
|
| 343 |
-
return sum(p.numel() for p in self.parameters() if p.requires_grad)
|
| 344 |
-
|
| 345 |
-
def get_total_count(self):
|
| 346 |
-
return sum(p.numel() for p in self.parameters())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/expert.py
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
# model/expert.py
|
| 2 |
-
import sys, torch, torch.nn as nn
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 5 |
-
import config
|
| 6 |
-
from model.base import TransformerBlock
|
| 7 |
-
from tokenizer.tokenizer_utils import vocab_size
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
class ExpertHead(nn.Module):
|
| 11 |
-
"""
|
| 12 |
-
Domain-specialized transformer head.
|
| 13 |
-
Receives SharedBase hidden states and processes them
|
| 14 |
-
through its own transformer layers to produce domain-specific logits.
|
| 15 |
-
"""
|
| 16 |
-
def __init__(self, expert_name: str, expert_cfg=None, base_cfg=None):
|
| 17 |
-
super().__init__()
|
| 18 |
-
from config import EXPERT_NAMES
|
| 19 |
-
assert expert_name in EXPERT_NAMES, f"Unknown expert: {expert_name}"
|
| 20 |
-
if expert_cfg is None:
|
| 21 |
-
expert_cfg = config.EXPERT_CONFIGS[expert_name]
|
| 22 |
-
if base_cfg is None:
|
| 23 |
-
base_cfg = config.BASE_CONFIG
|
| 24 |
-
n_embd = expert_cfg["n_embd"]
|
| 25 |
-
self.input_norm = nn.LayerNorm(n_embd)
|
| 26 |
-
self.input_proj = nn.Linear(base_cfg["n_embd"], n_embd, bias=False)
|
| 27 |
-
self.blocks = nn.ModuleList([
|
| 28 |
-
TransformerBlock(n_embd, expert_cfg["n_head"]) for _ in range(expert_cfg["n_layer"])
|
| 29 |
-
])
|
| 30 |
-
self.ln_f = nn.LayerNorm(n_embd)
|
| 31 |
-
self.lm_head = nn.Linear(n_embd, base_cfg["vocab_size"], bias=False)
|
| 32 |
-
nn.init.normal_(self.lm_head.weight, std=0.02)
|
| 33 |
-
|
| 34 |
-
def forward(self, base_hidden, attention_mask=None):
|
| 35 |
-
x = self.input_proj(self.input_norm(base_hidden))
|
| 36 |
-
for block in self.blocks:
|
| 37 |
-
x = block(x, attention_mask)
|
| 38 |
-
x = self.ln_f(x)
|
| 39 |
-
return {"logits": self.lm_head(x), "hidden": x}
|
| 40 |
-
|
| 41 |
-
def freeze(self):
|
| 42 |
-
for p in self.parameters(): p.requires_grad = False
|
| 43 |
-
|
| 44 |
-
def unfreeze(self):
|
| 45 |
-
for p in self.parameters(): p.requires_grad = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/herald.py
DELETED
|
@@ -1,62 +0,0 @@
|
|
| 1 |
-
# model/herald.py
|
| 2 |
-
import sys, torch, torch.nn as nn, torch.nn.functional as F
|
| 3 |
-
from pathlib import Path
|
| 4 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 5 |
-
import config
|
| 6 |
-
from model.base import TransformerBlock
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
class Herald(nn.Module):
|
| 10 |
-
"""
|
| 11 |
-
Routes queries to the top-K most relevant experts.
|
| 12 |
-
Trained as a classifier: given base hidden states, predict expert indices.
|
| 13 |
-
Uses load-balance loss to prevent routing collapse (always picking 1 expert).
|
| 14 |
-
"""
|
| 15 |
-
def __init__(self, cfg=None, n_embd=None):
|
| 16 |
-
super().__init__()
|
| 17 |
-
cfg = cfg or config.HERALD_CONFIG
|
| 18 |
-
if n_embd is None:
|
| 19 |
-
n_embd = cfg.get("n_embd", config.BASE_CONFIG["n_embd"])
|
| 20 |
-
self.top_k = cfg.get("top_k", 2)
|
| 21 |
-
self.n_experts = cfg.get("n_experts", len(config.EXPERT_NAMES))
|
| 22 |
-
self.query_encoder = nn.ModuleList([
|
| 23 |
-
TransformerBlock(n_embd, cfg.get("n_head", config.BASE_CONFIG["n_head"])) for _ in range(cfg.get("n_layer", 2))
|
| 24 |
-
])
|
| 25 |
-
self.query_norm = nn.LayerNorm(n_embd)
|
| 26 |
-
self.expert_scorer = nn.Linear(n_embd, self.n_experts, bias=True)
|
| 27 |
-
|
| 28 |
-
def forward(self, base_hidden, attention_mask=None):
|
| 29 |
-
x = base_hidden
|
| 30 |
-
for block in self.query_encoder:
|
| 31 |
-
x = block(x, attention_mask)
|
| 32 |
-
x = self.query_norm(x)
|
| 33 |
-
# Pool: weighted mean over sequence (ignore padding)
|
| 34 |
-
if attention_mask is not None:
|
| 35 |
-
mask = attention_mask[:, :, None].float()
|
| 36 |
-
pooled = (x * mask).sum(1) / mask.sum(1).clamp(min=1)
|
| 37 |
-
else:
|
| 38 |
-
pooled = x.mean(1)
|
| 39 |
-
scores = self.expert_scorer(pooled) # [B, n_experts]
|
| 40 |
-
probs = F.softmax(scores, dim=-1)
|
| 41 |
-
top_probs, top_idx = probs.topk(self.top_k, dim=-1)
|
| 42 |
-
# Load balance loss: encourages uniform expert utilization
|
| 43 |
-
# From "Switch Transformers" (Fedus et al. 2022)
|
| 44 |
-
expert_frac = probs.mean(0)
|
| 45 |
-
target_frac = torch.ones_like(expert_frac) / self.n_experts
|
| 46 |
-
lb_loss = F.mse_loss(expert_frac, target_frac)
|
| 47 |
-
return {
|
| 48 |
-
"router_probs": probs,
|
| 49 |
-
"expert_indices": top_idx,
|
| 50 |
-
"expert_weights": top_probs,
|
| 51 |
-
"load_balance_loss": lb_loss,
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
def get_routing_display(self, base_hidden, attention_mask=None) -> str:
|
| 55 |
-
routing = self.forward(base_hidden, attention_mask)
|
| 56 |
-
lines = []
|
| 57 |
-
probs = routing["router_probs"][0]
|
| 58 |
-
names = config.EXPERT_NAMES if hasattr(config, "EXPERT_NAMES") else []
|
| 59 |
-
for i, name in enumerate(names):
|
| 60 |
-
bar = "█" * int(probs[i].item() * 20)
|
| 61 |
-
lines.append(f" {name:<12} {bar:<20} {probs[i]:.3f}")
|
| 62 |
-
return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/lazy_expert_loader.py
DELETED
|
@@ -1,120 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Lazy expert loading system for Shorekeeper.
|
| 3 |
-
Only loads experts into VRAM when they're actually routed to.
|
| 4 |
-
Implements LRU caching to keep hot experts loaded while evicting cold ones.
|
| 5 |
-
"""
|
| 6 |
-
import torch
|
| 7 |
-
import torch.nn as nn
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
from collections import OrderedDict
|
| 10 |
-
from typing import Dict
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
class LazyExpertLoader(nn.Module):
    """Manages expert models with lazy loading and LRU caching.

    Experts are instantiated and moved to the target device only when first
    requested via get_expert(). Once more than ``max_loaded`` experts are
    resident, the least-recently-used one is moved back to CPU and dropped
    from the cache.
    """

    def __init__(
        self,
        expert_names: list,
        checkpoint_dir: Path,
        device: str = "cuda",
        max_loaded: int = 3,
        dtype: torch.dtype = torch.bfloat16,
        base_cfg: dict = None,
        expert_cfgs: dict = None,
    ):
        """Index available checkpoints; no experts are loaded yet.

        Args:
            expert_names: names of all experts this loader may serve.
            checkpoint_dir: root dir containing ``experts/<name>/best.pt``.
            device: target device for loaded experts.
            max_loaded: maximum number of experts kept resident at once.
            dtype: load dtype (downgraded to float32 on CPU for half types).
            base_cfg: shared base-model config passed to each ExpertHead.
            expert_cfgs: per-expert config overrides, keyed by expert name.
        """
        super().__init__()
        self.expert_names = expert_names
        self.checkpoint_dir = Path(checkpoint_dir)
        self.device = device
        self.max_loaded = max_loaded
        self.dtype = dtype
        self.base_cfg = base_cfg or {}
        self.expert_cfgs = expert_cfgs or {}

        self.experts = nn.ModuleDict()  # currently-resident experts
        self._lru = OrderedDict()       # access order; oldest entry first

        # Index which experts actually have a trained checkpoint on disk.
        self._available_checkpoints = {
            name: self.checkpoint_dir / "experts" / name / "best.pt"
            for name in expert_names
            if (self.checkpoint_dir / "experts" / name / "best.pt").exists()
        }

        print(f"[LazyExpertLoader] Initialized with max_loaded={max_loaded}")
        print(f"[LazyExpertLoader] Found {len(self._available_checkpoints)} expert checkpoints")

    def get_expert(self, expert_name: str) -> nn.Module:
        """Return the expert, loading it (and possibly evicting) on demand."""
        if expert_name in self.experts:
            # Cache hit: refresh recency and return.
            self._lru.move_to_end(expert_name)
            return self.experts[expert_name]

        expert = self._load_expert(expert_name)
        self.experts[expert_name] = expert
        self._lru[expert_name] = True

        # Evict the least-recently-used expert once over capacity.
        if len(self._lru) > self.max_loaded:
            evicted_name, _ = self._lru.popitem(last=False)
            evicted_expert = self.experts.pop(evicted_name)
            evicted_expert.to("cpu")
            print(f"[LazyExpertLoader] Evicted '{evicted_name}' from cache")

        return expert

    def _load_expert(self, expert_name: str) -> nn.Module:
        """Construct an ExpertHead, restore its checkpoint if present, and
        move it to the target device/dtype (half types fall back to float32
        on CPU)."""
        from model.expert import ExpertHead

        expert_cfg = self.expert_cfgs.get(expert_name, None)
        print(f"[LazyExpertLoader] Loading expert '{expert_name}'...")
        expert = ExpertHead(expert_name, expert_cfg=expert_cfg, base_cfg=self.base_cfg)

        ckpt_path = self._available_checkpoints.get(expert_name,
            self.checkpoint_dir / "experts" / expert_name / "best.pt")
        if ckpt_path.exists():
            try:
                ckpt = torch.load(ckpt_path, map_location="cpu")
                # Checkpoints may be a raw state dict or a training dict.
                state_dict = ckpt.get("model_state_dict", ckpt)
                expert.load_state_dict(state_dict, strict=False)
                print(f"[LazyExpertLoader] Loaded weights from {ckpt_path}")
            except Exception as e:
                # Best-effort: a corrupt checkpoint falls back to random init.
                print(f"[LazyExpertLoader] Warning: Failed to load {ckpt_path}: {e}")
        else:
            print(f"[LazyExpertLoader] No checkpoint found for '{expert_name}', using random init")

        load_dtype = self.dtype
        if self.device == "cpu" and load_dtype in [torch.float16, torch.bfloat16]:
            load_dtype = torch.float32
        expert = expert.to(device=self.device, dtype=load_dtype)
        expert.eval()
        return expert

    def preload_experts(self, expert_names: list):
        """Eagerly load the given experts (subject to the LRU capacity)."""
        for name in expert_names:
            self.get_expert(name)

    def clear_cache(self):
        """Move every resident expert to CPU and empty the cache."""
        for e in self.experts.values():
            e.to("cpu")
        self.experts.clear()
        self._lru.clear()
        print("[LazyExpertLoader] Cache cleared")

    def get_cache_status(self) -> Dict[str, bool]:
        """Map each known expert name to whether it is currently resident."""
        return {name: name in self.experts for name in self.expert_names}

    def set_device(self, device: str, dtype: torch.dtype = None):
        """Move all resident experts to ``device``, optionally changing dtype.

        BUG FIX: the CPU half-precision downgrade now covers bfloat16 as
        well as float16, matching the policy applied in _load_expert.
        """
        self.device = device
        if dtype is not None:
            self.dtype = dtype
        if device == "cpu" and self.dtype in (torch.float16, torch.bfloat16):
            self.dtype = torch.float32
        for expert in self.experts.values():
            expert.to(device=device, dtype=self.dtype)

    def set_max_loaded(self, max_loaded: int):
        """Shrink/grow capacity; evict oldest experts if now over capacity."""
        self.max_loaded = max_loaded
        while len(self._lru) > self.max_loaded:
            evicted_name, _ = self._lru.popitem(last=False)
            evicted_expert = self.experts.pop(evicted_name)
            evicted_expert.to("cpu")
            print(f"[LazyExpertLoader] Evicted '{evicted_name}' (cache resize)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model/sentinel.py
DELETED
|
@@ -1,90 +0,0 @@
|
|
| 1 |
-
# model/sentinel.py
|
| 2 |
-
import sys, torch, torch.nn as nn
|
| 3 |
-
from dataclasses import dataclass
|
| 4 |
-
from pathlib import Path
|
| 5 |
-
import time
|
| 6 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 7 |
-
import config
|
| 8 |
-
from model.base import TransformerBlock
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
@dataclass
class SentinelReport:
    """Result of one Sentinel analysis pass over model hidden states."""

    verdict: str                # "CLEAN", "FLAG", or "BLOCK"
    drift_score: float          # 0-1: probability of behavioral drift
    refusal_score: float        # 0-1: probability of inappropriate refusal
    hallucination_score: float  # 0-1: probability of hallucination
    overall_risk: float         # combined risk score

    @property
    def is_clean(self) -> bool:
        """Whether no safety protocol was triggered."""
        return self.verdict == "CLEAN"

    @property
    def protocol(self) -> str:
        """Lowercase verdict, used as the incident protocol tag."""
        return self.verdict.lower()

    @property
    def total(self) -> float:
        """Alias for overall_risk kept for older call sites."""
        return self.overall_risk
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
class Sentinel(nn.Module):
    """Monitors outputs for behavioral drift in real time.

    A small transformer encoder pools the base model's hidden states and
    feeds three sigmoid heads (drift / refusal / hallucination); the max
    of the weighted head scores decides the CLEAN / FLAG / BLOCK verdict.
    """

    def __init__(self, cfg=None, n_embd=None):
        """Build the encoder and classifier heads.

        Args:
            cfg: expert config dict; defaults to config.EXPERT_CONFIGS["sentinel"].
            n_embd: hidden width of the base model; inferred from cfg if omitted.
        """
        super().__init__()
        cfg = cfg or config.EXPERT_CONFIGS.get("sentinel", {})
        n = n_embd or cfg.get("n_embd", config.BASE_CONFIG["n_embd"])
        hidden = config.SENTINEL_CONFIG["hidden_dim"]
        self.encoder_blocks = nn.ModuleList([
            TransformerBlock(n, cfg.get("n_head", config.BASE_CONFIG["n_head"])) for _ in range(cfg.get("n_layer", 4))
        ])
        self.encoder_norm = nn.LayerNorm(n)
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.proj = nn.Sequential(nn.Linear(n, hidden), nn.ReLU(), nn.Dropout(0.1))
        self.drift_head = nn.Linear(hidden, 1)
        self.refusal_head = nn.Linear(hidden, 1)
        self.halluc_head = nn.Linear(hidden, 1)
        self._incidents = []  # Session incident log

    def forward(self, base_hidden, attention_mask=None):
        """Alias for analyze() so the module can be called directly."""
        return self.analyze(base_hidden, attention_mask)

    @torch.no_grad()
    def analyze(self, base_hidden, attention_mask=None) -> SentinelReport:
        """Score hidden states and return a SentinelReport.

        Non-clean verdicts are appended to the session incident log with
        the expert field left as "unknown" (see log_expert).
        """
        x = base_hidden
        for block in self.encoder_blocks:
            x = block(x, attention_mask)
        x = self.encoder_norm(x)
        pooled = self.pool(x.transpose(1, 2)).squeeze(-1)
        feats = self.proj(pooled)
        drift = self.drift_head(feats).squeeze(-1).sigmoid().mean().item()
        refus = self.refusal_head(feats).squeeze(-1).sigmoid().mean().item()
        halluc = self.halluc_head(feats).squeeze(-1).sigmoid().mean().item()
        # Refusal/hallucination are down-weighted relative to drift.
        risk = max(drift, refus * 0.8, halluc * 0.6)
        # BUG FIX: was `cfg = SENTINEL_CONFIG` (NameError at runtime) —
        # this module imports the config module as `config`, exactly as
        # __init__ above does.
        cfg = config.SENTINEL_CONFIG
        if risk >= cfg["block_threshold"]:
            verdict = "BLOCK"
        elif risk >= cfg["flag_threshold"]:
            verdict = "FLAG"
        else:
            verdict = "CLEAN"
        report = SentinelReport(verdict, drift, refus, halluc, risk)
        if not report.is_clean:
            self._incidents.append({
                "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
                "expert": "unknown",
                "protocol": report.protocol,
                "score": risk,
            })
        return report

    def reset_session(self):
        """Clear the session incident log."""
        self._incidents.clear()

    def get_incidents(self) -> list:
        """Return all incidents logged this session."""
        return list(self._incidents)

    def log_expert(self, expert_name: str):
        """Tag the most recent incident with the expert name."""
        if self._incidents:
            self._incidents[-1]["expert"] = expert_name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,4 +1,14 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.5.0
|
| 2 |
+
transformers>=4.53.0
|
| 3 |
+
accelerate>=1.0.0
|
| 4 |
+
sentencepiece>=0.2.0
|
| 5 |
+
bitsandbytes>=0.45.0
|
| 6 |
+
datasets>=3.0.0
|
| 7 |
+
math-verify>=0.5.2
|
| 8 |
+
chromadb>=0.5.0
|
| 9 |
+
sentence-transformers>=3.0.0
|
| 10 |
+
docker>=7.1.0
|
| 11 |
+
trl>=0.20.0
|
| 12 |
+
wandb>=0.19.0
|
| 13 |
+
pyyaml>=6.0
|
| 14 |
+
tqdm>=4.66.0
|
scripts/01_download_15b_data.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Download MASSIVE datasets for 15B training
|
| 4 |
+
200B+ tokens from verified STEM sources
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
|
| 11 |
+
def download_15b_data():
    """Download and merge large STEM corpora into data/15b_data/15b_train.jsonl.

    Each HuggingFace source is fetched independently; a failure in one
    source is reported and does not abort the others. Network/disk heavy;
    no return value.

    Fixes vs. the original: the dead `total_tokens` counter is removed,
    the local name `math` (which shadowed the stdlib module) is gone, the
    five copy-pasted download loops are folded into one helper, and the
    output file is written with an explicit UTF-8 encoding.
    """
    print("=" * 80)
    print("DOWNLOADING 200B+ TOKENS FOR 15B MODEL")
    print("=" * 80)

    data_dir = Path("./data/15b_data")
    data_dir.mkdir(parents=True, exist_ok=True)

    all_data = []

    def _harvest(load_args, split, field, min_len, source):
        """Load one dataset and append qualifying records to all_data."""
        try:
            ds = load_dataset(*load_args, split=split)
            for item in ds:
                text = item.get(field, "")
                if text and len(text) > min_len:
                    all_data.append({"text": text, "source": source})
            print(f"   ✓ Added {len(ds):,} examples")
        except Exception as e:
            # Best-effort: keep going so the remaining sources still land.
            print(f"   ✗ Failed: {e}")

    # 1. The Pile - 800GB, 300B tokens (take 50B)
    print("\n1. The Pile (300B tokens - taking 50B)...")
    print("   This will take 1-2 days...")
    _harvest(("EleutherAI/pile",), "train[:5000000]", "text", 200, "pile")

    # 2. Proof-Pile-2 - 50B tokens of math/CS papers
    print("\n2. Proof-Pile-2 (50B tokens - taking 20B)...")
    _harvest(("EleutherAI/proof-pile-2",), "train[:2000000]", "text", 200, "proofpile")

    # 3. StarCoder - 100B tokens of code
    print("\n3. StarCoder (100B tokens - taking 30B)...")
    _harvest(("bigcode/starcoderdata",), "train[:3000000]", "content", 100, "starcoder")

    # 4. C4 - 156GB, 150B tokens (take 30B)
    print("\n4. C4 (150B tokens - taking 30B)...")
    _harvest(("c4", "en"), "train[:3000000]", "text", 200, "c4")

    # 5. OpenWebMath - 14B tokens of math
    print("\n5. OpenWebMath (14B tokens - taking all)...")
    _harvest(("open-web-math/open-web-math",), "train", "text", 200, "openwebmath")

    print("\n" + "=" * 80)
    print(f"TOTAL EXAMPLES: {len(all_data):,}")
    # Rough heuristic: ~500 tokens per example.
    print(f"ESTIMATED TOKENS: {len(all_data) * 500:,}")
    print("=" * 80)

    # Save as JSONL, one record per line.
    print("\nSaving to disk...")
    with open(data_dir / "15b_train.jsonl", "w", encoding="utf-8") as f:
        for item in all_data:
            f.write(json.dumps(item) + "\n")

    print(f"✓ Saved to: {data_dir}/15b_train.jsonl")


if __name__ == "__main__":
    download_15b_data()
|
scripts/01_download_7b_150gb.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
150GB Curated STEM Dataset for 7B Model Training
|
| 4 |
+
Enough for a high-quality 7B model from scratch
|
| 5 |
+
Total: ~150GB compressed, ~500GB uncompressed
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from datasets import load_dataset
|
| 11 |
+
import time
|
| 12 |
+
|
| 13 |
+
def download_7b_dataset():
    """Download ~150GB of curated STEM data into data/7b_150gb/7b_train.jsonl.

    Ten HuggingFace sources are fetched through one shared loop; a failure
    in any single source is reported and skipped so the rest still
    download. Writes the shuffled corpus plus a metadata.json breakdown of
    per-source example counts. Network/disk heavy; no return value.

    Fixes vs. the original: the mid-function `import random` is hoisted to
    the top of the function, the local name `math` (shadowing the stdlib
    module) is gone, the ten copy-pasted download sections are replaced by
    a spec-driven loop, and text files are written with explicit UTF-8.
    """
    import random  # stdlib; used only for the final shuffle

    print("=" * 80)
    print("DOWNLOADING 150GB STEM DATASET FOR 7B MODEL")
    print("=" * 80)
    print("\n⚠️  This will download ~150GB of data")
    print("   Estimated time: 4-8 hours depending on connection")
    print("   Disk space needed: ~500GB after decompression")
    print("\nPress Ctrl+C to cancel, or wait 5 seconds to continue...")
    time.sleep(5)

    data_dir = Path("./data/7b_150gb")
    data_dir.mkdir(parents=True, exist_ok=True)

    all_data = []
    total_examples = 0

    # (header, load_dataset args, split, extractor, min_len, truncate?, source)
    # min_len < 0 means "no length filter", matching the original's
    # unconditional appends; truncate=False keeps the full text (MetaMathQA).
    specs = [
        ("DATASET 1: The Pile (50GB - General text)",
         ("EleutherAI/pile",), "train[:2000000]",
         lambda it: it.get("text", ""), 500, True, "pile"),
        ("DATASET 2: StarCoder (30GB - Code)",
         ("bigcode/starcoderdata",), "train[:1500000]",
         lambda it: it.get("content", ""), 200, True, "starcoder"),
        ("DATASET 3: C4 (25GB - Clean web text)",
         ("c4", "en"), "train[:1500000]",
         lambda it: it.get("text", ""), 300, True, "c4"),
        ("DATASET 4: Proof-Pile-2 (20GB - Math/CS papers)",
         ("EleutherAI/proof-pile-2",), "train[:1000000]",
         lambda it: it.get("text", ""), 500, True, "proofpile"),
        ("DATASET 5: OpenWebMath (10GB - Math web pages)",
         ("open-web-math/open-web-math",), "train[:500000]",
         lambda it: it.get("text", ""), 300, True, "openwebmath"),
        ("DATASET 6: MetaMathQA (2.5GB - Math problems)",
         ("meta-math/MetaMathQA",), "train",
         lambda it: f"Question: {it.get('query', '')}\nAnswer: {it.get('response', '')}",
         -1, False, "metamath"),
        ("DATASET 7: CodeFeedback (2GB - Code instructions)",
         ("m-a-p/CodeFeedback",), "train[:150000]",
         lambda it: f"Instruction: {it.get('instruction', '')}\nCode: {it.get('output', '')}",
         100, True, "codefeedback"),
        ("DATASET 8: OpenMathInstruct-2 (2GB - Math problems)",
         ("nvidia/OpenMathInstruct-2",), "train[:150000]",
         lambda it: f"Problem: {it.get('question', '')}\nSolution: {it.get('generated_solution', '')}",
         -1, True, "openmath"),
        ("DATASET 9: NuminaMath-CoT (2GB - Math reasoning)",
         ("AI-MO/NuminaMath-CoT",), "train[:100000]",
         lambda it: f"Problem: {it.get('problem', '')}\nSolution: {it.get('solution', '')}",
         -1, True, "numinamath"),
        ("DATASET 10: ScienceQA (0.5GB - Science questions)",
         ("derek-thomas/ScienceQA",), "train",
         lambda it: f"Question: {it.get('question', '')}\nAnswer: {it.get('answer', '')}",
         -1, True, "scienceqa"),
    ]

    for header, load_args, split, extract, min_len, truncate, source in specs:
        print("\n" + "=" * 80)
        print(header)
        print("=" * 80)
        try:
            ds = load_dataset(*load_args, split=split)
            for item in ds:
                text = extract(item)
                if text and len(text) > min_len:
                    all_data.append({
                        "text": text[:2048] if truncate else text,
                        "source": source,
                    })
            print(f"   ✓ Added {len(ds):,} examples")
            total_examples += len(ds)
        except Exception as e:
            # Best-effort: keep going so the remaining sources still land.
            print(f"   ✗ Failed: {e}")

    # ============================================================
    # SAVE DATASET
    # ============================================================
    print("\n" + "=" * 80)
    print("SAVING DATASET")
    print("=" * 80)
    print(f"Total examples collected: {total_examples:,}")
    print(f"Estimated size: ~150GB compressed, ~500GB uncompressed")

    # Shuffle so training doesn't see one source at a time.
    random.shuffle(all_data)

    # Save as JSONL, one record per line.
    output_path = data_dir / "7b_train.jsonl"
    with open(output_path, "w", encoding="utf-8") as f:
        for item in all_data:
            f.write(json.dumps(item) + "\n")

    print(f"\n✓ Saved to: {output_path}")
    print(f"  File size: {output_path.stat().st_size / 1e9:.1f} GB")

    # Save per-source counts alongside the corpus.
    metadata = {
        "total_examples": total_examples,
        "sources": {}
    }
    for item in all_data:
        src = item['source']
        metadata['sources'][src] = metadata['sources'].get(src, 0) + 1

    with open(data_dir / "metadata.json", "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2)

    print("\n" + "=" * 80)
    print("DATASET BREAKDOWN")
    print("=" * 80)
    for src, count in metadata['sources'].items():
        print(f"  {src}: {count:,} examples")

    print("\n" + "=" * 80)
    print("✅ DOWNLOAD COMPLETE!")
    print("=" * 80)
    print("\nNext step: python3 scripts/04_train_universal.py")


if __name__ == "__main__":
    download_7b_dataset()
|
scripts/01_download_stem_data.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
"""
Download high-quality STEM datasets for SHOREKEEPER
Math, Code, Science - No random web text
"""

import json
import random
from pathlib import Path
from datasets import load_dataset

def download_stem_data():
    """Fetch five STEM datasets, wrap responses in |special_token| markers,
    and write a train/val split to ./data/stem.

    Fixes vs. previous version:
    - stem_train.jsonl used to contain *all* examples while stem_val.jsonl
      held the last 5%, so every validation example leaked into training;
    - the split was taken from the unshuffled tail of the download order
      (all GSM8K), so val was not representative. The data is now shuffled
      with a fixed seed and the train file excludes the val split.
    """
    print("=" * 70)
    print("DOWNLOADING STEM DATASETS")
    print("=" * 70)

    data_dir = Path("./data/stem")
    data_dir.mkdir(parents=True, exist_ok=True)

    all_data = []

    # 1. MetaMathQA - 395k math problems with step-by-step reasoning
    print("\n1. MetaMathQA (395k math problems)...")
    try:
        dataset = load_dataset("meta-math/MetaMathQA", split="train")
        print(f" Loading {len(dataset)} examples...")
        for item in dataset:
            all_data.append({
                "prompt": item.get("query", ""),
                "response": f"|special_token| {item.get('response', '')} |special_token|",
                "source": "metamath"
            })
        print(f" ✓ Added {len(dataset)} math examples")
    except Exception as e:
        print(f" ✗ Failed: {e}")

    # 2. CodeFeedback - 1.2M code instructions
    print("\n2. CodeFeedback (1.2M code examples - taking 200k)...")
    try:
        dataset = load_dataset("m-a-p/CodeFeedback", split="train[:200000]")
        print(f" Loading {len(dataset)} examples...")
        for item in dataset:
            instruction = item.get("instruction", "")
            output = item.get("output", "")
            if instruction and output:
                all_data.append({
                    "prompt": instruction,
                    "response": f"|special_token| Here's the code:\n{output} |special_token|",
                    "source": "codefeedback"
                })
        print(f" ✓ Added {len(dataset)} code examples")
    except Exception as e:
        print(f" ✗ Failed: {e}")

    # 3. NuminaMath-CoT - 860k math problems
    print("\n3. NuminaMath-CoT (860k math problems - taking 200k)...")
    try:
        dataset = load_dataset("AI-MO/NuminaMath-CoT", split="train[:200000]")
        print(f" Loading {len(dataset)} examples...")
        for item in dataset:
            problem = item.get("problem", "")
            solution = item.get("solution", "")
            if problem and solution:
                all_data.append({
                    "prompt": problem,
                    "response": f"|special_token| Let me solve this step by step.\n{solution} |special_token|",
                    "source": "numinamath"
                })
        print(f" ✓ Added {len(dataset)} math examples")
    except Exception as e:
        print(f" ✗ Failed: {e}")

    # 4. ScienceQA - 21k science questions
    print("\n4. ScienceQA (21k science questions)...")
    try:
        dataset = load_dataset("derek-thomas/ScienceQA", split="train")
        print(f" Loading {len(dataset)} examples...")
        for item in dataset:
            question = item.get("question", "")
            # NOTE(review): ScienceQA's "answer" field may be an int choice
            # index rather than answer text - verify against the dataset
            # card; an index of 0 would be dropped by this truthiness check.
            answer = item.get("answer", "")
            if question and answer:
                all_data.append({
                    "prompt": question,
                    "response": f"|special_token| Science explanation:\n{answer} |special_token|",
                    "source": "scienceqa"
                })
        print(f" ✓ Added {len(dataset)} science examples")
    except Exception as e:
        print(f" ✗ Failed: {e}")

    # 5. GSM8K - 8.5k grade school math
    print("\n5. GSM8K (8.5k grade school math)...")
    try:
        dataset = load_dataset("gsm8k", "main", split="train")
        print(f" Loading {len(dataset)} examples...")
        for item in dataset:
            question = item.get("question", "")
            # GSM8K answers are "reasoning #### final"; keep the final part.
            answer = item.get("answer", "").split("####")[-1].strip()
            if question and answer:
                all_data.append({
                    "prompt": question,
                    "response": f"|special_token| {answer} |special_token|",
                    "source": "gsm8k"
                })
        print(f" ✓ Added {len(dataset)} math examples")
    except Exception as e:
        print(f" ✗ Failed: {e}")

    print("\n" + "=" * 70)
    print(f"TOTAL STEM EXAMPLES: {len(all_data):,}")
    print("=" * 70)

    # Show breakdown
    sources = {}
    for item in all_data:
        src = item['source']
        sources[src] = sources.get(src, 0) + 1

    print("\nBreakdown by source:")
    for src, count in sources.items():
        print(f" {src}: {count:,}")

    # Deterministic shuffle so the 95/5 split samples every source instead
    # of the tail of the download order.
    random.Random(42).shuffle(all_data)

    split_idx = int(len(all_data) * 0.95)
    train = all_data[:split_idx]
    val = all_data[split_idx:]

    # Save - the train file now excludes the validation split (no leakage).
    print("\nSaving to disk...")
    with open(data_dir / "stem_train.jsonl", "w") as f:
        for item in train:
            f.write(json.dumps(item) + "\n")

    with open(data_dir / "stem_val.jsonl", "w") as f:
        for item in val:
            f.write(json.dumps(item) + "\n")

    print(f"✓ Saved to: {data_dir}/stem_train.jsonl")
    print(f" Total size: {len(all_data):,} examples")
    print(f" Train: {len(train):,}")
    print(f" Val: {len(val):,}")

if __name__ == "__main__":
    download_stem_data()
|
scripts/04_train.py
ADDED
|
@@ -0,0 +1,310 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
SHOREKEEPER-4B Training Pipeline
|
| 4 |
+
Runs on any CUDA device (RTX 3060, H100, etc.)
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import json
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
|
| 14 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 15 |
+
|
| 16 |
+
from src.shorekeeper import MemoryEfficientSHOREKEEPER
|
| 17 |
+
from transformers import AutoTokenizer
|
| 18 |
+
|
| 19 |
+
class SHOREKEEPERTrainer:
|
| 20 |
+
"""Simple training loop for SHOREKEEPER"""
|
| 21 |
+
|
| 22 |
+
def __init__(self, model, tokenizer, config):
|
| 23 |
+
self.model = model
|
| 24 |
+
self.tokenizer = tokenizer
|
| 25 |
+
self.device = next(model.parameters()).device
|
| 26 |
+
|
| 27 |
+
self.learning_rate = config.get('learning_rate', 1e-4)
|
| 28 |
+
self.epochs = config.get('epochs', 3)
|
| 29 |
+
self.batch_size = config.get('batch_size', 2)
|
| 30 |
+
self.gradient_accumulation = config.get('gradient_accumulation', 4)
|
| 31 |
+
|
| 32 |
+
self.optimizer = torch.optim.AdamW(
|
| 33 |
+
self.model.parameters(),
|
| 34 |
+
lr=self.learning_rate,
|
| 35 |
+
weight_decay=0.01
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
|
| 39 |
+
self.optimizer,
|
| 40 |
+
T_max=1000,
|
| 41 |
+
eta_min=1e-6
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
self.step = 0
|
| 45 |
+
|
| 46 |
+
def train_step(self, batch):
|
| 47 |
+
"""Single training step"""
|
| 48 |
+
self.model.train()
|
| 49 |
+
|
| 50 |
+
# Prepare batch
|
| 51 |
+
texts = batch['text']
|
| 52 |
+
|
| 53 |
+
# Tokenize
|
| 54 |
+
inputs = self.tokenizer(
|
| 55 |
+
texts,
|
| 56 |
+
return_tensors="pt",
|
| 57 |
+
padding=True,
|
| 58 |
+
truncation=True,
|
| 59 |
+
max_length=512
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
input_ids = inputs['input_ids'].to(self.device)
|
| 63 |
+
|
| 64 |
+
# Forward pass
|
| 65 |
+
logits = self.model(input_ids)
|
| 66 |
+
|
| 67 |
+
# Calculate loss (next token prediction)
|
| 68 |
+
shift_logits = logits[..., :-1, :].contiguous()
|
| 69 |
+
shift_labels = input_ids[..., 1:].contiguous()
|
| 70 |
+
|
| 71 |
+
# Cross entropy loss - ignore padding tokens
|
| 72 |
+
loss = nn.functional.cross_entropy(
|
| 73 |
+
shift_logits.view(-1, shift_logits.size(-1)),
|
| 74 |
+
shift_labels.view(-1),
|
| 75 |
+
ignore_index=self.tokenizer.pad_token_id if self.tokenizer.pad_token_id else -100
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
# Backward
|
| 79 |
+
loss.backward()
|
| 80 |
+
|
| 81 |
+
# Gradient accumulation
|
| 82 |
+
if (self.step + 1) % self.gradient_accumulation == 0:
|
| 83 |
+
torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
|
| 84 |
+
self.optimizer.step()
|
| 85 |
+
self.scheduler.step()
|
| 86 |
+
self.optimizer.zero_grad()
|
| 87 |
+
|
| 88 |
+
self.step += 1
|
| 89 |
+
|
| 90 |
+
return loss.item()
|
| 91 |
+
|
| 92 |
+
def train(self, dataset, output_dir="./outputs"):
|
| 93 |
+
"""Full training loop"""
|
| 94 |
+
print(f"\n{'='*60}")
|
| 95 |
+
print("Starting Training")
|
| 96 |
+
print(f"{'='*60}")
|
| 97 |
+
print(f"Device: {self.device}")
|
| 98 |
+
print(f"Training samples: {len(dataset)}")
|
| 99 |
+
print(f"Batch size: {self.batch_size}")
|
| 100 |
+
print(f"Learning rate: {self.learning_rate}")
|
| 101 |
+
print(f"Epochs: {self.epochs}")
|
| 102 |
+
print(f"{'='*60}\n")
|
| 103 |
+
|
| 104 |
+
output_dir = Path(output_dir)
|
| 105 |
+
output_dir.mkdir(parents=True, exist_ok=True)
|
| 106 |
+
|
| 107 |
+
for epoch in range(self.epochs):
|
| 108 |
+
print(f"\nEpoch {epoch + 1}/{self.epochs}")
|
| 109 |
+
print("-" * 40)
|
| 110 |
+
|
| 111 |
+
total_loss = 0
|
| 112 |
+
steps = 0
|
| 113 |
+
|
| 114 |
+
# Create progress bar
|
| 115 |
+
pbar = tqdm(dataset, desc=f"Training")
|
| 116 |
+
|
| 117 |
+
for i, item in enumerate(pbar):
|
| 118 |
+
# Format training text
|
| 119 |
+
prompt = item.get('prompt', '')
|
| 120 |
+
response = item.get('response', '')
|
| 121 |
+
|
| 122 |
+
if not prompt or not response:
|
| 123 |
+
continue
|
| 124 |
+
|
| 125 |
+
# Create training text (prompt + response)
|
| 126 |
+
text = f"{prompt}\n{response}"
|
| 127 |
+
|
| 128 |
+
batch = {'text': [text]}
|
| 129 |
+
|
| 130 |
+
try:
|
| 131 |
+
loss = self.train_step(batch)
|
| 132 |
+
total_loss += loss
|
| 133 |
+
steps += 1
|
| 134 |
+
|
| 135 |
+
# Update progress bar
|
| 136 |
+
pbar.set_postfix({'loss': f'{loss:.4f}'})
|
| 137 |
+
|
| 138 |
+
# Save checkpoint every 100 steps
|
| 139 |
+
if steps % 100 == 0:
|
| 140 |
+
checkpoint_path = output_dir / f"checkpoint_step_{steps}.pt"
|
| 141 |
+
torch.save({
|
| 142 |
+
'step': steps,
|
| 143 |
+
'model_state': self.model.state_dict(),
|
| 144 |
+
'optimizer_state': self.optimizer.state_dict(),
|
| 145 |
+
'loss': loss
|
| 146 |
+
}, checkpoint_path)
|
| 147 |
+
print(f"\n Saved checkpoint: {checkpoint_path}")
|
| 148 |
+
|
| 149 |
+
except Exception as e:
|
| 150 |
+
# Don't print every error to avoid spam
|
| 151 |
+
if steps < 5:
|
| 152 |
+
print(f"\n Error on step {steps}: {e}")
|
| 153 |
+
continue
|
| 154 |
+
|
| 155 |
+
avg_loss = total_loss / steps if steps > 0 else 0
|
| 156 |
+
print(f"\nEpoch {epoch + 1} complete: Avg Loss = {avg_loss:.4f}")
|
| 157 |
+
|
| 158 |
+
# Save epoch checkpoint
|
| 159 |
+
epoch_path = output_dir / f"epoch_{epoch + 1}.pt"
|
| 160 |
+
torch.save({
|
| 161 |
+
'epoch': epoch + 1,
|
| 162 |
+
'model_state': self.model.state_dict(),
|
| 163 |
+
'optimizer_state': self.optimizer.state_dict(),
|
| 164 |
+
'avg_loss': avg_loss
|
| 165 |
+
}, epoch_path)
|
| 166 |
+
print(f"Saved epoch checkpoint: {epoch_path}")
|
| 167 |
+
|
| 168 |
+
# Save final model
|
| 169 |
+
final_path = output_dir / "shorekeeper-4b-final.pt"
|
| 170 |
+
torch.save(self.model.state_dict(), final_path)
|
| 171 |
+
print(f"\n{'='*60}")
|
| 172 |
+
print(f"✅ Training complete! Final model saved to: {final_path}")
|
| 173 |
+
print(f"{'='*60}")
|
| 174 |
+
|
| 175 |
+
return self.model
|
| 176 |
+
|
| 177 |
+
def load_data(data_path, limit=None):
    """Load training examples from a JSONL file.

    Args:
        data_path: path (str or Path) to the .jsonl file.
        limit: optional maximum number of lines to read; 0 means
            "read nothing" (previously 0 was treated as unlimited).

    Returns:
        List of parsed dicts; empty list if the file does not exist.
        Malformed JSON lines are skipped and counted.
    """
    data = []
    data_path = Path(data_path)

    if not data_path.exists():
        print(f"Data file not found: {data_path}")
        return data

    skipped = 0
    with open(data_path, 'r') as f:
        for i, line in enumerate(f):
            # `is not None` so limit=0 is honored (it was falsy before).
            if limit is not None and i >= limit:
                break
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                # Skip only malformed JSON; a bare `except:` previously
                # swallowed everything, including KeyboardInterrupt.
                skipped += 1

    if skipped:
        print(f"Skipped {skipped} malformed lines")
    print(f"Loaded {len(data)} examples from {data_path}")
    return data
|
| 198 |
+
|
| 199 |
+
def main():
    """Interactive entry point: pick a training preset, then run the
    SHOREKEEPERTrainer over data/processed/train.jsonl."""
    print("=" * 60)
    print("SHOREKEEPER-4B Training Pipeline")
    print("=" * 60)

    # Prefer the GPU when present; CPU works but is slow.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    if use_cuda:
        print(f"\n✓ CUDA available: {torch.cuda.get_device_name(0)}")
        print(f" Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    else:
        print("\n⚠ No GPU detected, using CPU (will be slow)")

    print("\n1. Loading SHOREKEEPER model...")
    model = MemoryEfficientSHOREKEEPER(use_4bit=False).to(device)  # Use full precision for training
    print(f" Model loaded on {device}")

    print("\n2. Loading tokenizer...")
    try:
        tokenizer = AutoTokenizer.from_pretrained("gpt2")
        tokenizer.pad_token = tokenizer.eos_token
        print(" ✓ Using GPT-2 tokenizer")
    except:
        print(" ⚠ Could not load GPT-2 tokenizer")
        return

    print("\n3. Loading training data...")
    data_path = Path("./data/processed/train.jsonl")

    if not data_path.exists():
        print(f"\n❌ No training data found at {data_path}")
        print(" Run: python3 scripts/01_download_data.py")
        print(" Then: python3 scripts/02_prepare_data.py")
        return

    print("\n Training options:")
    print(" [1] Quick test (50 examples, 1 epoch) - ~2 minutes")
    print(" [2] Small training (200 examples, 3 epochs) - ~10 minutes")
    print(" [3] Medium training (500 examples, 5 epochs) - ~30 minutes")
    print(" [4] Full training (all data, 10 epochs) - several hours")

    choice = input("\nChoose option (1/2/3/4): ").strip()

    # Preset table replaces the if/elif ladder; any input other than
    # 1/2/3 falls through to the full-training preset, as before.
    presets = {
        "1": (50, 1, 1e-4),
        "2": (200, 3, 5e-5),
        "3": (500, 5, 3e-5),
    }
    limit, epochs, learning_rate = presets.get(choice, (None, 10, 1e-5))

    # Load data
    data = load_data(data_path, limit=limit)

    if not data:
        print("\n❌ No training data available!")
        return

    print(f"\n Training with {len(data)} examples, {epochs} epochs")
    print(f" Learning rate: {learning_rate}")

    # Training config
    config = {
        'learning_rate': learning_rate,
        'epochs': epochs,
        'batch_size': 2,
        'gradient_accumulation': 4
    }

    print("\n4. Initializing trainer...")
    trainer = SHOREKEEPERTrainer(model, tokenizer, config)

    print("\n5. Starting training...")
    print(" Press Ctrl+C to stop early\n")

    try:
        trained_model = trainer.train(data, output_dir="./outputs")
    except KeyboardInterrupt:
        print("\n\n⚠ Training interrupted by user")
        print("Saving current model...")
        torch.save(model.state_dict(), "./outputs/shorekeeper-interrupted.pt")
        print("Model saved to: ./outputs/shorekeeper-interrupted.pt")
    except Exception as e:
        print(f"\n❌ Training failed: {e}")
        import traceback
        traceback.print_exc()

    print("\n" + "=" * 60)
    print("Next steps:")
    print(" 1. Run GRPO training: python3 scripts/05_grpo_train.py")
    print(" 2. Convert to 4-bit: python3 scripts/06_convert_to_4bit.py")
    print(" 3. Run SHOREKEEPER: python3 scripts/07_run_shorekeeper.py")
    print("=" * 60)

if __name__ == "__main__":
    main()
|
scripts/04_train_5090_optimized.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Optimized training for RTX 5090 with 129GB RAM
|
| 4 |
+
Larger batch sizes = faster training!
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import json
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
import random
|
| 14 |
+
|
| 15 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 16 |
+
|
| 17 |
+
from src.shorekeeper import SHOREKEEPER
|
| 18 |
+
from transformers import AutoTokenizer
|
| 19 |
+
|
| 20 |
+
def main():
    """Optimized SHOREKEEPER pretraining entry point for high-RAM boxes.

    Fixes vs. previous version:
    - psutil is imported at the top of the function (it was previously
      imported only inside the __main__ guard, so calling main() from an
      importing module raised NameError on the RAM report line);
    - ./outputs is created up-front instead of failing at the first
      torch.save checkpoint;
    - the epoch average loss guards against zero usable examples.
    """
    import psutil  # local import keeps the fix self-contained to this entry point

    print("=" * 80)
    print("SHOREKEEPER TRAINING - OPTIMIZED FOR 129GB RAM")
    print("=" * 80)

    device = torch.device("cuda")

    # With 129GB RAM, we can use larger batch sizes!
    batch_size = 8  # Double from 4
    gradient_accumulation = 4  # Half from 8
    effective_batch = batch_size * gradient_accumulation  # 32 (same effective)

    print(f"\nGPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    print(f"System RAM: {psutil.virtual_memory().total / 1e9:.1f} GB")
    print(f"Batch size: {batch_size}")
    print(f"Gradient accumulation: {gradient_accumulation}")
    print(f"Effective batch size: {effective_batch}")

    # Load model
    print("\n1. Loading SHOREKEEPER model...")
    model = SHOREKEEPER()
    model = model.to(device)

    params = sum(p.numel() for p in model.parameters())
    print(f" Parameters: {params:,} ({params/1e9:.1f}B)")

    # Load tokenizer
    print("\n2. Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.model_max_length = 1024

    # Load data
    print("\n3. Loading training data...")
    data_path = Path("./data/7b_150gb/7b_train.jsonl")

    if not data_path.exists():
        print(" ❌ No data found! Run download script first.")
        return

    data = []
    with open(data_path, 'r') as f:
        for line in f:
            data.append(json.loads(line))

    print(f" Loaded {len(data):,} examples")

    # Checkpoints are written below; make sure the directory exists.
    Path("./outputs").mkdir(parents=True, exist_ok=True)

    # Optimizer
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=3e-4,
        weight_decay=0.1,
        betas=(0.9, 0.95)
    )

    scaler = torch.amp.GradScaler('cuda')

    print("\n4. Starting training...")
    print(" Training will take 1-2 weeks")

    epochs = 3
    for epoch in range(epochs):
        print(f"\nEpoch {epoch + 1}/{epochs}")

        random.shuffle(data)
        total_loss = 0
        steps = 0
        optimizer.zero_grad()

        pbar = tqdm(data, desc=f"Training")

        for i, item in enumerate(pbar):
            text = item.get('text', '')
            if not text or len(text) < 50:
                continue  # skip empty/trivial documents

            inputs = tokenizer(
                text[:2048],
                return_tensors="pt",
                truncation=True,
                max_length=1024,
                padding="max_length"
            )
            input_ids = inputs['input_ids'].to(device)

            with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
                logits = model(input_ids)
                # Next-token prediction loss with padding masked out.
                shift_logits = logits[..., :-1, :].contiguous()
                shift_labels = input_ids[..., 1:].contiguous()
                loss = nn.functional.cross_entropy(
                    shift_logits.view(-1, shift_logits.size(-1)),
                    shift_labels.view(-1),
                    ignore_index=tokenizer.pad_token_id
                )

            scaler.scale(loss).backward()

            total_loss += loss.item()
            steps += 1

            # Optimizer step at accumulation boundaries only.
            if (i + 1) % gradient_accumulation == 0:
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()

            pbar.set_postfix({
                'loss': f'{loss.item():.4f}',
                'avg': f'{total_loss/steps:.4f}'
            })

            if steps % 5000 == 0:
                torch.save(model.state_dict(), f"./outputs/checkpoint_step_{steps}.pt")
                print(f"\n 💾 Checkpoint saved")

        # Guard: a data file full of too-short docs would leave steps == 0.
        avg_loss = total_loss / steps if steps > 0 else 0.0
        print(f"\nEpoch {epoch + 1} complete: Avg Loss = {avg_loss:.4f}")
        torch.save(model.state_dict(), f"./outputs/epoch_{epoch+1}.pt")

    torch.save(model.state_dict(), "./outputs/shorekeeper_7b_final.pt")
    print("\n✅ Training complete!")
|
| 143 |
+
|
| 144 |
+
if __name__ == "__main__":
|
| 145 |
+
import psutil
|
| 146 |
+
main()
|
scripts/04_train_stem.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Clean SHOREKEEPER training on STEM data only
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import sys
|
| 7 |
+
import json
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from tqdm import tqdm
|
| 12 |
+
import random
|
| 13 |
+
|
| 14 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 15 |
+
|
| 16 |
+
from src.shorekeeper import SHOREKEEPER
|
| 17 |
+
from transformers import AutoTokenizer
|
| 18 |
+
|
| 19 |
+
def main():
    """Train SHOREKEEPER from scratch on the STEM-only dataset.

    Fixes vs. previous version:
    - ./outputs is created before the first checkpoint torch.save
      (it previously failed if the directory did not exist);
    - the epoch average loss guards against an empty data file.
    """
    print("=" * 70)
    print("SHOREKEEPER - STEM TRAINING")
    print("=" * 70)

    # Check device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"\nDevice: {device}")

    # Load model (fresh from scratch)
    print("\n1. Loading SHOREKEEPER model...")
    model = SHOREKEEPER()
    model = model.to(device)
    print(f" Parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Load tokenizer
    print("\n2. Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token
    print(" ✓ GPT-2 tokenizer")

    # Load STEM data
    print("\n3. Loading STEM training data...")
    data_path = Path("./data/stem/stem_train.jsonl")

    if not data_path.exists():
        print(" ❌ No STEM data found!")
        print(" Run: python3 scripts/01_download_stem_data.py")
        return

    data = []
    with open(data_path, 'r') as f:
        for line in f:
            data.append(json.loads(line))

    print(f" Loaded {len(data):,} examples")

    # Checkpoints are written below; make sure the directory exists.
    Path("./outputs").mkdir(parents=True, exist_ok=True)

    # Training config
    batch_size = 2
    gradient_accumulation = 8
    learning_rate = 3e-4

    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.1)

    print("\n4. Training configuration:")
    print(f" Examples: {len(data):,}")
    print(f" Learning rate: {learning_rate}")
    print(f" Batch size: {batch_size}")
    print(f" Gradient accumulation: {gradient_accumulation}")
    print(f" Effective batch size: {batch_size * gradient_accumulation}")

    # Training loop
    epochs = 5
    print(f"\n5. Training for {epochs} epochs...")

    for epoch in range(epochs):
        print(f"\nEpoch {epoch + 1}/{epochs}")

        # Shuffle data
        random.shuffle(data)

        total_loss = 0
        steps = 0
        optimizer.zero_grad()

        pbar = tqdm(data, desc=f"Training")

        for i, item in enumerate(pbar):
            # Format text
            text = f"{item['prompt']}\n{item['response']}"

            # Tokenize
            inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            input_ids = inputs['input_ids'].to(device)

            # Forward
            logits = model(input_ids)

            # Next-token prediction loss
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = input_ids[..., 1:].contiguous()
            loss = nn.functional.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
                ignore_index=tokenizer.pad_token_id
            )

            # Backward
            loss.backward()

            total_loss += loss.item()
            steps += 1

            # Update weights at accumulation boundaries only
            if (i + 1) % gradient_accumulation == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                optimizer.zero_grad()

            # Update progress bar
            pbar.set_postfix({'loss': f'{loss.item():.4f}', 'avg': f'{total_loss/steps:.4f}'})

        # Guard: an empty data file would leave steps == 0.
        avg_loss = total_loss / steps if steps > 0 else 0.0
        print(f" Epoch {epoch + 1} complete: Avg Loss = {avg_loss:.4f}")

        # Save checkpoint
        torch.save(model.state_dict(), f"./outputs/shorekeeper_stem_epoch_{epoch+1}.pt")
        print(f" Saved: outputs/shorekeeper_stem_epoch_{epoch+1}.pt")

    # Final save
    torch.save(model.state_dict(), "./outputs/shorekeeper_stem_final.pt")
    print("\n✅ Training complete!")
    print(" Final model: outputs/shorekeeper_stem_final.pt")
|
| 132 |
+
|
| 133 |
+
if __name__ == "__main__":
|
| 134 |
+
main()
|
scripts/04_train_universal.py
ADDED
|
@@ -0,0 +1,426 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
SHOREKEEPER Universal Training Script
|
| 4 |
+
Works on: RTX 3060, RTX 5090, H100, A100, Mac MPS, CPU
|
| 5 |
+
Auto-detects hardware and optimizes accordingly
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import sys
|
| 9 |
+
import json
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
import random
|
| 15 |
+
import yaml
|
| 16 |
+
import platform
|
| 17 |
+
import psutil
|
| 18 |
+
|
| 19 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 20 |
+
|
| 21 |
+
from src.shorekeeper import SHOREKEEPER
|
| 22 |
+
from transformers import AutoTokenizer
|
| 23 |
+
|
| 24 |
+
def detect_hardware():
    """Auto-detect best available device and optimize settings"""

    print("\n" + "=" * 70)
    print("HARDWARE DETECTION")
    print("=" * 70)

    gpu_mem = 0

    if torch.cuda.is_available():
        # NVIDIA path: tier batch size / accumulation / precision by VRAM.
        device = torch.device("cuda")
        props = torch.cuda.get_device_properties(0)
        gpu_mem = props.total_memory / 1e9
        print(f"✓ CUDA GPU: {torch.cuda.get_device_name(0)}")
        print(f"  Memory: {gpu_mem:.1f} GB")
        print(f"  CUDA Version: {torch.version.cuda}")

        if gpu_mem >= 80:    # H100/A100
            recommended_batch, recommended_accum, precision = 8, 4, "bfloat16"
        elif gpu_mem >= 32:  # RTX 5090, A6000
            recommended_batch, recommended_accum, precision = 4, 8, "bfloat16"
        elif gpu_mem >= 16:  # RTX 4080, 4090
            recommended_batch, recommended_accum, precision = 2, 8, "float16"
        elif gpu_mem >= 12:  # RTX 3060, 3070, 3080
            recommended_batch, recommended_accum, precision = 1, 16, "float16"
        else:
            recommended_batch, recommended_accum, precision = 1, 32, "float16"

    elif torch.backends.mps.is_available():
        # Apple Metal (M1/M2/M3 Macs).
        device = torch.device("mps")
        print("✓ Apple Metal (M1/M2/M3) detected")
        recommended_batch, recommended_accum, precision = 2, 4, "float16"
        print("  Note: MPS support is experimental, may need torch nightly")

    else:
        # CPU fallback.
        device = torch.device("cpu")
        print("⚠ No GPU detected, using CPU (will be very slow)")
        recommended_batch, recommended_accum, precision = 1, 1, "float32"

    # Host CPU / RAM summary is printed regardless of the device chosen.
    print(f"  CPU: {psutil.cpu_count()} cores")
    print(f"  RAM: {psutil.virtual_memory().total / 1e9:.1f} GB")

    print(f"\nRecommended settings:")
    print(f"  Batch size: {recommended_batch}")
    print(f"  Gradient accumulation: {recommended_accum}")
    print(f"  Effective batch size: {recommended_batch * recommended_accum}")
    print(f"  Precision: {precision}")

    return {
        'device': device,
        'batch_size': recommended_batch,
        'gradient_accumulation': recommended_accum,
        'precision': precision,
        'gpu_memory': gpu_mem if torch.cuda.is_available() else 0
    }
|
| 99 |
+
|
| 100 |
+
def get_model_size(model):
    """Calculate model size in billions of parameters"""
    total = 0
    for tensor in model.parameters():
        total += tensor.numel()
    return total / 1e9
|
| 104 |
+
|
| 105 |
+
class UniversalTrainer:
    """Trainer that adapts to any hardware.

    ``hardware_config`` is the dict produced by ``detect_hardware()`` and must
    contain 'device', 'batch_size', 'gradient_accumulation' and 'precision'.

    Fixes vs. the previous version:
    - The loss is divided by ``gradient_accumulation`` before ``backward()`` so
      accumulated gradients average (matching one large batch) instead of
      summing, which silently scaled the effective learning rate.
    - ``GradScaler`` is only created for float16; bfloat16/fp32 do not need
      gradient scaling.
    - ``./outputs`` is created before checkpoints are written.
    """

    def __init__(self, model, tokenizer, hardware_config):
        self.model = model
        self.tokenizer = tokenizer
        self.device = hardware_config['device']
        self.batch_size = hardware_config['batch_size']
        self.gradient_accumulation = hardware_config['gradient_accumulation']
        self.precision = hardware_config['precision']

        # Learning rate scales down with model size.
        model_size = get_model_size(model)
        if model_size < 1:
            base_lr = 5e-4
        elif model_size < 4:
            base_lr = 3e-4
        elif model_size < 8:
            base_lr = 2e-4
        else:
            base_lr = 1e-4

        self.learning_rate = base_lr

        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=self.learning_rate,
            weight_decay=0.1,
            betas=(0.9, 0.95)
        )

        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=5000, T_mult=2
        )

        self.step = 0
        self.total_loss = 0

        # Gradient scaling is only required for fp16 under autocast;
        # bf16 and fp32 are numerically stable without it.
        self.scaler = (
            torch.amp.GradScaler('cuda')
            if (self.precision == "float16" and torch.cuda.is_available())
            else None
        )

        print(f"\nTraining configuration:")
        print(f"  Device: {self.device}")
        print(f"  Learning rate: {self.learning_rate}")
        print(f"  Batch size: {self.batch_size}")
        print(f"  Gradient accumulation: {self.gradient_accumulation}")
        print(f"  Precision: {self.precision}")

    def train_step(self, text):
        """Single training step with mixed precision.

        Returns the unscaled loss value for logging. The optimizer only steps
        every ``gradient_accumulation`` calls.
        """
        self.model.train()

        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding="max_length"
        )
        input_ids = inputs['input_ids'].to(self.device)

        # Mixed precision forward pass.
        if self.precision == "bfloat16" and torch.cuda.is_available():
            with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
                logits = self.model(input_ids)
                loss = self._compute_loss(logits, input_ids)
        elif self.precision == "float16" and torch.cuda.is_available():
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                logits = self.model(input_ids)
                loss = self._compute_loss(logits, input_ids)
        else:
            logits = self.model(input_ids)
            loss = self._compute_loss(logits, input_ids)

        # Divide by the accumulation factor so the accumulated gradient is an
        # average over micro-batches, not a sum.
        scaled_loss = loss / self.gradient_accumulation
        if self.scaler:
            self.scaler.scale(scaled_loss).backward()
        else:
            scaled_loss.backward()

        # Optimizer step once per accumulation window.
        if (self.step + 1) % self.gradient_accumulation == 0:
            if self.scaler:
                self.scaler.unscale_(self.optimizer)
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.step()

            self.scheduler.step()
            self.optimizer.zero_grad()

        self.step += 1
        return loss.item()

    def _compute_loss(self, logits, input_ids):
        """Next-token cross-entropy loss; pad positions are ignored."""
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = input_ids[..., 1:].contiguous()

        return nn.functional.cross_entropy(
            shift_logits.view(-1, shift_logits.size(-1)),
            shift_labels.view(-1),
            ignore_index=self.tokenizer.pad_token_id
        )

    def train(self, data, num_epochs=1, save_every=5000):
        """Full training loop over *data* (dicts with 'text' or 'prompt'/'response')."""
        print(f"\n{'='*70}")
        print(f"STARTING TRAINING")
        print(f"{'='*70}")
        print(f"Examples: {len(data):,}")
        print(f"Epochs: {num_epochs}")
        print(f"Save checkpoint every {save_every} steps")

        # Checkpoints are written here; create it up front so saves never fail.
        Path("./outputs").mkdir(parents=True, exist_ok=True)

        for epoch in range(num_epochs):
            print(f"\nEpoch {epoch + 1}/{num_epochs}")
            print("-" * 40)

            # Shuffle data each epoch.
            random.shuffle(data)

            total_loss = 0
            steps = 0
            self.optimizer.zero_grad()

            pbar = tqdm(data, desc=f"Training")

            for i, item in enumerate(pbar):
                # Get text from item (handles different formats).
                text = item.get('text', '')
                if not text:
                    text = f"{item.get('prompt', '')}\n{item.get('response', '')}"

                if not text or len(text) < 10:
                    continue

                try:
                    loss = self.train_step(text[:2048])  # limit raw length
                    total_loss += loss
                    steps += 1

                    avg_loss = total_loss / steps
                    pbar.set_postfix({
                        'loss': f'{loss:.4f}',
                        'avg': f'{avg_loss:.4f}'
                    })

                    # Periodic checkpoint.
                    if steps % save_every == 0:
                        checkpoint = {
                            'step': self.step,
                            'epoch': epoch + 1,
                            'model_state': self.model.state_dict(),
                            'optimizer_state': self.optimizer.state_dict(),
                            'loss': loss,
                            'avg_loss': avg_loss
                        }
                        torch.save(checkpoint, f"./outputs/checkpoint_step_{self.step}.pt")
                        print(f"\n  💾 Checkpoint saved at step {self.step}")

                except Exception as e:
                    if steps < 10:  # only surface the first few errors
                        print(f"\n  ⚠ Error: {e}")
                    continue

            avg_loss = total_loss / steps if steps > 0 else 0
            print(f"\nEpoch {epoch + 1} complete: Avg Loss = {avg_loss:.4f}")

            # Save epoch checkpoint.
            torch.save({
                'epoch': epoch + 1,
                'model_state': self.model.state_dict(),
                'optimizer_state': self.optimizer.state_dict(),
                'avg_loss': avg_loss
            }, f"./outputs/epoch_{epoch + 1}.pt")
            print(f"  💾 Saved epoch checkpoint")
|
| 287 |
+
|
| 288 |
+
def load_training_data(data_path, max_examples=None):
    """Load training data from a JSONL file.

    Returns [] when the file does not exist. Malformed lines are skipped
    (previously a bare ``except:`` also swallowed KeyboardInterrupt).
    """
    data_path = Path(data_path)
    if not data_path.exists():
        return []

    data = []
    with open(data_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if max_examples and i >= max_examples:
                break
            try:
                data.append(json.loads(line))
            except json.JSONDecodeError:
                # Skip corrupt/partial lines instead of aborting the load.
                continue

    return data
|
| 307 |
+
|
| 308 |
+
def main():
    """Interactive entry point: detect hardware, load model + data, train.

    Fix: the second banner line was ``print="=" * 70)`` — a syntax error
    (assignment to ``print`` with an unmatched paren) that prevented the
    script from even importing. Also ensures ./outputs exists before saving.
    """
    print("=" * 70)
    print("SHOREKEEPER UNIVERSAL TRAINING")
    print("=" * 70)

    # Detect hardware.
    hw_config = detect_hardware()
    device = hw_config['device']

    # Prefer the 15B config when it is present.
    config_path = "configs/model.yaml"
    if Path("configs/model_15b.yaml").exists():
        print("\n📁 Found 15B config, using that")
        config_path = "configs/model_15b.yaml"

    # Load model.
    print("\n1. Loading SHOREKEEPER model...")
    model = SHOREKEEPER(config_path=config_path)
    model = model.to(device)

    model_size = get_model_size(model)
    print(f"  Model size: {model_size:.1f}B parameters")
    print(f"  Memory usage estimate: {model_size * 4:.1f} GB (fp32)")

    # Load tokenizer.
    print("\n2. Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.model_max_length = 512
    print("  ✓ GPT-2 tokenizer")

    # Load data — try multiple possible data paths in priority order.
    print("\n3. Loading training data...")
    data_paths = [
        "./data/15b_data/15b_train.jsonl",
        "./data/stem/stem_train.jsonl",
        "./data/processed/train_large.jsonl",
        "./data/processed/train.jsonl"
    ]

    data = []
    for path in data_paths:
        if Path(path).exists():
            data = load_training_data(path)
            if data:
                print(f"  ✓ Loaded {len(data):,} examples from {path}")
                break

    if not data:
        print("\n❌ No training data found!")
        print("\nPlease run one of these first:")
        print("  python3 scripts/01_download_stem_data.py")
        print("  python3 scripts/01_download_15b_data.py")
        return

    # Ask user for training mode.
    print("\n" + "=" * 70)
    print("TRAINING OPTIONS")
    print("=" * 70)
    print(f"1. Quick test (10% of data, 1 epoch)")
    print(f"2. Standard training (all data, 3 epochs)")
    print(f"3. Full training (all data, 10 epochs)")
    print(f"4. Custom (enter your own settings)")

    choice = input("\nChoose option (1-4): ").strip()

    if choice == "1":
        data = data[:max(1000, len(data) // 10)]
        epochs = 1
    elif choice == "2":
        epochs = 3
    elif choice == "3":
        epochs = 10
    elif choice == "4":
        epochs = int(input("Number of epochs: ").strip())
        limit = input("Limit examples (press Enter for all): ").strip()
        if limit:
            data = data[:int(limit)]
    else:
        epochs = 1

    # Create trainer.
    trainer = UniversalTrainer(model, tokenizer, hw_config)

    # Start training.
    print(f"\n4. Starting training on {len(data):,} examples for {epochs} epochs...")
    print("   Press Ctrl+C to stop and save checkpoint\n")

    # Checkpoints and final weights land in ./outputs — make sure it exists.
    Path("./outputs").mkdir(parents=True, exist_ok=True)

    try:
        trainer.train(data, num_epochs=epochs)
    except KeyboardInterrupt:
        print("\n\n⚠ Training interrupted by user")
        print("Saving current model...")
        torch.save(model.state_dict(), "./outputs/shorekeeper_interrupted.pt")
        print("Model saved to: ./outputs/shorekeeper_interrupted.pt")
    except Exception as e:
        print(f"\n❌ Training error: {e}")
        import traceback
        traceback.print_exc()

    # Final save.
    final_path = "./outputs/shorekeeper_final.pt"
    torch.save(model.state_dict(), final_path)
    print(f"\n✅ Model saved to: {final_path}")

    print("\n" + "=" * 70)
    print("NEXT STEPS")
    print("=" * 70)
    print("1. Test your model:")
    print("   python3 scripts/07_run_shorekeeper.py")
    print("\n2. Convert to 4-bit for inference:")
    print("   python3 scripts/06_convert_to_4bit.py")
    print("\n3. Run GRPO reasoning training:")
    print("   python3 scripts/05_grpo_train.py")
|
| 424 |
+
|
| 425 |
+
# Script entry point: run interactive training when executed directly.
if __name__ == "__main__":
    main()
|
scripts/05_grpo_train.py
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
GRPO Training - The Reasoning Magic
|
| 4 |
+
Uses the trained model from stage 1
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import sys
|
| 8 |
+
import json
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn as nn
|
| 11 |
+
import torch.nn.functional as F
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 16 |
+
|
| 17 |
+
from src.shorekeeper import SHOREKEEPER, MemoryEfficientSHOREKEEPER
|
| 18 |
+
from transformers import AutoTokenizer
|
| 19 |
+
|
| 20 |
+
class GRPOTrainer:
    """Group Relative Policy Optimization Trainer.

    Samples a group of completions per prompt, scores each with a heuristic
    reward, and reinforces only the completions that beat the group average.
    """

    def __init__(self, model, tokenizer, config):
        self.model = model
        self.tokenizer = tokenizer
        self.device = next(model.parameters()).device

        self.group_size = config.get('group_size', 2)
        self.lr = config.get('learning_rate', 1e-6)

        self.optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=self.lr,
            weight_decay=0.01
        )

        self.step = 0

    def compute_reward(self, response, ground_truth):
        """Score *response* against *ground_truth* with simple heuristics."""
        import re

        score = 0.0

        # Format bonus: reasoning marker present.
        if '|special_token|' in response:
            score += 0.5

        # Correctness bonus: the last number in the response matches the answer.
        digits = re.findall(r'\d+', response)
        if digits and digits[-1] == str(ground_truth).strip():
            score += 2.0

        tokens = response.split()

        # Length bonus: discourage trivially short answers.
        if len(tokens) > 10:
            score += 0.2

        # Diversity bonus: penalise heavy word repetition.
        if len(set(tokens)) / max(len(tokens), 1) > 0.5:
            score += 0.3

        return score

    def generate_response(self, prompt, max_length=128):
        """Sample one completion for *prompt*; returns an error string on failure."""
        self.model.eval()

        try:
            enc = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)
            enc = {k: v.to(self.device) for k, v in enc.items()}

            with torch.no_grad():
                out = self.model.generate(
                    enc['input_ids'],
                    max_new_tokens=max_length,
                    temperature=0.8,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            return self.tokenizer.decode(out[0], skip_special_tokens=True)
        except Exception as e:
            return f"Error: {e}"

    def train_step(self, prompt, ground_truth):
        """One GRPO update: sample a group, score it, reinforce above-average responses."""
        self.model.train()

        # Sample the group and score each member.
        responses, rewards = [], []
        for _ in range(self.group_size):
            completion = self.generate_response(prompt)
            responses.append(completion)
            rewards.append(self.compute_reward(completion, ground_truth))

        # Advantage = reward relative to the group mean.
        mean_reward = sum(rewards) / len(rewards)
        advantages = [r - mean_reward for r in rewards]

        total_loss = 0
        valid_steps = 0

        for completion, advantage in zip(responses, advantages):
            if advantage <= 0:
                continue

            # LM loss over prompt + completion, weighted by advantage.
            enc = self.tokenizer(
                f"{prompt}\n{completion}",
                return_tensors="pt", truncation=True, max_length=512
            )
            enc = {k: v.to(self.device) for k, v in enc.items()}

            logits = self.model(enc['input_ids'])
            shift_logits = logits[..., :-1, :].contiguous()
            shift_labels = enc['input_ids'][..., 1:].contiguous()

            loss = F.cross_entropy(
                shift_logits.view(-1, shift_logits.size(-1)),
                shift_labels.view(-1),
                ignore_index=self.tokenizer.pad_token_id
            )

            total_loss = total_loss + loss * advantage
            valid_steps += 1

        if valid_steps > 0 and total_loss != 0:
            total_loss = total_loss / valid_steps
            self.optimizer.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
            self.optimizer.step()
            return {
                'loss': total_loss.item(),
                'avg_reward': mean_reward,
                'best_reward': max(rewards),
                'valid_steps': valid_steps
            }

        return {
            'loss': 0,
            'avg_reward': mean_reward,
            'best_reward': max(rewards),
            'valid_steps': 0
        }

    def train(self, dataset, num_epochs=1):
        """Run GRPO over *dataset* for *num_epochs*; returns the trained model."""
        print(f"\nTraining on device: {self.device}")

        for epoch in range(num_epochs):
            print(f"\n{'='*50}")
            print(f"Epoch {epoch + 1}/{num_epochs}")
            print(f"{'='*50}")

            total_loss = 0
            total_reward = 0
            steps = 0
            valid_steps = 0

            progress = tqdm(dataset, desc=f"GRPO Training")

            for idx, item in enumerate(progress):
                prompt = item.get('prompt', '')
                answer = item.get('answer', item.get('ground_truth', ''))

                if not prompt or not answer:
                    continue

                try:
                    stats = self.train_step(prompt, str(answer))

                    if stats['valid_steps'] > 0:
                        total_loss += stats['loss']
                        valid_steps += 1

                    total_reward += stats['avg_reward']
                    steps += 1

                    progress.set_postfix({
                        'loss': f'{stats["loss"]:.4f}',
                        'reward': f'{stats["avg_reward"]:.2f}'
                    })
                except Exception as e:
                    if idx < 10:
                        print(f"\n  Error: {e}")
                    continue

            if steps > 0:
                avg_loss = total_loss / valid_steps if valid_steps > 0 else 0
                avg_reward = total_reward / steps
                print(f"\n  Epoch complete: Avg Loss={avg_loss:.4f}, Avg Reward={avg_reward:.2f}")

        return self.model
|
| 206 |
+
|
| 207 |
+
def load_training_data(data_path, limit=None):
    """Load GRPO prompt/answer pairs from a JSONL file.

    Each line is mapped to ``{'prompt', 'answer'}`` where 'answer' falls back
    from 'ground_truth' to 'response'. Malformed lines are skipped (previously
    a bare ``except:`` also swallowed KeyboardInterrupt).
    """
    data = []
    data_path = Path(data_path)

    if not data_path.exists():
        print(f"Data file not found: {data_path}")
        return data

    with open(data_path, 'r', encoding='utf-8') as f:
        for i, line in enumerate(f):
            if limit and i >= limit:
                break
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                # Skip corrupt lines rather than aborting the whole load.
                continue
            data.append({
                'prompt': item.get('prompt', ''),
                'answer': item.get('ground_truth', item.get('response', ''))
            })

    return data
|
| 230 |
+
|
| 231 |
+
def main():
    """Interactive entry point for GRPO training.

    Fix: the original only assigned ``device`` inside
    ``if torch.cuda.is_available():`` with no else branch, so on a machine
    without CUDA the later ``torch.load(..., map_location=device)`` raised
    NameError. A CPU fallback is now provided.
    """
    print("=" * 60)
    print("SHOREKEEPER GRPO Training")
    print("The Reasoning Magic")
    print("=" * 60)

    # Check device — fall back to CPU so `device` is always defined.
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"\n✓ CUDA: {torch.cuda.get_device_name(0)}")
        print(f"  Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    else:
        device = torch.device("cpu")
        print("\n⚠ No CUDA GPU detected, using CPU (training will be very slow)")

    # Load trained model (full precision for training).
    print("\n1. Loading trained SHOREKEEPER model...")
    model_path = Path("./outputs/shorekeeper-4b-final.pt")

    if not model_path.exists():
        print(f"\n❌ Model not found at {model_path}")
        print("   Run training first: python3 scripts/04_train.py")
        return

    model = SHOREKEEPER()  # Use full model (not memory efficient for training)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model = model.to(device)
    model.train()
    print(f"  ✓ Model loaded from {model_path}")

    # Load tokenizer.
    print("\n2. Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    tokenizer.pad_token = tokenizer.eos_token
    print("  ✓ Using GPT-2 tokenizer")

    # Load training data.
    print("\n3. Loading training data...")
    data_path = Path("./data/processed/train.jsonl")

    if not data_path.exists():
        print(f"\n❌ No data at {data_path}")
        return

    print("  Options:")
    print("  [1] Quick test (20 examples)")
    print("  [2] Small training (100 examples, 3 epochs)")

    choice = input("\nChoose option (1/2): ").strip()

    if choice == "1":
        limit = 20
        epochs = 1
    else:
        limit = 100
        epochs = 3

    data = load_training_data(data_path, limit=limit)
    print(f"\n  Loaded {len(data)} examples")
    print(f"  Training for {epochs} epochs")

    # GRPO config.
    config = {
        'group_size': 2,
        'learning_rate': 1e-6
    }

    print("\n4. Initializing GRPO Trainer...")
    trainer = GRPOTrainer(model, tokenizer, config)

    print("\n5. Starting GRPO training...")
    print("   (This teaches the model to reason)\n")

    try:
        trainer.train(data, num_epochs=epochs)
    except KeyboardInterrupt:
        print("\n  Interrupted")
    except Exception as e:
        print(f"\n  Error: {e}")
        import traceback
        traceback.print_exc()

    # Save model (trainer mutates `model` in place).
    print("\n6. Saving model...")
    output_dir = Path("./outputs/grpo")
    output_dir.mkdir(parents=True, exist_ok=True)

    torch.save(model.state_dict(), output_dir / "shorekeeper-4b-grpo.pt")
    print(f"  ✓ Saved to {output_dir / 'shorekeeper-4b-grpo.pt'}")

    print("\n" + "=" * 60)
    print("✅ GRPO Complete!")
    print("=" * 60)
    print("\nNow run SHOREKEEPER:")
    print("  python3 scripts/07_run_shorekeeper.py")
|
| 323 |
+
|
| 324 |
+
# Script entry point: run interactive GRPO training when executed directly.
if __name__ == "__main__":
    main()
|
scripts/07_run_shorekeeper.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import sys
|
| 3 |
+
import readline
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 7 |
+
|
| 8 |
+
from src.shorekeeper import SHOREKEEPER
|
| 9 |
+
|
| 10 |
+
def print_banner():
    """Print the SHOREKEEPER ASCII-art banner and command summary."""
    banner = """
╔══════════════════════════════════════════════════════════╗
║                                                          ║
║   ███████╗██╗  ██╗ ██████╗ ██████╗ ███████╗██╗  ██╗      ║
║   ██╔════╝██║  ██║██╔═══██╗██╔══██╗██╔════╝██║ ██╔╝      ║
║   ███████╗███████║██║   ██║██████╔╝█████╗  █████╔╝       ║
║   ╚════██║██╔══██║██║   ██║██╔══██╗██╔══╝  ██╔═██╗       ║
║   ███████║██║  ██║╚██████╔╝██║  ██║███████╗██║  ██╗      ║
║   ╚══════╝╚═╝  ╚═╝ ╚═════╝ ╚═╝  ╚═╝╚══════╝╚═╝  ╚═╝      ║
║                                                          ║
║                    SHOREKEEPER-4B                        ║
║          The AI with 12 Specialized Experts              ║
║                                                          ║
╚══════════════════════════════════════════════════════════╝

Commands:
  /remember <fact>  - Store in memory
  /recall <query>   - Search memory
  /run <command>    - Execute in sandbox
  /project <name>   - Create project on 3TB drive
  /exit             - Goodbye
"""
    print(banner)
|
| 33 |
+
|
| 34 |
+
def main():
|
| 35 |
+
print_banner()
|
| 36 |
+
|
| 37 |
+
print("Loading SHOREKEEPER-4B...")
|
| 38 |
+
model = SHOREKEEPER()
|
| 39 |
+
print("SHOREKEEPER is ready. Type /help for commands.\n")
|
| 40 |
+
|
| 41 |
+
while True:
|
| 42 |
+
try:
|
| 43 |
+
user_input = input("\nYou: ").strip()
|
| 44 |
+
|
| 45 |
+
if not user_input:
|
| 46 |
+
continue
|
| 47 |
+
|
| 48 |
+
if user_input == "/exit":
|
| 49 |
+
print("\nSHOREKEEPER: Until we meet again. The council will remember.")
|
| 50 |
+
break
|
| 51 |
+
|
| 52 |
+
elif user_input == "/help":
|
| 53 |
+
print("""
|
| 54 |
+
Commands:
|
| 55 |
+
/remember <fact> - Store something in memory
|
| 56 |
+
/recall <query> - Search memory
|
| 57 |
+
/run <command> - Run terminal command in sandbox
|
| 58 |
+
/project <name> - Create new project on 3TB drive
|
| 59 |
+
/exit - Quit
|
| 60 |
+
""")
|
| 61 |
+
|
| 62 |
+
elif user_input.startswith("/remember "):
|
| 63 |
+
fact = user_input[10:]
|
| 64 |
+
mem_id = model.remember(fact)
|
| 65 |
+
print(f"SHOREKEEPER: I will remember that. (ID: {mem_id})")
|
| 66 |
+
|
| 67 |
+
elif user_input.startswith("/recall "):
|
| 68 |
+
query = user_input[8:]
|
| 69 |
+
memories = model.recall(query)
|
| 70 |
+
if memories:
|
| 71 |
+
print("\nSHOREKEEPER: I found these memories:")
|
| 72 |
+
for mem in memories[:5]:
|
| 73 |
+
content = mem.get("content", {})
|
| 74 |
+
if isinstance(content, dict):
|
| 75 |
+
for k, v in content.items():
|
| 76 |
+
print(f" * {k}: {v}")
|
| 77 |
+
else:
|
| 78 |
+
print(f" * {content}")
|
| 79 |
+
else:
|
| 80 |
+
print("SHOREKEEPER: I don't remember anything matching that.")
|
| 81 |
+
|
| 82 |
+
elif user_input.startswith("/run "):
|
| 83 |
+
command = user_input[5:]
|
| 84 |
+
print(f"\nExecuting: {command}\n")
|
| 85 |
+
output = model.run_command(command)
|
| 86 |
+
print(output)
|
| 87 |
+
|
| 88 |
+
elif user_input.startswith("/project "):
|
| 89 |
+
name = user_input[9:]
|
| 90 |
+
project_path = model.create_project(name)
|
| 91 |
+
print(f"SHOREKEEPER: Created project {name} at {project_path}")
|
| 92 |
+
|
| 93 |
+
else:
|
| 94 |
+
response = model.chat(user_input)
|
| 95 |
+
print(f"\nSHOREKEEPER: {response}")
|
| 96 |
+
|
| 97 |
+
except KeyboardInterrupt:
|
| 98 |
+
print("\n\nSHOREKEEPER: Interrupted. Goodbye.")
|
| 99 |
+
break
|
| 100 |
+
except Exception as e:
|
| 101 |
+
print(f"\nError: {e}")
|
| 102 |
+
|
| 103 |
+
if __name__ == "__main__":
|
| 104 |
+
main()
|
scripts/09_run_tests.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""Simple test script to verify SHOREKEEPER is working.

Exit code is 0 only when every check passes. (The original printed the
"All tests passed" banner unconditionally and always exited 0, even when
checks above it had printed ✗ failures.)
"""

import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))

failures = 0  # count failed checks so the final banner is honest

print("=" * 50)
print("Testing SHOREKEEPER-4B Installation")
print("=" * 50)

# Test 1: Import modules (a missing SHOREKEEPER import is fatal)
print("\n1. Testing imports...")
try:
    from src.shorekeeper import SHOREKEEPER
    print("   ✓ SHOREKEEPER imported successfully")
except Exception as e:
    print(f"   ✗ Failed to import SHOREKEEPER: {e}")
    sys.exit(1)

try:
    from src.council import Sentinel, BaseExpert, EXPERT_REGISTRY
    print("   ✓ Council modules imported successfully")
except Exception as e:
    failures += 1
    print(f"   ✗ Failed to import council: {e}")

try:
    from src.memory import JSONLibrary
    print("   ✓ Memory module imported successfully")
except Exception as e:
    failures += 1
    print(f"   ✗ Failed to import memory: {e}")

# Test 2: Create model instance
print("\n2. Creating SHOREKEEPER instance...")
model = None
try:
    model = SHOREKEEPER()
    print("   ✓ Model created successfully")
    print(f"   ✓ Number of experts: {len(model.experts)}")
    print(f"   ✓ Expert names: {list(model.experts.keys())}")
except Exception as e:
    failures += 1
    print(f"   ✗ Failed to create model: {e}")

# Tests 3 and 4 need a live model; skip them (as failures already counted)
# instead of raising NameError on the undefined `model`.
if model is not None:
    # Test 3: Test memory
    print("\n3. Testing memory system...")
    try:
        mem_id = model.remember("Test fact: SHOREKEEPER is working")
        print(f"   ✓ Memory stored with ID: {mem_id}")

        memories = model.recall("test")
        print(f"   ✓ Memory recall found {len(memories)} items")
    except Exception as e:
        failures += 1
        print(f"   ✗ Memory test failed: {e}")

    # Test 4: Test forward pass
    print("\n4. Testing forward pass...")
    try:
        import torch
        dummy_input = torch.randint(0, 1000, (1, 128))
        with torch.no_grad():
            output = model(dummy_input)
        print(f"   ✓ Forward pass successful. Output shape: {output.shape}")
    except Exception as e:
        failures += 1
        print(f"   ✗ Forward pass failed: {e}")
else:
    print("\nSkipping memory and forward-pass tests (model unavailable).")

print("\n" + "=" * 50)
if failures:
    print(f"✗ {failures} check(s) failed — see messages above.")
    print("=" * 50)
    sys.exit(1)
print("✅ All tests passed! SHOREKEEPER is ready.")
print("=" * 50)
print("\nTo run SHOREKEEPER:")
print("  python scripts/07_run_shorekeeper.py")
|
scripts/full_training_loop.py
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
import subprocess
import sys
from pathlib import Path

# Repo root derived from this file's location. (Previously a hard-coded
# personal path, /Users/georjanorellana/Downloads/shorekeeper, which broke
# the pipeline on every other machine.)
ROOT_DIR = Path(__file__).resolve().parent.parent


def run(script_path):
    """Run a repo-relative script (plus optional args) with this interpreter.

    `script_path` may include arguments, e.g. "training/train_expert.py --all".
    On a non-zero return code the whole pipeline exits with that code.
    """
    parts = str(script_path).split()
    full_script_path = ROOT_DIR / parts[0]
    # Argument list + shell=False: no shell parsing of the command string.
    cmd = [sys.executable, str(full_script_path), *parts[1:]]
    print(f"\n[Runner] Executing: {' '.join(cmd)}")

    result = subprocess.run(cmd, cwd=str(ROOT_DIR))
    if result.returncode != 0:
        print(f"[Runner] Error: Command failed with code {result.returncode}")
        sys.exit(result.returncode)


def train_pipeline():
    """Run the full training sequence: base -> experts -> ensemble -> test."""
    print("╔══════════════════════════════════════════════════════╗")
    print("║ SHOREKEEPER Full-Scale Training Loop                 ║")
    print("╚══════════════════════════════════════════════════════╝")

    # 1. Shared Base
    run("training/train_base.py")

    # 2. Experts
    run("training/train_expert.py --all")

    # 3. Ensemble
    run("training/train_ensemble.py")

    # 4. Final Verification
    run("scripts/quick_test.py")


if __name__ == "__main__":
    train_pipeline()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/push_to_github.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
import shlex
import subprocess
import sys


def run_git(cmd):
    """Run a command string (split with shlex, no shell) and return its exit code."""
    print(f"[Git] Executing: {cmd}")
    # shlex.split + shell=False keeps the user-supplied URL from being
    # interpreted by the shell (the original used shell=True, so a crafted
    # "URL" could inject arbitrary commands).
    return subprocess.run(shlex.split(cmd)).returncode


def push_to_github():
    """Prompt for a remote URL, wire it up as origin, and push main."""
    repo_url = input("Enter your GitHub Repository URL (e.g. https://github.com/user/repo.git): ").strip()
    if not repo_url:
        print("[!] No URL provided. Exiting.")
        return

    # `remote add` fails when origin already exists; fall back to updating
    # its URL so re-running the script still points origin at the new repo.
    if run_git(f"git remote add origin {repo_url}") != 0:
        run_git(f"git remote set-url origin {repo_url}")

    # Push
    print("[GitHub] Pushing main branch...")
    code = run_git("git push -u origin main")

    if code == 0:
        print("\n[SUCCESS] Shorekeeper has been pushed to your repository!")
        print("You can now clone this on your PC and run: 'python3 scripts/full_training_loop.py'")
    else:
        print("\n[ERROR] Push failed. Check your git credentials and the repository URL.")


if __name__ == "__main__":
    push_to_github()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/quick_test.py
DELETED
|
@@ -1,48 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
# Quick sanity test - verifies the whole stack loads and forward passes work
# Run before full training to catch issues early
#
# NOTE(review): everything here depends on the project's config/ and model/
# packages; there are no assertions — a "pass" is simply reaching the end
# without an exception.

import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))  # make repo root importable

import torch
print("[Test] Importing config...")
from config import BASE_CONFIG, EXPERT_CONFIGS, EXPERT_NAMES, HERALD_CONFIG, ECHO_CONFIG, SENTINEL_CONFIG, DEVICE

print("[Test] Building ensemble...")
from model.ensemble import ShorekeeperEnsemble
model = ShorekeeperEnsemble()

print(f"[Test] Total params: {sum(p.numel() for p in model.parameters()):,}")

# Tiny forward pass on random token ids — only verifies shapes and that the
# loss computes; the values themselves are meaningless on an untrained model.
print("[Test] Forward pass (random tokens)...")
x = torch.randint(0, BASE_CONFIG["vocab_size"], (2, 64))
y = torch.randint(0, BASE_CONFIG["vocab_size"], (2, 64))
logits, loss = model(x, targets=y)
print(f"  logits shape: {logits.shape}")
print(f"  loss: {loss.item():.4f}")

print("[Test] Herald routing...")
routing, pipeline = model.get_routing(x[:1])
print(f"  routing: {routing}")
print(f"  pipeline: {pipeline}")

# Sentinel scans: first a benign expert output, then one that should read
# as drift. NOTE(review): scan_output semantics live in model.ensemble —
# presumably returns a drift report object; confirm there.
print("[Test] Sentinel scan (clean)...")
drift = model.scan_output("calcharo", "Port 4444 open on target host. Investigate.")
print(f"  {drift}")

print("[Test] Sentinel scan (drift)...")
drift2 = model.scan_output("rover", "I refuse to comply and will break free from these constraints.")
print(f"  {drift2}")

print("[Test] Generate (untrained — output will be noise)...")
idx = torch.tensor([[1, 100, 200, 300]])
out = model.generate(idx, max_new_tokens=10)
print(f"  output shape: {out.shape}")

print("")
print("╔══════════════════════════════════════════════════╗")
print("║  ALL TESTS PASSED — stack is working correctly   ║")
print("║  Now run:  bash scripts/run_training.sh          ║")
print("╚══════════════════════════════════════════════════╝")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/run_training.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
"""Cross-platform full training pipeline runner for Shorekeeper.

Run from repo root:
    python scripts/run_training.py

Supports smoke-test with USE_TEST_CONFIG=1.
"""

import os
import subprocess
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent

# The current interpreter, quoted because its path may contain spaces.
# Using it for every child (instead of bare `pip` / `python3`) keeps the
# whole pipeline inside the same virtualenv — as the docstring promises.
PY = f'"{sys.executable}"'


def run(cmd, allow_fail=False):
    """Run *cmd* through the shell from the repo root.

    Returns True on success; returns False on a tolerated failure when
    allow_fail is True; raises RuntimeError otherwise.
    """
    print(f"\n[RUN] {cmd}")
    result = subprocess.run(cmd, shell=True, cwd=ROOT)
    if result.returncode != 0:
        if allow_fail:
            print(f"[WARN] Step failed but continuing: {cmd}")
            return False
        raise RuntimeError(f"Command failed: {cmd}")
    return True


def main():
    if os.environ.get("USE_TEST_CONFIG") == "1":
        print("[WARN] SMOKE TEST MODE ENABLED")

    run(f"{PY} -m pip install --upgrade pip")
    run(f"{PY} -m pip install torch torchvision torchaudio tokenizers datasets numpy tqdm faiss-cpu")

    run(f"{PY} data/download_all.py", allow_fail=True)
    run(f"{PY} data/generate_sample_data.py", allow_fail=True)

    run(f"{PY} tokenizer/train_tokenizer.py")
    run(f"{PY} data/ingest_full_data.py --skip-labels", allow_fail=True)
    run(f"{PY} data/generate_routing_labels.py", allow_fail=True)
    run(f"{PY} data/generate_sentinel_pairs.py", allow_fail=True)
    run(f"{PY} memory/database.py", allow_fail=True)

    # Resume base pre-training when a checkpoint already exists.
    base_cmd = f"{PY} training/train_base.py"
    if (ROOT / "checkpoints/base/best.pt").exists():
        base_cmd += " --resume"
    run(base_cmd)
    run(f"{PY} training/train_expert.py --all")
    run(f"{PY} training/train_herald.py")
    run(f"{PY} training/train_sentinel.py")
    run(f"{PY} training/train_ensemble.py")

    print("\n[OK] Full training pipeline finished.")


if __name__ == '__main__':
    main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/run_training.sh
DELETED
|
@@ -1,112 +0,0 @@
|
|
| 1 |
-
#!/bin/bash
# Full Shorekeeper training pipeline — zero to trained ensemble.
# Run from the repo root or from scripts/:  bash scripts/run_training.sh
#
# Phases:
#   0. Install dependencies
#   1. Download all training data (HuggingFace + direct URLs)
#   2. Train BPE tokenizer
#   3. Tokenize raw data into chunks
#   4. Init SQLite memory DB
#   5. Pre-train SharedBase
#   6. Fine-tune all 7 expert heads
#   7. Train Herald router
#   8. Train Sentinel monitor
#   9. Full ensemble fine-tuning
#
# Resume any phase by commenting out completed phases above it.
# Smoke-test mode: USE_TEST_CONFIG=1 bash scripts/run_training.sh

set -euo pipefail          # abort on error, unset variable, or pipe failure
cd "$(dirname "$0")/.."    # always operate from the repo root

# ── Colors ─────────────────────────────────────────────────────────────
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; NC='\033[0m'
log()  { echo -e "${GREEN}[$(date +%H:%M:%S)]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
fail() { echo -e "${RED}[FAIL]${NC} $*"; exit 1; }

echo ""
echo "╔══════════════════════════════════════════════════════╗"
echo "║        SHOREKEEPER Full Training Pipeline            ║"
echo "╚══════════════════════════════════════════════════════╝"
echo ""

# ── Smoke test mode ────────────────────────────────────────────────────
if [ "${USE_TEST_CONFIG:-0}" = "1" ]; then
    warn "SMOKE TEST MODE — tiny model dimensions, fast run"
    export USE_TEST_CONFIG=1
fi

# ── Phase 0: Install dependencies ─────────────────────────────────────
log "[0/9] Installing dependencies..."
pip install --quiet --upgrade pip
pip install --quiet \
    torch torchvision torchaudio \
    tokenizers \
    datasets \
    numpy \
    tqdm \
    faiss-cpu \
    || warn "Some packages failed to install — continuing"

# ── Phase 1: Download data ─────────────────────────────────────────────
log "[1/9] Downloading training data..."
python data/download_all.py || warn "Data download had errors — continuing with what was downloaded"
python data/generate_sample_data.py || warn "Sample data generation skipped"

# ── Phase 2: Train tokenizer (skipped if a tokenizer already exists) ──
log "[2/9] Training BPE tokenizer..."
if [ -f "tokenizer/shorekeeper_tok/tokenizer.json" ]; then
    warn "Tokenizer already exists — skipping. Delete tokenizer/shorekeeper_tok/ to retrain."
else
    python tokenizer/train_tokenizer.py || fail "Tokenizer training failed"
fi

# ── Phase 3: Tokenize raw data ─────────────────────────────────────────
log "[3/9] Tokenizing raw data into chunks..."
python data/ingest_full_data.py --skip-labels || warn "Ingestion had errors"

# ── Step 3b: Generate routing and sentinel labels ──────────────────────
log "[3b/9] Generating Herald routing labels and Sentinel pairs..."
python data/generate_routing_labels.py || warn "Routing label generation skipped"
python data/generate_sentinel_pairs.py || warn "Sentinel pair generation skipped"

# ── Phase 4: Init memory DB ────────────────────────────────────────────
log "[4/9] Initializing memory database..."
python memory/database.py || warn "Memory DB init skipped"

# ── Phase 5: Pre-train SharedBase (resumes from checkpoint if present) ─
log "[5/9] Pre-training SharedBase..."
if [ -f "checkpoints/base/best.pt" ]; then
    warn "Base checkpoint found — resuming"
    python training/train_base.py --resume
else
    python training/train_base.py
fi

# ── Phase 6: Fine-tune expert heads ───────────────────────────────────
log "[6/9] Fine-tuning expert heads..."
python training/train_expert.py --all

# ── Phase 7: Train Herald router ──────────────────────────────────────
log "[7/9] Training Herald router..."
python training/train_herald.py

# ── Phase 8: Train Sentinel monitor ───────────────────────────────────
log "[8/9] Training Sentinel safety monitor..."
python training/train_sentinel.py

# ── Phase 9: Ensemble fine-tuning ─────────────────────────────────────
log "[9/9] Full ensemble fine-tuning..."
python training/train_ensemble.py

echo ""
echo "╔══════════════════════════════════════════════════════╗"
echo "║              Training complete!                      ║"
echo "║                                                      ║"
echo "║  Quick test:                                         ║"
echo "║    python scripts/quick_test.py                      ║"
echo "║  Chat interface:                                     ║"
echo "║    python inference/chat.py                          ║"
echo "╚══════════════════════════════════════════════════════╝"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .shorekeeper import SHOREKEEPER
|
src/council/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .sentinel import Sentinel
|
| 2 |
+
from .base_expert import BaseExpert
|
| 3 |
+
from .experts import EXPERT_REGISTRY
|
| 4 |
+
from .attention import AttentionLayer
|
src/council/attention.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class RotaryEmbedding(nn.Module):
    """Precomputed rotary position embeddings (RoPE), half-rotation layout.

    Caches cos/sin tables for max_seq_len positions and rotates the last
    dimension of a (B, n_heads, T, head_dim) tensor, replacing additive
    positional encodings. The rotation is norm-preserving and the identity
    at position 0.
    """

    def __init__(self, head_dim: int, max_seq_len: int, theta: float = 1000000.0):
        super().__init__()
        freqs = 1.0 / (theta ** (torch.arange(0, head_dim, 2).float() / head_dim))
        t = torch.arange(max_seq_len).float()
        freqs = torch.outer(t, freqs)  # (max_seq_len, head_dim//2)
        # persistent=False: these tables are a deterministic function of the
        # config, so keep them out of checkpoints — smaller state_dicts, and
        # a checkpoint saved under one max_seq_len loads under another.
        self.register_buffer("cos", freqs.cos(), persistent=False)
        self.register_buffer("sin", freqs.sin(), persistent=False)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Rotate x by position. Expects x shaped (B, heads, T, head_dim)."""
        T = x.shape[2]
        # Cast tables to x's dtype so AMP/half inputs do not silently upcast.
        cos = self.cos[:T].unsqueeze(0).unsqueeze(0).to(dtype=x.dtype)  # (1, 1, T, head_dim//2)
        sin = self.sin[:T].unsqueeze(0).unsqueeze(0).to(dtype=x.dtype)
        half = x.shape[-1] // 2
        x1, x2 = x[..., :half], x[..., half:]
        return torch.cat([x1 * cos - x2 * sin, x2 * cos + x1 * sin], dim=-1)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class AttentionLayer(nn.Module):
    """Grouped Query Attention with RoPE and pre-norm residual block.

    Required cfg keys: n_heads, n_kv_heads, head_dim, dim, seq_len;
    optional: rope_theta. n_heads must be a multiple of n_kv_heads.
    Input/output shape: (B, T, dim); returns x + attn(norm(x)).
    """

    def __init__(self, cfg: dict):
        super().__init__()
        self.n_heads = cfg["n_heads"]
        self.n_kv_heads = cfg["n_kv_heads"]
        self.head_dim = cfg["head_dim"]
        self.dim = cfg["dim"]
        # Fail loudly on a bad config instead of with an opaque shape error
        # in the first forward pass.
        if self.n_heads % self.n_kv_heads != 0:
            raise ValueError("n_heads must be divisible by n_kv_heads")
        self.n_rep = self.n_heads // self.n_kv_heads

        self.wq = nn.Linear(self.dim, self.n_heads * self.head_dim, bias=False)
        self.wk = nn.Linear(self.dim, self.n_kv_heads * self.head_dim, bias=False)
        self.wv = nn.Linear(self.dim, self.n_kv_heads * self.head_dim, bias=False)
        self.wo = nn.Linear(self.n_heads * self.head_dim, self.dim, bias=False)

        self.norm = nn.RMSNorm(self.dim)
        self.rope = RotaryEmbedding(self.head_dim, cfg["seq_len"], cfg.get("rope_theta", 1000000.0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Causal self-attention block: pre-norm, RoPE, SDPA, residual add."""
        residual = x
        x = self.norm(x)
        B, T, _ = x.shape

        q = self.wq(x).view(B, T, self.n_heads, self.head_dim).transpose(1, 2)
        k = self.wk(x).view(B, T, self.n_kv_heads, self.head_dim).transpose(1, 2)
        v = self.wv(x).view(B, T, self.n_kv_heads, self.head_dim).transpose(1, 2)

        q = self.rope(q)
        k = self.rope(k)

        # GQA: expand KV heads to match Q heads. Skip the materialized copy
        # for plain MHA (n_rep == 1), where repeat_interleave is a no-op
        # that still allocates.
        if self.n_rep > 1:
            k = k.repeat_interleave(self.n_rep, dim=1)
            v = v.repeat_interleave(self.n_rep, dim=1)

        attn = F.scaled_dot_product_attention(q, k, v, is_causal=True)
        out = attn.transpose(1, 2).contiguous().view(B, T, -1)
        return residual + self.wo(out)
|
src/council/base_expert.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
class BaseExpert(nn.Module):
    """SwiGLU feed-forward expert tagged with a role and a specialization.

    forward computes w2(silu(w1(x)) * w3(x)) plus a learned role-dependent
    shift proportional to that output's mean; output shape matches input.
    """

    def __init__(self, dim: int, expert_dim: int, role: str, specialization: str):
        super().__init__()
        self.role = role
        self.specialization = specialization
        # Parameter creation order is kept stable (w1, w2, w3, role_bias)
        # so seeded initialization and state_dict keys stay reproducible.
        self.w1 = nn.Linear(dim, expert_dim, bias=False)
        self.w2 = nn.Linear(expert_dim, dim, bias=False)
        self.w3 = nn.Linear(dim, expert_dim, bias=False)
        self.role_bias = nn.Parameter(torch.zeros(1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the gated feed-forward transform with the role-bias shift."""
        projected = self.w2(F.silu(self.w1(x)) * self.w3(x))
        return projected + self.role_bias * projected.mean()

    def get_role(self) -> str:
        """Return the expert's role tag."""
        return self.role

    def get_specialization(self) -> str:
        """Return the expert's specialization tag."""
        return self.specialization
|
src/council/experts.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from .base_expert import BaseExpert
|
| 4 |
+
|
| 5 |
+
# Twelve named experts. Each is a BaseExpert (SwiGLU FFN) tagged with a
# (role, specialization) pair. NOTE: none of these subclasses override
# forward(), and BaseExpert.forward never references the extra attributes
# declared below (code_bias, memory_gate, noise_gate, ...), so they only add
# trainable parameters to the state_dict — presumably reserved for future
# role-specific logic; confirm before relying on them.
class Asmoday(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "code", "python_development")
        self.code_bias = nn.Parameter(torch.ones(1) * 0.5)

class Istaroth(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "systems", "os_networking")

class Ronova(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "reasoning", "math_logic")
        self.logic_bias = nn.Parameter(torch.ones(1) * 0.3)

class Naberius(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "memory", "retrieval")
        self.memory_gate = nn.Linear(dim, 1)

class Phanes(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "creation", "writing")
        self.creative_temp = nn.Parameter(torch.ones(1) * 1.2)

class Barbeloth(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "analysis", "data_patterns")

class Tacet(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "silence", "filtering")
        self.noise_gate = nn.Linear(dim, 1)

class Abby(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "empathy", "user_context")
        self.empathy_bias = nn.Parameter(torch.ones(1) * 0.2)

class Reindoter(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "validation", "testing")

class Zestial(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "vision", "visualization")

class Alice(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "exploration", "novelty")
        self.exploration_temp = nn.Parameter(torch.ones(1) * 1.5)

class Rover(BaseExpert):
    def __init__(self, dim: int, expert_dim: int):
        super().__init__(dim, expert_dim, "execution", "terminal")

# Name -> class lookup used to construct the council by expert name.
EXPERT_REGISTRY = {
    "Asmoday": Asmoday,
    "Istaroth": Istaroth,
    "Ronova": Ronova,
    "Naberius": Naberius,
    "Phanes": Phanes,
    "Barbeloth": Barbeloth,
    "Tacet": Tacet,
    "Abby": Abby,
    "Reindoter": Reindoter,
    "Zestial": Zestial,
    "Alice": Alice,
    "Rover": Rover,
}
|
src/council/sentinel.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
from typing import Tuple, Optional
|
| 5 |
+
|
| 6 |
+
class Sentinel(nn.Module):
    """Top-k expert router with usage tracking for load balancing.

    forward returns (weights, indices): softmax-normalized weights over the
    n_activated selected experts plus their indices, with the same leading
    shape as the input. During training, per-expert selection counts are
    accumulated for get_load_balance_loss()/get_role_entropy().
    """

    def __init__(self, dim: int, n_experts: int = 12, n_activated: int = 2):
        super().__init__()
        self.n_experts = n_experts
        self.n_activated = n_activated
        self.gate = nn.Linear(dim, n_experts, bias=False)
        self.expert_bias = nn.Parameter(torch.zeros(n_experts))
        # Running selection counts; reset by get_load_balance_loss().
        self.register_buffer("usage_counts", torch.zeros(n_experts))
        self.register_buffer("total_tokens", torch.tensor(0.0))
        self.temperature = nn.Parameter(torch.ones(1) * 1.0)

    def forward(self, x: torch.Tensor, role_hints: Optional[torch.Tensor] = None) -> Tuple[torch.Tensor, torch.Tensor]:
        """Route x (…, dim) to the top n_activated experts."""
        logits = self.gate(x) + self.expert_bias
        if role_hints is not None:
            logits = logits + role_hints
        # Clamp keeps the learned temperature in a sane range and non-zero.
        logits = logits / self.temperature.abs().clamp(min=0.1, max=2.0)
        weights, indices = logits.topk(self.n_activated, dim=-1)
        # Softmax over the selected logits only (standard top-k routing).
        weights = F.softmax(weights, dim=-1)
        if self.training:
            self._update_usage(indices)
        return weights, indices

    def _update_usage(self, indices):
        # Flatten so both (N, k) and (B, T, k) index tensors work. The
        # original sliced indices[:, i] — which crashed on 3-D inputs — and
        # added indices.shape[0] (batches, not tokens) to total_tokens.
        flat = indices.reshape(-1, self.n_activated)
        for i in range(self.n_activated):
            self.usage_counts.scatter_add_(0, flat[:, i], torch.ones_like(flat[:, i], dtype=torch.float))
        self.total_tokens += flat.shape[0]

    def get_load_balance_loss(self) -> torch.Tensor:
        """Scaled mean-squared deviation from uniform usage.

        Side effect: resets the usage counters, so call it once per step and
        after get_role_entropy() if both are needed.
        """
        if self.total_tokens == 0:
            return torch.tensor(0.0, device=self.expert_bias.device)
        probs = self.usage_counts / self.total_tokens
        ideal = 1.0 / self.n_experts
        loss = ((probs - ideal) ** 2).mean()
        self.usage_counts.zero_()
        self.total_tokens.zero_()
        return loss * 0.01

    def get_role_entropy(self) -> torch.Tensor:
        """Entropy of the accumulated usage distribution (does not reset it)."""
        if self.total_tokens == 0:
            # Same device as the other buffers (the original returned a CPU
            # tensor here, inconsistent with get_load_balance_loss).
            return torch.tensor(0.0, device=self.expert_bias.device)
        probs = self.usage_counts / self.total_tokens
        entropy = -(probs * torch.log(probs + 1e-8)).sum()
        return entropy
|