"""
neural_config.py — Configuration and hyperparameters for MLX LoRA training.
"""
from dataclasses import dataclass, field
from pathlib import Path
import json
import os
@dataclass
class NeuralConfig:
    """Training hyperparameters and daemon configuration.

    Every setting is a plain dataclass field with a default, so
    ``NeuralConfig()`` is always constructible. Path-like fields
    (``base_dir``, ``adapter_dir``, ``replay_path``, ``lms_cli_path``) are
    only finalized by :meth:`resolve_paths`, which the persistence and
    export helpers call for you.
    """

    # Daemon
    daemon_port: int = 8766
    daemon_host: str = "0.0.0.0"

    # Model (auto-detected from LM Studio)
    model_key: str = ""  # e.g. "qwen3.5-9b-prism"
    model_path: str = ""  # e.g. "~/.lmstudio/models/.../model.gguf"
    model_architecture: str = ""  # e.g. "qwen2"

    # LoRA
    lora_rank: int = 32
    lora_alpha: float = 32.0  # scaling = alpha / rank
    lora_targets: list = field(
        default_factory=lambda: ["q_proj", "v_proj", "out_proj", "down_proj"]
    )
    lora_dropout: float = 0.0
    lora_num_layers: int = -1  # -1 = all layers, N = last N layers only

    # Training
    training_backend: str = "mlx"  # "mlx" (real autograd) or "ane" (legacy)
    learning_rate: float = 5e-4
    min_learning_rate: float = 5e-5  # cosine LR floor
    cosine_period_steps: int = 5000  # steps for one cosine period
    warmup_fraction: float = 0.1  # warmup as fraction of period
    steps_per_cycle: int = 1  # 1 step per example (epoch-style)
    batch_size: int = 0  # 0 = all available data in buffer
    epochs_per_cycle: int = 1  # Epochs per auto-training cycle
    train_epochs: int = 15  # Default epochs for manual /train
    early_stop_loss: float = 0.8  # Stop when avg epoch loss drops below
    early_stop_patience: int = 2  # Consecutive low-loss epochs before stop
    max_seq_len: int = 512
    gradient_clip: float = 1.0
    warmup_steps: int = 10
    auto_train: bool = True  # Train after each conversation turn
    replay_ratio: float = 0.3  # 30% replay buffer in each batch

    # Adam optimizer
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    adam_eps: float = 1e-8
    weight_decay: float = 0.0

    # Buffer
    rolling_buffer_size: int = 100  # Recent turns in memory
    replay_buffer_size: int = 500  # Historical turns on disk
    min_response_tokens: int = 10  # Skip training on short responses

    # ANE
    ane_compile_budget: int = 110  # Max compiles before restart
    ane_min_tensor_dim: int = 16  # ANE matmul dims must be multiples of 16
    ane_seq_len: int = 16  # ANE sequence length (must be multiple of 16)

    # Persistence
    base_dir: str = "~/.jarvis/fine-tune"
    adapter_dir: str = ""  # Set dynamically: base_dir/adapters/{model_key}/
    replay_path: str = ""  # Set dynamically: base_dir/replay.jsonl
    auto_save_interval: int = 10  # Save adapter every N training cycles

    # LM Studio
    lms_cli_path: str = ""  # Auto-detected
    lms_api_url: str = "http://localhost:1234"

    @property
    def lora_scaling(self) -> float:
        """Return the LoRA scaling factor, ``lora_alpha / lora_rank``.

        Raises:
            ZeroDivisionError: if ``lora_rank`` is 0.
        """
        return self.lora_alpha / self.lora_rank

    def resolve_paths(self):
        """Expand ``~`` in ``base_dir`` and fill in any unset derived paths.

        ``adapter_dir``, ``replay_path`` and ``lms_cli_path`` are only
        computed while they are still empty, so explicitly configured values
        are never overwritten. Calling this repeatedly is harmless.
        """
        root = Path(self.base_dir).expanduser()
        self.base_dir = str(root)

        if not self.adapter_dir:
            # Fall back to a shared "default" folder when no model is set.
            self.adapter_dir = str(root / "adapters" / (self.model_key or "default"))

        if not self.replay_path:
            self.replay_path = str(root / "replay.jsonl")

        # Auto-detect lms CLI: first existing candidate wins; the field is
        # left empty when none of them exists.
        if not self.lms_cli_path:
            candidates = (
                Path.home() / ".lmstudio" / "bin" / "lms",
                Path("/usr/local/bin/lms"),
            )
            found = next((c for c in candidates if c.exists()), None)
            if found is not None:
                self.lms_cli_path = str(found)

    def ensure_dirs(self):
        """Resolve paths, then create ``base_dir`` and ``adapter_dir``."""
        self.resolve_paths()
        for directory in (self.base_dir, self.adapter_dir):
            Path(directory).mkdir(parents=True, exist_ok=True)

    def save(self, path: str = ""):
        """Write every field to a JSON file.

        Args:
            path: Destination file. When empty, ``<base_dir>/config.json``
                is used.
        """
        # Resolve first so the default destination is built from the
        # expanded base_dir rather than a literal "~" directory.
        self.resolve_paths()
        target = Path(path) if path else Path(self.base_dir) / "config.json"
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "w", encoding="utf-8") as f:
            json.dump(vars(self), f, indent=2)

    @classmethod
    def load(cls, path: str) -> "NeuralConfig":
        """Build a config from a JSON file.

        Starts from the defaults and overrides each field present in the
        file. Keys that are not config fields are ignored; this includes
        derived values such as ``lora_scaling`` that :meth:`to_dict` adds.

        Args:
            path: JSON file to read.

        Returns:
            A new ``NeuralConfig`` with paths already resolved.
        """
        with open(path, encoding="utf-8") as f:
            data = json.load(f)

        cfg = cls()
        # A fresh instance's __dict__ holds exactly the dataclass fields.
        # Filtering on it (instead of hasattr) keeps JSON keys from hitting
        # the read-only property or shadowing methods.
        field_names = set(vars(cfg))
        for key, value in data.items():
            if key in field_names:
                setattr(cfg, key, value)
        cfg.resolve_paths()
        return cfg

    def to_dict(self) -> dict:
        """Return the fields plus ``lora_scaling`` as a dict for API responses.

        Paths are resolved first. The result is a shallow copy, so the
        ``lora_targets`` list is shared with this config.
        """
        self.resolve_paths()
        payload = dict(vars(self))
        payload["lora_scaling"] = self.lora_scaling
        return payload

    def update_from_dict(self, data: dict):
        """Apply the whitelisted keys of an API request to this config.

        Keys outside the whitelist below are silently ignored. Values are
        assigned as given, with no type or range validation.

        Args:
            data: Mapping of field name to new value.
        """
        allowed = {
            "learning_rate", "min_learning_rate", "cosine_period_steps",
            "warmup_fraction", "steps_per_cycle", "lora_rank", "lora_alpha",
            "lora_targets", "lora_num_layers", "training_backend",
            "auto_train", "replay_ratio", "gradient_clip", "warmup_steps",
            "rolling_buffer_size", "min_response_tokens", "auto_save_interval",
            "max_seq_len", "lora_dropout", "weight_decay",
            "epochs_per_cycle", "train_epochs",
            "early_stop_loss", "early_stop_patience",
        }
        for key, value in data.items():
            if key in allowed:
                setattr(self, key, value)
|