Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
# ============================================================================
|
| 2 |
-
#
|
| 3 |
# ============================================================================
|
| 4 |
-
import
|
| 5 |
-
|
| 6 |
-
from safetensors.torch import
|
| 7 |
-
from huggingface_hub import hf_hub_download
|
| 8 |
-
|
|
|
|
|
|
|
| 9 |
import dataclasses
|
| 10 |
|
| 11 |
@dataclasses.dataclass
|
|
@@ -27,238 +29,127 @@ class BDHConfig:
|
|
| 27 |
forget_rate: float = 0.1
|
| 28 |
use_rho_cache: bool = True
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
self.n_neurons = n_neurons
|
| 36 |
-
self.w = torch.zeros(n_neurons, n_neurons)
|
| 37 |
-
self.long_term_w = torch.zeros(n_neurons, n_neurons)
|
| 38 |
-
self.lr = 0.01
|
| 39 |
-
self.consolidation_rate = 0.01
|
| 40 |
-
self.forget_rate = 0.1
|
| 41 |
-
self.acc_pre = torch.zeros(n_neurons)
|
| 42 |
-
self.acc_post = torch.zeros(n_neurons)
|
| 43 |
-
self.threshold = 0.5
|
| 44 |
-
self.bcm_theta = torch.zeros(n_neurons)
|
| 45 |
-
self.lr_bcm = 0.001
|
| 46 |
-
self.target_activity = 0.5
|
| 47 |
-
self.step_count = 0
|
| 48 |
|
| 49 |
-
def adapt_weights(self, weight_matrix):
    """Return ``weight_matrix`` plus a small Hebbian plasticity update.

    The matrix (flattened to 2-D when it is 3-D) is summarised by its row
    and column means. Those are resized to ``self.n_neurons`` entries and
    integrated into ``self.acc_pre`` / ``self.acc_post``. Entries that reach
    ``self.threshold`` emit a pseudo-spike and have the threshold subtracted.
    The outer product of the two spike vectors, scaled by ``self.lr``, is
    accumulated into ``self.w``. A slice of ``self.w`` (tiled if it is
    smaller than the matrix), scaled by 0.01, is added to the matrix.

    The input tensor is not modified; a new tensor is returned. Every 10th
    call also invokes ``self.consolidate()``.

    The whole update runs under ``torch.no_grad()`` so that the persistent
    accumulators never become attached to the autograd graph of a
    ``weight_matrix`` that requires grad (which would otherwise keep a
    growing graph alive across calls).
    """

    def _fit(vec, size):
        # Truncate, or cyclically repeat then truncate, a 1-D tensor so it
        # has exactly `size` entries.
        if vec.shape[0] < size:
            vec = vec.repeat((size + vec.shape[0] - 1) // vec.shape[0])
        return vec[:size]

    original_shape = weight_matrix.shape

    with torch.no_grad():
        # Work on a 2-D view of the weights.
        if weight_matrix.dim() == 3:
            wm_2d = weight_matrix.reshape(-1, weight_matrix.shape[-1])
        else:
            wm_2d = weight_matrix
        rows, cols = wm_2d.shape[0], wm_2d.shape[1]

        # Pre-/post-synaptic activity proxies: row means and column means.
        a_pre = _fit(wm_2d.mean(dim=1), self.n_neurons)
        a_post = _fit(wm_2d.mean(dim=0), self.n_neurons)

        # Pseudo-spikes: integrate, fire at the threshold, subtract on fire.
        self.acc_pre += a_pre
        self.acc_post += a_post

        spike_pre = (self.acc_pre >= self.threshold).float()
        spike_post = (self.acc_post >= self.threshold).float()

        self.acc_pre -= spike_pre * self.threshold
        self.acc_post -= spike_post * self.threshold

        # Hebbian update of the short-term trace.
        self.w += self.lr * torch.outer(spike_pre, spike_post)

        # Take the matching slice of the trace; tile it when the trace is
        # smaller than the matrix in either dimension.
        update = self.w[:rows, :cols]
        if update.shape[0] < rows or update.shape[1] < cols:
            repeat_rows = (rows + update.shape[0] - 1) // update.shape[0]
            repeat_cols = (cols + update.shape[1] - 1) // update.shape[1]
            update = update.repeat(repeat_rows, repeat_cols)[:rows, :cols]

        update = update * 0.01

        if weight_matrix.dim() == 3:
            update = update.reshape(original_shape)
        weight_matrix = weight_matrix + update

    self.step_count += 1
    if self.step_count % 10 == 0:
        self.consolidate()

    return weight_matrix
|
| 119 |
-
|
| 120 |
-
def consolidate(self):
    """Fold the short-term trace into long-term memory, then decay it.

    ``self.long_term_w`` is increased in place by ``self.consolidation_rate``
    times the current ``self.w``; ``self.w`` is then rebound to a copy
    scaled by ``1 - self.forget_rate``.
    """
    short_term = self.w
    self.long_term_w += self.consolidation_rate * short_term
    retained_fraction = 1 - self.forget_rate
    self.w = short_term * retained_fraction
|
| 123 |
-
|
| 124 |
-
# ------------------------------------------------------------
|
| 125 |
-
# Загрузка shard'ов
|
| 126 |
-
# ------------------------------------------------------------
|
| 127 |
-
class ShardCache:
    """Lazily opens checkpoint shards from a Hub repo and serves per-layer tensors.

    ``weight_map`` maps full tensor names to the shard file that stores them
    (presumably the ``weight_map`` of a safetensors index file; confirm
    against the caller). Opened shard handles are kept in ``cached_shards``
    until ``close()`` is called.
    """

    def __init__(self, repo_id, weight_map):
        self.repo_id = repo_id
        self.weight_map = weight_map
        self.cached_shards = {}

    def get_layer_weights(self, layer_idx):
        """Return ``{param_name: tensor}`` for every tensor of layer ``layer_idx``.

        Returns an empty dict when no tensor name starts with
        ``model.layers.<layer_idx>.``. Shards that are not cached yet are
        downloaded and opened on the CPU.

        NOTE: the key is the second-to-last dotted component of the tensor
        name, so names that differ only in their last component (for example
        ``...q_proj.weight`` and ``...q_proj.bias``) map to the same key and
        the one iterated last wins.
        """
        prefix = f"model.layers.{layer_idx}."
        layer_tensors = {n: s for n, s in self.weight_map.items() if n.startswith(prefix)}
        if not layer_tensors:
            return {}

        for shard_file in set(layer_tensors.values()):
            if shard_file not in self.cached_shards:
                shard_path = hf_hub_download(repo_id=self.repo_id, filename=shard_file)
                self.cached_shards[shard_file] = safe_open(shard_path, framework="pt", device="cpu")

        weights = {}
        for name, shard_file in layer_tensors.items():
            param_name = name.split('.')[-2]
            weights[param_name] = self.cached_shards[shard_file].get_tensor(name)
        return weights

    def close(self):
        """Close every cached shard handle and forget it.

        Handles are removed from ``cached_shards`` as they are closed, so a
        later ``get_layer_weights`` call reopens the shard instead of reusing
        a closed handle, and calling ``close()`` twice is harmless.
        """
        while self.cached_shards:
            _, handle = self.cached_shards.popitem()
            handle.__exit__(None, None, None)
|
| 153 |
# -----------------------------------------------------------------------------
|
| 154 |
-
# ЗАГРУЗКА
|
| 155 |
# -----------------------------------------------------------------------------
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
o_proj = w['o_proj'].float()
|
| 188 |
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
v_expanded = v_proj.repeat(4, 1) # 1024 * 4 = 4096
|
| 205 |
-
v_reshaped = v_expanded.reshape(nh, N, D)
|
| 206 |
-
else:
|
| 207 |
-
v_reshaped = v_proj[:nh*N, :D].reshape(nh, N, D)
|
| 208 |
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
'encoder_v': encoder_v,
|
| 227 |
-
'decoder': decoder,
|
| 228 |
-
'lm_head': torch.randn(D, 256) * 0.02
|
| 229 |
-
}
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
# ------------------------------------------------------------
|
| 234 |
-
os.makedirs("bdh_weights_upload", exist_ok=True)
|
| 235 |
-
bdh_weights_safe = {k: v.cpu() for k, v in bdh_weights.items()}
|
| 236 |
-
save_file(bdh_weights_safe, "bdh_weights_upload/bdh_plasticity.safetensors")
|
| 237 |
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
"n_head": config.n_head,
|
| 243 |
-
"mlp_internal_dim_multiplier": config.mlp_internal_dim_multiplier,
|
| 244 |
-
"vocab_size": config.vocab_size,
|
| 245 |
-
"dropout": config.dropout
|
| 246 |
-
}
|
| 247 |
-
with open("bdh_weights_upload/bdh_config.json", "w") as f:
|
| 248 |
-
json.dump(config_dict, f, indent=2)
|
| 249 |
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
folder_path="bdh_weights_upload",
|
| 255 |
-
repo_id="Andrewstivan/AURA",
|
| 256 |
-
repo_type="model",
|
| 257 |
-
path_in_repo="bdh_plasticity",
|
| 258 |
-
commit_message="🧠 Честный перенос весов Aura в BDH (исправлены размеры v_proj)"
|
| 259 |
-
)
|
| 260 |
-
print("✅ Веса загружены в Andrewstivan/AURA/bdh_plasticity/")
|
| 261 |
-
else:
|
| 262 |
-
print("⚠️ HF_TOKEN не найден! Файлы сохранены локально.")
|
| 263 |
|
| 264 |
-
print("\n
|
|
|
|
|
|
|
|
|
| 1 |
# ============================================================================
|
| 2 |
+
# ТЕСТИРОВАНИЕ ЧЕСТНОЙ BDH
|
| 3 |
# ============================================================================
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn.functional as F
|
| 6 |
+
from safetensors.torch import load_file
|
| 7 |
+
from huggingface_hub import hf_hub_download
|
| 8 |
+
import json
|
| 9 |
+
import sys
|
| 10 |
+
import numpy as np
|
| 11 |
import dataclasses
|
| 12 |
|
| 13 |
@dataclasses.dataclass
|
|
|
|
| 29 |
forget_rate: float = 0.1
|
| 30 |
use_rho_cache: bool = True
|
| 31 |
|
| 32 |
+
# Report header, then pick the compute device (CUDA when available, else CPU).
print("=" * 70, "🧪 ТЕСТИРОВАНИЕ ЧЕСТНОЙ BDH (32 слоя, 4096, без усреднения)", "=" * 70, sep="\n")

if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"🖥️ Устройство: {device}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# -----------------------------------------------------------------------------
|
| 40 |
+
# 1. ЗАГРУЗКА МОДЕЛИ
|
| 41 |
# -----------------------------------------------------------------------------
|
| 42 |
+
# Download the exported config from the Hub and rebuild the model from it.
config_path = hf_hub_download(
    repo_id="Andrewstivan/AURA",
    filename="bdh_plasticity/bdh_config.json",
    repo_type="model"
)
# Read the JSON as UTF-8 explicitly instead of relying on the platform's
# default text encoding.
with open(config_path, 'r', encoding='utf-8') as f:
    config_dict = json.load(f)

config = BDHConfig(
    n_layer=config_dict['n_layer'],
    n_embd=config_dict['n_embd'],
    n_head=config_dict['n_head'],
    mlp_internal_dim_multiplier=config_dict['mlp_internal_dim_multiplier'],
    vocab_size=config_dict['vocab_size'],
    dropout=config_dict['dropout'],
    use_plasticity=True,  # plasticity is switched on for this run
)

# NOTE(review): `BDH` is not defined or imported in the part of the file
# visible here; confirm it is in scope before this line runs.
bdh_model = BDH(config).to(device)

# Download the converted weights and copy them into the model.
weights_path = hf_hub_download(
    repo_id="Andrewstivan/AURA",
    filename="bdh_plasticity/bdh_plasticity.safetensors",
    repo_type="model"
)
weights = load_file(weights_path)

with torch.no_grad():
    bdh_model.encoder.weight_fp32.data = weights['encoder'].to(device)
    bdh_model.encoder_v.weight_fp32.data = weights['encoder_v'].to(device)
    bdh_model.decoder.weight_fp32.data = weights['decoder'].to(device)

    # Refresh the ternary weights from the newly assigned fp32 weights.
    # NOTE(review): assumed to sit inside the no_grad block; verify.
    bdh_model.encoder.update_ternary_weights()
    bdh_model.encoder_v.update_ternary_weights()
    bdh_model.decoder.update_ternary_weights()

bdh_model.eval()
print(f"✅ Модель загружена. Параметров: {sum(p.numel() for p in bdh_model.parameters()):,}")
|
|
|
|
| 81 |
|
| 82 |
+
# -----------------------------------------------------------------------------
|
| 83 |
+
# 2. ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ВЕСОВ
|
| 84 |
+
# -----------------------------------------------------------------------------
|
| 85 |
+
# Test 1: share of -1 / 0 / +1 values in each ternary weight tensor and the
# Shannon entropy of that distribution relative to the maximum log2(3).
print("\n" + "=" * 70, "📊 ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ТЕРНАРНЫХ ВЕСОВ", "=" * 70, sep="\n")

ternary_layers = (
    ('encoder', bdh_model.encoder.weight_ternary),
    ('encoder_v', bdh_model.encoder_v.weight_ternary),
    ('decoder', bdh_model.decoder.weight_ternary),
)
for layer_name, ternary in ternary_layers:
    n_total = ternary.numel()
    n_minus, n_zero, n_plus = (
        (ternary == value).sum().item() for value in (-1, 0, 1)
    )

    # Empty buckets are dropped so that log2(0) is never evaluated.
    probs = np.array([n_minus, n_zero, n_plus]) / n_total
    nonzero_probs = probs[probs > 0]
    entropy = -np.sum(nonzero_probs * np.log2(nonzero_probs))
    max_entropy = np.log2(3)

    print(f"\n{layer_name}:")
    print(f" -1: {n_minus/n_total:.2%}")
    print(f" 0: {n_zero/n_total:.2%}")
    print(f" +1: {n_plus/n_total:.2%}")
    print(f" Энтропия: {entropy:.4f} / {max_entropy:.4f} бит")
    print(f" Использование: {entropy/max_entropy:.1%}")
|
| 110 |
+
|
| 111 |
+
# -----------------------------------------------------------------------------
|
| 112 |
+
# 3. ТЕСТ 2: ЖИВУЧЕСТЬ
|
| 113 |
+
# -----------------------------------------------------------------------------
|
| 114 |
+
# Section banner for test 2 (model liveness).
print("\n" + "=" * 70, "📊 ТЕСТ 2: ЖИВУЧЕСТЬ МОДЕЛИ", "=" * 70, sep="\n")
|
| 117 |
+
|
| 118 |
+
def text_to_bytes(text):
    """Encode ``text`` as UTF-8 and return its byte values as a tensor.

    The result is an int64 tensor with a leading batch dimension of 1,
    moved to the module-level ``device``.
    """
    byte_values = list(text.encode('utf-8'))
    byte_ids = torch.tensor(byte_values, dtype=torch.long)
    return byte_ids.unsqueeze(0).to(device)
|
| 120 |
|
| 121 |
+
# Smoke-test the forward pass on inputs of different lengths; a failure is
# reported (message truncated to 60 characters) instead of aborting the run.
sample_texts = ("Hello world", "The quick brown fox", "A" * 100)
for text in sample_texts:
    bytes_tensor = text_to_bytes(text)
    preview = text[:30]
    try:
        with torch.no_grad():
            logits, states = bdh_model.forward_with_states(bytes_tensor)
        print(f"✅ '{preview}...' -> OK (вход: {bytes_tensor.shape[1]}, выход: {logits.shape})")
    except Exception as e:
        print(f"❌ '{preview}...' -> {str(e)[:60]}")
|
| 129 |
|
| 130 |
+
# -----------------------------------------------------------------------------
|
| 131 |
+
# 4. ТЕСТ 3: СТАТИСТИКА АКТИВАЦИЙ
|
| 132 |
+
# -----------------------------------------------------------------------------
|
| 133 |
+
# Test 3: per-layer activation statistics for a single sentence.
print("\n" + "=" * 70, "📊 ТЕСТ 3: СТАТИСТИКА АКТИВАЦИЙ", "=" * 70, sep="\n")

text = "The quick brown fox jumps over the lazy dog"
bytes_tensor = text_to_bytes(text)

with torch.no_grad():
    _, states = bdh_model.forward_with_states(bytes_tensor)

rule = "-" * 60
print(f"\nАктивации по слоям (всего {len(states)} слоёв):")
print(rule)
print(f"{'Слой':<6} {'Mean':<10} {'Std':<10} {'Norm':<12} {'Sparsity':<10}")
print(rule)

for layer_idx, layer_state in enumerate(states):
    activations = layer_state.squeeze(0)  # drops dim 0 when it has size 1
    # Sparsity = fraction of entries whose magnitude is below 1e-4.
    near_zero = activations.abs() < 1e-4
    sparsity = near_zero.float().mean().item()
    mean_value = activations.mean()
    std_value = activations.std()
    norm_value = torch.norm(activations)
    print(f"{layer_idx:<6} {mean_value:<10.4f} {std_value:<10.4f} {norm_value:<12.2f} {sparsity:<10.2%}")

print("\n" + "=" * 70, "🎉 ТЕСТИРОВАНИЕ ЗАВЕРШЕНО!", "=" * 70, sep="\n")
|