Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,167 +1,223 @@
|
|
| 1 |
# ============================================================================
|
| 2 |
-
#
|
| 3 |
# ============================================================================
|
| 4 |
-
|
| 5 |
-
import
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
)
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
print(f" decoder после обрезания: {fixed_dec.shape}")
|
| 98 |
-
bdh_model.decoder.weight_fp32.data = fixed_dec
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
# ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ВЕСОВ
|
| 109 |
-
print("\n" + "=" * 80)
|
| 110 |
-
print("📊 ТЕСТ 1/3: РАСПРЕДЕЛЕНИЕ ТЕРНАРНЫХ ВЕСОВ")
|
| 111 |
-
print("=" * 80)
|
| 112 |
-
print(f"{'Слой':<12} {'-1':<10} {'0':<10} {'+1':<10} {'Энтропия':<12} {'Использование':<12}")
|
| 113 |
-
print("-" * 60)
|
| 114 |
-
|
| 115 |
-
for name, param in [('encoder', bdh_model.encoder.weight_ternary), ('encoder_v', bdh_model.encoder_v.weight_ternary), ('decoder', bdh_model.decoder.weight_ternary)]:
|
| 116 |
-
total = param.numel()
|
| 117 |
-
minus1 = (param == -1).sum().item()
|
| 118 |
-
zero = (param == 0).sum().item()
|
| 119 |
-
plus1 = (param == 1).sum().item()
|
| 120 |
-
p = np.array([minus1, zero, plus1]) / total
|
| 121 |
-
p = p[p > 0]
|
| 122 |
-
entropy = -np.sum(p * np.log2(p))
|
| 123 |
-
max_entropy = np.log2(3)
|
| 124 |
-
print(f"{name:<12} {minus1/total:>8.1%} {zero/total:>9.1%} {plus1/total:>9.1%} {entropy:>10.4f} {entropy/max_entropy:>11.1%}")
|
| 125 |
-
|
| 126 |
-
# ТЕСТ 2: ЖИВУЧЕСТЬ
|
| 127 |
-
print("\n" + "=" * 80)
|
| 128 |
-
print("📊 ТЕСТ 2/3: ЖИВУЧЕСТЬ МОДЕЛИ")
|
| 129 |
-
print("=" * 80)
|
| 130 |
-
|
| 131 |
-
def text_to_bytes(text):
|
| 132 |
-
return torch.tensor(list(text.encode('utf-8')), dtype=torch.long).unsqueeze(0).to(device)
|
| 133 |
-
|
| 134 |
-
for text in ["Hello world", "The quick brown fox", "A" * 100]:
|
| 135 |
-
bytes_tensor = text_to_bytes(text)
|
| 136 |
-
try:
|
| 137 |
-
with torch.no_grad():
|
| 138 |
-
logits, states = bdh_model.forward_with_states(bytes_tensor)
|
| 139 |
-
print(f"✅ '{text[:30]}...' -> OK (вход: {bytes_tensor.shape[1]}, выход: {logits.shape})")
|
| 140 |
-
except Exception as e:
|
| 141 |
-
print(f"❌ '{text[:30]}...' -> {str(e)[:60]}")
|
| 142 |
-
|
| 143 |
-
# ТЕСТ 3: ПРОИЗВОДИТЕЛЬНОСТЬ
|
| 144 |
-
print("\n" + "=" * 80)
|
| 145 |
-
print("📊 ТЕСТ 3/3: ПРОИЗВОДИТЕЛЬНОСТЬ")
|
| 146 |
-
print("=" * 80)
|
| 147 |
-
print(f"{'Токенов':<10} {'Время (мс)':<12} {'Токенов/с':<12}")
|
| 148 |
-
print("-" * 50)
|
| 149 |
-
|
| 150 |
-
for length in [10, 50, 100]:
|
| 151 |
-
bytes_tensor = text_to_bytes("A" * length)
|
| 152 |
-
try:
|
| 153 |
-
for _ in range(3):
|
| 154 |
-
_, _ = bdh_model.forward_with_states(bytes_tensor)
|
| 155 |
-
start = time.time()
|
| 156 |
-
for _ in range(10):
|
| 157 |
-
_, _ = bdh_model.forward_with_states(bytes_tensor)
|
| 158 |
-
elapsed = time.time() - start
|
| 159 |
-
tokens = bytes_tensor.shape[1]
|
| 160 |
-
print(f"{tokens:<10} {elapsed*1000/10:<12.2f} {tokens*10/elapsed:<12.0f}")
|
| 161 |
-
except Exception as e:
|
| 162 |
-
print(f"{length:<10} ОШИБКА: {str(e)[:40]}")
|
| 163 |
-
|
| 164 |
-
# ИТОГ
|
| 165 |
-
print("\n" + "=" * 80)
|
| 166 |
-
print("🎉 ВСЕ ТЕСТЫ ПРОЙДЕНЫ!")
|
| 167 |
-
print("=" * 80)
|
|
|
|
| 1 |
# ============================================================================
|
| 2 |
+
# ЧЕСТНЫЙ ПЕРЕНОС: 32 СЛОЯ AURA -> 6 ЦИКЛОВ BDH (БЕЗ УСРЕДНЕНИЯ)
|
| 3 |
# ============================================================================
|
| 4 |
+
import json, torch, os, gc, sys
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
from safetensors.torch import save_file, safe_open
|
| 7 |
+
from huggingface_hub import hf_hub_download, HfApi
|
| 8 |
+
|
| 9 |
+
class Plasticity:
    """Spike-gated Hebbian state that nudges a weight matrix by a small additive update.

    The object keeps an ``n_neurons x n_neurons`` short-term matrix ``w`` and a
    long-term matrix ``long_term_w``, plus two accumulators that turn the row
    and column means of each presented weight matrix into binary
    "pseudo-spikes".

    ``bcm_theta``, ``lr_bcm`` and ``target_activity`` are initialised here but
    are not read by any method of this class.
    """

    # Scale applied to the slice of ``w`` before it is added to the weights.
    UPDATE_SCALE = 0.01

    def __init__(self, n_neurons):
        """Create zero-initialised plasticity state for ``n_neurons`` units.

        Raises:
            ValueError: if ``n_neurons`` is smaller than 1.
        """
        if n_neurons < 1:
            raise ValueError(f"n_neurons must be >= 1, got {n_neurons}")
        self.n_neurons = n_neurons
        self.w = torch.zeros(n_neurons, n_neurons)
        self.long_term_w = torch.zeros(n_neurons, n_neurons)
        self.lr = 0.01
        self.consolidation_rate = 0.01
        self.forget_rate = 0.1
        self.acc_pre = torch.zeros(n_neurons)
        self.acc_post = torch.zeros(n_neurons)
        self.threshold = 0.5
        self.bcm_theta = torch.zeros(n_neurons)
        self.lr_bcm = 0.001
        self.target_activity = 0.5
        self.step_count = 0

    def _fit_length(self, vec):
        """Truncate or tile a 1-D tensor so it has exactly ``n_neurons`` entries."""
        size = vec.shape[0]
        if size >= self.n_neurons:
            return vec[:self.n_neurons]
        repeats = (self.n_neurons + size - 1) // size  # ceil division
        return vec.repeat(repeats)[:self.n_neurons]

    def adapt_weights(self, weight_matrix):
        """Return ``weight_matrix`` plus a small update derived from the plasticity state.

        The input is viewed as 2-D by flattening every leading dimension into
        rows. Row and column means feed the pre/post accumulators; entries that
        reach ``threshold`` spike and have the threshold subtracted. The outer
        product of the spike vectors, scaled by ``lr``, is added to ``w``. The
        top-left slice of ``w`` (tiled when the input is larger than
        ``n_neurons``) is scaled by ``UPDATE_SCALE`` and added to the input.
        Every 10th call also runs :meth:`consolidate`.

        Args:
            weight_matrix: non-empty tensor with at least two dimensions.

        Returns:
            A new tensor with the same shape as ``weight_matrix``; the argument
            itself is not modified.

        Raises:
            ValueError: if ``weight_matrix`` has fewer than two dimensions or
                contains no elements.
        """
        if weight_matrix.dim() < 2:
            raise ValueError(
                f"weight_matrix must have at least 2 dimensions, got {weight_matrix.dim()}"
            )
        if weight_matrix.numel() == 0:
            raise ValueError("weight_matrix must not be empty")

        original_shape = weight_matrix.shape

        # The whole update runs without autograd so that the persistent state
        # never becomes part of a graph when the input requires grad.
        with torch.no_grad():
            wm_2d = weight_matrix.reshape(-1, original_shape[-1])
            rows, cols = wm_2d.shape

            # Integrate-and-fire style pseudo-spikes.
            self.acc_pre += self._fit_length(wm_2d.mean(dim=1))
            self.acc_post += self._fit_length(wm_2d.mean(dim=0))

            spike_pre = (self.acc_pre >= self.threshold).float()
            spike_post = (self.acc_post >= self.threshold).float()

            self.acc_pre -= spike_pre * self.threshold
            self.acc_post -= spike_post * self.threshold

            # Hebbian update of the short-term matrix.
            self.w += self.lr * torch.outer(spike_pre, spike_post)

            # Slice of w matching the input; tiled when the input is larger.
            update = self.w[:rows, :cols]
            if update.shape[0] < rows or update.shape[1] < cols:
                tile_rows = (rows + update.shape[0] - 1) // update.shape[0]
                tile_cols = (cols + update.shape[1] - 1) // update.shape[1]
                update = update.repeat(tile_rows, tile_cols)[:rows, :cols]

            update = (update * self.UPDATE_SCALE).reshape(original_shape)
            adapted = weight_matrix + update

        self.step_count += 1
        if self.step_count % 10 == 0:
            self.consolidate()

        return adapted

    def consolidate(self):
        """Copy a fraction of ``w`` into ``long_term_w`` and decay ``w``."""
        self.long_term_w += self.consolidation_rate * self.w
        self.w = self.w * (1 - self.forget_rate)
|
| 99 |
+
|
| 100 |
+
# ------------------------------------------------------------
|
| 101 |
+
# Загрузка shard'ов
|
| 102 |
+
# ------------------------------------------------------------
|
| 103 |
+
class ShardCache:
    """Lazily opened safetensors shards, queried one transformer layer at a time.

    ``weight_map`` maps full tensor names to shard file names (the
    ``weight_map`` section of a ``model.safetensors.index.json``). Shards are
    downloaded with ``hf_hub_download`` and opened with ``safe_open`` on first
    use, then kept in ``cached_shards`` until :meth:`close` is called.

    The object can also be used as a context manager, which calls
    :meth:`close` on exit.
    """

    def __init__(self, repo_id, weight_map):
        self.repo_id = repo_id
        self.weight_map = weight_map
        self.cached_shards = {}

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def get_layer_weights(self, layer_idx):
        """Return the tensors of layer ``layer_idx`` keyed by short parameter name.

        The short name is the second-to-last dotted component of the tensor
        name (``...self_attn.q_proj.weight`` -> ``q_proj``). When several
        tensors share that component, the one ending in ``.weight`` is kept so
        that a bias can never replace the weight matrix.

        Returns:
            A dict of tensors, or an empty dict when the weight map has no
            entries under ``model.layers.<layer_idx>.``.
        """
        prefix = f"model.layers.{layer_idx}."
        layer_tensors = {
            name: shard
            for name, shard in self.weight_map.items()
            if name.startswith(prefix)
        }
        if not layer_tensors:
            return {}

        # Open every shard this layer needs that is not open yet.
        for shard_file in set(layer_tensors.values()):
            if shard_file not in self.cached_shards:
                shard_path = hf_hub_download(repo_id=self.repo_id, filename=shard_file)
                self.cached_shards[shard_file] = safe_open(
                    shard_path, framework="pt", device="cpu"
                )

        weights = {}
        for name, shard_file in layer_tensors.items():
            param_name = name.split('.')[-2]
            if param_name in weights and not name.endswith(".weight"):
                # Keep the tensor already stored rather than overwrite it
                # with a bias or other auxiliary tensor of the same module.
                continue
            weights[param_name] = self.cached_shards[shard_file].get_tensor(name)
        return weights

    def close(self):
        """Close every open shard handle and forget it.

        Handles are released through their ``__exit__`` method. The cache is
        emptied even if a handle fails to close, so a later
        :meth:`get_layer_weights` call reopens shards instead of reusing
        closed handles.
        """
        try:
            for handle in self.cached_shards.values():
                handle.__exit__(None, None, None)
        finally:
            self.cached_shards.clear()
|
| 129 |
+
# -----------------------------------------------------------------------------
|
| 130 |
+
# ЗАГРУЗКА ВЕСОВ
|
| 131 |
+
# -----------------------------------------------------------------------------
|
| 132 |
+
# Conversion script: reads the attention projections of every source layer,
# passes them through the plasticity objects, saves the result as safetensors
# and, when HF_TOKEN is set, uploads it to the Hub.
repo_id = "ResplendentAI/Aura_v3_7B"
index_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors.index.json")
with open(index_path, 'r') as f:
    weight_map = json.load(f)['weight_map']
cache = ShardCache(repo_id, weight_map)

# BDH config (32 layers).
# NOTE(review): BDHConfig is neither defined nor imported in the visible part
# of this file — confirm where it is supposed to come from.
config = BDHConfig(n_layer=32, n_embd=4096, n_head=32, mlp_internal_dim_multiplier=1, vocab_size=256, dropout=0.1)
D, nh = config.n_embd, config.n_head
N = config.mlp_internal_dim_multiplier * D // nh

plasticity_enc = Plasticity(n_neurons=D)
plasticity_enc_v = Plasticity(n_neurons=D)
plasticity_dec = Plasticity(n_neurons=D)

# Start from zeros; these are the values saved if no layer is found at all.
encoder = torch.zeros(nh, N, D)
encoder_v = torch.zeros(nh, N, D)
decoder = torch.zeros(nh * N, D)

print(f"Начало: encoder={encoder.shape}, encoder_v={encoder_v.shape}, decoder={decoder.shape}")


def _split_heads(matrix, n_head, head_dim, width):
    """Reshape a 2-D projection to ``(n_head, head_dim, width)``.

    A matrix with fewer rows than ``n_head * head_dim`` is treated as holding
    shared heads: each group of ``head_dim`` rows is repeated until there are
    ``n_head`` of them.
    # NOTE(review): presumably needed because the source model uses
    # grouped-query attention for v_proj — confirm against its config.json.

    Raises:
        ValueError: if the rows cannot be split into a divisor of ``n_head``
            groups of ``head_dim`` rows.
    """
    rows = matrix.shape[0]
    if rows == n_head * head_dim:
        return matrix.reshape(n_head, head_dim, width)
    groups, remainder = divmod(rows, head_dim)
    if remainder or groups == 0 or n_head % groups:
        raise ValueError(
            f"cannot split a projection with {rows} rows into {n_head} heads of {head_dim} rows"
        )
    return matrix.reshape(groups, head_dim, width).repeat_interleave(n_head // groups, dim=0)


try:
    for layer_idx in tqdm(range(config.n_layer)):
        w = cache.get_layer_weights(layer_idx)
        if not w:
            continue

        # Full projection matrices in float32.
        q_proj = w['q_proj'].float()
        v_proj = w['v_proj'].float()
        o_proj = w['o_proj'].float()

        # Reshape to the BDH layout without truncation:
        # [nh * N, D] -> [nh, N, D] for the encoders, [nh * N, D] for the decoder.
        q_reshaped = _split_heads(q_proj, nh, N, D)
        v_reshaped = _split_heads(v_proj, nh, N, D)
        o_reshaped = o_proj.reshape(nh * N, D)

        # NOTE(review): each iteration replaces encoder/encoder_v/decoder, so
        # after the loop they hold the last processed layer's matrices plus
        # the accumulated plasticity update — confirm this is intended.
        encoder = plasticity_enc.adapt_weights(q_reshaped)
        encoder_v = plasticity_enc_v.adapt_weights(v_reshaped)
        decoder = plasticity_dec.adapt_weights(o_reshaped)
finally:
    # Release the open shard handles even if a layer fails.
    cache.close()

# Final consolidation.
plasticity_enc.consolidate()
plasticity_enc_v.consolidate()
plasticity_dec.consolidate()

# Save the resulting weights.
bdh_weights = {
    'encoder': encoder,
    'encoder_v': encoder_v,
    'decoder': decoder,
    'lm_head': torch.randn(D, config.vocab_size) * 0.02,
}
save_file(bdh_weights, "bdh_full_weights.safetensors")

# Staging folder for the upload.
os.makedirs("bdh_weights_upload", exist_ok=True)

# Save the weights in safetensors format.
bdh_weights_safe = {k: v.cpu() for k, v in bdh_weights.items()}
save_file(bdh_weights_safe, "bdh_weights_upload/bdh_plasticity.safetensors")

# Save the BDH config next to the weights.
config_dict = {
    "model_type": "bdh",
    "n_layer": config.n_layer,
    "n_embd": config.n_embd,
    "n_head": config.n_head,
    "mlp_internal_dim_multiplier": config.mlp_internal_dim_multiplier,
    "vocab_size": config.vocab_size,
    "dropout": config.dropout
}
with open("bdh_weights_upload/bdh_config.json", "w") as f:
    json.dump(config_dict, f, indent=2)

# Upload to the existing repository Andrewstivan/AURA.
token = os.environ.get('HF_TOKEN')
if token:
    api = HfApi(token=token)
    repo_id = "Andrewstivan/AURA"  # existing target repository

    api.upload_folder(
        folder_path="bdh_weights_upload",
        repo_id=repo_id,
        repo_type="model",
        path_in_repo="bdh_plasticity",  # files land under bdh_plasticity/
        commit_message="🧠 Добавлены веса BDH, полученные через пластичность из Aura"
    )

    print("✅ Веса загружены в Andrewstivan/AURA/bdh_plasticity/")
    print(" - bdh_plasticity.safetensors")
    print(" - bdh_config.json")
else:
    print("⚠️ HF_TOKEN не найден! Файлы сохранены локально в папке 'bdh_weights_upload'")
    print(" Загрузите их вручную в репозиторий Andrewstivan/AURA")

print("\n🎉 ГОТОВО!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|