Spaces:

Andrewstivan
/

aur

Sleeping

App Files Files Community

Andrewstivan committed on Apr 14

Commit

2fa8bd2

verified ·

1 Parent(s): 77b8172

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -226

app.py CHANGED Viewed

@@ -1,11 +1,13 @@
 # ============================================================================
-# ЧЕСТНЫЙ ПЕРЕНОС: 32 СЛОЯ AURA -> 6 ЦИКЛОВ BDH (БЕЗ УСРЕДНЕНИЯ)
 # ============================================================================
-import json, torch, os, gc, sys
-from tqdm import tqdm
-from safetensors.torch import save_file, safe_open
-from huggingface_hub import hf_hub_download, HfApi
-# ВРЕМЕННОЕ РЕШЕНИЕ: переопределяем конфиг прямо в app.py
 import dataclasses
 @dataclasses.dataclass
@@ -27,238 +29,127 @@ class BDHConfig:
     forget_rate: float = 0.1
     use_rho_cache: bool = True
-# Затем используем этот конфиг вместо импорта из bdh
-# config = BDHConfig(n_layer=32, ...)  # теперь работает!
-class Plasticity:
-    # Holds an n_neurons x n_neurons fast weight matrix (`w`) that is nudged by
-    # thresholded ("pseudo-spike") Hebbian updates, plus a slower accumulator
-    # (`long_term_w`) that is fed from `w` in consolidate().
-    def __init__(self, n_neurons):
-        self.n_neurons = n_neurons
-        self.w = torch.zeros(n_neurons, n_neurons)
-        self.long_term_w = torch.zeros(n_neurons, n_neurons)
-        self.lr = 0.01
-        self.consolidation_rate = 0.01
-        self.forget_rate = 0.1
-        self.acc_pre = torch.zeros(n_neurons)
-        self.acc_post = torch.zeros(n_neurons)
-        self.threshold = 0.5
-        # NOTE(review): bcm_theta, lr_bcm and target_activity are initialised
-        # here but are not read by any method of this class.
-        self.bcm_theta = torch.zeros(n_neurons)
-        self.lr_bcm = 0.001
-        self.target_activity = 0.5
-        self.step_count = 0
-    def adapt_weights(self, weight_matrix):
-        """Adapt a weight matrix through plasticity.
-
-        Updates the internal accumulators and `self.w` from the row/column
-        means of `weight_matrix`, then returns `weight_matrix` plus a scaled
-        slice of `self.w`. The result is a new tensor with the same shape as
-        the input; the input itself is not modified in place.
-        """
-        original_shape = weight_matrix.shape
-        # Flatten to 2D
-        if weight_matrix.dim() == 3:
-            wm_2d = weight_matrix.reshape(-1, weight_matrix.shape[-1])
-        else:
-            wm_2d = weight_matrix
-        # Compute a_pre (per-row mean) and a_post (per-column mean)
-        a_pre_raw = wm_2d.mean(dim=1)
-        a_post_raw = wm_2d.mean(dim=0)
-        # Fit a_pre to length n_neurons (truncate if longer, tile if shorter)
-        if a_pre_raw.shape[0] > self.n_neurons:
-            a_pre = a_pre_raw[:self.n_neurons]
-        elif a_pre_raw.shape[0] < self.n_neurons:
-            repeat_factor = (self.n_neurons + a_pre_raw.shape[0] - 1) // a_pre_raw.shape[0]
-            a_pre = a_pre_raw.repeat(repeat_factor)[:self.n_neurons]
-        else:
-            a_pre = a_pre_raw
-        # Fit a_post to length n_neurons (truncate if longer, tile if shorter)
-        if a_post_raw.shape[0] > self.n_neurons:
-            a_post = a_post_raw[:self.n_neurons]
-        elif a_post_raw.shape[0] < self.n_neurons:
-            repeat_factor = (self.n_neurons + a_post_raw.shape[0] - 1) // a_post_raw.shape[0]
-            a_post = a_post_raw.repeat(repeat_factor)[:self.n_neurons]
-        else:
-            a_post = a_post_raw
-        # Pseudo-spikes: accumulate, emit 1.0 where the accumulator reaches the
-        # threshold, then subtract the threshold from the units that spiked.
-        self.acc_pre += a_pre
-        self.acc_post += a_post
-        spike_pre = (self.acc_pre >= self.threshold).float()
-        spike_post = (self.acc_post >= self.threshold).float()
-        self.acc_pre -= spike_pre * self.threshold
-        self.acc_post -= spike_post * self.threshold
-        # Hebbian update: outer product of pre- and post-spikes, scaled by lr
-        delta = self.lr * torch.outer(spike_pre, spike_post)
-        self.w += delta
-        # Apply the adaptation to the ORIGINAL matrix
-        with torch.no_grad():
-            # Take a slice of self.w
-            update_slice = self.w[:wm_2d.shape[0], :wm_2d.shape[1]]
-            # If the slice is smaller than needed, tile it
-            if update_slice.shape[0] < wm_2d.shape[0] or update_slice.shape[1] < wm_2d.shape[1]:
-                repeat_rows = (wm_2d.shape[0] + update_slice.shape[0] - 1) // update_slice.shape[0]
-                repeat_cols = (wm_2d.shape[1] + update_slice.shape[1] - 1) // update_slice.shape[1]
-                update = update_slice.repeat(repeat_rows, repeat_cols)[:wm_2d.shape[0], :wm_2d.shape[1]]
-            else:
-                update = update_slice
-            # Damp the update before adding it to the weights
-            update = update * 0.01
-            if weight_matrix.dim() == 3:
-                update = update.reshape(original_shape)
-            weight_matrix = weight_matrix + update
-        # Consolidate on every 10th call
-        self.step_count += 1
-        if self.step_count % 10 == 0:
-            self.consolidate()
-        return weight_matrix
-    def consolidate(self):
-        # Add a fraction of the fast weights to long_term_w, then decay the
-        # fast weights by forget_rate.
-        self.long_term_w += self.consolidation_rate * self.w
-        self.w = self.w * (1 - self.forget_rate)
-# ------------------------------------------------------------
-# Загрузка shard'ов
-# ------------------------------------------------------------
-class ShardCache:
-    # Downloads and opens, on first use, the safetensors shards that hold a
-    # layer's tensors, and keeps the opened handles in `cached_shards` for reuse.
-    def __init__(self, repo_id, weight_map):
-        self.repo_id = repo_id
-        self.weight_map = weight_map
-        self.cached_shards = {}
-    def get_layer_weights(self, layer_idx):
-        # Returns {param_name: tensor} for every weight_map entry whose name
-        # starts with "model.layers.<layer_idx>."; an empty dict if none match.
-        prefix = f"model.layers.{layer_idx}."
-        layer_tensors = {n: s for n, s in self.weight_map.items() if n.startswith(prefix)}
-        if not layer_tensors:
-            return {}
-        # Download/open each shard file at most once
-        for shard_file in set(layer_tensors.values()):
-            if shard_file not in self.cached_shards:
-                shard_path = hf_hub_download(repo_id=self.repo_id, filename=shard_file)
-                self.cached_shards[shard_file] = safe_open(shard_path, framework="pt", device="cpu")
-        weights = {}
-        for name, shard_file in layer_tensors.items():
-            # Key by the second-to-last dotted component of the tensor name.
-            # NOTE(review): two tensors sharing that component (e.g. a weight
-            # and a bias of the same module, if present) would overwrite each
-            # other here — confirm against the checkpoint's tensor names.
-            param_name = name.split('.')[-2]
-            weights[param_name] = self.cached_shards[shard_file].get_tensor(name)
-        return weights
-    def close(self):
-        # Calls __exit__ directly on every cached handle; the handles were
-        # opened in get_layer_weights without a `with` statement.
-        for s in self.cached_shards.values():
-            s.__exit__(None, None, None)
 # -----------------------------------------------------------------------------
-# ЗАГРУЗКА ВЕСОВ
 # -----------------------------------------------------------------------------
-# Script setup: fetch the checkpoint index, build the shard cache, and create
-# the config, plasticity objects and placeholder tensors.
-print("=" * 60)
-print("🧠 ЧЕСТНЫЙ ПЕРЕНОС ВЕСОВ AURA -> BDH")
-print("=" * 60)
-repo_id = "ResplendentAI/Aura_v3_7B"
-index_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors.index.json")
-with open(index_path, 'r') as f:
-    weight_map = json.load(f)['weight_map']
-cache = ShardCache(repo_id, weight_map)
-config = BDHConfig(n_layer=32, n_embd=4096, n_head=32, mlp_internal_dim_multiplier=1, vocab_size=256, dropout=0.1)
-D, nh = config.n_embd, config.n_head
-# With the values above: N = 1 * 4096 // 32 = 128
-N = config.mlp_internal_dim_multiplier * D // nh
-plasticity_enc = Plasticity(n_neurons=D)
-plasticity_enc_v = Plasticity(n_neurons=D)
-plasticity_dec = Plasticity(n_neurons=D)
-# Zero placeholders; the per-layer loop further down rebinds these names.
-encoder = torch.zeros(nh, N, D)
-encoder_v = torch.zeros(nh, N, D)
-decoder = torch.zeros(nh * N, D)
-print(f"Начало: encoder={encoder.shape}, encoder_v={encoder_v.shape}, decoder={decoder.shape}")
-# Per-layer pass: read q/v/o projections, reshape them to the BDH layout and
-# run each through its Plasticity object.
-for layer_idx in tqdm(range(32), desc="Обработка слоёв"):
-    w = cache.get_layer_weights(layer_idx)
-    if not w:
-        continue
-    q_proj = w['q_proj'].float()
-    v_proj = w['v_proj'].float()
-    o_proj = w['o_proj'].float()
-    # --------------------------------------------------------
-    # FIX: account for Aura's actual tensor sizes
-    # --------------------------------------------------------
-    # q_proj: [4096, 4096] -> [32, 128, 4096]
-    if q_proj.shape == (4096, 4096):
-        q_reshaped = q_proj.reshape(nh, N, D)
-    else:
-        # fallback
-        q_reshaped = q_proj[:nh*N, :D].reshape(nh, N, D)
-    # v_proj: [1024, 4096] -> needs to be adapted to [32, 128, 4096]
-    # v_proj has 1024 rows; we need 32*128 = 4096 rows
-    if v_proj.shape == (1024, 4096):
-        # Repeat to get 4096 rows. repeat(4, 1) tiles the whole 1024-row
-        # matrix four times along dim 0 (block-wise, not row-by-row).
-        v_expanded = v_proj.repeat(4, 1)  # 1024 * 4 = 4096
-        v_reshaped = v_expanded.reshape(nh, N, D)
-    else:
-        v_reshaped = v_proj[:nh*N, :D].reshape(nh, N, D)
-    # o_proj: [4096, 4096] -> [4096, 4096]
-    if o_proj.shape == (4096, 4096):
-        o_reshaped = o_proj.reshape(nh * N, D)
-    else:
-        o_reshaped = o_proj[:nh*N, :D]
-    # Plasticity adapts the weights.
-    # NOTE(review): encoder / encoder_v / decoder are rebound on every
-    # iteration and their previous values are never read, so after the loop
-    # they hold the LAST processed layer's projections plus the accumulated
-    # plasticity update — earlier layers contribute only through the
-    # Plasticity objects' internal state.
-    encoder = plasticity_enc.adapt_weights(q_reshaped)
-    encoder_v = plasticity_enc_v.adapt_weights(v_reshaped)
-    decoder = plasticity_dec.adapt_weights(o_reshaped)
-plasticity_enc.consolidate()
-plasticity_enc_v.consolidate()
-plasticity_dec.consolidate()
-# lm_head is freshly sampled random noise, not derived from the source model.
-bdh_weights = {
-    'encoder': encoder,
-    'encoder_v': encoder_v,
-    'decoder': decoder,
-    'lm_head': torch.randn(D, 256) * 0.02
-}
-# ------------------------------------------------------------
-# СОХРАНЕНИЕ
-# ------------------------------------------------------------
-# Write the tensors and a JSON config into bdh_weights_upload/, then upload the
-# folder to the Hub when HF_TOKEN is set in the environment.
-# NOTE(review): the ShardCache created earlier in this script is never closed
-# in the visible code.
-os.makedirs("bdh_weights_upload", exist_ok=True)
-bdh_weights_safe = {k: v.cpu() for k, v in bdh_weights.items()}
-save_file(bdh_weights_safe, "bdh_weights_upload/bdh_plasticity.safetensors")
-config_dict = {
-    "model_type": "bdh",
-    "n_layer": config.n_layer,
-    "n_embd": config.n_embd,
-    "n_head": config.n_head,
-    "mlp_internal_dim_multiplier": config.mlp_internal_dim_multiplier,
-    "vocab_size": config.vocab_size,
-    "dropout": config.dropout
-}
-with open("bdh_weights_upload/bdh_config.json", "w") as f:
-    json.dump(config_dict, f, indent=2)
-token = os.environ.get('HF_TOKEN')
-if token:
-    api = HfApi(token=token)
-    api.upload_folder(
-        folder_path="bdh_weights_upload",
-        repo_id="Andrewstivan/AURA",
-        repo_type="model",
-        path_in_repo="bdh_plasticity",
-        commit_message="🧠 Честный перенос весов Aura в BDH (исправлены размеры v_proj)"
-    )
-    print("✅ Веса загружены в Andrewstivan/AURA/bdh_plasticity/")
-else:
-    # Without a token nothing is uploaded; the files stay in the local folder.
-    print("⚠️ HF_TOKEN не найден! Файлы сохранены локально.")
-print("\n🎉 ГОТОВО!")

 # ============================================================================
+# ТЕСТИРОВАНИЕ ЧЕСТНОЙ BDH
 # ============================================================================
+import torch
+import torch.nn.functional as F
+from safetensors.torch import load_file
+from huggingface_hub import hf_hub_download
+import json
+import sys
+import numpy as np
 import dataclasses
 @dataclasses.dataclass
     forget_rate: float = 0.1
     use_rho_cache: bool = True
+# Banner, then pick CUDA when available and fall back to CPU otherwise.
+print("=" * 70)
+print("🧪 ТЕСТИРОВАНИЕ ЧЕСТНОЙ BDH (32 слоя, 4096, без усреднения)")
+print("=" * 70)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"🖥️ Устройство: {device}")
 # -----------------------------------------------------------------------------
+# 1. ЗАГРУЗКА МОДЕЛИ
 # -----------------------------------------------------------------------------
+# Download the saved config and weights from the Hub, build the model and copy
+# the three weight tensors into it.
+config_path = hf_hub_download(
+    repo_id="Andrewstivan/AURA",
+    filename="bdh_plasticity/bdh_config.json",
+    repo_type="model"
+)
+with open(config_path, 'r') as f:
+    config_dict = json.load(f)
+# NOTE(review): `use_plasticity` is not among the BDHConfig fields visible in
+# this diff — confirm the dataclass declares it.
+config = BDHConfig(
+    n_layer=config_dict['n_layer'],
+    n_embd=config_dict['n_embd'],
+    n_head=config_dict['n_head'],
+    mlp_internal_dim_multiplier=config_dict['mlp_internal_dim_multiplier'],
+    vocab_size=config_dict['vocab_size'],
+    dropout=config_dict['dropout'],
+    use_plasticity=True,  # ← Enable it!
+)
+# NOTE(review): `BDH` is neither imported nor defined in the visible part of
+# this diff — confirm it is provided elsewhere in app.py.
+bdh_model = BDH(config).to(device)
+weights_path = hf_hub_download(
+    repo_id="Andrewstivan/AURA",
+    filename="bdh_plasticity/bdh_plasticity.safetensors",
+    repo_type="model"
+)
+weights = load_file(weights_path)
+with torch.no_grad():
+    # Overwrite the underlying .data of each weight_fp32 with the loaded tensor
+    bdh_model.encoder.weight_fp32.data = weights['encoder'].to(device)
+    bdh_model.encoder_v.weight_fp32.data = weights['encoder_v'].to(device)
+    bdh_model.decoder.weight_fp32.data = weights['decoder'].to(device)
+    # Refresh the ternary weights (presumably re-derived from weight_fp32;
+    # update_ternary_weights is defined outside this diff — TODO confirm)
+    bdh_model.encoder.update_ternary_weights()
+    bdh_model.encoder_v.update_ternary_weights()
+    bdh_model.decoder.update_ternary_weights()
+bdh_model.eval()
+print(f"✅ Модель загружена. Параметров: {sum(p.numel() for p in bdh_model.parameters()):,}")
+# -----------------------------------------------------------------------------
+# 2. ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ВЕСОВ
+# -----------------------------------------------------------------------------
+# Test 1: for each ternary weight tensor, report the share of -1 / 0 / +1
+# entries and the Shannon entropy of that distribution.
+print("\n" + "=" * 70)
+print("📊 ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ТЕРНАРНЫХ ВЕСОВ")
+print("=" * 70)
+for name, param in [
+    ('encoder', bdh_model.encoder.weight_ternary),
+    ('encoder_v', bdh_model.encoder_v.weight_ternary),
+    ('decoder', bdh_model.decoder.weight_ternary),
+]:
+    # `total` counts every element, so any entry that is not exactly -1, 0 or
+    # 1 lowers all three fractions below.
+    total = param.numel()
+    minus1 = (param == -1).sum().item()
+    zero = (param == 0).sum().item()
+    plus1 = (param == 1).sum().item()
+    p = np.array([minus1, zero, plus1]) / total
+    # Drop empty classes so log2(0) is never evaluated
+    p = p[p > 0]
+    entropy = -np.sum(p * np.log2(p))
+    # Upper bound: three equally likely values
+    max_entropy = np.log2(3)
+    print(f"\n{name}:")
+    print(f"   -1: {minus1/total:.2%}")
+    print(f"    0: {zero/total:.2%}")
+    print(f"   +1: {plus1/total:.2%}")
+    print(f"   Энтропия: {entropy:.4f} / {max_entropy:.4f} бит")
+    print(f"   Использование: {entropy/max_entropy:.1%}")
+# -----------------------------------------------------------------------------
+# 3. ТЕСТ 2: ЖИВУЧЕСТЬ
+# -----------------------------------------------------------------------------
+print("\n" + "=" * 70)
+print("📊 ТЕСТ 2: ЖИВУЧЕСТЬ МОДЕЛИ")
+print("=" * 70)
+def text_to_bytes(text):
+    # Encode `text` as UTF-8 and return its byte values as a torch.long tensor
+    # with a leading dimension of 1 (added by unsqueeze(0)), moved to the
+    # module-level `device`.
+    return torch.tensor(list(text.encode('utf-8')), dtype=torch.long).unsqueeze(0).to(device)
+# Test 2: run a gradient-free forward pass on three sample strings and report
+# either the output shape or the first 60 characters of the error message.
+# The printed label always ends in "..." even when the text is under 30 chars.
+for text in ["Hello world", "The quick brown fox", "A" * 100]:
+    bytes_tensor = text_to_bytes(text)
+    try:
+        with torch.no_grad():
+            logits, states = bdh_model.forward_with_states(bytes_tensor)
+        print(f"✅ '{text[:30]}...' -> OK (вход: {bytes_tensor.shape[1]}, выход: {logits.shape})")
+    except Exception as e:
+        # Any exception is reported and the loop moves on to the next input
+        print(f"❌ '{text[:30]}...' -> {str(e)[:60]}")
+# -----------------------------------------------------------------------------
+# 4. ТЕСТ 3: СТАТИСТИКА АКТИВАЦИЙ
+# -----------------------------------------------------------------------------
+# Test 3: one forward pass on a fixed sentence, then a per-entry table of
+# mean, std, norm and sparsity over the returned `states`.
+print("\n" + "=" * 70)
+print("📊 ТЕСТ 3: СТАТИСТИКА АКТИВАЦИЙ")
+print("=" * 70)
+text = "The quick brown fox jumps over the lazy dog"
+bytes_tensor = text_to_bytes(text)
+with torch.no_grad():
+    _, states = bdh_model.forward_with_states(bytes_tensor)
+print(f"\nАктивации по слоям (всего {len(states)} слоёв):")
+print("-" * 60)
+print(f"{'Слой':<6} {'Mean':<10} {'Std':<10} {'Norm':<12} {'Sparsity':<10}")
+print("-" * 60)
+for i, s in enumerate(states):
+    # squeeze(0) removes the leading dimension when it has size 1
+    # (presumably the batch dimension — TODO confirm)
+    s_flat = s.squeeze(0)
+    # Sparsity = fraction of entries whose magnitude is below 1e-4
+    sparsity = (s_flat.abs() < 1e-4).float().mean().item()
+    print(f"{i:<6} {s_flat.mean():<10.4f} {s_flat.std():<10.4f} {torch.norm(s_flat):<12.2f} {sparsity:<10.2%}")
+print("\n" + "=" * 70)
+print("🎉 ТЕСТИРОВАНИЕ ЗАВЕРШЕНО!")
+print("=" * 70)