Andrewstivan committed on
Commit
f244686
·
verified ·
1 Parent(s): d429687

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -163
app.py CHANGED
@@ -1,167 +1,223 @@
1
  # ============================================================================
2
- # ФИНАЛЬНЫЙ ТЕСТ BDH (ИСПРАВЛЕННЫЕ РАЗМЕРНОСТИ)
3
  # ============================================================================
4
-
5
- import gc
6
- import torch
7
- import torch.nn.functional as F
8
- from safetensors.torch import load_file
9
- from huggingface_hub import hf_hub_download
10
- import json
11
- import sys
12
- import numpy as np
13
- import time
14
-
15
- gc.collect()
16
- if torch.cuda.is_available():
17
- torch.cuda.empty_cache()
18
-
19
- sys.path.append('.')
20
- from bdh import BDH, BDHConfig
21
-
22
- print("=" * 80)
23
- print("🧪 ФИНАЛЬНЫЙ ТЕСТ BDH (ИСПРАВЛЕННЫЕ РАЗМЕРНОСТИ)")
24
- print("=" * 80)
25
-
26
- device = "cpu"
27
-
28
- # Загрузка модели
29
- config_path = hf_hub_download(repo_id="Andrewstivan/AURA", filename="bdh_plasticity/bdh_config.json", repo_type="model")
30
- with open(config_path, 'r') as f:
31
- config_dict = json.load(f)
32
-
33
- TARGET_N_HEAD = 16
34
- TARGET_N_EMBD = 1024
35
- TARGET_LATENT = 1024 // 16 # 64
36
-
37
- config = BDHConfig(
38
- n_layer=6,
39
- n_embd=TARGET_N_EMBD,
40
- n_head=TARGET_N_HEAD,
41
- mlp_internal_dim_multiplier=1,
42
- vocab_size=256,
43
- dropout=0.1
44
- )
45
- bdh_model = BDH(config).to(device)
46
-
47
- # Выводим ожидаемые размерности
48
- print(f"\n📐 Ожидаемые размерности BDH:")
49
- print(f" encoder: {bdh_model.encoder.weight_fp32.shape}") # [16, 64, 1024]
50
- print(f" encoder_v: {bdh_model.encoder_v.weight_fp32.shape}") # [16, 64, 1024]
51
- print(f" decoder: {bdh_model.decoder.weight_fp32.shape}") # [1024, 1024]
52
-
53
- weights_path = hf_hub_download(repo_id="Andrewstivan/AURA", filename="bdh_plasticity/bdh_plasticity.safetensors", repo_type="model")
54
- weights = load_file(weights_path)
55
-
56
- print(f"\n📦 Загруженные размерности:")
57
- print(f" encoder: {weights['encoder'].shape}") # [32, 4096, 128] или [32, 128, 4096]
58
- print(f" encoder_v: {weights['encoder_v'].shape}") # [32, 4096, 128] или [32, 1024, 128]
59
- print(f" decoder: {weights['decoder'].shape}") # [4096, 4096]
60
-
61
- with torch.no_grad():
62
- # --- ENCODER ---
63
- enc_w = weights['encoder'].to(device)
64
- # Исходный: [32, 4096, 128] -> транспонируем в [32, 128, 4096] -> обрезаем до [16, 64, 1024]
65
- if enc_w.shape == (32, 4096, 128):
66
- enc_w = enc_w.transpose(1, 2) # [32, 128, 4096]
67
- enc_w = enc_w[:TARGET_N_HEAD, :TARGET_LATENT, :TARGET_N_EMBD]
68
- print(f" encoder после обрезания: {enc_w.shape}")
69
- bdh_model.encoder.weight_fp32.data = enc_w
70
-
71
- # --- ENCODER_V ---
72
- encv_w = weights['encoder_v'].to(device)
73
- print(f" encoder_v исходный: {encv_w.shape}")
74
- # Нужно привести к [16, 64, 1024]
75
- if encv_w.dim() == 3:
76
- if encv_w.shape[1] == 4096 and encv_w.shape[2] == 128:
77
- encv_w = encv_w.transpose(1, 2) # [32, 128, 4096]
78
- elif encv_w.shape[1] == 1024 and encv_w.shape[2] == 128:
79
- encv_w = encv_w.transpose(1, 2) # [32, 128, 1024]
80
-
81
- # Обрезаем до нужного размера
82
- fixed_encv = torch.zeros(bdh_model.encoder_v.weight_fp32.shape, device=device)
83
- h = min(encv_w.shape[0], fixed_encv.shape[0])
84
- n = min(encv_w.shape[1], fixed_encv.shape[1])
85
- d = min(encv_w.shape[2], fixed_encv.shape[2])
86
- fixed_encv[:h, :n, :d] = encv_w[:h, :n, :d]
87
- print(f" encoder_v после обрезания: {fixed_encv.shape}")
88
- bdh_model.encoder_v.weight_fp32.data = fixed_encv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- # --- DECODER ---
91
- dec_w = weights['decoder'].to(device)
92
- target_dec = bdh_model.decoder.weight_fp32.shape
93
- fixed_dec = torch.zeros(target_dec, device=device)
94
- copy_0 = min(dec_w.shape[0], target_dec[0])
95
- copy_1 = min(dec_w.shape[1], target_dec[1])
96
- fixed_dec[:copy_0, :copy_1] = dec_w[:copy_0, :copy_1]
97
- print(f" decoder после обрезания: {fixed_dec.shape}")
98
- bdh_model.decoder.weight_fp32.data = fixed_dec
99
 
100
- # Обновляем тернарные веса
101
- bdh_model.encoder.update_ternary_weights()
102
- bdh_model.encoder_v.update_ternary_weights()
103
- bdh_model.decoder.update_ternary_weights()
104
-
105
- bdh_model.eval()
106
- print(f"\n✅ Модель загружена. Параметров: {sum(p.numel() for p in bdh_model.parameters()):,}")
107
-
108
- # ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ВЕСОВ
109
- print("\n" + "=" * 80)
110
- print("📊 ТЕСТ 1/3: РАСПРЕДЕЛЕНИЕ ТЕРНАРНЫХ ВЕСОВ")
111
- print("=" * 80)
112
- print(f"{'Слой':<12} {'-1':<10} {'0':<10} {'+1':<10} {'Энтропия':<12} {'Использование':<12}")
113
- print("-" * 60)
114
-
115
- for name, param in [('encoder', bdh_model.encoder.weight_ternary), ('encoder_v', bdh_model.encoder_v.weight_ternary), ('decoder', bdh_model.decoder.weight_ternary)]:
116
- total = param.numel()
117
- minus1 = (param == -1).sum().item()
118
- zero = (param == 0).sum().item()
119
- plus1 = (param == 1).sum().item()
120
- p = np.array([minus1, zero, plus1]) / total
121
- p = p[p > 0]
122
- entropy = -np.sum(p * np.log2(p))
123
- max_entropy = np.log2(3)
124
- print(f"{name:<12} {minus1/total:>8.1%} {zero/total:>9.1%} {plus1/total:>9.1%} {entropy:>10.4f} {entropy/max_entropy:>11.1%}")
125
-
126
- # ТЕСТ 2: ЖИВУЧЕСТЬ
127
- print("\n" + "=" * 80)
128
- print("📊 ТЕСТ 2/3: ЖИВУЧЕСТЬ МОДЕЛИ")
129
- print("=" * 80)
130
-
131
- def text_to_bytes(text):
132
- return torch.tensor(list(text.encode('utf-8')), dtype=torch.long).unsqueeze(0).to(device)
133
-
134
- for text in ["Hello world", "The quick brown fox", "A" * 100]:
135
- bytes_tensor = text_to_bytes(text)
136
- try:
137
- with torch.no_grad():
138
- logits, states = bdh_model.forward_with_states(bytes_tensor)
139
- print(f"✅ '{text[:30]}...' -> OK (вход: {bytes_tensor.shape[1]}, выход: {logits.shape})")
140
- except Exception as e:
141
- print(f"❌ '{text[:30]}...' -> {str(e)[:60]}")
142
-
143
- # ТЕСТ 3: ПРОИЗВОДИТЕЛЬНОСТЬ
144
- print("\n" + "=" * 80)
145
- print("📊 ТЕСТ 3/3: ПРОИЗВОДИТЕЛЬНОСТЬ")
146
- print("=" * 80)
147
- print(f"{'Токенов':<10} {'Время (мс)':<12} {'Токенов/с':<12}")
148
- print("-" * 50)
149
-
150
- for length in [10, 50, 100]:
151
- bytes_tensor = text_to_bytes("A" * length)
152
- try:
153
- for _ in range(3):
154
- _, _ = bdh_model.forward_with_states(bytes_tensor)
155
- start = time.time()
156
- for _ in range(10):
157
- _, _ = bdh_model.forward_with_states(bytes_tensor)
158
- elapsed = time.time() - start
159
- tokens = bytes_tensor.shape[1]
160
- print(f"{tokens:<10} {elapsed*1000/10:<12.2f} {tokens*10/elapsed:<12.0f}")
161
- except Exception as e:
162
- print(f"{length:<10} ОШИБКА: {str(e)[:40]}")
163
-
164
- # ИТОГ
165
- print("\n" + "=" * 80)
166
- print("🎉 ВСЕ ТЕСТЫ ПРОЙДЕНЫ!")
167
- print("=" * 80)
 
1
  # ============================================================================
2
+ # ЧЕСТНЫЙ ПЕРЕНОС: 32 СЛОЯ AURA -> 6 ЦИКЛОВ BDH (БЕЗ УСРЕДНЕНИЯ)
3
  # ============================================================================
4
+ import json, torch, os, gc, sys
5
+ from tqdm import tqdm
6
+ from safetensors.torch import save_file, safe_open
7
+ from huggingface_hub import hf_hub_download, HfApi
8
+
9
class Plasticity:
    """Hebbian-style plasticity state that nudges weight matrices.

    The object keeps a square short-term trace ``w`` and a long-term trace
    ``long_term_w`` of size ``n_neurons x n_neurons``, plus two integrate-and-
    fire accumulators. Each call to :meth:`adapt_weights` derives pseudo-spikes
    from the row/column means of the given matrix, applies a Hebbian outer-
    product update to ``w`` and returns the matrix shifted by a scaled slice
    of ``w``.
    """

    def __init__(self, n_neurons):
        """Create zero-initialised plasticity state for ``n_neurons`` units.

        Raises:
            ValueError: if ``n_neurons`` is smaller than 1 (the tiling logic
                in ``adapt_weights`` would otherwise divide by zero).
        """
        if n_neurons < 1:
            raise ValueError(f"n_neurons must be >= 1, got {n_neurons}")
        self.n_neurons = n_neurons
        self.w = torch.zeros(n_neurons, n_neurons)
        self.long_term_w = torch.zeros(n_neurons, n_neurons)
        self.lr = 0.01
        self.consolidation_rate = 0.01
        self.forget_rate = 0.1
        self.acc_pre = torch.zeros(n_neurons)
        self.acc_post = torch.zeros(n_neurons)
        self.threshold = 0.5
        # The three BCM-related attributes below are initialised but not used
        # by any method of this class.
        self.bcm_theta = torch.zeros(n_neurons)
        self.lr_bcm = 0.001
        self.target_activity = 0.5
        self.step_count = 0
        # Scale applied to the trace before it is added to a weight matrix
        # (previously a hard-coded 0.01 inside adapt_weights).
        self.update_scale = 0.01

    @staticmethod
    def _fit_length(vec, length):
        """Truncate or cyclically repeat a 1-D tensor to exactly ``length`` items."""
        size = vec.shape[0]
        if size >= length:
            return vec[:length]
        repeats = (length + size - 1) // size
        return vec.repeat(repeats)[:length]

    @staticmethod
    def _tile_to(block, rows, cols):
        """Tile a 2-D tensor periodically until it covers ``rows x cols``."""
        if block.shape[0] >= rows and block.shape[1] >= cols:
            return block
        rep_rows = (rows + block.shape[0] - 1) // block.shape[0]
        rep_cols = (cols + block.shape[1] - 1) // block.shape[1]
        return block.repeat(rep_rows, rep_cols)[:rows, :cols]

    def adapt_weights(self, weight_matrix):
        """Return ``weight_matrix`` shifted by the current plasticity trace.

        Args:
            weight_matrix: a non-empty 2-D tensor, or a 3-D tensor that is
                flattened to ``(-1, last_dim)`` for the computation and
                restored to its original shape in the result.

        Returns:
            A new tensor with the same shape as ``weight_matrix``; the input
            is not modified.

        Raises:
            ValueError: if the tensor is empty or is neither 2-D nor 3-D.

        Side effects: updates the accumulators and ``w``, increments
        ``step_count`` and calls :meth:`consolidate` on every 10th step.
        """
        if weight_matrix.dim() not in (2, 3):
            raise ValueError(
                f"expected a 2-D or 3-D tensor, got {weight_matrix.dim()} dimension(s)"
            )
        if weight_matrix.numel() == 0:
            raise ValueError("weight_matrix must not be empty")

        original_shape = weight_matrix.shape

        # Everything here is bookkeeping, not a differentiable op: keep it out
        # of autograd so the persistent state never accumulates a graph.
        with torch.no_grad():
            wm_2d = weight_matrix.reshape(-1, original_shape[-1])
            rows, cols = wm_2d.shape

            # Row/column means act as pre-/post-synaptic activity, fitted to
            # the number of plasticity units.
            a_pre = self._fit_length(wm_2d.mean(dim=1), self.n_neurons)
            a_post = self._fit_length(wm_2d.mean(dim=0), self.n_neurons)

            # Pseudo-spikes: integrate, fire at threshold, subtract on firing.
            self.acc_pre += a_pre
            self.acc_post += a_post
            spike_pre = (self.acc_pre >= self.threshold).float()
            spike_post = (self.acc_post >= self.threshold).float()
            self.acc_pre -= spike_pre * self.threshold
            self.acc_post -= spike_post * self.threshold

            # Hebbian update of the short-term trace.
            self.w += self.lr * torch.outer(spike_pre, spike_post)

            # Slice (and, if the matrix is larger than the trace, tile) the
            # trace to the matrix shape; the multiplication creates a copy, so
            # the result is independent of later changes to self.w.
            update = self._tile_to(self.w[:rows, :cols], rows, cols) * self.update_scale
            adapted = weight_matrix + update.reshape(original_shape)

        self.step_count += 1
        if self.step_count % 10 == 0:
            self.consolidate()

        return adapted

    def consolidate(self):
        """Fold part of the short-term trace into long-term memory, then decay it."""
        self.long_term_w += self.consolidation_rate * self.w
        self.w = self.w * (1 - self.forget_rate)
99
+
100
+ # ------------------------------------------------------------
101
+ # Загрузка shard'ов
102
+ # ------------------------------------------------------------
103
class ShardCache:
    """Lazily opened safetensors shards of a Hub repo, queried layer by layer.

    ``weight_map`` maps full tensor names to shard file names (the
    ``weight_map`` section of ``model.safetensors.index.json``). Shards are
    downloaded and opened on first use and kept open until :meth:`close`.
    The cache can also be used as a context manager.
    """

    def __init__(self, repo_id, weight_map):
        self.repo_id = repo_id
        self.weight_map = weight_map
        self.cached_shards = {}

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def get_layer_weights(self, layer_idx):
        """Return all tensors of ``model.layers.<layer_idx>.`` keyed by short name.

        The short name is the second-to-last component of the tensor name
        (``...self_attn.q_proj.weight`` -> ``q_proj``). A non-``weight``
        tensor whose module also has a ``weight`` tensor (for example a bias)
        is stored under ``<module>.<suffix>`` (``q_proj.bias``) so it cannot
        silently replace the weight under the same key.

        Returns an empty dict when the weight map has no tensors for the layer.
        """
        prefix = f"model.layers.{layer_idx}."
        layer_tensors = {
            name: shard
            for name, shard in self.weight_map.items()
            if name.startswith(prefix)
        }
        if not layer_tensors:
            return {}

        # Open every shard this layer needs exactly once.
        for shard_file in set(layer_tensors.values()):
            if shard_file not in self.cached_shards:
                shard_path = hf_hub_download(repo_id=self.repo_id, filename=shard_file)
                self.cached_shards[shard_file] = safe_open(shard_path, framework="pt", device="cpu")

        weights = {}
        for name, shard_file in layer_tensors.items():
            module_path, _, suffix = name.rpartition('.')
            key = name.split('.')[-2]
            if suffix != "weight" and f"{module_path}.weight" in layer_tensors:
                # Would collide with the module's weight under the short key.
                key = f"{key}.{suffix}"
            weights[key] = self.cached_shards[shard_file].get_tensor(name)
        return weights

    def close(self):
        """Close every opened shard handle and forget it."""
        try:
            for handle in self.cached_shards.values():
                handle.__exit__(None, None, None)
        finally:
            # Never keep closed handles around: a later get_layer_weights call
            # must reopen the shard instead of reading from a dead handle.
            self.cached_shards.clear()
129
+ # -----------------------------------------------------------------------------
130
+ # ЗАГРУЗКА ВЕСОВ
131
+ # -----------------------------------------------------------------------------
132
# This version of the script no longer imports BDHConfig at the top of the
# file (the previous revision had `from bdh import BDH, BDHConfig`), so the
# BDHConfig(...) call below raised NameError. Import it here.
from bdh import BDHConfig

# Source checkpoint: download the shard index and build the lazy shard cache.
repo_id = "ResplendentAI/Aura_v3_7B"
index_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors.index.json")
with open(index_path, 'r', encoding='utf-8') as f:
    weight_map = json.load(f)['weight_map']
cache = ShardCache(repo_id, weight_map)

# BDH config used for the transfer (32 layers).
config = BDHConfig(n_layer=32, n_embd=4096, n_head=32, mlp_internal_dim_multiplier=1, vocab_size=256, dropout=0.1)
D, nh = config.n_embd, config.n_head
# Per-head latent size.
N = config.mlp_internal_dim_multiplier * D // nh

# One independent plasticity state per target matrix.
plasticity_enc = Plasticity(n_neurons=D)
plasticity_enc_v = Plasticity(n_neurons=D)
plasticity_dec = Plasticity(n_neurons=D)

# Start from zeros.
encoder = torch.zeros(nh, N, D)
encoder_v = torch.zeros(nh, N, D)
decoder = torch.zeros(nh * N, D)

print(f"Начало: encoder={encoder.shape}, encoder_v={encoder_v.shape}, decoder={decoder.shape}")
152
+
153
# Walk over the source layers and push their attention projections through
# the plasticity states.
# NOTE(review): encoder / encoder_v / decoder are rebound on every iteration,
# so what gets saved is the last processed layer's projection plus the
# accumulated plasticity offset; earlier layers only influence the result
# through the Plasticity state. Confirm this is the intended transfer rule.
try:
    for layer_idx in tqdm(range(32)):
        w = cache.get_layer_weights(layer_idx)
        if not w:
            continue

        # Full projection matrices, as float32.
        q_proj = w['q_proj'].float()
        v_proj = w['v_proj'].float()
        o_proj = w['o_proj'].float()

        # Reshape to the BDH layout without truncation.
        # Aura: [4096, 4096] -> BDH: [32, 128, 4096]
        # NOTE(review): this assumes v_proj has nh * N * D elements; a
        # checkpoint with fewer key/value heads than query heads would make
        # the v_proj reshape raise. Verify against the source checkpoint.
        q_reshaped = q_proj.reshape(nh, N, D)
        v_reshaped = v_proj.reshape(nh, N, D)
        o_reshaped = o_proj.reshape(nh * N, D)

        # Plasticity adapts the matrices without averaging.
        encoder = plasticity_enc.adapt_weights(q_reshaped)
        encoder_v = plasticity_enc_v.adapt_weights(v_reshaped)
        decoder = plasticity_dec.adapt_weights(o_reshaped)
finally:
    # The shard handles were previously never released; close them whether or
    # not the loop finished.
    cache.close()

# Final consolidation.
plasticity_enc.consolidate()
plasticity_enc_v.consolidate()
plasticity_dec.consolidate()
177
+
178
+ # Сохраняем честные веса
179
# Collect the transferred tensors; lm_head is freshly drawn random noise.
bdh_weights = {
    'encoder': encoder,
    'encoder_v': encoder_v,
    'decoder': decoder,
    'lm_head': torch.randn(D, 256) * 0.02,
}
save_file(bdh_weights, "bdh_full_weights.safetensors")

# Staging folder for the upload.
os.makedirs("bdh_weights_upload", exist_ok=True)

# Second copy of the weights (moved to CPU) inside the staging folder.
bdh_weights_safe = {}
for tensor_name, tensor in bdh_weights.items():
    bdh_weights_safe[tensor_name] = tensor.cpu()
save_file(bdh_weights_safe, "bdh_weights_upload/bdh_plasticity.safetensors")

# BDH config written next to the weights.
config_dict = {"model_type": "bdh"}
for field_name in (
    "n_layer",
    "n_embd",
    "n_head",
    "mlp_internal_dim_multiplier",
    "vocab_size",
    "dropout",
):
    config_dict[field_name] = getattr(config, field_name)
with open("bdh_weights_upload/bdh_config.json", "w") as f:
    json.dump(config_dict, f, indent=2)

# Push to the existing Andrewstivan/AURA repository when a token is available.
token = os.environ.get('HF_TOKEN')
if not token:
    print("⚠️ HF_TOKEN не найден! Файлы сохранены локально в папке 'bdh_weights_upload'")
    print(" Загрузите их вручную в репозиторий Andrewstivan/AURA")
else:
    api = HfApi(token=token)
    repo_id = "Andrewstivan/AURA"

    # Files end up under bdh_plasticity/ in the repository.
    api.upload_folder(
        folder_path="bdh_weights_upload",
        repo_id=repo_id,
        repo_type="model",
        path_in_repo="bdh_plasticity",
        commit_message="🧠 Добавлены веса BDH, полученные через пластичность из Aura",
    )

    print("✅ Веса загружены в Andrewstivan/AURA/bdh_plasticity/")
    print(" - bdh_plasticity.safetensors")
    print(" - bdh_config.json")

print("\n🎉 ГОТОВО!")