Andrewstivan committed on
Commit
2fa8bd2
·
verified ·
1 Parent(s): 77b8172

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -226
app.py CHANGED
@@ -1,11 +1,13 @@
1
  # ============================================================================
2
- # ЧЕСТНЫЙ ПЕРЕНОС: 32 СЛОЯ AURA -> 6 ЦИКЛОВ BDH (БЕЗ УСРЕДНЕНИЯ)
3
  # ============================================================================
4
- import json, torch, os, gc, sys
5
- from tqdm import tqdm
6
- from safetensors.torch import save_file, safe_open
7
- from huggingface_hub import hf_hub_download, HfApi
8
- # ВРЕМЕННОЕ РЕШЕНИЕ: переопределяем конфиг прямо в app.py
 
 
9
  import dataclasses
10
 
11
  @dataclasses.dataclass
@@ -27,238 +29,127 @@ class BDHConfig:
27
  forget_rate: float = 0.1
28
  use_rho_cache: bool = True
29
 
30
- # Затем используем этот конфиг вместо импорта из bdh
31
- # config = BDHConfig(n_layer=32, ...) # теперь работает!
 
32
 
33
- class Plasticity:
34
- def __init__(self, n_neurons):
35
- self.n_neurons = n_neurons
36
- self.w = torch.zeros(n_neurons, n_neurons)
37
- self.long_term_w = torch.zeros(n_neurons, n_neurons)
38
- self.lr = 0.01
39
- self.consolidation_rate = 0.01
40
- self.forget_rate = 0.1
41
- self.acc_pre = torch.zeros(n_neurons)
42
- self.acc_post = torch.zeros(n_neurons)
43
- self.threshold = 0.5
44
- self.bcm_theta = torch.zeros(n_neurons)
45
- self.lr_bcm = 0.001
46
- self.target_activity = 0.5
47
- self.step_count = 0
48
 
49
- def adapt_weights(self, weight_matrix):
50
- """Адаптирует матрицу весов через пластичность."""
51
- original_shape = weight_matrix.shape
52
-
53
- # Приводим к 2D
54
- if weight_matrix.dim() == 3:
55
- wm_2d = weight_matrix.reshape(-1, weight_matrix.shape[-1])
56
- else:
57
- wm_2d = weight_matrix
58
-
59
- # Вычисляем a_pre и a_post
60
- a_pre_raw = wm_2d.mean(dim=1)
61
- a_post_raw = wm_2d.mean(dim=0)
62
-
63
- # Приведение a_pre к n_neurons
64
- if a_pre_raw.shape[0] > self.n_neurons:
65
- a_pre = a_pre_raw[:self.n_neurons]
66
- elif a_pre_raw.shape[0] < self.n_neurons:
67
- repeat_factor = (self.n_neurons + a_pre_raw.shape[0] - 1) // a_pre_raw.shape[0]
68
- a_pre = a_pre_raw.repeat(repeat_factor)[:self.n_neurons]
69
- else:
70
- a_pre = a_pre_raw
71
-
72
- # Приведение a_post к n_neurons
73
- if a_post_raw.shape[0] > self.n_neurons:
74
- a_post = a_post_raw[:self.n_neurons]
75
- elif a_post_raw.shape[0] < self.n_neurons:
76
- repeat_factor = (self.n_neurons + a_post_raw.shape[0] - 1) // a_post_raw.shape[0]
77
- a_post = a_post_raw.repeat(repeat_factor)[:self.n_neurons]
78
- else:
79
- a_post = a_post_raw
80
-
81
- # Псевдоспайки
82
- self.acc_pre += a_pre
83
- self.acc_post += a_post
84
-
85
- spike_pre = (self.acc_pre >= self.threshold).float()
86
- spike_post = (self.acc_post >= self.threshold).float()
87
-
88
- self.acc_pre -= spike_pre * self.threshold
89
- self.acc_post -= spike_post * self.threshold
90
-
91
- # Хеббовское обновление
92
- delta = self.lr * torch.outer(spike_pre, spike_post)
93
- self.w += delta
94
-
95
- # Применяем адаптацию к ИСХОДНОЙ матрице
96
- with torch.no_grad():
97
- # БЕРЁМ СРЕЗ ОТ self.w
98
- update_slice = self.w[:wm_2d.shape[0], :wm_2d.shape[1]]
99
-
100
- # ЕСЛИ СРЕЗ МЕНЬШЕ, ЧЕМ НУЖНО, ПОВТОРЯЕМ ЕГО
101
- if update_slice.shape[0] < wm_2d.shape[0] or update_slice.shape[1] < wm_2d.shape[1]:
102
- repeat_rows = (wm_2d.shape[0] + update_slice.shape[0] - 1) // update_slice.shape[0]
103
- repeat_cols = (wm_2d.shape[1] + update_slice.shape[1] - 1) // update_slice.shape[1]
104
- update = update_slice.repeat(repeat_rows, repeat_cols)[:wm_2d.shape[0], :wm_2d.shape[1]]
105
- else:
106
- update = update_slice
107
-
108
- update = update * 0.01
109
-
110
- if weight_matrix.dim() == 3:
111
- update = update.reshape(original_shape)
112
- weight_matrix = weight_matrix + update
113
-
114
- self.step_count += 1
115
- if self.step_count % 10 == 0:
116
- self.consolidate()
117
-
118
- return weight_matrix
119
-
120
- def consolidate(self):
121
- self.long_term_w += self.consolidation_rate * self.w
122
- self.w = self.w * (1 - self.forget_rate)
123
-
124
- # ------------------------------------------------------------
125
- # Загрузка shard'ов
126
- # ------------------------------------------------------------
127
- class ShardCache:
128
- def __init__(self, repo_id, weight_map):
129
- self.repo_id = repo_id
130
- self.weight_map = weight_map
131
- self.cached_shards = {}
132
-
133
- def get_layer_weights(self, layer_idx):
134
- prefix = f"model.layers.{layer_idx}."
135
- layer_tensors = {n: s for n, s in self.weight_map.items() if n.startswith(prefix)}
136
- if not layer_tensors:
137
- return {}
138
-
139
- for shard_file in set(layer_tensors.values()):
140
- if shard_file not in self.cached_shards:
141
- shard_path = hf_hub_download(repo_id=self.repo_id, filename=shard_file)
142
- self.cached_shards[shard_file] = safe_open(shard_path, framework="pt", device="cpu")
143
-
144
- weights = {}
145
- for name, shard_file in layer_tensors.items():
146
- param_name = name.split('.')[-2]
147
- weights[param_name] = self.cached_shards[shard_file].get_tensor(name)
148
- return weights
149
-
150
- def close(self):
151
- for s in self.cached_shards.values():
152
- s.__exit__(None, None, None)
153
  # -----------------------------------------------------------------------------
154
- # ЗАГРУЗКА ВЕСОВ
155
  # -----------------------------------------------------------------------------
156
- print("=" * 60)
157
- print("🧠 ЧЕСТНЫЙ ПЕРЕНОС ВЕСОВ AURA -> BDH")
158
- print("=" * 60)
159
-
160
- repo_id = "ResplendentAI/Aura_v3_7B"
161
- index_path = hf_hub_download(repo_id=repo_id, filename="model.safetensors.index.json")
162
- with open(index_path, 'r') as f:
163
- weight_map = json.load(f)['weight_map']
164
- cache = ShardCache(repo_id, weight_map)
165
-
166
- config = BDHConfig(n_layer=32, n_embd=4096, n_head=32, mlp_internal_dim_multiplier=1, vocab_size=256, dropout=0.1)
167
- D, nh = config.n_embd, config.n_head
168
- N = config.mlp_internal_dim_multiplier * D // nh
169
-
170
- plasticity_enc = Plasticity(n_neurons=D)
171
- plasticity_enc_v = Plasticity(n_neurons=D)
172
- plasticity_dec = Plasticity(n_neurons=D)
173
-
174
- encoder = torch.zeros(nh, N, D)
175
- encoder_v = torch.zeros(nh, N, D)
176
- decoder = torch.zeros(nh * N, D)
177
-
178
- print(f"Начало: encoder={encoder.shape}, encoder_v={encoder_v.shape}, decoder={decoder.shape}")
179
-
180
- for layer_idx in tqdm(range(32), desc="Обработка слоёв"):
181
- w = cache.get_layer_weights(layer_idx)
182
- if not w:
183
- continue
 
 
 
 
 
 
 
 
184
 
185
- q_proj = w['q_proj'].float()
186
- v_proj = w['v_proj'].float()
187
- o_proj = w['o_proj'].float()
188
 
189
- # --------------------------------------------------------
190
- # ИСПРАВЛЕНИЕ: Учитываем реальные размеры Aura
191
- # --------------------------------------------------------
192
-
193
- # q_proj: [4096, 4096] -> [32, 128, 4096]
194
- if q_proj.shape == (4096, 4096):
195
- q_reshaped = q_proj.reshape(nh, N, D)
196
- else:
197
- # fallback
198
- q_reshaped = q_proj[:nh*N, :D].reshape(nh, N, D)
 
 
 
 
 
 
199
 
200
- # v_proj: [1024, 4096] -> нужно адаптировать к [32, 128, 4096]
201
- # v_proj имеет 1024 строки, нам нужно 32*128 = 4096 строк
202
- if v_proj.shape == (1024, 4096):
203
- # Повторяем, чтобы получить 4096 строк
204
- v_expanded = v_proj.repeat(4, 1) # 1024 * 4 = 4096
205
- v_reshaped = v_expanded.reshape(nh, N, D)
206
- else:
207
- v_reshaped = v_proj[:nh*N, :D].reshape(nh, N, D)
208
 
209
- # o_proj: [4096, 4096] -> [4096, 4096]
210
- if o_proj.shape == (4096, 4096):
211
- o_reshaped = o_proj.reshape(nh * N, D)
212
- else:
213
- o_reshaped = o_proj[:nh*N, :D]
 
 
 
 
 
 
 
 
 
 
 
214
 
215
- # Пластичность адаптирует
216
- encoder = plasticity_enc.adapt_weights(q_reshaped)
217
- encoder_v = plasticity_enc_v.adapt_weights(v_reshaped)
218
- decoder = plasticity_dec.adapt_weights(o_reshaped)
 
 
 
 
219
 
220
- plasticity_enc.consolidate()
221
- plasticity_enc_v.consolidate()
222
- plasticity_dec.consolidate()
 
 
 
223
 
224
- bdh_weights = {
225
- 'encoder': encoder,
226
- 'encoder_v': encoder_v,
227
- 'decoder': decoder,
228
- 'lm_head': torch.randn(D, 256) * 0.02
229
- }
230
 
231
- # ------------------------------------------------------------
232
- # СОХРАНЕНИЕ
233
- # ------------------------------------------------------------
234
- os.makedirs("bdh_weights_upload", exist_ok=True)
235
- bdh_weights_safe = {k: v.cpu() for k, v in bdh_weights.items()}
236
- save_file(bdh_weights_safe, "bdh_weights_upload/bdh_plasticity.safetensors")
237
 
238
- config_dict = {
239
- "model_type": "bdh",
240
- "n_layer": config.n_layer,
241
- "n_embd": config.n_embd,
242
- "n_head": config.n_head,
243
- "mlp_internal_dim_multiplier": config.mlp_internal_dim_multiplier,
244
- "vocab_size": config.vocab_size,
245
- "dropout": config.dropout
246
- }
247
- with open("bdh_weights_upload/bdh_config.json", "w") as f:
248
- json.dump(config_dict, f, indent=2)
249
 
250
- token = os.environ.get('HF_TOKEN')
251
- if token:
252
- api = HfApi(token=token)
253
- api.upload_folder(
254
- folder_path="bdh_weights_upload",
255
- repo_id="Andrewstivan/AURA",
256
- repo_type="model",
257
- path_in_repo="bdh_plasticity",
258
- commit_message="🧠 Честный перенос весов Aura в BDH (исправлены размеры v_proj)"
259
- )
260
- print("✅ Веса загружены в Andrewstivan/AURA/bdh_plasticity/")
261
- else:
262
- print("⚠️ HF_TOKEN не найден! Файлы сохранены локально.")
263
 
264
- print("\n🎉 ГОТОВО!")
 
 
 
1
  # ============================================================================
2
+ # ТЕСТИРОВАНИЕ ЧЕСТНОЙ BDH
3
  # ============================================================================
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from safetensors.torch import load_file
7
+ from huggingface_hub import hf_hub_download
8
+ import json
9
+ import sys
10
+ import numpy as np
11
  import dataclasses
12
 
13
  @dataclasses.dataclass
 
29
  forget_rate: float = 0.1
30
  use_rho_cache: bool = True
31
 
32
+ print("=" * 70)
33
+ print("🧪 ТЕСТИРОВАНИЕ ЧЕСТНОЙ BDH (32 слоя, 4096, без усреднения)")
34
+ print("=" * 70)
35
 
36
+ device = "cuda" if torch.cuda.is_available() else "cpu"
37
+ print(f"🖥️ Устройство: {device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # -----------------------------------------------------------------------------
40
+ # 1. ЗАГРУЗКА МОДЕЛИ
41
  # -----------------------------------------------------------------------------
42
+ config_path = hf_hub_download(
43
+ repo_id="Andrewstivan/AURA",
44
+ filename="bdh_plasticity/bdh_config.json",
45
+ repo_type="model"
46
+ )
47
+ with open(config_path, 'r') as f:
48
+ config_dict = json.load(f)
49
+
50
+ config = BDHConfig(
51
+ n_layer=config_dict['n_layer'],
52
+ n_embd=config_dict['n_embd'],
53
+ n_head=config_dict['n_head'],
54
+ mlp_internal_dim_multiplier=config_dict['mlp_internal_dim_multiplier'],
55
+ vocab_size=config_dict['vocab_size'],
56
+ dropout=config_dict['dropout'],
57
+ use_plasticity=True, # ← Включаем!
58
+ )
59
+
60
+ bdh_model = BDH(config).to(device)
61
+
62
+ weights_path = hf_hub_download(
63
+ repo_id="Andrewstivan/AURA",
64
+ filename="bdh_plasticity/bdh_plasticity.safetensors",
65
+ repo_type="model"
66
+ )
67
+ weights = load_file(weights_path)
68
+
69
+ with torch.no_grad():
70
+ bdh_model.encoder.weight_fp32.data = weights['encoder'].to(device)
71
+ bdh_model.encoder_v.weight_fp32.data = weights['encoder_v'].to(device)
72
+ bdh_model.decoder.weight_fp32.data = weights['decoder'].to(device)
73
+
74
+ # Обновляем тернарные веса
75
+ bdh_model.encoder.update_ternary_weights()
76
+ bdh_model.encoder_v.update_ternary_weights()
77
+ bdh_model.decoder.update_ternary_weights()
78
 
79
+ bdh_model.eval()
80
+ print(f"✅ Модель загружена. Параметров: {sum(p.numel() for p in bdh_model.parameters()):,}")
 
81
 
82
+ # -----------------------------------------------------------------------------
83
+ # 2. ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ВЕСОВ
84
+ # -----------------------------------------------------------------------------
85
+ print("\n" + "=" * 70)
86
+ print("📊 ТЕСТ 1: РАСПРЕДЕЛЕНИЕ ТЕРНАРНЫХ ВЕСОВ")
87
+ print("=" * 70)
88
+
89
+ for name, param in [
90
+ ('encoder', bdh_model.encoder.weight_ternary),
91
+ ('encoder_v', bdh_model.encoder_v.weight_ternary),
92
+ ('decoder', bdh_model.decoder.weight_ternary),
93
+ ]:
94
+ total = param.numel()
95
+ minus1 = (param == -1).sum().item()
96
+ zero = (param == 0).sum().item()
97
+ plus1 = (param == 1).sum().item()
98
 
99
+ p = np.array([minus1, zero, plus1]) / total
100
+ p = p[p > 0]
101
+ entropy = -np.sum(p * np.log2(p))
102
+ max_entropy = np.log2(3)
 
 
 
 
103
 
104
+ print(f"\n{name}:")
105
+ print(f" -1: {minus1/total:.2%}")
106
+ print(f" 0: {zero/total:.2%}")
107
+ print(f" +1: {plus1/total:.2%}")
108
+ print(f" Энтропия: {entropy:.4f} / {max_entropy:.4f} бит")
109
+ print(f" Использование: {entropy/max_entropy:.1%}")
110
+
111
+ # -----------------------------------------------------------------------------
112
+ # 3. ТЕСТ 2: ЖИВУЧЕСТЬ
113
+ # -----------------------------------------------------------------------------
114
+ print("\n" + "=" * 70)
115
+ print("📊 ТЕСТ 2: ЖИВУЧЕСТЬ МОДЕЛИ")
116
+ print("=" * 70)
117
+
118
+ def text_to_bytes(text):
119
+ return torch.tensor(list(text.encode('utf-8')), dtype=torch.long).unsqueeze(0).to(device)
120
 
121
+ for text in ["Hello world", "The quick brown fox", "A" * 100]:
122
+ bytes_tensor = text_to_bytes(text)
123
+ try:
124
+ with torch.no_grad():
125
+ logits, states = bdh_model.forward_with_states(bytes_tensor)
126
+ print(f"✅ '{text[:30]}...' -> OK (вход: {bytes_tensor.shape[1]}, выход: {logits.shape})")
127
+ except Exception as e:
128
+ print(f"❌ '{text[:30]}...' -> {str(e)[:60]}")
129
 
130
+ # -----------------------------------------------------------------------------
131
+ # 4. ТЕСТ 3: СТАТИСТИКА АКТИВАЦИЙ
132
+ # -----------------------------------------------------------------------------
133
+ print("\n" + "=" * 70)
134
+ print("📊 ТЕСТ 3: СТАТИСТИКА АКТИВАЦИЙ")
135
+ print("=" * 70)
136
 
137
+ text = "The quick brown fox jumps over the lazy dog"
138
+ bytes_tensor = text_to_bytes(text)
 
 
 
 
139
 
140
+ with torch.no_grad():
141
+ _, states = bdh_model.forward_with_states(bytes_tensor)
 
 
 
 
142
 
143
+ print(f"\nАктивации по слоям (всего {len(states)} слоёв):")
144
+ print("-" * 60)
145
+ print(f"{'Слой':<6} {'Mean':<10} {'Std':<10} {'Norm':<12} {'Sparsity':<10}")
146
+ print("-" * 60)
 
 
 
 
 
 
 
147
 
148
+ for i, s in enumerate(states):
149
+ s_flat = s.squeeze(0)
150
+ sparsity = (s_flat.abs() < 1e-4).float().mean().item()
151
+ print(f"{i:<6} {s_flat.mean():<10.4f} {s_flat.std():<10.4f} {torch.norm(s_flat):<12.2f} {sparsity:<10.2%}")
 
 
 
 
 
 
 
 
 
152
 
153
+ print("\n" + "=" * 70)
154
+ print("🎉 ТЕСТИРОВАНИЕ ЗАВЕРШЕНО!")
155
+ print("=" * 70)