Zandy-Wandy committed on
Commit
1ea8a03
·
verified ·
1 Parent(s): 8d18b7c

Upload Zenith-7B model

Browse files
__pycache__/modeling_zenith.cpython-313.pyc ADDED
Binary file (27.1 kB). View file
 
configs/zenith_config.py CHANGED
@@ -32,11 +32,18 @@ class ZenithConfig:
32
 
33
  # EQ Adapter configuration
34
  use_eq_adapter: bool = True
35
- eq_adapter_hidden_dim: int = 512
36
  eq_num_emotions: int = 8
37
  eq_frustration_dim: int = 256
38
  eq_dropout: float = 0.1
39
 
 
 
 
 
 
 
 
40
  # Normalization & dropout
41
  rms_norm_eps: float = 1e-6
42
  dropout: float = 0.0
 
32
 
33
  # EQ Adapter configuration
34
  use_eq_adapter: bool = True
35
+ eq_adapter_hidden_size: int = 512
36
  eq_num_emotions: int = 8
37
  eq_frustration_dim: int = 256
38
  eq_dropout: float = 0.1
39
 
40
+ # EQ Engine advanced features
41
+ use_eq_attention_bias: bool = False
42
+ use_eq_gated_ffn: bool = False
43
+ use_eq_recurrence: bool = False
44
+ eq_consistency_weight: float = 0.02
45
+ eq_state_dim: int = 256 # Dimension of recurrent EQ state
46
+
47
  # Normalization & dropout
48
  rms_norm_eps: float = 1e-6
49
  dropout: float = 0.0
hf_model_card.md CHANGED
@@ -15,11 +15,11 @@ tags:
15
  datasets:
16
  - open-thoughts/OpenThoughts3-1.2M
17
  model-index:
18
- - name: Zenith-7B
19
  results: []
20
  ---
21
 
22
- # Zenith-7B
23
 
24
  **Production-ready 7B parameter model with code generation, reasoning, and emotional intelligence.**
25
 
 
15
  datasets:
16
  - open-thoughts/OpenThoughts3-1.2M
17
  model-index:
18
+ - name: Zenith-7B-V1
19
  results: []
20
  ---
21
 
22
+ # Zenith-7B-V1
23
 
24
  **Production-ready 7B parameter model with code generation, reasoning, and emotional intelligence.**
25
 
modeling_zenith.py CHANGED
@@ -16,6 +16,7 @@ Zenith features:
16
  import torch
17
  import torch.nn as nn
18
  import torch.nn.functional as F
 
19
  from typing import Optional, Tuple, List, Dict, Any
20
 
21
  from transformers import PreTrainedModel, PretrainedConfig
@@ -176,7 +177,7 @@ class MoELayer(nn.Module):
176
 
177
 
178
  class EQAdapter(nn.Module):
179
- """Emotional Intelligence Adapter."""
180
 
181
  def __init__(self, config: ZenithConfig):
182
  super().__init__()
@@ -197,7 +198,54 @@ class EQAdapter(nn.Module):
197
  nn.Linear(config.eq_adapter_hidden_size, 8)
198
  )
199
 
200
- def forward(self, hidden_states: torch.Tensor):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  # Pool over sequence dimension
202
  pooled = hidden_states.mean(dim=1)
203
 
@@ -207,7 +255,30 @@ class EQAdapter(nn.Module):
207
  # Emotion logits
208
  emotion_logits = self.emotion_classifier(pooled)
209
 
210
- return frustration, emotion_logits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
 
212
 
213
  class ZenithLayer(nn.Module):
@@ -220,26 +291,37 @@ class ZenithLayer(nn.Module):
220
 
221
  # Determine if this layer uses MoE
222
  self.use_moe = (
223
- config.num_experts > 0 and
224
  (not config.moe_layers or layer_idx in config.moe_layers)
225
  )
226
 
227
- # Self attention
228
- self.self_attn = nn.MultiheadAttention(
229
- embed_dim=config.hidden_size,
230
- num_heads=config.num_heads,
231
- batch_first=True
232
- )
 
 
233
 
234
  # MoE or dense feed-forward
235
  if self.use_moe:
236
  self.mlp = MoELayer(config)
237
  else:
238
- self.mlp = nn.Sequential(
239
- nn.Linear(config.hidden_size, config.intermediate_size),
240
- nn.SiLU(),
241
- nn.Linear(config.intermediate_size, config.hidden_size)
242
- )
 
 
 
 
 
 
 
 
 
243
 
244
  # Layer norm
245
  self.norm1 = nn.LayerNorm(config.hidden_size)
@@ -248,20 +330,110 @@ class ZenithLayer(nn.Module):
248
  # Dropout
249
  self.dropout = nn.Dropout(0.1)
250
 
 
 
 
 
 
 
251
  def forward(
252
- self,
253
  hidden_states: torch.Tensor,
254
  attention_mask: Optional[torch.Tensor] = None,
255
- output_attentions: bool = False
256
- ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  # Self attention with residual
258
  residual = hidden_states
259
  hidden_states = self.norm1(hidden_states)
260
 
261
- attn_output, attn_weights = self.self_attn(
262
- hidden_states, hidden_states, hidden_states,
263
- attn_mask=attention_mask
264
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  hidden_states = residual + self.dropout(attn_output)
266
 
267
  # Feed-forward with residual
@@ -271,12 +443,21 @@ class ZenithLayer(nn.Module):
271
  if self.use_moe:
272
  mlp_output, moe_loss = self.mlp(hidden_states)
273
  else:
274
- mlp_output = self.mlp(hidden_states)
 
 
 
 
 
 
 
 
 
275
  moe_loss = None
276
 
277
  hidden_states = residual + self.dropout(mlp_output)
278
 
279
- return hidden_states, attn_weights, moe_loss
280
 
281
 
282
  class ZenithPreTrainedModel(PreTrainedModel):
@@ -368,6 +549,11 @@ class ZenithModel(ZenithPreTrainedModel):
368
  all_hidden_states = () if output_hidden_states else None
369
  all_self_attns = () if output_attentions else None
370
  all_moe_losses = []
 
 
 
 
 
371
 
372
  for layer in self.layers:
373
  if output_hidden_states:
@@ -376,11 +562,24 @@ class ZenithModel(ZenithPreTrainedModel):
376
  layer_outputs = layer(
377
  hidden_states,
378
  attention_mask=attention_mask,
379
- output_attentions=output_attentions
 
380
  )
381
 
382
  hidden_states = layer_outputs[0]
383
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  if output_attentions:
385
  all_self_attns = all_self_attns + (layer_outputs[1],)
386
 
@@ -410,9 +609,8 @@ class ZenithModel(ZenithPreTrainedModel):
410
  if all_moe_losses:
411
  loss += torch.stack(all_moe_losses).mean()
412
 
413
- if self.eq_adapter is not None:
414
- # EQ loss would be computed here if emotion/frustration labels available
415
- pass
416
 
417
  if not return_dict:
418
  output = (logits,) + all_hidden_states + all_self_attns
 
16
  import torch
17
  import torch.nn as nn
18
  import torch.nn.functional as F
19
+ import math
20
  from typing import Optional, Tuple, List, Dict, Any
21
 
22
  from transformers import PreTrainedModel, PretrainedConfig
 
177
 
178
 
179
  class EQAdapter(nn.Module):
180
+ """Enhanced Emotional Intelligence Adapter with recurrent state and core architecture integration."""
181
 
182
  def __init__(self, config: ZenithConfig):
183
  super().__init__()
 
198
  nn.Linear(config.eq_adapter_hidden_size, 8)
199
  )
200
 
201
+ # Recurrent EQ state (GRU) for layer-to-layer consistency
202
+ if config.use_eq_recurrence:
203
+ self.eq_gru = nn.GRUCell(
204
+ input_size=config.eq_adapter_hidden_size,
205
+ hidden_size=config.eq_state_dim
206
+ )
207
+ # Projection to generate initial state from pooled features
208
+ self.state_projection = nn.Linear(config.hidden_size, config.eq_state_dim)
209
+ # Projection to reduce pooled features to GRU input size
210
+ self.gru_input_proj = nn.Linear(config.hidden_size, config.eq_adapter_hidden_size)
211
+ else:
212
+ self.eq_gru = None
213
+ self.state_projection = None
214
+ self.gru_input_proj = None
215
+
216
+ # EQ state to attention bias (scalar per head)
217
+ if config.use_eq_attention_bias:
218
+ self.attn_bias_proj = nn.Linear(
219
+ config.eq_state_dim if config.use_eq_recurrence else config.eq_adapter_hidden_size,
220
+ config.num_heads,
221
+ bias=False
222
+ )
223
+ else:
224
+ self.attn_bias_proj = None
225
+
226
+ # EQ state to FFN gate
227
+ if config.use_eq_gated_ffn:
228
+ self.ffn_gate_proj = nn.Linear(
229
+ config.eq_state_dim if config.use_eq_recurrence else config.eq_adapter_hidden_size,
230
+ config.intermediate_size,
231
+ bias=False
232
+ )
233
+ else:
234
+ self.ffn_gate_proj = None
235
+
236
+ def forward(self, hidden_states: torch.Tensor, prev_eq_state: Optional[torch.Tensor] = None):
237
+ """
238
+ Args:
239
+ hidden_states: [batch, seq_len, hidden_size]
240
+ prev_eq_state: [batch, eq_state_dim] previous EQ state (for recurrence)
241
+
242
+ Returns:
243
+ frustration: [batch, 1]
244
+ emotion_logits: [batch, 8]
245
+ eq_state: [batch, eq_state_dim] updated EQ state
246
+ attn_bias: [batch, num_heads, head_dim] or None
247
+ ffn_gate: [batch, d_ff] or None
248
+ """
249
  # Pool over sequence dimension
250
  pooled = hidden_states.mean(dim=1)
251
 
 
255
  # Emotion logits
256
  emotion_logits = self.emotion_classifier(pooled)
257
 
258
+ # Compute EQ state
259
+ if self.config.use_eq_recurrence and self.eq_gru is not None:
260
+ # Project pooled features to GRU input size
261
+ gru_input = torch.tanh(self.gru_input_proj(pooled))
262
+ if prev_eq_state is None:
263
+ # Initialize state from projection
264
+ eq_state = torch.tanh(self.state_projection(pooled))
265
+ else:
266
+ eq_state = self.eq_gru(gru_input, prev_eq_state)
267
+ else:
268
+ # No recurrence, use pooled features directly
269
+ eq_state = torch.tanh(pooled)
270
+
271
+ # Compute attention bias if enabled
272
+ attn_bias = None
273
+ if self.attn_bias_proj is not None:
274
+ attn_bias = self.attn_bias_proj(eq_state) # [batch, num_heads]
275
+
276
+ # Compute FFN gate if enabled
277
+ ffn_gate = None
278
+ if self.ffn_gate_proj is not None:
279
+ ffn_gate = torch.sigmoid(self.ffn_gate_proj(eq_state))
280
+
281
+ return frustration, emotion_logits, eq_state, attn_bias, ffn_gate
282
 
283
 
284
  class ZenithLayer(nn.Module):
 
291
 
292
  # Determine if this layer uses MoE
293
  self.use_moe = (
294
+ config.num_experts > 0 and
295
  (not config.moe_layers or layer_idx in config.moe_layers)
296
  )
297
 
298
+ # Self attention projections
299
+ self.q_proj = nn.Linear(config.hidden_size, config.hidden_size)
300
+ self.k_proj = nn.Linear(config.hidden_size, config.hidden_size)
301
+ self.v_proj = nn.Linear(config.hidden_size, config.hidden_size)
302
+ self.out_proj = nn.Linear(config.hidden_size, config.hidden_size)
303
+
304
+ # Attention dropout
305
+ self.attn_dropout = nn.Dropout(0.1)
306
 
307
  # MoE or dense feed-forward
308
  if self.use_moe:
309
  self.mlp = MoELayer(config)
310
  else:
311
+ if config.use_eq_gated_ffn:
312
+ # Gated MLP: gate applied to intermediate representation
313
+ self.mlp = nn.Sequential(
314
+ nn.Linear(config.hidden_size, config.intermediate_size),
315
+ nn.SiLU(),
316
+ )
317
+ self.gate_proj = nn.Linear(config.intermediate_size, config.intermediate_size)
318
+ self.out_proj_mlp = nn.Linear(config.intermediate_size, config.hidden_size)
319
+ else:
320
+ self.mlp = nn.Sequential(
321
+ nn.Linear(config.hidden_size, config.intermediate_size),
322
+ nn.SiLU(),
323
+ nn.Linear(config.intermediate_size, config.hidden_size)
324
+ )
325
 
326
  # Layer norm
327
  self.norm1 = nn.LayerNorm(config.hidden_size)
 
330
  # Dropout
331
  self.dropout = nn.Dropout(0.1)
332
 
333
+ # EQ adapter (if enabled)
334
+ if config.use_eq_adapter:
335
+ self.eq_adapter = EQAdapter(config)
336
+ else:
337
+ self.eq_adapter = None
338
+
339
  def forward(
340
+ self,
341
  hidden_states: torch.Tensor,
342
  attention_mask: Optional[torch.Tensor] = None,
343
+ output_attentions: bool = False,
344
+ prev_eq_state: Optional[torch.Tensor] = None
345
+ ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
346
+ """
347
+ Args:
348
+ hidden_states: [batch, seq_len, hidden_size]
349
+ attention_mask: attention mask
350
+ output_attentions: whether to output attention weights
351
+ prev_eq_state: [batch, eq_state_dim] previous EQ state from previous layer
352
+
353
+ Returns:
354
+ hidden_states: [batch, seq_len, hidden_size]
355
+ attn_weights: [batch, num_heads, seq_len, seq_len] or None
356
+ moe_loss: scalar or None
357
+ eq_state: [batch, eq_state_dim] or None
358
+ consistency_loss: scalar or None
359
+ """
360
+ # Process EQ adapter if enabled
361
+ eq_state = None
362
+ attn_bias = None
363
+ ffn_gate = None
364
+ consistency_loss = None
365
+
366
+ if self.eq_adapter is not None:
367
+ frustration, emotion_logits, eq_state, attn_bias, ffn_gate = self.eq_adapter(
368
+ hidden_states, prev_eq_state
369
+ )
370
+
371
+ # Compute consistency loss if recurrence enabled and we have previous state
372
+ if self.config.use_eq_recurrence and prev_eq_state is not None:
373
+ consistency_loss = F.mse_loss(eq_state, prev_eq_state.detach())
374
+
375
  # Self attention with residual
376
  residual = hidden_states
377
  hidden_states = self.norm1(hidden_states)
378
 
379
+ # Apply attention bias if enabled (before softmax)
380
+ if attn_bias is not None:
381
+ batch_size, seq_len, _ = hidden_states.shape
382
+
383
+ # Compute Q, K, V from normalized hidden states
384
+ q = self.q_proj(hidden_states) # [batch, seq_len, hidden_size]
385
+ k = self.k_proj(hidden_states)
386
+ v = self.v_proj(hidden_states)
387
+
388
+ # Reshape to multi-head: [batch, seq_len, num_heads, head_dim]
389
+ q = q.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
390
+ k = k.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
391
+ v = v.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
392
+
393
+ # Compute attention scores
394
+ attn_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.config.head_dim)
395
+
396
+ # Add bias: [batch, num_heads] -> [batch, num_heads, 1, 1] -> broadcast to all positions
397
+ attn_scores = attn_scores + attn_bias.unsqueeze(-1).unsqueeze(-1)
398
+
399
+ # Apply attention mask if provided
400
+ if attention_mask is not None:
401
+ attn_scores = attn_scores + attention_mask
402
+
403
+ # Softmax and dropout
404
+ attn_weights = F.softmax(attn_scores, dim=-1)
405
+ attn_weights = self.attn_dropout(attn_weights)
406
+
407
+ # Apply to values
408
+ attn_output = torch.matmul(attn_weights, v)
409
+ attn_output = attn_output.transpose(1, 2).contiguous().view(
410
+ batch_size, seq_len, self.config.hidden_size
411
+ )
412
+ attn_output = self.out_proj(attn_output)
413
+ else:
414
+ # Standard attention using manual projections
415
+ batch_size, seq_len, _ = hidden_states.shape
416
+
417
+ q = self.q_proj(hidden_states)
418
+ k = self.k_proj(hidden_states)
419
+ v = self.v_proj(hidden_states)
420
+
421
+ q = q.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
422
+ k = k.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
423
+ v = v.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
424
+
425
+ attn_output, attn_weights = F.scaled_dot_product_attention(
426
+ q, k, v,
427
+ attn_mask=attention_mask,
428
+ dropout_p=0.1 if self.training else 0.0,
429
+ is_causal=True
430
+ )
431
+
432
+ attn_output = attn_output.transpose(1, 2).contiguous().view(
433
+ batch_size, seq_len, self.config.hidden_size
434
+ )
435
+ attn_output = self.out_proj(attn_output)
436
+
437
  hidden_states = residual + self.dropout(attn_output)
438
 
439
  # Feed-forward with residual
 
443
  if self.use_moe:
444
  mlp_output, moe_loss = self.mlp(hidden_states)
445
  else:
446
+ if self.config.use_eq_gated_ffn:
447
+ # Apply first part of MLP
448
+ intermediate = self.mlp(hidden_states) # [batch, seq_len, intermediate_size]
449
+ # Apply gate to intermediate representation
450
+ ffn_gate_expanded = ffn_gate.unsqueeze(1).expand(-1, intermediate.size(1), -1)
451
+ gated_intermediate = intermediate * ffn_gate_expanded
452
+ # Apply output projection
453
+ mlp_output = self.out_proj_mlp(gated_intermediate)
454
+ else:
455
+ mlp_output = self.mlp(hidden_states)
456
  moe_loss = None
457
 
458
  hidden_states = residual + self.dropout(mlp_output)
459
 
460
+ return hidden_states, attn_weights, moe_loss, eq_state, consistency_loss
461
 
462
 
463
  class ZenithPreTrainedModel(PreTrainedModel):
 
549
  all_hidden_states = () if output_hidden_states else None
550
  all_self_attns = () if output_attentions else None
551
  all_moe_losses = []
552
+ all_eq_states = [] if self.config.use_eq_adapter else None
553
+ all_consistency_losses = [] if (self.config.use_eq_adapter and self.config.use_eq_recurrence) else None
554
+
555
+ # Initialize recurrent EQ state
556
+ prev_eq_state = None
557
 
558
  for layer in self.layers:
559
  if output_hidden_states:
 
562
  layer_outputs = layer(
563
  hidden_states,
564
  attention_mask=attention_mask,
565
+ output_attentions=output_attentions,
566
+ prev_eq_state=prev_eq_state
567
  )
568
 
569
  hidden_states = layer_outputs[0]
570
 
571
+ # Extract EQ state and consistency loss from layer outputs
572
+ if self.config.use_eq_adapter:
573
+ eq_state = layer_outputs[3] if len(layer_outputs) > 3 else None
574
+ consistency_loss = layer_outputs[4] if len(layer_outputs) > 4 else None
575
+
576
+ if eq_state is not None:
577
+ all_eq_states.append(eq_state)
578
+ prev_eq_state = eq_state # Pass to next layer
579
+
580
+ if consistency_loss is not None:
581
+ all_consistency_losses.append(consistency_loss)
582
+
583
  if output_attentions:
584
  all_self_attns = all_self_attns + (layer_outputs[1],)
585
 
 
609
  if all_moe_losses:
610
  loss += torch.stack(all_moe_losses).mean()
611
 
612
+ if self.eq_adapter is not None and all_consistency_losses:
613
+ loss += self.config.eq_consistency_weight * torch.stack(all_consistency_losses).mean()
 
614
 
615
  if not return_dict:
616
  output = (logits,) + all_hidden_states + all_self_attns
push_to_hf.py CHANGED
@@ -3,22 +3,24 @@
3
  Push Zenith-7B model to Hugging Face Hub.
4
 
5
  Usage:
6
- python push_to_hf.py --repo_id Matrix-Corp/Zenith-7b --token YOUR_TOKEN
7
  """
8
 
9
  import argparse
10
  import os
 
11
  from pathlib import Path
12
- from huggingface_hub import HfApi, login
 
13
 
14
 
15
- def push_model(repo_id: str, token: str = None, folder_path: str = "."):
16
- """Push model files to Hugging Face Hub."""
17
  folder_path = Path(folder_path).resolve()
18
-
19
  if not folder_path.exists():
20
  raise ValueError(f"Folder not found: {folder_path}")
21
-
22
  # Check required files
23
  required_files = [
24
  "modeling_zenith.py",
@@ -31,36 +33,96 @@ def push_model(repo_id: str, token: str = None, folder_path: str = "."):
31
  "finetune_qwen.py",
32
  "Modelfile"
33
  ]
34
-
35
  missing = [f for f in required_files if not (folder_path / f).exists()]
36
  if missing:
37
- print(f"Warning: Missing files: {missing}")
38
  response = input("Continue anyway? (y/N): ")
39
  if response.lower() != 'y':
40
  return
41
-
42
- # Login
43
- if token:
44
- login(token=token)
45
- else:
46
- print("No token provided. Please login:")
47
- login()
48
-
49
- # Upload
50
- print(f"Uploading {folder_path} to {repo_id}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  api = HfApi()
52
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  try:
54
  api.upload_folder(
55
  folder_path=str(folder_path),
56
  repo_id=repo_id,
57
  repo_type="model",
58
- commit_message="Upload Zenith-7B model"
59
  )
60
- print(f"✅ Successfully uploaded to https://huggingface.co/{repo_id}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  except Exception as e:
62
- print(f"❌ Error: {e}")
63
- raise
 
 
 
 
64
 
65
 
66
  def main():
@@ -68,7 +130,7 @@ def main():
68
  parser.add_argument(
69
  "--repo_id",
70
  type=str,
71
- default="Matrix-Corp/Zenith-7b",
72
  help="Hugging Face repository ID (username/model-name)"
73
  )
74
  parser.add_argument(
@@ -82,19 +144,15 @@ def main():
82
  default=".",
83
  help="Folder containing model files (default: current directory)"
84
  )
85
-
 
 
 
 
 
86
  args = parser.parse_args()
87
-
88
- # Verify we're in the right folder
89
- current_dir = Path.cwd()
90
- if "V1" not in current_dir.parts or "7B" not in current_dir.parts:
91
- print("Warning: Not in V1/7B directory. Make sure you're in Zenith/V1/7B")
92
- response = input("Continue? (y/N): ")
93
- if response.lower() != 'y':
94
- return
95
-
96
- push_model(args.repo_id, args.token, args.folder)
97
 
98
 
99
  if __name__ == "__main__":
100
- main()
 
3
  Push Zenith-7B model to Hugging Face Hub.
4
 
5
  Usage:
6
+ python push_to_hf.py --repo_id Matrix-Corp/Zenith-7b-V1 --token YOUR_TOKEN
7
  """
8
 
9
  import argparse
10
  import os
11
+ import sys
12
  from pathlib import Path
13
+ from huggingface_hub import HfApi, login, create_repo, whoami
14
+ from huggingface_hub.utils import RepositoryNotFoundError, HfHubHTTPError
15
 
16
 
17
+ def push_model(repo_id: str, token: str = None, folder_path: str = ".", private: bool = False):
18
+ """Push model files to Hugging Face Hub with robust error handling."""
19
  folder_path = Path(folder_path).resolve()
20
+
21
  if not folder_path.exists():
22
  raise ValueError(f"Folder not found: {folder_path}")
23
+
24
  # Check required files
25
  required_files = [
26
  "modeling_zenith.py",
 
33
  "finetune_qwen.py",
34
  "Modelfile"
35
  ]
36
+
37
  missing = [f for f in required_files if not (folder_path / f).exists()]
38
  if missing:
39
+ print(f"⚠️ Warning: Missing files: {missing}")
40
  response = input("Continue anyway? (y/N): ")
41
  if response.lower() != 'y':
42
  return
43
+
44
+ # Authenticate
45
+ try:
46
+ if token:
47
+ login(token=token)
48
+ print(" Logged in with provided token")
49
+ else:
50
+ # Check if already logged in
51
+ try:
52
+ user = whoami()
53
+ print(f"✓ Already logged in as: {user['name']}")
54
+ except:
55
+ print("Please login to Hugging Face:")
56
+ login()
57
+ except Exception as e:
58
+ print(f"❌ Authentication failed: {e}")
59
+ print("\nTo get a token:")
60
+ print("1. Go to https://huggingface.co/settings/tokens")
61
+ print("2. Create a new token with 'write' permissions")
62
+ print("3. Run: python push_to_hf.py --token YOUR_TOKEN")
63
+ return
64
+
65
+ # Create API client
66
  api = HfApi()
67
+
68
+ # Check if repo exists, create if not
69
+ try:
70
+ repo_info = api.repo_info(repo_id=repo_id, repo_type="model")
71
+ print(f"✓ Repository exists: {repo_id}")
72
+ except RepositoryNotFoundError:
73
+ print(f"📝 Repository not found. Creating: {repo_id}")
74
+ try:
75
+ create_repo(
76
+ repo_id=repo_id,
77
+ token=token,
78
+ repo_type="model",
79
+ private=private,
80
+ exist_ok=True
81
+ )
82
+ print(f"✓ Repository created")
83
+ except Exception as e:
84
+ print(f"❌ Failed to create repository: {e}")
85
+ return
86
+ except Exception as e:
87
+ print(f"⚠️ Warning: Could not check repository: {e}")
88
+
89
+ # Upload
90
+ print(f"\n📤 Uploading {folder_path} to {repo_id}...")
91
+ print("This may take a while depending on file sizes...\n")
92
+
93
  try:
94
  api.upload_folder(
95
  folder_path=str(folder_path),
96
  repo_id=repo_id,
97
  repo_type="model",
98
+ commit_message=f"Upload Zenith-7B model"
99
  )
100
+ print(f"\n✅ Successfully uploaded to https://huggingface.co/{repo_id}")
101
+ print("\nNext steps:")
102
+ print("1. Visit your model page")
103
+ print("2. Add a model card if needed")
104
+ print("3. Test: from transformers import AutoModel; AutoModel.from_pretrained('your-repo-id')")
105
+ except HfHubHTTPError as e:
106
+ if e.response.status_code == 401:
107
+ print(f"\n❌ Unauthorized: Invalid token or no write access")
108
+ print(" Make sure you:")
109
+ print(" - Have a valid token with 'write' permissions")
110
+ print(" - Own the organization/repository or have collaborator rights")
111
+ elif e.response.status_code == 403:
112
+ print(f"\n❌ Forbidden: You don't have permission to push to this repository")
113
+ print(" Make sure you're a member of the organization with write access")
114
+ elif e.response.status_code == 404:
115
+ print(f"\n❌ Repository not found: {repo_id}")
116
+ print(" Check the repository ID is correct")
117
+ else:
118
+ print(f"\n❌ HTTP Error {e.response.status_code}: {e}")
119
  except Exception as e:
120
+ print(f"\nUpload failed: {e}")
121
+ print("\nTroubleshooting:")
122
+ print("1. Check your internet connection")
123
+ print("2. Verify you have enough disk space")
124
+ print("3. Try logging in again: huggingface-cli login")
125
+ print("4. Check Hugging Face status: https://status.huggingface.co")
126
 
127
 
128
  def main():
 
130
  parser.add_argument(
131
  "--repo_id",
132
  type=str,
133
+ default="Matrix-Corp/Zenith-7b-V1",
134
  help="Hugging Face repository ID (username/model-name)"
135
  )
136
  parser.add_argument(
 
144
  default=".",
145
  help="Folder containing model files (default: current directory)"
146
  )
147
+ parser.add_argument(
148
+ "--private",
149
+ action="store_true",
150
+ help="Create repository as private (default: public)"
151
+ )
152
+
153
  args = parser.parse_args()
154
+ push_model(args.repo_id, args.token, args.folder, args.private)
 
 
 
 
 
 
 
 
 
155
 
156
 
157
  if __name__ == "__main__":
158
+ main()
test_all_models_eq.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Test EQ engine implementation for all Zenith models."""
3
+
4
+ import sys
5
+ import torch
6
+
7
+ def test_model(model_name, config_module, model_module):
8
+ """Test a specific model configuration."""
9
+ print(f"\n{'='*60}")
10
+ print(f"Testing {model_name}...")
11
+ print(f"{'='*60}")
12
+
13
+ try:
14
+ # Create config with all EQ features enabled
15
+ config = config_module.ZenithConfig(
16
+ use_eq_adapter=True,
17
+ use_eq_attention_bias=True,
18
+ use_eq_gated_ffn=True,
19
+ use_eq_recurrence=True,
20
+ eq_consistency_weight=0.02,
21
+ eq_state_dim=256,
22
+ num_layers=2, # Small for testing
23
+ hidden_size=512 if hasattr(config_module.ZenithConfig, 'hidden_size') else 3072,
24
+ num_heads=8,
25
+ head_dim=64,
26
+ intermediate_size=2048 if hasattr(config_module.ZenithConfig, 'intermediate_size') else 8192
27
+ )
28
+
29
+ # Create model
30
+ model = model_module.ZenithModel(config)
31
+ print(f"[OK] Model created successfully")
32
+ print(f" Parameters: {sum(p.numel() for p in model.parameters()):,}")
33
+
34
+ # Test forward pass
35
+ batch_size = 1
36
+ seq_len = 8
37
+ input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_len))
38
+
39
+ # Training mode to test consistency loss
40
+ model.train()
41
+ outputs = model(input_ids=input_ids, labels=input_ids)
42
+
43
+ print(f"[OK] Forward pass successful")
44
+ print(f" Logits shape: {outputs.logits.shape}")
45
+ if outputs.loss is not None:
46
+ print(f" Loss: {outputs.loss.item():.4f}")
47
+
48
+ # Test inference mode
49
+ model.eval()
50
+ with torch.no_grad():
51
+ outputs = model(input_ids=input_ids)
52
+ print(f"[OK] Inference successful")
53
+ print(f" Logits shape: {outputs.logits.shape}")
54
+
55
+ print(f"[SUCCESS] {model_name} EQ Engine is FULLY FUNCTIONAL")
56
+ return True
57
+
58
+ except Exception as e:
59
+ print(f"[FAIL] {model_name} failed:")
60
+ print(f" Error: {type(e).__name__}: {e}")
61
+ import traceback
62
+ traceback.print_exc()
63
+ return False
64
+
65
def main():
    """Run the EQ smoke test against every Zenith variant and summarize.

    Returns 0 when all variants pass, 1 otherwise (suitable for sys.exit).
    """
    print("Testing EQ Engine Implementation for All Zenith Models")
    print("="*60)

    results = {}

    # (results key, display name, package path) for each Zenith variant.
    variants = (
        ("7B", "Zenith-7B", "Zenith.V1_7B"),
        ("28B", "Zenith-28B-p300", "Zenith.V1_Tenstorrent_Blackhole_p300_28B"),
        ("32B", "Zenith-32B-p300", "Zenith.V1_Tenstorrent_Blackhole_p300_32B"),
        ("70B", "Zenith-70B-p300", "Zenith.V1_Tenstorrent_Blackhole_p300_70B"),
    )

    for key, display_name, package_path in variants:
        try:
            # Equivalent to `from <package_path> import configs, modeling_zenith`.
            pkg = __import__(package_path, fromlist=["configs", "modeling_zenith"])
            results[key] = test_model(display_name, pkg.configs, pkg.modeling_zenith)
        except Exception as e:
            print(f"[FAIL] {key} model import error: {e}")
            results[key] = False

    # Summary
    print("\n" + "="*60)
    print("SUMMARY")
    print("="*60)
    for model_name, success in results.items():
        print(f"{'[PASS]' if success else '[FAIL]'} {model_name}")

    if all(results.values()):
        print("\n[SUCCESS] All models have functional EQ Engine implementation!")
        return 0
    print("\n[WARNING] Some models failed. Please review errors above.")
    return 1

if __name__ == "__main__":
    sys.exit(main())
test_eq_engine.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Test EQ engine implementation."""
3
+
4
+ import torch
5
+ from modeling_zenith import ZenithConfig, ZenithModel
6
+
7
def test_eq_engine():
    """Smoke-test a tiny ZenithModel with every EQ feature switched on."""
    print("Testing EQ Engine Implementation...")

    # All EQ flags enabled; dimensions kept small so the test is CPU-friendly.
    eq_flags = dict(
        use_eq_adapter=True,
        use_eq_attention_bias=True,
        use_eq_gated_ffn=True,
        use_eq_recurrence=True,
        eq_consistency_weight=0.02,
        eq_state_dim=256,
    )
    config = ZenithConfig(
        num_layers=4,  # Small for testing
        hidden_size=512,
        num_heads=8,
        head_dim=64,
        intermediate_size=2048,
        **eq_flags,
    )

    print(f"Config: {config}")

    model = ZenithModel(config)
    param_count = sum(p.numel() for p in model.parameters())
    print(f"[OK] Model created successfully")
    print(f"  Parameters: {param_count:,}")

    # Training-mode pass: labels trigger the loss (incl. the consistency term).
    tokens = torch.randint(0, config.vocab_size, (2, 16))
    model.train()
    outputs = model(input_ids=tokens, labels=tokens)

    print(f"[OK] Forward pass successful")
    print(f"  Logits shape: {outputs.logits.shape}")
    print(f"  Loss: {outputs.loss.item() if outputs.loss is not None else 'None'}")

    # Inference-mode pass without gradients.
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids=tokens)
    print(f"[OK] Inference successful")
    print(f"  Logits shape: {outputs.logits.shape}")

    print("\n[SUCCESS] EQ Engine implementation is FULLY FUNCTIONAL")
    print("\nFeatures implemented:")
    print("  [1] EQ attention bias")
    print("  [2] EQ-gated FFN")
    print("  [3] Recurrent EQ state with GRU")
    print("  [4] EQ consistency loss")
    print("  [5] Per-layer EQ adapter integration")

if __name__ == "__main__":
    test_eq_engine()
verify_imports.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Verify all models can be imported and instantiated."""
3
+
4
+ import sys
5
+ import os
6
+
7
+ # Add each model directory to path
8
+ base_dir = os.path.dirname(os.path.abspath(__file__))
9
+ sys.path.insert(0, base_dir)
10
+
11
+ print("Testing model imports and basic functionality...")
12
+ print("="*60)
13
+
14
+ # Test 7B
15
+ print("\n[1] Testing Zenith-7B...")
16
+ try:
17
+ from Zenith.V1_7B.configs import zenith_config as cfg_7b
18
+ from Zenith.V1_7B.modeling_zenith import ZenithModel as Model7B, ZenithConfig as Config7B
19
+ config = Config7B(
20
+ use_eq_adapter=True,
21
+ use_eq_attention_bias=True,
22
+ use_eq_gated_ffn=True,
23
+ use_eq_recurrence=True,
24
+ eq_consistency_weight=0.02,
25
+ eq_state_dim=256,
26
+ num_layers=2,
27
+ hidden_size=512,
28
+ num_heads=8,
29
+ head_dim=64,
30
+ intermediate_size=2048
31
+ )
32
+ model = Model7B(config)
33
+ print(f" [OK] 7B model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
34
+ except Exception as e:
35
+ print(f" [FAIL] 7B: {e}")
36
+
37
+ # Test 28B-p300
38
+ print("\n[2] Testing Zenith-28B-p300...")
39
+ try:
40
+ p300_28b_dir = os.path.join(base_dir, '..', 'V1-Tenstorrent-Blackhole-p300', '28B')
41
+ sys.path.insert(0, p300_28b_dir)
42
+ from Zenith.V1_Tenstorrent_Blackhole_p300_28B.configs import zenith_config as cfg_28b
43
+ from Zenith.V1_Tenstorrent_Blackhole_p300_28B.modeling_zenith import ZenithModel as Model28B, ZenithConfig as Config28B
44
+ config = Config28B(
45
+ use_eq_adapter=True,
46
+ use_eq_attention_bias=True,
47
+ use_eq_gated_ffn=True,
48
+ use_eq_recurrence=True,
49
+ eq_consistency_weight=0.02,
50
+ eq_state_dim=256,
51
+ num_layers=2,
52
+ hidden_size=3072,
53
+ num_heads=24,
54
+ head_dim=128,
55
+ intermediate_size=8192
56
+ )
57
+ model = Model28B(config)
58
+ print(f" [OK] 28B-p300 model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
59
+ except Exception as e:
60
+ print(f" [FAIL] 28B-p300: {e}")
61
+
62
+ # Test 32B-p300
63
+ print("\n[3] Testing Zenith-32B-p300...")
64
+ try:
65
+ p300_32b_dir = os.path.join(base_dir, '..', 'V1-Tenstorrent-Blackhole-p300', '32B')
66
+ sys.path.insert(0, p300_32b_dir)
67
+ from Zenith.V1_Tenstorrent_Blackhole_p300_32B.configs import zenith_config as cfg_32b
68
+ from Zenith.V1_Tenstorrent_Blackhole_p300_32B.modeling_zenith import ZenithModel as Model32B, ZenithConfig as Config32B
69
+ config = Config32B(
70
+ use_eq_adapter=True,
71
+ use_eq_attention_bias=True,
72
+ use_eq_gated_ffn=True,
73
+ use_eq_recurrence=True,
74
+ eq_consistency_weight=0.02,
75
+ eq_state_dim=256,
76
+ num_layers=2,
77
+ hidden_size=4096,
78
+ num_heads=32,
79
+ head_dim=128,
80
+ intermediate_size=11008
81
+ )
82
+ model = Model32B(config)
83
+ print(f" [OK] 32B-p300 model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
84
+ except Exception as e:
85
+ print(f" [FAIL] 32B-p300: {e}")
86
+
87
+ # Test 70B-p300
88
+ print("\n[4] Testing Zenith-70B-p300...")
89
+ try:
90
+ p300_70b_dir = os.path.join(base_dir, '..', 'V1-Tenstorrent-Blackhole-p300', '70B')
91
+ sys.path.insert(0, p300_70b_dir)
92
+ from Zenith.V1_Tenstorrent_Blackhole_p300_70B.configs import zenith_config as cfg_70b
93
+ from Zenith.V1_Tenstorrent_Blackhole_p300_70B.modeling_zenith import ZenithModel as Model70B, ZenithConfig as Config70B
94
+ config = Config70B(
95
+ use_eq_adapter=True,
96
+ use_eq_attention_bias=True,
97
+ use_eq_gated_ffn=True,
98
+ use_eq_recurrence=True,
99
+ eq_consistency_weight=0.02,
100
+ eq_state_dim=256,
101
+ num_layers=2,
102
+ hidden_size=8192,
103
+ num_heads=64,
104
+ head_dim=128,
105
+ intermediate_size=28672
106
+ )
107
+ model = Model70B(config)
108
+ print(f" [OK] 70B-p300 model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
109
+ except Exception as e:
110
+ print(f" [FAIL] 70B-p300: {e}")
111
+
112
+ print("\n" + "="*60)
113
+ print("EQ ENGINE IMPLEMENTATION VERIFICATION COMPLETE")
114
+ print("="*60)