Upload Zenith-7B model
- __pycache__/modeling_zenith.cpython-313.pyc +0 -0
- configs/zenith_config.py +8 -1
- hf_model_card.md +2 -2
- modeling_zenith.py +226 -28
- push_to_hf.py +94 -36
- test_all_models_eq.py +124 -0
- test_eq_engine.py +61 -0
- verify_imports.py +114 -0
__pycache__/modeling_zenith.cpython-313.pyc
ADDED
Binary file (27.1 kB)
configs/zenith_config.py
CHANGED

@@ -32,11 +32,18 @@ class ZenithConfig:
 
     # EQ Adapter configuration
     use_eq_adapter: bool = True
-
+    eq_adapter_hidden_size: int = 512
    eq_num_emotions: int = 8
    eq_frustration_dim: int = 256
    eq_dropout: float = 0.1
 
+    # EQ Engine advanced features
+    use_eq_attention_bias: bool = False
+    use_eq_gated_ffn: bool = False
+    use_eq_recurrence: bool = False
+    eq_consistency_weight: float = 0.02
+    eq_state_dim: int = 256  # Dimension of recurrent EQ state
+
     # Normalization & dropout
     rms_norm_eps: float = 1e-6
     dropout: float = 0.0
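
For reference, a minimal sketch of how the new flags combine (all field names come from the diff above; the import path is an assumption based on the file layout):

    from configs.zenith_config import ZenithConfig

    # Enable the full EQ Engine on top of the existing adapter switch.
    config = ZenithConfig(
        use_eq_adapter=True,
        eq_adapter_hidden_size=512,   # adapter MLP width
        use_eq_attention_bias=True,   # per-head attention bias
        use_eq_gated_ffn=True,        # EQ-gated feed-forward
        use_eq_recurrence=True,       # GRU-carried EQ state
        eq_consistency_weight=0.02,   # weight of the auxiliary consistency loss
        eq_state_dim=256,             # recurrent state dimension
    )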
hf_model_card.md
CHANGED

@@ -15,11 +15,11 @@ tags:
 datasets:
 - open-thoughts/OpenThoughts3-1.2M
 model-index:
-- name: Zenith-7B
+- name: Zenith-7B-V1
   results: []
 ---
 
-# Zenith-7B
+# Zenith-7B-V1
 
 **Production-ready 7B parameter model with code generation, reasoning, and emotional intelligence.**
 
modeling_zenith.py
CHANGED

@@ -16,6 +16,7 @@ Zenith features:
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+import math
 from typing import Optional, Tuple, List, Dict, Any
 
 from transformers import PreTrainedModel, PretrainedConfig

@@ -176,7 +177,7 @@ class MoELayer(nn.Module):
 
 
 class EQAdapter(nn.Module):
-    """Emotional Intelligence Adapter."""
+    """Enhanced Emotional Intelligence Adapter with recurrent state and core architecture integration."""
 
     def __init__(self, config: ZenithConfig):
         super().__init__()

@@ -197,7 +198,54 @@ class EQAdapter(nn.Module):
             nn.Linear(config.eq_adapter_hidden_size, 8)
         )
 
-
+        # Recurrent EQ state (GRU) for layer-to-layer consistency
+        if config.use_eq_recurrence:
+            self.eq_gru = nn.GRUCell(
+                input_size=config.eq_adapter_hidden_size,
+                hidden_size=config.eq_state_dim
+            )
+            # Projection to generate the initial state from pooled features
+            self.state_projection = nn.Linear(config.hidden_size, config.eq_state_dim)
+            # Projection to reduce pooled features to the GRU input size
+            self.gru_input_proj = nn.Linear(config.hidden_size, config.eq_adapter_hidden_size)
+        else:
+            self.eq_gru = None
+            self.state_projection = None
+            self.gru_input_proj = None
+
+        # EQ state to attention bias (scalar per head); without recurrence the EQ state is the raw pooled vector, so the input size is hidden_size
+        if config.use_eq_attention_bias:
+            self.attn_bias_proj = nn.Linear(
+                config.eq_state_dim if config.use_eq_recurrence else config.hidden_size,
+                config.num_heads,
+                bias=False
+            )
+        else:
+            self.attn_bias_proj = None
+
+        # EQ state to FFN gate (same input-size rule as the attention bias)
+        if config.use_eq_gated_ffn:
+            self.ffn_gate_proj = nn.Linear(
+                config.eq_state_dim if config.use_eq_recurrence else config.hidden_size,
+                config.intermediate_size,
+                bias=False
+            )
+        else:
+            self.ffn_gate_proj = None
+
+    def forward(self, hidden_states: torch.Tensor, prev_eq_state: Optional[torch.Tensor] = None):
+        """
+        Args:
+            hidden_states: [batch, seq_len, hidden_size]
+            prev_eq_state: [batch, eq_state_dim] previous EQ state (for recurrence)
+
+        Returns:
+            frustration: [batch, 1]
+            emotion_logits: [batch, 8]
+            eq_state: [batch, eq_state_dim] updated EQ state
+            attn_bias: [batch, num_heads] or None
+            ffn_gate: [batch, intermediate_size] or None
+        """
         # Pool over sequence dimension
         pooled = hidden_states.mean(dim=1)
 

@@ -207,7 +255,30 @@ class EQAdapter(nn.Module):
         # Emotion logits
         emotion_logits = self.emotion_classifier(pooled)
 
-
+        # Compute the EQ state
+        if self.eq_gru is not None:  # recurrence enabled
+            # Project pooled features to the GRU input size
+            gru_input = torch.tanh(self.gru_input_proj(pooled))
+            if prev_eq_state is None:
+                # Initialize the state from the pooled projection
+                eq_state = torch.tanh(self.state_projection(pooled))
+            else:
+                eq_state = self.eq_gru(gru_input, prev_eq_state)
+        else:
+            # No recurrence: use pooled features directly
+            eq_state = torch.tanh(pooled)
+
+        # Compute the attention bias if enabled
+        attn_bias = None
+        if self.attn_bias_proj is not None:
+            attn_bias = self.attn_bias_proj(eq_state)  # [batch, num_heads]
+
+        # Compute the FFN gate if enabled
+        ffn_gate = None
+        if self.ffn_gate_proj is not None:
+            ffn_gate = torch.sigmoid(self.ffn_gate_proj(eq_state))
+
+        return frustration, emotion_logits, eq_state, attn_bias, ffn_gate
 
 
 class ZenithLayer(nn.Module):

@@ -220,26 +291,37 @@ class ZenithLayer(nn.Module):
 
         # Determine if this layer uses MoE
         self.use_moe = (
             config.num_experts > 0 and
             (not config.moe_layers or layer_idx in config.moe_layers)
         )
 
-        # Self attention
-        [previous self-attention setup; truncated in the original diff view]
+        # Self attention projections
+        self.q_proj = nn.Linear(config.hidden_size, config.hidden_size)
+        self.k_proj = nn.Linear(config.hidden_size, config.hidden_size)
+        self.v_proj = nn.Linear(config.hidden_size, config.hidden_size)
+        self.out_proj = nn.Linear(config.hidden_size, config.hidden_size)
+        self.config = config  # kept for head reshapes and feature flags in forward
+
+        self.attn_dropout = nn.Dropout(0.1)  # attention dropout
 
         # MoE or dense feed-forward
         if self.use_moe:
             self.mlp = MoELayer(config)
         else:
-            [previous dense feed-forward setup; truncated in the original diff view]
+            if config.use_eq_gated_ffn:
+                # Gated MLP: the EQ gate is applied to the intermediate representation
+                self.mlp = nn.Sequential(
+                    nn.Linear(config.hidden_size, config.intermediate_size),
+                    nn.SiLU(),
+                )
+                self.gate_proj = nn.Linear(config.intermediate_size, config.intermediate_size)
+                self.out_proj_mlp = nn.Linear(config.intermediate_size, config.hidden_size)
+            else:
+                self.mlp = nn.Sequential(
+                    nn.Linear(config.hidden_size, config.intermediate_size),
+                    nn.SiLU(),
+                    nn.Linear(config.intermediate_size, config.hidden_size)
+                )
 
         # Layer norm
         self.norm1 = nn.LayerNorm(config.hidden_size)

@@ -248,20 +330,110 @@ class ZenithLayer(nn.Module):
         # Dropout
         self.dropout = nn.Dropout(0.1)
 
+        # EQ adapter (if enabled)
+        if config.use_eq_adapter:
+            self.eq_adapter = EQAdapter(config)
+        else:
+            self.eq_adapter = None
+
     def forward(
         self,
         hidden_states: torch.Tensor,
         attention_mask: Optional[torch.Tensor] = None,
-        output_attentions: bool = False
-    ):
+        output_attentions: bool = False,
+        prev_eq_state: Optional[torch.Tensor] = None
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
+        """
+        Args:
+            hidden_states: [batch, seq_len, hidden_size]
+            attention_mask: attention mask
+            output_attentions: whether to output attention weights
+            prev_eq_state: [batch, eq_state_dim] previous EQ state from the previous layer
+
+        Returns:
+            hidden_states: [batch, seq_len, hidden_size]
+            attn_weights: [batch, num_heads, seq_len, seq_len] or None
+            moe_loss: scalar or None
+            eq_state: [batch, eq_state_dim] or None
+            consistency_loss: scalar or None
+        """
+        # Process the EQ adapter if enabled
+        eq_state = None
+        attn_bias = None
+        ffn_gate = None
+        consistency_loss = None
+
+        if self.eq_adapter is not None:
+            frustration, emotion_logits, eq_state, attn_bias, ffn_gate = self.eq_adapter(
+                hidden_states, prev_eq_state
+            )
+
+            # Consistency loss when recurrence is enabled and a previous state exists
+            if self.config.use_eq_recurrence and prev_eq_state is not None:
+                consistency_loss = F.mse_loss(eq_state, prev_eq_state.detach())
+
         # Self attention with residual
         residual = hidden_states
         hidden_states = self.norm1(hidden_states)
 
-        [previous attention call; truncated in the original diff view]
+        # Apply the attention bias if enabled (added to the scores before softmax)
+        if attn_bias is not None:
+            batch_size, seq_len, _ = hidden_states.shape
+
+            # Compute Q, K, V from normalized hidden states
+            q = self.q_proj(hidden_states)  # [batch, seq_len, hidden_size]
+            k = self.k_proj(hidden_states)
+            v = self.v_proj(hidden_states)
+
+            # Reshape to multi-head: [batch, num_heads, seq_len, head_dim]
+            q = q.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
+            k = k.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
+            v = v.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
+
+            # Compute attention scores
+            attn_scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.config.head_dim)
+
+            # Add bias: [batch, num_heads] -> [batch, num_heads, 1, 1] -> broadcast to all positions
+            attn_scores = attn_scores + attn_bias.unsqueeze(-1).unsqueeze(-1)
+
+            # Apply the attention mask if provided (assumed to include causal masking)
+            if attention_mask is not None:
+                attn_scores = attn_scores + attention_mask
+
+            # Softmax and dropout
+            attn_weights = F.softmax(attn_scores, dim=-1)
+            attn_weights = self.attn_dropout(attn_weights)
+
+            # Apply to values
+            attn_output = torch.matmul(attn_weights, v)
+            attn_output = attn_output.transpose(1, 2).contiguous().view(
+                batch_size, seq_len, self.config.hidden_size
+            )
+            attn_output = self.out_proj(attn_output)
+        else:
+            # Standard attention using manual projections
+            batch_size, seq_len, _ = hidden_states.shape
+
+            q = self.q_proj(hidden_states)
+            k = self.k_proj(hidden_states)
+            v = self.v_proj(hidden_states)
+
+            q = q.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
+            k = k.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
+            v = v.view(batch_size, seq_len, self.config.num_heads, self.config.head_dim).transpose(1, 2)
+
+            attn_output = F.scaled_dot_product_attention(
+                q, k, v, attn_mask=attention_mask,
+                dropout_p=0.1 if self.training else 0.0,
+                is_causal=attention_mask is None
+            )
+            attn_weights = None  # SDPA does not return attention weights
+
+            attn_output = attn_output.transpose(1, 2).contiguous().view(
+                batch_size, seq_len, self.config.hidden_size
+            )
+            attn_output = self.out_proj(attn_output)
+
         hidden_states = residual + self.dropout(attn_output)
 
         # Feed-forward with residual

@@ -271,12 +443,21 @@ class ZenithLayer(nn.Module):
         if self.use_moe:
             mlp_output, moe_loss = self.mlp(hidden_states)
         else:
-            [previous dense feed-forward call; truncated in the original diff view]
+            if self.config.use_eq_gated_ffn:
+                # First part of the MLP (up-projection + activation)
+                intermediate = self.mlp(hidden_states)  # [batch, seq_len, intermediate_size]
+                # Apply the EQ gate (if present) to the intermediate representation
+                if ffn_gate is not None:
+                    intermediate = intermediate * ffn_gate.unsqueeze(1).expand(-1, intermediate.size(1), -1)
+                # Output projection back to hidden_size
+                mlp_output = self.out_proj_mlp(intermediate)
+            else:
+                mlp_output = self.mlp(hidden_states)
             moe_loss = None
 
         hidden_states = residual + self.dropout(mlp_output)
 
-        return hidden_states, attn_weights, moe_loss
+        return hidden_states, attn_weights, moe_loss, eq_state, consistency_loss
 
 
 class ZenithPreTrainedModel(PreTrainedModel):

@@ -368,6 +549,11 @@ class ZenithModel(ZenithPreTrainedModel):
         all_hidden_states = () if output_hidden_states else None
         all_self_attns = () if output_attentions else None
         all_moe_losses = []
+        all_eq_states = [] if self.config.use_eq_adapter else None
+        all_consistency_losses = [] if (self.config.use_eq_adapter and self.config.use_eq_recurrence) else None
+
+        # Initialize the recurrent EQ state
+        prev_eq_state = None
 
         for layer in self.layers:
             if output_hidden_states:

@@ -376,11 +562,24 @@ class ZenithModel(ZenithPreTrainedModel):
             layer_outputs = layer(
                 hidden_states,
                 attention_mask=attention_mask,
-                output_attentions=output_attentions
+                output_attentions=output_attentions,
+                prev_eq_state=prev_eq_state
             )
 
             hidden_states = layer_outputs[0]
 
+            # Extract the EQ state and consistency loss from the layer outputs
+            if self.config.use_eq_adapter:
+                eq_state = layer_outputs[3] if len(layer_outputs) > 3 else None
+                consistency_loss = layer_outputs[4] if len(layer_outputs) > 4 else None
+
+                if eq_state is not None:
+                    all_eq_states.append(eq_state)
+                    prev_eq_state = eq_state  # Pass to the next layer
+
+                if consistency_loss is not None:
+                    all_consistency_losses.append(consistency_loss)
+
             if output_attentions:
                 all_self_attns = all_self_attns + (layer_outputs[1],)
 

@@ -410,9 +609,8 @@ class ZenithModel(ZenithPreTrainedModel):
         if all_moe_losses:
             loss += torch.stack(all_moe_losses).mean()
 
-        if self.eq_adapter is not None:
-            [body truncated in the original diff view]
-            pass
+        if self.config.use_eq_adapter and all_consistency_losses:
+            loss += self.config.eq_consistency_weight * torch.stack(all_consistency_losses).mean()
 
         if not return_dict:
             output = (logits,) + all_hidden_states + all_self_attns
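
Two standalone sanity sketches of the mechanics added above (shapes and values are illustrative; only the tensor algebra mirrors the diff). First, the layer-to-layer GRU state threading and consistency loss:

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    batch, hidden_size, adapter_hidden, eq_state_dim = 2, 512, 512, 256
    gru = nn.GRUCell(input_size=adapter_hidden, hidden_size=eq_state_dim)
    state_projection = nn.Linear(hidden_size, eq_state_dim)
    gru_input_proj = nn.Linear(hidden_size, adapter_hidden)

    prev_eq_state, consistency_losses = None, []
    for _ in range(4):  # stand-in for the model's layer loop
        pooled = torch.randn(batch, hidden_size)  # stand-in for mean-pooled hidden states
        if prev_eq_state is None:
            eq_state = torch.tanh(state_projection(pooled))  # initial state
        else:
            eq_state = gru(torch.tanh(gru_input_proj(pooled)), prev_eq_state)
            consistency_losses.append(F.mse_loss(eq_state, prev_eq_state.detach()))
        prev_eq_state = eq_state

    aux_loss = 0.02 * torch.stack(consistency_losses).mean()  # eq_consistency_weight
    print(eq_state.shape, aux_loss.item())  # torch.Size([2, 256]) ...

Second, the attention-bias broadcast. One caveat worth noting: because the bias is a single scalar per head, broadcast uniformly over every query and key position, the softmax normalization cancels it; for the bias to change the attention pattern it would have to vary along the key axis:

    import torch
    import torch.nn.functional as F

    scores = torch.randn(2, 8, 16, 16)  # [batch, num_heads, q_len, k_len]
    bias = torch.randn(2, 8)            # per-head scalar, as produced by attn_bias_proj
    shifted = scores + bias.unsqueeze(-1).unsqueeze(-1)
    # A shift that is constant along the key axis leaves the softmax unchanged:
    print(torch.allclose(F.softmax(scores, dim=-1), F.softmax(shifted, dim=-1), atol=1e-6))  # True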
push_to_hf.py
CHANGED

@@ -3,22 +3,24 @@
 Push Zenith-7B model to Hugging Face Hub.
 
 Usage:
-    python push_to_hf.py --repo_id Matrix-Corp/Zenith-7b --token YOUR_TOKEN
+    python push_to_hf.py --repo_id Matrix-Corp/Zenith-7b-V1 --token YOUR_TOKEN
 """
 
 import argparse
 import os
+import sys
 from pathlib import Path
-from huggingface_hub import HfApi, login
+from huggingface_hub import HfApi, login, create_repo, whoami
+from huggingface_hub.utils import RepositoryNotFoundError, HfHubHTTPError
 
 
-def push_model(repo_id: str, token: str = None, folder_path: str = "."):
-    """Push model files to Hugging Face Hub."""
+def push_model(repo_id: str, token: str = None, folder_path: str = ".", private: bool = False):
+    """Push model files to Hugging Face Hub with robust error handling."""
     folder_path = Path(folder_path).resolve()
 
     if not folder_path.exists():
         raise ValueError(f"Folder not found: {folder_path}")
 
     # Check required files
     required_files = [
         "modeling_zenith.py",

@@ -31,36 +33,96 @@ def push_model(repo_id: str, token: str = None, folder_path: str = "."):
         "finetune_qwen.py",
         "Modelfile"
     ]
 
     missing = [f for f in required_files if not (folder_path / f).exists()]
     if missing:
-        print(f"Warning: Missing files: {missing}")
+        print(f"⚠️ Warning: Missing files: {missing}")
         response = input("Continue anyway? (y/N): ")
         if response.lower() != 'y':
             return
 
-    # [previous upload preamble; truncated in the original diff view]
+    # Authenticate
+    try:
+        if token:
+            login(token=token)
+            print("✓ Logged in with provided token")
+        else:
+            # Check if already logged in
+            try:
+                user = whoami()
+                print(f"✓ Already logged in as: {user['name']}")
+            except Exception:
+                print("Please login to Hugging Face:")
+                login()
+    except Exception as e:
+        print(f"❌ Authentication failed: {e}")
+        print("\nTo get a token:")
+        print("1. Go to https://huggingface.co/settings/tokens")
+        print("2. Create a new token with 'write' permissions")
+        print("3. Run: python push_to_hf.py --token YOUR_TOKEN")
+        return
+
+    # Create API client
     api = HfApi()
 
+    # Check if the repo exists; create it if not
+    try:
+        api.repo_info(repo_id=repo_id, repo_type="model")
+        print(f"✓ Repository exists: {repo_id}")
+    except RepositoryNotFoundError:
+        print(f"📝 Repository not found. Creating: {repo_id}")
+        try:
+            create_repo(
+                repo_id=repo_id,
+                token=token,
+                repo_type="model",
+                private=private,
+                exist_ok=True
+            )
+            print("✓ Repository created")
+        except Exception as e:
+            print(f"❌ Failed to create repository: {e}")
+            return
+    except Exception as e:
+        print(f"⚠️ Warning: Could not check repository: {e}")
+
+    # Upload
+    print(f"\n📤 Uploading {folder_path} to {repo_id}...")
+    print("This may take a while depending on file sizes...\n")
+
     try:
         api.upload_folder(
             folder_path=str(folder_path),
             repo_id=repo_id,
             repo_type="model",
             commit_message="Upload Zenith-7B model"
         )
-        print(f"✅ Successfully uploaded to https://huggingface.co/{repo_id}")
+        print(f"\n✅ Successfully uploaded to https://huggingface.co/{repo_id}")
+        print("\nNext steps:")
+        print("1. Visit your model page")
+        print("2. Add a model card if needed")
+        print("3. Test: from transformers import AutoModel; AutoModel.from_pretrained('your-repo-id')")
+    except HfHubHTTPError as e:
+        if e.response.status_code == 401:
+            print("\n❌ Unauthorized: Invalid token or no write access")
+            print("   Make sure you:")
+            print("   - Have a valid token with 'write' permissions")
+            print("   - Own the organization/repository or have collaborator rights")
+        elif e.response.status_code == 403:
+            print("\n❌ Forbidden: You don't have permission to push to this repository")
+            print("   Make sure you're a member of the organization with write access")
+        elif e.response.status_code == 404:
+            print(f"\n❌ Repository not found: {repo_id}")
+            print("   Check the repository ID is correct")
+        else:
+            print(f"\n❌ HTTP Error {e.response.status_code}: {e}")
     except Exception as e:
-        [previous error message; truncated in the original diff view]
+        print(f"\n❌ Upload failed: {e}")
+        print("\nTroubleshooting:")
+        print("1. Check your internet connection")
+        print("2. Verify you have enough disk space")
+        print("3. Try logging in again: huggingface-cli login")
+        print("4. Check Hugging Face status: https://status.huggingface.co")
 
 
 def main():

@@ -68,7 +130,7 @@ def main():
     parser.add_argument(
         "--repo_id",
         type=str,
-        default="Matrix-Corp/Zenith-7b",
+        default="Matrix-Corp/Zenith-7b-V1",
         help="Hugging Face repository ID (username/model-name)"
     )
     parser.add_argument(

@@ -82,19 +144,15 @@ def main():
         default=".",
         help="Folder containing model files (default: current directory)"
     )
+    parser.add_argument(
+        "--private",
+        action="store_true",
+        help="Create repository as private (default: public)"
+    )
 
     args = parser.parse_args()
-
-    # Verify we're in the right folder
-    current_dir = Path.cwd()
-    if "V1" not in current_dir.parts or "7B" not in current_dir.parts:
-        print("Warning: Not in V1/7B directory. Make sure you're in Zenith/V1/7B")
-        response = input("Continue? (y/N): ")
-        if response.lower() != 'y':
-            return
-
-    push_model(args.repo_id, args.token, args.folder)
+    push_model(args.repo_id, args.token, args.folder, args.private)
 
 
 if __name__ == "__main__":
     main()
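
A brief programmatic usage sketch of the updated entry point (the repo ID mirrors the new default; the token is a placeholder for a write-scoped token from https://huggingface.co/settings/tokens):

    from push_to_hf import push_model

    push_model(
        repo_id="Matrix-Corp/Zenith-7b-V1",
        token="hf_...",   # placeholder
        folder_path=".",
        private=True,     # mirrors the new --private flag
    )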
test_all_models_eq.py
ADDED

@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""Test EQ engine implementation for all Zenith models."""
+
+import sys
+import torch
+
+def test_model(model_name, config_module, model_module):
+    """Test a specific model configuration."""
+    print(f"\n{'='*60}")
+    print(f"Testing {model_name}...")
+    print(f"{'='*60}")
+
+    try:
+        # Create config with all EQ features enabled
+        config = config_module.ZenithConfig(
+            use_eq_adapter=True,
+            use_eq_attention_bias=True,
+            use_eq_gated_ffn=True,
+            use_eq_recurrence=True,
+            eq_consistency_weight=0.02,
+            eq_state_dim=256,
+            num_layers=2,  # Small for testing
+            hidden_size=512 if hasattr(config_module.ZenithConfig, 'hidden_size') else 3072,
+            num_heads=8,
+            head_dim=64,
+            intermediate_size=2048 if hasattr(config_module.ZenithConfig, 'intermediate_size') else 8192
+        )
+
+        # Create model
+        model = model_module.ZenithModel(config)
+        print("[OK] Model created successfully")
+        print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")
+
+        # Test forward pass
+        batch_size = 1
+        seq_len = 8
+        input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_len))
+
+        # Training mode to test consistency loss
+        model.train()
+        outputs = model(input_ids=input_ids, labels=input_ids)
+
+        print("[OK] Forward pass successful")
+        print(f"  Logits shape: {outputs.logits.shape}")
+        if outputs.loss is not None:
+            print(f"  Loss: {outputs.loss.item():.4f}")
+
+        # Test inference mode
+        model.eval()
+        with torch.no_grad():
+            outputs = model(input_ids=input_ids)
+        print("[OK] Inference successful")
+        print(f"  Logits shape: {outputs.logits.shape}")
+
+        print(f"[SUCCESS] {model_name} EQ Engine is FULLY FUNCTIONAL")
+        return True
+
+    except Exception as e:
+        print(f"[FAIL] {model_name} failed:")
+        print(f"  Error: {type(e).__name__}: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+def main():
+    print("Testing EQ Engine Implementation for All Zenith Models")
+    print("="*60)
+
+    results = {}
+
+    # Test 7B model
+    try:
+        from Zenith.V1_7B import configs as configs_7b
+        from Zenith.V1_7B import modeling_zenith as model_7b
+        results["7B"] = test_model("Zenith-7B", configs_7b, model_7b)
+    except Exception as e:
+        print(f"[FAIL] 7B model import error: {e}")
+        results["7B"] = False
+
+    # Test 28B model
+    try:
+        from Zenith.V1_Tenstorrent_Blackhole_p300_28B import configs as configs_28b
+        from Zenith.V1_Tenstorrent_Blackhole_p300_28B import modeling_zenith as model_28b
+        results["28B"] = test_model("Zenith-28B-p300", configs_28b, model_28b)
+    except Exception as e:
+        print(f"[FAIL] 28B model import error: {e}")
+        results["28B"] = False
+
+    # Test 32B model
+    try:
+        from Zenith.V1_Tenstorrent_Blackhole_p300_32B import configs as configs_32b
+        from Zenith.V1_Tenstorrent_Blackhole_p300_32B import modeling_zenith as model_32b
+        results["32B"] = test_model("Zenith-32B-p300", configs_32b, model_32b)
+    except Exception as e:
+        print(f"[FAIL] 32B model import error: {e}")
+        results["32B"] = False
+
+    # Test 70B model
+    try:
+        from Zenith.V1_Tenstorrent_Blackhole_p300_70B import configs as configs_70b
+        from Zenith.V1_Tenstorrent_Blackhole_p300_70B import modeling_zenith as model_70b
+        results["70B"] = test_model("Zenith-70B-p300", configs_70b, model_70b)
+    except Exception as e:
+        print(f"[FAIL] 70B model import error: {e}")
+        results["70B"] = False
+
+    # Summary
+    print("\n" + "="*60)
+    print("SUMMARY")
+    print("="*60)
+    for model_name, success in results.items():
+        status = "[PASS]" if success else "[FAIL]"
+        print(f"{status} {model_name}")
+
+    all_passed = all(results.values())
+    if all_passed:
+        print("\n[SUCCESS] All models have functional EQ Engine implementation!")
+        return 0
+    else:
+        print("\n[WARNING] Some models failed. Please review errors above.")
+        return 1
+
+if __name__ == "__main__":
+    sys.exit(main())
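
Note: the imports above assume the checkouts are importable as a `Zenith` package (`Zenith/V1_7B/...` with `__init__.py` files). If they are plain directories, a path shim along the lines of the one in verify_imports.py below would be needed first (the relative path is an assumption):

    import os
    import sys
    # Make the parent of the Zenith/ directory importable before running the tests.
    sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))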
test_eq_engine.py
ADDED

@@ -0,0 +1,61 @@
+#!/usr/bin/env python3
+"""Test EQ engine implementation."""
+
+import torch
+from modeling_zenith import ZenithConfig, ZenithModel
+
+def test_eq_engine():
+    print("Testing EQ Engine Implementation...")
+
+    # Create config with all EQ features enabled
+    config = ZenithConfig(
+        use_eq_adapter=True,
+        use_eq_attention_bias=True,
+        use_eq_gated_ffn=True,
+        use_eq_recurrence=True,
+        eq_consistency_weight=0.02,
+        eq_state_dim=256,
+        num_layers=4,  # Small for testing
+        hidden_size=512,
+        num_heads=8,
+        head_dim=64,
+        intermediate_size=2048
+    )
+
+    print(f"Config: {config}")
+
+    # Create model
+    model = ZenithModel(config)
+    print("[OK] Model created successfully")
+    print(f"  Parameters: {sum(p.numel() for p in model.parameters()):,}")
+
+    # Test forward pass
+    batch_size = 2
+    seq_len = 16
+    input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_len))
+
+    # Training mode to test consistency loss
+    model.train()
+    outputs = model(input_ids=input_ids, labels=input_ids)
+
+    print("[OK] Forward pass successful")
+    print(f"  Logits shape: {outputs.logits.shape}")
+    print(f"  Loss: {outputs.loss.item() if outputs.loss is not None else 'None'}")
+
+    # Test inference mode
+    model.eval()
+    with torch.no_grad():
+        outputs = model(input_ids=input_ids)
+    print("[OK] Inference successful")
+    print(f"  Logits shape: {outputs.logits.shape}")
+
+    print("\n[SUCCESS] EQ Engine implementation is FULLY FUNCTIONAL")
+    print("\nFeatures implemented:")
+    print("  [1] EQ attention bias")
+    print("  [2] EQ-gated FFN")
+    print("  [3] Recurrent EQ state with GRU")
+    print("  [4] EQ consistency loss")
+    print("  [5] Per-layer EQ adapter integration")
+
+if __name__ == "__main__":
+    test_eq_engine()
verify_imports.py
ADDED

@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+"""Verify all models can be imported and instantiated."""
+
+import sys
+import os
+
+# Add each model directory to path
+base_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, base_dir)
+
+print("Testing model imports and basic functionality...")
+print("="*60)
+
+# Test 7B
+print("\n[1] Testing Zenith-7B...")
+try:
+    from Zenith.V1_7B.configs import zenith_config as cfg_7b
+    from Zenith.V1_7B.modeling_zenith import ZenithModel as Model7B, ZenithConfig as Config7B
+    config = Config7B(
+        use_eq_adapter=True,
+        use_eq_attention_bias=True,
+        use_eq_gated_ffn=True,
+        use_eq_recurrence=True,
+        eq_consistency_weight=0.02,
+        eq_state_dim=256,
+        num_layers=2,
+        hidden_size=512,
+        num_heads=8,
+        head_dim=64,
+        intermediate_size=2048
+    )
+    model = Model7B(config)
+    print(f"  [OK] 7B model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
+except Exception as e:
+    print(f"  [FAIL] 7B: {e}")
+
+# Test 28B-p300
+print("\n[2] Testing Zenith-28B-p300...")
+try:
+    p300_28b_dir = os.path.join(base_dir, '..', 'V1-Tenstorrent-Blackhole-p300', '28B')
+    sys.path.insert(0, p300_28b_dir)
+    from Zenith.V1_Tenstorrent_Blackhole_p300_28B.configs import zenith_config as cfg_28b
+    from Zenith.V1_Tenstorrent_Blackhole_p300_28B.modeling_zenith import ZenithModel as Model28B, ZenithConfig as Config28B
+    config = Config28B(
+        use_eq_adapter=True,
+        use_eq_attention_bias=True,
+        use_eq_gated_ffn=True,
+        use_eq_recurrence=True,
+        eq_consistency_weight=0.02,
+        eq_state_dim=256,
+        num_layers=2,
+        hidden_size=3072,
+        num_heads=24,
+        head_dim=128,
+        intermediate_size=8192
+    )
+    model = Model28B(config)
+    print(f"  [OK] 28B-p300 model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
+except Exception as e:
+    print(f"  [FAIL] 28B-p300: {e}")
+
+# Test 32B-p300
+print("\n[3] Testing Zenith-32B-p300...")
+try:
+    p300_32b_dir = os.path.join(base_dir, '..', 'V1-Tenstorrent-Blackhole-p300', '32B')
+    sys.path.insert(0, p300_32b_dir)
+    from Zenith.V1_Tenstorrent_Blackhole_p300_32B.configs import zenith_config as cfg_32b
+    from Zenith.V1_Tenstorrent_Blackhole_p300_32B.modeling_zenith import ZenithModel as Model32B, ZenithConfig as Config32B
+    config = Config32B(
+        use_eq_adapter=True,
+        use_eq_attention_bias=True,
+        use_eq_gated_ffn=True,
+        use_eq_recurrence=True,
+        eq_consistency_weight=0.02,
+        eq_state_dim=256,
+        num_layers=2,
+        hidden_size=4096,
+        num_heads=32,
+        head_dim=128,
+        intermediate_size=11008
+    )
+    model = Model32B(config)
+    print(f"  [OK] 32B-p300 model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
+except Exception as e:
+    print(f"  [FAIL] 32B-p300: {e}")
+
+# Test 70B-p300
+print("\n[4] Testing Zenith-70B-p300...")
+try:
+    p300_70b_dir = os.path.join(base_dir, '..', 'V1-Tenstorrent-Blackhole-p300', '70B')
+    sys.path.insert(0, p300_70b_dir)
+    from Zenith.V1_Tenstorrent_Blackhole_p300_70B.configs import zenith_config as cfg_70b
+    from Zenith.V1_Tenstorrent_Blackhole_p300_70B.modeling_zenith import ZenithModel as Model70B, ZenithConfig as Config70B
+    config = Config70B(
+        use_eq_adapter=True,
+        use_eq_attention_bias=True,
+        use_eq_gated_ffn=True,
+        use_eq_recurrence=True,
+        eq_consistency_weight=0.02,
+        eq_state_dim=256,
+        num_layers=2,
+        hidden_size=8192,
+        num_heads=64,
+        head_dim=128,
+        intermediate_size=28672
+    )
+    model = Model70B(config)
+    print(f"  [OK] 70B-p300 model instantiated: {sum(p.numel() for p in model.parameters()):,} parameters")
+except Exception as e:
+    print(f"  [FAIL] 70B-p300: {e}")
+
+print("\n" + "="*60)
+print("EQ ENGINE IMPLEMENTATION VERIFICATION COMPLETE")
+print("="*60)