Upload folder using huggingface_hub

- README.md +2 -3
- config.json +34 -0
- merges.txt +0 -0
- modeling_qalb.py +45 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +31 -0
- vocab.json +0 -0
README.md
CHANGED
@@ -1,3 +1,2 @@
# Qalb-Pro (Urdu Engram Model)
Experimental OPT-125M with DeepSeek Engram Module.
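Since config.json (below) maps AutoModelForCausalLM onto the custom FinalPerfectQalb class, the model must be loaded with trust_remote_code=True so that modeling_qalb.py is fetched and executed. A minimal loading sketch follows; the repository id is a placeholder, not the actual hub path.

# Loading sketch (hypothetical repo id; replace with the real one).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-username/qalb-pro"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# trust_remote_code=True resolves AutoModelForCausalLM to FinalPerfectQalb
# via the auto_map entry in config.json.
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

inputs = tokenizer("یہ ایک تجرباتی ماڈل ہے", return_tensors="pt")
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch, sequence_length, 50272)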
config.json
ADDED
@@ -0,0 +1,34 @@
{
  "_remove_final_layer_norm": false,
  "activation_dropout": 0.0,
  "activation_function": "relu",
  "architectures": [
    "OPTForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "do_layer_norm_before": true,
  "dropout": 0.1,
  "dtype": "float16",
  "enable_bias": true,
  "eos_token_id": 2,
  "ffn_dim": 3072,
  "hidden_size": 768,
  "init_std": 0.02,
  "layer_norm_elementwise_affine": true,
  "layerdrop": 0.0,
  "max_position_embeddings": 2048,
  "model_type": "qalb",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "prefix": "</s>",
  "transformers_version": "4.57.3",
  "use_cache": true,
  "vocab_size": 50272,
  "word_embed_proj_dim": 768,
  "auto_map": {
    "AutoConfig": "modeling_qalb.QalbConfig",
    "AutoModelForCausalLM": "modeling_qalb.FinalPerfectQalb"
  }
}
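Because model_type is "qalb" rather than "opt", transformers only resolves this config through the auto_map / trust_remote_code path. When working with modeling_qalb.py locally instead, the custom classes can be registered with the auto factories. A rough sketch, assuming modeling_qalb.py and config.json sit in the current working directory:

# Hypothetical local-registration sketch (assumes modeling_qalb.py and config.json
# live in the current working directory).
from transformers import AutoConfig, AutoModelForCausalLM
from modeling_qalb import QalbConfig, FinalPerfectQalb

AutoConfig.register("qalb", QalbConfig)                      # map model_type -> config class
AutoModelForCausalLM.register(QalbConfig, FinalPerfectQalb)  # map config class -> model class

config = AutoConfig.from_pretrained("./")         # reads the config.json shown above
model = AutoModelForCausalLM.from_config(config)  # builds an untrained FinalPerfectQalb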
merges.txt
ADDED
The diff for this file is too large to render. See raw diff.
modeling_qalb.py
ADDED
@@ -0,0 +1,45 @@
import torch
from torch import nn
from transformers import OPTPreTrainedModel, OPTModel, OPTConfig
from transformers.modeling_outputs import CausalLMOutputWithPast


class QalbConfig(OPTConfig):
    model_type = "qalb"

    def __init__(self, table_size=500000, **kwargs):
        super().__init__(**kwargs)
        # Number of rows in the engram memory table.
        self.table_size = table_size


class DeepSeekEngramModule(nn.Module):
    """Hash-addressed memory table gated against the backbone hidden states."""

    def __init__(self, config):
        super().__init__()
        self.table_size = getattr(config, "table_size", 500000)
        self.dim = config.word_embed_proj_dim
        self.memory_table = nn.Embedding(self.table_size, self.dim)
        self.gate = nn.Linear(self.dim, 1)
        self.polynomial_base = 31

    def forward(self, input_ids, hidden_states):
        seq_len = input_ids.shape[1]
        # Running hash of each token prefix: a scaled prefix sum taken modulo the
        # table size, so every position addresses one row of the memory table.
        hashes = torch.zeros_like(input_ids)
        for t in range(seq_len):
            hashes[:, t] = (input_ids[:, :t + 1].sum(dim=1) * self.polynomial_base) % self.table_size

        memory_features = self.memory_table(hashes.abs())
        # Per-position sigmoid gate blends backbone states with the retrieved memory rows.
        g = torch.sigmoid(self.gate(hidden_states))
        return g * hidden_states + (1 - g) * memory_features.to(hidden_states.dtype)


class FinalPerfectQalb(OPTPreTrainedModel):
    config_class = QalbConfig

    def __init__(self, config):
        super().__init__(config)
        self.backbone = OPTModel(config)
        self.engram = DeepSeekEngramModule(config)
        self.post_init()

    def forward(self, input_ids, attention_mask=None, **kwargs):
        outputs = self.backbone(input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state
        enhanced_states = self.engram(input_ids, hidden_states)
        # Project to the vocabulary by tying against the backbone's input embeddings.
        logits = torch.matmul(enhanced_states, self.backbone.decoder.embed_tokens.weight.T)
        return CausalLMOutputWithPast(logits=logits)
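A quick shape check of the classes above, using a deliberately tiny configuration so it runs on CPU in a moment; the sizes here are illustrative, not the shipped 125M hyperparameters.

# Smoke test with a tiny illustrative config (not the shipped hyperparameters).
import torch
from modeling_qalb import QalbConfig, FinalPerfectQalb

config = QalbConfig(
    vocab_size=100,
    hidden_size=32,
    word_embed_proj_dim=32,
    ffn_dim=64,
    num_hidden_layers=2,
    num_attention_heads=4,
    max_position_embeddings=64,
    table_size=1000,
)
model = FinalPerfectQalb(config).eval()

input_ids = torch.randint(0, config.vocab_size, (2, 10))
with torch.no_grad():
    out = model(input_ids)
print(out.logits.shape)  # expected: torch.Size([2, 10, 100])

As a side note, the prefix-sum loop in DeepSeekEngramModule.forward could equivalently be written as (torch.cumsum(input_ids, dim=1) * 31) % table_size, avoiding the per-position Python loop.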
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2d974e2f5b592d615b960c21246611315afae88487d939c0bfca619f6e2d4ebf
size 452181515
special_tokens_map.json
ADDED
@@ -0,0 +1,30 @@
{
  "bos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,31 @@
{
  "add_bos_token": true,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "</s>",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 1000000000000000019884624838656,
  "pad_token": "<pad>",
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "</s>"
}
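For a quick sanity check of the tokenizer settings above (pad token at id 1, "</s>" as bos/eos at id 2, add_bos_token enabled), a small sketch assuming the repo files have been downloaded to the current directory:

# Tokenizer sanity check; "./" assumes the files above sit in the current directory.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./")

print(tokenizer.pad_token, tokenizer.pad_token_id)  # expected: <pad> 1
print(tokenizer.bos_token, tokenizer.bos_token_id)  # expected: </s> 2

# add_bos_token=true means every encoded sequence should start with id 2.
print(tokenizer("ایک مثال").input_ids[0])  # expected: 2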
vocab.json
ADDED
The diff for this file is too large to render. See raw diff.