Upload RMT
Browse files- config.json +5 -1
- language_modeling.py +105 -0
- model.safetensors +2 -2
config.json
CHANGED
|
@@ -3,6 +3,10 @@
|
|
| 3 |
"architectures": [
|
| 4 |
"RMT"
|
| 5 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
"base_model_name": "HuggingFaceTB/SmolLM2-135M",
|
| 7 |
"bos_token_id": 0,
|
| 8 |
"eos_token_id": 0,
|
|
@@ -13,5 +17,5 @@
|
|
| 13 |
"recurrent_wrapper_cls": "modeling_rmt.experimental:RecurrentWrapperNoSegmentationGenerate",
|
| 14 |
"think_token_id": 8,
|
| 15 |
"torch_dtype": "float32",
|
| 16 |
-
"transformers_version": "4.
|
| 17 |
}
|
|
|
|
| 3 |
"architectures": [
|
| 4 |
"RMT"
|
| 5 |
],
|
| 6 |
+
"auto_map": {
|
| 7 |
+
"AutoConfig": "language_modeling.RMTConfig",
|
| 8 |
+
"AutoModel": "language_modeling.RMT"
|
| 9 |
+
},
|
| 10 |
"base_model_name": "HuggingFaceTB/SmolLM2-135M",
|
| 11 |
"bos_token_id": 0,
|
| 12 |
"eos_token_id": 0,
|
|
|
|
| 17 |
"recurrent_wrapper_cls": "modeling_rmt.experimental:RecurrentWrapperNoSegmentationGenerate",
|
| 18 |
"think_token_id": 8,
|
| 19 |
"torch_dtype": "float32",
|
| 20 |
+
"transformers_version": "4.54.1"
|
| 21 |
}
|
language_modeling.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import importlib
|
| 2 |
+
from transformers import PreTrainedModel, PretrainedConfig
|
| 3 |
+
# from lm_experiments_tools.utils import get_cls_by_name
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def get_cls_by_name(name: str) -> type:
    """Resolve a class from a ``module:ClassName`` specifier.

    Args:
        name (str): e.g., transformers:T5ForConditionalGeneration, modeling_t5:my_class

    Returns:
        type: the class named by `name`

    Raises:
        ValueError: if `name` does not contain exactly one ``:`` separator.
        ModuleNotFoundError: if the module part cannot be imported.
    """
    # Exactly-two unpack: anything other than one ':' is rejected.
    module_path, class_name = name.split(':')
    module = importlib.import_module(module_path)
    return getattr(module, class_name)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class RMTConfig(PretrainedConfig):
    """Configuration for the Recurrent Memory Transformer (RMT) wrapper.

    Stores the base-model name plus the RMT-specific settings: number of
    memory tokens, segment limit, special-token ids, and the
    ``module:Class`` paths of the memory-cell and recurrent-wrapper
    classes that ``RMT.__init__`` resolves dynamically.
    """

    model_type = "rmt"

    def __init__(self,
                 base_model_name="HuggingFaceTB/SmolLM2-135M",
                 num_mem_tokens=16,
                 max_n_segments=10,
                 think_token_id=None,
                 answer_token_id=None,
                 bos_token_id=None,
                 eos_token_id=None,
                 memory_cell_cls='modeling_rmt.language_modeling:MemoryCell',
                 recurrent_wrapper_cls='modeling_rmt.experimental:RecurrentWrapperNoSegmentationGenerate',
                 **kwargs):
        """Create an RMT configuration.

        Args:
            base_model_name (str): HF id of the backbone causal LM.
            num_mem_tokens (int): number of memory tokens per segment.
            max_n_segments (int): maximum number of recurrent segments.
            think_token_id (int | None): id of the "think" special token.
            answer_token_id (int | None): id of the "answer" special token.
            bos_token_id (int | None): beginning-of-sequence token id.
            eos_token_id (int | None): end-of-sequence token id.
            memory_cell_cls (str): ``module:Class`` path of the memory cell.
            recurrent_wrapper_cls (str): ``module:Class`` path of the wrapper.
            **kwargs: forwarded to ``PretrainedConfig``.
        """
        super().__init__(**kwargs)
        self.base_model_name = base_model_name
        self.num_mem_tokens = num_mem_tokens
        self.max_n_segments = max_n_segments
        self.think_token_id = think_token_id
        self.answer_token_id = answer_token_id
        # Assigned after super().__init__ so these explicit values win over
        # anything PretrainedConfig derived from **kwargs.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.memory_cell_cls = memory_cell_cls
        self.recurrent_wrapper_cls = recurrent_wrapper_cls

    def get(self, attr: str, default=None):
        """Dict-style access: return ``self.<attr>`` if set, else `default`."""
        # getattr's 3-arg form replaces the hasattr/getattr double lookup.
        return getattr(self, attr, default)
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class RMT(PreTrainedModel):
    """HF ``PreTrainedModel`` wrapper around a Recurrent Memory Transformer.

    Builds an (untrained) backbone causal LM from ``config.base_model_name``,
    wraps it in a memory cell and a recurrent wrapper — both resolved
    dynamically from the ``module:Class`` paths stored in the config — and
    delegates ``forward``/``generate`` to the wrapper.
    """

    config_class = RMTConfig

    def __init__(self, config: RMTConfig):
        super().__init__(config)
        # Imported lazily so importing this module stays cheap.
        from transformers import AutoConfig, AutoModelForCausalLM
        base_config = AutoConfig.from_pretrained(config.base_model_name)
        # from_config builds a randomly-initialized skeleton; real weights
        # arrive later via load_state_dict / from_pretrained.
        base_model = AutoModelForCausalLM.from_config(base_config)

        memory_cell_cls = get_cls_by_name(config.memory_cell_cls)
        recurrent_wrapper_cls = get_cls_by_name(config.recurrent_wrapper_cls)

        self.rmt_config = config
        memory_cell = memory_cell_cls(base_model, num_mem_tokens=config.num_mem_tokens)
        self.rmt = recurrent_wrapper_cls(
            memory_cell,
            max_n_segments=config.max_n_segments,
            think_token_id=config.think_token_id,
            answer_token_id=config.answer_token_id,
            bos_token_id=config.bos_token_id,
            eos_token_id=config.eos_token_id
        )

    def forward(self, *args, **kwargs):
        """Delegate the forward pass to the recurrent wrapper."""
        return self.rmt(*args, **kwargs)

    def generate(self, *args, **kwargs):
        """Delegate generation to the recurrent wrapper."""
        return self.rmt.generate(*args, **kwargs)

    def load_state_dict(self, state_dict, strict=True, assign=False):
        """Load weights, falling back to the inner RMT module's loader.

        Checkpoints saved from the bare wrapper lack the outer ``rmt.``
        prefix, so when the top-level load raises ``RuntimeError`` the
        load is retried on ``self.rmt`` directly.

        Returns:
            The missing/unexpected-keys result of whichever load
            succeeded. (Bug fix: the original returned ``None`` on the
            fallback path and hardcoded ``strict=True`` there, ignoring
            the caller's flag.)
        """
        try:
            return super().load_state_dict(state_dict, strict, assign)
        except RuntimeError:
            print("Failed to load state, retrying with RMT loader.")
            result = self.rmt.load_state_dict(state_dict, strict=strict, assign=assign)
            print("Success!")
            return result

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, config=None, *args, **kwargs):
        """Build an RMT from a checkpoint directory.

        NOTE(review): this reads ``model.safetensors`` directly from the
        given path, so it only works for *local* directories, not hub
        model ids — confirm callers never pass a remote id.

        Args:
            pretrained_model_name_or_path: local directory holding
                ``config.json`` (if `config` is None) and ``model.safetensors``.
            config (RMTConfig | None): explicit config; loaded from the
                directory when omitted.

        Returns:
            RMT: the model with the checkpoint weights loaded.
        """
        import os
        from collections import OrderedDict
        from safetensors import safe_open

        if config is None:
            config = RMTConfig.from_pretrained(pretrained_model_name_or_path)
        model = cls(config)

        safetensors_path = os.path.join(pretrained_model_name_or_path, "model.safetensors")
        state_dict = OrderedDict()
        with safe_open(safetensors_path, framework="pt", device="cpu") as f:
            for key in f.keys():
                state_dict[key] = f.get_tensor(key)
        # strict=False tolerates prefix mismatches; the load_state_dict
        # override above retries on the inner module when needed.
        model.load_state_dict(state_dict, strict=False)

        return model
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9aaa76bc4eb456e998ecc096bdd5c05f9b83662ec42362fba6eb4580a839ea9
|
| 3 |
+
size 269140352
|