Update 2 files

- /util.py
- /model.py

model.py CHANGED
```diff
@@ -1,26 +1,47 @@
+import torch
+
 from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel
 from mamba_ssm.models.config_mamba import MambaConfig

-from util import Config, GetDevice
+from util import Config, GetDevice, GetNumParams


 class Model:
     def __init__(self, config: Config):
         self.__dict__ = dict(config.__dict__)

-        #print(f"params: {params}")
-
         self.model = MambaLMHeadModel(MambaConfig(**self.params.__dict__)).to(GetDevice())


+    def Log(self):
+        model_size, rounded_model_size = GetNumParams(self.model)
+        print(f"Model has {model_size} ({rounded_model_size}) parameters")
+
+
     def AutoRegressiveLossFunction(self, input_ids, labels=None, criterion=None):
         lm_logits = self.model(input_ids).logits

-        labels = input_ids.to(
+        labels = input_ids.to(self.model.device)
         shift_logits = lm_logits[:, :-1, :].contiguous()
         labels = labels[:, 1:].contiguous()

         loss_fct = criterion or torch.nn.CrossEntropyLoss()
         lm_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), labels.view(-1))

         return lm_loss
+
+
+    def GenerateText(self, tokenizer, seed_text, num_predict):
+        max_len = num_predict + len(seed_text)
+
+        with torch.no_grad():
+            encoded_ids = tokenizer.encode(seed_text)
+            input_ids = torch.tensor(encoded_ids).unsqueeze(0).to(self.model.device)
+            output = self.model.generate(input_ids, max_length=max_len)
+
+            token_ids = output[0].tolist()
+            text = tokenizer.decode(token_ids)
+            return text
+
+
+    def SavePretrained(self, path='./'):
+        self.model.save_pretrained(path)
```
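For context, the shift in `AutoRegressiveLossFunction` is the standard next-token objective: the logit at position t is scored against the token at position t + 1, so the last logit and the first label are dropped before flattening (the `labels` argument is effectively unused, since labels are rebuilt from `input_ids`). Below is a minimal, self-contained sketch of that computation with dummy tensors; it needs no `mamba_ssm`, and the shapes are illustrative only:

```python
import torch

# Illustrative shapes: batch of 2 sequences, 8 tokens each, vocab of 100.
batch, seq_len, vocab = 2, 8, 100
lm_logits = torch.randn(batch, seq_len, vocab)         # stand-in for model(input_ids).logits
input_ids = torch.randint(0, vocab, (batch, seq_len))  # stand-in for tokenized text

# Next-token objective: drop the last logit and the first label,
# so position t's logits line up with the token at position t + 1.
shift_logits = lm_logits[:, :-1, :].contiguous()
labels = input_ids[:, 1:].contiguous()

# Flatten to (batch * (seq_len - 1), vocab) vs (batch * (seq_len - 1),).
loss = torch.nn.CrossEntropyLoss()(shift_logits.view(-1, vocab), labels.view(-1))
print(loss.item())  # for random logits this sits near ln(100), roughly 4.6
```

With random logits the loss hovers around ln(vocab), which makes this a handy smoke test before wiring up the real model.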
util.py CHANGED
```diff
@@ -20,6 +20,27 @@ def RandomCode():
     return code


+def RoundNumber(number):
+    suffixes = ['', 'k', 'm', 'b']
+
+    if number < 1000:
+        return str(number)
+
+    magnitude = 0
+    while abs(number) >= 1000:
+        magnitude += 1
+        number /= 1000.0
+
+    return '{:.0f}{}'.format(number, suffixes[magnitude])
+
+
+def GetNumParams(model):
+    size = sum(p.numel() for p in model.parameters())
+    rounded_size = RoundNumber(size)
+
+    return size, rounded_size
+
+
 class Config:
     def __init__(self, data):
         for key, value in data.items():
```