robinfaro
/

time-GPT-1B-initial

@@ -403,12 +403,13 @@ class MoEGPTForCausalLM(PreTrainedModel):
         ]
     @torch.no_grad()
-    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
         """
         Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
         the sequence max_new_tokens times, feeding the predictions back into the model each time.
         Most likely you'll want to make sure to be in model.eval() mode of operation for this.
         """
         for _ in range(max_new_tokens):
             # if the sequence context is growing too long we must crop it at sequence_length
             idx_cond = (
@@ -417,7 +418,7 @@ class MoEGPTForCausalLM(PreTrainedModel):
                 else idx[:, -self.config.sequence_length :]
             )
             # forward the model to get the logits for the index in the sequence
-            logits = self(idx_cond, get_logits=True)["logits"]
             # pluck the logits at the final step and scale by desired temperature
             logits = logits[:, -1, :] / temperature
             # optionally crop the logits to only the top k options
@@ -434,7 +435,7 @@ class MoEGPTForCausalLM(PreTrainedModel):
         return idx
     @torch.no_grad()
-    def generate_from_string(self, in_str, max_new_tokens, temperature=1.0, top_k=None):
         idx = (
             torch.tensor(
                 self.tokenizer.encode(in_str, allowed_special={"<|endoftext|>"})
@@ -443,7 +444,7 @@ class MoEGPTForCausalLM(PreTrainedModel):
             .to(self.lm_head.weight.device)
         )
         out_idx = (
-            self.generate(idx, max_new_tokens, temperature, top_k)
             .view(-1)
             .to("cpu")
             .numpy()

         ]
     @torch.no_grad()
+    def generate(self, input_ids, max_new_tokens, date = None, temperature=1.0, top_k=None):
         """
         Take a conditioning sequence of indices input_ids (LongTensor of shape (b,t)) and run
         max_new_tokens generation steps, feeding the predictions back into the model each time.
         The optional date argument is forwarded unchanged to the model's forward call on every step.
         Most likely you'll want to make sure to be in model.eval() mode of operation for this.
         """
+        idx = input_ids
         for _ in range(max_new_tokens):
             # if the sequence context is growing too long we must crop it at sequence_length
             idx_cond = (
                 else idx[:, -self.config.sequence_length :]
             )
             # forward the model to get the logits for the index in the sequence
+            logits = self(idx_cond, date, get_logits=True).logits
             # pluck the logits at the final step and scale by desired temperature
             logits = logits[:, -1, :] / temperature
             # optionally crop the logits to only the top k options
         return idx
     @torch.no_grad()
+    def generate_from_string(self, in_str, max_new_tokens, date = None, temperature=1.0, top_k=None):
         idx = (
             torch.tensor(
                 self.tokenizer.encode(in_str, allowed_special={"<|endoftext|>"})
             .to(self.lm_head.weight.device)
         )
         out_idx = (
+            self.generate(idx, max_new_tokens, date, temperature, top_k)
             .view(-1)
             .to("cpu")
             .numpy()