tensorfiend
/

DotLM-165M

Text Generation

thought-experiments

chain-of-thought

small-language-model

custom-architecture

Model card Files Files and versions

tensorfiend committed on Apr 4

Commit

b4b0382

·

verified ·

1 Parent(s): 9b021de

Upload modeling_dotlm.py with huggingface_hub

Files changed (1) hide show

modeling_dotlm.py +33 -0

modeling_dotlm.py CHANGED Viewed

@@ -382,3 +382,36 @@ class DotLMForCausalLM(PreTrainedModel, GenerationMixin):
             (k.index_select(0, beam_idx), v.index_select(0, beam_idx))
             for (k, v) in past_key_values
         )

             (k.index_select(0, beam_idx), v.index_select(0, beam_idx))
             for (k, v) in past_key_values
         )
+    @torch.no_grad()
+    def generate(self, input_ids=None, max_new_tokens=256, temperature=1.0,
+                 top_k=None, do_sample=True, eos_token_id=None, **kwargs):
+        """Generate tokens autoregressively without using GenerationMixin internals.
+
+        Args:
+            input_ids: Prompt token ids indexed as (batch, sequence). Required;
+                the ``None`` default only keeps the keyword-style signature.
+            max_new_tokens: Upper bound on the number of decoding steps.
+            temperature: Divisor applied to the logits before sampling, clamped
+                to at least 1e-8. Unused when ``do_sample`` is False.
+            top_k: When a positive integer, sampling is restricted to the
+                ``top_k`` largest logits. ``None`` or a non-positive value
+                disables the filter.
+            do_sample: Sample from the softmax distribution when True, otherwise
+                pick the argmax.
+            eos_token_id: Optional id that ends a sequence. A row that has
+                produced it is filled with the same id on later steps, and
+                decoding stops once every row has produced it.
+            **kwargs: Accepted for call compatibility and ignored.
+
+        Returns:
+            The prompt followed by the generated tokens. Whenever the running
+            sequence is longer than ``config.context_len`` at the start of a
+            step it is cut to its last ``context_len`` tokens, so older tokens
+            can be missing from the result.
+
+        Raises:
+            ValueError: If ``input_ids`` is None.
+        """
+        if input_ids is None:
+            raise ValueError("generate() requires input_ids")
+        self._ensure_rope_and_mask()
+        context_len = self.config.context_len
+        # top_k <= 0 means "no filtering"; torch.topk(k=0) would return an empty
+        # tensor and the threshold lookup below would fail.
+        use_top_k = top_k is not None and top_k > 0
+        kv_cache = None
+        curr_ids = input_ids
+        # One flag per row, shaped like next_token so it can be used as a mask.
+        finished = torch.zeros((curr_ids.size(0), 1), dtype=torch.bool, device=curr_ids.device)
+        for _ in range(max_new_tokens):
+            if curr_ids.size(1) > context_len:
+                curr_ids = curr_ids[:, -context_len:]
+                # The cache still describes the positions that were just cut
+                # off; drop it so the cropped window is re-encoded instead of
+                # appending one more position to an already full cache.
+                kv_cache = None
+            model_input = curr_ids if kv_cache is None else curr_ids[:, -1:]
+            out = self.forward(model_input, past_key_values=kv_cache, use_cache=True, return_dict=True)
+            kv_cache = out.past_key_values
+            logits = out.logits[:, -1, :]
+            if do_sample:
+                logits = logits / max(temperature, 1e-8)
+                if use_top_k:
+                    kth_best = torch.topk(logits, min(top_k, logits.size(-1))).values[:, [-1]]
+                    logits = logits.masked_fill(logits < kth_best, float("-inf"))
+                probs = F.softmax(logits, dim=-1)
+                next_token = torch.multinomial(probs, num_samples=1)
+            else:
+                next_token = logits.argmax(dim=-1, keepdim=True)
+            if eos_token_id is not None:
+                # Rows that already ended keep emitting EOS, which also keeps
+                # the flag set for them on every later step.
+                next_token = next_token.masked_fill(finished, eos_token_id)
+                finished = next_token == eos_token_id
+            curr_ids = torch.cat([curr_ids, next_token], dim=1)
+            if eos_token_id is not None and finished.all():
+                break
+        return curr_ids