NoesisLab
/

Asterisk

Text Generation

hybrid-architecture

graph-reasoning

Model card Files Files and versions

OzTianlu committed 3 days ago

Commit

39bf691

·

verified ·

1 Parent(s): cd370de

Update handler.py

Files changed (1) hide show

handler.py +2 -2

handler.py CHANGED Viewed

@@ -102,9 +102,9 @@ class EndpointHandler:
             if attention_mask is not None:
                 attention_mask = attention_mask.to(self.model.device)
-            input_len = input_ids.shape[-1]
             gen_ids = self.model.generate(
                 input_ids=input_ids,
                 max_new_tokens=max_new_tokens,
                 do_sample=do_sample,
@@ -118,7 +118,7 @@ class EndpointHandler:
             )
             # Only return newly generated tokens
-            new_tokens = gen_ids[0, input_len:]
             text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
             return {"generated_text": text}

             if attention_mask is not None:
                 attention_mask = attention_mask.to(self.model.device)
             gen_ids = self.model.generate(
+                attention_mask=attention_mask,
                 input_ids=input_ids,
                 max_new_tokens=max_new_tokens,
                 do_sample=do_sample,
             )
             # Only return newly generated tokens
+            new_tokens = gen_ids
             text = self.tokenizer.decode(new_tokens, skip_special_tokens=True)
             return {"generated_text": text}