Fix: Add pad_token and attention_mask for proper generation
#1
by
RanjithaRuttala
- opened
- handler.py +10 -2
handler.py
CHANGED
|
@@ -15,6 +15,10 @@ class EndpointHandler:
|
|
| 15 |
|
| 16 |
print(f"Loading tokenizer from {path}...")
|
| 17 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
print(f"Loading model from {path} on device: {self.device}...")
|
| 20 |
self.model = AutoModelForCausalLM.from_pretrained(
|
|
@@ -62,11 +66,15 @@ class EndpointHandler:
|
|
| 62 |
print(f"Generating with parameters: {gen_kwargs}")
|
| 63 |
|
| 64 |
# Tokenize input
|
| 65 |
-
enc = self.tokenizer(inputs, return_tensors="pt"
|
|
|
|
|
|
|
| 66 |
|
| 67 |
# Generate with no_grad for efficiency
|
| 68 |
with torch.no_grad():
|
| 69 |
-
out = self.model.generate(**enc,
|
|
|
|
|
|
|
| 70 |
|
| 71 |
# Decode output
|
| 72 |
generated_text = self.tokenizer.decode(out[0], skip_special_tokens=True)
|
|
|
|
| 15 |
|
| 16 |
print(f"Loading tokenizer from {path}...")
|
| 17 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
| 18 |
+
|
| 19 |
+
# ✅ ADD THIS: Set pad token to prevent corruption
|
| 20 |
+
if self.tokenizer.pad_token is None:
|
| 21 |
+
self.tokenizer.pad_token = self.tokenizer.eos_token
|
| 22 |
|
| 23 |
print(f"Loading model from {path} on device: {self.device}...")
|
| 24 |
self.model = AutoModelForCausalLM.from_pretrained(
|
|
|
|
| 66 |
print(f"Generating with parameters: {gen_kwargs}")
|
| 67 |
|
| 68 |
# Tokenize input
|
| 69 |
+
enc = self.tokenizer(inputs, return_tensors="pt",padding=True, # Enable padding
|
| 70 |
+
truncation=True, # Truncate if needed
|
| 71 |
+
max_length=2048).to(self.device)
|
| 72 |
|
| 73 |
# Generate with no_grad for efficiency
|
| 74 |
with torch.no_grad():
|
| 75 |
+
out = self.model.generate(**enc,
|
| 76 |
+
**gen_kwargs,
|
| 77 |
+
pad_token_id=self.tokenizer.pad_token_id) # Tell model which token is padding
|
| 78 |
|
| 79 |
# Decode output
|
| 80 |
generated_text = self.tokenizer.decode(out[0], skip_special_tokens=True)
|