sxtran committed
Commit 2f16beb · verified · 1 Parent(s): 7a92108

Update the prefix added to each sentence before it is fed into the model

Files changed (1): handler.py (+7, -2)
handler.py
@@ -12,14 +12,18 @@ class EndpointHandler:
         self.model.to(self.device)
 
     def paraphrase_batch(self, sentences, num_return_sequences=1, temperature=1.0):
-        # Your existing paraphrase_batch logic
+        # Add the grammar correction prefix to each sentence
+        prefix = "correct grammar for this sentence: "
+        sentences_with_prefix = [prefix + s for s in sentences]
+
         inputs = self.tokenizer(
-            sentences,
+            sentences_with_prefix,
             padding=True,
             truncation=True,
             max_length=512,
             return_tensors="pt"
         ).to(self.device)
+
         outputs = self.model.generate(
             **inputs,
             max_length=512,
@@ -28,6 +32,7 @@ class EndpointHandler:
             num_return_sequences=num_return_sequences,
             early_stopping=True
         )
+
         decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
         if num_return_sequences > 1:
             grouped = [
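
For orientation, here is a minimal usage sketch of the handler after this commit. The constructor call and the example sentences are assumptions, since the diff only shows paraphrase_batch; Hugging Face Inference Endpoints handlers conventionally accept a model path in their constructor.

# Hypothetical usage sketch; EndpointHandler's constructor is not shown in this
# diff, so the path argument below is an assumption based on the usual
# Hugging Face Inference Endpoints handler convention.
from handler import EndpointHandler

handler = EndpointHandler(path=".")

# Callers now pass raw sentences; paraphrase_batch itself prepends
# "correct grammar for this sentence: " to each one before tokenizing,
# instead of expecting callers to add the prefix themselves.
sentences = ["she go to school yesterday", "he dont like apples"]
corrected = handler.paraphrase_batch(sentences, num_return_sequences=1)
print(corrected)  # one corrected string per input sentence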