slapmack committed on
Commit
20efac6
·
1 Parent(s): d6978ab

attempted batching

Browse files
Files changed (2) hide show
  1. handler.py +27 -13
  2. handler_test.py +14 -5
handler.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import Dict, Any
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
  import torch
4
  from torch.cuda.amp import autocast
@@ -13,7 +13,9 @@ class EndpointHandler:
13
  self.tokenizer = AutoTokenizer.from_pretrained(path)
14
  self.model = AutoModelForSeq2SeqLM.from_pretrained(path).to(self.device).half()
15
 
16
- def process_chunks(self, chunks: list, titles: list, dates: list) -> list:
 
 
17
  """
18
  Process multiple text chunks with the model.
19
 
@@ -59,7 +61,7 @@ class EndpointHandler:
59
 
60
  return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
61
 
62
- def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
63
  """
64
  Handle the inference request.
65
 
@@ -67,15 +69,27 @@ class EndpointHandler:
67
  data (dict): The payload with text inputs.
68
 
69
  Returns:
70
- dict: The processed outputs containing the generated text.
71
  """
72
- inputs = data.pop("inputs", {})
73
- missing_keys = [key for key in ["chunk", "title", "date"] if key not in inputs]
74
- if missing_keys:
75
- raise ValueError(
76
- f"The inputs dictionary is missing required keys: {', '.join(missing_keys)}."
77
- )
78
 
79
- chunk, title, date = inputs["chunk"], inputs["title"], inputs["date"]
80
- prediction = self.process_chunks([chunk], [title], [date])[0]
81
- return {"generated_text": prediction}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, List
2
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
  import torch
4
  from torch.cuda.amp import autocast
 
13
  self.tokenizer = AutoTokenizer.from_pretrained(path)
14
  self.model = AutoModelForSeq2SeqLM.from_pretrained(path).to(self.device).half()
15
 
16
+ def process_chunks(
17
+ self, chunks: List[str], titles: List[str], dates: List[str]
18
+ ) -> List[str]:
19
  """
20
  Process multiple text chunks with the model.
21
 
 
61
 
62
  return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
63
 
64
+ def __call__(self, data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
65
  """
66
  Handle the inference request.
67
 
 
69
  data (dict): The payload with text inputs.
70
 
71
  Returns:
72
+ dict: The processed outputs containing the generated text for each input along with their IDs.
73
  """
74
+ inputs = data.get("inputs", [])
 
 
 
 
 
75
 
76
+ # Ensure inputs is a list of dictionaries
77
+ if not isinstance(inputs, list) or not all(isinstance(i, dict) for i in inputs):
78
+ raise ValueError("The inputs must be a list of dictionaries.")
79
+
80
+ chunks, titles, dates, ids = [], [], [], []
81
+ for item in inputs:
82
+ for key in ["id", "chunk", "title", "date"]:
83
+ if key not in item:
84
+ raise ValueError(f"Each input must contain the key: {key}.")
85
+ ids.append(item["id"])
86
+ chunks.append(item["chunk"])
87
+ titles.append(item["title"])
88
+ dates.append(item["date"])
89
+
90
+ predictions = self.process_chunks(chunks, titles, dates)
91
+ result = [
92
+ {"id": id_, "generated_text": prediction}
93
+ for id_, prediction in zip(ids, predictions)
94
+ ]
95
+ return {"results": result}
handler_test.py CHANGED
@@ -5,11 +5,20 @@ my_handler = EndpointHandler(path=".")
5
 
6
  # Example payload
7
  data = {
8
- "inputs": {
9
- "chunk": "Prior to restoration work performed between 1990 and 2001, Leaning Tower of Pisa leaned at an angle of 5.5 degrees, but the tower now leans at about 3.99 degrees. This means the top of the tower is displaced horizontally 3.9 meters (12 ft 10 in) from the center.",
10
- "title": "Leaning Tower of Pisa",
11
- "date": "2025-01-15 12:22:44",
12
- },
 
 
 
 
 
 
 
 
 
13
  }
14
 
15
  # Call the handler and print the output
 
5
 
6
  # Example payload
7
  data = {
8
+ "inputs": [
9
+ {
10
+ "id": "1",
11
+ "chunk": "Prior to restoration work performed between 1990 and 2001, Leaning Tower of Pisa leaned at an angle of 5.5 degrees, but the tower now leans at about 3.99 degrees. This means the top of the tower is displaced horizontally 3.9 meters (12 ft 10 in) from the center.",
12
+ "title": "Leaning Tower of Pisa",
13
+ "date": "2023-01-01",
14
+ },
15
+ {
16
+ "id": "2",
17
+ "chunk": "Prior to restoration work performed between 1990 and 2001, Leaning Tower of Pisa leaned at an angle of 5.5 degrees, but the tower now leans at about 3.99 degrees. This means the top of the tower is displaced horizontally 3.9 meters (12 ft 10 in) from the center.",
18
+ "title": "Leaning Tower of Pisa",
19
+ "date": "2023-01-02",
20
+ },
21
+ ]
22
  }
23
 
24
  # Call the handler and print the output