Mir-2002 committed
Commit a81cb37 · 1 Parent(s): 992672e

added handler for inference endpoint

Files changed (1):
  1. handler.py +59 -0
handler.py ADDED
@@ -0,0 +1,59 @@
+ from typing import Any, Dict, List
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ import torch
+ 
+ MAX_INPUT_LENGTH = 256
+ MAX_OUTPUT_LENGTH = 128
+ 
+ class EndpointHandler:
+     def __init__(self, model_dir: str = "", **kwargs: Any) -> None:
+         """Initialize the model and tokenizer when the endpoint starts."""
+         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
+         # The model was fine-tuned for a sequence-to-sequence task (CodeT5+),
+         # so the seq2seq auto class is used here.
+         self.model = AutoModelForSeq2SeqLM.from_pretrained(model_dir)
+         self.model.eval()  # inference only; disables dropout etc.
+         # Run on GPU when one is available, otherwise fall back to CPU.
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+         self.model.to(self.device)
+ 
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """Handle an incoming inference request."""
+         inputs = data.get("inputs")
+         if not inputs:
+             raise ValueError("No 'inputs' found in the request data.")
+ 
+         # Wrap a single string in a list so batched and single requests
+         # go through the same code path.
+         if isinstance(inputs, str):
+             inputs = [inputs]
+ 
+         # Pre-processing: adjust max_length and padding to match how the
+         # model was trained.
+         tokenized_inputs = self.tokenizer(
+             inputs,
+             max_length=MAX_INPUT_LENGTH,
+             padding=True,
+             truncation=True,
+             return_tensors="pt",
+         ).to(self.device)
+ 
+         # Inference. Generation arguments (max_length, num_beams, ...) are
+         # examples; tune them for the task.
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 tokenized_inputs["input_ids"],
+                 attention_mask=tokenized_inputs["attention_mask"],
+                 max_length=MAX_OUTPUT_LENGTH,
+                 num_beams=8,
+                 no_repeat_ngram_size=3,
+                 pad_token_id=self.tokenizer.pad_token_id,
+             )
+ 
+         # Post-processing: decode token ids back to text.
+         decoded_outputs = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
+ 
+         # Format the output as a list of dictionaries.
+         return [{"generated_text": text} for text in decoded_outputs]
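
For a quick local smoke test before deploying, the handler can be exercised directly. This is a minimal sketch, not part of the commit; the model_dir value "." and the example payload are assumptions (point model_dir at the directory holding the fine-tuned weights and tokenizer):

from handler import EndpointHandler

# Hypothetical local test: "." assumes the fine-tuned model and tokenizer
# live in the current directory.
handler = EndpointHandler(model_dir=".")
payload = {"inputs": "def add(a, b):\n    return a + b"}  # example request body
print(handler(payload))  # e.g. [{"generated_text": "..."}]

A deployed Inference Endpoint passes the parsed JSON request body to __call__ in the same shape, so a payload that works here should work against the endpoint as well.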