rawcell
/

Qwen2.5-Coder-7B-Instruct-bruno

Safetensors

qwen2

Model card Files Files and versions

xet

Community

rawcell committed on Feb 1

Commit

cca395f

verified ·

1 Parent(s): 44cfc7c

Add handler.py for Inference Endpoints

Browse files

Files changed (1) hide show

handler.py +46 -0

handler.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from typing import Dict, List, Any
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
class EndpointHandler:
    """Custom handler that serves a causal language model for text generation.

    The handler loads a tokenizer and model once at construction time and
    answers each request by generating a continuation for either a plain
    string prompt or a chat-style list of message dicts.
    """

    def __init__(self, path=""):
        """Load the tokenizer and the model weights found at ``path``.

        Args:
            path: Directory (or identifier) passed unchanged to
                ``from_pretrained`` for both the tokenizer and the model.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate text for one request payload.

        Args:
            data: Request payload. ``data["inputs"]`` is either a prompt
                string or a non-empty list of message dicts, which is rendered
                with the tokenizer's chat template. ``data["parameters"]`` is
                an optional dict that may contain ``max_new_tokens``
                (default 512), ``do_sample`` (default True), ``temperature``
                (default 0.7), ``top_p`` (default 0.9) and
                ``return_full_text``. Both keys are popped from ``data``.

        Returns:
            A single-element list ``[{"generated_text": <str>}]``. For chat
            inputs the text is only the newly generated reply; for plain
            string inputs it is the prompt followed by the continuation,
            unless ``return_full_text`` overrides that default.
        """
        inputs = data.pop("inputs", data)
        # A payload with `"parameters": null` must behave like an absent key.
        parameters = data.pop("parameters", None) or {}

        is_chat = (
            isinstance(inputs, list)
            and len(inputs) > 0
            and isinstance(inputs[0], dict)
        )
        if is_chat:
            text = self.tokenizer.apply_chat_template(
                inputs,
                tokenize=False,
                add_generation_prompt=True,
            )
        else:
            text = inputs

        encoded = self.tokenizer(text, return_tensors="pt").to(self.model.device)

        do_sample = parameters.get("do_sample", True)
        gen_kwargs = {
            "max_new_tokens": parameters.get("max_new_tokens", 512),
            "do_sample": do_sample,
        }
        if do_sample:
            # Sampling knobs are only meaningful (and only warning-free)
            # when sampling is actually enabled.
            gen_kwargs["temperature"] = parameters.get("temperature", 0.7)
            gen_kwargs["top_p"] = parameters.get("top_p", 0.9)

        with torch.no_grad():
            outputs = self.model.generate(**encoded, **gen_kwargs)

        # Chat requests want just the assistant reply; plain prompts keep the
        # historical "prompt + continuation" output unless told otherwise.
        return_full_text = parameters.get("return_full_text", not is_chat)
        if return_full_text:
            decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        else:
            # Drop the prompt by token position. Searching the decoded text
            # for chat markers cannot work, because skip_special_tokens=True
            # has already removed them from the string.
            prompt_length = encoded["input_ids"].shape[-1]
            decoded = self.tokenizer.decode(
                outputs[0][prompt_length:],
                skip_special_tokens=True,
            ).strip()

        return [{"generated_text": decoded}]