Utk113Raj's picture
Update handler.py
e3785da verified
from gliner import GLiNER
import torch
class EndpointHandler:
    """Custom inference handler that runs GLiNER entity prediction.

    The handler is constructed once with the model path and is then called
    with each request payload.
    """

    # Message returned (not raised) when the request lacks usable input.
    _MISSING_INPUT_MESSAGE = "Please provide 'text' and 'labels'"

    def __init__(self, path=""):
        """Load the GLiNER model found at *path* and prepare it for inference.

        Args:
            path: Location passed straight to ``GLiNER.from_pretrained``.
        """
        # Fall back to CPU so the handler still starts on hosts without a
        # CUDA device instead of failing inside ``.to("cuda")``.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # Load without device_map, then move the model to the chosen device.
        self.model = GLiNER.from_pretrained(path)
        self.model = self.model.to(device)
        self.model.eval()  # Inference only: switch off training-mode layers.

    def __call__(self, data):
        """Predict entities for one request.

        Accepted payload shapes:

        * ``{"inputs": {"text": ..., "labels": [...]}}``
        * ``{"text": ..., "labels": [...]}``
        * ``{"inputs": "<text>", "parameters": {"labels": [...]}}``

        A single label may be given as a bare string.

        Args:
            data: The request payload.

        Returns:
            ``{"entities": ...}`` holding whatever ``predict_entities``
            returned, or ``{"error": ...}`` when text or labels are missing.
        """
        envelope = data if isinstance(data, dict) else {}
        # If the payload is wrapped in 'inputs', unwrap it.
        payload = envelope["inputs"] if "inputs" in envelope else data

        if isinstance(payload, dict):
            text = payload.get("text", "")
            labels = payload.get("labels", [])
        elif isinstance(payload, str):
            # Plain-string inputs carry only the text.
            text = payload
            labels = []
        else:
            text = ""
            labels = []

        if not labels:
            # Labels may travel next to 'inputs' in a 'parameters' mapping.
            parameters = envelope.get("parameters")
            if isinstance(parameters, dict):
                labels = parameters.get("labels", [])

        if isinstance(labels, str):
            # Wrap a lone label so it is passed on as a list of one label.
            labels = [labels]

        if not text or not labels:
            return {"error": self._MISSING_INPUT_MESSAGE}

        entities = self.model.predict_entities(text, labels)
        return {"entities": entities}