from gliner import GLiNER
import torch


class EndpointHandler:
    """Hugging Face Inference Endpoints custom handler for a GLiNER model.

    Loads a GLiNER checkpoint from ``path`` and serves named-entity
    predictions. Requests are dicts with ``"text"`` (str) and ``"labels"``
    (list of entity label strings); responses are ``{"entities": [...]}`` on
    success or ``{"error": "..."}`` on bad input / failure.
    """

    def __init__(self, path=""):
        # Fall back to CPU when no GPU is available — a hard-coded "cuda"
        # crashes on CPU-only instances.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # GLiNER does not accept device_map in from_pretrained; load first,
        # then move the model to the target device.
        self.model = GLiNER.from_pretrained(path).to(self.device)
        # Inference-only: disable dropout/batch-norm training behavior.
        self.model.eval()

    def __call__(self, data):
        """Handle one inference request.

        Parameters
        ----------
        data : dict
            Either ``{"text": ..., "labels": [...]}`` directly, or the same
            payload wrapped under an ``"inputs"`` key (as the Hugging Face
            Inference Endpoints runtime does).

        Returns
        -------
        dict
            ``{"entities": [...]}`` on success, ``{"error": str}`` otherwise.
        """
        try:
            # Unwrap the Hugging Face 'inputs' envelope if present.
            if isinstance(data, dict) and "inputs" in data:
                data = data["inputs"]

            if not isinstance(data, dict):
                return {"error": "Please provide 'text' and 'labels'"}

            text = data.get("text", "")
            labels = data.get("labels", [])
            if not text or not labels:
                return {"error": "Please provide 'text' and 'labels'"}

            # no_grad: skip autograd bookkeeping during pure inference.
            with torch.no_grad():
                entities = self.model.predict_entities(text, labels)
            return {"entities": entities}
        except Exception as e:
            # Surface the failure to the caller as JSON instead of letting the
            # exception escape and take down the request with an opaque 500.
            return {"error": str(e)}