# File size: 879 Bytes

from sentence_transformers import SentenceTransformer

class EndpointHandler:
    """Callable request handler that returns embeddings for incoming text.

    The embedding model is loaded once when the handler is constructed and is
    reused by every subsequent call.
    """

    def __init__(self, path=""):
        """Load the embedding model and keep it on ``self.model``.

        Args:
            path: Unused by this handler; the model is always loaded by its
                hard-coded identifier rather than from ``path``.
        """
        model_id = "nvidia/llama-embed-nemotron-8b"
        # Ask for the "eager" attention implementation when the model is built.
        load_options = {"attn_implementation": "eager"}

        print("Initializing Nemotron 8B with trust_remote_code=True...")
        # Per the original author's note, trust_remote_code=True is the
        # override that works around a Hugging Face loading bug.
        self.model = SentenceTransformer(
            model_id,
            trust_remote_code=True,
            model_kwargs=load_options,
        )
        print("Model loaded successfully!")

    def __call__(self, data):
        """Embed the text carried by one request.

        Args:
            data: Request mapping. Its ``"inputs"`` entry is removed (the
                mapping is mutated) and used as the text to embed; when that
                key is absent, the whole mapping is handed to the model.

        Returns:
            The model's ``encode`` result converted with ``tolist()`` so the
            caller receives plain Python lists.
        """
        # Pull the text out of the request, falling back to the request itself.
        payload = data.pop("inputs", data)

        # Compute the embedding vectors for the payload.
        vectors = self.model.encode(payload)

        # Convert to built-in lists so the response is easy to consume.
        return vectors.tolist()