| from sentence_transformers import SentenceTransformer |
|
|
class EndpointHandler:
    """Callable handler that turns request payloads into embeddings.

    The ``nvidia/llama-embed-nemotron-8b`` SentenceTransformer model is loaded
    once in ``__init__`` and reused by every ``__call__``.
    """

    def __init__(self, path=""):
        """Load the embedding model.

        Args:
            path: Unused by this implementation; the model identifier is
                hard-coded below rather than derived from ``path``.
        """
        print("Initializing Nemotron 8B with trust_remote_code=True...")
        # NOTE: trust_remote_code=True permits custom code shipped with the
        # model repository to run at load time; "eager" is forwarded to the
        # underlying model as its attention implementation.
        self.model = SentenceTransformer(
            "nvidia/llama-embed-nemotron-8b",
            trust_remote_code=True,
            model_kwargs={"attn_implementation": "eager"},
        )
        print("Model loaded successfully!")

    def __call__(self, data):
        """Encode the request payload and return the embeddings.

        Args:
            data: Request payload. When it is a dict, the value under
                ``"inputs"`` is encoded; if that key is absent the whole
                dict is handed to the model unchanged. Any non-dict payload
                (for example a bare string or list of strings) is encoded
                directly.

        Returns:
            The result of ``self.model.encode(...)`` converted with
            ``.tolist()``.
        """
        if isinstance(data, dict):
            # Read with .get() rather than .pop() so the caller's payload is
            # not modified as a side effect of handling the request.
            inputs = data.get("inputs", data)
        else:
            # Non-dict payloads have no "inputs" key to look up; treat the
            # payload itself as the inputs instead of failing on .pop().
            inputs = data

        embeddings = self.model.encode(inputs)
        return embeddings.tolist()