Python interface sample (ONNX Runtime inference):
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer
class QwenEmbeddingONNX:
    """Sentence-embedding wrapper around an ONNX-exported Qwen encoder.

    Tokenizes input text with a HuggingFace tokenizer loaded from
    ``model_dir`` and runs the ONNX graph at ``model_path`` through
    ``onnxruntime``, returning (optionally L2-normalized) embeddings.
    """

    def __init__(self, model_dir, model_path, max_length=512, providers=None):
        """Load the tokenizer and create the ONNX Runtime session.

        Args:
            model_dir: Directory (or hub id) for ``AutoTokenizer.from_pretrained``.
            model_path: Path to the exported ``.onnx`` model file.
            max_length: Truncation length passed to the tokenizer.
            providers: ONNX Runtime execution providers; defaults to CPU.
        """
        self.max_length = max_length
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
        if providers is None:
            # Default to CPU-only inference when no providers are requested.
            providers = ['CPUExecutionProvider']
        self.session = ort.InferenceSession(
            model_path,
            providers=providers,
        )
        # Input names the ONNX graph actually declares; used to filter the
        # tokenizer's output so extra keys are never fed to the session.
        self.input_names = [i.name for i in self.session.get_inputs()]

    def encode(self, texts, normalize=True):
        """Embed one string or a list of strings.

        Args:
            texts: A single string or a list of strings.
            normalize: When True, L2-normalize each embedding row.

        Returns:
            ``np.ndarray`` of shape ``[batch, hidden]``.
        """
        if isinstance(texts, str):
            texts = [texts]
        inputs = self.tokenizer(
            texts,
            padding=True,
            truncation=True,
            max_length=self.max_length,
            return_tensors="np",
        )
        # Feed only the tensors the graph declares, cast to int64 as ONNX
        # integer inputs conventionally expect.
        input_feed = {
            k: inputs[k].astype(np.int64)
            for k in self.input_names
            if k in inputs
        }
        outputs = self.session.run(None, input_feed)
        embeddings = outputs[0]
        # If the graph emits token-level output [B, L, H], take the first
        # token's vector.
        # NOTE(review): first-token (CLS-style) pooling — confirm this matches
        # how the model was exported; Qwen embedding models commonly use
        # last-token pooling instead.
        if embeddings.ndim == 3:
            embeddings = embeddings[:, 0, :]
        if normalize:
            # Epsilon guards against division by zero for all-zero rows.
            norm = np.linalg.norm(embeddings, axis=1, keepdims=True)
            embeddings = embeddings / (norm + 1e-9)
        return embeddings
Inference Providers (new): this model isn't deployed by any inference provider yet. 🙋 Ask for provider support.