# Uploaded by hoanguyenthanh07 via huggingface_hub (commit 248a67b, verified).
# NOTE: the three lines above were HuggingFace web-page residue accidentally
# saved into this file; commented out here so the file parses as Python.
import tritonclient.http as httpclient
import numpy as np
from trism import TritonModel
from transformers import AutoTokenizer
# --- Path 1: raw tritonclient HTTP call against the Python-backend model ---
# Build a BYTES input tensor named "text" with shape [2, 1] (a batch of two
# single-string rows) and request only the "logits" output.
client = httpclient.InferenceServerClient("localhost:8000")
input_text = httpclient.InferInput("text", [2, 1], "BYTES") # correct shape
# Plain str objects are fine here: set_data_from_numpy serializes the object
# array for the BYTES dtype (contrast with the pre-encoded path below).
input_text_np = np.array([["tôi đang học AI"], ["tôi đang học AI"]], dtype=np.object_)
input_text.set_data_from_numpy(input_text_np)
output = httpclient.InferRequestedOutput("logits")
# "mbert.ctx.python" — presumably the Python/BLS deployment of the model;
# confirm against the server's model repository.
response = client.infer("mbert.ctx.python", inputs=[input_text], outputs=[output])
# print(response)
logits = response.as_numpy("logits")
print(logits)
print("*"*50)
# --- Path 2: same model through the TRISM TritonModel wrapper ---
# Here each string is explicitly encoded to UTF-8 bytes before building the
# object array — presumably TritonModel.run passes the array through without
# the str handling tritonclient's set_data_from_numpy does; TODO confirm
# against trism's serialization.
input_text_np = np.array([["tôi đang học AI".encode("utf-8")], ["tôi đang học AI".encode("utf-8")]], dtype=np.object_)
model_bls = TritonModel(
model="mbert.ctx.python",  # same Python-backend model as the raw call above
version=1,
url="localhost:8000",
grpc=False,  # use HTTP, matching the raw client above
)
# run() returns a dict keyed by output name; "logits" matches Path 1's output.
logits = model_bls.run(
data = [input_text_np],
)
print(logits['logits'])
print("*"*50)
# --- Path 3: the ONNX-exported encoder, tokenizing locally ---
# Unlike "mbert.ctx.python", this model takes the three standard BERT tensors
# and returns "last_hidden_state" rather than "logits".
model_onnx = TritonModel(
model="mbert.ctx",
version=1,
url="localhost:8000",
grpc=False,
)
# Tokenizer files are expected alongside the model in the local repository
# ("models_rag/mbert.ctx/1") — verify that path exists where this script runs.
tokenizer = AutoTokenizer.from_pretrained("models_rag/mbert.ctx/1")
# Fixed-length padding to 512 so both batch rows share one shape.
input_text = tokenizer(["tôi đang học AI", "tôi đang học AI"], return_tensors="np", padding="max_length", truncation=True, max_length=512)
# NOTE(review): positional data order must match the model's declared input
# order (input_ids, attention_mask, token_type_ids) — confirm in config.pbtxt.
logits = model_onnx.run(
data = [
input_text["input_ids"],
input_text["attention_mask"],
input_text["token_type_ids"],
]
)
print(logits['last_hidden_state'])