Serkan007's picture
Sentence-Transformers ve E5-Large model aktarımı.
9bbba62 verified
from __future__ import annotations
import numpy as np
import pytest
import torch
from sentence_transformers import CrossEncoder
# These tests fail if optimum.intel.openvino is imported, because openvinotoolkit/nncf
# patches torch._C._nn.gelu in a way that breaks pickling. As a result, we may have issues
# when running both backend tests and multi-process tests in the same session.
@pytest.mark.slow
@pytest.mark.parametrize("convert_to_tensor", (False, True))
@pytest.mark.parametrize("apply_softmax", (False, True))
def test_predict_multi_process(
reranker_bert_tiny_model: CrossEncoder, convert_to_tensor: bool, apply_softmax: bool
) -> None:
model = reranker_bert_tiny_model
pairs = [[f"This is sentence {i}", f"This is another sentence {i}"] for i in range(40)]
# Start the multi-process pool on e.g. two CPU devices & compute the scores using the pool
pool = model.start_multi_process_pool(["cpu", "cpu"])
scores = model.predict(pairs, pool=pool, convert_to_tensor=convert_to_tensor, apply_softmax=apply_softmax)
model.stop_multi_process_pool(pool)
if convert_to_tensor:
assert isinstance(scores, torch.Tensor)
assert scores.shape[0] == len(pairs)
else:
assert isinstance(scores, np.ndarray)
assert scores.shape[0] == len(pairs)
# Make sure the scores aren't just all 0
assert scores.sum() != 0.0
# Compare against normal predictions
scores_normal = model.predict(pairs, convert_to_tensor=convert_to_tensor, apply_softmax=apply_softmax)
if convert_to_tensor:
diff = torch.max(torch.abs(scores - scores_normal))
assert diff < 1e-3
else:
diff = np.max(np.abs(scores - scores_normal))
assert diff < 1e-3
@pytest.mark.slow
def test_multi_process_predict_same_as_standard_predict(reranker_bert_tiny_model: CrossEncoder):
model = reranker_bert_tiny_model
# Test that multi-process prediction gives the same result as standard prediction
pairs = [
["First sentence.", "Second sentence."],
["Second sentence.", "Third sentence."],
["Third sentence.", "Fourth sentence."],
] * 5
# Standard predict
scores_standard = model.predict(pairs)
# Multi-process predict with device=["cpu"] * 2
scores_multi = model.predict(pairs, device=["cpu"] * 2)
# Should produce the same scores
assert np.allclose(scores_standard, scores_multi, atol=1e-6)
@pytest.mark.slow
def test_multi_process_pool(reranker_bert_tiny_model: CrossEncoder):
# Test the start_multi_process_pool and stop_multi_process_pool functions
model = reranker_bert_tiny_model
pairs = [
["First sentence.", "Second sentence."],
["Second sentence.", "Third sentence."],
["Third sentence.", "Fourth sentence."],
] * 5
# Standard predict
scores_standard = model.predict(pairs)
pool = model.start_multi_process_pool(["cpu"] * 2)
try:
# Predict using the pool
scores_multi = model.predict(pairs, pool=pool)
finally:
model.stop_multi_process_pool(pool)
# Should be numpy array with correct shape and the same scores
assert isinstance(scores_multi, np.ndarray)
assert scores_multi.shape == scores_standard.shape
assert np.allclose(scores_standard, scores_multi, atol=1e-6)
@pytest.mark.slow
def test_multi_process_chunk_size(reranker_bert_tiny_model: CrossEncoder):
# Test explicit chunk_size parameter for predict
model = reranker_bert_tiny_model
pairs = [
["First sentence.", "Second sentence."],
["Second sentence.", "Third sentence."],
["Third sentence.", "Fourth sentence."],
] * 10
# Test with explicit chunk size
scores = model.predict(pairs, device=["cpu"] * 2, chunk_size=5)
# Should produce correct scores
assert isinstance(scores, np.ndarray)
assert scores.shape[0] == len(pairs)
@pytest.mark.slow
@pytest.mark.parametrize("convert_to_tensor", [True, False])
@pytest.mark.parametrize("convert_to_numpy", [True, False])
def test_multi_process_with_empty_pairs(
reranker_bert_tiny_model: CrossEncoder, convert_to_tensor: bool, convert_to_numpy: bool
):
# Test predicting with empty pairs
model = reranker_bert_tiny_model
pairs: list[list[str]] = []
# Predict with empty pairs
scores_standard = model.predict(pairs, convert_to_tensor=convert_to_tensor, convert_to_numpy=convert_to_numpy)
scores_multi = model.predict(
pairs,
device=["cpu"] * 2,
convert_to_tensor=convert_to_tensor,
convert_to_numpy=convert_to_numpy,
)
# Should return empty arrays, identical types as without multi-processing
assert type(scores_standard) is type(scores_multi)
if convert_to_tensor:
assert isinstance(scores_standard, torch.Tensor)
assert scores_standard.numel() == 0
elif convert_to_numpy:
assert isinstance(scores_standard, np.ndarray)
assert scores_standard.size == 0
else:
assert isinstance(scores_standard, list)
assert len(scores_standard) == 0
@pytest.mark.slow
@pytest.mark.parametrize("convert_to_tensor", [True, False])
@pytest.mark.parametrize("convert_to_numpy", [True, False])
def test_multi_process_with_single_pair(
reranker_bert_tiny_model: CrossEncoder, convert_to_tensor: bool, convert_to_numpy: bool
):
# Test predicting with a single pair
model = reranker_bert_tiny_model
pair = ["This is a single sentence.", "This is another sentence."]
# Predict with single pair
scores_standard = model.predict(pair, convert_to_tensor=convert_to_tensor, convert_to_numpy=convert_to_numpy)
scores_multi = model.predict(
pair,
device=["cpu"] * 2,
convert_to_tensor=convert_to_tensor,
convert_to_numpy=convert_to_numpy,
)
# Assert that the scores are the same type and shape
assert type(scores_standard) is type(scores_multi)
if isinstance(scores_standard, (np.ndarray, torch.Tensor)):
assert scores_standard.shape == scores_multi.shape
else:
# Scalar outputs for num_labels=1
assert np.allclose(scores_standard, scores_multi, atol=1e-6)
@pytest.mark.slow
def test_multi_process_more_workers_than_pairs(reranker_bert_tiny_model: CrossEncoder):
# Test with more workers than pairs
model = reranker_bert_tiny_model
pairs = [["First sentence.", "Second sentence."], ["Second sentence.", "Third sentence."]]
scores = model.predict(pairs, device=["cpu"] * 3)
# Should be numpy array with correct shape
assert isinstance(scores, np.ndarray)
assert scores.shape[0] == len(pairs)
@pytest.mark.slow
def test_multi_process_with_large_chunk_size(reranker_bert_tiny_model: CrossEncoder):
# Test with a large chunk size
model = reranker_bert_tiny_model
pairs = [["First sentence.", "Second sentence."]] * 20 # 20 pairs
# Use a large chunk size
scores = model.predict(pairs, device=["cpu"] * 2, chunk_size=30)
# Should produce correct scores
assert isinstance(scores, np.ndarray)
assert scores.shape[0] == len(pairs)
@pytest.mark.slow
@pytest.mark.skipif(
not torch.cuda.is_available(), reason="CUDA must be available to experiment with 2 separate devices"
)
def test_multi_process_output_tensors_two_devices(reranker_bert_tiny_model: CrossEncoder):
# Test with two separate devices
model = reranker_bert_tiny_model
pairs = [["First sentence.", "Second sentence."], ["Second sentence.", "Third sentence."]]
# Ensure that scores are moved to CPU so they can be concatenated
scores = model.predict(pairs, device=["cpu", "cuda"], convert_to_tensor=True)
assert isinstance(scores, torch.Tensor)
assert scores.device.type == "cpu"
assert scores.shape[0] == len(pairs)
# But the default is still just numpy
scores = model.predict(pairs, device=["cpu", "cuda"])
assert isinstance(scores, np.ndarray)
assert scores.shape[0] == len(pairs)