# tests/test_inference.py
import pytest
import torch
import numpy as np
import os
from unittest.mock import MagicMock, patch
# IMPORTANT: Replace 'mentioned.inference' with your actual filename/package
from mentioned.inference import (
    InferenceMentionDetector,
    MentionProcessor,
    ONNXMentionDetectorPipeline,
)
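
# The import above assumes a package layout. If the module lives at the repo
# root as a plain inference.py instead (an assumption about your layout, not a
# requirement), the import would look roughly like:
#
#     from inference import (
#         InferenceMentionDetector,
#         MentionProcessor,
#         ONNXMentionDetectorPipeline,
#     )
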
# --- FIXTURES ---
@pytest.fixture
def mock_tokenizer():
    tokenizer = MagicMock()
    # 1. Mock the BatchEncoding object returned by calling the tokenizer
    mock_encoding = MagicMock()
    mock_encoding.__getitem__.side_effect = {
        "input_ids": torch.tensor([[101, 102, 103, 102]]),
        "attention_mask": torch.tensor([[1, 1, 1, 1]]),
    }.get
    # 2. Mock the .word_ids() method specifically
    mock_encoding.word_ids.return_value = [None, 0, 1, None]
    tokenizer.return_value = mock_encoding
    return tokenizer


@pytest.fixture
def mock_inference_detector():
    encoder = MagicMock(spec=torch.nn.Module)
    encoder.max_length = 512
    encoder.return_value = torch.randn(1, 4, 128)
    mention_det = MagicMock(spec=torch.nn.Module)
    mention_det.return_value = (torch.randn(1, 2), torch.randn(1, 2, 2))
    return InferenceMentionDetector(encoder, mention_det)


# --- TESTS ---
def test_mention_processor_word_id_mapping(mock_tokenizer):
    """MentionProcessor should expose word_ids as a tensor, mapping None to -1."""
    processor = MentionProcessor(mock_tokenizer, max_length=10)
    docs = [["The", "cat"]]
    batch = processor(docs)
    assert "word_ids" in batch
    # [None, 0, 1, None] -> [-1, 0, 1, -1]
    expected = torch.tensor([[-1, 0, 1, -1]])
    assert torch.equal(batch["word_ids"], expected)
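
# For reference, the None -> -1 mapping asserted above can be produced with a
# comprehension like the one below (a sketch only; `enc` and `n_docs` are
# illustrative names, and the real MentionProcessor may build the tensor
# differently):
#
#     word_ids = torch.tensor(
#         [[-1 if w is None else w for w in enc.word_ids(i)] for i in range(n_docs)]
#     )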


def test_pipeline_extraction_logic():
    """Verify NumPy extraction: thresholding and causal masking."""
    # Prevent ONNX Runtime from trying to load a model file from disk during init
    with patch("onnxruntime.InferenceSession") as mock_session_class:
        mock_session_instance = mock_session_class.return_value
        # 1 doc, 3 words: "The", "big", "cat"
        s_probs = np.array([[0.9, 0.1, 0.1]])
        e_probs = np.array([[[0.1, 0.1, 0.9], [0.1, 0.1, 0.1], [0.1, 0.1, 0.1]]])
        mock_session_instance.run.return_value = [s_probs, e_probs]

        tokenizer = MagicMock()
        pipeline = ONNXMentionDetectorPipeline("dummy.onnx", tokenizer, threshold=0.5)
        # Mock the processor so it doesn't actually call a real tokenizer
        pipeline.processor = MagicMock(
            return_value={
                "input_ids": torch.zeros((1, 5)),
                "attention_mask": torch.zeros((1, 5)),
                "word_ids": torch.zeros((1, 5)),
            }
        )

        docs = [["The", "big", "cat"]]
        results = pipeline.predict(docs)

        assert len(results) == 1
        mention = results[0][0]  # First doc, first mention
        assert mention["start"] == 0
        assert mention["end"] == 2
        assert mention["text"] == "The big cat"
        assert mention["score"] == 0.9
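
# For reference, the assertions above describe extraction logic roughly like the
# sketch below: a start position is kept when its probability clears the
# threshold, and the end position is the argmax over end probabilities at or
# after the start (the causal mask). This is inferred from the test's inputs and
# expected outputs, not from the pipeline's actual implementation:
#
#     for start, p_start in enumerate(start_probs[doc]):
#         if p_start > threshold:
#             end = start + int(np.argmax(end_probs[doc, start, start:]))
#             mentions.append({"start": start, "end": end,
#                              "text": " ".join(words[start:end + 1]),
#                              "score": float(p_start)})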


def test_onnx_export_compilation(mock_inference_detector, tmp_path):
    """Verify that the model can be exported via torch.onnx.export."""
    # We must import the function from your specific file
    from mentioned.inference import compile_inference_model

    mock_inference_detector.tokenizer = MagicMock()
    output_dir = tmp_path / "onnx_test"

    # Use a try-except to get better error visibility during export
    try:
        compile_inference_model(mock_inference_detector, output_dir=str(output_dir))
    except Exception as e:
        pytest.fail(f"ONNX Export failed: {e}")

    assert os.path.exists(output_dir / "model.onnx")
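
# For reference, compile_inference_model is expected to wrap a torch.onnx.export
# call along these lines (a sketch only; the argument values and output names are
# assumptions, not the function's real signature):
#
#     torch.onnx.export(
#         detector,
#         (dummy_input_ids, dummy_attention_mask),
#         os.path.join(output_dir, "model.onnx"),
#         input_names=["input_ids", "attention_mask"],
#         output_names=["start_probs", "end_probs"],
#         dynamic_axes={"input_ids": {0: "batch", 1: "seq"},
#                       "attention_mask": {0: "batch", 1: "seq"}},
#     )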