omniff/tests/python/integration/test_document_to_text.py

Initial upload: OmniFF — FFmpeg for AI

88e3f4a verified 5 days ago

1.19 kB

	import pytest

	from omniff.models.document_reader import DocumentReaderModel


	@pytest.fixture(scope="module")
	def reader():
	    """Provide one DocumentReaderModel instance shared by the whole module.

	    The model is constructed with the Qwen/Qwen3-4B LLM id, automatic
	    device selection and a 512-token generation cap, then ``load()`` is
	    called before it is handed to the tests. ``unload()`` is called once
	    the module's tests have finished (pytest runs the code after ``yield``
	    as teardown).
	    """
	    document_reader = DocumentReaderModel(
	        llm_model_id="Qwen/Qwen3-4B",
	        device="auto",
	        max_new_tokens=512,
	    )
	    # NOTE(review): load() presumably brings the model weights into memory,
	    # which is why the fixture is module-scoped -- confirm against the model.
	    document_reader.load()
	    yield document_reader
	    document_reader.unload()


	@pytest.fixture
	def test_txt_doc(tmp_path):
	    """Write a small UTF-8 text document and return its path as a string.

	    The file is created as ``sample.txt`` inside pytest's per-test
	    ``tmp_path`` directory and contains three short sentences about
	    Kazakhstan that the tests in this module query.
	    """
	    sample_file = tmp_path.joinpath("sample.txt")
	    contents = (
	        "The capital of Kazakhstan is Astana. "
	        "Kazakhstan is the largest landlocked country in the world. "
	        "It covers an area of 2.7 million square kilometers."
	    )
	    sample_file.write_text(contents, encoding="utf-8")
	    # The tests pass this value under the "document_path" key, so a plain
	    # string is returned rather than the Path object.
	    return str(sample_file)


	def test_extract_text(reader, test_txt_doc):
	    """Inference with only a document path yields the extracted text.

	    Checks that the result has a ``"text"`` entry containing "Kazakhstan"
	    and that its ``"source"`` is reported as ``"extraction"``.
	    """
	    request = {"document_path": test_txt_doc}
	    output = reader.infer(request)
	    assert "text" in output
	    assert "Kazakhstan" in output["text"]
	    assert output["source"] == "extraction"


	def test_summarize_document(reader, test_txt_doc):
	    """Inference with a prompt yields a non-empty LLM-sourced answer.

	    Sends the document path together with a question about it and checks
	    that the result has a non-empty ``"text"`` entry and that its
	    ``"source"`` is reported as ``"llm"``. The content of the answer itself
	    is not checked.
	    """
	    question = "What is the capital mentioned here? One word answer."
	    request = {"document_path": test_txt_doc, "prompt": question}
	    output = reader.infer(request)
	    assert "text" in output
	    assert len(output["text"]) > 0, "LLM returned empty response"
	    assert output["source"] == "llm"