omniff/tests/python/integration/test_video_to_text.py

Initial upload: OmniFF — FFmpeg for AI

88e3f4a verified 5 days ago

1.25 kB

	import numpy as np
	import pytest

	from omniff.models.video_captioner import VideoCaptionerModel


	@pytest.fixture(scope="module")
	def captioner():
	    """Provide a loaded ``VideoCaptionerModel`` shared by the tests in this module.

	    The model is constructed and loaded before the first test that requests
	    it, yielded to the tests, and unloaded during fixture teardown.
	    """
	    settings = {
	        "model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
	        "device": "auto",
	        "max_new_tokens": 128,
	        "max_frames": 4,
	    }
	    vlm = VideoCaptionerModel(**settings)
	    vlm.load()
	    yield vlm
	    # Teardown: runs once the module-scoped fixture goes out of use.
	    vlm.unload()


	@pytest.fixture
	def test_video(tmp_path):
	    """Write a small synthetic video under ``tmp_path`` and return its path.

	    The clip consists of 30 frames of 64x64 pixels written at 10 fps with the
	    ``mp4v`` codec. Channel index 2 (red in the BGR order OpenCV uses) grows
	    by 8 per frame while the other two channels stay at zero.

	    Args:
	        tmp_path: pytest's per-test temporary directory.

	    Returns:
	        The path of the written ``test.mp4`` as a ``str``.

	    Raises:
	        RuntimeError: if OpenCV could not open the video writer, so the
	            failure is reported here instead of surfacing later as an
	            unreadable or empty video inside the model under test.
	    """
	    # Imported locally so cv2 is only required when this fixture is used.
	    import cv2

	    path = str(tmp_path / "test.mp4")
	    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
	    out = cv2.VideoWriter(path, fourcc, 10.0, (64, 64))
	    try:
	        # A writer that failed to initialise (e.g. codec unavailable) is
	        # reported through isOpened(); check it before writing frames.
	        if not out.isOpened():
	            raise RuntimeError(
	                f"cv2.VideoWriter could not open {path!r} with the 'mp4v' codec"
	            )
	        for i in range(30):
	            frame = np.zeros((64, 64, 3), dtype=np.uint8)
	            frame[:, :, 2] = min(255, i * 8)  # red gradient over time
	            out.write(frame)
	    finally:
	        # Always release the writer so the file is finalised and the handle
	        # is closed even if writing a frame raises.
	        out.release()
	    return path


	def test_video_caption(captioner, test_video):
	    """Captioning a video without a prompt yields a non-trivial text string.

	    Checks that the inference result has a ``"text"`` entry, that it is a
	    ``str``, and that it is longer than 5 characters.
	    """
	    request = {"video_path": test_video}
	    output = captioner.infer(request)
	    assert "text" in output
	    caption = output["text"]
	    assert isinstance(caption, str)
	    assert len(caption) > 5


	def test_video_caption_with_prompt(captioner, test_video):
	    """Captioning a video with an explicit prompt yields a text answer.

	    Checks that the inference result has a ``"text"`` entry, that it is a
	    ``str`` (matching the check in the prompt-less test, so a non-string
	    sized value cannot satisfy the length assertion), and that it is longer
	    than 3 characters.
	    """
	    result = captioner.infer(
	        {
	            "video_path": test_video,
	            "prompt": "What colors appear in this video?",
	        }
	    )
	    assert "text" in result
	    assert isinstance(result["text"], str)
	    assert len(result["text"]) > 3