import numpy as np import pytest from omniff.models.video_captioner import VideoCaptionerModel @pytest.fixture(scope="module") def captioner(): model = VideoCaptionerModel( model_id="Qwen/Qwen2.5-VL-3B-Instruct", device="auto", max_new_tokens=128, max_frames=4, ) model.load() yield model model.unload() @pytest.fixture def test_video(tmp_path): import cv2 path = str(tmp_path / "test.mp4") fourcc = cv2.VideoWriter_fourcc(*"mp4v") out = cv2.VideoWriter(path, fourcc, 10.0, (64, 64)) for i in range(30): frame = np.zeros((64, 64, 3), dtype=np.uint8) frame[:, :, 2] = min(255, i * 8) # red gradient over time out.write(frame) out.release() return path def test_video_caption(captioner, test_video): result = captioner.infer({"video_path": test_video}) assert "text" in result assert isinstance(result["text"], str) assert len(result["text"]) > 5 def test_video_caption_with_prompt(captioner, test_video): result = captioner.infer( { "video_path": test_video, "prompt": "What colors appear in this video?", } ) assert "text" in result assert len(result["text"]) > 3