| import numpy as np |
| import pytest |
|
|
| from omniff.models.video_captioner import VideoCaptionerModel |
|
|
|
|
@pytest.fixture(scope="module")
def captioner():
    """Provide a loaded ``VideoCaptionerModel`` shared by the whole module.

    The model is constructed once, ``load()`` is called before the first
    test that requests it, and ``unload()`` is called after the last one.
    """
    settings = {
        "model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
        "device": "auto",
        "max_new_tokens": 128,
        "max_frames": 4,
    }
    video_captioner = VideoCaptionerModel(**settings)
    video_captioner.load()

    yield video_captioner

    # Teardown: runs once every test in the module has finished.
    video_captioner.unload()
|
|
|
|
@pytest.fixture
def test_video(tmp_path):
    """Write a small synthetic MP4 clip and return its path as a string.

    The clip is 30 frames of 64x64 pixels written at 10 fps. Each frame is
    black except for channel index 2 (red in OpenCV's BGR ordering), which
    ramps up by 8 per frame and is clamped to 255.

    Raises:
        RuntimeError: If OpenCV cannot open the writer (for example, the
            ``mp4v`` codec is unavailable), so the failure is reported here
            rather than as an obscure error inside the model under test.
    """
    # Imported lazily so that merely collecting this module does not
    # require OpenCV to be importable.
    import cv2

    path = str(tmp_path / "test.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    writer = cv2.VideoWriter(path, fourcc, 10.0, (64, 64))

    # VideoWriter does not raise on failure; it just stays closed and
    # silently drops every frame, so check explicitly.
    if not writer.isOpened():
        writer.release()
        raise RuntimeError(f"cv2.VideoWriter could not open {path!r} with codec 'mp4v'")

    try:
        for i in range(30):
            frame = np.zeros((64, 64, 3), dtype=np.uint8)
            frame[:, :, 2] = min(255, i * 8)
            writer.write(frame)
    finally:
        # Always flush and close the container, even if a write fails.
        writer.release()

    return path
|
|
|
|
def test_video_caption(captioner, test_video):
    """Captioning a video without a prompt yields a non-trivial string."""
    request = {"video_path": test_video}
    output = captioner.infer(request)

    assert "text" in output
    caption = output["text"]
    assert isinstance(caption, str)
    assert len(caption) > 5
|
|
|
|
def test_video_caption_with_prompt(captioner, test_video):
    """Captioning a video with an explicit prompt yields a non-empty string.

    Mirrors ``test_video_caption`` but passes a ``"prompt"`` key alongside
    the video path, and uses a lower minimum length for the answer.
    """
    result = captioner.infer(
        {
            "video_path": test_video,
            "prompt": "What colors appear in this video?",
        }
    )
    assert "text" in result
    # Check the type as well as the length: a non-string sequence with more
    # than three items would otherwise satisfy the length assertion.
    assert isinstance(result["text"], str)
    assert len(result["text"]) > 3
|
|