# Source: omniff/tests/python/integration/test_video_to_text.py
# Author: stukenov
# Commit: 88e3f4a (verified) - "Initial upload: OmniFF — FFmpeg for AI"
import numpy as np
import pytest
from omniff.models.video_captioner import VideoCaptionerModel
@pytest.fixture(scope="module")
def captioner():
    """Provide one loaded ``VideoCaptionerModel`` shared by this module's tests.

    The model is constructed and ``load()``-ed a single time (module scope),
    yielded to the tests, and ``unload()``-ed once they are finished with it.
    """
    settings = {
        "model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
        "device": "auto",
        "max_new_tokens": 128,
        "max_frames": 4,
    }
    vlm = VideoCaptionerModel(**settings)
    vlm.load()

    yield vlm

    # Teardown: everything after the yield runs when the fixture is finalized.
    vlm.unload()
@pytest.fixture
def test_video(tmp_path):
    """Write a small synthetic MP4 clip into ``tmp_path`` and return its path.

    The clip is 30 frames of 64x64 pixels written at 10 fps with the ``mp4v``
    FOURCC. Each frame is all zeros except channel index 2, which is filled
    with ``min(255, i * 8)`` for frame ``i`` so that it brightens over time.

    Args:
        tmp_path: pytest's per-test temporary directory.

    Returns:
        str: Path of the written ``test.mp4`` file.

    Raises:
        RuntimeError: If OpenCV could not open the writer for the target file
            (for example because the ``mp4v`` encoder is unavailable). Without
            this check the failure would only surface later, inside the model.
    """
    # Kept as a function-local import so only tests using this fixture need OpenCV.
    import cv2

    path = str(tmp_path / "test.mp4")
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(path, fourcc, 10.0, (64, 64))
    try:
        if not out.isOpened():
            raise RuntimeError(
                f"cv2.VideoWriter could not open {path!r} with the 'mp4v' codec"
            )
        for i in range(30):
            frame = np.zeros((64, 64, 3), dtype=np.uint8)
            # red gradient over time (channel 2 is red in OpenCV's BGR order)
            frame[:, :, 2] = min(255, i * 8)
            out.write(frame)
    finally:
        # Always release the writer so the file is closed even if a write fails.
        out.release()
    return path
def test_video_caption(captioner, test_video):
    """Check that captioning a video with no prompt yields a non-trivial string.

    Calls ``captioner.infer`` with only ``video_path`` set and verifies the
    result has a ``"text"`` entry that is a ``str`` longer than 5 characters.
    """
    request = {"video_path": test_video}
    output = captioner.infer(request)

    assert "text" in output
    caption = output["text"]
    assert isinstance(caption, str)
    assert len(caption) > 5
def test_video_caption_with_prompt(captioner, test_video):
    """Check that captioning a video with an explicit prompt yields text.

    Calls ``captioner.infer`` with both ``video_path`` and ``prompt`` and
    verifies the result has a ``"text"`` entry that is a ``str`` longer than
    3 characters.
    """
    result = captioner.infer(
        {
            "video_path": test_video,
            "prompt": "What colors appear in this video?",
        }
    )
    assert "text" in result
    # Same type check as test_video_caption: without it any sized non-string
    # value (e.g. a list of tokens) would satisfy the length assertion.
    assert isinstance(result["text"], str)
    assert len(result["text"]) > 3