|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import json |
|
|
import sys |
|
|
import tempfile |
|
|
import unittest |
|
|
from pathlib import Path |
|
|
|
|
|
import transformers |
|
|
from transformers import ( |
|
|
CONFIG_MAPPING, |
|
|
VIDEO_PROCESSOR_MAPPING, |
|
|
AutoConfig, |
|
|
AutoVideoProcessor, |
|
|
LlavaOnevisionConfig, |
|
|
LlavaOnevisionVideoProcessor, |
|
|
) |
|
|
from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER, require_torch |
|
|
|
|
|
|
|
|
sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils")) |
|
|
|
|
|
from test_module.custom_configuration import CustomConfig |
|
|
from test_module.custom_video_processing import CustomVideoProcessor |
|
|
|
|
|
|
|
|
@require_torch |
|
|
class AutoVideoProcessorTest(unittest.TestCase): |
|
|
def setUp(self): |
|
|
transformers.dynamic_module_utils.TIME_OUT_REMOTE_CODE = 0 |
|
|
|
|
|
def test_video_processor_from_model_shortcut(self): |
|
|
config = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf") |
|
|
self.assertIsInstance(config, LlavaOnevisionVideoProcessor) |
|
|
|
|
|
def test_video_processor_from_local_directory_from_key(self): |
|
|
with tempfile.TemporaryDirectory() as tmpdirname: |
|
|
processor_tmpfile = Path(tmpdirname) / "video_preprocessor_config.json" |
|
|
config_tmpfile = Path(tmpdirname) / "config.json" |
|
|
json.dump( |
|
|
{ |
|
|
"video_processor_type": "LlavaOnevisionVideoProcessor", |
|
|
"processor_class": "LlavaOnevisionProcessor", |
|
|
}, |
|
|
open(processor_tmpfile, "w"), |
|
|
) |
|
|
json.dump({"model_type": "llava_onevision"}, open(config_tmpfile, "w")) |
|
|
|
|
|
config = AutoVideoProcessor.from_pretrained(tmpdirname) |
|
|
self.assertIsInstance(config, LlavaOnevisionVideoProcessor) |
|
|
|
|
|
def test_video_processor_from_local_directory_from_preprocessor_key(self): |
|
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdirname: |
|
|
processor_tmpfile = Path(tmpdirname) / "preprocessor_config.json" |
|
|
config_tmpfile = Path(tmpdirname) / "config.json" |
|
|
json.dump( |
|
|
{ |
|
|
"video_processor_type": "LlavaOnevisionVideoProcessor", |
|
|
"processor_class": "LlavaOnevisionProcessor", |
|
|
}, |
|
|
open(processor_tmpfile, "w"), |
|
|
) |
|
|
json.dump({"model_type": "llava_onevision"}, open(config_tmpfile, "w")) |
|
|
|
|
|
config = AutoVideoProcessor.from_pretrained(tmpdirname) |
|
|
self.assertIsInstance(config, LlavaOnevisionVideoProcessor) |
|
|
|
|
|
def test_video_processor_from_local_directory_from_config(self): |
|
|
with tempfile.TemporaryDirectory() as tmpdirname: |
|
|
model_config = LlavaOnevisionConfig() |
|
|
|
|
|
|
|
|
processor_tmpfile = Path(tmpdirname) / "video_preprocessor_config.json" |
|
|
config_tmpfile = Path(tmpdirname) / "config.json" |
|
|
json.dump( |
|
|
{ |
|
|
"video_processor_type": "LlavaOnevisionVideoProcessor", |
|
|
"processor_class": "LlavaOnevisionProcessor", |
|
|
}, |
|
|
open(processor_tmpfile, "w"), |
|
|
) |
|
|
json.dump({"model_type": "llava_onevision"}, open(config_tmpfile, "w")) |
|
|
|
|
|
|
|
|
config_dict = AutoVideoProcessor.from_pretrained(tmpdirname).to_dict() |
|
|
|
|
|
config_dict.pop("video_processor_type") |
|
|
config = LlavaOnevisionVideoProcessor(**config_dict) |
|
|
|
|
|
|
|
|
model_config.save_pretrained(tmpdirname) |
|
|
config.save_pretrained(tmpdirname) |
|
|
|
|
|
config = AutoVideoProcessor.from_pretrained(tmpdirname) |
|
|
|
|
|
|
|
|
dict_as_saved = json.loads(config.to_json_string()) |
|
|
self.assertTrue("_processor_class" not in dict_as_saved) |
|
|
|
|
|
self.assertIsInstance(config, LlavaOnevisionVideoProcessor) |
|
|
|
|
|
def test_video_processor_from_local_file(self): |
|
|
with tempfile.TemporaryDirectory() as tmpdirname: |
|
|
processor_tmpfile = Path(tmpdirname) / "video_preprocessor_config.json" |
|
|
json.dump( |
|
|
{ |
|
|
"video_processor_type": "LlavaOnevisionVideoProcessor", |
|
|
"processor_class": "LlavaOnevisionProcessor", |
|
|
}, |
|
|
open(processor_tmpfile, "w"), |
|
|
) |
|
|
|
|
|
config = AutoVideoProcessor.from_pretrained(processor_tmpfile) |
|
|
self.assertIsInstance(config, LlavaOnevisionVideoProcessor) |
|
|
|
|
|
def test_repo_not_found(self): |
|
|
with self.assertRaisesRegex( |
|
|
EnvironmentError, |
|
|
"llava-hf/llava-doesnt-exist is not a local folder and is not a valid model identifier", |
|
|
): |
|
|
_ = AutoVideoProcessor.from_pretrained("llava-hf/llava-doesnt-exist") |
|
|
|
|
|
def test_revision_not_found(self): |
|
|
with self.assertRaisesRegex( |
|
|
EnvironmentError, r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)" |
|
|
): |
|
|
_ = AutoVideoProcessor.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa") |
|
|
|
|
|
def test_video_processor_not_found(self): |
|
|
with self.assertRaisesRegex( |
|
|
EnvironmentError, |
|
|
"hf-internal-testing/config-no-model does not appear to have a file named preprocessor_config.json.", |
|
|
): |
|
|
_ = AutoVideoProcessor.from_pretrained("hf-internal-testing/config-no-model") |
|
|
|
|
|
def test_from_pretrained_dynamic_video_processor(self): |
|
|
|
|
|
with self.assertRaises(ValueError): |
|
|
video_processor = AutoVideoProcessor.from_pretrained("hf-internal-testing/test_dynamic_video_processor") |
|
|
|
|
|
with self.assertRaises(ValueError): |
|
|
video_processor = AutoVideoProcessor.from_pretrained( |
|
|
"hf-internal-testing/test_dynamic_video_processor", trust_remote_code=False |
|
|
) |
|
|
|
|
|
video_processor = AutoVideoProcessor.from_pretrained( |
|
|
"hf-internal-testing/test_dynamic_video_processor", trust_remote_code=True |
|
|
) |
|
|
self.assertEqual(video_processor.__class__.__name__, "NewVideoProcessor") |
|
|
|
|
|
|
|
|
reloaded_video_processor = AutoVideoProcessor.from_pretrained( |
|
|
"hf-internal-testing/test_dynamic_video_processor", trust_remote_code=True |
|
|
) |
|
|
self.assertIs(video_processor.__class__, reloaded_video_processor.__class__) |
|
|
|
|
|
|
|
|
with tempfile.TemporaryDirectory() as tmp_dir: |
|
|
video_processor.save_pretrained(tmp_dir) |
|
|
reloaded_video_processor = AutoVideoProcessor.from_pretrained(tmp_dir, trust_remote_code=True) |
|
|
self.assertEqual(reloaded_video_processor.__class__.__name__, "NewVideoProcessor") |
|
|
|
|
|
def test_new_video_processor_registration(self): |
|
|
try: |
|
|
AutoConfig.register("custom", CustomConfig) |
|
|
AutoVideoProcessor.register(CustomConfig, CustomVideoProcessor) |
|
|
|
|
|
with self.assertRaises(ValueError): |
|
|
AutoVideoProcessor.register(LlavaOnevisionConfig, LlavaOnevisionVideoProcessor) |
|
|
|
|
|
with tempfile.TemporaryDirectory() as tmpdirname: |
|
|
processor_tmpfile = Path(tmpdirname) / "video_preprocessor_config.json" |
|
|
config_tmpfile = Path(tmpdirname) / "config.json" |
|
|
json.dump( |
|
|
{ |
|
|
"video_processor_type": "LlavaOnevisionVideoProcessor", |
|
|
"processor_class": "LlavaOnevisionProcessor", |
|
|
}, |
|
|
open(processor_tmpfile, "w"), |
|
|
) |
|
|
json.dump({"model_type": "llava_onevision"}, open(config_tmpfile, "w")) |
|
|
|
|
|
video_processor = CustomVideoProcessor.from_pretrained(tmpdirname) |
|
|
|
|
|
|
|
|
with tempfile.TemporaryDirectory() as tmp_dir: |
|
|
video_processor.save_pretrained(tmp_dir) |
|
|
new_video_processor = AutoVideoProcessor.from_pretrained(tmp_dir) |
|
|
self.assertIsInstance(new_video_processor, CustomVideoProcessor) |
|
|
|
|
|
finally: |
|
|
if "custom" in CONFIG_MAPPING._extra_content: |
|
|
del CONFIG_MAPPING._extra_content["custom"] |
|
|
if CustomConfig in VIDEO_PROCESSOR_MAPPING._extra_content: |
|
|
del VIDEO_PROCESSOR_MAPPING._extra_content[CustomConfig] |
|
|
|
|
|
def test_from_pretrained_dynamic_video_processor_conflict(self): |
|
|
class NewVideoProcessor(LlavaOnevisionVideoProcessor): |
|
|
is_local = True |
|
|
|
|
|
try: |
|
|
AutoConfig.register("custom", CustomConfig) |
|
|
AutoVideoProcessor.register(CustomConfig, NewVideoProcessor) |
|
|
|
|
|
video_processor = AutoVideoProcessor.from_pretrained("hf-internal-testing/test_dynamic_video_processor") |
|
|
self.assertEqual(video_processor.__class__.__name__, "NewVideoProcessor") |
|
|
self.assertTrue(video_processor.is_local) |
|
|
|
|
|
|
|
|
video_processor = AutoVideoProcessor.from_pretrained( |
|
|
"hf-internal-testing/test_dynamic_video_processor", trust_remote_code=False |
|
|
) |
|
|
self.assertEqual(video_processor.__class__.__name__, "NewVideoProcessor") |
|
|
self.assertTrue(video_processor.is_local) |
|
|
|
|
|
|
|
|
video_processor = AutoVideoProcessor.from_pretrained( |
|
|
"hf-internal-testing/test_dynamic_video_processor", trust_remote_code=True |
|
|
) |
|
|
self.assertEqual(video_processor.__class__.__name__, "NewVideoProcessor") |
|
|
self.assertTrue(not hasattr(video_processor, "is_local")) |
|
|
|
|
|
finally: |
|
|
if "custom" in CONFIG_MAPPING._extra_content: |
|
|
del CONFIG_MAPPING._extra_content["custom"] |
|
|
if CustomConfig in VIDEO_PROCESSOR_MAPPING._extra_content: |
|
|
del VIDEO_PROCESSOR_MAPPING._extra_content[CustomConfig] |
|
|
|