# auto-load.py
# This script forces transformers to use the real Qwen2VLConfig

import torch
from transformers import (
    AutoConfig,
    AutoProcessor,
    AutoTokenizer,
    Qwen2VLForConditionalGeneration,
)


def load_model(repo_path):
    # Use trust_remote_code so the hub config class is rehydrated
    config = AutoConfig.from_pretrained(repo_path, trust_remote_code=True)
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        repo_path,
        config=config,
        torch_dtype=torch.float16,
    )
    tokenizer = AutoTokenizer.from_pretrained(repo_path, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(repo_path, trust_remote_code=True)
    return model, tokenizer, processor
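
# For reference, a minimal usage sketch. The repo path below is a
# placeholder, not part of the original script -- substitute any local
# checkpoint directory or Hub repo id that holds a Qwen2-VL model.
if __name__ == "__main__":
    model, tokenizer, processor = load_model("path/to/qwen2-vl-checkpoint")
    model.eval()  # switch to inference mode; weights are already float16
    # Confirm which config class was rehydrated for this checkpoint.
    print(type(model.config).__name__)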