Code execution error

#25
by coby0504 - opened

from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device="cuda")
preprocessor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
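
As the traceback below shows, the repo's custom HCXVisionForCausalLM.from_pretrained itself calls AutoTokenizer.from_pretrained and attaches the result to the model, which is why a tokenizer failure surfaces while the model is loading. A minimal sketch of the equivalent access, assuming the load succeeds:

# The custom from_pretrained (modeling_hyperclovax.py:1241) already attaches
# the tokenizer to the model, so the separate AutoTokenizer call is optional:
tokenizer = model.tokenizer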

I followed the example exactly as given, but the error below keeps occurring.

Exception Traceback (most recent call last)
Cell In[1], line 5
1 from transformers import AutoModelForCausalLM, AutoProcessor
3 model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
----> 5 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to("cuda")
6 preprocessor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
8 # the tokenizer is accessed via the model

File /usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:561, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
559 else:
560 cls.register(config.__class__, model_class, exist_ok=True)
--> 561 return model_class.from_pretrained(
562 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
563 )
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)

File ~/.cache/huggingface/modules/transformers_modules/naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B/cd33a27fcc2348f809bb70b8bc0623547279bf0b/modeling_hyperclovax.py:1241, in HCXVisionForCausalLM.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1239 if pretrained_model_name_or_path is not None: # when evaluate or load instruction tunned model
1240 model: HCXVisionForCausalLM = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
-> 1241 model.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
1243 img_start_id = model.tokenizer.encode(IMG_LOC, add_special_tokens=False)
1244 assert (
1245 len(img_start_id) == 1
1246 ), f'"<|dummy3|>" was not encoded into a single special token. Encoding result: {img_start_id}'

File /usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:814, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
810 if tokenizer_class is None:
811 raise ValueError(
812 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
813 )
--> 814 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
816 # Otherwise we have to be creative.
817 # if model is an encoder decoder, the encoder tokenizer class is used by default
818 if isinstance(config, EncoderDecoderConfig):

File /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2029, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
2026 else:
2027 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2029 return cls._from_pretrained(
2030 resolved_vocab_files,
2031 pretrained_model_name_or_path,
2032 init_configuration,
2033 *init_inputs,
2034 token=token,
2035 cache_dir=cache_dir,
2036 local_files_only=local_files_only,
2037 _commit_hash=commit_hash,
2038 _is_local=is_local,
2039 **kwargs,
2040 )

File /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2261, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2259 # Instantiate the tokenizer.
2260 try:
-> 2261 tokenizer = cls(*init_inputs, **init_kwargs)
2262 except OSError:
2263 raise OSError(
2264 "Unable to load vocabulary from file. "
2265 "Please check that the provided vocabulary is accessible and not corrupted."
2266 )

File /usr/local/lib/python3.10/dist-packages/transformers/models/gpt2/tokenization_gpt2_fast.py:134, in GPT2TokenizerFast.__init__(self, vocab_file, merges_file, tokenizer_file, unk_token, bos_token, eos_token, add_prefix_space, **kwargs)
123 def __init__(
124 self,
125 vocab_file=None,
(...)
132 **kwargs,
133 ):
--> 134 super().__init__(
135 vocab_file,
136 merges_file,
137 tokenizer_file=tokenizer_file,
138 unk_token=unk_token,
139 bos_token=bos_token,
140 eos_token=eos_token,
141 add_prefix_space=add_prefix_space,
142 **kwargs,
143 )
145 self.add_bos_token = kwargs.pop("add_bos_token", False)
147 pre_tok_state = json.loads(self.backend_tokenizer.pre_tokenizer.__getstate__())

File /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
108 fast_tokenizer = copy.deepcopy(tokenizer_object)
109 elif fast_tokenizer_file is not None and not from_slow:
110 # We have a serialization from tokenizers which let us directly build the backend
--> 111 fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
112 elif slow_tokenizer is not None:
113 # We need to convert a slow tokenizer to build the backend
114 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)

Exception: data did not match any variant of untagged enum ModelWrapper at line 552251 column 3
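
The final exception, "data did not match any variant of untagged enum ModelWrapper", comes from the Rust tokenizers backend failing to deserialize the repo's tokenizer.json, which typically means the installed tokenizers package is too old for the format the file was saved with. A minimal sketch of the usual remedy, checking the environment and then upgrading; the idea that an upgrade resolves it is the standard fix for this error, but the required minimum versions are an assumption, not taken from the model card:

# Inspect the versions in the failing environment; an outdated `tokenizers`
# is the usual cause of the ModelWrapper deserialization error.
import tokenizers
import transformers

print("transformers:", transformers.__version__)
print("tokenizers:", tokenizers.__version__)

# Then upgrade both and restart the runtime before retrying the snippet above:
#   pip install -U transformers tokenizers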


coby0504 changed discussion status to closed
