Code execution error

#25
by coby0504 - opened

from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer

model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to(device="cuda")
preprocessor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_name)
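
As the traceback below shows, the repo's custom HCXVisionForCausalLM.from_pretrained itself calls AutoTokenizer.from_pretrained and attaches the result to the model, which is why a tokenizer failure surfaces while the model is loading. A minimal sketch of the equivalent access, assuming the load succeeds:

# The custom from_pretrained (modeling_hyperclovax.py:1241) already attaches
# the tokenizer to the model, so the separate AutoTokenizer call is optional:
tokenizer = model.tokenizer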

I followed the example exactly as given, but the error below keeps occurring.

Exception Traceback (most recent call last)
Cell In[1], line 5
1 from transformers import AutoModelForCausalLM, AutoProcessor
3 model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
----> 5 model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True).to("cuda")
6 preprocessor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
8 # the tokenizer is accessed via the model

File /usr/local/lib/python3.10/dist-packages/transformers/models/auto/auto_factory.py:561, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
559 else:
560 cls.register(config.__class__, model_class, exist_ok=True)
--> 561 return model_class.from_pretrained(
562 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
563 )
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)

File ~/.cache/huggingface/modules/transformers_modules/naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B/cd33a27fcc2348f809bb70b8bc0623547279bf0b/modeling_hyperclovax.py:1241, in HCXVisionForCausalLM.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1239 if pretrained_model_name_or_path is not None: # when evaluate or load instruction tunned model
1240 model: HCXVisionForCausalLM = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
-> 1241 model.tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
1243 img_start_id = model.tokenizer.encode(IMG_LOC, add_special_tokens=False)
1244 assert (
1245 len(img_start_id) == 1
1246 ), f'"<|dummy3|>" was not encoded into a single special token. Encoding result: {img_start_id}'

File /usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py:814, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
810 if tokenizer_class is None:
811 raise ValueError(
812 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
813 )
--> 814 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
816 # Otherwise we have to be creative.
817 # if model is an encoder decoder, the encoder tokenizer class is used by default
818 if isinstance(config, EncoderDecoderConfig):

File /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2029, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
2026 else:
2027 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2029 return cls._from_pretrained(
2030 resolved_vocab_files,
2031 pretrained_model_name_or_path,
2032 init_configuration,
2033 *init_inputs,
2034 token=token,
2035 cache_dir=cache_dir,
2036 local_files_only=local_files_only,
2037 _commit_hash=commit_hash,
2038 _is_local=is_local,
2039 **kwargs,
2040 )

File /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:2261, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2259 # Instantiate the tokenizer.
2260 try:
-> 2261 tokenizer = cls(*init_inputs, **init_kwargs)
2262 except OSError:
2263 raise OSError(
2264 "Unable to load vocabulary from file. "
2265 "Please check that the provided vocabulary is accessible and not corrupted."
2266 )

File /usr/local/lib/python3.10/dist-packages/transformers/models/gpt2/tokenization_gpt2_fast.py:134, in GPT2TokenizerFast.__init__(self, vocab_file, merges_file, tokenizer_file, unk_token, bos_token, eos_token, add_prefix_space, **kwargs)
123 def __init__(
124 self,
125 vocab_file=None,
(...)
132 **kwargs,
133 ):
--> 134 super().__init__(
135 vocab_file,
136 merges_file,
137 tokenizer_file=tokenizer_file,
138 unk_token=unk_token,
139 bos_token=bos_token,
140 eos_token=eos_token,
141 add_prefix_space=add_prefix_space,
142 **kwargs,
143 )
145 self.add_bos_token = kwargs.pop("add_bos_token", False)
147 pre_tok_state = json.loads(self.backend_tokenizer.pre_tokenizer.__getstate__())

File /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_fast.py:111, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
108 fast_tokenizer = copy.deepcopy(tokenizer_object)
109 elif fast_tokenizer_file is not None and not from_slow:
110 # We have a serialization from tokenizers which let us directly build the backend
--> 111 fast_tokenizer = TokenizerFast.from_file(fast_tokenizer_file)
112 elif slow_tokenizer is not None:
113 # We need to convert a slow tokenizer to build the backend
114 fast_tokenizer = convert_slow_tokenizer(slow_tokenizer)

Exception: data did not match any variant of untagged enum ModelWrapper at line 552251 column 3
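
The final exception, "data did not match any variant of untagged enum ModelWrapper", comes from the Rust tokenizers backend failing to deserialize the repo's tokenizer.json, which typically means the installed tokenizers package is too old for the format the file was saved with. A minimal sketch of the usual remedy, checking the environment and then upgrading; the idea that an upgrade resolves it is the standard fix for this error, but the required minimum versions are an assumption, not taken from the model card:

# Inspect the versions in the failing environment; an outdated `tokenizers`
# is the usual cause of the ModelWrapper deserialization error.
import tokenizers
import transformers

print("transformers:", transformers.__version__)
print("tokenizers:", tokenizers.__version__)

# Then upgrade both and restart the runtime before retrying the snippet above:
#   pip install -U transformers tokenizers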


coby0504 changed discussion status to closed
