# Copyright 2024 NVIDIA CORPORATION & AFFILIATES
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

# This file is modified from https://github.com/haotian-liu/LLaVA/ and https://github.com/NVlabs/VILA/

import math
import os.path as osp
import warnings

import torch  # noqa
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    PretrainedConfig,
    PreTrainedModel,
)


def has_tokenizer(path):
    """Return True if *path* contains a complete Hugging Face tokenizer.

    A tokenizer is considered present when both metadata files
    (``special_tokens_map.json`` and ``tokenizer_config.json``) exist together
    with at least one vocabulary file (``tokenizer.model`` or
    ``tokenizer.json``).

    *path* may be a local directory (checked first, no network) or a
    Hugging Face Hub repo id (probed remotely on a local miss).
    """
    # Fast path: all required files exist on the local filesystem.
    if (
        osp.exists(osp.join(path, "special_tokens_map.json"))
        and osp.exists(osp.join(path, "tokenizer_config.json"))
        and (osp.exists(osp.join(path, "tokenizer.model")) or osp.exists(osp.join(path, "tokenizer.json")))
    ):
        return True

    # Slow path: treat `path` as a Hub repo id and probe for the same files.
    # Imported lazily so purely-local usage does not touch the network stack.
    from huggingface_hub import HfApi, file_exists
    from huggingface_hub.utils import HFValidationError

    api = HfApi()
    try:
        valid_hf_repo = api.repo_exists(path)
    except HFValidationError:
        # `path` is not even a syntactically valid repo id (e.g. an absolute
        # local path) — it cannot be a Hub repo.
        valid_hf_repo = False
    if (
        valid_hf_repo
        and file_exists(path, "special_tokens_map.json")
        and file_exists(path, "tokenizer_config.json")
        and (file_exists(path, "tokenizer.model") or file_exists(path, "tokenizer.json"))
    ):
        return True
    return False


def context_length_extension(config):
    """Enable linear RoPE scaling on *config* when the requested context
    length exceeds the model's native one.

    Reads ``config.max_position_embeddings`` (native context) and
    ``config.model_max_length`` (requested context). When the requested
    length is larger, sets ``config.rope_scaling`` to a linear scaling dict
    with an integer-ceiling factor. Mutates and returns *config*.
    """
    orig_ctx_len = getattr(config, "max_position_embeddings", None)
    model_max_length = getattr(config, "model_max_length", None)
    # Guard both operands: comparing None with `>` raises TypeError when
    # only `model_max_length` is missing.
    if orig_ctx_len and model_max_length and model_max_length > orig_ctx_len:
        print(f"Scaling RoPE from {orig_ctx_len} to {model_max_length}")
        scaling_factor = float(math.ceil(model_max_length / orig_ctx_len))
        config.rope_scaling = {"type": "linear", "factor": scaling_factor}
    return config


def build_llm_and_tokenizer(
    model_name_or_path: str,
    config: PretrainedConfig,
    attn_implementation=None,
    model_max_length=None,
    *args,
    **kwargs,
) -> PreTrainedModel:
    """Load the causal LM and its tokenizer for a VILA-style VLM checkpoint.

    Args:
        model_name_or_path: Local path or Hub id of the LLM (or VLM root).
        config: The parent VLM config; ``config.model_dtype`` selects the
            torch dtype and ``config.hidden_size`` is synced from the loaded
            LLM as a side effect.
        attn_implementation: Forwarded to the LLM config
            (``_attn_implementation``), e.g. ``"flash_attention_2"``.
        model_max_length: Optional context-length override; when set, linear
            RoPE scaling is enabled via :func:`context_length_extension`.
        *args, **kwargs: Passed through to
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        Tuple ``(llm, tokenizer)``.
    """
    llm_cfg = AutoConfig.from_pretrained(model_name_or_path)
    llm_cfg._attn_implementation = attn_implementation
    llm_cfg.model_max_length = model_max_length
    if model_max_length is not None:
        context_length_extension(llm_cfg)

    # NOTE(review): `eval` on config.model_dtype (e.g. "torch.float16") runs
    # arbitrary code if the config is untrusted — consider a getattr-based
    # lookup if the accepted format can be pinned down.
    llm = AutoModelForCausalLM.from_pretrained(
        model_name_or_path, config=llm_cfg, torch_dtype=eval(config.model_dtype), *args, **kwargs
    )

    llm_path = model_name_or_path
    if not has_tokenizer(llm_path):
        # Old checkpoints kept the tokenizer in the VLM root; new layout puts
        # it under ./llm. Fall back to the subfolder when the root has none.
        warnings.warn("tokenizer not found in VLM root folder; falling back to ./{VILA}/llm.")
        llm_path = osp.join(llm_path, "llm")

    # TODO(ligeng): use LLM class to judge to better compability.
    # NOTE(review): substring matching ("yi" in particular) is fragile — any
    # path containing "yi" takes this branch; verify against checkpoint names.
    if "mpt" in model_name_or_path:
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
        )
    elif "yi" in model_name_or_path.lower():
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
            use_fast=False,
        )
    else:
        tokenizer = AutoTokenizer.from_pretrained(
            llm_path,
            model_max_length=llm_cfg.model_max_length,
            padding_side="right",
            use_fast=False,
            legacy=False,
        )

    # TODO(ligeng): is this necessary for llava?
    # Keep the parent VLM config in sync with the actual LLM hidden size.
    config.hidden_size = llm.config.hidden_size
    return llm, tokenizer