import math
import os.path as osp
import warnings

import torch
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    PretrainedConfig,
    PreTrainedModel,
    PreTrainedTokenizer,
)


def has_tokenizer(path):
    """Return True if `path` (a local directory or a Hub repo id) contains the
    tokenizer files: special_tokens_map.json, tokenizer_config.json, and either
    tokenizer.model or tokenizer.json."""
    # Fast path: check the local filesystem first.
    if (
        osp.exists(osp.join(path, "special_tokens_map.json"))
        and osp.exists(osp.join(path, "tokenizer_config.json"))
        and (osp.exists(osp.join(path, "tokenizer.model")) or osp.exists(osp.join(path, "tokenizer.json")))
    ):
        return True

    # Fall back to the Hugging Face Hub, treating `path` as a repo id.
    from huggingface_hub import HfApi, file_exists
    from huggingface_hub.utils import HFValidationError

    api = HfApi()
    try:
        valid_hf_repo = api.repo_exists(path)
    except HFValidationError:
        valid_hf_repo = False
    if (
        valid_hf_repo
        and file_exists(path, "special_tokens_map.json")
        and file_exists(path, "tokenizer_config.json")
        and (file_exists(path, "tokenizer.model") or file_exists(path, "tokenizer.json"))
    ):
        return True
    return False
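
# Illustrative calls (the path and repo id below are hypothetical examples,
# not values used by this module):
#   has_tokenizer("/checkpoints/my-vlm/llm")  # resolves via the filesystem branch
#   has_tokenizer("lmsys/vicuna-7b-v1.5")     # resolves via the Hub branch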


def context_length_extension(config):
    """Enable linear RoPE scaling when the requested context length exceeds the
    model's native max_position_embeddings."""
    orig_ctx_len = getattr(config, "max_position_embeddings", None)
    model_max_length = getattr(config, "model_max_length", None)
    # Guard against model_max_length being None before comparing.
    if orig_ctx_len and model_max_length and model_max_length > orig_ctx_len:
        print(f"Scaling RoPE from {orig_ctx_len} to {model_max_length}")
        scaling_factor = float(math.ceil(model_max_length / orig_ctx_len))
        config.rope_scaling = {"type": "linear", "factor": scaling_factor}
    return config
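
# Worked example (numbers are illustrative): with max_position_embeddings=4096
# and model_max_length=16384, ceil(16384 / 4096) = 4, so rope_scaling becomes
# {"type": "linear", "factor": 4.0}. Because the factor is rounded up, a
# model_max_length of 5000 over a 4096 base also yields factor 2.0.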


def build_llm_and_tokenizer(
    model_name_or_path: str,
    config: PretrainedConfig,
    attn_implementation=None,
    model_max_length=None,
    *args,
    **kwargs,
) -> tuple[PreTrainedModel, PreTrainedTokenizer]:
    # Extra configuration for the LLM: attention backend and context length.
    llm_cfg = AutoConfig.from_pretrained(model_name_or_path)
    llm_cfg._attn_implementation = attn_implementation
    llm_cfg.model_max_length = model_max_length
    if model_max_length is not None:
        context_length_extension(llm_cfg)

    # config.model_dtype is a string such as "torch.float16"; eval() resolves
    # it to the actual dtype object.
    llm = AutoModelForCausalLM.from_pretrained(
        model_name_or_path, config=llm_cfg, torch_dtype=eval(config.model_dtype), *args, **kwargs
    )

    llm_path = model_name_or_path
    if not has_tokenizer(llm_path):
        warnings.warn("Tokenizer not found in the VLM root folder; falling back to the ./llm subfolder.")
        llm_path = osp.join(llm_path, "llm")

    # TODO(ligeng): use the LLM class to decide the branch, for better compatibility.
| if "mpt" in model_name_or_path: | |
| tokenizer = AutoTokenizer.from_pretrained( | |
| llm_path, | |
| model_max_length=llm_cfg.model_max_length, | |
| padding_side="right", | |
| ) | |
| elif "yi" in model_name_or_path.lower(): | |
| tokenizer = AutoTokenizer.from_pretrained( | |
| llm_path, | |
| model_max_length=llm_cfg.model_max_length, | |
| padding_side="right", | |
| use_fast=False, | |
| ) | |
| else: | |
| tokenizer = AutoTokenizer.from_pretrained( | |
| llm_path, | |
| model_max_length=llm_cfg.model_max_length, | |
| padding_side="right", | |
| use_fast=False, | |
| legacy=False, | |
| ) | |
    # TODO(ligeng): is this necessary for LLaVA?
    # Mirror the LLM hidden size onto the parent VLM config.
    config.hidden_size = llm.config.hidden_size
    return llm, tokenizer
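

# Minimal usage sketch (illustrative, not part of the original module): the
# checkpoint paths are placeholders, and the dtype string is an assumption
# consumed by the eval() call in build_llm_and_tokenizer().
if __name__ == "__main__":
    cfg = AutoConfig.from_pretrained("path/to/vlm")  # parent VLM config (placeholder path)
    cfg.model_dtype = "torch.float16"
    llm, tokenizer = build_llm_and_tokenizer(
        "path/to/vlm/llm",
        cfg,
        attn_implementation="flash_attention_2",
        model_max_length=4096,
    )
    print(type(llm).__name__, tokenizer.model_max_length)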