Refactor to use DictDefault instead
Browse files- scripts/finetune.py +3 -3
- src/axolotl/utils/models.py +7 -7
scripts/finetune.py
CHANGED
|
@@ -10,11 +10,11 @@ from typing import Optional, List, Dict, Any, Union
|
|
| 10 |
import fire
|
| 11 |
import torch
|
| 12 |
import yaml
|
| 13 |
-
from addict import Dict
|
| 14 |
|
| 15 |
# add src to the pythonpath so we don't need to pip install this
|
| 16 |
from axolotl.utils.tokenization import check_dataset_labels
|
| 17 |
from axolotl.utils.validation import validate_config
|
|
|
|
| 18 |
|
| 19 |
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
| 20 |
src_dir = os.path.join(project_root, "src")
|
|
@@ -83,7 +83,7 @@ def do_inference(cfg, model, tokenizer, prompter="AlpacaPrompter"):
|
|
| 83 |
temperature=0.9,
|
| 84 |
top_p=0.95,
|
| 85 |
top_k=40,
|
| 86 |
-
|
| 87 |
output_attentions=False,
|
| 88 |
output_hidden_states=False,
|
| 89 |
output_scores=False,
|
|
@@ -131,7 +131,7 @@ def train(
|
|
| 131 |
|
| 132 |
# load the config from the yaml file
|
| 133 |
with open(config, "r") as f:
|
| 134 |
-
cfg: Dict = Dict(yaml.load(f, Loader=yaml.Loader))
|
| 135 |
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
|
| 136 |
# then overwrite the value
|
| 137 |
cfg_keys = cfg.keys()
|
|
|
|
| 10 |
import fire
|
| 11 |
import torch
|
| 12 |
import yaml
|
|
|
|
| 13 |
|
| 14 |
# add src to the pythonpath so we don't need to pip install this
|
| 15 |
from axolotl.utils.tokenization import check_dataset_labels
|
| 16 |
from axolotl.utils.validation import validate_config
|
| 17 |
+
from axolotl.utils.dict import DictDefault
|
| 18 |
|
| 19 |
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
|
| 20 |
src_dir = os.path.join(project_root, "src")
|
|
|
|
| 83 |
temperature=0.9,
|
| 84 |
top_p=0.95,
|
| 85 |
top_k=40,
|
| 86 |
+
return_dict_in_generate=True,
|
| 87 |
output_attentions=False,
|
| 88 |
output_hidden_states=False,
|
| 89 |
output_scores=False,
|
|
|
|
| 131 |
|
| 132 |
# load the config from the yaml file
|
| 133 |
with open(config, "r") as f:
|
| 134 |
+
cfg: DictDefault = DictDefault(yaml.load(f, Loader=yaml.Loader))
|
| 135 |
# if there are any options passed in the cli, if it is something that seems valid from the yaml,
|
| 136 |
# then overwrite the value
|
| 137 |
cfg_keys = cfg.keys()
|
src/axolotl/utils/models.py
CHANGED
|
@@ -29,7 +29,7 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
|
|
| 29 |
|
| 30 |
if TYPE_CHECKING:
|
| 31 |
from peft import PeftModel, PeftConfig
|
| 32 |
-
from addict import Dict
|
| 33 |
from transformers import PreTrainedTokenizer
|
| 34 |
|
| 35 |
|
|
@@ -79,7 +79,7 @@ def load_model(
|
|
| 79 |
adapter="lora",
|
| 80 |
inference=False,
|
| 81 |
):
|
| 82 |
-
# type: (str, str, str, str, Dict, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
|
| 83 |
|
| 84 |
# TODO refactor as a kwarg
|
| 85 |
load_in_8bit = cfg.load_in_8bit
|
|
@@ -184,9 +184,9 @@ def load_model(
|
|
| 184 |
# # https://github.com/HazyResearch/flash-attention/blob/40a25c8ee7465cf547b929cfa2937034e37bfce9/tests/models/test_gpt_neox.py#L12
|
| 185 |
# # https://github.com/HazyResearch/flash-attention/tree/main/training#model-components
|
| 186 |
# # add `**kwargs` to https://github.com/HazyResearch/flash-attention/blob/40a25c8ee7465cf547b929cfa2937034e37bfce9/flash_attn/models/gpt.py#L442
|
| 187 |
-
# from flash_attn.utils.pretrained import state_dict_from_pretrained
|
| 188 |
# from flash_attn.models.gpt import GPTLMHeadModel
|
| 189 |
-
# from flash_attn.models.gpt_neox import remap_state_dict_hf_gpt_neox, gpt_neox_config_to_gpt2_config
|
| 190 |
# from transformers import GPTNeoXConfig
|
| 191 |
# config = gpt_neox_config_to_gpt2_config(GPTNeoXConfig.from_pretrained(base_model))
|
| 192 |
# config.use_flash_attn = True
|
|
@@ -294,7 +294,7 @@ def load_model(
|
|
| 294 |
|
| 295 |
|
| 296 |
def load_adapter(model, cfg, adapter):
|
| 297 |
-
# type: (PreTrainedModel, Dict, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
| 298 |
|
| 299 |
if adapter is None:
|
| 300 |
return model, None
|
|
@@ -307,7 +307,7 @@ def load_adapter(model, cfg, adapter):
|
|
| 307 |
|
| 308 |
|
| 309 |
def load_llama_adapter(model, cfg):
|
| 310 |
-
# type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
| 311 |
from peft import (
|
| 312 |
AdaptionPromptConfig,
|
| 313 |
get_peft_model,
|
|
@@ -355,7 +355,7 @@ def find_all_linear_names(bits, model):
|
|
| 355 |
|
| 356 |
|
| 357 |
def load_lora(model, cfg):
|
| 358 |
-
# type: (PreTrainedModel, Dict) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
| 359 |
|
| 360 |
from peft import (
|
| 361 |
LoraConfig,
|
|
|
|
| 29 |
|
| 30 |
if TYPE_CHECKING:
|
| 31 |
from peft import PeftModel, PeftConfig
|
| 32 |
+
from axolotl.utils.dict import DictDefault
|
| 33 |
from transformers import PreTrainedTokenizer
|
| 34 |
|
| 35 |
|
|
|
|
| 79 |
adapter="lora",
|
| 80 |
inference=False,
|
| 81 |
):
|
| 82 |
+
# type: (str, str, str, str, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, PreTrainedTokenizer, Optional[PeftConfig]]
|
| 83 |
|
| 84 |
# TODO refactor as a kwarg
|
| 85 |
load_in_8bit = cfg.load_in_8bit
|
|
|
|
| 184 |
# # https://github.com/HazyResearch/flash-attention/blob/40a25c8ee7465cf547b929cfa2937034e37bfce9/tests/models/test_gpt_neox.py#L12
|
| 185 |
# # https://github.com/HazyResearch/flash-attention/tree/main/training#model-components
|
| 186 |
# # add `**kwargs` to https://github.com/HazyResearch/flash-attention/blob/40a25c8ee7465cf547b929cfa2937034e37bfce9/flash_attn/models/gpt.py#L442
|
| 187 |
+
# from flash_attn.utils.pretrained import state_dict_from_pretrained
|
| 188 |
# from flash_attn.models.gpt import GPTLMHeadModel
|
| 189 |
+
# from flash_attn.models.gpt_neox import remap_state_dict_hf_gpt_neox, gpt_neox_config_to_gpt2_config
|
| 190 |
# from transformers import GPTNeoXConfig
|
| 191 |
# config = gpt_neox_config_to_gpt2_config(GPTNeoXConfig.from_pretrained(base_model))
|
| 192 |
# config.use_flash_attn = True
|
|
|
|
| 294 |
|
| 295 |
|
| 296 |
def load_adapter(model, cfg, adapter):
|
| 297 |
+
# type: (PreTrainedModel, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
| 298 |
|
| 299 |
if adapter is None:
|
| 300 |
return model, None
|
|
|
|
| 307 |
|
| 308 |
|
| 309 |
def load_llama_adapter(model, cfg):
|
| 310 |
+
# type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
| 311 |
from peft import (
|
| 312 |
AdaptionPromptConfig,
|
| 313 |
get_peft_model,
|
|
|
|
| 355 |
|
| 356 |
|
| 357 |
def load_lora(model, cfg):
|
| 358 |
+
# type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
|
| 359 |
|
| 360 |
from peft import (
|
| 361 |
LoraConfig,
|