mini-omni-safetensors

Runtime error

App Files Community

leafspark committed on Sep 3, 2024

Commit

641ee6f

verified ·

1 Parent(s): 6eacc63

feat(model): support using JSON config

Browse files

Files changed (1) hide show

litgpt/config.py +8 -49

litgpt/config.py CHANGED Viewed

@@ -1,12 +1,10 @@
-# Copyright Lightning AI. Licensed under the Apache License 2.0, see LICENSE file.
 from copy import deepcopy
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Literal, Optional, Type, Union
 import torch
-import yaml
 from typing_extensions import Self
 import litgpt.model
@@ -30,33 +28,11 @@ class Config:
     parallel_residual: bool = True
     bias: bool = True
     lm_head_bias: bool = False
-    # to use multi-head attention (MHA), set this to `n_head` (default)
-    # to use multi-query attention (MQA), set this to 1
-    # to use grouped-query attention (GQA), set this to a value in between
-    # Example with `n_head=4`
-    # ┌───┐┌───┐┌───┐┌───┐     ┌───┐    ┌───┐             ┌───┐
-    # │ v ││ v ││ v ││ v │     │ v │    │ v │             │ v │
-    # └───┘└───┘└───┘└───┘     └───┘    └───┘             └───┘
-    #   │    │    │    │         │        │                 │
-    # ┌───┐┌───┐┌───┐┌───┐     ┌───┐    ┌───┐             ┌───┐
-    # │ k ││ k ││ k ││ k │     │ k │    │ k │             │ k │
-    # └───┘└───┘└───┘└───┘     └───┘    └───┘             └───┘
-    #   │    │    │    │      ┌──┴──┐  ┌──┴──┐      ┌────┬──┴─┬────┐
-    # ┌───┐┌───┐┌───┐┌───┐  ┌───┐┌───┐┌───┐┌───┐  ┌───┐┌───┐┌───┐┌───┐
-    # │ q ││ q ││ q ││ q │  │ q ││ q ││ q ││ q │  │ q ││ q ││ q ││ q │
-    # └───┘└───┘└───┘└───┘  └───┘└───┘└───┘└───┘  └───┘└───┘└───┘└───┘
-    # ◀──────────────────▶  ◀──────────────────▶  ◀──────────────────▶
-    #         MHA                    GQA                   MQA
-    #   n_query_groups=4       n_query_groups=2      n_query_groups=1
-    #
-    # credit https://arxiv.org/pdf/2305.13245.pdf
     n_query_groups: Optional[int] = None
     shared_attention_norm: bool = False
     norm_class_name: Literal["LayerNorm", "RMSNorm"] = "LayerNorm"
     norm_eps: float = 1e-5
-    mlp_class_name: Literal["GptNeoxMLP", "LLaMAMLP", "GemmaMLP", "LLaMAMoE"] = (
-        "GptNeoxMLP"
-    )
     gelu_approximate: str = "none"
     intermediate_size: Optional[int] = None
     rope_condense_ratio: int = 1
@@ -90,27 +66,19 @@ class Config:
             assert self.n_embd % self.n_head == 0
             self.head_size = self.n_embd // self.n_head
-        # vocab size should be a power of 2 to be optimal on hardware. compute the closest value
         if self.padded_vocab_size is None:
-            self.padded_vocab_size = find_multiple(
-                self.vocab_size, self.padding_multiple
-            )
         else:
-            # vocab size shouldn't be larger than padded vocab size
             self.vocab_size = min(self.vocab_size, self.padded_vocab_size)
-        # compute the number of query groups
         if self.n_query_groups is not None:
             assert self.n_head % self.n_query_groups == 0
         else:
             self.n_query_groups = self.n_head
-        # compute the intermediate size for MLP if not set
         if self.intermediate_size is None:
             if self.mlp_class_name == "LLaMAMLP":
-                raise ValueError(
-                    f"The config {self.name!r}, needs to set the `intermediate_size`"
-                )
             self.intermediate_size = 4 * self.n_embd
         self.rope_n_elem = int(self.rotary_percentage * self.head_size)
@@ -121,14 +89,12 @@ class Config:
     @classmethod
     def from_name(cls, name: str, **kwargs: Any) -> Optional[Self]:
         if name not in name_to_config:
-            # search through all `config['hf_config']['name']`
             try:
                 conf_dict = next(
                     config
                     for config in configs
                     if name == config["hf_config"]["name"]
-                    or config["hf_config"]["org"] + "/" + config["hf_config"]["name"]
-                    == name
                 )
             except StopIteration:
                 raise ValueError(f"{name!r} is not a supported config name")
@@ -142,7 +108,7 @@ class Config:
     @classmethod
     def from_file(cls, path: Union[str, Path], **kwargs: Any) -> Self:
         with open(path, encoding="utf-8") as fp:
-            file_kwargs = yaml.safe_load(fp)
             if file_kwargs is None:
                 raise ValueError(f"{path} is empty which is likely unexpected.")
         file_kwargs.update(kwargs)
@@ -150,28 +116,21 @@ class Config:
     @classmethod
     def from_checkpoint(cls, path: Path, **kwargs: Any) -> Self:
-        """Automatically load `model_config.yaml` and if it doesn't exist - a matching config from `litgpt/config.py`."""
-        if (config_path := path / "model_config.yaml").is_file():
             return cls.from_file(config_path, **kwargs)
         if (model_name := path.name) in name_to_config:
             return cls.from_name(model_name, **kwargs)
-        raise FileNotFoundError(
-            f"For {str(path)!r} neither 'model_config.yaml' nor matching config exists."
-        )
     @property
     def mlp_class(self) -> Type:
-        # `self.mlp_class_name` cannot be the type to keep the config serializable
         return getattr(litgpt.model, self.mlp_class_name)
     @property
     def norm_class(self) -> Type:
-        # `self.norm_class_name` cannot be the type to keep the config serializable
         if self.norm_class_name == "RMSNorm":
             from functools import partial
             from litgpt.model import RMSNorm
             return partial(RMSNorm, add_unit_offset="Gemma" in self.name)
         return getattr(torch.nn, self.norm_class_name)

+import json
 from copy import deepcopy
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Literal, Optional, Type, Union
 import torch
 from typing_extensions import Self
 import litgpt.model
     parallel_residual: bool = True
     bias: bool = True
     lm_head_bias: bool = False
     n_query_groups: Optional[int] = None
     shared_attention_norm: bool = False
     norm_class_name: Literal["LayerNorm", "RMSNorm"] = "LayerNorm"
     norm_eps: float = 1e-5
+    mlp_class_name: Literal["GptNeoxMLP", "LLaMAMLP", "GemmaMLP", "LLaMAMoE"] = "GptNeoxMLP"
     gelu_approximate: str = "none"
     intermediate_size: Optional[int] = None
     rope_condense_ratio: int = 1
             assert self.n_embd % self.n_head == 0
             self.head_size = self.n_embd // self.n_head
         if self.padded_vocab_size is None:
+            self.padded_vocab_size = find_multiple(self.vocab_size, self.padding_multiple)
         else:
             self.vocab_size = min(self.vocab_size, self.padded_vocab_size)
         if self.n_query_groups is not None:
             assert self.n_head % self.n_query_groups == 0
         else:
             self.n_query_groups = self.n_head
         if self.intermediate_size is None:
             if self.mlp_class_name == "LLaMAMLP":
+                raise ValueError(f"The config {self.name!r}, needs to set the `intermediate_size`")
             self.intermediate_size = 4 * self.n_embd
         self.rope_n_elem = int(self.rotary_percentage * self.head_size)
     @classmethod
     def from_name(cls, name: str, **kwargs: Any) -> Optional[Self]:
         if name not in name_to_config:
             try:
                 conf_dict = next(
                     config
                     for config in configs
                     if name == config["hf_config"]["name"]
+                    or config["hf_config"]["org"] + "/" + config["hf_config"]["name"] == name
                 )
             except StopIteration:
                 raise ValueError(f"{name!r} is not a supported config name")
     @classmethod
     def from_file(cls, path: Union[str, Path], **kwargs: Any) -> Self:
         with open(path, encoding="utf-8") as fp:
+            file_kwargs = json.load(fp)
             if file_kwargs is None:
                 raise ValueError(f"{path} is empty which is likely unexpected.")
         file_kwargs.update(kwargs)
     @classmethod
     def from_checkpoint(cls, path: Path, **kwargs: Any) -> Self:
+        if (config_path := path / "config.json").is_file():
             return cls.from_file(config_path, **kwargs)
         if (model_name := path.name) in name_to_config:
             return cls.from_name(model_name, **kwargs)
+        raise FileNotFoundError(f"For {str(path)!r} neither 'config.json' nor matching config exists.")
     @property
     def mlp_class(self) -> Type:
         return getattr(litgpt.model, self.mlp_class_name)
     @property
     def norm_class(self) -> Type:
         if self.norm_class_name == "RMSNorm":
             from functools import partial
             from litgpt.model import RMSNorm
             return partial(RMSNorm, add_unit_offset="Gemma" in self.name)
         return getattr(torch.nn, self.norm_class_name)