"""Vortex configuration for HuggingFace."""

from typing import Optional, List, Dict, Any

from transformers import PretrainedConfig
|
| |
|
| |
|
class VortexConfig(PretrainedConfig):
    """
    Configuration class for the Vortex model.

    Compatible with HuggingFace transformers. Holds the architectural
    hyperparameters of the hybrid SSM/attention stack plus the optional
    science-oriented modules (equation, numerical, citation, molecular),
    the special-token table, and the domain tags.
    """

    model_type = "vortex"
    # Kept as a class attribute for callers that read it without an
    # instance; the effective per-instance value is set by super().__init__.
    tie_word_embeddings = True

    def __init__(
        self,
        d_model: int = 4096,
        num_layers: int = 32,
        num_heads: int = 32,
        d_state: int = 16,
        d_conv: int = 4,
        window_size: int = 512,
        ffn_expansion: int = 4,
        num_domains: int = 7,
        vocab_size: int = 50000,
        max_seq_len: int = 16384,
        ssm_ratio: float = 0.6,
        enable_equation_module: bool = True,
        enable_numerical_module: bool = True,
        enable_citation_module: bool = True,
        enable_molecular_module: bool = True,
        special_tokens: Optional[Dict[str, int]] = None,
        domain_tags: Optional[List[str]] = None,
        initializer_range: float = 0.02,
        tie_word_embeddings: bool = True,
        **kwargs,
    ):
        """
        Args:
            d_model: Hidden size of the model.
            num_layers: Number of transformer/SSM layers.
            num_heads: Number of attention heads; must divide ``d_model``.
            d_state: SSM state dimension.
            d_conv: SSM convolution kernel width.
            window_size: Local attention window size.
            ffn_expansion: FFN hidden-size multiplier.
            num_domains: Number of science domains.
            vocab_size: Tokenizer vocabulary size.
            max_seq_len: Maximum supported sequence length.
            ssm_ratio: Fraction of layers using the SSM path.
            enable_*_module: Toggles for the optional science modules.
            special_tokens: Token-string -> id map; defaults to the built-in
                Vortex special-token table when None.
            domain_tags: Ordered list of domain tag tokens; defaults to the
                seven built-in science domains when None.
            initializer_range: Stddev for weight initialization.
            tie_word_embeddings: Tie input/output embedding weights.

        Raises:
            ValueError: If ``d_model`` is not divisible by ``num_heads``.
        """
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)

        # Fail fast instead of silently floor-dividing head_dim below.
        if num_heads <= 0 or d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )

        self.d_model = d_model
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.d_state = d_state
        self.d_conv = d_conv
        self.window_size = window_size
        self.ffn_expansion = ffn_expansion
        self.num_domains = num_domains
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.ssm_ratio = ssm_ratio
        self.enable_equation_module = enable_equation_module
        self.enable_numerical_module = enable_numerical_module
        self.enable_citation_module = enable_citation_module
        self.enable_molecular_module = enable_molecular_module
        self.special_tokens = special_tokens or {
            "[PAD]": 0, "[UNK]": 1, "[BOS]": 2, "[EOS]": 3,
            "[EQUATION]": 4, "[/EQUATION]": 5,
            "[CITATION]": 6, "[/CITATION]": 7,
            "[MOLECULE]": 8, "[/MOLECULE]": 9,
            "[FIGURE]": 10, "[TABLE]": 11,
            "[MATH]": 12, "[CHEM]": 13, "[BIO]": 14,
            "[PHYS]": 15, "[EARTH]": 16, "[SPACE]": 17, "[ZOO]": 18,
        }
        self.domain_tags = domain_tags or ["[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"]
        self.initializer_range = initializer_range

        # Derived: per-head dimension (exact by the divisibility check above).
        self.head_dim = self.d_model // self.num_heads

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
        """Load the config from a local directory containing ``config.json``.

        ``kwargs`` override any values read from the file. Falls back to
        default values (plus ``kwargs``) when no ``config.json`` is found.
        """
        import json
        import os

        config_path = os.path.join(pretrained_model_name_or_path, "config.json")
        if not os.path.exists(config_path):
            # NOTE(review): silently falling back to defaults hides path
            # typos; consider delegating to super().from_pretrained() for
            # hub support. Existing behavior kept for backward compatibility.
            return cls(**kwargs)

        with open(config_path, "r", encoding="utf-8") as f:
            config_dict = json.load(f)
        config_dict.update(kwargs)
        # "model_type" is emitted by to_dict() but is a class attribute;
        # drop it so round-tripping does not shadow it on the instance.
        config_dict.pop("model_type", None)
        return cls(**config_dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the configuration to a JSON-serializable dictionary.

        NOTE(review): this override does not include PretrainedConfig's
        base fields; anything set only via ``**kwargs`` is not round-tripped.
        """
        return {
            "model_type": self.model_type,
            "d_model": self.d_model,
            "num_layers": self.num_layers,
            "num_heads": self.num_heads,
            "head_dim": self.head_dim,
            "d_state": self.d_state,
            "d_conv": self.d_conv,
            "window_size": self.window_size,
            "ffn_expansion": self.ffn_expansion,
            "num_domains": self.num_domains,
            "vocab_size": self.vocab_size,
            "max_seq_len": self.max_seq_len,
            "ssm_ratio": self.ssm_ratio,
            "enable_equation_module": self.enable_equation_module,
            "enable_numerical_module": self.enable_numerical_module,
            "enable_citation_module": self.enable_citation_module,
            "enable_molecular_module": self.enable_molecular_module,
            "special_tokens": self.special_tokens,
            "domain_tags": self.domain_tags,
            "initializer_range": self.initializer_range,
            # Previously omitted: lost on a to_dict()/from_pretrained()
            # round-trip even when the user disabled tying.
            "tie_word_embeddings": self.tie_word_embeddings,
        }
|
| |
|