# Vortex-7b-V1 / configuration_vortex.py
# Provenance: uploaded by Zandy-Wandy ("Upload Vortex model", commit bf64b03, verified)
"""
Vortex configuration for HuggingFace.
"""
from typing import Optional, List, Dict, Any
from transformers import PretrainedConfig
class VortexConfig(PretrainedConfig):
    """Configuration class for the Vortex model.

    Compatible with HuggingFace ``transformers``: subclasses
    :class:`~transformers.PretrainedConfig` so instances can be saved and
    loaded with ``save_pretrained`` / ``from_pretrained``.
    """
    model_type = "vortex"
    tie_word_embeddings = True
    def __init__(
        self,
        d_model: int = 4096,
        num_layers: int = 32,
        num_heads: int = 32,
        d_state: int = 16,
        d_conv: int = 4,
        window_size: int = 512,
        ffn_expansion: int = 4,
        num_domains: int = 7,
        vocab_size: int = 50000,
        max_seq_len: int = 16384,
        ssm_ratio: float = 0.6,
        enable_equation_module: bool = True,
        enable_numerical_module: bool = True,
        enable_citation_module: bool = True,
        enable_molecular_module: bool = True,
        special_tokens: Optional[Dict[str, int]] = None,
        domain_tags: Optional[List[str]] = None,
        initializer_range: float = 0.02,
        tie_word_embeddings: bool = True,
        **kwargs
    ):
        """Build a Vortex configuration.

        Args:
            d_model: Hidden size of the model; must be divisible by
                ``num_heads``.
            num_layers: Number of transformer/SSM layers.
            num_heads: Number of attention heads; must be positive.
            d_state: SSM state dimension.
            d_conv: SSM convolution kernel width.
            window_size: Local attention window size.
            ffn_expansion: Feed-forward expansion factor.
            num_domains: Number of scientific domains.
            vocab_size: Tokenizer vocabulary size.
            max_seq_len: Maximum supported sequence length.
            ssm_ratio: Fraction of layers that are SSM (vs. attention) —
                presumably; exact usage lives in the model code.
            enable_equation_module: Toggle the equation module.
            enable_numerical_module: Toggle the numerical module.
            enable_citation_module: Toggle the citation module.
            enable_molecular_module: Toggle the molecular module.
            special_tokens: Mapping of special-token string to id. ``None``
                selects the built-in defaults; an explicit (even empty)
                mapping is used as given.
            domain_tags: Ordered list of domain tag tokens. ``None`` selects
                the built-in defaults.
            initializer_range: Stddev for weight initialization.
            tie_word_embeddings: Tie input/output embeddings (forwarded to
                the ``PretrainedConfig`` base).
            **kwargs: Forwarded to ``PretrainedConfig``.

        Raises:
            ValueError: If ``num_heads`` is not positive or ``d_model`` is
                not divisible by ``num_heads``.
        """
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
        # Validate up front: the original computed d_model // num_heads
        # unchecked, which raises ZeroDivisionError for num_heads == 0 and
        # silently truncates when d_model is not a multiple of num_heads.
        if num_heads <= 0:
            raise ValueError(f"num_heads must be positive, got {num_heads}")
        if d_model % num_heads != 0:
            raise ValueError(
                f"d_model ({d_model}) must be divisible by num_heads ({num_heads})"
            )
        self.d_model = d_model
        self.num_layers = num_layers
        self.num_heads = num_heads
        self.d_state = d_state
        self.d_conv = d_conv
        self.window_size = window_size
        self.ffn_expansion = ffn_expansion
        self.num_domains = num_domains
        self.vocab_size = vocab_size
        self.max_seq_len = max_seq_len
        self.ssm_ratio = ssm_ratio
        self.enable_equation_module = enable_equation_module
        self.enable_numerical_module = enable_numerical_module
        self.enable_citation_module = enable_citation_module
        self.enable_molecular_module = enable_molecular_module
        # Test against None (not truthiness): `special_tokens or {...}` would
        # silently replace an explicitly-passed empty dict with the defaults.
        self.special_tokens = special_tokens if special_tokens is not None else {
            "[PAD]": 0, "[UNK]": 1, "[BOS]": 2, "[EOS]": 3,
            "[EQUATION]": 4, "[/EQUATION]": 5,
            "[CITATION]": 6, "[/CITATION]": 7,
            "[MOLECULE]": 8, "[/MOLECULE]": 9,
            "[FIGURE]": 10, "[TABLE]": 11,
            "[MATH]": 12, "[CHEM]": 13, "[BIO]": 14,
            "[PHYS]": 15, "[EARTH]": 16, "[SPACE]": 17, "[ZOO]": 18,
        }
        # Same None-check rationale as special_tokens above.
        self.domain_tags = domain_tags if domain_tags is not None else [
            "[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"
        ]
        self.initializer_range = initializer_range
        # Derived attribute: per-head dimension (exact, validated above).
        self.head_dim = self.d_model // self.num_heads
    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
        """Load the config from ``<path>/config.json`` if it exists locally.

        Explicit ``kwargs`` override values read from the file. If no local
        ``config.json`` is found, a default config built from ``kwargs`` is
        returned.

        NOTE(review): unlike the base-class implementation, this override
        does not resolve Hub repo ids or remote files — a non-local path
        silently yields a default config. Confirm this is intentional.
        """
        import json
        import os
        config_path = os.path.join(pretrained_model_name_or_path, "config.json")
        if os.path.exists(config_path):
            # config.json is JSON and therefore UTF-8; be explicit so the
            # read does not depend on the platform's locale encoding.
            with open(config_path, "r", encoding="utf-8") as f:
                config_dict = json.load(f)
            config_dict.update(kwargs)
            return cls(**config_dict)
        else:
            # Return default config
            return cls(**kwargs)
    def to_dict(self) -> Dict[str, Any]:
        """Convert the configuration to a plain dictionary.

        Includes ``tie_word_embeddings`` so a round trip through
        ``cls(**config.to_dict())`` preserves the embedding-tying choice.
        ``head_dim`` is derived but exported for convenience; ``__init__``
        accepts it back harmlessly via ``**kwargs``.
        """
        return {
            "model_type": self.model_type,
            "d_model": self.d_model,
            "num_layers": self.num_layers,
            "num_heads": self.num_heads,
            "head_dim": self.head_dim,
            "d_state": self.d_state,
            "d_conv": self.d_conv,
            "window_size": self.window_size,
            "ffn_expansion": self.ffn_expansion,
            "num_domains": self.num_domains,
            "vocab_size": self.vocab_size,
            "max_seq_len": self.max_seq_len,
            "ssm_ratio": self.ssm_ratio,
            "enable_equation_module": self.enable_equation_module,
            "enable_numerical_module": self.enable_numerical_module,
            "enable_citation_module": self.enable_citation_module,
            "enable_molecular_module": self.enable_molecular_module,
            "special_tokens": self.special_tokens,
            "domain_tags": self.domain_tags,
            "initializer_range": self.initializer_range,
            "tie_word_embeddings": self.tie_word_embeddings,
        }