claudeson / claudson /model_args.py
joebruce1313's picture
Upload 38004 files
1f5470c verified
from dataclasses import dataclass
@dataclass
class ModelArgs:
# General parameters
max_seq_len: int = 256 # Increased for potential longer sequences with multi-model input
vocab_size: int = 50000 # Shared vocabulary size for all models (adjust if different)
dim: int = 1024 # Increased dimensionality for richer representations
n_layers: int = 16 # Increased number of layers for more complex processing
n_heads: int = 16 # Increased number of heads for better multi-modal attention
dropout: float = 0.1 # Dropout probability
# Model-specific parameters
tranny_dim: int = 768 # Dimensionality for the 'tranny' model
tranny_n_layers: int = 12 # Number of layers for the 'tranny' model
claudeson_dim: int = 512 # Dimensionality for the 'claudeson_clone' model
claudeson_n_layers: int = 8 # Number of layers for the 'claudeson_clone' model
# Multi-modal fusion parameters
fusion_dim: int = 1536 # Dimensionality of the fused multi-modal representation
fusion_n_layers: int = 4 # Number of fusion layers
# Training parameters
learning_rate: float = 1e-4 # Learning rate
weight_decay: float = 0.01 # Weight decay (L2 regularization)
batch_size: int = 8 # Reduced batch size for potentially larger memory footprint
num_epochs: int = 10 # Number of training epochs
# ... (other parameters as needed)