joebruce1313
/

claudeson

Model card Files Files and versions

claudeson / claudson /model_args.py

joebruce1313's picture

Upload 38004 files

1f5470c verified 6 months ago

history blame contribute delete

1.4 kB

	from dataclasses import dataclass

	@dataclass
	class ModelArgs:
	    """Hyperparameters for the model and its training run.

	    Every field has a default, so ``ModelArgs()`` yields a complete
	    configuration; override individual values by keyword, e.g.
	    ``ModelArgs(dim=2048)``. No validation is performed on the values.
	    """

	    # --- General parameters ---
	    max_seq_len: int = 256  # Maximum sequence length; raised to allow longer multi-modal inputs
	    vocab_size: int = 50000  # Vocabulary size shared by all models (adjust if they differ)
	    dim: int = 1024  # Main model dimensionality
	    n_layers: int = 16  # Number of layers in the main model
	    n_heads: int = 16  # Number of attention heads
	    dropout: float = 0.1  # Dropout probability

	    # --- Model-specific parameters ---
	    tranny_dim: int = 768  # Dimensionality of the 'tranny' sub-model
	    tranny_n_layers: int = 12  # Number of layers in the 'tranny' sub-model
	    claudeson_dim: int = 512  # Dimensionality of the 'claudeson_clone' sub-model
	    claudeson_n_layers: int = 8  # Number of layers in the 'claudeson_clone' sub-model

	    # --- Multi-modal fusion parameters ---
	    fusion_dim: int = 1536  # Dimensionality of the fused multi-modal representation
	    fusion_n_layers: int = 4  # Number of fusion layers

	    # --- Training parameters ---
	    learning_rate: float = 1e-4  # Optimizer learning rate
	    weight_decay: float = 0.01  # Weight decay (L2 regularization)
	    batch_size: int = 8  # Kept small to leave room for a larger memory footprint
	    num_epochs: int = 10  # Number of training epochs

	    # Add further parameters here as needed; give each one a default so
	    # that ``ModelArgs()`` keeps working without arguments.