Spaces:
Sleeping
Sleeping
File size: 717 Bytes
84f0b80 97e312a 84f0b80 b79954f 84f0b80 ddb0136 84f0b80 ddb0136 97e312a ddb0136 f45427d ddb0136 b79954f ddb0136 b79954f 97e312a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
from dataclasses import dataclass
from dtypes import DType
@dataclass
class Model:
    """Container for model architecture settings.

    Every field is required (none has a default). The generated
    ``__init__`` accepts them positionally in the order declared below,
    so the declaration order is part of the interface.
    """

    vocab_size: int
    num_layers: int
    hidden_dim: int
    intermediate_size: int
    weight_tied_embeddings: bool
    # NOTE(review): the expert counts are presumably only meaningful when
    # is_moe is True -- nothing here enforces that; confirm against callers.
    active_experts: int
    total_experts: int
    is_moe: bool
@dataclass
class Parallelism:
    """Container for parallelism settings.

    Every field is required (none has a default). The generated
    ``__init__`` accepts them positionally in the order declared below,
    so the declaration order is part of the interface.
    """

    tensor_parallelism: int
    pipeline_parallelism: int
    context_parallelism: int
    expert_parallelism: int
    # NOTE(review): fsdp_parallelism / fsdp_strategy are presumably only
    # consulted when fsdp_enabled is True -- not enforced here; confirm.
    fsdp_enabled: bool
    fsdp_parallelism: int
    # Free-form string; the accepted values are not defined in this block.
    fsdp_strategy: str
@dataclass
class Training:
    """Container for training-run settings.

    Every field is required (none has a default). The generated
    ``__init__`` accepts them positionally in the order declared below,
    so the declaration order is part of the interface. ``DType`` is
    imported from the project's ``dtypes`` module.
    """

    sequence_length: int
    batch_size: int
    gradient_checkpointing: bool
    # NOTE(review): declared as a bool flag; if callers need a number of
    # accumulation steps this would have to be an int -- confirm intent.
    grad_accumulation: bool
    precision: DType
    # NOTE(review): param_dtype / reduce_dtype are presumably only used when
    # mixed_precision is True -- not enforced here; confirm against callers.
    mixed_precision: bool
    param_dtype: DType
    reduce_dtype: DType
|