File size: 717 Bytes
84f0b80
97e312a
84f0b80
b79954f
84f0b80
ddb0136
84f0b80
 
ddb0136
 
 
97e312a
 
 
ddb0136
 
 
 
 
 
 
 
f45427d
 
 
ddb0136
b79954f
ddb0136
 
 
b79954f
97e312a
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from dataclasses import dataclass
from dtypes import DType


@dataclass
class Model:
    """Container for model architecture settings.

    Plain dataclass: no field has a default, so every value must be
    supplied at construction, either positionally in declaration order or
    by keyword. No validation is performed in this class.
    """
    # Size settings -- presumably transformer dimensions; confirm the exact
    # meaning of each against the code that consumes this class.
    vocab_size: int
    num_layers: int
    hidden_dim: int
    intermediate_size: int
    # presumably: input and output embeddings share one weight matrix -- TODO confirm
    weight_tied_embeddings: bool
    # NOTE(review): nothing here ties is_moe to the expert counts or enforces
    # active_experts <= total_experts; all three are required even for a
    # non-MoE model, so consumers must check consistency themselves.
    active_experts: int
    total_experts: int
    is_moe: bool


@dataclass
class Parallelism:
    """Container for parallelism settings.

    Plain dataclass: no field has a default, so every value must be
    supplied at construction, either positionally in declaration order or
    by keyword. No validation is performed in this class.
    """
    # One integer per parallelism kind -- presumably the degree (number of
    # ranks) along that axis; confirm against the consumer.
    tensor_parallelism: int
    pipeline_parallelism: int
    context_parallelism: int
    expert_parallelism: int
    # NOTE(review): fsdp_parallelism and fsdp_strategy are required even when
    # fsdp_enabled is False, so callers must pass placeholder values.
    fsdp_enabled: bool
    fsdp_parallelism: int
    # Free-form string; the set of accepted values is not defined in this block.
    fsdp_strategy: str


@dataclass
class Training:
    """Container for training-run settings.

    Plain dataclass: no field has a default, so every value must be
    supplied at construction, either positionally in declaration order or
    by keyword. No validation is performed in this class.
    """
    sequence_length: int
    batch_size: int
    gradient_checkpointing: bool
    # NOTE(review): typed as an on/off flag; if a number of accumulation
    # steps is intended this should be an int -- confirm with callers.
    grad_accumulation: bool
    # DType comes from the project-local `dtypes` module.
    # NOTE(review): how `precision` relates to `param_dtype` / `reduce_dtype`
    # when `mixed_precision` is set is not visible here -- confirm in the
    # code that consumes this class.
    precision: DType
    mixed_precision: bool
    param_dtype: DType
    reduce_dtype: DType