from src.model.testformer import TestFormerLM
from src.model.testformer_combined import TestFormerCombinedLM
from src.model.testformer_combined_config import (
    TestFormerCombinedConfig,
    build_testformer_combined_config,
)
from src.model.motif_moe_external import (
    MotifMoEConfig,
    MotifMoETransformer,
    build_wikitext_motif_moe_config,
)
from src.model.testformer_config import (
    TESTFORMER_MOTIFS,
    TESTFORMER_SCALE_PRESETS,
    TestFormerConfig,
    TestFormerMotifConfig,
    build_testformer_config,
    build_scaled_testformer_config,
)
from src.model.qwen_motif_config import (
    DEFAULT_QWEN_ATTENTION_MOTIFS,
    DEFAULT_QWEN_MOTIFS,
    LowRankAdapterConfig,
    QwenFFNExpertLoRAConfig,
    QwenMotifAttentionPatchConfig,
    QwenMotifFullConfig,
    QwenMotifPatchConfig,
    QwenMotifRouterConfig,
    build_default_attention_patch_config,
    build_default_ffn_lora_configs,
    build_contiguous_motif_index,
    build_layer_motif_indices,
    build_layer_range,
    build_motif_index,
    build_random_motif_index,
    build_round_robin_motif_index,
)
from src.model.qwen_motif_router import (
    ContextualMotifRouter,
    StaticMotifRouter,
    build_motif_router,
)
from src.model.qwen_motif_ffn import QwenMotifSplitMLP
from src.model.qwen_motif_lora import (
    LowRankLinearAdapter,
    QwenMotifSplitLoRAMLP,
    RuntimeScaledLoRALinear,
)
from src.model.qwen_motif_attention import QwenMotifAttentionAdapter
from src.model.qwen_motif_patch import (
    apply_qwen_motif_pipeline,
    build_and_patch_qwen_attention_layers,
    build_and_patch_qwen_ffn_layers,
    build_and_patch_qwen_ffn_lora_layers,
    collect_qwen_motif_attention_adapters,
    collect_qwen_motif_mlps,
    collect_qwen_motif_trainable_names,
    freeze_model_except_qwen_motif_trainables,
    freeze_model_except_motif_routers,
    get_qwen_decoder_layers,
    partial_reinit_qwen_motif_modules,
    patch_qwen_attention_layers,
    patch_qwen_ffn_layers,
)

# Public names re-exported by this package; every entry is imported above
# from a src.model submodule. Grouping comments below follow the list's
# existing order — do not reorder entries, since `__all__` order is the
# observable `import *` export order.
__all__ = [
    # Model configuration dataclasses
    "TestFormerConfig",
    "TestFormerMotifConfig",
    "TestFormerCombinedConfig",
    "MotifMoEConfig",
    # Model classes
    "TestFormerLM",
    "TestFormerCombinedLM",
    "MotifMoETransformer",
    # Qwen motif patch/adapter configuration classes
    "LowRankAdapterConfig",
    "QwenFFNExpertLoRAConfig",
    "QwenMotifAttentionPatchConfig",
    "QwenMotifFullConfig",
    "QwenMotifPatchConfig",
    "QwenMotifRouterConfig",
    # Qwen motif modules (MLP/LoRA/attention adapters)
    "QwenMotifSplitMLP",
    "QwenMotifSplitLoRAMLP",
    "QwenMotifAttentionAdapter",
    "LowRankLinearAdapter",
    "RuntimeScaledLoRALinear",
    # Motif routers
    "StaticMotifRouter",
    "ContextualMotifRouter",
    # Preset/constant tables
    "TESTFORMER_MOTIFS",
    "TESTFORMER_SCALE_PRESETS",
    "DEFAULT_QWEN_ATTENTION_MOTIFS",
    "DEFAULT_QWEN_MOTIFS",
    # Config/index/router builder functions
    "build_scaled_testformer_config",
    "build_testformer_config",
    "build_testformer_combined_config",
    "build_wikitext_motif_moe_config",
    "build_default_ffn_lora_configs",
    "build_default_attention_patch_config",
    "build_layer_range",
    "build_motif_index",
    "build_contiguous_motif_index",
    "build_random_motif_index",
    "build_round_robin_motif_index",
    "build_layer_motif_indices",
    "build_motif_router",
    # Qwen model patching / freezing / collection helpers
    "get_qwen_decoder_layers",
    "patch_qwen_ffn_layers",
    "patch_qwen_attention_layers",
    "build_and_patch_qwen_ffn_layers",
    "build_and_patch_qwen_ffn_lora_layers",
    "build_and_patch_qwen_attention_layers",
    "apply_qwen_motif_pipeline",
    "collect_qwen_motif_mlps",
    "collect_qwen_motif_attention_adapters",
    "collect_qwen_motif_trainable_names",
    "freeze_model_except_motif_routers",
    "freeze_model_except_qwen_motif_trainables",
    "partial_reinit_qwen_motif_modules",
]