# File size: 3,953 Bytes
# 751ad26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from .llama import (
    LlamaDotCacheHarness,
    LlamaDotCacheModelAdapter,
    LlamaReplayRecord,
    build_llama_page_trace_records,
    build_llama_prefill_page_trace_records,
    export_llama_page_traces,
    run_llama_generation_harness,
    run_llama_page_trace_capture_harness,
    run_llama_replay_harness,
    transformers_available,
)
from .qwen2 import (
    Qwen2DotCacheHarness,
    Qwen2DotCacheModelAdapter,
    run_qwen2_generation_harness,
    run_qwen2_loss_harness,
    run_qwen2_replay_harness,
)
from .qwen35 import (
    Qwen35AttentionSubsetDotCacheHarness,
    Qwen35AttentionSubsetDotCacheModelAdapter,
    Qwen35AttentionSubsetHarness,
    Qwen35AttentionSubsetModelAdapter,
    Qwen35DeltaNetStateRecord,
    Qwen35DeltaNetStateHarness,
    Qwen35DeltaNetStateModelAdapter,
    build_qwen35_deltanet_state_sample,
    capture_qwen35_deltanet_state_sample,
    Qwen35TextHarness,
    Qwen35TextModelAdapter,
    build_attention_subset_page_trace_records,
    build_attention_subset_prefill_page_trace_records,
    inspect_qwen35_deltanet_state,
    inspect_qwen35_hybrid_state,
    load_qwen35_text_only_from_pretrained,
    export_attention_subset_page_traces,
    run_qwen35_attention_subset_prefill_ablation_harness,
    run_qwen35_attention_subset_page_trace_capture_harness,
    run_qwen35_attention_subset_dotcache_harness,
    run_qwen35_attention_subset_replay_harness,
    run_qwen35_deltanet_state_ablation_harness,
    run_qwen35_text_generation_harness,
    run_qwen35_text_loss_harness,
    save_qwen35_deltanet_state_sample,
    summarize_qwen35_dotcache_fit,
    summarize_qwen35_hybrid_state,
)
from .vllm_adapter import (
    VllmAdapterConfig,
    VllmDotCacheModelAdapter,
    VllmPagedKVCache,
    VLLM_V1_MULTIPROCESSING_ENV,
    configure_vllm_inprocess_runtime,
    get_vllm_version,
    install_dotcache_on_vllm_model,
    install_dotcache_on_vllm_runtime,
    require_supported_vllm_version,
    vllm_available,
)

# Public API of this package: the names bound by `from <package> import *`.
# Every name imported at the top of this module from .llama, .qwen2, .qwen35
# and .vllm_adapter is listed here exactly once. The entries are not grouped
# strictly by submodule, so the comments below mark where each run of names
# comes from. When adding an import above, add the matching entry here.
__all__ = [
    # Re-exported from .llama
    "LlamaDotCacheHarness",
    "LlamaDotCacheModelAdapter",
    "LlamaReplayRecord",
    "build_llama_page_trace_records",
    "build_llama_prefill_page_trace_records",
    "export_llama_page_traces",
    # Re-exported from .qwen2
    "Qwen2DotCacheHarness",
    "Qwen2DotCacheModelAdapter",
    # Re-exported from .qwen35
    "Qwen35AttentionSubsetDotCacheHarness",
    "Qwen35AttentionSubsetDotCacheModelAdapter",
    "Qwen35AttentionSubsetHarness",
    "Qwen35AttentionSubsetModelAdapter",
    "Qwen35DeltaNetStateRecord",
    "Qwen35DeltaNetStateHarness",
    "Qwen35DeltaNetStateModelAdapter",
    "build_qwen35_deltanet_state_sample",
    "capture_qwen35_deltanet_state_sample",
    "Qwen35TextHarness",
    "Qwen35TextModelAdapter",
    "build_attention_subset_page_trace_records",
    "build_attention_subset_prefill_page_trace_records",
    # Re-exported from .vllm_adapter
    "VllmAdapterConfig",
    "VllmDotCacheModelAdapter",
    "VllmPagedKVCache",
    "VLLM_V1_MULTIPROCESSING_ENV",
    "configure_vllm_inprocess_runtime",
    "get_vllm_version",
    "install_dotcache_on_vllm_model",
    "install_dotcache_on_vllm_runtime",
    "require_supported_vllm_version",
    # Re-exported from .qwen35 (continued)
    "inspect_qwen35_deltanet_state",
    "inspect_qwen35_hybrid_state",
    "load_qwen35_text_only_from_pretrained",
    "export_attention_subset_page_traces",
    "run_qwen35_attention_subset_prefill_ablation_harness",
    "run_qwen35_attention_subset_page_trace_capture_harness",
    "run_qwen35_attention_subset_dotcache_harness",
    "run_qwen35_attention_subset_replay_harness",
    "run_qwen35_deltanet_state_ablation_harness",
    # Re-exported from .llama (harness entry points)
    "run_llama_generation_harness",
    "run_llama_page_trace_capture_harness",
    "run_llama_replay_harness",
    # Re-exported from .qwen2 (harness entry points)
    "run_qwen2_generation_harness",
    "run_qwen2_loss_harness",
    "run_qwen2_replay_harness",
    # Re-exported from .qwen35 (continued)
    "run_qwen35_text_generation_harness",
    "run_qwen35_text_loss_harness",
    "save_qwen35_deltanet_state_sample",
    "summarize_qwen35_dotcache_fit",
    "summarize_qwen35_hybrid_state",
    # Re-exported from .llama
    "transformers_available",
    # Re-exported from .vllm_adapter
    "vllm_available",
]