File size: 3,234 Bytes
751ad26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from .cpu_ref import mix_page_cpu_ref, score_page_cpu_ref
from .torch_cuda import (
    cuda_available,
    decode_grouped_multiquery_step_cuda_tensor,
    decode_grouped_multiquery_step_prepared_cuda_tensor,
    decode_grouped_multiquery_step_prepared_cuda_tensor_output_only,
    decode_multi_query_step_cuda,
    decode_multi_query_step_cuda_tensor,
    decode_step_cuda,
    mix_page_cuda,
    page_supported_cuda,
    prepare_page_cuda,
    prepare_pages_cuda,
    PreparedPageCUDA,
    score_page_cuda,
    score_pages_cuda,
)
from .torch_mps import (
    clear_prepared_chunk_cache,
    configure_prepared_chunk_cache,
    decode_grouped_multiquery_step_prepared_mps_tensor,
    decode_grouped_multiquery_step_mps_tensor,
    decode_grouped_multiquery_step_prepared_torch_tensor,
    decode_grouped_multiquery_step_prepared_torch_tensor_output_only,
    decode_grouped_multiquery_step_torch_tensor,
    decode_multi_query_step_torch,
    decode_multi_query_step_torch_tensor,
    decode_step_torch,
    PreparedPageMPS,
    PreparedPageTorch,
    decode_multi_query_step_mps,
    decode_multi_query_step_mps_tensor,
    decode_step_mps,
    mix_page_mps,
    mps_available,
    page_supported_mps,
    page_supported_torch,
    prepare_m0_affine_pages_from_tensor_torch,
    prepare_page_mps,
    prepare_page_torch,
    prepare_pages_mps,
    prepare_pages_torch,
    prepared_chunk_cache_resident_bytes,
    score_pages_mps,
    score_pages_torch,
    score_page_mps,
    score_page_torch,
    set_prepared_chunk_cache_budget_override,
    torch_device_available,
    mix_page_torch,
)

# Public API of this package: exactly the names re-exported by the relative
# imports above. Entries are kept in isort-style order (CamelCase classes
# first, then everything else alphabetically) so the list is easy to audit
# against the imports and additions produce minimal diffs.
__all__ = [
    "PreparedPageCUDA",
    "PreparedPageMPS",
    "PreparedPageTorch",
    "clear_prepared_chunk_cache",
    "configure_prepared_chunk_cache",
    "cuda_available",
    "decode_grouped_multiquery_step_cuda_tensor",
    "decode_grouped_multiquery_step_mps_tensor",
    "decode_grouped_multiquery_step_prepared_cuda_tensor",
    "decode_grouped_multiquery_step_prepared_cuda_tensor_output_only",
    "decode_grouped_multiquery_step_prepared_mps_tensor",
    "decode_grouped_multiquery_step_prepared_torch_tensor",
    "decode_grouped_multiquery_step_prepared_torch_tensor_output_only",
    "decode_grouped_multiquery_step_torch_tensor",
    "decode_multi_query_step_cuda",
    "decode_multi_query_step_cuda_tensor",
    "decode_multi_query_step_mps",
    "decode_multi_query_step_mps_tensor",
    "decode_multi_query_step_torch",
    "decode_multi_query_step_torch_tensor",
    "decode_step_cuda",
    "decode_step_mps",
    "decode_step_torch",
    "mix_page_cpu_ref",
    "mix_page_cuda",
    "mix_page_mps",
    "mix_page_torch",
    "mps_available",
    "page_supported_cuda",
    "page_supported_mps",
    "page_supported_torch",
    "prepare_m0_affine_pages_from_tensor_torch",
    "prepare_page_cuda",
    "prepare_page_mps",
    "prepare_page_torch",
    "prepare_pages_cuda",
    "prepare_pages_mps",
    "prepare_pages_torch",
    "prepared_chunk_cache_resident_bytes",
    "score_page_cpu_ref",
    "score_page_cuda",
    "score_page_mps",
    "score_page_torch",
    "score_pages_cuda",
    "score_pages_mps",
    "score_pages_torch",
    "set_prepared_chunk_cache_budget_override",
    "torch_device_available",
]