koichi12 committed on
Commit
7d4012e
·
verified ·
1 Parent(s): ed5792f

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .venv/lib/python3.11/site-packages/gguf/__init__.py +9 -0
  2. .venv/lib/python3.11/site-packages/gguf/__pycache__/__init__.cpython-311.pyc +0 -0
  3. .venv/lib/python3.11/site-packages/gguf/__pycache__/constants.cpython-311.pyc +0 -0
  4. .venv/lib/python3.11/site-packages/gguf/__pycache__/gguf.cpython-311.pyc +0 -0
  5. .venv/lib/python3.11/site-packages/gguf/__pycache__/gguf_reader.cpython-311.pyc +0 -0
  6. .venv/lib/python3.11/site-packages/gguf/__pycache__/gguf_writer.cpython-311.pyc +0 -0
  7. .venv/lib/python3.11/site-packages/gguf/__pycache__/lazy.cpython-311.pyc +0 -0
  8. .venv/lib/python3.11/site-packages/gguf/__pycache__/metadata.cpython-311.pyc +0 -0
  9. .venv/lib/python3.11/site-packages/gguf/__pycache__/quants.cpython-311.pyc +0 -0
  10. .venv/lib/python3.11/site-packages/gguf/__pycache__/tensor_mapping.cpython-311.pyc +0 -0
  11. .venv/lib/python3.11/site-packages/gguf/__pycache__/utility.cpython-311.pyc +0 -0
  12. .venv/lib/python3.11/site-packages/gguf/__pycache__/vocab.cpython-311.pyc +0 -0
  13. .venv/lib/python3.11/site-packages/gguf/constants.py +1398 -0
  14. .venv/lib/python3.11/site-packages/gguf/gguf.py +15 -0
  15. .venv/lib/python3.11/site-packages/gguf/gguf_reader.py +317 -0
  16. .venv/lib/python3.11/site-packages/gguf/gguf_writer.py +888 -0
  17. .venv/lib/python3.11/site-packages/gguf/lazy.py +213 -0
  18. .venv/lib/python3.11/site-packages/gguf/metadata.py +510 -0
  19. .venv/lib/python3.11/site-packages/gguf/py.typed +0 -0
  20. .venv/lib/python3.11/site-packages/gguf/quants.py +1188 -0
  21. .venv/lib/python3.11/site-packages/gguf/tensor_mapping.py +657 -0
  22. .venv/lib/python3.11/site-packages/gguf/utility.py +69 -0
  23. .venv/lib/python3.11/site-packages/gguf/vocab.py +465 -0
  24. .venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/INSTALLER +1 -0
  25. .venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/License.txt +1568 -0
  26. .venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/RECORD +20 -0
  27. .venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/WHEEL +5 -0
  28. .venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/top_level.txt +1 -0
  29. .venv/lib/python3.11/site-packages/openai-1.61.1.dist-info/METADATA +851 -0
  30. .venv/lib/python3.11/site-packages/openai-1.61.1.dist-info/licenses/LICENSE +201 -0
  31. .venv/lib/python3.11/site-packages/smart_open/__init__.py +79 -0
  32. .venv/lib/python3.11/site-packages/smart_open/__pycache__/__init__.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/smart_open/__pycache__/azure.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/smart_open/__pycache__/bytebuffer.cpython-311.pyc +0 -0
  35. .venv/lib/python3.11/site-packages/smart_open/__pycache__/compression.cpython-311.pyc +0 -0
  36. .venv/lib/python3.11/site-packages/smart_open/__pycache__/concurrency.cpython-311.pyc +0 -0
  37. .venv/lib/python3.11/site-packages/smart_open/__pycache__/constants.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/smart_open/__pycache__/doctools.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/smart_open/__pycache__/ftp.cpython-311.pyc +0 -0
  40. .venv/lib/python3.11/site-packages/smart_open/__pycache__/gcs.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/smart_open/__pycache__/hdfs.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/smart_open/__pycache__/http.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/smart_open/__pycache__/local_file.cpython-311.pyc +0 -0
  44. .venv/lib/python3.11/site-packages/smart_open/__pycache__/s3.cpython-311.pyc +0 -0
  45. .venv/lib/python3.11/site-packages/smart_open/__pycache__/smart_open_lib.cpython-311.pyc +0 -0
  46. .venv/lib/python3.11/site-packages/smart_open/__pycache__/ssh.cpython-311.pyc +0 -0
  47. .venv/lib/python3.11/site-packages/smart_open/__pycache__/transport.cpython-311.pyc +0 -0
  48. .venv/lib/python3.11/site-packages/smart_open/__pycache__/utils.cpython-311.pyc +0 -0
  49. .venv/lib/python3.11/site-packages/smart_open/__pycache__/version.cpython-311.pyc +0 -0
  50. .venv/lib/python3.11/site-packages/smart_open/__pycache__/webhdfs.cpython-311.pyc +0 -0
.venv/lib/python3.11/site-packages/gguf/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .constants import *
2
+ from .lazy import *
3
+ from .gguf_reader import *
4
+ from .gguf_writer import *
5
+ from .quants import *
6
+ from .tensor_mapping import *
7
+ from .vocab import *
8
+ from .utility import *
9
+ from .metadata import *
.venv/lib/python3.11/site-packages/gguf/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (453 Bytes). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/constants.cpython-311.pyc ADDED
Binary file (44 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/gguf.cpython-311.pyc ADDED
Binary file (621 Bytes). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/gguf_reader.cpython-311.pyc ADDED
Binary file (17.5 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/gguf_writer.cpython-311.pyc ADDED
Binary file (73.3 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/lazy.cpython-311.pyc ADDED
Binary file (12.4 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/metadata.cpython-311.pyc ADDED
Binary file (27.7 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/quants.cpython-311.pyc ADDED
Binary file (92.9 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/tensor_mapping.cpython-311.pyc ADDED
Binary file (20 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/utility.cpython-311.pyc ADDED
Binary file (3.9 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/__pycache__/vocab.cpython-311.pyc ADDED
Binary file (29.9 kB). View file
 
.venv/lib/python3.11/site-packages/gguf/constants.py ADDED
@@ -0,0 +1,1398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from enum import Enum, IntEnum, auto
4
+ from typing import Any
5
+
6
+ #
7
+ # constants
8
+ #
9
+
10
+ GGUF_MAGIC = 0x46554747 # "GGUF"
11
+ GGUF_VERSION = 3
12
+ GGUF_DEFAULT_ALIGNMENT = 32
13
+ GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
14
+
15
+ #
16
+ # metadata keys
17
+ #
18
+
19
+
20
+ class Keys:
21
+ class General:
22
+ TYPE = "general.type"
23
+ ARCHITECTURE = "general.architecture"
24
+ QUANTIZATION_VERSION = "general.quantization_version"
25
+ ALIGNMENT = "general.alignment"
26
+ FILE_TYPE = "general.file_type"
27
+
28
+ # Authorship Metadata
29
+ NAME = "general.name"
30
+ AUTHOR = "general.author"
31
+ VERSION = "general.version"
32
+ ORGANIZATION = "general.organization"
33
+
34
+ FINETUNE = "general.finetune"
35
+ BASENAME = "general.basename"
36
+
37
+ DESCRIPTION = "general.description"
38
+ QUANTIZED_BY = "general.quantized_by"
39
+
40
+ SIZE_LABEL = "general.size_label"
41
+
42
+ # Licensing details
43
+ LICENSE = "general.license"
44
+ LICENSE_NAME = "general.license.name"
45
+ LICENSE_LINK = "general.license.link"
46
+
47
+ # Typically represents the converted GGUF repo (Unless native)
48
+ URL = "general.url" # Model Website/Paper
49
+ DOI = "general.doi"
50
+ UUID = "general.uuid"
51
+ REPO_URL = "general.repo_url" # Model Source Repository (git/svn/etc...)
52
+
53
+ # Model Source during conversion
54
+ SOURCE_URL = "general.source.url" # Model Website/Paper
55
+ SOURCE_DOI = "general.source.doi"
56
+ SOURCE_UUID = "general.source.uuid"
57
+ SOURCE_REPO_URL = "general.source.repo_url" # Model Source Repository (git/svn/etc...)
58
+
59
+ # Base Model Source. There can be more than one source if it's a merged
60
+ # model like with 'Mistral-7B-Merge-14-v0.1'. This will assist in
61
+ # tracing the lineage of a model as it is finetuned or merged over time.
62
+ BASE_MODEL_COUNT = "general.base_model.count"
63
+ BASE_MODEL_NAME = "general.base_model.{id}.name"
64
+ BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
65
+ BASE_MODEL_VERSION = "general.base_model.{id}.version"
66
+ BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
67
+ BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
68
+ BASE_MODEL_DOI = "general.base_model.{id}.doi"
69
+ BASE_MODEL_UUID = "general.base_model.{id}.uuid"
70
+ BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)
71
+
72
+ # Array based KV stores
73
+ TAGS = "general.tags"
74
+ LANGUAGES = "general.languages"
75
+ DATASETS = "general.datasets"
76
+
77
+ class LLM:
78
+ VOCAB_SIZE = "{arch}.vocab_size"
79
+ CONTEXT_LENGTH = "{arch}.context_length"
80
+ EMBEDDING_LENGTH = "{arch}.embedding_length"
81
+ BLOCK_COUNT = "{arch}.block_count"
82
+ LEADING_DENSE_BLOCK_COUNT = "{arch}.leading_dense_block_count"
83
+ FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
84
+ EXPERT_FEED_FORWARD_LENGTH = "{arch}.expert_feed_forward_length"
85
+ EXPERT_SHARED_FEED_FORWARD_LENGTH = "{arch}.expert_shared_feed_forward_length"
86
+ USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
87
+ TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
88
+ EXPERT_COUNT = "{arch}.expert_count"
89
+ EXPERT_USED_COUNT = "{arch}.expert_used_count"
90
+ EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
91
+ EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
92
+ POOLING_TYPE = "{arch}.pooling_type"
93
+ LOGIT_SCALE = "{arch}.logit_scale"
94
+ DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
95
+ ATTN_LOGIT_SOFTCAPPING = "{arch}.attn_logit_softcapping"
96
+ FINAL_LOGIT_SOFTCAPPING = "{arch}.final_logit_softcapping"
97
+
98
+ class Attention:
99
+ HEAD_COUNT = "{arch}.attention.head_count"
100
+ HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
101
+ MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
102
+ CLAMP_KQV = "{arch}.attention.clamp_kqv"
103
+ KEY_LENGTH = "{arch}.attention.key_length"
104
+ VALUE_LENGTH = "{arch}.attention.value_length"
105
+ LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
106
+ LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
107
+ CAUSAL = "{arch}.attention.causal"
108
+ Q_LORA_RANK = "{arch}.attention.q_lora_rank"
109
+ KV_LORA_RANK = "{arch}.attention.kv_lora_rank"
110
+ REL_BUCKETS_COUNT = "{arch}.attention.relative_buckets_count"
111
+ SLIDING_WINDOW = "{arch}.attention.sliding_window"
112
+
113
+ class Rope:
114
+ DIMENSION_COUNT = "{arch}.rope.dimension_count"
115
+ FREQ_BASE = "{arch}.rope.freq_base"
116
+ SCALING_TYPE = "{arch}.rope.scaling.type"
117
+ SCALING_FACTOR = "{arch}.rope.scaling.factor"
118
+ SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"
119
+ SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
120
+ SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
121
+ SCALING_YARN_LOG_MUL = "{arch}.rope.scaling.yarn_log_multiplier"
122
+
123
+ class Split:
124
+ LLM_KV_SPLIT_NO = "split.no"
125
+ LLM_KV_SPLIT_COUNT = "split.count"
126
+ LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"
127
+
128
+ class SSM:
129
+ CONV_KERNEL = "{arch}.ssm.conv_kernel"
130
+ INNER_SIZE = "{arch}.ssm.inner_size"
131
+ STATE_SIZE = "{arch}.ssm.state_size"
132
+ TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
133
+ DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
134
+
135
+ class Tokenizer:
136
+ MODEL = "tokenizer.ggml.model"
137
+ PRE = "tokenizer.ggml.pre"
138
+ LIST = "tokenizer.ggml.tokens"
139
+ TOKEN_TYPE = "tokenizer.ggml.token_type"
140
+ TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count" # for BERT-style token types
141
+ SCORES = "tokenizer.ggml.scores"
142
+ MERGES = "tokenizer.ggml.merges"
143
+ BOS_ID = "tokenizer.ggml.bos_token_id"
144
+ EOS_ID = "tokenizer.ggml.eos_token_id"
145
+ UNK_ID = "tokenizer.ggml.unknown_token_id"
146
+ SEP_ID = "tokenizer.ggml.seperator_token_id"
147
+ PAD_ID = "tokenizer.ggml.padding_token_id"
148
+ CLS_ID = "tokenizer.ggml.cls_token_id"
149
+ MASK_ID = "tokenizer.ggml.mask_token_id"
150
+ ADD_BOS = "tokenizer.ggml.add_bos_token"
151
+ ADD_EOS = "tokenizer.ggml.add_eos_token"
152
+ ADD_PREFIX = "tokenizer.ggml.add_space_prefix"
153
+ REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces"
154
+ PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap"
155
+ HF_JSON = "tokenizer.huggingface.json"
156
+ RWKV = "tokenizer.rwkv.world"
157
+ CHAT_TEMPLATE = "tokenizer.chat_template"
158
+ CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
159
+ CHAT_TEMPLATES = "tokenizer.chat_templates"
160
+ # FIM/Infill special token constants
161
+ PREFIX_ID = "tokenizer.ggml.prefix_token_id"
162
+ SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
163
+ MIDDLE_ID = "tokenizer.ggml.middle_token_id"
164
+ EOT_ID = "tokenizer.ggml.eot_token_id"
165
+ EOM_ID = "tokenizer.ggml.eom_token_id"
166
+
167
+ class Adapter:
168
+ TYPE = "adapter.type"
169
+ LORA_ALPHA = "adapter.lora.alpha"
170
+
171
+ #
172
+ # recommended mapping of model tensor names for storage in gguf
173
+ #
174
+
175
+
176
+ class GGUFType:
177
+ MODEL = "model"
178
+ ADAPTER = "adapter"
179
+
180
+
181
+ class MODEL_ARCH(IntEnum):
182
+ LLAMA = auto()
183
+ FALCON = auto()
184
+ BAICHUAN = auto()
185
+ GROK = auto()
186
+ GPT2 = auto()
187
+ GPTJ = auto()
188
+ GPTNEOX = auto()
189
+ MPT = auto()
190
+ STARCODER = auto()
191
+ REFACT = auto()
192
+ BERT = auto()
193
+ NOMIC_BERT = auto()
194
+ JINA_BERT_V2 = auto()
195
+ BLOOM = auto()
196
+ STABLELM = auto()
197
+ QWEN = auto()
198
+ QWEN2 = auto()
199
+ QWEN2MOE = auto()
200
+ PHI2 = auto()
201
+ PHI3 = auto()
202
+ PLAMO = auto()
203
+ CODESHELL = auto()
204
+ ORION = auto()
205
+ INTERNLM2 = auto()
206
+ MINICPM = auto()
207
+ GEMMA = auto()
208
+ GEMMA2 = auto()
209
+ STARCODER2 = auto()
210
+ MAMBA = auto()
211
+ XVERSE = auto()
212
+ COMMAND_R = auto()
213
+ DBRX = auto()
214
+ OLMO = auto()
215
+ OPENELM = auto()
216
+ ARCTIC = auto()
217
+ DEEPSEEK2 = auto()
218
+ CHATGLM = auto()
219
+ BITNET = auto()
220
+ T5 = auto()
221
+ T5ENCODER = auto()
222
+ JAIS = auto()
223
+ NEMOTRON = auto()
224
+ EXAONE = auto()
225
+
226
+
227
+ class MODEL_TENSOR(IntEnum):
228
+ TOKEN_EMBD = auto()
229
+ TOKEN_EMBD_NORM = auto()
230
+ TOKEN_TYPES = auto()
231
+ POS_EMBD = auto()
232
+ OUTPUT = auto()
233
+ OUTPUT_NORM = auto()
234
+ ROPE_FREQS = auto()
235
+ ROPE_FACTORS_LONG = auto()
236
+ ROPE_FACTORS_SHORT = auto()
237
+ ATTN_Q = auto()
238
+ ATTN_K = auto()
239
+ ATTN_V = auto()
240
+ ATTN_QKV = auto()
241
+ ATTN_OUT = auto()
242
+ ATTN_NORM = auto()
243
+ ATTN_NORM_2 = auto()
244
+ ATTN_OUT_NORM = auto()
245
+ ATTN_POST_NORM = auto()
246
+ ATTN_ROT_EMBD = auto()
247
+ FFN_GATE_INP = auto()
248
+ FFN_GATE_INP_SHEXP = auto()
249
+ FFN_NORM = auto()
250
+ FFN_PRE_NORM = auto()
251
+ FFN_POST_NORM = auto()
252
+ FFN_GATE = auto()
253
+ FFN_DOWN = auto()
254
+ FFN_UP = auto()
255
+ FFN_ACT = auto()
256
+ FFN_NORM_EXP = auto()
257
+ FFN_GATE_EXP = auto()
258
+ FFN_DOWN_EXP = auto()
259
+ FFN_UP_EXP = auto()
260
+ FFN_GATE_SHEXP = auto()
261
+ FFN_DOWN_SHEXP = auto()
262
+ FFN_UP_SHEXP = auto()
263
+ ATTN_Q_NORM = auto()
264
+ ATTN_K_NORM = auto()
265
+ LAYER_OUT_NORM = auto()
266
+ SSM_IN = auto()
267
+ SSM_CONV1D = auto()
268
+ SSM_X = auto()
269
+ SSM_DT = auto()
270
+ SSM_A = auto()
271
+ SSM_D = auto()
272
+ SSM_OUT = auto()
273
+ ATTN_Q_A = auto()
274
+ ATTN_Q_B = auto()
275
+ ATTN_KV_A_MQA = auto()
276
+ ATTN_KV_B = auto()
277
+ ATTN_Q_A_NORM = auto()
278
+ ATTN_KV_A_NORM = auto()
279
+ FFN_SUB_NORM = auto()
280
+ ATTN_SUB_NORM = auto()
281
+ DEC_ATTN_NORM = auto()
282
+ DEC_ATTN_Q = auto()
283
+ DEC_ATTN_K = auto()
284
+ DEC_ATTN_V = auto()
285
+ DEC_ATTN_OUT = auto()
286
+ DEC_ATTN_REL_B = auto()
287
+ DEC_CROSS_ATTN_NORM = auto()
288
+ DEC_CROSS_ATTN_Q = auto()
289
+ DEC_CROSS_ATTN_K = auto()
290
+ DEC_CROSS_ATTN_V = auto()
291
+ DEC_CROSS_ATTN_OUT = auto()
292
+ DEC_CROSS_ATTN_REL_B = auto()
293
+ DEC_FFN_NORM = auto()
294
+ DEC_FFN_GATE = auto()
295
+ DEC_FFN_DOWN = auto()
296
+ DEC_FFN_UP = auto()
297
+ DEC_OUTPUT_NORM = auto()
298
+ ENC_ATTN_NORM = auto()
299
+ ENC_ATTN_Q = auto()
300
+ ENC_ATTN_K = auto()
301
+ ENC_ATTN_V = auto()
302
+ ENC_ATTN_OUT = auto()
303
+ ENC_ATTN_REL_B = auto()
304
+ ENC_FFN_NORM = auto()
305
+ ENC_FFN_GATE = auto()
306
+ ENC_FFN_DOWN = auto()
307
+ ENC_FFN_UP = auto()
308
+ ENC_OUTPUT_NORM = auto()
309
+
310
+
311
+ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
312
+ MODEL_ARCH.LLAMA: "llama",
313
+ MODEL_ARCH.FALCON: "falcon",
314
+ MODEL_ARCH.BAICHUAN: "baichuan",
315
+ MODEL_ARCH.GROK: "grok",
316
+ MODEL_ARCH.GPT2: "gpt2",
317
+ MODEL_ARCH.GPTJ: "gptj",
318
+ MODEL_ARCH.GPTNEOX: "gptneox",
319
+ MODEL_ARCH.MPT: "mpt",
320
+ MODEL_ARCH.STARCODER: "starcoder",
321
+ MODEL_ARCH.REFACT: "refact",
322
+ MODEL_ARCH.BERT: "bert",
323
+ MODEL_ARCH.NOMIC_BERT: "nomic-bert",
324
+ MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
325
+ MODEL_ARCH.BLOOM: "bloom",
326
+ MODEL_ARCH.STABLELM: "stablelm",
327
+ MODEL_ARCH.QWEN: "qwen",
328
+ MODEL_ARCH.QWEN2: "qwen2",
329
+ MODEL_ARCH.QWEN2MOE: "qwen2moe",
330
+ MODEL_ARCH.PHI2: "phi2",
331
+ MODEL_ARCH.PHI3: "phi3",
332
+ MODEL_ARCH.PLAMO: "plamo",
333
+ MODEL_ARCH.CODESHELL: "codeshell",
334
+ MODEL_ARCH.ORION: "orion",
335
+ MODEL_ARCH.INTERNLM2: "internlm2",
336
+ MODEL_ARCH.MINICPM: "minicpm",
337
+ MODEL_ARCH.GEMMA: "gemma",
338
+ MODEL_ARCH.GEMMA2: "gemma2",
339
+ MODEL_ARCH.STARCODER2: "starcoder2",
340
+ MODEL_ARCH.MAMBA: "mamba",
341
+ MODEL_ARCH.XVERSE: "xverse",
342
+ MODEL_ARCH.COMMAND_R: "command-r",
343
+ MODEL_ARCH.DBRX: "dbrx",
344
+ MODEL_ARCH.OLMO: "olmo",
345
+ MODEL_ARCH.OPENELM: "openelm",
346
+ MODEL_ARCH.ARCTIC: "arctic",
347
+ MODEL_ARCH.DEEPSEEK2: "deepseek2",
348
+ MODEL_ARCH.CHATGLM: "chatglm",
349
+ MODEL_ARCH.BITNET: "bitnet",
350
+ MODEL_ARCH.T5: "t5",
351
+ MODEL_ARCH.T5ENCODER: "t5encoder",
352
+ MODEL_ARCH.JAIS: "jais",
353
+ MODEL_ARCH.NEMOTRON: "nemotron",
354
+ MODEL_ARCH.EXAONE: "exaone",
355
+ }
356
+
357
+ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
358
+ MODEL_TENSOR.TOKEN_EMBD: "token_embd",
359
+ MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
360
+ MODEL_TENSOR.TOKEN_TYPES: "token_types",
361
+ MODEL_TENSOR.POS_EMBD: "position_embd",
362
+ MODEL_TENSOR.OUTPUT_NORM: "output_norm",
363
+ MODEL_TENSOR.OUTPUT: "output",
364
+ MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
365
+ MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
366
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
367
+ MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
368
+ MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
369
+ MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
370
+ MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
371
+ MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
372
+ MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
373
+ MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
374
+ MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
375
+ MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
376
+ MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
377
+ MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
378
+ MODEL_TENSOR.ATTN_POST_NORM: "blk.{bid}.post_attention_norm",
379
+ MODEL_TENSOR.FFN_GATE_INP: "blk.{bid}.ffn_gate_inp",
380
+ MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
381
+ MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
382
+ MODEL_TENSOR.FFN_PRE_NORM: "blk.{bid}.ffn_norm",
383
+ MODEL_TENSOR.FFN_POST_NORM: "blk.{bid}.post_ffw_norm",
384
+ MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
385
+ MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
386
+ MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
387
+ MODEL_TENSOR.FFN_GATE_SHEXP: "blk.{bid}.ffn_gate_shexp",
388
+ MODEL_TENSOR.FFN_DOWN_SHEXP: "blk.{bid}.ffn_down_shexp",
389
+ MODEL_TENSOR.FFN_UP_SHEXP: "blk.{bid}.ffn_up_shexp",
390
+ MODEL_TENSOR.FFN_ACT: "blk.{bid}.ffn",
391
+ MODEL_TENSOR.FFN_NORM_EXP: "blk.{bid}.ffn_norm_exps",
392
+ MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
393
+ MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
394
+ MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
395
+ MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
396
+ MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
397
+ MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
398
+ MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
399
+ MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
400
+ MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
401
+ MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
402
+ MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
403
+ MODEL_TENSOR.ATTN_Q_A: "blk.{bid}.attn_q_a",
404
+ MODEL_TENSOR.ATTN_Q_B: "blk.{bid}.attn_q_b",
405
+ MODEL_TENSOR.ATTN_KV_A_MQA: "blk.{bid}.attn_kv_a_mqa",
406
+ MODEL_TENSOR.ATTN_KV_B: "blk.{bid}.attn_kv_b",
407
+ MODEL_TENSOR.ATTN_Q_A_NORM: "blk.{bid}.attn_q_a_norm",
408
+ MODEL_TENSOR.ATTN_KV_A_NORM: "blk.{bid}.attn_kv_a_norm",
409
+ MODEL_TENSOR.ATTN_SUB_NORM: "blk.{bid}.attn_sub_norm",
410
+ MODEL_TENSOR.FFN_SUB_NORM: "blk.{bid}.ffn_sub_norm",
411
+ MODEL_TENSOR.DEC_ATTN_NORM: "dec.blk.{bid}.attn_norm",
412
+ MODEL_TENSOR.DEC_ATTN_Q: "dec.blk.{bid}.attn_q",
413
+ MODEL_TENSOR.DEC_ATTN_K: "dec.blk.{bid}.attn_k",
414
+ MODEL_TENSOR.DEC_ATTN_V: "dec.blk.{bid}.attn_v",
415
+ MODEL_TENSOR.DEC_ATTN_OUT: "dec.blk.{bid}.attn_o",
416
+ MODEL_TENSOR.DEC_ATTN_REL_B: "dec.blk.{bid}.attn_rel_b",
417
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM: "dec.blk.{bid}.cross_attn_norm",
418
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q: "dec.blk.{bid}.cross_attn_q",
419
+ MODEL_TENSOR.DEC_CROSS_ATTN_K: "dec.blk.{bid}.cross_attn_k",
420
+ MODEL_TENSOR.DEC_CROSS_ATTN_V: "dec.blk.{bid}.cross_attn_v",
421
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT: "dec.blk.{bid}.cross_attn_o",
422
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: "dec.blk.{bid}.cross_attn_rel_b",
423
+ MODEL_TENSOR.DEC_FFN_NORM: "dec.blk.{bid}.ffn_norm",
424
+ MODEL_TENSOR.DEC_FFN_GATE: "dec.blk.{bid}.ffn_gate",
425
+ MODEL_TENSOR.DEC_FFN_DOWN: "dec.blk.{bid}.ffn_down",
426
+ MODEL_TENSOR.DEC_FFN_UP: "dec.blk.{bid}.ffn_up",
427
+ MODEL_TENSOR.DEC_OUTPUT_NORM: "dec.output_norm",
428
+ MODEL_TENSOR.ENC_ATTN_NORM: "enc.blk.{bid}.attn_norm",
429
+ MODEL_TENSOR.ENC_ATTN_Q: "enc.blk.{bid}.attn_q",
430
+ MODEL_TENSOR.ENC_ATTN_K: "enc.blk.{bid}.attn_k",
431
+ MODEL_TENSOR.ENC_ATTN_V: "enc.blk.{bid}.attn_v",
432
+ MODEL_TENSOR.ENC_ATTN_OUT: "enc.blk.{bid}.attn_o",
433
+ MODEL_TENSOR.ENC_ATTN_REL_B: "enc.blk.{bid}.attn_rel_b",
434
+ MODEL_TENSOR.ENC_FFN_NORM: "enc.blk.{bid}.ffn_norm",
435
+ MODEL_TENSOR.ENC_FFN_GATE: "enc.blk.{bid}.ffn_gate",
436
+ MODEL_TENSOR.ENC_FFN_DOWN: "enc.blk.{bid}.ffn_down",
437
+ MODEL_TENSOR.ENC_FFN_UP: "enc.blk.{bid}.ffn_up",
438
+ MODEL_TENSOR.ENC_OUTPUT_NORM: "enc.output_norm",
439
+ }
440
+
441
+ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
442
+ MODEL_ARCH.LLAMA: [
443
+ MODEL_TENSOR.TOKEN_EMBD,
444
+ MODEL_TENSOR.OUTPUT_NORM,
445
+ MODEL_TENSOR.OUTPUT,
446
+ MODEL_TENSOR.ROPE_FREQS,
447
+ MODEL_TENSOR.ATTN_NORM,
448
+ MODEL_TENSOR.ATTN_Q,
449
+ MODEL_TENSOR.ATTN_K,
450
+ MODEL_TENSOR.ATTN_V,
451
+ MODEL_TENSOR.ATTN_OUT,
452
+ MODEL_TENSOR.ATTN_ROT_EMBD,
453
+ MODEL_TENSOR.FFN_GATE_INP,
454
+ MODEL_TENSOR.FFN_NORM,
455
+ MODEL_TENSOR.FFN_GATE,
456
+ MODEL_TENSOR.FFN_DOWN,
457
+ MODEL_TENSOR.FFN_UP,
458
+ MODEL_TENSOR.FFN_GATE_EXP,
459
+ MODEL_TENSOR.FFN_DOWN_EXP,
460
+ MODEL_TENSOR.FFN_UP_EXP,
461
+ ],
462
+ MODEL_ARCH.GROK: [
463
+ MODEL_TENSOR.TOKEN_EMBD,
464
+ MODEL_TENSOR.OUTPUT_NORM,
465
+ MODEL_TENSOR.OUTPUT,
466
+ MODEL_TENSOR.ROPE_FREQS,
467
+ MODEL_TENSOR.ATTN_NORM,
468
+ MODEL_TENSOR.ATTN_Q,
469
+ MODEL_TENSOR.ATTN_K,
470
+ MODEL_TENSOR.ATTN_V,
471
+ MODEL_TENSOR.ATTN_OUT,
472
+ MODEL_TENSOR.ATTN_ROT_EMBD,
473
+ MODEL_TENSOR.ATTN_OUT_NORM,
474
+ MODEL_TENSOR.FFN_GATE_INP,
475
+ MODEL_TENSOR.FFN_NORM,
476
+ MODEL_TENSOR.FFN_GATE,
477
+ MODEL_TENSOR.FFN_DOWN,
478
+ MODEL_TENSOR.FFN_UP,
479
+ MODEL_TENSOR.FFN_GATE_EXP,
480
+ MODEL_TENSOR.FFN_DOWN_EXP,
481
+ MODEL_TENSOR.FFN_UP_EXP,
482
+ MODEL_TENSOR.LAYER_OUT_NORM,
483
+ ],
484
+ MODEL_ARCH.GPTNEOX: [
485
+ MODEL_TENSOR.TOKEN_EMBD,
486
+ MODEL_TENSOR.OUTPUT_NORM,
487
+ MODEL_TENSOR.OUTPUT,
488
+ MODEL_TENSOR.ATTN_NORM,
489
+ MODEL_TENSOR.ATTN_QKV,
490
+ MODEL_TENSOR.ATTN_OUT,
491
+ MODEL_TENSOR.FFN_NORM,
492
+ MODEL_TENSOR.FFN_DOWN,
493
+ MODEL_TENSOR.FFN_UP,
494
+ ],
495
+ MODEL_ARCH.FALCON: [
496
+ MODEL_TENSOR.TOKEN_EMBD,
497
+ MODEL_TENSOR.OUTPUT_NORM,
498
+ MODEL_TENSOR.OUTPUT,
499
+ MODEL_TENSOR.ATTN_NORM,
500
+ MODEL_TENSOR.ATTN_NORM_2,
501
+ MODEL_TENSOR.ATTN_QKV,
502
+ MODEL_TENSOR.ATTN_OUT,
503
+ MODEL_TENSOR.FFN_DOWN,
504
+ MODEL_TENSOR.FFN_UP,
505
+ ],
506
+ MODEL_ARCH.BAICHUAN: [
507
+ MODEL_TENSOR.TOKEN_EMBD,
508
+ MODEL_TENSOR.OUTPUT_NORM,
509
+ MODEL_TENSOR.OUTPUT,
510
+ MODEL_TENSOR.ROPE_FREQS,
511
+ MODEL_TENSOR.ATTN_NORM,
512
+ MODEL_TENSOR.ATTN_Q,
513
+ MODEL_TENSOR.ATTN_K,
514
+ MODEL_TENSOR.ATTN_V,
515
+ MODEL_TENSOR.ATTN_OUT,
516
+ MODEL_TENSOR.ATTN_ROT_EMBD,
517
+ MODEL_TENSOR.FFN_NORM,
518
+ MODEL_TENSOR.FFN_GATE,
519
+ MODEL_TENSOR.FFN_DOWN,
520
+ MODEL_TENSOR.FFN_UP,
521
+ ],
522
+ MODEL_ARCH.STARCODER: [
523
+ MODEL_TENSOR.TOKEN_EMBD,
524
+ MODEL_TENSOR.POS_EMBD,
525
+ MODEL_TENSOR.OUTPUT_NORM,
526
+ MODEL_TENSOR.OUTPUT,
527
+ MODEL_TENSOR.ATTN_NORM,
528
+ MODEL_TENSOR.ATTN_QKV,
529
+ MODEL_TENSOR.ATTN_OUT,
530
+ MODEL_TENSOR.FFN_NORM,
531
+ MODEL_TENSOR.FFN_DOWN,
532
+ MODEL_TENSOR.FFN_UP,
533
+ ],
534
+ MODEL_ARCH.BERT: [
535
+ MODEL_TENSOR.TOKEN_EMBD,
536
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
537
+ MODEL_TENSOR.TOKEN_TYPES,
538
+ MODEL_TENSOR.POS_EMBD,
539
+ MODEL_TENSOR.OUTPUT_NORM,
540
+ MODEL_TENSOR.ATTN_OUT_NORM,
541
+ MODEL_TENSOR.ATTN_Q,
542
+ MODEL_TENSOR.ATTN_K,
543
+ MODEL_TENSOR.ATTN_V,
544
+ MODEL_TENSOR.ATTN_OUT,
545
+ MODEL_TENSOR.FFN_DOWN,
546
+ MODEL_TENSOR.FFN_UP,
547
+ MODEL_TENSOR.LAYER_OUT_NORM,
548
+ ],
549
+ MODEL_ARCH.NOMIC_BERT: [
550
+ MODEL_TENSOR.TOKEN_EMBD,
551
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
552
+ MODEL_TENSOR.TOKEN_TYPES,
553
+ MODEL_TENSOR.POS_EMBD,
554
+ MODEL_TENSOR.OUTPUT_NORM,
555
+ MODEL_TENSOR.ATTN_OUT_NORM,
556
+ MODEL_TENSOR.ATTN_QKV,
557
+ MODEL_TENSOR.ATTN_OUT,
558
+ MODEL_TENSOR.FFN_GATE,
559
+ MODEL_TENSOR.FFN_DOWN,
560
+ MODEL_TENSOR.FFN_UP,
561
+ MODEL_TENSOR.LAYER_OUT_NORM,
562
+ ],
563
+ MODEL_ARCH.JINA_BERT_V2: [
564
+ MODEL_TENSOR.TOKEN_EMBD,
565
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
566
+ MODEL_TENSOR.TOKEN_TYPES,
567
+ MODEL_TENSOR.ATTN_NORM_2,
568
+ MODEL_TENSOR.ATTN_OUT_NORM,
569
+ MODEL_TENSOR.ATTN_Q,
570
+ MODEL_TENSOR.ATTN_Q_NORM,
571
+ MODEL_TENSOR.ATTN_K,
572
+ MODEL_TENSOR.ATTN_K_NORM,
573
+ MODEL_TENSOR.ATTN_V,
574
+ MODEL_TENSOR.ATTN_OUT,
575
+ MODEL_TENSOR.FFN_UP,
576
+ MODEL_TENSOR.FFN_GATE,
577
+ MODEL_TENSOR.FFN_DOWN,
578
+ MODEL_TENSOR.LAYER_OUT_NORM,
579
+ ],
580
+ MODEL_ARCH.MPT: [
581
+ MODEL_TENSOR.TOKEN_EMBD,
582
+ MODEL_TENSOR.OUTPUT_NORM,
583
+ MODEL_TENSOR.OUTPUT,
584
+ MODEL_TENSOR.ATTN_NORM,
585
+ MODEL_TENSOR.ATTN_QKV,
586
+ MODEL_TENSOR.ATTN_OUT,
587
+ MODEL_TENSOR.FFN_NORM,
588
+ MODEL_TENSOR.FFN_DOWN,
589
+ MODEL_TENSOR.FFN_UP,
590
+ MODEL_TENSOR.FFN_ACT,
591
+ MODEL_TENSOR.ATTN_Q_NORM,
592
+ MODEL_TENSOR.ATTN_K_NORM,
593
+ MODEL_TENSOR.POS_EMBD,
594
+ ],
595
+ MODEL_ARCH.GPTJ: [
596
+ MODEL_TENSOR.TOKEN_EMBD,
597
+ MODEL_TENSOR.OUTPUT_NORM,
598
+ MODEL_TENSOR.OUTPUT,
599
+ MODEL_TENSOR.ATTN_NORM,
600
+ MODEL_TENSOR.ATTN_Q,
601
+ MODEL_TENSOR.ATTN_K,
602
+ MODEL_TENSOR.ATTN_V,
603
+ MODEL_TENSOR.ATTN_OUT,
604
+ MODEL_TENSOR.FFN_DOWN,
605
+ MODEL_TENSOR.FFN_UP,
606
+ ],
607
+ MODEL_ARCH.REFACT: [
608
+ MODEL_TENSOR.TOKEN_EMBD,
609
+ MODEL_TENSOR.OUTPUT_NORM,
610
+ MODEL_TENSOR.OUTPUT,
611
+ MODEL_TENSOR.ATTN_NORM,
612
+ MODEL_TENSOR.ATTN_Q,
613
+ MODEL_TENSOR.ATTN_K,
614
+ MODEL_TENSOR.ATTN_V,
615
+ MODEL_TENSOR.ATTN_OUT,
616
+ MODEL_TENSOR.FFN_NORM,
617
+ MODEL_TENSOR.FFN_GATE,
618
+ MODEL_TENSOR.FFN_DOWN,
619
+ MODEL_TENSOR.FFN_UP,
620
+ ],
621
+ MODEL_ARCH.BLOOM: [
622
+ MODEL_TENSOR.TOKEN_EMBD,
623
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
624
+ MODEL_TENSOR.OUTPUT_NORM,
625
+ MODEL_TENSOR.OUTPUT,
626
+ MODEL_TENSOR.ATTN_NORM,
627
+ MODEL_TENSOR.ATTN_QKV,
628
+ MODEL_TENSOR.ATTN_OUT,
629
+ MODEL_TENSOR.FFN_NORM,
630
+ MODEL_TENSOR.FFN_DOWN,
631
+ MODEL_TENSOR.FFN_UP,
632
+ ],
633
+ MODEL_ARCH.STABLELM: [
634
+ MODEL_TENSOR.TOKEN_EMBD,
635
+ MODEL_TENSOR.OUTPUT_NORM,
636
+ MODEL_TENSOR.OUTPUT,
637
+ MODEL_TENSOR.ROPE_FREQS,
638
+ MODEL_TENSOR.ATTN_NORM,
639
+ MODEL_TENSOR.ATTN_Q,
640
+ MODEL_TENSOR.ATTN_K,
641
+ MODEL_TENSOR.ATTN_V,
642
+ MODEL_TENSOR.ATTN_OUT,
643
+ MODEL_TENSOR.FFN_NORM,
644
+ MODEL_TENSOR.FFN_GATE,
645
+ MODEL_TENSOR.FFN_DOWN,
646
+ MODEL_TENSOR.FFN_UP,
647
+ MODEL_TENSOR.ATTN_Q_NORM,
648
+ MODEL_TENSOR.ATTN_K_NORM,
649
+ ],
650
+ MODEL_ARCH.QWEN: [
651
+ MODEL_TENSOR.TOKEN_EMBD,
652
+ MODEL_TENSOR.OUTPUT_NORM,
653
+ MODEL_TENSOR.OUTPUT,
654
+ MODEL_TENSOR.ROPE_FREQS,
655
+ MODEL_TENSOR.ATTN_NORM,
656
+ MODEL_TENSOR.ATTN_QKV,
657
+ MODEL_TENSOR.ATTN_OUT,
658
+ MODEL_TENSOR.ATTN_ROT_EMBD,
659
+ MODEL_TENSOR.FFN_NORM,
660
+ MODEL_TENSOR.FFN_GATE,
661
+ MODEL_TENSOR.FFN_DOWN,
662
+ MODEL_TENSOR.FFN_UP,
663
+ ],
664
+ MODEL_ARCH.QWEN2: [
665
+ MODEL_TENSOR.TOKEN_EMBD,
666
+ MODEL_TENSOR.OUTPUT_NORM,
667
+ MODEL_TENSOR.OUTPUT,
668
+ MODEL_TENSOR.ATTN_NORM,
669
+ MODEL_TENSOR.ATTN_Q,
670
+ MODEL_TENSOR.ATTN_K,
671
+ MODEL_TENSOR.ATTN_V,
672
+ MODEL_TENSOR.ATTN_OUT,
673
+ MODEL_TENSOR.FFN_NORM,
674
+ MODEL_TENSOR.FFN_GATE,
675
+ MODEL_TENSOR.FFN_DOWN,
676
+ MODEL_TENSOR.FFN_UP,
677
+ ],
678
+ MODEL_ARCH.QWEN2MOE: [
679
+ MODEL_TENSOR.TOKEN_EMBD,
680
+ MODEL_TENSOR.OUTPUT_NORM,
681
+ MODEL_TENSOR.OUTPUT,
682
+ MODEL_TENSOR.ATTN_NORM,
683
+ MODEL_TENSOR.ATTN_Q,
684
+ MODEL_TENSOR.ATTN_K,
685
+ MODEL_TENSOR.ATTN_V,
686
+ MODEL_TENSOR.ATTN_OUT,
687
+ MODEL_TENSOR.FFN_NORM,
688
+ MODEL_TENSOR.FFN_GATE_INP,
689
+ MODEL_TENSOR.FFN_GATE_EXP,
690
+ MODEL_TENSOR.FFN_DOWN_EXP,
691
+ MODEL_TENSOR.FFN_UP_EXP,
692
+ MODEL_TENSOR.FFN_GATE_INP_SHEXP,
693
+ MODEL_TENSOR.FFN_GATE_SHEXP,
694
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
695
+ MODEL_TENSOR.FFN_UP_SHEXP,
696
+ ],
697
+ MODEL_ARCH.PLAMO: [
698
+ MODEL_TENSOR.TOKEN_EMBD,
699
+ MODEL_TENSOR.OUTPUT_NORM,
700
+ MODEL_TENSOR.OUTPUT,
701
+ MODEL_TENSOR.ROPE_FREQS,
702
+ MODEL_TENSOR.ATTN_NORM,
703
+ MODEL_TENSOR.ATTN_Q,
704
+ MODEL_TENSOR.ATTN_K,
705
+ MODEL_TENSOR.ATTN_V,
706
+ MODEL_TENSOR.ATTN_OUT,
707
+ MODEL_TENSOR.ATTN_ROT_EMBD,
708
+ MODEL_TENSOR.FFN_GATE,
709
+ MODEL_TENSOR.FFN_DOWN,
710
+ MODEL_TENSOR.FFN_UP,
711
+ ],
712
+ MODEL_ARCH.GPT2: [
713
+ MODEL_TENSOR.TOKEN_EMBD,
714
+ MODEL_TENSOR.POS_EMBD,
715
+ MODEL_TENSOR.OUTPUT_NORM,
716
+ MODEL_TENSOR.OUTPUT,
717
+ MODEL_TENSOR.ATTN_NORM,
718
+ MODEL_TENSOR.ATTN_QKV,
719
+ MODEL_TENSOR.ATTN_OUT,
720
+ MODEL_TENSOR.FFN_NORM,
721
+ MODEL_TENSOR.FFN_DOWN,
722
+ MODEL_TENSOR.FFN_UP,
723
+ ],
724
+ MODEL_ARCH.PHI2: [
725
+ MODEL_TENSOR.TOKEN_EMBD,
726
+ MODEL_TENSOR.OUTPUT_NORM,
727
+ MODEL_TENSOR.OUTPUT,
728
+ MODEL_TENSOR.ATTN_NORM,
729
+ MODEL_TENSOR.ATTN_QKV,
730
+ MODEL_TENSOR.ATTN_Q,
731
+ MODEL_TENSOR.ATTN_K,
732
+ MODEL_TENSOR.ATTN_V,
733
+ MODEL_TENSOR.ATTN_OUT,
734
+ MODEL_TENSOR.FFN_NORM,
735
+ MODEL_TENSOR.FFN_DOWN,
736
+ MODEL_TENSOR.FFN_UP,
737
+ ],
738
+ MODEL_ARCH.PHI3: [
739
+ MODEL_TENSOR.TOKEN_EMBD,
740
+ MODEL_TENSOR.OUTPUT_NORM,
741
+ MODEL_TENSOR.OUTPUT,
742
+ MODEL_TENSOR.ATTN_NORM,
743
+ MODEL_TENSOR.ATTN_QKV,
744
+ MODEL_TENSOR.ATTN_Q,
745
+ MODEL_TENSOR.ATTN_K,
746
+ MODEL_TENSOR.ATTN_V,
747
+ MODEL_TENSOR.ATTN_OUT,
748
+ MODEL_TENSOR.FFN_NORM,
749
+ MODEL_TENSOR.FFN_DOWN,
750
+ MODEL_TENSOR.FFN_UP,
751
+ ],
752
+ MODEL_ARCH.CODESHELL: [
753
+ MODEL_TENSOR.TOKEN_EMBD,
754
+ MODEL_TENSOR.POS_EMBD,
755
+ MODEL_TENSOR.OUTPUT_NORM,
756
+ MODEL_TENSOR.OUTPUT,
757
+ MODEL_TENSOR.ATTN_NORM,
758
+ MODEL_TENSOR.ATTN_QKV,
759
+ MODEL_TENSOR.ATTN_OUT,
760
+ MODEL_TENSOR.ATTN_ROT_EMBD,
761
+ MODEL_TENSOR.FFN_NORM,
762
+ MODEL_TENSOR.FFN_DOWN,
763
+ MODEL_TENSOR.FFN_UP,
764
+ ],
765
+ MODEL_ARCH.ORION: [
766
+ MODEL_TENSOR.TOKEN_EMBD,
767
+ MODEL_TENSOR.OUTPUT_NORM,
768
+ MODEL_TENSOR.OUTPUT,
769
+ MODEL_TENSOR.ROPE_FREQS,
770
+ MODEL_TENSOR.ATTN_NORM,
771
+ MODEL_TENSOR.ATTN_Q,
772
+ MODEL_TENSOR.ATTN_K,
773
+ MODEL_TENSOR.ATTN_V,
774
+ MODEL_TENSOR.ATTN_OUT,
775
+ MODEL_TENSOR.ATTN_ROT_EMBD,
776
+ MODEL_TENSOR.FFN_NORM,
777
+ MODEL_TENSOR.FFN_GATE,
778
+ MODEL_TENSOR.FFN_DOWN,
779
+ MODEL_TENSOR.FFN_UP,
780
+ ],
781
+ MODEL_ARCH.INTERNLM2: [
782
+ MODEL_TENSOR.TOKEN_EMBD,
783
+ MODEL_TENSOR.OUTPUT_NORM,
784
+ MODEL_TENSOR.OUTPUT,
785
+ MODEL_TENSOR.ATTN_NORM,
786
+ MODEL_TENSOR.ATTN_Q,
787
+ MODEL_TENSOR.ATTN_K,
788
+ MODEL_TENSOR.ATTN_V,
789
+ MODEL_TENSOR.ATTN_OUT,
790
+ MODEL_TENSOR.ATTN_ROT_EMBD,
791
+ MODEL_TENSOR.FFN_NORM,
792
+ MODEL_TENSOR.FFN_GATE,
793
+ MODEL_TENSOR.FFN_DOWN,
794
+ MODEL_TENSOR.FFN_UP,
795
+ ],
796
+ MODEL_ARCH.MINICPM: [
797
+ MODEL_TENSOR.TOKEN_EMBD,
798
+ MODEL_TENSOR.OUTPUT,
799
+ MODEL_TENSOR.OUTPUT_NORM,
800
+ MODEL_TENSOR.ROPE_FREQS,
801
+ MODEL_TENSOR.ATTN_NORM,
802
+ MODEL_TENSOR.ATTN_Q,
803
+ MODEL_TENSOR.ATTN_K,
804
+ MODEL_TENSOR.ATTN_V,
805
+ MODEL_TENSOR.ATTN_OUT,
806
+ MODEL_TENSOR.ATTN_ROT_EMBD,
807
+ MODEL_TENSOR.FFN_GATE_INP,
808
+ MODEL_TENSOR.FFN_NORM,
809
+ MODEL_TENSOR.FFN_GATE,
810
+ MODEL_TENSOR.FFN_DOWN,
811
+ MODEL_TENSOR.FFN_UP,
812
+ MODEL_TENSOR.FFN_GATE_EXP,
813
+ MODEL_TENSOR.FFN_DOWN_EXP,
814
+ MODEL_TENSOR.FFN_UP_EXP,
815
+ ],
816
+ MODEL_ARCH.GEMMA: [
817
+ MODEL_TENSOR.TOKEN_EMBD,
818
+ MODEL_TENSOR.OUTPUT_NORM,
819
+ MODEL_TENSOR.ATTN_NORM,
820
+ MODEL_TENSOR.ATTN_Q,
821
+ MODEL_TENSOR.ATTN_K,
822
+ MODEL_TENSOR.ATTN_V,
823
+ MODEL_TENSOR.ATTN_OUT,
824
+ MODEL_TENSOR.FFN_GATE,
825
+ MODEL_TENSOR.FFN_DOWN,
826
+ MODEL_TENSOR.FFN_UP,
827
+ MODEL_TENSOR.FFN_NORM,
828
+ ],
829
+ MODEL_ARCH.GEMMA2: [
830
+ MODEL_TENSOR.TOKEN_EMBD,
831
+ MODEL_TENSOR.OUTPUT_NORM,
832
+ MODEL_TENSOR.ATTN_Q,
833
+ MODEL_TENSOR.ATTN_K,
834
+ MODEL_TENSOR.ATTN_V,
835
+ MODEL_TENSOR.ATTN_OUT,
836
+ MODEL_TENSOR.FFN_GATE,
837
+ MODEL_TENSOR.FFN_DOWN,
838
+ MODEL_TENSOR.FFN_UP,
839
+ MODEL_TENSOR.ATTN_NORM,
840
+ MODEL_TENSOR.ATTN_POST_NORM,
841
+ MODEL_TENSOR.FFN_PRE_NORM,
842
+ MODEL_TENSOR.FFN_POST_NORM,
843
+ ],
844
+ MODEL_ARCH.STARCODER2: [
845
+ MODEL_TENSOR.TOKEN_EMBD,
846
+ MODEL_TENSOR.OUTPUT_NORM,
847
+ MODEL_TENSOR.OUTPUT,
848
+ MODEL_TENSOR.ROPE_FREQS,
849
+ MODEL_TENSOR.ATTN_NORM,
850
+ MODEL_TENSOR.ATTN_Q,
851
+ MODEL_TENSOR.ATTN_K,
852
+ MODEL_TENSOR.ATTN_V,
853
+ MODEL_TENSOR.ATTN_OUT,
854
+ MODEL_TENSOR.ATTN_ROT_EMBD,
855
+ MODEL_TENSOR.FFN_NORM,
856
+ MODEL_TENSOR.FFN_DOWN,
857
+ MODEL_TENSOR.FFN_UP,
858
+ ],
859
+ MODEL_ARCH.MAMBA: [
860
+ MODEL_TENSOR.TOKEN_EMBD,
861
+ MODEL_TENSOR.OUTPUT_NORM,
862
+ MODEL_TENSOR.OUTPUT,
863
+ MODEL_TENSOR.ATTN_NORM,
864
+ MODEL_TENSOR.SSM_IN,
865
+ MODEL_TENSOR.SSM_CONV1D,
866
+ MODEL_TENSOR.SSM_X,
867
+ MODEL_TENSOR.SSM_DT,
868
+ MODEL_TENSOR.SSM_A,
869
+ MODEL_TENSOR.SSM_D,
870
+ MODEL_TENSOR.SSM_OUT,
871
+ ],
872
+ MODEL_ARCH.XVERSE: [
873
+ MODEL_TENSOR.TOKEN_EMBD,
874
+ MODEL_TENSOR.OUTPUT_NORM,
875
+ MODEL_TENSOR.OUTPUT,
876
+ MODEL_TENSOR.ROPE_FREQS,
877
+ MODEL_TENSOR.ATTN_NORM,
878
+ MODEL_TENSOR.ATTN_Q,
879
+ MODEL_TENSOR.ATTN_K,
880
+ MODEL_TENSOR.ATTN_V,
881
+ MODEL_TENSOR.ATTN_OUT,
882
+ MODEL_TENSOR.ATTN_ROT_EMBD,
883
+ MODEL_TENSOR.FFN_NORM,
884
+ MODEL_TENSOR.FFN_GATE,
885
+ MODEL_TENSOR.FFN_DOWN,
886
+ MODEL_TENSOR.FFN_UP,
887
+ ],
888
+ MODEL_ARCH.COMMAND_R: [
889
+ MODEL_TENSOR.TOKEN_EMBD,
890
+ MODEL_TENSOR.OUTPUT_NORM,
891
+ MODEL_TENSOR.ATTN_NORM,
892
+ MODEL_TENSOR.ATTN_Q,
893
+ MODEL_TENSOR.ATTN_K,
894
+ MODEL_TENSOR.ATTN_V,
895
+ MODEL_TENSOR.ATTN_OUT,
896
+ MODEL_TENSOR.FFN_GATE,
897
+ MODEL_TENSOR.FFN_DOWN,
898
+ MODEL_TENSOR.FFN_UP,
899
+ MODEL_TENSOR.ATTN_K_NORM,
900
+ MODEL_TENSOR.ATTN_Q_NORM,
901
+ ],
902
+ MODEL_ARCH.DBRX: [
903
+ MODEL_TENSOR.TOKEN_EMBD,
904
+ MODEL_TENSOR.OUTPUT_NORM,
905
+ MODEL_TENSOR.OUTPUT,
906
+ MODEL_TENSOR.ATTN_NORM,
907
+ MODEL_TENSOR.ATTN_QKV,
908
+ MODEL_TENSOR.ATTN_OUT,
909
+ MODEL_TENSOR.ATTN_OUT_NORM,
910
+ MODEL_TENSOR.FFN_GATE_INP,
911
+ MODEL_TENSOR.FFN_GATE_EXP,
912
+ MODEL_TENSOR.FFN_DOWN_EXP,
913
+ MODEL_TENSOR.FFN_UP_EXP,
914
+ ],
915
+ MODEL_ARCH.OLMO: [
916
+ MODEL_TENSOR.TOKEN_EMBD,
917
+ MODEL_TENSOR.OUTPUT,
918
+ MODEL_TENSOR.ATTN_Q,
919
+ MODEL_TENSOR.ATTN_K,
920
+ MODEL_TENSOR.ATTN_V,
921
+ MODEL_TENSOR.ATTN_OUT,
922
+ MODEL_TENSOR.FFN_GATE,
923
+ MODEL_TENSOR.FFN_DOWN,
924
+ MODEL_TENSOR.FFN_UP,
925
+ ],
926
+ MODEL_ARCH.OPENELM: [
927
+ MODEL_TENSOR.TOKEN_EMBD,
928
+ MODEL_TENSOR.OUTPUT_NORM,
929
+ MODEL_TENSOR.ATTN_NORM,
930
+ MODEL_TENSOR.ATTN_QKV,
931
+ MODEL_TENSOR.ATTN_Q_NORM,
932
+ MODEL_TENSOR.ATTN_K_NORM,
933
+ MODEL_TENSOR.ATTN_OUT,
934
+ MODEL_TENSOR.FFN_NORM,
935
+ MODEL_TENSOR.FFN_GATE,
936
+ MODEL_TENSOR.FFN_DOWN,
937
+ MODEL_TENSOR.FFN_UP,
938
+ ],
939
+ MODEL_ARCH.ARCTIC: [
940
+ MODEL_TENSOR.TOKEN_EMBD,
941
+ MODEL_TENSOR.OUTPUT_NORM,
942
+ MODEL_TENSOR.OUTPUT,
943
+ MODEL_TENSOR.ROPE_FREQS,
944
+ MODEL_TENSOR.ATTN_NORM,
945
+ MODEL_TENSOR.ATTN_Q,
946
+ MODEL_TENSOR.ATTN_K,
947
+ MODEL_TENSOR.ATTN_V,
948
+ MODEL_TENSOR.ATTN_OUT,
949
+ MODEL_TENSOR.ATTN_ROT_EMBD,
950
+ MODEL_TENSOR.FFN_GATE_INP,
951
+ MODEL_TENSOR.FFN_NORM,
952
+ MODEL_TENSOR.FFN_GATE,
953
+ MODEL_TENSOR.FFN_DOWN,
954
+ MODEL_TENSOR.FFN_UP,
955
+ MODEL_TENSOR.FFN_NORM_EXP,
956
+ MODEL_TENSOR.FFN_GATE_EXP,
957
+ MODEL_TENSOR.FFN_DOWN_EXP,
958
+ MODEL_TENSOR.FFN_UP_EXP,
959
+ ],
960
+ MODEL_ARCH.DEEPSEEK2: [
961
+ MODEL_TENSOR.TOKEN_EMBD,
962
+ MODEL_TENSOR.OUTPUT_NORM,
963
+ MODEL_TENSOR.OUTPUT,
964
+ MODEL_TENSOR.ROPE_FREQS,
965
+ MODEL_TENSOR.ATTN_NORM,
966
+ MODEL_TENSOR.ATTN_Q,
967
+ MODEL_TENSOR.ATTN_Q_A,
968
+ MODEL_TENSOR.ATTN_Q_B,
969
+ MODEL_TENSOR.ATTN_KV_A_MQA,
970
+ MODEL_TENSOR.ATTN_KV_B,
971
+ MODEL_TENSOR.ATTN_Q_A_NORM,
972
+ MODEL_TENSOR.ATTN_KV_A_NORM,
973
+ MODEL_TENSOR.ATTN_OUT,
974
+ MODEL_TENSOR.ATTN_ROT_EMBD,
975
+ MODEL_TENSOR.FFN_GATE_INP,
976
+ MODEL_TENSOR.FFN_NORM,
977
+ MODEL_TENSOR.FFN_GATE,
978
+ MODEL_TENSOR.FFN_DOWN,
979
+ MODEL_TENSOR.FFN_UP,
980
+ MODEL_TENSOR.FFN_GATE_EXP,
981
+ MODEL_TENSOR.FFN_DOWN_EXP,
982
+ MODEL_TENSOR.FFN_UP_EXP,
983
+ MODEL_TENSOR.FFN_GATE_SHEXP,
984
+ MODEL_TENSOR.FFN_DOWN_SHEXP,
985
+ MODEL_TENSOR.FFN_UP_SHEXP,
986
+ ],
987
+ MODEL_ARCH.CHATGLM : [
988
+ MODEL_TENSOR.TOKEN_EMBD,
989
+ MODEL_TENSOR.ROPE_FREQS,
990
+ MODEL_TENSOR.OUTPUT_NORM,
991
+ MODEL_TENSOR.OUTPUT,
992
+ MODEL_TENSOR.ATTN_NORM,
993
+ MODEL_TENSOR.ATTN_QKV,
994
+ MODEL_TENSOR.ATTN_OUT,
995
+ MODEL_TENSOR.FFN_NORM,
996
+ MODEL_TENSOR.FFN_DOWN,
997
+ MODEL_TENSOR.FFN_UP,
998
+ ],
999
+ MODEL_ARCH.BITNET: [
1000
+ MODEL_TENSOR.ATTN_Q,
1001
+ MODEL_TENSOR.ATTN_K,
1002
+ MODEL_TENSOR.ATTN_V,
1003
+ MODEL_TENSOR.TOKEN_EMBD,
1004
+ MODEL_TENSOR.OUTPUT_NORM,
1005
+ MODEL_TENSOR.ATTN_NORM,
1006
+ MODEL_TENSOR.ATTN_OUT,
1007
+ MODEL_TENSOR.FFN_NORM,
1008
+ MODEL_TENSOR.FFN_GATE,
1009
+ MODEL_TENSOR.FFN_DOWN,
1010
+ MODEL_TENSOR.FFN_UP,
1011
+ MODEL_TENSOR.ATTN_SUB_NORM,
1012
+ MODEL_TENSOR.FFN_SUB_NORM,
1013
+ ],
1014
+ MODEL_ARCH.T5: [
1015
+ MODEL_TENSOR.TOKEN_EMBD,
1016
+ MODEL_TENSOR.OUTPUT,
1017
+ MODEL_TENSOR.DEC_ATTN_NORM,
1018
+ MODEL_TENSOR.DEC_ATTN_Q,
1019
+ MODEL_TENSOR.DEC_ATTN_K,
1020
+ MODEL_TENSOR.DEC_ATTN_V,
1021
+ MODEL_TENSOR.DEC_ATTN_OUT,
1022
+ MODEL_TENSOR.DEC_ATTN_REL_B,
1023
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM,
1024
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q,
1025
+ MODEL_TENSOR.DEC_CROSS_ATTN_K,
1026
+ MODEL_TENSOR.DEC_CROSS_ATTN_V,
1027
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT,
1028
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B,
1029
+ MODEL_TENSOR.DEC_FFN_NORM,
1030
+ MODEL_TENSOR.DEC_FFN_GATE,
1031
+ MODEL_TENSOR.DEC_FFN_DOWN,
1032
+ MODEL_TENSOR.DEC_FFN_UP,
1033
+ MODEL_TENSOR.DEC_OUTPUT_NORM,
1034
+ MODEL_TENSOR.ENC_ATTN_NORM,
1035
+ MODEL_TENSOR.ENC_ATTN_Q,
1036
+ MODEL_TENSOR.ENC_ATTN_K,
1037
+ MODEL_TENSOR.ENC_ATTN_V,
1038
+ MODEL_TENSOR.ENC_ATTN_OUT,
1039
+ MODEL_TENSOR.ENC_ATTN_REL_B,
1040
+ MODEL_TENSOR.ENC_FFN_NORM,
1041
+ MODEL_TENSOR.ENC_FFN_GATE,
1042
+ MODEL_TENSOR.ENC_FFN_DOWN,
1043
+ MODEL_TENSOR.ENC_FFN_UP,
1044
+ MODEL_TENSOR.ENC_OUTPUT_NORM,
1045
+ ],
1046
+ MODEL_ARCH.T5ENCODER: [
1047
+ MODEL_TENSOR.TOKEN_EMBD,
1048
+ MODEL_TENSOR.OUTPUT,
1049
+ MODEL_TENSOR.ENC_ATTN_NORM,
1050
+ MODEL_TENSOR.ENC_ATTN_Q,
1051
+ MODEL_TENSOR.ENC_ATTN_K,
1052
+ MODEL_TENSOR.ENC_ATTN_V,
1053
+ MODEL_TENSOR.ENC_ATTN_OUT,
1054
+ MODEL_TENSOR.ENC_ATTN_REL_B,
1055
+ MODEL_TENSOR.ENC_FFN_NORM,
1056
+ MODEL_TENSOR.ENC_FFN_GATE,
1057
+ MODEL_TENSOR.ENC_FFN_DOWN,
1058
+ MODEL_TENSOR.ENC_FFN_UP,
1059
+ MODEL_TENSOR.ENC_OUTPUT_NORM,
1060
+ ],
1061
+ MODEL_ARCH.JAIS: [
1062
+ MODEL_TENSOR.TOKEN_EMBD,
1063
+ MODEL_TENSOR.OUTPUT_NORM,
1064
+ MODEL_TENSOR.OUTPUT,
1065
+ MODEL_TENSOR.ATTN_NORM,
1066
+ MODEL_TENSOR.ATTN_QKV,
1067
+ MODEL_TENSOR.ATTN_OUT,
1068
+ MODEL_TENSOR.FFN_NORM,
1069
+ MODEL_TENSOR.FFN_DOWN,
1070
+ MODEL_TENSOR.FFN_GATE,
1071
+ MODEL_TENSOR.FFN_UP,
1072
+ ],
1073
+ MODEL_ARCH.NEMOTRON: [
1074
+ MODEL_TENSOR.TOKEN_EMBD,
1075
+ MODEL_TENSOR.OUTPUT_NORM,
1076
+ MODEL_TENSOR.OUTPUT,
1077
+ MODEL_TENSOR.ROPE_FREQS,
1078
+ MODEL_TENSOR.ATTN_NORM,
1079
+ MODEL_TENSOR.ATTN_Q,
1080
+ MODEL_TENSOR.ATTN_K,
1081
+ MODEL_TENSOR.ATTN_V,
1082
+ MODEL_TENSOR.ATTN_OUT,
1083
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1084
+ MODEL_TENSOR.FFN_NORM,
1085
+ MODEL_TENSOR.FFN_DOWN,
1086
+ MODEL_TENSOR.FFN_UP,
1087
+ ],
1088
+ MODEL_ARCH.EXAONE: [
1089
+ MODEL_TENSOR.TOKEN_EMBD,
1090
+ MODEL_TENSOR.OUTPUT_NORM,
1091
+ MODEL_TENSOR.OUTPUT,
1092
+ MODEL_TENSOR.ROPE_FREQS,
1093
+ MODEL_TENSOR.ATTN_NORM,
1094
+ MODEL_TENSOR.ATTN_Q,
1095
+ MODEL_TENSOR.ATTN_K,
1096
+ MODEL_TENSOR.ATTN_V,
1097
+ MODEL_TENSOR.ATTN_OUT,
1098
+ MODEL_TENSOR.ATTN_ROT_EMBD,
1099
+ MODEL_TENSOR.FFN_NORM,
1100
+ MODEL_TENSOR.FFN_GATE,
1101
+ MODEL_TENSOR.FFN_DOWN,
1102
+ MODEL_TENSOR.FFN_UP,
1103
+ ],
1104
+ # TODO
1105
+ }
1106
+
1107
# Tensors that will not be serialized, keyed by architecture.
# Every listed architecture skips ROPE_FREQS; all of them except CHATGLM
# additionally skip ATTN_ROT_EMBD. Each architecture gets its own list object.
MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
    arch: (
        [MODEL_TENSOR.ROPE_FREQS]
        if arch is MODEL_ARCH.CHATGLM
        else [MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD]
    )
    for arch in (
        MODEL_ARCH.LLAMA,
        MODEL_ARCH.BAICHUAN,
        MODEL_ARCH.QWEN,
        MODEL_ARCH.CODESHELL,
        MODEL_ARCH.ORION,
        MODEL_ARCH.STARCODER2,
        MODEL_ARCH.XVERSE,
        MODEL_ARCH.DEEPSEEK2,
        MODEL_ARCH.CHATGLM,
        MODEL_ARCH.NEMOTRON,
    )
}
1149
+
1150
+ #
1151
+ # types
1152
+ #
1153
+
1154
+
1155
class TokenType(IntEnum):
    """Integer codes used to classify vocabulary tokens."""

    NORMAL = 1
    UNKNOWN = 2
    CONTROL = 3
    USER_DEFINED = 4
    UNUSED = 5
    BYTE = 6
1162
+
1163
+
1164
class RopeScalingType(Enum):
    """String identifiers for the supported RoPE scaling modes."""

    NONE = 'none'
    LINEAR = 'linear'
    YARN = 'yarn'
1168
+
1169
+
1170
class PoolingType(IntEnum):
    """Integer codes for the pooling modes."""

    NONE = 0
    MEAN = 1
    CLS = 2
1174
+
1175
+
1176
class GGMLQuantizationType(IntEnum):
    """Integer codes identifying how a tensor's data is encoded.

    The numbering is not contiguous: values 4 and 5 are not assigned.
    """

    # plain floats
    F32 = 0
    F16 = 1
    # 32-element block quantizations
    Q4_0 = 2
    Q4_1 = 3
    Q5_0 = 6
    Q5_1 = 7
    Q8_0 = 8
    Q8_1 = 9
    # K-quants
    Q2_K = 10
    Q3_K = 11
    Q4_K = 12
    Q5_K = 13
    Q6_K = 14
    Q8_K = 15
    # IQ family
    IQ2_XXS = 16
    IQ2_XS = 17
    IQ3_XXS = 18
    IQ1_S = 19
    IQ4_NL = 20
    IQ3_S = 21
    IQ2_S = 22
    IQ4_XS = 23
    # plain integers and wide float
    I8 = 24
    I16 = 25
    I32 = 26
    I64 = 27
    F64 = 28
    IQ1_M = 29
    BF16 = 30
    # Q4_0 variants
    Q4_0_4_4 = 31
    Q4_0_4_8 = 32
    Q4_0_8_8 = 33
1209
+
1210
+
1211
+ # TODO: add GGMLFileType from ggml_ftype in ggml.h
1212
+
1213
+
1214
# from llama_ftype in llama.h
# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
class LlamaFileType(IntEnum):
    """File-level quantization label; every MOSTLY_* entry excludes 1d tensors."""

    ALL_F32 = 0
    MOSTLY_F16 = 1  # except 1d tensors
    MOSTLY_Q4_0 = 2  # except 1d tensors
    MOSTLY_Q4_1 = 3  # except 1d tensors
    # Values 4, 5 and 6 are intentionally left unassigned:
    # MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
    # MOSTLY_Q4_2          = 5  # support has been removed
    # MOSTLY_Q4_3          = 6  # support has been removed
    MOSTLY_Q8_0 = 7  # except 1d tensors
    MOSTLY_Q5_0 = 8  # except 1d tensors
    MOSTLY_Q5_1 = 9  # except 1d tensors
    MOSTLY_Q2_K = 10  # except 1d tensors
    MOSTLY_Q3_K_S = 11  # except 1d tensors
    MOSTLY_Q3_K_M = 12  # except 1d tensors
    MOSTLY_Q3_K_L = 13  # except 1d tensors
    MOSTLY_Q4_K_S = 14  # except 1d tensors
    MOSTLY_Q4_K_M = 15  # except 1d tensors
    MOSTLY_Q5_K_S = 16  # except 1d tensors
    MOSTLY_Q5_K_M = 17  # except 1d tensors
    MOSTLY_Q6_K = 18  # except 1d tensors
    MOSTLY_IQ2_XXS = 19  # except 1d tensors
    MOSTLY_IQ2_XS = 20  # except 1d tensors
    MOSTLY_Q2_K_S = 21  # except 1d tensors
    MOSTLY_IQ3_XS = 22  # except 1d tensors
    MOSTLY_IQ3_XXS = 23  # except 1d tensors
    MOSTLY_IQ1_S = 24  # except 1d tensors
    MOSTLY_IQ4_NL = 25  # except 1d tensors
    MOSTLY_IQ3_S = 26  # except 1d tensors
    MOSTLY_IQ3_M = 27  # except 1d tensors
    MOSTLY_IQ2_S = 28  # except 1d tensors
    MOSTLY_IQ2_M = 29  # except 1d tensors
    MOSTLY_IQ4_XS = 30  # except 1d tensors
    MOSTLY_IQ1_M = 31  # except 1d tensors
    MOSTLY_BF16 = 32  # except 1d tensors
    MOSTLY_Q4_0_4_4 = 33  # except 1d tensors
    MOSTLY_Q4_0_4_8 = 34  # except 1d tensors
    MOSTLY_Q4_0_8_8 = 35  # except 1d tensors

    GUESSED = 1024  # not specified in the model file
1255
+
1256
+
1257
class GGUFEndian(IntEnum):
    """Byte-order selector."""

    LITTLE = 0
    BIG = 1
1260
+
1261
+
1262
class GGUFValueType(IntEnum):
    """Integer codes for the value types of GGUF key/value metadata."""

    UINT8 = 0
    INT8 = 1
    UINT16 = 2
    INT16 = 3
    UINT32 = 4
    INT32 = 5
    FLOAT32 = 6
    BOOL = 7
    STRING = 8
    ARRAY = 9
    UINT64 = 10
    INT64 = 11
    FLOAT64 = 12

    @staticmethod
    def get_type(val: Any) -> GGUFValueType:
        """Pick the value type used for a Python object.

        str/bytes/bytearray map to STRING, list to ARRAY, float to FLOAT32,
        bool to BOOL and any other int to INT32.

        Raises:
            ValueError: if ``val`` is none of the types above.
        """
        if isinstance(val, (str, bytes, bytearray)):
            return GGUFValueType.STRING
        if isinstance(val, list):
            return GGUFValueType.ARRAY
        if isinstance(val, float):
            return GGUFValueType.FLOAT32
        # bool has to be tested before int because bool is a subclass of int
        if isinstance(val, bool):
            return GGUFValueType.BOOL
        if isinstance(val, int):
            return GGUFValueType.INT32
        # TODO: need help with 64-bit types in Python
        raise ValueError(f"Unknown type: {type(val)}")
1292
+
1293
+
1294
# Maps each quantization type to (block size, type size).
# QK_K is the block size of the K-quant and 256-wide IQ formats.
QK_K = 256
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    # unquantized floats
    GGMLQuantizationType.F32: (1, 4),
    GGMLQuantizationType.F16: (1, 2),
    # 32-element blocks
    GGMLQuantizationType.Q4_0: (32, 2 + 16),
    GGMLQuantizationType.Q4_1: (32, 2 + 2 + 16),
    GGMLQuantizationType.Q5_0: (32, 2 + 4 + 16),
    GGMLQuantizationType.Q5_1: (32, 2 + 2 + 4 + 16),
    GGMLQuantizationType.Q8_0: (32, 2 + 32),
    GGMLQuantizationType.Q8_1: (32, 4 + 4 + 32),
    # QK_K-element blocks
    GGMLQuantizationType.Q2_K: (QK_K, 2 + 2 + QK_K // 16 + QK_K // 4),
    GGMLQuantizationType.Q3_K: (QK_K, 2 + QK_K // 4 + QK_K // 8 + 12),
    GGMLQuantizationType.Q4_K: (QK_K, 2 + 2 + QK_K // 2 + 12),
    GGMLQuantizationType.Q5_K: (QK_K, 2 + 2 + QK_K // 2 + QK_K // 8 + 12),
    GGMLQuantizationType.Q6_K: (QK_K, 2 + QK_K // 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.Q8_K: (QK_K, 4 + QK_K + QK_K // 8),
    GGMLQuantizationType.IQ2_XXS: (QK_K, 2 + QK_K // 4),
    GGMLQuantizationType.IQ2_XS: (QK_K, 2 + QK_K // 4 + QK_K // 32),
    GGMLQuantizationType.IQ3_XXS: (QK_K, 2 + QK_K // 4 + QK_K // 8),
    GGMLQuantizationType.IQ1_S: (QK_K, 2 + QK_K // 8 + QK_K // 16),
    GGMLQuantizationType.IQ4_NL: (32, 2 + 16),
    GGMLQuantizationType.IQ3_S: (QK_K, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
    GGMLQuantizationType.IQ2_S: (QK_K, 2 + QK_K // 4 + QK_K // 16),
    GGMLQuantizationType.IQ4_XS: (QK_K, 2 + 2 + QK_K // 2 + QK_K // 64),
    # plain integers and wide float: one element per block
    GGMLQuantizationType.I8: (1, 1),
    GGMLQuantizationType.I16: (1, 2),
    GGMLQuantizationType.I32: (1, 4),
    GGMLQuantizationType.I64: (1, 8),
    GGMLQuantizationType.F64: (1, 8),
    GGMLQuantizationType.IQ1_M: (QK_K, QK_K // 8 + QK_K // 16 + QK_K // 32),
    GGMLQuantizationType.BF16: (1, 2),
    # Q4_0 variants share the Q4_0 sizes
    GGMLQuantizationType.Q4_0_4_4: (32, 2 + 16),
    GGMLQuantizationType.Q4_0_4_8: (32, 2 + 16),
    GGMLQuantizationType.Q4_0_8_8: (32, 2 + 16),
}
1330
+
1331
+
1332
# Aliases for backward compatibility.
# Each KEY_* name simply re-exports the corresponding attribute of Keys.

# general
KEY_GENERAL_ARCHITECTURE = Keys.General.ARCHITECTURE
KEY_GENERAL_QUANTIZATION_VERSION = Keys.General.QUANTIZATION_VERSION
KEY_GENERAL_ALIGNMENT = Keys.General.ALIGNMENT
KEY_GENERAL_NAME = Keys.General.NAME
KEY_GENERAL_AUTHOR = Keys.General.AUTHOR
KEY_GENERAL_URL = Keys.General.URL
KEY_GENERAL_DESCRIPTION = Keys.General.DESCRIPTION
KEY_GENERAL_LICENSE = Keys.General.LICENSE
KEY_GENERAL_SOURCE_URL = Keys.General.SOURCE_URL
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE

# LLM
KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE
KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH
KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT
KEY_FEED_FORWARD_LENGTH = Keys.LLM.FEED_FORWARD_LENGTH
KEY_USE_PARALLEL_RESIDUAL = Keys.LLM.USE_PARALLEL_RESIDUAL
KEY_TENSOR_DATA_LAYOUT = Keys.LLM.TENSOR_DATA_LAYOUT

# attention
KEY_ATTENTION_HEAD_COUNT = Keys.Attention.HEAD_COUNT
KEY_ATTENTION_HEAD_COUNT_KV = Keys.Attention.HEAD_COUNT_KV
KEY_ATTENTION_MAX_ALIBI_BIAS = Keys.Attention.MAX_ALIBI_BIAS
KEY_ATTENTION_CLAMP_KQV = Keys.Attention.CLAMP_KQV
KEY_ATTENTION_LAYERNORM_EPS = Keys.Attention.LAYERNORM_EPS
KEY_ATTENTION_LAYERNORM_RMS_EPS = Keys.Attention.LAYERNORM_RMS_EPS

# RoPE
KEY_ROPE_DIMENSION_COUNT = Keys.Rope.DIMENSION_COUNT
KEY_ROPE_FREQ_BASE = Keys.Rope.FREQ_BASE
KEY_ROPE_SCALING_TYPE = Keys.Rope.SCALING_TYPE
KEY_ROPE_SCALING_FACTOR = Keys.Rope.SCALING_FACTOR
KEY_ROPE_SCALING_ORIG_CTX_LEN = Keys.Rope.SCALING_ORIG_CTX_LEN
KEY_ROPE_SCALING_FINETUNED = Keys.Rope.SCALING_FINETUNED

# SSM
KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS

# tokenization
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
# "PRIFIX" is a historical misspelling; the name is kept so existing imports
# keep working, and the correctly spelled alias is provided next to it.
KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
.venv/lib/python3.11/site-packages/gguf/gguf.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file is left for compatibility. If you want to use the GGUF API from Python
2
+ # then don't import gguf/gguf.py directly. If you're looking for examples, see the
3
+ # examples/ directory for gguf-py
4
+
5
+ import importlib
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ sys.path.insert(0, str(Path(__file__).parent.parent))
10
+
11
+ # Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
12
+ importlib.invalidate_caches()
13
+ import gguf # noqa: E402
14
+
15
+ importlib.reload(gguf)
.venv/lib/python3.11/site-packages/gguf/gguf_reader.py ADDED
@@ -0,0 +1,317 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # GGUF file reading/modification support. For API usage information,
3
+ # please see the files in scripts/ for some fairly simple examples.
4
+ #
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import os
9
+ from collections import OrderedDict
10
+ from typing import Any, Literal, NamedTuple, TypeVar, Union
11
+
12
+ import numpy as np
13
+ import numpy.typing as npt
14
+
15
+ from .quants import quant_shape_to_byte_shape
16
+
17
+ if __name__ == "__main__":
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ # Allow running file in package as a script.
22
+ sys.path.insert(0, str(Path(__file__).parent.parent))
23
+
24
+ from gguf.constants import (
25
+ GGML_QUANT_SIZES,
26
+ GGUF_DEFAULT_ALIGNMENT,
27
+ GGUF_MAGIC,
28
+ GGUF_VERSION,
29
+ GGMLQuantizationType,
30
+ GGUFValueType,
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
36
+
37
+
38
class ReaderField(NamedTuple):
    """One metadata or tensor-info record located inside a GGUF file.

    NOTE: the list defaults below are created once with the class, so every
    instance that relies on a default shares the same list object; treat the
    defaults as read-only.
    """

    # Byte offset at which this field starts.
    offset: int

    # Field name (not necessarily taken from the file data).
    name: str

    # Raw data parts. A single value can consist of several parts, e.g. a
    # string is stored as its length followed by the string bytes.
    parts: list[npt.NDArray[Any]] = []

    # Indexes into `parts` that hold the actual data. For an array of
    # strings, for example, these point at the string payloads.
    data: list[int] = [-1]

    # Value type(s) of the field.
    types: list[GGUFValueType] = []
55
+
56
+
57
class ReaderTensor(NamedTuple):
    """Description of one tensor found in a GGUF file, including its data."""

    name: str                         # tensor name
    tensor_type: GGMLQuantizationType  # encoding of the tensor data
    shape: npt.NDArray[np.uint32]     # dimensions as read from the tensor info
    n_elements: int                   # product of the dimensions
    n_bytes: int                      # size of the tensor data in bytes
    data_offset: int                  # byte offset of the tensor data in the file
    data: npt.NDArray[Any]            # the tensor data itself
    field: ReaderField                # tensor-info field this tensor was built from
66
+
67
+
68
class GGUFReader:
    """Reader for GGUF files backed by a NumPy memory map.

    Constructing the reader parses the header, the key/value metadata and the
    tensor-info records. The results are available as ``fields`` (ordered
    mapping of name to ReaderField) and ``tensors`` (list of ReaderTensor).
    All arrays handed out are views into the memory-mapped file.
    """

    # I - same as host, S - swapped
    byte_order: Literal['I', 'S'] = 'I'
    alignment: int = GGUF_DEFAULT_ALIGNMENT
    data_offset: int

    # Note: Internal helper, API may change.
    gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
        GGUFValueType.UINT8: np.uint8,
        GGUFValueType.INT8: np.int8,
        GGUFValueType.UINT16: np.uint16,
        GGUFValueType.INT16: np.int16,
        GGUFValueType.UINT32: np.uint32,
        GGUFValueType.INT32: np.int32,
        GGUFValueType.FLOAT32: np.float32,
        GGUFValueType.UINT64: np.uint64,
        GGUFValueType.INT64: np.int64,
        GGUFValueType.FLOAT64: np.float64,
        GGUFValueType.BOOL: np.bool_,
    }

    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
        """Memory-map ``path`` and parse its header, metadata and tensor infos.

        Args:
            path: Location of the GGUF file.
            mode: Mode passed through to ``np.memmap``.

        Raises:
            ValueError: if the magic number is wrong, the version is not in
                READER_SUPPORTED_VERSIONS, the ``general.alignment`` field has
                an unexpected type, or two tensors share a name.
        """
        self.data = np.memmap(path, mode = mode)
        offs = 0

        # Check for GGUF magic
        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
            raise ValueError('GGUF magic invalid')
        offs += 4

        # Check GGUF version
        temp_version = self._get(offs, np.uint32)
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = 'S'
            # ndarray.newbyteorder() was removed in NumPy 2.0; re-viewing the
            # data with a byte-swapped dtype works on NumPy 1.x and 2.x alike.
            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))

        # Check tensor count and kv count
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
        # Plain Python ints keep the loop counters free of NumPy scalar types.
        tensor_count, kv_count = (int(c) for c in temp_counts)
        offs = self._build_fields(offs, kv_count)

        # Build Tensor Info Fields
        offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
        new_align = self.fields.get('general.alignment')
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError('Bad type for general.alignment field')
            # Store a Python int, as the class-level annotation promises.
            self.alignment = int(new_align.parts[-1][0])
        # An alignment of 0 results in no padding (and no ZeroDivisionError).
        padding = offs % self.alignment if self.alignment else 0
        if padding != 0:
            offs += self.alignment - padding
        self.data_offset = offs
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar('_DT', bound = npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
        """Return the field stored under ``key``, or None if there is none."""
        return self.fields.get(key, None)

    # Fetch a tensor from the list by index.
    def get_tensor(self, idx: int) -> ReaderTensor:
        """Return the tensor at position ``idx`` of ``self.tensors``."""
        return self.tensors[idx]

    def _get(
        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
    ) -> npt.NDArray[Any]:
        """Return ``count`` items of ``dtype`` starting at byte ``offset``.

        The result is a view of the mapped file whose dtype byte order is
        ``override_order`` if given, otherwise ``self.byte_order``.
        """
        count = int(count)
        itemsize = int(np.empty([], dtype = dtype).itemsize)
        end_offs = offset + itemsize * count
        arr = self.data[offset:end_offs].view(dtype = dtype)[:count]
        # NumPy 2.0 removed ndarray.newbyteorder(); dtype.newbyteorder() plus
        # view() is the equivalent that works on both major versions.
        return arr.view(arr.dtype.newbyteorder(override_order or self.byte_order))

    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        """Register ``field`` and return the total byte size of its parts.

        A duplicate name is logged and stored under ``<name>_<offset>``.
        Returns 0 instead of the size when ``skip_sum`` is true.
        """
        if field.name in self.fields:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')

            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
            self.fields[field.name + '_{}'.format(field.offset)] = field
        else:
            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        """Read a string at ``offset``: a uint64 length followed by that many bytes."""
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self, orig_offs: int, raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        """Read one value of type ``raw_type`` starting at ``orig_offs``.

        Returns:
            (size in bytes, parts, indexes of the data parts, value types).
            For arrays the element type is appended to the types list and the
            function recurses once per element.

        Raises:
            ValueError: if ``raw_type`` is not a known or handled type.
        """
        offs = orig_offs
        types: list[GGUFValueType] = []
        gtype = GGUFValueType(raw_type)
        types.append(gtype)
        # Handle strings.
        if gtype == GGUFValueType.STRING:
            sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
            size = sum(int(part.nbytes) for part in sparts)
            return size, sparts, [1], types
        # Check if it's a simple scalar type.
        nptype = self.gguf_scalar_to_np.get(gtype)
        if nptype is not None:
            val = self._get(offs, nptype)
            return int(val.nbytes), [val], [0], types
        # Handle arrays.
        if gtype == GGUFValueType.ARRAY:
            raw_itype = self._get(offs, np.uint32)
            offs += int(raw_itype.nbytes)
            alen = self._get(offs, np.uint64)
            offs += int(alen.nbytes)
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for idx in range(int(alen[0])):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                if idx == 0:
                    # The element type is the same for every item; record it once.
                    types += curr_types
                idxs_offs = len(aparts)
                aparts += curr_parts
                data_idxs += (part_idx + idxs_offs for part_idx in curr_idxs)
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types
        # We can't deal with this one.
        raise ValueError(f'Unknown/unhandled field type {gtype}')

    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        """Read one tensor-info record starting at ``orig_offs``.

        The returned field's parts are, in order: name length, name bytes,
        dimension count, dimensions, raw dtype and data offset.
        """
        offs = orig_offs

        # Get Tensor Name
        name_len, name_data = self._get_str(offs)
        offs += int(name_len.nbytes + name_data.nbytes)

        # Get Tensor Dimensions Count
        n_dims = self._get(offs, np.uint32)
        offs += int(n_dims.nbytes)

        # Get Tensor Dimension Array
        dims = self._get(offs, np.uint64, n_dims[0])
        offs += int(dims.nbytes)

        # Get Tensor Encoding Scheme Type
        raw_dtype = self._get(offs, np.uint32)
        offs += int(raw_dtype.nbytes)

        # Get Tensor Offset
        offset_tensor = self._get(offs, np.uint64)
        offs += int(offset_tensor.nbytes)

        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding = 'utf-8'),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )

    def _build_fields(self, offs: int, count: int) -> int:
        """Parse ``count`` key/value fields from ``offs``; return the end offset."""
        for _ in range(count):
            orig_offs = offs
            kv_klen, kv_kdata = self._get_str(offs)
            offs += int(kv_klen.nbytes + kv_kdata.nbytes)
            raw_kv_type = self._get(offs, np.uint32)
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
            parts += field_parts
            self._push_field(ReaderField(
                orig_offs,
                str(bytes(kv_kdata), encoding = 'utf-8'),
                parts,
                [idx + idxs_offs for idx in field_idxs],
                field_types,
            ), skip_sum = True)
            offs += field_size
        return offs

    def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
        """Parse ``count`` tensor-info records; return (end offset, fields)."""
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor_info_field(offs)
            offs += sum(int(part.nbytes) for part in field.parts)
            tensor_fields.append(field)
        return offs, tensor_fields

    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        """Create ``self.tensors`` from the tensor-info ``fields``.

        Args:
            start_offs: Offset of the tensor data section; each tensor's own
                offset is added to it.
            fields: Fields produced by ``_build_tensor_info``.

        Raises:
            ValueError: if two tensors have the same name.
        """
        # Types that are exposed with their natural NumPy dtype. Everything
        # else is exposed as raw bytes with a byte-oriented shape.
        plain_types: dict[GGMLQuantizationType, npt.DTypeLike] = {
            GGMLQuantizationType.F16: np.float16,
            GGMLQuantizationType.F32: np.float32,
            GGMLQuantizationType.F64: np.float64,
            GGMLQuantizationType.I8: np.int8,
            GGMLQuantizationType.I16: np.int16,
            GGMLQuantizationType.I32: np.int32,
            GGMLQuantizationType.I64: np.int64,
        }
        tensors = []
        tensor_names = set()  # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
            tensor_name = str(bytes(name_data), encoding = 'utf-8')
            if tensor_name in tensor_names:
                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
            # The NumPy shape is the stored dimension list reversed.
            np_dims = tuple(reversed(dims.tolist()))
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
            n_bytes = n_elems * type_size // block_size
            data_offs = int(start_offs + offset_tensor[0])
            item_type: npt.DTypeLike = plain_types.get(ggml_type)
            if item_type is not None:
                item_count = n_elems
            else:
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
            tensors.append(ReaderTensor(
                name = tensor_name,
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,
                n_bytes = n_bytes,
                data_offset = data_offs,
                data = self._get(data_offs, item_type, item_count).reshape(np_dims),
                field = field,
            ))
        self.tensors = tensors
.venv/lib/python3.11/site-packages/gguf/gguf_writer.py ADDED
@@ -0,0 +1,888 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import shutil
6
+ import struct
7
+ import tempfile
8
+ from dataclasses import dataclass
9
+ from enum import Enum, auto
10
+ from math import prod
11
+ from pathlib import Path
12
+ from io import BufferedWriter
13
+ from typing import IO, Any, Sequence, Mapping
14
+ from string import ascii_letters, digits
15
+
16
+ import numpy as np
17
+
18
+ from .constants import (
19
+ GGUF_DEFAULT_ALIGNMENT,
20
+ GGUF_MAGIC,
21
+ GGUF_VERSION,
22
+ GGMLQuantizationType,
23
+ GGUFEndian,
24
+ GGUFValueType,
25
+ Keys,
26
+ RopeScalingType,
27
+ PoolingType,
28
+ TokenType,
29
+ )
30
+
31
+ from .quants import quant_shape_from_byte_shape
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
37
+
38
+
39
@dataclass
class TensorInfo:
    """Bookkeeping record for one tensor queued in a GGUFWriter."""

    # Tensor dimensions as passed to add_tensor_info().
    shape: Sequence[int]
    # GGML storage type of the tensor data.
    dtype: GGMLQuantizationType
    # Size of the tensor payload in bytes, excluding alignment padding.
    nbytes: int
    # In-memory data; None until add_tensor() attaches it, and reset to None
    # once write_tensors_to_file() has written it out.
    tensor: np.ndarray[Any, Any] | None = None
45
+
46
+
47
@dataclass
class GGUFValue:
    """A metadata value paired with the GGUF value type used to encode it."""

    # The raw Python value to be packed.
    value: Any
    # The GGUF type tag that selects how `value` is serialized.
    type: GGUFValueType
51
+
52
+
53
class WriterState(Enum):
    """Stages of a GGUFWriter output, listed in the order they are reached."""

    NO_FILE = auto()  # no output file has been opened yet
    EMPTY = auto()    # output file(s) opened, nothing written
    HEADER = auto()   # header written
    KV_DATA = auto()  # key/value metadata written
    TI_DATA = auto()  # tensor info written
    WEIGHTS = auto()  # tensor data written (at least partially)
60
+
61
+
62
+ class GGUFWriter:
63
+ fout: list[BufferedWriter] | None
64
+ path: Path | None
65
+ temp_file: tempfile.SpooledTemporaryFile[bytes] | None
66
+ tensors: list[dict[str, TensorInfo]]
67
+ kv_data: list[dict[str, GGUFValue]]
68
+ state: WriterState
69
+ _simple_value_packing = {
70
+ GGUFValueType.UINT8: "B",
71
+ GGUFValueType.INT8: "b",
72
+ GGUFValueType.UINT16: "H",
73
+ GGUFValueType.INT16: "h",
74
+ GGUFValueType.UINT32: "I",
75
+ GGUFValueType.INT32: "i",
76
+ GGUFValueType.FLOAT32: "f",
77
+ GGUFValueType.UINT64: "Q",
78
+ GGUFValueType.INT64: "q",
79
+ GGUFValueType.FLOAT64: "d",
80
+ GGUFValueType.BOOL: "?",
81
+ }
82
+
83
def __init__(
    self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
    split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
):
    """Set up an empty writer; no file is opened here.

    Args:
        path: Output location, or a falsy value to leave it unset for now.
        arch: Architecture name, recorded immediately via add_architecture().
        use_temp_file: Buffer tensor data in a temporary file in add_tensor().
        endianess: Byte order of the produced file.
        split_max_tensors: Per-shard tensor limit (0 disables the limit).
        split_max_size: Per-shard byte limit (0 disables the limit).
        dry_run: When set, print_plan() lists the files and exits.
        small_first_shard: Reserve an empty first shard ahead of the tensors.
    """
    # Output bookkeeping
    self.fout = None
    self.path = None if not path else Path(path)
    self.temp_file = None
    self.use_temp_file = use_temp_file

    # Format parameters
    self.arch = arch
    self.endianess = endianess
    self.data_alignment = GGUF_DEFAULT_ALIGNMENT

    # Sharding configuration
    self.split_max_tensors = split_max_tensors
    self.split_max_size = split_max_size
    self.dry_run = dry_run
    self.small_first_shard = small_first_shard

    # One dict per shard; with small_first_shard the first one stays empty
    # and tensors start in a second dict.
    self.tensors = [{}, {}] if self.small_first_shard else [{}]
    self.kv_data = [{}]

    endian_label = "Big" if self.endianess == GGUFEndian.BIG else "Little"
    logger.info("gguf: This GGUF file is for {0} Endian only".format(endian_label))
    self.state = WriterState.NO_FILE

    self.add_architecture()
109
+
110
def get_total_parameter_count(self) -> tuple[int, int, int, int]:
    """Tally parameter counts over every registered tensor.

    Returns:
        ``(total, shared, expert, expert_count)``. ``total`` is negated when
        any LoRA tensor was seen, to flag it as likely inexact. All four are
        zero when a ``.lora_b`` tensor does not directly follow its
        ``.lora_a`` partner.
    """
    total = 0
    shared = 0
    per_expert = 0
    expert_dim_sum = 0
    expert_tensor_count = 0
    pending_lora_a: tuple[str, TensorInfo] | None = None

    for shard in self.tensors:
        for name, info in shard.items():
            dims = info.shape

            if name.endswith(".lora_a"):
                # Counted together with the matching ".lora_b" tensor.
                pending_lora_a = (name, info)
                continue

            if name.endswith(".lora_b"):
                if pending_lora_a is None or pending_lora_a[0] != name[:-1] + "a":
                    # Bail when the LoRA pair can't be found trivially
                    logger.warning("can't measure LoRA size correctly, tensor order is unusual")
                    return 0, 0, 0, 0
                # Swap the last dimension for lora_a's last dimension.
                dims = (*dims[:-1], pending_lora_a[1].shape[-1])

            n_params = prod(dims)

            if "_exps." in name:
                # dims[-3] is used as the number of experts in this tensor.
                n_experts = dims[-3]
                per_expert += n_params // n_experts
                expert_dim_sum += n_experts
                expert_tensor_count += 1
            else:
                shared += n_params

            total += n_params

    # Hopefully this should work even for variable-expert-count models
    if expert_tensor_count > 0:
        expert_count = expert_dim_sum // expert_tensor_count
    else:
        expert_count = 0

    # Negate the total to signal it's likely not exact
    if pending_lora_a is not None:
        total = -total

    # NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py
    return total, shared, per_expert, expert_count
156
+
157
def format_shard_names(self, path: Path) -> list[Path]:
    """Return one output path per shard.

    With a single shard ``path`` is returned unchanged; otherwise each shard
    gets a sibling name built from ``SHARD_NAME_FORMAT`` (1-based index).
    """
    n_shards = len(self.tensors)
    if n_shards == 1:
        return [path]

    shard_paths = []
    for shard_no in range(1, n_shards + 1):
        shard_name = SHARD_NAME_FORMAT.format(path.stem, shard_no, n_shards)
        shard_paths.append(path.with_name(shard_name))
    return shard_paths
161
+
162
def open_output_file(self, path: Path | None = None) -> None:
    """Open every shard file for binary writing and move to the EMPTY state.

    Args:
        path: Optional output path; when given it replaces ``self.path``.

    Raises:
        ValueError: If the writer is past the NO_FILE state (other than the
            tolerated repeat call with the same path).
    """
    if self.state is WriterState.EMPTY and self.fout is not None and (path is None or path == self.path):
        # allow calling this multiple times as long as the path is the same
        return

    if self.state is not WriterState.NO_FILE:
        raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

    if path is not None:
        self.path = path

    if self.path is not None:
        filenames = self.print_plan()
        opened = []
        try:
            for filename in filenames:
                opened.append(open(filename, "wb"))
        except Exception:
            # Don't leak the shards that did open when a later one fails.
            for handle in opened:
                handle.close()
            raise
        self.fout = opened
        self.state = WriterState.EMPTY
177
+
178
def print_plan(self) -> list[Path]:
    """Log the shard files about to be written and return their paths.

    In dry-run mode the paths are printed to stdout and the process exits
    (via ``SystemExit``) instead of returning.
    """
    logger.info("Writing the following files:")
    assert self.path is not None
    filenames = self.format_shard_names(self.path)
    assert len(filenames) == len(self.tensors)
    for name, tensors in zip(filenames, self.tensors):
        logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")

    if self.dry_run:
        logger.info("Dry run, not writing files")
        for name in filenames:
            print(name)  # noqa: NP100
        # The builtin-looking exit() is injected by the `site` module for
        # interactive use and may be missing; raise SystemExit directly.
        raise SystemExit

    return filenames
193
+
194
def add_shard_kv_data(self) -> None:
    """Stamp split metadata into every shard's KV data.

    Does nothing for a single-shard output. Otherwise ``self.kv_data`` is
    grown to one dict per output file and each receives its split index,
    the split count and the tensor count summed over all shards.
    """
    if len(self.tensors) == 1:
        return

    n_tensors_total = sum(map(len, self.tensors))
    assert self.fout is not None
    n_splits = len(self.fout)

    # Give every output file its own KV dict.
    n_missing = n_splits - len(self.kv_data)
    self.kv_data.extend({} for _ in range(n_missing))

    for split_no, shard_kv in enumerate(self.kv_data):
        shard_kv[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(split_no, GGUFValueType.UINT16)
        shard_kv[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(n_splits, GGUFValueType.UINT16)
        shard_kv[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(n_tensors_total, GGUFValueType.INT32)
206
+
207
def write_header_to_file(self, path: Path | None = None) -> None:
    """Open the output (if needed) and write the header of every shard.

    Per shard the header is: magic, version, tensor count, KV count.

    Raises:
        ValueError: If the output is not in the EMPTY state after opening.
    """
    split_requested = self.split_max_tensors != 0 or self.split_max_size != 0
    if len(self.tensors) == 1 and split_requested:
        logger.warning("Model fails split requirements, not splitting")

    self.open_output_file(path)

    if self.state is not WriterState.EMPTY:
        raise ValueError(f'Expected output file to be empty, got {self.state}')

    assert self.fout is not None
    assert len(self.fout) == len(self.tensors)
    assert len(self.kv_data) == 1

    # Adds the per-shard split keys, so it must run before the KV counts
    # below are taken.
    self.add_shard_kv_data()

    for shard_file, shard_tensors, shard_kv in zip(self.fout, self.tensors, self.kv_data):
        # The magic is packed with an explicit "<" and no automatic prefix.
        shard_file.write(self._pack("<I", GGUF_MAGIC, skip_pack_prefix = True))
        counted_fields = (
            ("I", GGUF_VERSION),
            ("Q", len(shard_tensors)),
            ("Q", len(shard_kv)),
        )
        for fmt, field_value in counted_fields:
            shard_file.write(self._pack(fmt, field_value))
        shard_file.flush()

    self.state = WriterState.HEADER
229
+
230
def write_kv_data_to_file(self) -> None:
    """Serialize each shard's key/value metadata right after its header.

    Raises:
        ValueError: If the header has not just been written.
    """
    if self.state is not WriterState.HEADER:
        raise ValueError(f'Expected output file to contain the header, got {self.state}')
    assert self.fout is not None

    for shard_file, shard_kv in zip(self.fout, self.kv_data):
        blob = bytearray()
        for key, entry in shard_kv.items():
            # Keys are packed without a type tag, values with one.
            blob += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
            blob += self._pack_val(entry.value, entry.type, add_vtype=True)
        shard_file.write(blob)

    self.flush()
    self.state = WriterState.KV_DATA
246
+
247
def write_ti_data_to_file(self) -> None:
    """Write the tensor-info table of every shard.

    Each entry holds the tensor name, dimension count, dimensions, dtype and
    the tensor's offset, which advances by the aligned size of each tensor.

    Raises:
        ValueError: If the KV data has not just been written.
    """
    if self.state is not WriterState.KV_DATA:
        raise ValueError(f'Expected output file to contain KV data, got {self.state}')
    assert self.fout is not None

    for shard_file, shard_tensors in zip(self.fout, self.tensors):
        blob = bytearray()
        next_offset = 0  # offsets restart at zero for every shard

        for name, info in shard_tensors.items():
            blob += self._pack_val(name, GGUFValueType.STRING, add_vtype=False)
            blob += self._pack("I", len(info.shape))
            # Dimensions are emitted in the reverse of their order in info.shape.
            for dim in reversed(info.shape):
                blob += self._pack("Q", dim)
            blob += self._pack("I", info.dtype)
            blob += self._pack("Q", next_offset)
            next_offset += GGUFWriter.ggml_pad(info.nbytes, self.data_alignment)

        shard_file.write(blob)
        shard_file.flush()

    self.state = WriterState.TI_DATA
269
+
270
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
    """Register a metadata entry in the first shard's KV data.

    Raises:
        ValueError: If ``key`` already exists in any shard's KV data.
    """
    for shard_kv in self.kv_data:
        if key in shard_kv:
            raise ValueError(f'Duplicated key name {key!r}')

    self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
275
+
276
def add_uint8(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as a UINT8 value."""
    vtype = GGUFValueType.UINT8
    self.add_key_value(key, val, vtype)
278
+
279
def add_int8(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as an INT8 value."""
    vtype = GGUFValueType.INT8
    self.add_key_value(key, val, vtype)
281
+
282
def add_uint16(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as a UINT16 value."""
    vtype = GGUFValueType.UINT16
    self.add_key_value(key, val, vtype)
284
+
285
def add_int16(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as an INT16 value."""
    vtype = GGUFValueType.INT16
    self.add_key_value(key, val, vtype)
287
+
288
def add_uint32(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as a UINT32 value."""
    vtype = GGUFValueType.UINT32
    self.add_key_value(key, val, vtype)
290
+
291
def add_int32(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as an INT32 value."""
    vtype = GGUFValueType.INT32
    self.add_key_value(key, val, vtype)
293
+
294
def add_float32(self, key: str, val: float) -> None:
    """Register ``val`` under ``key`` as a FLOAT32 value."""
    vtype = GGUFValueType.FLOAT32
    self.add_key_value(key, val, vtype)
296
+
297
def add_uint64(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as a UINT64 value."""
    vtype = GGUFValueType.UINT64
    self.add_key_value(key, val, vtype)
299
+
300
def add_int64(self, key: str, val: int) -> None:
    """Register ``val`` under ``key`` as an INT64 value."""
    vtype = GGUFValueType.INT64
    self.add_key_value(key, val, vtype)
302
+
303
def add_float64(self, key: str, val: float) -> None:
    """Register ``val`` under ``key`` as a FLOAT64 value."""
    vtype = GGUFValueType.FLOAT64
    self.add_key_value(key, val, vtype)
305
+
306
def add_bool(self, key: str, val: bool) -> None:
    """Register ``val`` under ``key`` as a BOOL value."""
    vtype = GGUFValueType.BOOL
    self.add_key_value(key, val, vtype)
308
+
309
def add_string(self, key: str, val: str) -> None:
    """Register ``val`` under ``key`` as a STRING value; falsy values are skipped."""
    if val:
        self.add_key_value(key, val, GGUFValueType.STRING)
313
+
314
def add_array(self, key: str, val: Sequence[Any]) -> None:
    """Register ``val`` under ``key`` as an ARRAY value; empty sequences are skipped."""
    if len(val) != 0:
        self.add_key_value(key, val, GGUFValueType.ARRAY)
318
+
319
+ @staticmethod
320
+ def ggml_pad(x: int, n: int) -> int:
321
+ return ((x + n - 1) // n) * n
322
+
323
def add_tensor_info(
    self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype,
    tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
) -> None:
    """Register a tensor's metadata, starting a new shard when a limit is hit.

    Args:
        name: Tensor name; must be unique across all shards.
        tensor_shape: Shape of the array holding the data.
        tensor_dtype: NumPy dtype of the data; used to pick the GGML type
            when ``raw_dtype`` is not given.
        tensor_nbytes: Size of the tensor data in bytes.
        raw_dtype: Explicit GGML type. When given together with uint8 data,
            ``tensor_shape`` is converted with ``quant_shape_from_byte_shape``.

    Raises:
        ValueError: If an output file is already open, the name is a
            duplicate, or the dtype cannot be mapped to a GGML type.
    """
    if self.state is not WriterState.NO_FILE:
        raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

    for shard in self.tensors:
        if name in shard:
            raise ValueError(f'Duplicated tensor name {name!r}')

    if raw_dtype is not None:
        dtype = raw_dtype
        if tensor_dtype == np.uint8:
            tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)
    else:
        # Checked in order with ==, exactly like an if/elif chain.
        native_types = (
            (np.float16, GGMLQuantizationType.F16),
            (np.float32, GGMLQuantizationType.F32),
            (np.float64, GGMLQuantizationType.F64),
            (np.int8, GGMLQuantizationType.I8),
            (np.int16, GGMLQuantizationType.I16),
            (np.int32, GGMLQuantizationType.I32),
            (np.int64, GGMLQuantizationType.I64),
        )
        for np_type, ggml_type in native_types:
            if tensor_dtype == np_type:
                dtype = ggml_type
                break
        else:
            raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")

    # make sure there is at least one tensor before splitting
    current_shard = self.tensors[-1]
    if len(current_shard) > 0:
        # split when over tensor limit
        over_tensor_limit = (
            self.split_max_tensors != 0
            and len(current_shard) >= self.split_max_tensors
        )
        # split when over size limit
        over_size_limit = (
            not over_tensor_limit
            and self.split_max_size != 0
            and sum(ti.nbytes for ti in current_shard.values()) + tensor_nbytes > self.split_max_size
        )
        if over_tensor_limit or over_size_limit:
            self.tensors.append({})

    self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes)
367
+
368
def add_tensor(
    self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
    raw_dtype: GGMLQuantizationType | None = None,
) -> None:
    """Register a tensor and keep its data for the later write.

    The data is either attached to its TensorInfo or, when ``use_temp_file``
    is set, appended (with alignment padding) to a spooled temporary file.

    Note:
        For big-endian output ``tensor`` is byte-swapped in place.
    """
    if self.endianess == GGUFEndian.BIG:
        tensor.byteswap(inplace=True)

    # Lazily create the spill file on first use.
    if self.use_temp_file and self.temp_file is None:
        spill = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
        spill.seek(0)
        self.temp_file = spill

    if raw_shape is not None:
        shape: Sequence[int] = raw_shape
    else:
        shape = tensor.shape
    self.add_tensor_info(name, shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype)

    if self.temp_file is not None:
        tensor.tofile(self.temp_file)
        self.write_padding(self.temp_file, tensor.nbytes)
    else:
        self.tensors[-1][name].tensor = tensor
388
+
389
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
    """Write the zero bytes needed to pad ``n`` up to the alignment.

    Args:
        fp: Binary stream to write to.
        n: Current size or position to be padded.
        align: Alignment to use; defaults to ``self.data_alignment``.
    """
    alignment = self.data_alignment if align is None else align
    n_pad = GGUFWriter.ggml_pad(n, alignment) - n
    if n_pad != 0:
        fp.write(b"\x00" * n_pad)
393
+
394
def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
    """Write one tensor's data for the oldest still-pending tensor info.

    The tensor info is taken from the first shard that still has entries
    and removed from it; data is written to that shard's file with
    alignment padding before and after.

    Raises:
        ValueError: If the tensor-info table has not been written yet.
        IndexError: If no pending tensor info is left.

    Note:
        For big-endian output ``tensor`` is byte-swapped in place.
    """
    if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS:
        raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
    assert self.fout is not None

    # Locate the first shard that still has tensor info waiting.
    file_id = -1
    for i, tensors in enumerate(self.tensors):
        if len(tensors) > 0:
            file_id = i
            break

    pending = self.tensors[file_id]
    if not pending:
        # Fail before touching the caller's array, with a readable message.
        raise IndexError("no pending tensor info left to write tensor data for")

    if self.endianess == GGUFEndian.BIG:
        tensor.byteswap(inplace=True)

    fout = self.fout[file_id]

    # pop the first tensor info (dicts preserve insertion order)
    first_tensor_name = next(iter(pending))
    ti = pending.pop(first_tensor_name)
    assert ti.nbytes == tensor.nbytes

    self.write_padding(fout, fout.tell())
    tensor.tofile(fout)
    self.write_padding(fout, tensor.nbytes)

    self.state = WriterState.WEIGHTS
421
+
422
def write_tensors_to_file(self, *, progress: bool = False) -> None:
    """Write the tensor-info table followed by all tensor data.

    Data comes either from the arrays attached to each TensorInfo or, when a
    temporary file was used, from that file, which is copied wholesale into
    a single shard file and then closed.

    Args:
        progress: Show tqdm progress bars (in-memory path only).
    """
    self.write_ti_data_to_file()

    assert self.fout is not None

    # Align the start of the data section in every shard.
    for shard_file in self.fout:
        self.write_padding(shard_file, shard_file.tell())

    if self.temp_file is not None:
        self.temp_file.seek(0)

        # With small_first_shard the data goes to the second file.
        target_index = 1 if self.small_first_shard else 0
        shutil.copyfileobj(self.temp_file, self.fout[target_index])
        self.flush()
        self.temp_file.close()
    else:
        shard_bar = None
        bar = None

        if progress:
            from tqdm import tqdm

            total_bytes = sum(ti.nbytes for t in self.tensors for ti in t.values())

            if len(self.fout) > 1:
                shard_bar = tqdm(desc=f"Shard (0/{len(self.fout)})", total=None, unit="byte", unit_scale=True)
            bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)

        for shard_no, (shard_file, shard_tensors) in enumerate(zip(self.fout, self.tensors), start=1):
            if shard_bar is not None:
                shard_bar.set_description(f"Shard ({shard_no}/{len(self.fout)})")
                total = sum(ti.nbytes for ti in shard_tensors.values())
                shard_bar.reset(total=(total if total > 0 else None))

            # relying on the fact that Python dicts preserve insertion order (since 3.7)
            for ti in shard_tensors.values():
                assert ti.tensor is not None  # can only iterate once over the tensors
                assert ti.tensor.nbytes == ti.nbytes
                ti.tensor.tofile(shard_file)
                if shard_bar is not None:
                    shard_bar.update(ti.nbytes)
                if bar is not None:
                    bar.update(ti.nbytes)
                self.write_padding(shard_file, ti.nbytes)
                # Drop the reference once the data has been written.
                ti.tensor = None

    self.state = WriterState.WEIGHTS
468
+
469
def flush(self) -> None:
    """Flush every open shard file; requires the output to be open."""
    assert self.fout is not None
    for shard_file in self.fout:
        shard_file.flush()
473
+
474
def close(self) -> None:
    """Close every open shard file and forget them; a no-op when nothing is open."""
    if self.fout is None:
        return
    for shard_file in self.fout:
        shard_file.close()
    self.fout = None
479
+
480
def add_type(self, type_name: str) -> None:
    """Store ``type_name`` as a string under ``Keys.General.TYPE``."""
    key = Keys.General.TYPE
    self.add_string(key, type_name)
482
+
483
def add_architecture(self) -> None:
    """Store ``self.arch`` as a string under ``Keys.General.ARCHITECTURE``."""
    key = Keys.General.ARCHITECTURE
    self.add_string(key, self.arch)
485
+
486
def add_quantization_version(self, quantization_version: int) -> None:
    """Store the value as a uint32 under ``Keys.General.QUANTIZATION_VERSION``."""
    key = Keys.General.QUANTIZATION_VERSION
    self.add_uint32(key, quantization_version)
488
+
489
def add_custom_alignment(self, alignment: int) -> None:
    """Use ``alignment`` for data padding and record it under ``Keys.General.ALIGNMENT``."""
    # Subsequent padding and offset computations read self.data_alignment.
    self.data_alignment = alignment
    key = Keys.General.ALIGNMENT
    self.add_uint32(key, alignment)
492
+
493
def add_file_type(self, ftype: int) -> None:
    """Store ``ftype`` as a uint32 under ``Keys.General.FILE_TYPE``."""
    key = Keys.General.FILE_TYPE
    self.add_uint32(key, ftype)
495
+
496
def add_name(self, name: str) -> None:
    """Store ``name`` as a string under ``Keys.General.NAME``."""
    key = Keys.General.NAME
    self.add_string(key, name)
498
+
499
def add_author(self, author: str) -> None:
    """Store ``author`` as a string under ``Keys.General.AUTHOR``."""
    key = Keys.General.AUTHOR
    self.add_string(key, author)
501
+
502
def add_version(self, version: str) -> None:
    """Store ``version`` as a string under ``Keys.General.VERSION``."""
    key = Keys.General.VERSION
    self.add_string(key, version)
504
+
505
def add_organization(self, organization: str) -> None:
    """Store ``organization`` as a string under ``Keys.General.ORGANIZATION``."""
    key = Keys.General.ORGANIZATION
    self.add_string(key, organization)
507
+
508
def add_finetune(self, finetune: str) -> None:
    """Store ``finetune`` as a string under ``Keys.General.FINETUNE``."""
    key = Keys.General.FINETUNE
    self.add_string(key, finetune)
510
+
511
def add_basename(self, basename: str) -> None:
    """Store ``basename`` as a string under ``Keys.General.BASENAME``."""
    key = Keys.General.BASENAME
    self.add_string(key, basename)
513
+
514
def add_description(self, description: str) -> None:
    """Store ``description`` as a string under ``Keys.General.DESCRIPTION``."""
    key = Keys.General.DESCRIPTION
    self.add_string(key, description)
516
+
517
def add_quantized_by(self, quantized: str) -> None:
    """Store ``quantized`` as a string under ``Keys.General.QUANTIZED_BY``."""
    key = Keys.General.QUANTIZED_BY
    self.add_string(key, quantized)
519
+
520
def add_size_label(self, size_label: str) -> None:
    """Store ``size_label`` as a string under ``Keys.General.SIZE_LABEL``."""
    key = Keys.General.SIZE_LABEL
    self.add_string(key, size_label)
522
+
523
def add_license(self, license: str) -> None:
    """Store ``license`` as a string under ``Keys.General.LICENSE``."""
    key = Keys.General.LICENSE
    self.add_string(key, license)
525
+
526
def add_license_name(self, license: str) -> None:
    """Store ``license`` as a string under ``Keys.General.LICENSE_NAME``."""
    key = Keys.General.LICENSE_NAME
    self.add_string(key, license)
528
+
529
def add_license_link(self, license: str) -> None:
    """Store ``license`` as a string under ``Keys.General.LICENSE_LINK``."""
    key = Keys.General.LICENSE_LINK
    self.add_string(key, license)
531
+
532
def add_url(self, url: str) -> None:
    """Store ``url`` as a string under ``Keys.General.URL``."""
    key = Keys.General.URL
    self.add_string(key, url)
534
+
535
def add_doi(self, doi: str) -> None:
    """Store ``doi`` as a string under ``Keys.General.DOI``."""
    key = Keys.General.DOI
    self.add_string(key, doi)
537
+
538
def add_uuid(self, uuid: str) -> None:
    """Store ``uuid`` as a string under ``Keys.General.UUID``."""
    key = Keys.General.UUID
    self.add_string(key, uuid)
540
+
541
def add_repo_url(self, repo_url: str) -> None:
    """Store ``repo_url`` as a string under ``Keys.General.REPO_URL``."""
    key = Keys.General.REPO_URL
    self.add_string(key, repo_url)
543
+
544
def add_source_url(self, url: str) -> None:
    """Store ``url`` as a string under ``Keys.General.SOURCE_URL``."""
    key = Keys.General.SOURCE_URL
    self.add_string(key, url)
546
+
547
def add_source_doi(self, doi: str) -> None:
    """Store ``doi`` as a string under ``Keys.General.SOURCE_DOI``."""
    key = Keys.General.SOURCE_DOI
    self.add_string(key, doi)
549
+
550
def add_source_uuid(self, uuid: str) -> None:
    """Store ``uuid`` as a string under ``Keys.General.SOURCE_UUID``."""
    key = Keys.General.SOURCE_UUID
    self.add_string(key, uuid)
552
+
553
def add_source_repo_url(self, repo_url: str) -> None:
    """Store ``repo_url`` as a string under ``Keys.General.SOURCE_REPO_URL``."""
    key = Keys.General.SOURCE_REPO_URL
    self.add_string(key, repo_url)
555
+
556
def add_base_model_count(self, source_count: int) -> None:
    """Store ``source_count`` as a uint32 under ``Keys.General.BASE_MODEL_COUNT``."""
    key = Keys.General.BASE_MODEL_COUNT
    self.add_uint32(key, source_count)
558
+
559
def add_base_model_name(self, source_id: int, name: str) -> None:
    """Store ``name`` under ``Keys.General.BASE_MODEL_NAME`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_NAME.format(id=source_id)
    self.add_string(key, name)
561
+
562
def add_base_model_author(self, source_id: int, author: str) -> None:
    """Store ``author`` under ``Keys.General.BASE_MODEL_AUTHOR`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_AUTHOR.format(id=source_id)
    self.add_string(key, author)
564
+
565
def add_base_model_version(self, source_id: int, version: str) -> None:
    """Store ``version`` under ``Keys.General.BASE_MODEL_VERSION`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_VERSION.format(id=source_id)
    self.add_string(key, version)
567
+
568
def add_base_model_organization(self, source_id: int, organization: str) -> None:
    """Store ``organization`` under ``Keys.General.BASE_MODEL_ORGANIZATION`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id)
    self.add_string(key, organization)
570
+
571
def add_base_model_url(self, source_id: int, url: str) -> None:
    """Store ``url`` under ``Keys.General.BASE_MODEL_URL`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_URL.format(id=source_id)
    self.add_string(key, url)
573
+
574
def add_base_model_doi(self, source_id: int, doi: str) -> None:
    """Store ``doi`` under ``Keys.General.BASE_MODEL_DOI`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_DOI.format(id=source_id)
    self.add_string(key, doi)
576
+
577
def add_base_model_uuid(self, source_id: int, uuid: str) -> None:
    """Store ``uuid`` under ``Keys.General.BASE_MODEL_UUID`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_UUID.format(id=source_id)
    self.add_string(key, uuid)
579
+
580
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
    """Store ``repo_url`` under ``Keys.General.BASE_MODEL_REPO_URL`` formatted with ``id=source_id``."""
    key = Keys.General.BASE_MODEL_REPO_URL.format(id=source_id)
    self.add_string(key, repo_url)
582
+
583
def add_tags(self, tags: Sequence[str]) -> None:
    """Store ``tags`` as an array under ``Keys.General.TAGS``."""
    key = Keys.General.TAGS
    self.add_array(key, tags)
585
+
586
def add_languages(self, languages: Sequence[str]) -> None:
    """Store ``languages`` as an array under ``Keys.General.LANGUAGES``."""
    key = Keys.General.LANGUAGES
    self.add_array(key, languages)
588
+
589
def add_datasets(self, datasets: Sequence[str]) -> None:
    """Store ``datasets`` as an array under ``Keys.General.DATASETS``."""
    key = Keys.General.DATASETS
    self.add_array(key, datasets)
591
+
592
def add_tensor_data_layout(self, layout: str) -> None:
    """Store ``layout`` as a string under ``Keys.LLM.TENSOR_DATA_LAYOUT`` for ``self.arch``."""
    key = Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch)
    self.add_string(key, layout)
594
+
595
def add_vocab_size(self, size: int) -> None:
    """Store ``size`` as a uint32 under ``Keys.LLM.VOCAB_SIZE`` for ``self.arch``."""
    key = Keys.LLM.VOCAB_SIZE.format(arch=self.arch)
    self.add_uint32(key, size)
597
+
598
def add_context_length(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.LLM.CONTEXT_LENGTH`` for ``self.arch``."""
    key = Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
600
+
601
def add_embedding_length(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.LLM.EMBEDDING_LENGTH`` for ``self.arch``."""
    key = Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
603
+
604
def add_block_count(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.LLM.BLOCK_COUNT`` for ``self.arch``."""
    key = Keys.LLM.BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, length)
606
+
607
def add_leading_dense_block_count(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.LLM.LEADING_DENSE_BLOCK_COUNT`` for ``self.arch``."""
    key = Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, length)
609
+
610
def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
    """Store ``length`` under ``Keys.LLM.FEED_FORWARD_LENGTH`` for ``self.arch``.

    An ``int`` is written as a uint32; anything else is written as an array.
    """
    key = Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch)
    if not isinstance(length, int):
        self.add_array(key, length)
    else:
        self.add_uint32(key, length)
615
+
616
def add_expert_feed_forward_length(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.LLM.EXPERT_FEED_FORWARD_LENGTH`` for ``self.arch``."""
    key = Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
618
+
619
def add_expert_shared_feed_forward_length(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH`` for ``self.arch``."""
    key = Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
621
+
622
def add_parallel_residual(self, use: bool) -> None:
    """Store ``use`` as a bool under ``Keys.LLM.USE_PARALLEL_RESIDUAL`` for ``self.arch``."""
    key = Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch)
    self.add_bool(key, use)
624
+
625
def add_decoder_start_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.LLM.DECODER_START_TOKEN_ID`` for ``self.arch``."""
    key = Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch)
    self.add_uint32(key, id)
627
+
628
def add_head_count(self, count: int | Sequence[int]) -> None:
    """Store ``count`` under ``Keys.Attention.HEAD_COUNT`` for ``self.arch``.

    An ``int`` is written as a uint32; anything else is written as an array.
    """
    key = Keys.Attention.HEAD_COUNT.format(arch=self.arch)
    if not isinstance(count, int):
        self.add_array(key, count)
    else:
        self.add_uint32(key, count)
633
+
634
def add_head_count_kv(self, count: int | Sequence[int]) -> None:
    """Store ``count`` under ``Keys.Attention.HEAD_COUNT_KV`` for ``self.arch``.

    An ``int`` is written as a uint32; anything else is written as an array.
    """
    key = Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch)
    if not isinstance(count, int):
        self.add_array(key, count)
    else:
        self.add_uint32(key, count)
639
+
640
def add_key_length(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.Attention.KEY_LENGTH`` for ``self.arch``."""
    key = Keys.Attention.KEY_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
642
+
643
def add_value_length(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.Attention.VALUE_LENGTH`` for ``self.arch``."""
    key = Keys.Attention.VALUE_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
645
+
646
def add_max_alibi_bias(self, bias: float) -> None:
    """Store ``bias`` as a float32 under ``Keys.Attention.MAX_ALIBI_BIAS`` for ``self.arch``."""
    key = Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch)
    self.add_float32(key, bias)
648
+
649
def add_clamp_kqv(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Attention.CLAMP_KQV`` for ``self.arch``."""
    key = Keys.Attention.CLAMP_KQV.format(arch=self.arch)
    self.add_float32(key, value)
651
+
652
def add_logit_scale(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.LLM.LOGIT_SCALE`` for ``self.arch``."""
    key = Keys.LLM.LOGIT_SCALE.format(arch=self.arch)
    self.add_float32(key, value)
654
+
655
def add_attn_logit_softcapping(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.LLM.ATTN_LOGIT_SOFTCAPPING`` for ``self.arch``."""
    key = Keys.LLM.ATTN_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float32(key, value)
657
+
658
def add_final_logit_softcapping(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.LLM.FINAL_LOGIT_SOFTCAPPING`` for ``self.arch``."""
    key = Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float32(key, value)
660
+
661
def add_expert_count(self, count: int) -> None:
    """Store ``count`` as a uint32 under ``Keys.LLM.EXPERT_COUNT`` for ``self.arch``."""
    key = Keys.LLM.EXPERT_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
663
+
664
def add_expert_used_count(self, count: int) -> None:
    """Store ``count`` as a uint32 under ``Keys.LLM.EXPERT_USED_COUNT`` for ``self.arch``."""
    key = Keys.LLM.EXPERT_USED_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
666
+
667
def add_expert_shared_count(self, count: int) -> None:
    """Store ``count`` as a uint32 under ``Keys.LLM.EXPERT_SHARED_COUNT`` for ``self.arch``."""
    key = Keys.LLM.EXPERT_SHARED_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
669
+
670
def add_expert_weights_scale(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.LLM.EXPERT_WEIGHTS_SCALE`` for ``self.arch``."""
    key = Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch)
    self.add_float32(key, value)
672
+
673
def add_layer_norm_eps(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Attention.LAYERNORM_EPS`` for ``self.arch``."""
    key = Keys.Attention.LAYERNORM_EPS.format(arch=self.arch)
    self.add_float32(key, value)
675
+
676
def add_layer_norm_rms_eps(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Attention.LAYERNORM_RMS_EPS`` for ``self.arch``."""
    key = Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch)
    self.add_float32(key, value)
678
+
679
def add_causal_attention(self, value: bool) -> None:
    """Store ``value`` as a bool under ``Keys.Attention.CAUSAL`` for ``self.arch``."""
    key = Keys.Attention.CAUSAL.format(arch=self.arch)
    self.add_bool(key, value)
681
+
682
def add_q_lora_rank(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.Attention.Q_LORA_RANK`` for ``self.arch``."""
    key = Keys.Attention.Q_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
684
+
685
def add_kv_lora_rank(self, length: int) -> None:
    """Store ``length`` as a uint32 under ``Keys.Attention.KV_LORA_RANK`` for ``self.arch``."""
    key = Keys.Attention.KV_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
687
+
688
def add_relative_attn_buckets_count(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.Attention.REL_BUCKETS_COUNT`` for ``self.arch``."""
    key = Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch)
    self.add_uint32(key, value)
690
+
691
def add_sliding_window(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.Attention.SLIDING_WINDOW`` for ``self.arch``."""
    key = Keys.Attention.SLIDING_WINDOW.format(arch=self.arch)
    self.add_uint32(key, value)
693
+
694
def add_pooling_type(self, value: PoolingType) -> None:
    """Store ``value.value`` as a uint32 under ``Keys.LLM.POOLING_TYPE`` for ``self.arch``."""
    key = Keys.LLM.POOLING_TYPE.format(arch=self.arch)
    self.add_uint32(key, value.value)
696
+
697
def add_rope_dimension_count(self, count: int) -> None:
    """Store ``count`` as a uint32 under ``Keys.Rope.DIMENSION_COUNT`` for ``self.arch``."""
    key = Keys.Rope.DIMENSION_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
699
+
700
def add_rope_freq_base(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Rope.FREQ_BASE`` for ``self.arch``."""
    key = Keys.Rope.FREQ_BASE.format(arch=self.arch)
    self.add_float32(key, value)
702
+
703
def add_rope_scaling_type(self, value: RopeScalingType) -> None:
    """Store ``value.value`` as a string under ``Keys.Rope.SCALING_TYPE`` for ``self.arch``."""
    key = Keys.Rope.SCALING_TYPE.format(arch=self.arch)
    self.add_string(key, value.value)
705
+
706
def add_rope_scaling_factor(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Rope.SCALING_FACTOR`` for ``self.arch``."""
    key = Keys.Rope.SCALING_FACTOR.format(arch=self.arch)
    self.add_float32(key, value)
708
+
709
def add_rope_scaling_attn_factors(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Rope.SCALING_ATTN_FACTOR`` for ``self.arch``."""
    key = Keys.Rope.SCALING_ATTN_FACTOR.format(arch=self.arch)
    self.add_float32(key, value)
711
+
712
def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.Rope.SCALING_ORIG_CTX_LEN`` for ``self.arch``."""
    key = Keys.Rope.SCALING_ORIG_CTX_LEN.format(arch=self.arch)
    self.add_uint32(key, value)
714
+
715
def add_rope_scaling_finetuned(self, value: bool) -> None:
    """Store ``value`` as a bool under ``Keys.Rope.SCALING_FINETUNED`` for ``self.arch``."""
    key = Keys.Rope.SCALING_FINETUNED.format(arch=self.arch)
    self.add_bool(key, value)
717
+
718
def add_rope_scaling_yarn_log_mul(self, value: float) -> None:
    """Store ``value`` as a float32 under ``Keys.Rope.SCALING_YARN_LOG_MUL`` for ``self.arch``."""
    key = Keys.Rope.SCALING_YARN_LOG_MUL.format(arch=self.arch)
    self.add_float32(key, value)
720
+
721
def add_ssm_conv_kernel(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.SSM.CONV_KERNEL`` for ``self.arch``."""
    key = Keys.SSM.CONV_KERNEL.format(arch=self.arch)
    self.add_uint32(key, value)
723
+
724
def add_ssm_inner_size(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.SSM.INNER_SIZE`` for ``self.arch``."""
    key = Keys.SSM.INNER_SIZE.format(arch=self.arch)
    self.add_uint32(key, value)
726
+
727
def add_ssm_state_size(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.SSM.STATE_SIZE`` for ``self.arch``."""
    key = Keys.SSM.STATE_SIZE.format(arch=self.arch)
    self.add_uint32(key, value)
729
+
730
def add_ssm_time_step_rank(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.SSM.TIME_STEP_RANK`` for ``self.arch``."""
    key = Keys.SSM.TIME_STEP_RANK.format(arch=self.arch)
    self.add_uint32(key, value)
732
+
733
def add_ssm_dt_b_c_rms(self, value: bool) -> None:
    """Store ``value`` as a bool under ``Keys.SSM.DT_B_C_RMS`` for ``self.arch``."""
    key = Keys.SSM.DT_B_C_RMS.format(arch=self.arch)
    self.add_bool(key, value)
735
+
736
def add_tokenizer_model(self, model: str) -> None:
    """Store ``model`` as a string under ``Keys.Tokenizer.MODEL``."""
    key = Keys.Tokenizer.MODEL
    self.add_string(key, model)
738
+
739
def add_tokenizer_pre(self, pre: str) -> None:
    """Store ``pre`` as a string under ``Keys.Tokenizer.PRE``."""
    key = Keys.Tokenizer.PRE
    self.add_string(key, pre)
741
+
742
def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
    """Store ``tokens`` as an array under ``Keys.Tokenizer.LIST``."""
    key = Keys.Tokenizer.LIST
    self.add_array(key, tokens)
744
+
745
def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
    """Store ``merges`` as an array under ``Keys.Tokenizer.MERGES``."""
    key = Keys.Tokenizer.MERGES
    self.add_array(key, merges)
747
+
748
def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
    """Store ``types`` as an array under ``Keys.Tokenizer.TOKEN_TYPE``."""
    key = Keys.Tokenizer.TOKEN_TYPE
    self.add_array(key, types)
750
+
751
def add_token_type_count(self, value: int) -> None:
    """Store ``value`` as a uint32 under ``Keys.Tokenizer.TOKEN_TYPE_COUNT``."""
    key = Keys.Tokenizer.TOKEN_TYPE_COUNT
    self.add_uint32(key, value)
753
+
754
def add_token_scores(self, scores: Sequence[float]) -> None:
    """Store ``scores`` as an array under ``Keys.Tokenizer.SCORES``."""
    key = Keys.Tokenizer.SCORES
    self.add_array(key, scores)
756
+
757
def add_bos_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.BOS_ID``."""
    key = Keys.Tokenizer.BOS_ID
    self.add_uint32(key, id)
759
+
760
def add_eos_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.EOS_ID``."""
    key = Keys.Tokenizer.EOS_ID
    self.add_uint32(key, id)
762
+
763
def add_unk_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.UNK_ID``."""
    key = Keys.Tokenizer.UNK_ID
    self.add_uint32(key, id)
765
+
766
def add_sep_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.SEP_ID``."""
    key = Keys.Tokenizer.SEP_ID
    self.add_uint32(key, id)
768
+
769
def add_pad_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.PAD_ID``."""
    key = Keys.Tokenizer.PAD_ID
    self.add_uint32(key, id)
771
+
772
def add_cls_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.CLS_ID``."""
    key = Keys.Tokenizer.CLS_ID
    self.add_uint32(key, id)
774
+
775
def add_mask_token_id(self, id: int) -> None:
    """Store ``id`` as a uint32 under ``Keys.Tokenizer.MASK_ID``."""
    key = Keys.Tokenizer.MASK_ID
    self.add_uint32(key, id)
777
+
778
def add_add_bos_token(self, value: bool) -> None:
    """Pass ``value`` to ``add_bool`` under the ``Keys.Tokenizer.ADD_BOS`` key."""
    self.add_bool(Keys.Tokenizer.ADD_BOS, value)
780
+
781
def add_add_eos_token(self, value: bool) -> None:
    """Pass ``value`` to ``add_bool`` under the ``Keys.Tokenizer.ADD_EOS`` key."""
    self.add_bool(Keys.Tokenizer.ADD_EOS, value)
783
+
784
def add_add_space_prefix(self, value: bool) -> None:
    """Pass ``value`` to ``add_bool`` under the ``Keys.Tokenizer.ADD_PREFIX`` key."""
    self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
786
+
787
def add_remove_extra_whitespaces(self, value: bool) -> None:
    """Pass ``value`` to ``add_bool`` under the ``Keys.Tokenizer.REMOVE_EXTRA_WS`` key."""
    self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
789
+
790
def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
    """Pass ``charsmap`` to ``add_array`` under the ``Keys.Tokenizer.PRECOMPILED_CHARSMAP`` key."""
    self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
792
+
793
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
    """Record one chat template, or a collection of named chat templates.

    Args:
        value: Either the template text itself, or a sequence of mappings
            that each provide a ``'name'`` and a ``'template'`` entry.

    When a sequence is given:
      * every character of a name that is not an ASCII letter or digit is
        replaced by ``'_'``;
      * entries with an empty name or a missing template are ignored;
      * the entry named ``'default'`` is written under
        ``Keys.Tokenizer.CHAT_TEMPLATE``; if there is none, that key is
        not written at all;
      * every other entry is written under
        ``Keys.Tokenizer.CHAT_TEMPLATE_N`` (formatted with its name), and
        the list of those names is written under
        ``Keys.Tokenizer.CHAT_TEMPLATES``.
    """
    if not isinstance(value, str):
        # Hoisted out of the loop: the allowed alphabet never changes.
        allowed_chars = frozenset(ascii_letters + digits)
        template_default = None
        # A dict is used as an insertion-ordered set so the names array is
        # written in input order. Iterating a plain set of strings depends
        # on hash randomization, which made the output non-reproducible.
        template_names: dict[str, None] = {}

        for choice in value:
            name = choice.get('name', '')
            template = choice.get('template')

            # Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
            name = ''.join(c if c in allowed_chars else '_' for c in name)

            if name and template is not None:
                if name == 'default':
                    template_default = template
                else:
                    template_names[name] = None
                    self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)

        if template_names:
            self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, list(template_names))

        if template_default is None:
            return

        value = template_default

    self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
821
+
822
def add_prefix_token_id(self, id: int) -> None:
    """Pass ``id`` to ``add_uint32`` under the ``Keys.Tokenizer.PREFIX_ID`` key."""
    self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
824
+
825
def add_suffix_token_id(self, id: int) -> None:
    """Pass ``id`` to ``add_uint32`` under the ``Keys.Tokenizer.SUFFIX_ID`` key."""
    self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
827
+
828
def add_middle_token_id(self, id: int) -> None:
    """Pass ``id`` to ``add_uint32`` under the ``Keys.Tokenizer.MIDDLE_ID`` key."""
    self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
830
+
831
def add_eot_token_id(self, id: int) -> None:
    """Pass ``id`` to ``add_uint32`` under the ``Keys.Tokenizer.EOT_ID`` key."""
    self.add_uint32(Keys.Tokenizer.EOT_ID, id)
833
+
834
def add_eom_token_id(self, id: int) -> None:
    """Pass ``id`` to ``add_uint32`` under the ``Keys.Tokenizer.EOM_ID`` key."""
    self.add_uint32(Keys.Tokenizer.EOM_ID, id)
836
+
837
+ def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
838
+ pack_prefix = ''
839
+ if not skip_pack_prefix:
840
+ pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
841
+ return struct.pack(f'{pack_prefix}{fmt}', value)
842
+
843
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
    """Serialize one metadata value into a byte buffer.

    Args:
        val: The value to serialize.
        vtype: The value type that selects the encoding.
        add_vtype: When true, the buffer starts with ``vtype`` packed as ``"I"``.

    Layout, as produced below:
      * types found in ``self._simple_value_packing`` are packed with the
        mapped struct format (BOOL is packed without a byte-order prefix);
      * STRING is a ``"Q"`` length followed by the bytes (``str`` is
        UTF-8 encoded first);
      * ARRAY is the item type as ``"I"``, the item count as ``"Q"``, then
        every item packed recursively without its own type tag.

    Raises:
        ValueError: for a non-sequence or empty array, for an array whose
            items do not all map to the same type, or for an unsupported
            ``vtype``.
    """
    out = bytearray()

    if add_vtype:
        out += self._pack("I", vtype)

    simple_fmt = self._simple_value_packing.get(vtype)
    if simple_fmt is not None:
        out += self._pack(simple_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL)
        return out

    if vtype == GGUFValueType.STRING:
        raw = val.encode("utf-8") if isinstance(val, str) else val
        out += self._pack("Q", len(raw))
        out += raw
        return out

    if vtype == GGUFValueType.ARRAY:
        if not isinstance(val, Sequence):
            raise ValueError("Invalid GGUF metadata array, expecting sequence")

        if len(val) == 0:
            raise ValueError("Invalid GGUF metadata array. Empty array")

        if isinstance(val, bytes):
            item_type = GGUFValueType.UINT8
        else:
            # The first item decides the array's item type; the rest must agree.
            item_type = GGUFValueType.get_type(val[0])
            if not all(GGUFValueType.get_type(i) is item_type for i in val[1:]):
                raise ValueError("All items in a GGUF array should be of the same type")

        out += self._pack("I", item_type)
        out += self._pack("Q", len(val))
        for element in val:
            out += self._pack_val(element, item_type, add_vtype=False)
        return out

    raise ValueError("Invalid GGUF metadata value type or value")
878
+
879
+ @staticmethod
880
+ def format_n_bytes_to_str(num: int) -> str:
881
+ if num == 0:
882
+ return "negligible - metadata only"
883
+ fnum = float(num)
884
+ for unit in ("", "K", "M", "G"):
885
+ if abs(fnum) < 1000.0:
886
+ return f"{fnum:3.1f}{unit}"
887
+ fnum /= 1000.0
888
+ return f"{fnum:.1f}T - over 1TB, split recommended"
.venv/lib/python3.11/site-packages/gguf/lazy.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from abc import ABC, ABCMeta, abstractmethod
3
+
4
+ import logging
5
+ from typing import Any, Callable
6
+
7
+ import numpy as np
8
+ from numpy.typing import DTypeLike
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
class LazyMeta(ABCMeta):
    """Metaclass that injects attribute forwarding and operator wrappers.

    Every class created with this metaclass gets:
      * a ``__getattr__`` that looks the attribute up on ``self._meta`` and
        wraps callables and tensor-typed attributes with ``_wrap_fn``;
      * explicit ``__op__`` methods for the operators listed below, because
        special methods are looked up on the type and bypass ``__getattr__``.
    """

    def __new__(cls, name: str, bases: tuple[type, ...], namespace: dict[str, Any], **kwargs):
        def __getattr__(self, name: str) -> Any:
            meta_attr = getattr(self._meta, name)
            if callable(meta_attr):
                return type(self)._wrap_fn(
                    (lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
                    use_self=self,
                )
            elif isinstance(meta_attr, self._tensor_type):
                # e.g. self.T with torch.Tensor should still be wrapped
                return type(self)._wrap_fn(lambda s: getattr(s, name))(self)
            else:
                # no need to wrap non-tensor properties,
                # and they likely don't depend on the actual contents of the tensor
                return meta_attr

        namespace["__getattr__"] = __getattr__

        # A builder is needed so that each wrapper captures its own op name;
        # closures created directly in the loops below would all see the
        # loop variable's final value (late binding).
        def mk_wrap(op_name: str, *, meta_noop: bool = False):
            # need to wrap the wrapper to get self
            def wrapped_special_op(self, *args, **kwargs):
                return type(self)._wrap_fn(
                    getattr(type(self)._tensor_type, op_name),
                    meta_noop=meta_noop,
                )(self, *args, **kwargs)
            return wrapped_special_op

        # special methods bypass __getattr__, so they need to be added manually
        # ref: https://docs.python.org/3/reference/datamodel.html#special-lookup
        # NOTE: doing this from a metaclass is very convenient
        # TODO: make this even more comprehensive
        for binary_op in (
            # NOTE: the comma after "not" was missing, which concatenated it
            # with "abs" into "notabs" and left __abs__ unregistered.
            "lt", "le", "eq", "ne", "ge", "gt", "not",
            "abs", "add", "and", "floordiv", "invert", "lshift", "mod", "mul", "matmul",
            "neg", "or", "pos", "pow", "rshift", "sub", "truediv", "xor",
            "iadd", "iand", "ifloordiv", "ilshift", "imod", "imul", "ior", "irshift", "isub", "ixor",
            "radd", "rand", "rfloordiv", "rmul", "ror", "rpow", "rsub", "rtruediv", "rxor",
        ):
            attr_name = f"__{binary_op}__"
            # the result of these operators usually has the same shape and dtype as the input,
            # so evaluation on the meta tensor can be skipped.
            namespace[attr_name] = mk_wrap(attr_name, meta_noop=True)

        for special_op in (
            "getitem", "setitem", "len",
        ):
            attr_name = f"__{special_op}__"
            namespace[attr_name] = mk_wrap(attr_name, meta_noop=False)

        return super().__new__(cls, name, bases, namespace, **kwargs)
69
+
70
+
71
+ # Tree of lazy tensors
72
class LazyBase(ABC, metaclass=LazyMeta):
    """Node in a tree of deferred tensor operations.

    A node either holds concrete ``_data`` or a ``_func`` together with the
    ``_args`` / ``_kwargs`` needed to compute it. ``_meta`` is a placeholder
    that carries the dtype and shape of the eventual result. Subclasses must
    define ``_tensor_type`` and implement ``meta_with_dtype_and_shape``.
    """

    _tensor_type: type
    _meta: Any
    _data: Any | None
    _args: tuple
    _kwargs: dict[str, Any]
    _func: Callable[[Any], Any] | None

    def __init__(self, *, meta: Any, data: Any | None = None, args: tuple = (), kwargs: dict[str, Any] | None = None, func: Callable[[Any], Any] | None = None):
        """Create a node; at least one of ``data`` and ``func`` must be given."""
        super().__init__()
        self._meta = meta
        self._data = data
        self._args = args
        self._kwargs = kwargs if kwargs is not None else {}
        self._func = func
        assert self._func is not None or self._data is not None

    def __init_subclass__(cls) -> None:
        """Reject subclasses that do not define ``_tensor_type`` themselves."""
        if "_tensor_type" not in cls.__dict__:
            raise TypeError(f"property '_tensor_type' must be defined for {cls!r}")
        return super().__init_subclass__()

    @staticmethod
    def _recurse_apply(o: Any, fn: Callable[[Any], Any]) -> Any:
        """Apply ``fn`` to every ``LazyBase`` found in nested lists/tuples.

        The list/tuple structure is preserved; any other object is returned
        unchanged.
        """
        # TODO: dict and set
        if isinstance(o, (list, tuple)):
            L = []
            for item in o:
                L.append(LazyBase._recurse_apply(item, fn))
            if isinstance(o, tuple):
                L = tuple(L)
            return L
        elif isinstance(o, LazyBase):
            return fn(o)
        else:
            return o

    @classmethod
    def _wrap_fn(cls, fn: Callable, *, use_self: LazyBase | None = None, meta_noop: bool | DTypeLike | tuple[DTypeLike, Callable[[tuple[int, ...]], tuple[int, ...]]] = False) -> Callable[[Any], Any]:
        """Return a wrapper around ``fn`` that defers it when it yields a tensor.

        Args:
            fn: The function to wrap.
            use_self: If given, prepended to the positional arguments.
            meta_noop: ``False`` runs ``fn`` on the meta arguments to find the
                result's dtype/shape. ``True`` reuses the first argument's
                meta. A dtype reuses the first argument's shape with that
                dtype. A ``(dtype, shape_fn)`` tuple also maps the shape
                through ``shape_fn``.

        The wrapper returns a new lazy node when the meta result is an
        instance of ``_tensor_type``; otherwise it evaluates the arguments
        and calls ``fn`` immediately.
        """
        def wrapped_fn(*args, **kwargs):
            # NOTE: ``kwargs`` collected via ``**kwargs`` is always a dict,
            # so no ``None`` check is needed here.
            args = ((use_self,) if use_self is not None else ()) + args

            meta_args = LazyBase._recurse_apply(args, lambda t: t._meta)
            # TODO: maybe handle tensors in kwargs too

            if isinstance(meta_noop, bool) and not meta_noop:
                try:
                    res = fn(*meta_args, **kwargs)
                except NotImplementedError:
                    # running some operations on PyTorch's Meta tensors can cause this exception
                    res = None
            else:
                # some operators don't need to actually run on the meta tensors
                assert len(args) > 0
                res = args[0]
                assert isinstance(res, cls)
                res = res._meta
                # allow operations to override the dtype and shape
                if meta_noop is not True:
                    if isinstance(meta_noop, tuple):
                        dtype, shape = meta_noop
                        assert callable(shape)
                        res = cls.meta_with_dtype_and_shape(dtype, shape(res.shape))
                    else:
                        res = cls.meta_with_dtype_and_shape(meta_noop, res.shape)

            if isinstance(res, cls._tensor_type):
                return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
            else:
                del res  # not needed
                # non-tensor return likely relies on the contents of the args
                # (e.g. the result of torch.equal)
                eager_args = cls.to_eager(args)
                return fn(*eager_args, **kwargs)
        return wrapped_fn

    @classmethod
    def to_eager(cls, t: Any) -> Any:
        """Evaluate every lazy node inside ``t`` and return the concrete data.

        Each node's result is stored in its ``_data`` and its ``_args`` are
        replaced by their evaluated form, so a node is computed only once.
        """
        def simple_to_eager(_t: LazyBase) -> Any:
            if _t._data is not None:
                return _t._data

            # NOTE: there's a recursion limit in Python (usually 1000)

            assert _t._func is not None
            _t._args = cls._recurse_apply(_t._args, simple_to_eager)
            _t._data = _t._func(*_t._args, **_t._kwargs)
            # sanity check
            assert _t._data is not None
            assert _t._data.dtype == _t._meta.dtype
            assert _t._data.shape == _t._meta.shape

            return _t._data

        # recurse into lists and/or tuples, keeping their structure
        return cls._recurse_apply(t, simple_to_eager)

    @classmethod
    def eager_to_meta(cls, t: Any) -> Any:
        """Build a meta placeholder with the dtype and shape of ``t``."""
        return cls.meta_with_dtype_and_shape(t.dtype, t.shape)

    # must be overridden, meta tensor init is backend-specific
    @classmethod
    @abstractmethod
    def meta_with_dtype_and_shape(cls, dtype: Any, shape: Any) -> Any: pass

    @classmethod
    def from_eager(cls, t: Any) -> Any:
        """Wrap a concrete tensor in a lazy node.

        Returns ``t`` unchanged when it is already exactly of type ``cls``.

        Raises:
            TypeError: if ``t`` is neither a ``cls`` nor a ``_tensor_type``.
        """
        if type(t) is cls:
            # already lazy
            return t
        elif isinstance(t, cls._tensor_type):
            return cls(meta=cls.eager_to_meta(t), data=t)
        else:
            # The exception used to be *returned*, which silently handed an
            # exception object to the caller in place of a tensor.
            raise TypeError(f"{type(t)!r} is not compatible with {cls._tensor_type!r}")
189
+
190
+
191
class LazyNumpyTensor(LazyBase):
    """Lazy tensor backed by ``numpy.ndarray``."""

    _tensor_type = np.ndarray

    shape: tuple[int, ...]  # Makes the type checker happy in quants.py

    @classmethod
    def meta_with_dtype_and_shape(cls, dtype: DTypeLike, shape: tuple[int, ...]) -> np.ndarray[Any, Any]:
        """Return a placeholder array of the requested dtype and shape.

        A one-element zero array is viewed with all-zero strides, so every
        index of the result refers to that single element.
        """
        # The initial idea was to use np.nan as the fill value,
        # but non-float types like np.int16 can't use that.
        # So zero it is.
        placeholder = np.zeros(1, dtype)
        zero_strides = tuple(0 for _ in shape)
        return np.lib.stride_tricks.as_strided(placeholder, shape, zero_strides)

    def astype(self, dtype, *args, **kwargs):
        """Return a lazy node that casts this tensor to ``dtype`` when evaluated."""
        lazy_cls = type(self)
        new_meta = lazy_cls.meta_with_dtype_and_shape(dtype, self._meta.shape)

        def _cast(a, *args, **kwargs):
            return a.astype(*args, **kwargs)

        return lazy_cls(meta=new_meta, args=(self, dtype) + args, kwargs=kwargs, func=_cast)

    def tofile(self, *args, **kwargs):
        """Evaluate this tensor and forward the call to the array's ``tofile``."""
        concrete = LazyNumpyTensor.to_eager(self)
        return concrete.tofile(*args, **kwargs)

    # TODO: __array_function__
.venv/lib/python3.11/site-packages/gguf/metadata.py ADDED
@@ -0,0 +1,510 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import json
5
+ import yaml
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Any, Literal, Optional
9
+ from dataclasses import dataclass
10
+
11
+ from .constants import Keys
12
+
13
+ import gguf
14
+
15
+ logger = logging.getLogger("metadata")
16
+
17
+
18
+ @dataclass
19
+ class Metadata:
20
+ # Authorship Metadata to be written to GGUF KV Store
21
+ name: Optional[str] = None
22
+ author: Optional[str] = None
23
+ version: Optional[str] = None
24
+ organization: Optional[str] = None
25
+ finetune: Optional[str] = None
26
+ basename: Optional[str] = None
27
+ description: Optional[str] = None
28
+ quantized_by: Optional[str] = None
29
+ size_label: Optional[str] = None
30
+ url: Optional[str] = None
31
+ doi: Optional[str] = None
32
+ uuid: Optional[str] = None
33
+ repo_url: Optional[str] = None
34
+ source_url: Optional[str] = None
35
+ source_doi: Optional[str] = None
36
+ source_uuid: Optional[str] = None
37
+ source_repo_url: Optional[str] = None
38
+ license: Optional[str] = None
39
+ license_name: Optional[str] = None
40
+ license_link: Optional[str] = None
41
+ base_models: Optional[list[dict]] = None
42
+ tags: Optional[list[str]] = None
43
+ languages: Optional[list[str]] = None
44
+ datasets: Optional[list[str]] = None
45
+
46
@staticmethod
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
    """Assemble authorship metadata for a model.

    Values are filled in three layers, later ones winning:
      1. heuristics over the model card, ``config.json`` and the folder name
         (``apply_metadata_heuristic``);
      2. entries from the override file, looked up by the keys listed below;
      3. ``model_name``, when given, replaces the name.
    """
    # Create a new Metadata instance
    metadata = Metadata()

    model_card = Metadata.load_model_card(model_path)
    hf_params = Metadata.load_hf_parameters(model_path)
    # TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter

    # heuristics
    metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)

    # Metadata Override File Provided
    # This is based on LLM_KV_NAMES mapping in llama.cpp
    metadata_override = Metadata.load_metadata_override(metadata_override_path)

    # (attribute name, override-file key); a missing key keeps the current value
    overridable = (
        ("name", Keys.General.NAME),
        ("author", Keys.General.AUTHOR),
        ("version", Keys.General.VERSION),
        ("organization", Keys.General.ORGANIZATION),
        ("finetune", Keys.General.FINETUNE),
        ("basename", Keys.General.BASENAME),
        ("description", Keys.General.DESCRIPTION),
        ("quantized_by", Keys.General.QUANTIZED_BY),
        ("size_label", Keys.General.SIZE_LABEL),
        ("license_name", Keys.General.LICENSE_NAME),
        ("license_link", Keys.General.LICENSE_LINK),
        ("url", Keys.General.URL),
        ("doi", Keys.General.DOI),
        ("uuid", Keys.General.UUID),
        ("repo_url", Keys.General.REPO_URL),
        ("source_url", Keys.General.SOURCE_URL),
        ("source_doi", Keys.General.SOURCE_DOI),
        ("source_uuid", Keys.General.SOURCE_UUID),
        ("source_repo_url", Keys.General.SOURCE_REPO_URL),
        # Base Models is received here as an array of models
        ("base_models", "general.base_models"),
        ("tags", Keys.General.TAGS),
        ("languages", Keys.General.LANGUAGES),
        ("datasets", Keys.General.DATASETS),
    )
    for attr, key in overridable:
        setattr(metadata, attr, metadata_override.get(key, getattr(metadata, attr)))

    # Direct Metadata Override (via direct cli argument)
    if model_name is not None:
        metadata.name = model_name

    return metadata
103
+
104
+ @staticmethod
105
+ def load_metadata_override(metadata_override_path: Optional[Path] = None) -> dict[str, Any]:
106
+ if metadata_override_path is None or not metadata_override_path.is_file():
107
+ return {}
108
+
109
+ with open(metadata_override_path, "r", encoding="utf-8") as f:
110
+ return json.load(f)
111
+
112
+ @staticmethod
113
+ def load_model_card(model_path: Optional[Path] = None) -> dict[str, Any]:
114
+ if model_path is None or not model_path.is_dir():
115
+ return {}
116
+
117
+ model_card_path = model_path / "README.md"
118
+
119
+ if not model_card_path.is_file():
120
+ return {}
121
+
122
+ # The model card metadata is assumed to always be in YAML
123
+ # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
124
+ with open(model_card_path, "r", encoding="utf-8") as f:
125
+ if f.readline() == "---\n":
126
+ raw = f.read().partition("---\n")[0]
127
+ data = yaml.safe_load(raw)
128
+ if isinstance(data, dict):
129
+ return data
130
+ else:
131
+ logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
132
+ return {}
133
+ else:
134
+ return {}
135
+
136
+ @staticmethod
137
+ def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
138
+ if model_path is None or not model_path.is_dir():
139
+ return {}
140
+
141
+ config_path = model_path / "config.json"
142
+
143
+ if not config_path.is_file():
144
+ return {}
145
+
146
+ with open(config_path, "r", encoding="utf-8") as f:
147
+ return json.load(f)
148
+
149
+ @staticmethod
150
+ def id_to_title(string):
151
+ # Convert capitalization into title form unless acronym or version number
152
+ return ' '.join([w.title() if w.islower() and not re.match(r'^(v\d+(?:\.\d+)*|\d.*)$', w) else w for w in string.strip().replace('-', ' ').split()])
153
+
154
@staticmethod
def get_model_id_components(model_id: Optional[str] = None, total_params: int = 0) -> tuple[str | None, str | None, str | None, str | None, str | None, str | None]:
    """Split a model ID into naming components using heuristics.

    Args:
        model_id: Typically ``'<org>/<model name>'`` or just a model name.
        total_params: Parameter count used to sanity-check size labels.
            ``0`` disables the check. Per the inline comments below, a
            negative value is treated as a LoRA adapter.

    Returns:
        A 6-tuple ``(model_full_name_component, org_component, basename,
        finetune, version, size_label)``; any element may be ``None``.
        All six are ``None`` when ``model_id`` is ``None``. When
        ``model_id`` contains a space it is returned whole as the first
        element and the rest are ``None``.
    """
    # Huggingface often stores the model id as '<org>/<model name>'
    # so let's parse it and apply some heuristics if possible for model name components

    if model_id is None:
        # model ID missing
        return None, None, None, None, None, None

    if ' ' in model_id:
        # model ID is actually a normal human sentence
        # which means it's most likely a normal model name only
        # not part of the hugging face naming standard, but whatever
        return model_id, None, None, None, None, None

    if '/' in model_id:
        # model ID (huggingface style)
        org_component, model_full_name_component = model_id.split('/', 1)
    else:
        # model ID but missing org components
        org_component, model_full_name_component = None, model_id

    # Check if we erroneously matched against './' or '../' etc...
    if org_component is not None and len(org_component) > 0 and org_component[0] == '.':
        org_component = None

    name_parts: list[str] = model_full_name_component.split('-')

    # Remove empty parts
    # (iterating in reverse keeps the remaining indices valid while deleting)
    for i in reversed(range(len(name_parts))):
        if len(name_parts[i]) == 0:
            del name_parts[i]

    # One set of annotations per name part, filled in by the passes below
    name_types: list[
        set[Literal["basename", "size_label", "finetune", "version", "type"]]
    ] = [set() for _ in name_parts]

    # Annotate the name
    for i, part in enumerate(name_parts):
        # Version
        if re.fullmatch(r'(v|iter)?\d+([.]\d+)*', part, re.IGNORECASE):
            name_types[i].add("version")
        # Quant type (should not be there for base models, but still annotated)
        elif re.fullmatch(r'i?q\d(_\w)*|b?fp?(16|32)', part, re.IGNORECASE):
            name_types[i].add("type")
            name_parts[i] = part.upper()
        # Model size
        elif i > 0 and re.fullmatch(r'(([A]|\d+[x])?\d+([._]\d+)?[KMBT][\d]?|small|mini|medium|large|x?xl)', part, re.IGNORECASE):
            part = part.replace("_", ".")
            # Handle weird bloom-7b1 notation
            # (moves the trailing digit in front of the unit letter: '7b1' -> '7.1b')
            if part[-1].isdecimal():
                part = part[:-2] + "." + part[-1] + part[-2]
            # Normalize the size suffixes
            if len(part) > 1 and part[-2].isdecimal():
                if part[-1] in "kmbt":
                    part = part[:-1] + part[-1].upper()
            if total_params != 0:
                try:
                    # The unit letter's position in " KMBT" is its power of 1000
                    label_params = float(part[:-1]) * pow(1000, " KMBT".find(part[-1]))
                    # Only use it as a size label if it's close or bigger than the model size
                    # Note that LoRA adapters don't necessarily include all layers,
                    # so this is why bigger label sizes are accepted.
                    # Do not use the size label when it's smaller than 1/8 of the model size
                    if (total_params < 0 and label_params < abs(total_params) // 8) or (
                        # Check both directions when the current model isn't a LoRA adapter
                        total_params > 0 and abs(label_params - total_params) > 7 * total_params // 8
                    ):
                        # Likely a context length
                        name_types[i].add("finetune")
                        # Lowercase the size when it's a context length
                        part = part[:-1] + part[-1].lower()
                except ValueError:
                    # Failed to convert the size label to float, use it anyway
                    pass
            if len(name_types[i]) == 0:
                name_types[i].add("size_label")
            name_parts[i] = part
        # Some easy to recognize finetune names
        elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
            if total_params < 0 and part.lower() == "lora":
                # ignore redundant "lora" in the finetune part when the output is a lora adapter
                name_types[i].add("type")
            else:
                name_types[i].add("finetune")

    # Ignore word-based size labels when there is at least a number-based one present
    # TODO: should word-based size labels always be removed instead?
    if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
        for n, t in zip(name_parts, name_types):
            if "size_label" in t:
                if all(c.isalpha() for c in n):
                    t.remove("size_label")

    at_start = True
    # Find the basename through the annotated name
    for part, t in zip(name_parts, name_types):
        if at_start and ((len(t) == 0 and part[0].isalpha()) or "version" in t):
            t.add("basename")
        else:
            if at_start:
                at_start = False
            # Anything still unannotated after the basename counts as finetune
            if len(t) == 0:
                t.add("finetune")

    # Remove the basename annotation from trailing version
    for part, t in zip(reversed(name_parts), reversed(name_types)):
        if "basename" in t and len(t) > 1:
            t.remove("basename")
        else:
            break

    basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
    # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
    size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
    finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
    # TODO: should the basename version always be excluded?
    # NOTE: multiple finetune versions are joined together
    version = "-".join(v for v, t, in zip(name_parts, name_types) if "version" in t and "basename" not in t) or None

    if size_label is None and finetune is None and version is None:
        # Too ambiguous, output nothing
        basename = None

    return model_full_name_component, org_component, basename, finetune, version, size_label
278
+
279
@staticmethod
def apply_metadata_heuristic(metadata: Metadata, model_card: Optional[dict] = None, hf_params: Optional[dict] = None, model_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
    """Fill missing fields of ``metadata`` from the available sources.

    Sources are consulted in this order, and scalar fields are only set
    while they are still ``None``, so earlier sources take precedence:
      1. ``model_card`` (several key naming conventions are tried);
      2. ``hf_params['_name_or_path']``, when it has at most one ``'/'``;
      3. the final component of ``model_path``.

    List fields (``tags``, ``languages``, ``datasets``, ``base_models``)
    are appended to rather than replaced.

    Args:
        metadata: Instance to update; it is modified in place and returned.
        model_card: Parsed model card frontmatter.
        hf_params: Parsed Hugging Face ``config.json``.
        model_path: Model directory; only its ``name`` is used here.
        total_params: Forwarded to ``get_model_id_components``.

    Returns:
        The same ``metadata`` object.
    """
    # Reference Model Card Metadata: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1

    # Model Card Heuristics
    ########################
    if model_card is not None:

        def use_model_card_metadata(metadata_key: str, model_card_key: str):
            # Copy a scalar value from the model card unless the field is already set
            if model_card_key in model_card and getattr(metadata, metadata_key, None) is None:
                setattr(metadata, metadata_key, model_card.get(model_card_key))

        def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
            # Note: Will append rather than replace if already exist
            tags_value = model_card.get(model_card_key, None)
            if tags_value is None:
                return

            current_value = getattr(metadata, metadata_key, None)
            if current_value is None:
                current_value = []

            # Values that are neither a string nor a list are ignored
            if isinstance(tags_value, str):
                current_value.append(tags_value)
            elif isinstance(tags_value, list):
                current_value.extend(tags_value)

            setattr(metadata, metadata_key, current_value)

        # LLAMA.cpp's direct internal convention
        # (Definitely not part of hugging face formal/informal standard)
        #########################################
        use_model_card_metadata("name", "name")
        use_model_card_metadata("author", "author")
        use_model_card_metadata("version", "version")
        use_model_card_metadata("organization", "organization")
        use_model_card_metadata("description", "description")
        use_model_card_metadata("finetune", "finetune")
        use_model_card_metadata("basename", "basename")
        use_model_card_metadata("size_label", "size_label")
        use_model_card_metadata("source_url", "url")
        use_model_card_metadata("source_doi", "doi")
        use_model_card_metadata("source_uuid", "uuid")
        use_model_card_metadata("source_repo_url", "repo_url")

        # LLAMA.cpp's huggingface style convention
        # (Definitely not part of hugging face formal/informal standard... but with model_ appended to match their style)
        ###########################################
        use_model_card_metadata("name", "model_name")
        use_model_card_metadata("author", "model_author")
        use_model_card_metadata("version", "model_version")
        use_model_card_metadata("organization", "model_organization")
        use_model_card_metadata("description", "model_description")
        use_model_card_metadata("finetune", "model_finetune")
        use_model_card_metadata("basename", "model_basename")
        use_model_card_metadata("size_label", "model_size_label")
        use_model_card_metadata("source_url", "model_url")
        use_model_card_metadata("source_doi", "model_doi")
        use_model_card_metadata("source_uuid", "model_uuid")
        use_model_card_metadata("source_repo_url", "model_repo_url")

        # Hugging Face Direct Convention
        #################################

        # Not part of the huggingface model card standard, but some model creators use it
        # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
        # NOTE(review): ("name", "model_name") repeats a call made in the section above
        use_model_card_metadata("name", "model_name")
        use_model_card_metadata("author", "model_creator")
        use_model_card_metadata("basename", "model_type")

        if "base_model" in model_card:
            # This represents the parent models that this is based on
            # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
            # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
            metadata_base_models = []
            base_model_value = model_card.get("base_model", None)

            if base_model_value is not None:
                if isinstance(base_model_value, str):
                    metadata_base_models.append(base_model_value)
                elif isinstance(base_model_value, list):
                    metadata_base_models.extend(base_model_value)

            if metadata.base_models is None:
                metadata.base_models = []

            for model_id in metadata_base_models:
                # NOTE: model size of base model is assumed to be similar to the size of the current model
                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                base_model = {}
                if model_full_name_component is not None:
                    base_model["name"] = Metadata.id_to_title(model_full_name_component)
                if org_component is not None:
                    base_model["organization"] = Metadata.id_to_title(org_component)
                if version is not None:
                    base_model["version"] = version
                if org_component is not None and model_full_name_component is not None:
                    base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
                metadata.base_models.append(base_model)

        use_model_card_metadata("license", "license")
        use_model_card_metadata("license_name", "license_name")
        use_model_card_metadata("license_link", "license_link")

        use_array_model_card_metadata("tags", "tags")
        use_array_model_card_metadata("tags", "pipeline_tag")

        use_array_model_card_metadata("languages", "languages")
        use_array_model_card_metadata("languages", "language")

        use_array_model_card_metadata("datasets", "datasets")
        use_array_model_card_metadata("datasets", "dataset")

    # Hugging Face Parameter Heuristics
    ####################################

    if hf_params is not None:

        hf_name_or_path = hf_params.get("_name_or_path")
        if hf_name_or_path is not None and hf_name_or_path.count('/') <= 1:
            # Use _name_or_path only if it's actually a model name and not some computer path
            # e.g. 'meta-llama/Llama-2-7b-hf'
            model_id = hf_name_or_path
            model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
            if metadata.name is None and model_full_name_component is not None:
                metadata.name = Metadata.id_to_title(model_full_name_component)
            if metadata.organization is None and org_component is not None:
                metadata.organization = Metadata.id_to_title(org_component)
            if metadata.basename is None and basename is not None:
                metadata.basename = basename
            if metadata.finetune is None and finetune is not None:
                metadata.finetune = finetune
            if metadata.version is None and version is not None:
                metadata.version = version
            if metadata.size_label is None and size_label is not None:
                metadata.size_label = size_label

    # Directory Folder Name Fallback Heuristics
    ############################################
    if model_path is not None:
        model_id = model_path.name
        model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
        if metadata.name is None and model_full_name_component is not None:
            metadata.name = Metadata.id_to_title(model_full_name_component)
        if metadata.organization is None and org_component is not None:
            metadata.organization = Metadata.id_to_title(org_component)
        if metadata.basename is None and basename is not None:
            metadata.basename = basename
        if metadata.finetune is None and finetune is not None:
            metadata.finetune = finetune
        if metadata.version is None and version is not None:
            metadata.version = version
        if metadata.size_label is None and size_label is not None:
            metadata.size_label = size_label

    return metadata
435
+
436
def set_gguf_meta_model(self, gguf_writer: gguf.GGUFWriter):
    """Emit every populated metadata field of this object through ``gguf_writer``.

    ``self.name`` is mandatory (asserted); every other field is written only
    when it is not ``None``.  Each field ``x`` is passed to the writer method
    ``add_x``; base-model entries go to ``add_base_model_<key>(index, value)``.
    """
    assert self.name is not None
    gguf_writer.add_name(self.name)

    # Optional scalar fields, listed in the order they are emitted.
    optional_fields = (
        "author", "version", "organization",
        "finetune", "basename",
        "description", "quantized_by",
        "size_label",
        "license", "license_name", "license_link",
        "url", "doi", "uuid", "repo_url",
        "source_url", "source_doi", "source_uuid", "source_repo_url",
    )
    for field_name in optional_fields:
        field_value = getattr(self, field_name)
        if field_value is not None:
            getattr(gguf_writer, f"add_{field_name}")(field_value)

    if self.base_models is not None:
        gguf_writer.add_base_model_count(len(self.base_models))
        # Keys of a base-model entry, listed in the order they are emitted.
        entry_keys = ("name", "author", "version", "organization", "url", "doi", "uuid", "repo_url")
        for index, entry in enumerate(self.base_models):
            for entry_key in entry_keys:
                if entry_key in entry:
                    getattr(gguf_writer, f"add_base_model_{entry_key}")(index, entry[entry_key])

    # Array-valued fields come last.
    for field_name in ("tags", "languages", "datasets"):
        field_value = getattr(self, field_name)
        if field_value is not None:
            getattr(gguf_writer, f"add_{field_name}")(field_value)
.venv/lib/python3.11/site-packages/gguf/py.typed ADDED
File without changes
.venv/lib/python3.11/site-packages/gguf/quants.py ADDED
@@ -0,0 +1,1188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Callable, Sequence
4
+ from math import log2, ceil
5
+
6
+ from numpy.typing import DTypeLike
7
+
8
+ from .constants import GGML_QUANT_SIZES, GGMLQuantizationType, QK_K
9
+ from .lazy import LazyNumpyTensor
10
+
11
+ import numpy as np
12
+
13
+
14
def quant_shape_to_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType) -> tuple[int, ...]:
    """Convert an element-count shape into the byte shape of its quantized form.

    Only the last dimension changes: it is divided into blocks of
    ``block_size`` elements, each of which occupies ``type_size`` bytes.

    Raises:
        ValueError: if the last dimension is not a multiple of the block size.
    """
    block_size, type_size = GGML_QUANT_SIZES[quant_type]
    row_elems = shape[-1]
    n_blocks, leftover = divmod(row_elems, block_size)
    if leftover != 0:
        raise ValueError(f"Quantized tensor row size ({shape[-1]}) is not a multiple of {quant_type.name} block size ({block_size})")
    return (*shape[:-1], n_blocks * type_size)
19
+
20
+
21
def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizationType) -> tuple[int, ...]:
    """Convert the byte shape of a quantized tensor back into its element-count shape.

    Only the last dimension changes: it is divided into blocks of
    ``type_size`` bytes, each of which holds ``block_size`` elements.

    Raises:
        ValueError: if the last dimension is not a multiple of the type size.
    """
    block_size, type_size = GGML_QUANT_SIZES[quant_type]
    row_bytes = shape[-1]
    n_blocks, leftover = divmod(row_bytes, type_size)
    if leftover != 0:
        raise ValueError(f"Quantized tensor bytes per row ({shape[-1]}) is not a multiple of {quant_type.name} type size ({type_size})")
    return (*shape[:-1], n_blocks * block_size)
26
+
27
+
28
# This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
    """Apply ``func`` to groups of rows of ``arr`` and assemble the results.

    ``arr`` is flattened to 2-D (keeping its last axis), split into groups of
    rows, and each group is passed to ``func``.  The raveled results are
    concatenated into one buffer of dtype ``otype`` which is returned
    reshaped to ``oshape``.
    """
    flat_rows = arr.reshape((-1, arr.shape[-1]))
    total_out = int(np.prod(oshape, dtype=np.int64))
    result = np.empty(shape=total_out, dtype=otype)
    # compute over groups of 16 rows (arbitrary, but seems good for performance)
    group_count = max(flat_rows.shape[0] // 16, 1)
    pieces = [func(chunk).ravel() for chunk in np.array_split(flat_rows, group_count)]
    np.concatenate(pieces, axis=0, out=result)
    return result.reshape(oshape)
39
+
40
+
41
# round away from zero
# ref: https://stackoverflow.com/a/59143326/22827863
def np_roundf(n: np.ndarray) -> np.ndarray:
    """Round each element to the nearest integer, with halves going away from zero."""
    magnitude = abs(n)
    whole = np.floor(magnitude)
    # floor(2 * fraction) is 1 exactly when the fractional part is >= 0.5
    bump = np.floor(2 * (magnitude - whole))
    return np.sign(n) * (whole + bump)
48
+
49
+
50
class QuantError(Exception):
    """Raised when a tensor cannot be quantized to the requested type."""
51
+
52
+
53
# Registry mapping each quantization type to the class implementing it.
# Entries are added by __Quant.__init_subclass__ and looked up by
# quantize() / dequantize().
_type_traits: dict[GGMLQuantizationType, type[__Quant]] = {}
54
+
55
+
56
def quantize(data: np.ndarray, qtype: GGMLQuantizationType) -> np.ndarray:
    """Convert ``data`` to the representation of ``qtype``.

    F32 and F16 are plain dtype casts; any other type is delegated to the
    class registered for it in ``_type_traits``.

    Raises:
        NotImplementedError: if no implementation is registered for ``qtype``.
    """
    if qtype == GGMLQuantizationType.F32:
        return data.astype(np.float32, copy=False)
    if qtype == GGMLQuantizationType.F16:
        return data.astype(np.float16, copy=False)

    impl = _type_traits.get(qtype)
    if impl is None:
        raise NotImplementedError(f"Quantization for {qtype.name} is not yet implemented")
    return impl.quantize(data)
65
+
66
+
67
def dequantize(data: np.ndarray, qtype: GGMLQuantizationType) -> np.ndarray:
    """Convert data stored as ``qtype`` back to float32.

    F32 is reinterpreted in place, F16 is reinterpreted and widened; any
    other type is delegated to the class registered for it in ``_type_traits``.

    Raises:
        NotImplementedError: if no implementation is registered for ``qtype``.
    """
    if qtype == GGMLQuantizationType.F32:
        return data.view(np.float32)
    if qtype == GGMLQuantizationType.F16:
        return data.view(np.float16).astype(np.float32)

    impl = _type_traits.get(qtype)
    if impl is None:
        raise NotImplementedError(f"Dequantization for {qtype.name} is not yet implemented")
    return impl.dequantize(data)
76
+
77
+
78
class __Quant(ABC):
    """Abstract base of the per-type quantization helpers.

    Subclasses are namespaces of classmethods and are not meant to be
    instantiated.  Declaring a subclass with ``qtype=...`` records its block
    and type sizes from ``GGML_QUANT_SIZES``, builds lazy-tensor wrappers for
    its array conversions, and registers it in ``_type_traits``.
    """

    qtype: GGMLQuantizationType
    block_size: int
    type_size: int

    # Optional lookup grid; decoded on demand by init_grid() from grid_hex.
    grid: np.ndarray[Any, np.dtype[np.float32]] | None = None
    grid_shape: tuple[int, int] = (0, 0)
    grid_map: tuple[int | float, ...] = ()
    grid_hex: bytes | None = None

    def __init__(self):
        # Raise (rather than return) the exception so that the intended
        # message is what the caller actually sees.
        raise TypeError("Quant conversion classes can't have instances")

    def __init_subclass__(cls, qtype: GGMLQuantizationType) -> None:
        """Record the sizes of ``qtype`` on ``cls`` and register ``cls`` for it."""
        cls.qtype = qtype
        cls.block_size, cls.type_size = GGML_QUANT_SIZES[qtype]
        # Replace the placeholder lazy methods with wrappers around the
        # eager array implementations of this particular subclass.
        cls.__quantize_lazy = LazyNumpyTensor._wrap_fn(
            cls.__quantize_array,
            meta_noop=(np.uint8, cls.__shape_to_bytes)
        )
        cls.__dequantize_lazy = LazyNumpyTensor._wrap_fn(
            cls.__dequantize_array,
            meta_noop=(np.float32, cls.__shape_from_bytes)
        )
        assert qtype not in _type_traits
        _type_traits[qtype] = cls

    @classmethod
    def init_grid(cls):
        """Decode ``grid_hex`` into ``cls.grid`` if that has not been done yet.

        Does nothing when the grid is already built or when the class has no
        ``grid_hex``.
        """
        if cls.grid is not None or cls.grid_hex is None:
            return

        bits_per_elem = ceil(log2(len(cls.grid_map)))
        assert bits_per_elem != 0, cls.qtype.name
        elems_per_byte = 8 // bits_per_elem

        grid = np.frombuffer(cls.grid_hex, dtype=np.uint8)
        # decode hexadecimal chars from grid
        grid = grid.reshape((-1, 2))
        # letters (> 0x40) need +9 so that their low nibble becomes 10..15
        grid = (np.where(grid > 0x40, grid + 9, grid) & 0x0F) << np.array([4, 0], dtype=np.uint8).reshape((1, 2))
        grid = grid[..., 0] | grid[..., 1]
        # unpack the grid values
        grid = grid.reshape((-1, 1)) >> np.array([i for i in range(0, 8, 8 // elems_per_byte)], dtype=np.uint8).reshape((1, elems_per_byte))
        grid = (grid & ((1 << bits_per_elem) - 1)).reshape((-1, 1))
        # translate each packed index into its value from grid_map
        grid_map = np.array(cls.grid_map, dtype=np.float32).reshape((1, -1))
        grid = np.take_along_axis(grid_map, grid, axis=-1)
        cls.grid = grid.reshape((1, 1, *cls.grid_shape))

    @classmethod
    @abstractmethod
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Quantize float32 ``blocks`` of shape (n_blocks, block_size) to uint8 rows of ``type_size`` bytes."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Dequantize uint8 ``blocks`` of shape (n_blocks, type_size) to float32 rows of ``block_size`` values."""
        raise NotImplementedError

    @classmethod
    def quantize_rows(cls, rows: np.ndarray) -> np.ndarray:
        """Quantize ``rows`` block by block and return the bytes in the matching byte shape."""
        rows = rows.astype(np.float32, copy=False)
        shape = rows.shape
        n_blocks = rows.size // cls.block_size
        blocks = rows.reshape((n_blocks, cls.block_size))
        blocks = cls.quantize_blocks(blocks)
        assert blocks.dtype == np.uint8
        assert blocks.shape[-1] == cls.type_size
        return blocks.reshape(cls.__shape_to_bytes(shape))

    @classmethod
    def dequantize_rows(cls, rows: np.ndarray) -> np.ndarray:
        """Dequantize byte ``rows`` block by block and return float32 values in the matching element shape."""
        rows = rows.view(np.uint8)
        shape = rows.shape
        n_blocks = rows.size // cls.type_size
        blocks = rows.reshape((n_blocks, cls.type_size))
        blocks = cls.dequantize_blocks(blocks)
        assert blocks.dtype == np.float32
        assert blocks.shape[-1] == cls.block_size
        return blocks.reshape(cls.__shape_from_bytes(shape))

    @classmethod
    def __shape_to_bytes(cls, shape: Sequence[int]):
        return quant_shape_to_byte_shape(shape, cls.qtype)

    @classmethod
    def __shape_from_bytes(cls, shape: Sequence[int]):
        return quant_shape_from_byte_shape(shape, cls.qtype)

    @classmethod
    def __quantize_array(cls, array: np.ndarray) -> np.ndarray:
        return _apply_over_grouped_rows(cls.quantize_rows, arr=array, otype=np.uint8, oshape=cls.__shape_to_bytes(array.shape))

    @classmethod
    def __dequantize_array(cls, array: np.ndarray) -> np.ndarray:
        # grid-based types need their lookup grid before any block is decoded
        cls.init_grid()
        return _apply_over_grouped_rows(cls.dequantize_rows, arr=array, otype=np.float32, oshape=cls.__shape_from_bytes(array.shape))

    # The two lazy variants below are placeholders; __init_subclass__ replaces
    # them on every subclass.
    @classmethod
    def __quantize_lazy(cls, lazy_tensor: LazyNumpyTensor, /) -> Any:
        pass

    @classmethod
    def __dequantize_lazy(cls, lazy_tensor: LazyNumpyTensor, /) -> Any:
        pass

    @classmethod
    def can_quantize(cls, tensor: np.ndarray | LazyNumpyTensor) -> bool:
        """Return whether the last dimension of ``tensor`` is a multiple of the block size."""
        return tensor.shape[-1] % cls.block_size == 0

    @classmethod
    def quantize(cls, tensor: np.ndarray | LazyNumpyTensor) -> np.ndarray:
        """Quantize ``tensor``, using the lazy wrapper for lazy tensors.

        Raises:
            QuantError: if ``can_quantize(tensor)`` is false.
        """
        if not cls.can_quantize(tensor):
            raise QuantError(f"Can't quantize tensor with shape {tensor.shape} to {cls.qtype.name}")
        if isinstance(tensor, LazyNumpyTensor):
            return cls.__quantize_lazy(tensor)
        else:
            return cls.__quantize_array(tensor)

    @classmethod
    def dequantize(cls, tensor: np.ndarray | LazyNumpyTensor) -> np.ndarray:
        """Dequantize ``tensor``, using the lazy wrapper for lazy tensors."""
        if isinstance(tensor, LazyNumpyTensor):
            return cls.__dequantize_lazy(tensor)
        else:
            return cls.__dequantize_array(tensor)
202
+
203
+
204
class BF16(__Quant, qtype=GGMLQuantizationType.BF16):
    """Conversion between float32 and bfloat16 (the upper 16 bits of a float32)."""

    @classmethod
    # same as ggml_compute_fp32_to_bf16 in ggml-impl.h
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        bits = blocks.view(np.uint32)
        # force nan to quiet
        is_nan = (bits & 0x7fffffff) > 0x7f800000
        quieted = (bits & np.uint32(0xffff0000)) | np.uint32(64 << 16)
        bits = np.where(is_nan, quieted, bits)
        # round to nearest even
        bias = 0x7fff + ((bits >> 16) & 1)
        rounded = (np.uint64(bits) + bias) >> 16
        return rounded.astype(np.uint16).view(np.uint8)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        # place the 16 stored bits in the upper half of a 32-bit word
        widened = blocks.view(np.int16).astype(np.int32)
        return (widened << 16).view(np.float32)
218
+
219
+
220
class Q4_0(__Quant, qtype=GGMLQuantizationType.Q4_0):
    """Q4_0: a float16 scale followed by 4-bit quants packed two per byte."""

    @classmethod
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        # signed value with the largest magnitude in each block
        extreme_idx = abs(blocks).argmax(axis=-1, keepdims=True)
        extreme = np.take_along_axis(blocks, extreme_idx, axis=-1)

        scale = extreme / -8
        with np.errstate(divide="ignore"):
            inv_scale = np.where(scale == 0, 0, 1 / scale)
        # FIXME: Q4_0's reference rounding is cursed and depends on FMA
        shifted = (np.float64(blocks) * np.float64(inv_scale)) + np.float64(8.5)
        quants = np.trunc(shifted, dtype=np.float32).astype(np.uint8).clip(0, 15)

        # first half of the block goes to the low nibbles, second half to the high nibbles
        halves = quants.reshape((n_blocks, 2, cls.block_size // 2))
        packed = halves[..., 0, :] | (halves[..., 1, :] << np.uint8(4))

        scale_bytes = scale.astype(np.float16).view(np.uint8)

        return np.concatenate([scale_bytes, packed], axis=-1)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        scale_bytes, packed = np.hsplit(blocks, [2])

        scale = scale_bytes.view(np.float16).astype(np.float32)

        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
        nibbles = packed.reshape((n_blocks, -1, 1, cls.block_size // 2)) >> nibble_shifts
        quants = (nibbles & np.uint8(0x0F)).reshape((n_blocks, -1)).astype(np.int8) - np.int8(8)

        return (scale * quants.astype(np.float32))
253
+
254
+
255
class Q4_1(__Quant, qtype=GGMLQuantizationType.Q4_1):
    """Q4_1: float16 scale and float16 minimum followed by 4-bit quants packed two per byte."""

    @classmethod
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        block_max = blocks.max(axis=-1, keepdims=True)
        block_min = blocks.min(axis=-1, keepdims=True)

        scale = (block_max - block_min) / 15
        with np.errstate(divide="ignore"):
            inv_scale = np.where(scale == 0, 0, 1 / scale)
        shifted = (blocks - block_min) * inv_scale + np.float32(0.5)
        quants = np.trunc(shifted, dtype=np.float32).astype(np.uint8).clip(0, 15)

        # first half of the block goes to the low nibbles, second half to the high nibbles
        halves = quants.reshape((n_blocks, 2, cls.block_size // 2))
        packed = halves[..., 0, :] | (halves[..., 1, :] << np.uint8(4))

        scale_bytes = scale.astype(np.float16).view(np.uint8)
        min_bytes = block_min.astype(np.float16).view(np.uint8)

        return np.concatenate([scale_bytes, min_bytes, packed], axis=-1)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        scale_bytes, tail = np.hsplit(blocks, [2])
        min_bytes, packed = np.hsplit(tail, [2])

        scale = scale_bytes.view(np.float16).astype(np.float32)
        offset = min_bytes.view(np.float16).astype(np.float32)

        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
        nibbles = packed.reshape((n_blocks, -1, 1, cls.block_size // 2)) >> nibble_shifts
        quants = (nibbles & np.uint8(0x0F)).reshape((n_blocks, -1)).astype(np.float32)

        return (scale * quants) + offset
290
+
291
+
292
class Q5_0(__Quant, qtype=GGMLQuantizationType.Q5_0):
    """Q5_0: float16 scale, 4 bytes of high bits, then the low 4 bits of each quant packed two per byte."""

    @classmethod
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        # signed value with the largest magnitude in each block
        extreme_idx = abs(blocks).argmax(axis=-1, keepdims=True)
        extreme = np.take_along_axis(blocks, extreme_idx, axis=-1)

        scale = extreme / -16
        with np.errstate(divide="ignore"):
            inv_scale = np.where(scale == 0, 0, 1 / scale)
        # FIXME: Q5_0's reference rounding is cursed and depends on FMA
        shifted = (np.float64(blocks) * np.float64(inv_scale)) + np.float64(16.5)
        quants = np.trunc(shifted, dtype=np.float32).astype(np.uint8).clip(0, 31)

        # low 4 bits: first half of the block in low nibbles, second half in high nibbles
        halves = quants.reshape((n_blocks, 2, cls.block_size // 2))
        low_packed = (halves[..., 0, :] & np.uint8(0x0F)) | (halves[..., 1, :] << np.uint8(4))

        # fifth bit of every quant, packed little-endian into 4 bytes
        fifth_bits = quants.reshape((n_blocks, 1, 32)) >> np.uint8(4)
        high_packed = np.packbits(fifth_bits, axis=-1, bitorder="little").reshape(n_blocks, 4)

        scale_bytes = scale.astype(np.float16).view(np.uint8)

        return np.concatenate([scale_bytes, high_packed, low_packed], axis=-1)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        scale_bytes, tail = np.hsplit(blocks, [2])
        high_bytes, low_bytes = np.hsplit(tail, [4])

        scale = scale_bytes.view(np.float16).astype(np.float32)
        high_word = high_bytes.view(np.uint32)

        bit_positions = np.array([i for i in range(32)], dtype=np.uint32).reshape((1, 32))
        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))

        high = high_word.reshape((n_blocks, 1)) >> bit_positions
        low = low_bytes.reshape((n_blocks, -1, 1, cls.block_size // 2)) >> nibble_shifts
        high = (high & np.uint32(0x01)).astype(np.uint8)
        low = (low & np.uint8(0x0F)).reshape((n_blocks, -1))

        quants = (low | (high << np.uint8(4))).astype(np.int8) - np.int8(16)

        return (scale * quants.astype(np.float32))
333
+
334
+
335
class Q5_1(__Quant, qtype=GGMLQuantizationType.Q5_1):
    """Q5_1: float16 scale and minimum, 4 bytes of high bits, then the low 4 bits of each quant packed two per byte."""

    @classmethod
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        block_max = blocks.max(axis=-1, keepdims=True)
        block_min = blocks.min(axis=-1, keepdims=True)

        scale = (block_max - block_min) / 31
        with np.errstate(divide="ignore"):
            inv_scale = np.where(scale == 0, 0, 1 / scale)
        shifted = (blocks - block_min) * inv_scale + np.float32(0.5)
        quants = np.trunc(shifted, dtype=np.float32).astype(np.uint8).clip(0, 31)

        # low 4 bits: first half of the block in low nibbles, second half in high nibbles
        halves = quants.reshape((n_blocks, 2, cls.block_size // 2))
        low_packed = (halves[..., 0, :] & np.uint8(0x0F)) | (halves[..., 1, :] << np.uint8(4))

        # fifth bit of every quant, packed little-endian into 4 bytes
        fifth_bits = quants.reshape((n_blocks, 1, 32)) >> np.uint8(4)
        high_packed = np.packbits(fifth_bits, axis=-1, bitorder="little").reshape(n_blocks, 4)

        scale_bytes = scale.astype(np.float16).view(np.uint8)
        min_bytes = block_min.astype(np.float16).view(np.uint8)

        return np.concatenate([scale_bytes, min_bytes, high_packed, low_packed], axis=-1)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        scale_bytes, tail = np.hsplit(blocks, [2])
        min_bytes, tail = np.hsplit(tail, [2])
        high_bytes, low_bytes = np.hsplit(tail, [4])

        scale = scale_bytes.view(np.float16).astype(np.float32)
        offset = min_bytes.view(np.float16).astype(np.float32)
        high_word = high_bytes.view(np.uint32)

        bit_positions = np.array([i for i in range(32)], dtype=np.uint32).reshape((1, 32))
        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))

        high = high_word.reshape((n_blocks, 1)) >> bit_positions
        low = low_bytes.reshape((n_blocks, -1, 1, cls.block_size // 2)) >> nibble_shifts
        high = (high & np.uint32(0x01)).astype(np.uint8)
        low = (low & np.uint8(0x0F)).reshape((n_blocks, -1))

        quants = (low | (high << np.uint8(4))).astype(np.float32)

        return (scale * quants) + offset
378
+
379
+
380
class Q8_0(__Quant, qtype=GGMLQuantizationType.Q8_0):
    """Q8_0: a float16 scale followed by one signed 8-bit quant per element."""

    @classmethod
    # Implementation of Q8_0 with bit-exact same results as reference implementation in ggml-quants.c
    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:

        scale = abs(blocks).max(axis=1, keepdims=True) / 127
        with np.errstate(divide="ignore"):
            inv_scale = np.where(scale == 0, 0, 1 / scale)
        rounded = np_roundf(blocks * inv_scale)

        # (n_blocks, 2)
        scale_bytes = scale.astype(np.float16).view(np.uint8)
        # (n_blocks, block_size)
        quant_bytes = rounded.astype(np.int8).view(np.uint8)

        return np.concatenate([scale_bytes, quant_bytes], axis=1)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        scale_bytes, quant_bytes = np.split(blocks, [2], axis=1)
        scale = scale_bytes.view(np.float16).astype(np.float32)
        quants = quant_bytes.view(np.int8).astype(np.float32)

        return (quants * scale)
404
+
405
+
406
class Q2_K(__Quant, qtype=GGMLQuantizationType.Q2_K):
    """Dequantizer for Q2_K: 2-bit quants with a 4-bit scale and 4-bit minimum per group of 16."""

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        # block layout: packed scales/mins, packed 2-bit quants, d (2 bytes), dmin (rest)
        packed_scales, tail = np.hsplit(blocks, [QK_K // 16])
        packed_quants, tail = np.hsplit(tail, [QK_K // 4])
        d_bytes, dmin_bytes = np.hsplit(tail, [2])

        d = d_bytes.view(np.float16).astype(np.float32)
        dmin = dmin_bytes.view(np.float16).astype(np.float32)

        # (n_blocks, 16, 1)
        group_scale = (d * (packed_scales & 0xF).astype(np.float32)).reshape((n_blocks, QK_K // 16, 1))
        group_min = (dmin * (packed_scales >> 4).astype(np.float32)).reshape((n_blocks, QK_K // 16, 1))

        bit_shifts = np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))

        quants = (packed_quants.reshape((n_blocks, -1, 1, 32)) >> bit_shifts) & np.uint8(3)
        quants = quants.reshape((n_blocks, QK_K // 16, 16)).astype(np.float32)

        values = group_scale * quants - group_min

        return values.reshape((n_blocks, -1))
431
+
432
+
433
class Q3_K(__Quant, qtype=GGMLQuantizationType.Q3_K):
    """Dequantizer for Q3_K blocks (2 low bits plus 1 mask bit per quant, 6-bit scales)."""

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Decode (n_blocks, type_size) bytes into (n_blocks, QK_K) float32 values."""
        n_blocks = blocks.shape[0]

        # block layout: hmask (QK_K // 8 bytes), qs (QK_K // 4 bytes), scales (12 bytes), d (rest)
        hmask, rest = np.hsplit(blocks, [QK_K // 8])
        qs, rest = np.hsplit(rest, [QK_K // 4])
        scales, d = np.hsplit(rest, [12])

        d = d.view(np.float16).astype(np.float32)

        # The scales are packed at 6-bit each in this pattern:
        #  0: IIIIAAAA
        #  1: JJJJBBBB
        #  2: KKKKCCCC
        #  3: LLLLDDDD
        #  4: MMMMEEEE
        #  5: NNNNFFFF
        #  6: OOOOGGGG
        #  7: PPPPHHHH
        #  8: MMIIEEAA
        #  9: NNJJFFBB
        # 10: OOKKGGCC
        # 11: PPLLHHDD
        lscales, hscales = np.hsplit(scales, [8])
        # low 4 bits of each scale come from the nibbles of the first 8 bytes
        lscales = lscales.reshape((n_blocks, 1, 8)) >> np.array([0, 4], dtype=np.uint8).reshape((1, 2, 1))
        lscales = lscales.reshape((n_blocks, 16))
        # high 2 bits of each scale come from the bit pairs of the last 4 bytes
        hscales = hscales.reshape((n_blocks, 1, 4)) >> np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 4, 1))
        hscales = hscales.reshape((n_blocks, 16))
        scales = (lscales & np.uint8(0x0F)) | ((hscales & np.uint8(0x03)) << np.uint8(4))
        # the 6-bit scales are stored with an offset of 32
        scales = (scales.astype(np.int8) - np.int8(32)).astype(np.float32)

        dl = (d * scales).reshape((n_blocks, 16, 1))

        # ql: 2-bit low parts; qh: one mask bit per quant
        ql = qs.reshape((n_blocks, -1, 1, 32)) >> np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
        qh = hmask.reshape(n_blocks, -1, 1, 32) >> np.array([i for i in range(8)], dtype=np.uint8).reshape((1, 1, 8, 1))
        ql = ql.reshape((n_blocks, 16, QK_K // 16)) & np.uint8(3)
        qh = (qh.reshape((n_blocks, 16, QK_K // 16)) & np.uint8(1))
        qh = qh ^ np.uint8(1)  # strangely, the offset is zero when the bitmask is 1
        # a cleared mask bit subtracts 4 from the 2-bit value
        q = (ql.astype(np.int8) - (qh << np.uint8(2)).astype(np.int8)).astype(np.float32)

        return (dl * q).reshape((n_blocks, QK_K))
475
+
476
+
477
class Q4_K(__Quant, qtype=GGMLQuantizationType.Q4_K):
    """Dequantizer for Q4_K: 4-bit quants with a 6-bit scale and 6-bit minimum per group of 32."""

    K_SCALE_SIZE = 12

    @staticmethod
    def get_scale_min(scales: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
        """Unpack the 12 packed bytes per block into 8 scales and 8 minimums of 6 bits each."""
        n_blocks = scales.shape[0]
        packed = scales.view(np.uint8)
        ### Unpacking the following: ###
        #  0 EEAAAAAA
        #  1 FFBBBBBB
        #  2 GGCCCCCC
        #  3 HHDDDDDD
        #  4 eeaaaaaa
        #  5 ffbbbbbb
        #  6 ggcccccc
        #  7 hhdddddd
        #  8 eeeeEEEE
        #  9 ffffFFFF
        # 10 ggggGGGG
        # 11 hhhhHHHH
        packed = packed.reshape((n_blocks, 3, 4))
        scale_row, min_row, mixed_row = np.split(packed, 3, axis=-2)

        unpacked_scales = np.concatenate([scale_row & 0x3F, (mixed_row & 0x0F) | ((scale_row >> 2) & 0x30)], axis=-1)
        unpacked_mins = np.concatenate([min_row & 0x3F, (mixed_row >> 4) | ((min_row >> 2) & 0x30)], axis=-1)

        return (unpacked_scales.reshape((n_blocks, 8)), unpacked_mins.reshape((n_blocks, 8)))

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        # block layout: d (2 bytes), dmin (2 bytes), packed scales, packed 4-bit quants
        d_bytes, tail = np.hsplit(blocks, [2])
        dmin_bytes, tail = np.hsplit(tail, [2])
        packed_scales, packed_quants = np.hsplit(tail, [cls.K_SCALE_SIZE])

        d = d_bytes.view(np.float16).astype(np.float32)
        dmin = dmin_bytes.view(np.float16).astype(np.float32)

        group_sc, group_m = Q4_K.get_scale_min(packed_scales)

        group_scale = (d * group_sc.astype(np.float32)).reshape((n_blocks, -1, 1))
        group_min = (dmin * group_m.astype(np.float32)).reshape((n_blocks, -1, 1))

        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
        quants = packed_quants.reshape((n_blocks, -1, 1, 32)) >> nibble_shifts
        quants = (quants & np.uint8(0x0F)).reshape((n_blocks, -1, 32)).astype(np.float32)

        return (group_scale * quants - group_min).reshape((n_blocks, QK_K))
525
+
526
+
527
class Q5_K(__Quant, qtype=GGMLQuantizationType.Q5_K):
    """Dequantizer for Q5_K: 5-bit quants sharing the packed scale/minimum layout of Q4_K."""

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        # block layout: d (2 bytes), dmin (2 bytes), packed scales, high bits, low nibbles
        d_bytes, tail = np.hsplit(blocks, [2])
        dmin_bytes, tail = np.hsplit(tail, [2])
        packed_scales, tail = np.hsplit(tail, [Q4_K.K_SCALE_SIZE])
        high_bytes, low_bytes = np.hsplit(tail, [QK_K // 8])

        d = d_bytes.view(np.float16).astype(np.float32)
        dmin = dmin_bytes.view(np.float16).astype(np.float32)

        group_sc, group_m = Q4_K.get_scale_min(packed_scales)

        group_scale = (d * group_sc.astype(np.float32)).reshape((n_blocks, -1, 1))
        group_min = (dmin * group_m.astype(np.float32)).reshape((n_blocks, -1, 1))

        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
        bit_shifts = np.array([i for i in range(8)], dtype=np.uint8).reshape((1, 1, 8, 1))

        low = low_bytes.reshape((n_blocks, -1, 1, 32)) >> nibble_shifts
        high = high_bytes.reshape((n_blocks, -1, 1, 32)) >> bit_shifts
        low = (low & np.uint8(0x0F)).reshape((n_blocks, -1, 32))
        high = (high & np.uint8(0x01)).reshape((n_blocks, -1, 32))
        quants = (low | (high << np.uint8(4))).astype(np.float32)

        return (group_scale * quants - group_min).reshape((n_blocks, QK_K))
552
+
553
+
554
class Q6_K(__Quant, qtype=GGMLQuantizationType.Q6_K):
    """Dequantizer for Q6_K: 6-bit quants (4 low + 2 high bits) with an int8 scale per group of 16."""

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        n_blocks = blocks.shape[0]

        # block layout: low nibbles, high bit pairs, int8 scales, d (rest)
        low_bytes, tail = np.hsplit(blocks, [QK_K // 2])
        high_bytes, tail = np.hsplit(tail, [QK_K // 4])
        scale_bytes, d_bytes = np.hsplit(tail, [QK_K // 16])

        group_factors = scale_bytes.view(np.int8).astype(np.float32)
        d = d_bytes.view(np.float16).astype(np.float32)
        group_scale = (d * group_factors).reshape((n_blocks, QK_K // 16, 1))

        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
        pair_shifts = np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))

        low = low_bytes.reshape((n_blocks, -1, 1, 64)) >> nibble_shifts
        low = (low & np.uint8(0x0F)).reshape((n_blocks, -1, 32))
        high = high_bytes.reshape((n_blocks, -1, 1, 32)) >> pair_shifts
        high = (high & np.uint8(0x03)).reshape((n_blocks, -1, 32))
        # the 6-bit quants are stored with an offset of 32
        quants = (low | (high << np.uint8(4))).astype(np.int8) - np.int8(32)
        quants = quants.reshape((n_blocks, QK_K // 16, -1)).astype(np.float32)

        return (group_scale * quants).reshape((n_blocks, QK_K))
575
+
576
+
577
class IQ2_XXS(__Quant, qtype=GGMLQuantizationType.IQ2_XXS):
    """Dequantizer for IQ2_XXS blocks, which index a lookup grid and a table of sign patterns."""

    # 128-entry table of sign bytes; dequantize_blocks() indexes it with 7-bit
    # values and expands each byte into 8 sign bits.
    ksigns: bytes = (
        b"\x00\x81\x82\x03\x84\x05\x06\x87\x88\x09\x0a\x8b\x0c\x8d\x8e\x0f"
        b"\x90\x11\x12\x93\x14\x95\x96\x17\x18\x99\x9a\x1b\x9c\x1d\x1e\x9f"
        b"\xa0\x21\x22\xa3\x24\xa5\xa6\x27\x28\xa9\xaa\x2b\xac\x2d\x2e\xaf"
        b"\x30\xb1\xb2\x33\xb4\x35\x36\xb7\xb8\x39\x3a\xbb\x3c\xbd\xbe\x3f"
        b"\xc0\x41\x42\xc3\x44\xc5\xc6\x47\x48\xc9\xca\x4b\xcc\x4d\x4e\xcf"
        b"\x50\xd1\xd2\x53\xd4\x55\x56\xd7\xd8\x59\x5a\xdb\x5c\xdd\xde\x5f"
        b"\x60\xe1\xe2\x63\xe4\x65\x66\xe7\xe8\x69\x6a\xeb\x6c\xed\xee\x6f"
        b"\xf0\x71\x72\xf3\x74\xf5\xf6\x77\x78\xf9\xfa\x7b\xfc\x7d\x7e\xff"
    )

    # iq2xxs_grid, but with each byte of the original packed in 2 bits,
    # by mapping 0x08 to 0, 0x19 to 1, and 0x2b to 2.
    grid_shape = (256, 8)
    grid_map = (0x08, 0x19, 0x2b)
    grid_hex = (
        b"00000200050008000a00110014002000220028002a0041004400500058006100"
        b"6400800082008a00a20001010401100115014001840198010002020222028202"
        b"010404041004210424044004420448046004810484049004a404000502050805"
        b"200546056905800591050906100640068406a406000805080808140828084108"
        b"440850085208880804094009020a140a01100410101021104010601084109010"
        b"951000110811201150115a118011241245120014081420142514491480141815"
        b"6215001616160118041810184018811800190519a019511a002002200a204420"
        b"6120802082202921482100220222012404241024402456240025412564259026"
        b"082820289428442a014004401040184021402440404048405640604081408440"
        b"9040004120416141804185410142104248425642684200440844204480449944"
        b"124524450046014804481048404845480049584961498249454a904a00500850"
        b"1150195020508050885004514251a4519152905492540a550156545600581158"
        b"195864584059085a046010604060686000615561186260620064056410651265"
        b"84654268008002800a8041808280048118814081118201840484108415844084"
        b"608400854685948509864086608602880489118a0490109024904090a1901691"
        b"8091459200942294449451958198209902a050a085a009a100a218a450a804a9"
    )

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Decode (n_blocks, type_size) bytes into float32 rows of shape (n_blocks, -1).

        Requires ``cls.grid`` to have been built already (see ``init_grid``).
        """
        n_blocks = blocks.shape[0]

        # block layout: d (2 bytes) followed by the packed quants
        d, qs = np.hsplit(blocks, [2])

        d = d.view(np.float16).astype(np.float32)

        # pairs of 32-bit words: word 0 holds four grid indices (one per byte),
        # word 1 holds four 7-bit sign indices and, in its top 4 bits, a scale
        qs = qs.view(np.uint32).reshape(n_blocks, -1, 2)

        db = d * (np.float32(0.5) + (qs[..., 1] >> 28).astype(np.float32)) * np.float32(0.25)
        db = db.reshape((n_blocks, -1, 1, 1))

        # get the sign indices and unpack the bits
        signs = qs[..., 1].reshape((n_blocks, -1, 1)) >> np.array([0, 7, 14, 21], dtype=np.uint32).reshape((1, 1, 4))
        ksigns = np.frombuffer(cls.ksigns, dtype=np.uint8).reshape((1, 1, 1, 128))
        signs = (signs & np.uint32(0x7F)).reshape((n_blocks, -1, 4, 1))
        signs = np.take_along_axis(ksigns, signs, axis=-1)
        signs = signs.reshape((n_blocks, -1, 4, 1)) >> np.array([i for i in range(8)], dtype=np.uint8).reshape((1, 1, 1, 8))
        signs = signs & np.uint8(0x01)
        # a set bit means the corresponding value is negated
        signs = np.where(signs == 0, np.float32(1), np.float32(-1))
        signs = signs.reshape((n_blocks, -1, 4, 8))

        assert cls.grid is not None
        # each byte of word 0 selects one 8-value row of the grid
        grid = np.take_along_axis(cls.grid, qs[..., 0].copy().view(np.uint8).reshape((n_blocks, -1, 1, 1)), axis=-2)
        grid = grid.reshape((n_blocks, -1, 4, 8))

        return (db * grid * signs).reshape((n_blocks, -1))
640
+
641
+
642
+ class IQ2_XS(__Quant, qtype=GGMLQuantizationType.IQ2_XS):
643
+ # iq2xs_grid, but with each byte of the original packed in 2 bits,
644
+ # by mapping 0x08 to 0, 0x19 to 1, and 0x2b to 2.
645
+ grid_shape = (512, 8)
646
+ grid_map = (0x08, 0x19, 0x2b)
647
+ grid_hex = (
648
+ b"00000200050008000a0011001400160019002000220025002800410044004600"
649
+ b"49005000520055005800610064008000820085008800910094009900a0000101"
650
+ b"04010601090110011201150118011a0121012401400142014501480151015401"
651
+ b"6001680181018401900100020202050208021102140220024102440250025502"
652
+ b"80028a0201040404060409041004120415041804210424044004420445044804"
653
+ b"5104540456046004810484049004000502050505080511051405200541054405"
654
+ b"500561058005010604061006260640064206840600080208050808080a081108"
655
+ b"14082008250841084408500858088008a008aa08010904091009400981098909"
656
+ b"000a200a280a960aa00a01100410061009101010121015101810211024104010"
657
+ b"4210451048105110541060106a10811084109010001102110511081111111411"
658
+ b"2011411144115011801194119611011204120612101240126012001402140514"
659
+ b"0814111414142014411444144914501464148014011504151015401500161416"
660
+ b"49160118041810181218401854188618001905196619511aa91a002002200520"
661
+ b"08200a201120142020204120442050208020a020012104211021402148216521"
662
+ b"002222228022a82201240424102429244024002541255225992501261a26a626"
663
+ b"002808280a28202855288828a22868299029082a202a822a882a8a2a01400440"
664
+ b"0640094010401240154018402140244040404240454048404a40514054406040"
665
+ b"6540814084409040004102410541084111411441204141414441504180418541"
666
+ b"a241014204421042124229424042004402440544084411441444194420444144"
667
+ b"4444504480449444014504451045244540459a4500460a464446504601480448"
668
+ b"1048404845485448624800491149444950496949044a00500250055008501150"
669
+ b"145020502850415044505050805001510451105115514051425100524452aa52"
670
+ b"0154045410542154405460548154a154005508558055885521566856a1560058"
671
+ b"14584158505899581a5940594259855a0160046010604060546062608660a960"
672
+ b"006124624a62926200641664106540654565a46501686a682569066a546a626a"
673
+ b"00800280058008801180148020802a8041804480508080808280a880aa800181"
674
+ b"0481068110814081518159810082208280828282a082a8820184048410841284"
675
+ b"158440846084898400854485a58518866a860088088825885a8880888288a888"
676
+ b"0689228a808a888a968aa88a0190049010904090569084900091229164915692"
677
+ b"89920094059444945094589429959095929541965198a6984999159a609a00a0"
678
+ b"02a008a00aa020a02aa0a0a051a159a1a6a100a202a208a22aa280a2a0a240a4"
679
+ b"95a465a698a60aa820a822a828a8a0a8a8a804a984a986a928aa2aaa91aaaaaa"
680
+ )
681
+
682
@classmethod
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
    """Decode IQ2_XS blocks into scaled, signed grid values.

    Row layout as read by this method: 2 bytes reinterpreted as a float16
    scale, then ``2 * QK_K // 8`` bytes reinterpreted as uint16 codes, then
    bytes that each hold two 4-bit sub-scales. For every uint16 code the low
    9 bits select a row of ``cls.grid`` and the high 7 bits select an entry
    of the ``IQ2_XXS.ksigns`` sign table.

    Args:
        blocks: 2-D array with one quantized block per row (presumably
            uint8 -- confirm with caller).

    Returns:
        Array with ``blocks.shape[0]`` rows holding the dequantized values.
    """
    count = blocks.shape[0]

    raw_d, raw_codes, raw_scales = np.hsplit(blocks, [2, 2 + 2 * QK_K // 8])

    scale = raw_d.view(np.float16).astype(np.float32)
    codes = raw_codes.view(np.uint16)

    # Split every scale byte into its low and high nibble
    nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2))
    sub_scales = raw_scales.reshape((count, -1, 1)) >> nibble_shifts
    sub_scales = (sub_scales & 0x0F).reshape((count, -1))
    db = scale * (np.float32(0.5) + sub_scales) * np.float32(0.25)
    db = db.reshape((count, -1, 1, 1))

    # High 7 bits of each code pick a sign byte; expand that byte bit by bit
    sign_table = np.frombuffer(IQ2_XXS.ksigns, dtype=np.uint8).reshape((1, 1, 128))
    sign_idx = (codes >> 9).reshape((count, -1, 1))
    sign_bytes = np.take_along_axis(sign_table, sign_idx, axis=-1)
    bit_pos = np.arange(8, dtype=np.uint8).reshape((1, 1, 8))
    sign_bits = (sign_bytes.reshape((count, -1, 1)) >> bit_pos) & np.uint8(0x01)

    # A cleared bit means +1, a set bit means -1
    signs = np.where(sign_bits == 0, np.float32(1), np.float32(-1))
    signs = signs.reshape((count, -1, 2, 8))

    assert cls.grid is not None
    # Low 9 bits of each code select the grid row
    grid_idx = (codes & np.uint16(511)).reshape((count, -1, 1, 1))
    grid = np.take_along_axis(cls.grid, grid_idx, axis=-2)
    grid = grid.reshape((count, -1, 2, 8))

    return (db * grid * signs).reshape((count, -1))
710
+
711
+
712
+ class IQ2_S(__Quant, qtype=GGMLQuantizationType.IQ2_S):
713
+ # iq2s_grid, but with each byte of the original packed in 2 bits,
714
+ # by mapping 0x08 to 0, 0x19 to 1, and 0x2b to 2.
715
+ grid_shape = (1024, 8)
716
+ grid_map = (0x08, 0x19, 0x2b)
717
+ grid_hex = (
718
+ b"00000200050008000a0011001400160019002000220025002800410044004600"
719
+ b"490050005200550058006100640066006900800082008500880091009400a000"
720
+ b"a500aa0001010401060109011001120115011801210124014001420145014801"
721
+ b"510154015601590160016501680181018401900192019501a101a40100020202"
722
+ b"050208021102140220022a02410244024602490250025502800285028a029402"
723
+ b"a202010404040604090410041204150418042104240426042904400442044504"
724
+ b"48044a0451045404560459046004620465048104840486048904900495049804"
725
+ b"a104a40400050205050508050a05110514051605190520052505280541054405"
726
+ b"46054905500552055505580561056405800582058505880591059405a0050106"
727
+ b"0406060609061006150640064506480651065406600681068406900600080208"
728
+ b"050808081108140816081908200825082a084108440846084908500852085508"
729
+ b"580861086408800885089408aa08010904091009120915091809210940094509"
730
+ b"480951095409600981099009000a110a140a220a280a2a0a500a990a01100410"
731
+ b"0610091010101210151018102110241026104010421045104810511054105610"
732
+ b"59106010621065106810811084108610901095109810a110a410001102110511"
733
+ b"08110a1111111411161119112011221125112811411144114611491150115211"
734
+ b"5511581161116411801182118511881191119411011204120912101215122112"
735
+ b"2412401245125112541281128412901200140214051408141114141416141914"
736
+ b"2014251428144114441446144914501452145514581461146414801482148514"
737
+ b"881491149414a014011504150615091510151215151518152115241540154215"
738
+ b"4515481551155415601581158415901500160516081611161416201641164416"
739
+ b"50168016aa160118041806180918101815181818211840184218451848185118"
740
+ b"541860188118841800190219051908191119141920194119441950196919a219"
741
+ b"041a101a401a561a00200220052008201120142016201920202025202a204120"
742
+ b"4420502052205520642080208a209420aa200121042110211221152121214021"
743
+ b"4221452151215421602181218421902100220a22222228222a22442250228822"
744
+ b"8a22a82201240424062409241024152418242124242440244224452448245124"
745
+ b"5424602481248424902400250525082511251425202541254425502566258025"
746
+ b"0126042610264026592600280528112814284128442850288a28aa2801290429"
747
+ b"102995290a2a222a642a882a8a2a014004400640094010401240154018401a40"
748
+ b"21402440264040404240454048404a4051405440564059406040624065408140"
749
+ b"8440904095409840a140a4400041024105410841114114411641194120412241"
750
+ b"2541414144414641494150415241554158416141644180418241854188419141"
751
+ b"9441a04101420442104212421542184224424042454248425142544260428142"
752
+ b"844200440244054408440a441144144416441944204422442544284441444444"
753
+ b"46444944504452445544584461446444804482448544884491449444a0440145"
754
+ b"0445064509451045124515451845214524454045424545454845514554456045"
755
+ b"6a4581458445904500460246054608461146144620464146444650468046a546"
756
+ b"0148044809481048124815481848214824484048424845484848514854486048"
757
+ b"84489048004902490549084911491449204941494449504980499649014a044a"
758
+ b"104a404a00500250055008501150145016501950205022502550285041504450"
759
+ b"4650495050505250555058506150645080508250855088509150945001510451"
760
+ b"0651095110511251155118512151245140514251455148515151545160518151"
761
+ b"8451905100520552085211521452205241524452505269528052015404540654"
762
+ b"0954105412541554185421542454405442544554485451545454605481548454"
763
+ b"9054005502550555085511551455205541554455505580550156045610562656"
764
+ b"405600580258055808581158145820584158445850585a588058015904591059"
765
+ b"4059005a195a855aa85a01600460066010601260156018602160246040604560"
766
+ b"4860516054606060846090600061026105610861116114612061416144615061"
767
+ b"806199610462106240625662a162006405640864116414642064416444645064"
768
+ b"806401650465106540654a656865926500669466016804681068656898680069"
769
+ b"2a69426aa16a0080028005800880118014801980208025804180448050805280"
770
+ b"5580588061808080858091809480018104810981108112811581188121812481"
771
+ b"408142814581488151815481818184819081a981008205820a82118214824182"
772
+ b"4482508201840484068409841084128415841884218440844284458448845184"
773
+ b"5484608481848484908400850285058508851185148520854185448550858085"
774
+ b"8a85018604861086298640860088058811881488418844885088a28801890489"
775
+ b"40896589228a588a5a8a828aa28a019004900990109012901590189024904090"
776
+ b"4290459048905190549060908190849090900091059111911491419144915091"
777
+ b"5a910192049210924092a6920094029405940894119414942094419444945094"
778
+ b"8094969401950495109540959895a19500964696649601980498109826984098"
779
+ b"a998009949995299909a00a005a00aa014a022a02aa041a044a050a0a2a0aaa0"
780
+ b"40a165a102a20aa222a228a22aa282a288a28aa2a8a201a404a410a440a489a4"
781
+ b"a4a400a519a551a60aa828a8a2a854a986a908aa0aaa20aa22aa28aa88aaaaaa"
782
+ )
783
+
784
@classmethod
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
    """Decode IQ2_S blocks into scaled, signed grid values.

    Row layout as read by this method: 2 bytes reinterpreted as a float16
    scale, ``QK_K // 8`` bytes of low grid-index bits, ``QK_K // 8`` bytes of
    sign bits, ``QK_K // 32`` bytes of high grid-index bits (2 bits per
    index, four indices per byte), and finally bytes that each hold two
    4-bit sub-scales.

    Args:
        blocks: 2-D array with one quantized block per row (presumably
            uint8 -- confirm with caller).

    Returns:
        Array with ``blocks.shape[0]`` rows holding the dequantized values.
    """
    count = blocks.shape[0]

    qs_end = 2 + QK_K // 8
    signs_end = qs_end + QK_K // 8
    qh_end = signs_end + QK_K // 32
    raw_d, idx_low, raw_signs, idx_high, raw_scales = np.hsplit(
        blocks, [2, qs_end, signs_end, qh_end]
    )

    scale = raw_d.view(np.float16).astype(np.float32)

    # Split every scale byte into its low and high nibble
    nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2))
    sub_scales = raw_scales.reshape((count, -1, 1)) >> nibble_shifts
    sub_scales = (sub_scales & 0x0F).reshape((count, -1))
    db = scale * (np.float32(0.5) + sub_scales) * np.float32(0.25)
    db = db.reshape((count, -1, 1, 1))

    # Expand each sign byte into 8 bits; cleared bit -> +1, set bit -> -1
    bit_pos = np.arange(8, dtype=np.uint8).reshape((1, 1, 8))
    sign_bits = (raw_signs.reshape((count, -1, 1)) >> bit_pos) & np.uint8(0x01)
    signs = np.where(sign_bits == 0, np.float32(1), np.float32(-1))
    signs = signs.reshape((count, -1, 2, 8))

    # Each high byte contributes bits 8-9 for four consecutive grid indices
    pair_shifts = np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4))
    high_bits = idx_high.reshape((count, -1, 1)) >> pair_shifts
    high_bits = ((high_bits & 0x03).astype(np.uint16) << 8).reshape((count, -1))
    grid_idx = idx_low.astype(np.uint16) | high_bits

    assert cls.grid is not None
    grid = np.take_along_axis(cls.grid, grid_idx.reshape((count, -1, 1, 1)), axis=-2)
    grid = grid.reshape((count, -1, 2, 8))

    return (db * grid * signs).reshape((count, -1))
814
+
815
+
816
+ class IQ3_XXS(__Quant, qtype=GGMLQuantizationType.IQ3_XXS):
817
+ grid_shape = (256, 4)
818
+ grid_map = (0x04, 0x0c, 0x14, 0x1c, 0x24, 0x2c, 0x34, 0x3e)
819
+ grid_hex = (
820
+ b"0000020004001100130017002000220031004200730075000101030110011201"
821
+ b"2101250130013201410154017001000202020402110220022202310233023702"
822
+ b"5102570275020103070310031203250370031304370444045704730475040105"
823
+ b"0705320552053506640610071407160743076107011003101010121021102310"
824
+ b"3010321034104710501000110211111120112211011203121012121221123012"
825
+ b"7212001302132013311346136613011405145014201524154615711505162217"
826
+ b"4017002002201120132020202220262031204220012103210521102112212121"
827
+ b"3021632167217021002202221122172220222222372240225522012310231423"
828
+ b"7023742335245324032527254125742501270327162745270130103012302130"
829
+ b"2330503065307230003102312031313144314631013203321032253252327232"
830
+ b"1133333330344734723400350635223555351436363663363337603704401740"
831
+ b"3540374053405740744120423742404260426642074345430444514464442545"
832
+ b"4345704505471047124730471250415070500051065126515551145232527252"
833
+ b"0253535310542354275472540255315550562457425724604460466064602161"
834
+ b"6161176264623063366344640565526533660367216703700570077010703270"
835
+ b"5270267140711272457252720073157333736073217441740075027524753076"
836
+ )
837
+
838
@classmethod
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
    """Decode IQ3_XXS blocks into scaled, signed grid values.

    Row layout as read by this method: 2 bytes reinterpreted as a float16
    scale, ``QK_K // 4`` one-byte grid indices, then bytes reinterpreted as
    uint32 words. Each word carries four 7-bit indices into the
    ``IQ2_XXS.ksigns`` sign table (bit offsets 0, 7, 14, 21) and a 4-bit
    sub-scale in its top bits.

    Args:
        blocks: 2-D array with one quantized block per row (presumably
            uint8 -- confirm with caller).

    Returns:
        Array with ``blocks.shape[0]`` rows holding the dequantized values.
    """
    count = blocks.shape[0]

    raw_d, grid_bytes, raw_aux = np.hsplit(blocks, [2, 2 + QK_K // 4])

    scale = raw_d.view(np.float16).astype(np.float32)
    aux_words = raw_aux.view(np.uint32)

    # Effective scale per word: d * (0.5 + top 4 bits of the word) * 0.5
    sub_scale = (aux_words >> 28).astype(np.float32)
    db = scale * (np.float32(0.5) + sub_scale) * np.float32(0.5)
    db = db.reshape((count, -1, 1, 1))

    # Extract the four 7-bit indices into the sign table
    sign_shifts = np.array([0, 7, 14, 21], dtype=np.uint32).reshape((1, 1, 4))
    sign_idx = aux_words.reshape((count, -1, 1)) >> sign_shifts
    sign_idx = (sign_idx & np.uint32(0x7F)).reshape((count, -1, 4, 1))

    # Look up one sign byte per index, then expand it into its 8 bits
    sign_table = np.frombuffer(IQ2_XXS.ksigns, dtype=np.uint8).reshape((1, 1, 1, 128))
    sign_bytes = np.take_along_axis(sign_table, sign_idx, axis=-1)
    bit_pos = np.arange(8, dtype=np.uint8).reshape((1, 1, 1, 8))
    sign_bits = (sign_bytes.reshape((count, -1, 4, 1)) >> bit_pos) & np.uint8(0x01)

    # A cleared bit means +1, a set bit means -1
    signs = np.where(sign_bits == 0, np.float32(1), np.float32(-1))
    signs = signs.reshape((count, -1, 4, 8))

    assert cls.grid is not None
    grid_idx = grid_bytes.reshape((count, -1, 1, 1))
    grid = np.take_along_axis(cls.grid, grid_idx, axis=-2)
    grid = grid.reshape((count, -1, 4, 8))

    return (db * grid * signs).reshape((count, -1))
866
+
867
+
868
+ class IQ3_S(__Quant, qtype=GGMLQuantizationType.IQ3_S):
869
+ grid_shape = (512, 4)
870
+ grid_map = (0x01, 0x03, 0x05, 0x07, 0x09, 0x0b, 0x0d, 0x0f)
871
+ grid_hex = (
872
+ b"0000010002000500070010001100120014001600200021002500330040004200"
873
+ b"4500470051005300600062007100740077000001010102010401100111011501"
874
+ b"2001230127013101350144016101650172010002010205020702100213021602"
875
+ b"2102250230023402420245024702510253027002730203031103150320032203"
876
+ b"3103330336034403500352036703710375030004130417042104240432044004"
877
+ b"4304510470040205040520052205260533054105450547056605730506061106"
878
+ b"1306310652067106000702070407200722072607330750075407001001100210"
879
+ b"0410101011101310151017102010221031103410361054105610611072100011"
880
+ b"0111031106111011141121113011331141115011521170117611001212121512"
881
+ b"1712201224123212401243125512601272120113041307131013131321132713"
882
+ b"3013341341136213701303140514121414143114331442144614501454140115"
883
+ b"1015131521153015321551152016241627164416461601170317101712172117"
884
+ b"3517411762177017002001200320052007201020122014201620212023202720"
885
+ b"3020322041204320452050205220672070207320752000210221102113211721"
886
+ b"2221252131213421422151210122042207222122232230223722412253225722"
887
+ b"7122742200230223052311232223242331233323422350236623012407242024"
888
+ b"2324322435244124722475240425112522253725402553257025002602260726"
889
+ b"2126552661260527112726273027432750270230113013301530173022303130"
890
+ b"3330353042304430473051306330713001310331053114312131233140316031"
891
+ b"7231763100321232203232323432503201331033143321332333273330334133"
892
+ b"4333473355337333033411341634223431345234603464340135103512352535"
893
+ b"3235443556357335163641360137033720372237353700400440124020402440"
894
+ b"2740324041405040704002410741114113412241304135414341514155410142"
895
+ b"0342104215422142334240425742624270420443114313432043224331433543"
896
+ b"0044024424443744404471440545074521456245134634466046104715473047"
897
+ b"4347514702501050145022504050445047505250665074500151035105511251"
898
+ b"2151325172510052115223523052365253520253075310532753445351536553"
899
+ b"7353015404542054325446541255265551555355425602570457225711601360"
900
+ b"1560316033606060006120612761646112623462426255626262706200631463"
901
+ b"2163406325644364626400650365346560650566406611671367007004700770"
902
+ b"2070227036704070547062700271117124714371457101720472107216722172"
903
+ b"3072517202733273357353730174057413742074507422754275027631760077"
904
+ )
905
+
906
@classmethod
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
    """Decode IQ3_S blocks into scaled, signed grid values.

    Row layout as read by this method: 2 bytes reinterpreted as a float16
    scale, ``QK_K // 4`` bytes of low grid-index bits, ``QK_K // 32`` bytes
    whose individual bits supply bit 8 of each grid index, ``QK_K // 8``
    bytes of sign bits, and finally bytes that each hold two 4-bit
    sub-scales.

    Args:
        blocks: 2-D array with one quantized block per row (presumably
            uint8 -- confirm with caller).

    Returns:
        Array with ``blocks.shape[0]`` rows holding the dequantized values.
    """
    count = blocks.shape[0]

    qs_end = 2 + QK_K // 4
    qh_end = qs_end + QK_K // 32
    signs_end = qh_end + QK_K // 8
    raw_d, idx_low, idx_high, raw_signs, raw_scales = np.hsplit(
        blocks, [2, qs_end, qh_end, signs_end]
    )

    scale = raw_d.view(np.float16).astype(np.float32)

    # Split every scale byte into its low and high nibble
    nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2))
    sub_scales = raw_scales.reshape((count, -1, 1)) >> nibble_shifts
    sub_scales = (sub_scales & 0x0F).reshape((count, -1))
    # Odd multipliers 1, 3, 5, ... derived from the 4-bit sub-scale
    db = scale * (1 + 2 * sub_scales)
    db = db.reshape((count, -1, 1, 1))

    # Expand each sign byte into 8 bits; cleared bit -> +1, set bit -> -1
    bit_pos = np.arange(8, dtype=np.uint8)
    sign_bits = (raw_signs.reshape((count, -1, 1)) >> bit_pos.reshape((1, 1, 8))) & np.uint8(0x01)
    signs = np.where(sign_bits == 0, np.float32(1), np.float32(-1))
    signs = signs.reshape((count, -1, 4, 8))

    # Every bit of a high byte becomes bit 8 of one grid index
    high_bits = idx_high.reshape((count, -1, 1)) >> bit_pos
    high_bits = (high_bits & 0x01).astype(np.uint16).reshape((count, -1))
    grid_idx = idx_low.astype(np.uint16) | (high_bits << 8)

    assert cls.grid is not None
    grid = np.take_along_axis(cls.grid, grid_idx.reshape((count, -1, 1, 1)), axis=-2)
    grid = grid.reshape((count, -1, 4, 8))

    return (db * grid * signs).reshape((count, -1))
937
+
938
+
939
+ class IQ1_S(__Quant, qtype=GGMLQuantizationType.IQ1_S):
940
+ # iq1s_grid, with each byte packed into 2 bits
941
+ # -1, 0, 1 <=> 0, 1, 2
942
+ grid_shape = (2048, 8)
943
+ grid_map = (-1, 0, 1)
944
+ grid_hex = (
945
+ b"00000200050008000a00110015002000220028002a0045005100540056006500"
946
+ b"8000820088008a009500a000a200a800aa000401050111011401160119011a01"
947
+ b"2501410146014901520155015a0161016401660168018501910194019601a501"
948
+ b"0002020208020a0215022002220228022a024502510259026402690280028202"
949
+ b"88028a02910295029902a002a202a802aa021104140416042504410449045504"
950
+ b"5a046404650491049904a5040105040505050605150518051a05290540054505"
951
+ b"4a0550055105540555055605590560056205650568056a058105910595059805"
952
+ b"9a05a105a405a505a605a9051406190641064406500652065506580660066106"
953
+ b"6606690685069106940699060008020808080a0815082008220828082a084508"
954
+ b"5108560865088008820888088a089508a008a208a808aa080509110914091909"
955
+ b"2409250941095009510955096109640969099109940996099909a509000a020a"
956
+ b"080a0a0a150a200a220a280a2a0a450a510a590a610a650a800a820a850a880a"
957
+ b"8a0a950aa00aa20aa80aaa0a1010111014101910241025104110441050105510"
958
+ b"58106110641065106910911094109610a110a510011104110611091110111211"
959
+ b"1511181121112411291145114a11501151115211541155115611591160116511"
960
+ b"841192119511a111a41111121412161225124012461249125212551258125a12"
961
+ b"641266128512911294129612a512011406140914141415141814191421142614"
962
+ b"41144514461448144a1451145414551456145914621465146814841489149014"
963
+ b"94149514981499149a14a114a414a514a914021505150a151115141515151615"
964
+ b"191520152215251528152a154115441545154615511552155415551556155915"
965
+ b"5a1561156415651566156915801582158415851588158a159015911594159515"
966
+ b"961599159a15a015a215a51501160416051606161516161618161a1621162616"
967
+ b"401642164416451648164a165116551656165816591661166416651668166916"
968
+ b"6a1686168a1692169516a416a916111816182518411844184618491850185518"
969
+ b"58185a1860186118641866186918851891189418a5181019121915191a192119"
970
+ b"25194219441945194819511954195519561959195a19601965196a1989199119"
971
+ b"921995199819a119a619a919091a161a241a261a441a461a491a501a521a551a"
972
+ b"581a611a661a691a851a911a961a9a1a0020022008200a201520202022202520"
973
+ b"28202a20452051205920612065208020822088208a209520a020a220a520a820"
974
+ b"aa2005211121142119212521422144214921552158215a216121642165216621"
975
+ b"8521902196219921a521012208220a22112215222022222228222a2245225122"
976
+ b"562259226522812288228a2291229522a022a222a822aa220524142416241924"
977
+ b"252444244524462449245224552458245a2466248524912494249924a124a524"
978
+ b"0925152521252925402545254825512554255525592562256525682589259025"
979
+ b"9425952598259a25a125a425a625a92505261026122619262526412649265526"
980
+ b"6026612669268426862690269a260028022808280a2815282028222828282a28"
981
+ b"45285128542865288028822888288a28a028a228a828aa280929112914291929"
982
+ b"2529462949295229552961296429662969298529902996299929a429a529002a"
983
+ b"022a082a0a2a202a222a282a2a2a452a512a562a592a652a802a822a882a8a2a"
984
+ b"952aa02aa22aa82aaa2a054011401640254049405240554058405a4061406440"
985
+ b"664094409940a140a6400041014104410641094112411541164118411a412141"
986
+ b"26412941454148414a41514154415541564159415a41654168416a4181418441"
987
+ b"8641904192419541a041a141a241054211421442164225424142524255425a42"
988
+ b"6442694289429442a5420144154419442944454448444a445144544455445644"
989
+ b"61446244654468446a44814486448944904492449544a044a144a94401450245"
990
+ b"05450a4511451445154516451945204525452a45414544454545464549455045"
991
+ b"5145544555455645584559456145644565456645694582458445854588459145"
992
+ b"94459545964599459a45a545a845aa450146054609461446154618461a462146"
993
+ b"2446294640464246454648465046514652465546564659466246654668468146"
994
+ b"85468a4694469546a146a446a6460548114815481a4825484248494850485548"
995
+ b"5848614864486648694885489148944896489948a5480149054906490a491049"
996
+ b"144915491849214924492649404945494a495149524954495549564959496049"
997
+ b"6249654966496a49864989499249954996499849a149a449a649a949164a444a"
998
+ b"464a494a554a584a5a4a644a694a944aa54a0150045005500650095012501550"
999
+ b"1a50215024502950405045504850515054505550565059506550685086508950"
1000
+ b"95509850a050a150a650a9500551085109510a51115114511551165118511951"
1001
+ b"20512551265128512a5141514451455146514951505151515251545155515651"
1002
+ b"585159515a51615164516551665169518251855191519451955196519951a051"
1003
+ b"a551aa5101520652125215521a5221522452425245524a525152545255525652"
1004
+ b"595262526552855290529252955299529a52a452045405541154145415541654"
1005
+ b"185419542154255428542a54415444544554465449544a545054515454545554"
1006
+ b"5654585459545a54615462546454655466546954805488548a54915494549554"
1007
+ b"96549954a154a454a554aa540155025504550555065509551055115512551455"
1008
+ b"1555165519551a55215524552555265529554055415542554455455546554855"
1009
+ b"4955505551555255545555555655585559555a55605561556455655566556855"
1010
+ b"69556a5581558455855589558a559055915594559555965598559955a155a455"
1011
+ b"a555a655a9550056015602560456065608560956115614561556185619562056"
1012
+ b"2156225624562556265628562956415645564656485649564a56505651565256"
1013
+ b"545655565656585659565a566156645665566956825685568656885689568a56"
1014
+ b"915695569a56a256a556a656a856a95604580558065809581058155818582158"
1015
+ b"2a58455848584a58515854585558565858585958605862586458655882588958"
1016
+ b"9058925895589858a158a9580159025905590a59115914591559165919592559"
1017
+ b"41594459455946594959505951595259545955595659585959595a5961596459"
1018
+ b"655966596959815985598959915994599559965998599959a559045a085a155a"
1019
+ b"1a5a205a255a265a295a455a485a495a515a555a565a585a595a625a655a685a"
1020
+ b"6a5a815a8a5a925a955a965a985a9a5aa15a0560146016601960256044605060"
1021
+ b"5560566058605a60616064606660696081609660a56001610461066109611261"
1022
+ b"15612161226126612961456149615161556156615961656166616a6184618a61"
1023
+ b"92619561a161a661a96111621662196240624162466255625662586260628562"
1024
+ b"91629662a56211641264156416641a6421642664296440644264456448644a64"
1025
+ b"516454645564566459645a646064626465648464856489649064926494649564"
1026
+ b"966498649a64a164a464a964056508650a651165156516651965446545654665"
1027
+ b"496550655165546555655665596561656465656566656965866589658a659165"
1028
+ b"9565966599659a65a265a565a665a86502660966156620662666286629664066"
1029
+ b"456648664a66516654665566566658665a666066656668668066826685668a66"
1030
+ b"9466966698669966a066a466a666aa661668196825684168526855685a686168"
1031
+ b"6968856891689868a66801690469106915692169246926692969406941694569"
1032
+ b"4669486951695469556956695969606965696a69826984698a699569a169a469"
1033
+ b"a569a969116a166a186a416a446a496a506a556a586a5a6a646a656a696a866a"
1034
+ b"946a986a9a6aa66a0080028008800a802080228028802a804580508051805480"
1035
+ b"5680598065808080828088808a809580a080a280a880aa800581118114811681"
1036
+ b"1981258141814481498150815281558156815881598164816681698185818981"
1037
+ b"948196819981a5810082028208820a8215822082228228822a82518254825982"
1038
+ b"65828082828288828a829582a082a282a882aa82148419844184448451845584"
1039
+ b"5a846184648469849484998401850985128515851a8526852985408541854585"
1040
+ b"4885518554855585568559855a856585668568856a8581858485868589859085"
1041
+ b"928595859885a68511861686198625864186448649864a865086558659865a86"
1042
+ b"618666866a86858691869a86a4860088028808880a8815882088228828882a88"
1043
+ b"41884588518854885988658869888088828888888a889588a088a288a888aa88"
1044
+ b"05890689118914891689258941894489468949895089528955895a8961896489"
1045
+ b"858996899989a589008a028a088a0a8a158a208a228a288a2a8a458a518a548a"
1046
+ b"568a808a828a888a8a8a958aa08aa28aa88aaa8a059011901690189019902590"
1047
+ b"419046904990559058905a9069906a9085909190949096909990a59001910491"
1048
+ b"069109911091159118911a912191249126912991409145915091519154915591"
1049
+ b"569159916291659184918691929195919891a191a491a691a991059211921492"
1050
+ b"19922592449246924992509252925592589266926992859294929692a9920194"
1051
+ b"04940694109415941894269440944a9451945494559456945894599460946194"
1052
+ b"62946594849486949294949495949894a194a9940095059508950a9510951195"
1053
+ b"14951595169519952195259529952a9541954495459546954995509551955295"
1054
+ b"549555955695589559955a956195649565956695699581958595889591959295"
1055
+ b"94959595969599959a95a095a295a595a895aa95019604961096159619962096"
1056
+ b"2696299645964896499651965296559656965996659668968296849689968a96"
1057
+ b"929694969596a496a696a9960598169819982598419846985098529855985698"
1058
+ b"5a98649865988598919896989998a59804990699099910991299159918991a99"
1059
+ b"209921992499269940994299459948994a995199549955995699599962996599"
1060
+ b"66996a99819984999099929995999a99a199a699059a159a259a449a469a499a"
1061
+ b"509a559a589a619a859a919a949a959a969a00a002a008a00aa015a020a022a0"
1062
+ b"28a02aa045a051a054a056a059a080a082a088a08aa095a0a0a0a2a0a8a0aaa0"
1063
+ b"05a109a111a114a116a119a11aa146a149a151a155a158a15aa161a164a185a1"
1064
+ b"90a192a196a199a102a208a20aa210a219a222a228a22aa245a251a256a259a2"
1065
+ b"65a280a282a288a28aa295a2a0a2a2a2a8a2aaa219a425a441a444a450a454a4"
1066
+ b"55a458a45aa461a465a466a468a469a485a406a509a510a512a515a518a526a5"
1067
+ b"29a542a545a551a554a555a556a559a565a56aa581a584a585a586a589a592a5"
1068
+ b"95a598a505a611a616a61aa621a625a644a646a64aa652a655a656a658a660a6"
1069
+ b"62a686a690a695a696a699a6a1a6a4a6a6a600a802a808a80aa820a822a828a8"
1070
+ b"2aa851a854a856a859a880a882a888a88aa895a8a0a8a2a8a8a8aaa805a914a9"
1071
+ b"19a921a925a941a950a955a95aa961a966a969a990a996a900aa02aa08aa0aaa"
1072
+ b"20aa22aa28aa2aaa51aa54aa56aa80aa82aa88aa8aaa95aaa0aaa2aaa8aaaaaa"
1073
+ )
1074
+
1075
+ delta = np.float32(0.125)
1076
+
1077
@classmethod
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
    """Decode IQ1_S blocks into scaled grid values shifted by +/- ``cls.delta``.

    Row layout as read by this method: 2 bytes reinterpreted as a float16
    scale, ``QK_K // 8`` bytes of low grid-index bits, then bytes
    reinterpreted as uint16 words. In each word, four 3-bit fields (bit
    offsets 0, 3, 6, 9) supply bits 8-10 of four grid indices, bits 12-14
    hold a sub-scale ``s`` applied as ``2 * s + 1``, and bit 15 selects the
    sign of ``cls.delta``.

    Args:
        blocks: 2-D array with one quantized block per row (presumably
            uint8 -- confirm with caller).

    Returns:
        Array with ``blocks.shape[0]`` rows holding the dequantized values.
    """
    count = blocks.shape[0]

    raw_d, idx_low, raw_high = np.hsplit(blocks, [2, 2 + QK_K // 8])

    scale = raw_d.view(np.float16).astype(np.float32)
    high_words = raw_high.view(np.uint16)

    # Per-word scale from bits 12-14
    dl = scale * (2 * ((high_words >> 12) & 7) + 1)
    dl = dl.reshape((count, -1, 1, 1))

    # Bit 15 cleared -> +delta, set -> -delta
    top_bit_clear = (high_words & np.uint16(0x8000)) == 0
    delta = np.where(top_bit_clear, cls.delta, -cls.delta)
    delta = delta.reshape((count, -1, 1, 1))

    # Four 3-bit fields per word extend the one-byte indices to 11 bits
    field_shifts = np.array([0, 3, 6, 9], dtype=np.uint16).reshape((1, 1, 4))
    fields = high_words.reshape((count, -1, 1)) >> field_shifts
    grid_idx = idx_low.astype(np.uint16) | ((fields & 7) << 8).reshape((count, -1))

    assert cls.grid is not None
    grid = np.take_along_axis(cls.grid, grid_idx.reshape((count, -1, 1, 1)), axis=-2)
    grid = grid.reshape((count, -1, 4, 8))

    return (dl * (grid + delta)).reshape((count, -1))
1100
+
1101
+
1102
class IQ1_M(__Quant, qtype=GGMLQuantizationType.IQ1_M):
    # The grid definition and the delta are shared with IQ1_S
    grid_shape = IQ1_S.grid_shape
    grid_map = IQ1_S.grid_map
    grid_hex = IQ1_S.grid_hex

    delta = IQ1_S.delta

    # Okay *this* type is weird. It's the only one which stores the f16 scales in multiple parts.
    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Decode IQ1_M blocks into scaled grid values shifted by +/- ``cls.delta``.

        Row layout as read by this method: ``QK_K // 8`` bytes of low
        grid-index bits, ``QK_K // 16`` bytes that each hold two nibbles
        (3 high index bits plus one delta-sign bit per nibble), then bytes
        reinterpreted as four uint16 scale words. The top nibble of each
        scale word is one quarter of the float16 block scale; each word
        also carries four 3-bit sub-scales at bit offsets 0, 3, 6 and 9.

        Args:
            blocks: 2-D array with one quantized block per row (presumably
                uint8 -- confirm with caller).

        Returns:
            Array with ``blocks.shape[0]`` rows holding the dequantized values.
        """
        count = blocks.shape[0]

        qs_end = QK_K // 8
        idx_low, raw_high, raw_scales = np.hsplit(blocks, [qs_end, qs_end + QK_K // 16])

        # The f16 scale is packed across multiple bytes: the top nibble of
        # word k becomes bits 4k .. 4k+3 of the float16 bit pattern.
        scale_words = raw_scales.view(np.uint16)
        nibble_dest = np.array([12, 8, 4, 0], dtype=np.uint16).reshape((1, 4))
        d_parts = (scale_words.reshape((count, 4)) & np.uint16(0xF000)) >> nibble_dest
        d_bits = np.bitwise_or.reduce(d_parts, axis=-1)
        scale = d_bits.view(np.float16).astype(np.float32).reshape((count, 1))

        # Four 3-bit sub-scales per scale word, applied as 2 * s + 1
        field_shifts = np.array([0, 3, 6, 9], dtype=np.uint16).reshape((1, 1, 4))
        sub_scales = scale_words.reshape(count, -1, 1) >> field_shifts
        sub_scales = (sub_scales & 0x07).reshape((count, -1))
        dl = scale * (2 * sub_scales + 1)
        dl = dl.reshape((count, -1, 2, 1, 1))

        # Low 3 bits of each nibble extend the one-byte indices to 11 bits
        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2))
        nibbles = raw_high.reshape((count, -1, 1)) >> nibble_shifts
        high_bits = ((nibbles & 0x07).astype(np.uint16) << 8).reshape((count, -1))
        grid_idx = idx_low.astype(np.uint16) | high_bits

        # Bit 3 of each nibble cleared -> +delta, set -> -delta
        delta = np.where((nibbles & 0x08) == 0, cls.delta, -cls.delta)
        delta = delta.reshape((count, -1, 2, 2, 1))

        assert cls.grid is not None
        grid = np.take_along_axis(cls.grid, grid_idx.reshape((count, -1, 1, 1)), axis=-2)
        grid = grid.reshape((count, -1, 2, 2, 8))

        return (dl * (grid + delta)).reshape((count, -1))
1139
+
1140
+
1141
class IQ4_NL(__Quant, qtype=GGMLQuantizationType.IQ4_NL):
    # Lookup table mapping each 4-bit code to its int8 value
    kvalues = (-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113)

    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Decode IQ4_NL blocks through the ``kvalues`` lookup table.

        Each row of ``blocks`` starts with 2 bytes reinterpreted as a
        float16 scale; every remaining byte holds two 4-bit codes. All low
        nibbles of a group of ``cls.block_size // 2`` bytes are emitted
        first, followed by all high nibbles of that group.

        Args:
            blocks: 2-D array with one quantized block per row (presumably
                uint8 -- confirm with caller).

        Returns:
            Array with ``blocks.shape[0]`` rows: the looked-up values
            multiplied by the block scale.
        """
        count = blocks.shape[0]

        raw_d, packed = np.hsplit(blocks, [2])
        scale = raw_d.view(np.float16).astype(np.float32)

        # Broadcasting the shifts along a new axis yields low nibbles, then high nibbles
        nibble_shifts = np.array([0, 4], dtype=np.uint8).reshape((1, 1, 2, 1))
        codes = packed.reshape((count, -1, 1, cls.block_size // 2)) >> nibble_shifts
        codes = (codes & np.uint8(0x0F)).reshape((count, -1, 1))

        table = np.array(cls.kvalues, dtype=np.int8).reshape(1, 1, 16)
        values = np.take_along_axis(table, codes, axis=-1)
        values = values.astype(np.float32).reshape((count, -1))

        return (scale * values)
1160
+
1161
+
1162
class IQ4_XS(__Quant, qtype=GGMLQuantizationType.IQ4_XS):
    @classmethod
    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
        """Decode IQ4_XS blocks through the ``IQ4_NL.kvalues`` lookup table.

        Row layout as read by this method: 2 bytes reinterpreted as a
        float16 scale, 2 bytes reinterpreted as one uint16 holding the
        2 high bits of each sub-scale, ``QK_K // 64`` bytes holding the
        4 low bits of two sub-scales each, then bytes holding two 4-bit
        codes each. The 6-bit sub-scales are offset by -32 before use.

        Args:
            blocks: 2-D array with one quantized block per row (presumably
                uint8 -- confirm with caller).

        Returns:
            Array with ``blocks.shape[0]`` rows holding the dequantized values.
        """
        count = blocks.shape[0]

        raw_d, raw_high, raw_low, packed = np.hsplit(blocks, [2, 4, 4 + QK_K // 64])

        scale = raw_d.view(np.float16).astype(np.float32)
        high_word = raw_high.view(np.uint16)

        nibble_shifts = np.array([0, 4], dtype=np.uint8)

        # Low 4 bits of every sub-scale: two per byte
        low_bits = raw_low.reshape((count, -1, 1)) >> nibble_shifts.reshape((1, 1, 2))
        low_bits = low_bits.reshape((count, -1)) & np.uint8(0x0F)

        # High 2 bits of every sub-scale: consecutive bit pairs of the uint16
        pair_shifts = np.arange(0, 2 * (QK_K // 32), 2, dtype=np.uint16).reshape((1, -1, 1))
        high_bits = high_word.reshape((count, 1, -1)) >> pair_shifts
        high_bits = high_bits.reshape((count, -1)).astype(np.uint8) & np.uint8(0x03)

        # Combine into a 6-bit value and re-centre it around zero
        sub_scales = (low_bits | (high_bits << np.uint8(4))).astype(np.int8) - np.int8(32)
        dl = (scale * sub_scales.astype(np.float32)).reshape((count, -1, 1))

        # Per group of 16 bytes: all low nibbles first, then all high nibbles
        codes = packed.reshape((count, -1, 1, 16)) >> nibble_shifts.reshape((1, 1, 2, 1))
        codes = codes.reshape((count, -1, 32, 1)) & np.uint8(0x0F)

        table = np.array(IQ4_NL.kvalues, dtype=np.int8).reshape((1, 1, 1, -1))
        values = np.take_along_axis(table, codes, axis=-1)
        values = values.astype(np.float32).reshape((count, -1, 32))

        return (dl * values).reshape((count, -1))
.venv/lib/python3.11/site-packages/gguf/tensor_mapping.py ADDED
@@ -0,0 +1,657 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Sequence
4
+
5
+ from .constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS, TENSOR_NAMES
6
+
7
+
8
class TensorNameMap:
    """Translate source-checkpoint tensor names into GGUF tensor names.

    The class-level tables list, per ``MODEL_TENSOR``, the names that tensor
    carries in the supported source formats. An instance flattens the entries
    relevant to one architecture into ``mapping``, which maps every known
    source name (and the GGUF name itself) to ``(tensor type, GGUF name)``.
    """

    # Tensors that exist once per model (no block index in the name).
    mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
        # Token embeddings
        MODEL_TENSOR.TOKEN_EMBD: (
            "gpt_neox.embed_in",  # gptneox
            "transformer.wte",  # gpt2 gpt-j mpt refact qwen dbrx jais exaone
            "transformer.word_embeddings",  # falcon
            "word_embeddings",  # bloom
            "model.embed_tokens",  # llama-hf nemotron
            "tok_embeddings",  # llama-pth
            "embeddings.word_embeddings",  # bert nomic-bert
            "language_model.embedding.word_embeddings",  # persimmon
            "wte",  # gpt2
            "transformer.embd.wte",  # phi2
            "model.tok_embeddings",  # internlm2
            "model.embedding",  # mamba-qbert
            "backbone.embedding",  # mamba
            "backbone.embeddings",  # mamba-hf
            "transformer.in_out_embed",  # Grok
            "embedding.word_embeddings",  # chatglm
            "transformer.token_embeddings",  # openelm
            "shared",  # t5
        ),

        # Token type embeddings
        MODEL_TENSOR.TOKEN_TYPES: (
            "embeddings.token_type_embeddings",  # bert nomic-bert
        ),

        # Normalization of token embeddings
        MODEL_TENSOR.TOKEN_EMBD_NORM: (
            "word_embeddings_layernorm",  # bloom
            "embeddings.LayerNorm",  # bert
            "emb_ln",  # nomic-bert
            "transformer.norm",  # openelm
        ),

        # Position embeddings
        MODEL_TENSOR.POS_EMBD: (
            "transformer.wpe",  # gpt2
            "embeddings.position_embeddings",  # bert
            "wpe",  # gpt2
        ),

        # Output
        MODEL_TENSOR.OUTPUT: (
            "embed_out",  # gptneox
            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
            "output",  # llama-pth bloom internlm2
            "word_embeddings_for_head",  # persimmon
            "lm_head.linear",  # phi2
            "output_layer",  # chatglm
        ),

        # Output norm
        MODEL_TENSOR.OUTPUT_NORM: (
            "gpt_neox.final_layer_norm",  # gptneox
            "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
            "model.norm",  # llama-hf baichuan internlm2
            "norm",  # llama-pth
            "transformer.norm_f",  # mpt dbrx
            "ln_f",  # refact bloom qwen gpt2
            "language_model.encoder.final_layernorm",  # persimmon
            "model.final_layernorm",  # persimmon
            "lm_head.ln",  # phi2
            "model.norm_f",  # mamba-qbert
            "backbone.norm_f",  # mamba
            "transformer.rms_norm",  # Grok
            "encoder.final_layernorm",  # chatglm
            "transformer.norm",  # openelm
            "model.norm",  # nemotron
        ),

        # Rope frequencies
        MODEL_TENSOR.ROPE_FREQS: (
            "rope.freqs",  # llama-pth
            "rotary_pos_emb.inv_freq",  # chatglm
        ),
    }

    # Per-block tensors; "{bid}" is replaced with the block index.
    block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
        # Attention norm
        MODEL_TENSOR.ATTN_NORM: (
            "gpt_neox.layers.{bid}.input_layernorm",  # gptneox
            "transformer.h.{bid}.ln_1",  # gpt2 gpt-j refact qwen jais exaone
            "transformer.blocks.{bid}.norm_1",  # mpt
            "transformer.h.{bid}.input_layernorm",  # falcon7b
            "h.{bid}.input_layernorm",  # bloom
            "transformer.h.{bid}.ln_mlp",  # falcon40b
            "model.layers.{bid}.input_layernorm",  # llama-hf nemotron
            "layers.{bid}.attention_norm",  # llama-pth
            "language_model.encoder.layers.{bid}.input_layernorm",  # persimmon
            "model.layers.{bid}.ln1",  # yi
            "h.{bid}.ln_1",  # gpt2
            "transformer.h.{bid}.ln",  # phi2
            "model.layers.layers.{bid}.norm",  # plamo
            "model.layers.{bid}.attention_norm",  # internlm2
            "model.layers.{bid}.norm",  # mamba-qbert
            "backbone.layers.{bid}.norm",  # mamba
            "transformer.decoder_layer.{bid}.rms_norm",  # Grok
            "transformer.blocks.{bid}.norm_attn_norm.norm_1",  # dbrx
            "encoder.layers.{bid}.input_layernorm",  # chatglm
            "transformer.layers.{bid}.attn_norm",  # openelm
        ),

        # Attention norm 2
        MODEL_TENSOR.ATTN_NORM_2: (
            "transformer.h.{bid}.ln_attn",  # falcon40b
            "encoder.layer.{bid}.layer_norm_1",  # jina-v2-code
        ),

        # Attention query-key-value
        MODEL_TENSOR.ATTN_QKV: (
            "gpt_neox.layers.{bid}.attention.query_key_value",  # gptneox
            "transformer.h.{bid}.attn.c_attn",  # gpt2 qwen jais
            "transformer.blocks.{bid}.attn.Wqkv",  # mpt
            "transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv",  # dbrx
            "transformer.h.{bid}.self_attention.query_key_value",  # falcon
            "h.{bid}.self_attention.query_key_value",  # bloom
            "language_model.encoder.layers.{bid}.self_attention.query_key_value",  # persimmon
            "model.layers.{bid}.self_attn.query_key_value",  # persimmon
            "h.{bid}.attn.c_attn",  # gpt2
            "transformer.h.{bid}.mixer.Wqkv",  # phi2
            "encoder.layers.{bid}.attn.Wqkv",  # nomic-bert
            "model.layers.{bid}.self_attn.qkv_proj",  # phi3
            "encoder.layers.{bid}.self_attention.query_key_value",  # chatglm
            "transformer.layers.{bid}.attn.qkv_proj",  # openelm
        ),

        # Attention query
        MODEL_TENSOR.ATTN_Q: (
            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron
            "layers.{bid}.attention.wq",  # llama-pth
            "encoder.layer.{bid}.attention.self.query",  # bert
            "transformer.h.{bid}.attn.q_proj",  # gpt-j
            "model.layers.layers.{bid}.self_attn.q_proj",  # plamo
            "model.layers.{bid}.attention.wq",  # internlm2
            "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
            "transformer.h.{bid}.attn.attention.q_proj",  # exaone
        ),

        # Attention key
        MODEL_TENSOR.ATTN_K: (
            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron
            "layers.{bid}.attention.wk",  # llama-pth
            "encoder.layer.{bid}.attention.self.key",  # bert
            "transformer.h.{bid}.attn.k_proj",  # gpt-j
            "transformer.h.{bid}.attn.k",  # refact
            "model.layers.layers.{bid}.self_attn.k_proj",  # plamo
            "model.layers.{bid}.attention.wk",  # internlm2
            "transformer.decoder_layer.{bid}.multi_head_attention.key",  # Grok
            "transformer.h.{bid}.attn.attention.k_proj",  # exaone
        ),

        # Attention value
        MODEL_TENSOR.ATTN_V: (
            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron
            "layers.{bid}.attention.wv",  # llama-pth
            "encoder.layer.{bid}.attention.self.value",  # bert
            "transformer.h.{bid}.attn.v_proj",  # gpt-j
            "transformer.h.{bid}.attn.v",  # refact
            "model.layers.layers.{bid}.self_attn.v_proj",  # plamo
            "model.layers.{bid}.attention.wv",  # internlm2
            "transformer.decoder_layer.{bid}.multi_head_attention.value",  # Grok
            "transformer.h.{bid}.attn.attention.v_proj",  # exaone
        ),

        # Attention output
        MODEL_TENSOR.ATTN_OUT: (
            "gpt_neox.layers.{bid}.attention.dense",  # gptneox
            "transformer.h.{bid}.attn.c_proj",  # gpt2 refact qwen jais
            "transformer.blocks.{bid}.attn.out_proj",  # mpt
            "transformer.h.{bid}.self_attention.dense",  # falcon
            "h.{bid}.self_attention.dense",  # bloom
            "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron
            "layers.{bid}.attention.wo",  # llama-pth
            "encoder.layer.{bid}.attention.output.dense",  # bert
            "transformer.h.{bid}.attn.out_proj",  # gpt-j
            "language_model.encoder.layers.{bid}.self_attention.dense",  # persimmon
            "model.layers.{bid}.self_attn.dense",  # persimmon
            "h.{bid}.attn.c_proj",  # gpt2
            "transformer.h.{bid}.mixer.out_proj",  # phi2
            "model.layers.layers.{bid}.self_attn.o_proj",  # plamo
            "model.layers.{bid}.attention.wo",  # internlm2
            "encoder.layers.{bid}.attn.out_proj",  # nomic-bert
            "transformer.decoder_layer.{bid}.multi_head_attention.linear",  # Grok
            "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj",  # dbrx
            "encoder.layers.{bid}.self_attention.dense",  # chatglm
            "transformer.layers.{bid}.attn.out_proj",  # openelm
            "transformer.h.{bid}.attn.attention.out_proj",  # exaone
        ),

        # Attention output norm
        MODEL_TENSOR.ATTN_OUT_NORM: (
            "encoder.layer.{bid}.attention.output.LayerNorm",  # bert
            "encoder.layers.{bid}.norm1",  # nomic-bert
            "transformer.decoder_layer.{bid}.rms_norm_1",  # Grok
            "transformer.blocks.{bid}.norm_attn_norm.norm_2",  # dbrx
        ),

        MODEL_TENSOR.ATTN_POST_NORM: (
            "model.layers.{bid}.post_attention_layernorm",  # gemma2
        ),

        # Rotary embeddings
        MODEL_TENSOR.ATTN_ROT_EMBD: (
            "model.layers.{bid}.self_attn.rotary_emb.inv_freq",  # llama-hf
            "layers.{bid}.attention.inner_attention.rope.freqs",  # llama-pth
            "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq",  # plamo
            "transformer.h.{bid}.attn.rotary_emb.inv_freq",  # codeshell
        ),

        # Feed-forward norm
        MODEL_TENSOR.FFN_NORM: (
            "gpt_neox.layers.{bid}.post_attention_layernorm",  # gptneox
            "transformer.h.{bid}.ln_2",  # gpt2 refact qwen jais exaone
            "h.{bid}.post_attention_layernorm",  # bloom
            "transformer.blocks.{bid}.norm_2",  # mpt
            "model.layers.{bid}.post_attention_layernorm",  # llama-hf nemotron
            "layers.{bid}.ffn_norm",  # llama-pth
            "language_model.encoder.layers.{bid}.post_attention_layernorm",  # persimmon
            "model.layers.{bid}.ln2",  # yi
            "h.{bid}.ln_2",  # gpt2
            "model.layers.{bid}.ffn_norm",  # internlm2
            "transformer.decoder_layer.{bid}.rms_norm_2",  # Grok
            "encoder.layers.{bid}.post_attention_layernorm",  # chatglm
            "transformer.layers.{bid}.ffn_norm",  # openelm
        ),

        # Pre feed-forward norm
        MODEL_TENSOR.FFN_PRE_NORM: (
            "model.layers.{bid}.pre_feedforward_layernorm",  # gemma2
        ),

        # Post feed-forward norm
        MODEL_TENSOR.FFN_POST_NORM: (
            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2
        ),

        MODEL_TENSOR.FFN_GATE_INP: (
            "layers.{bid}.feed_forward.gate",  # mixtral
            "model.layers.{bid}.block_sparse_moe.gate",  # mixtral
            "model.layers.{bid}.mlp.gate",  # qwen2moe
            "transformer.decoder_layer.{bid}.router",  # Grok
            "transformer.blocks.{bid}.ffn.router.layer",  # dbrx
        ),

        MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert_gate",  # qwen2moe
        ),

        # Feed-forward up
        MODEL_TENSOR.FFN_UP: (
            "gpt_neox.layers.{bid}.mlp.dense_h_to_4h",  # gptneox
            "transformer.h.{bid}.mlp.c_fc",  # gpt2 jais
            "transformer.blocks.{bid}.ffn.up_proj",  # mpt
            "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
            "h.{bid}.mlp.dense_h_to_4h",  # bloom
            "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron
            "layers.{bid}.feed_forward.w3",  # llama-pth
            "encoder.layer.{bid}.intermediate.dense",  # bert
            "transformer.h.{bid}.mlp.fc_in",  # gpt-j
            "transformer.h.{bid}.mlp.linear_3",  # refact
            "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
            "model.layers.{bid}.mlp.dense_h_to_4h",  # persimmon
            "transformer.h.{bid}.mlp.w1",  # qwen
            "h.{bid}.mlp.c_fc",  # gpt2
            "transformer.h.{bid}.mlp.fc1",  # phi2
            "model.layers.{bid}.mlp.fc1",  # phi2
            "model.layers.{bid}.mlp.gate_up_proj",  # phi3
            "model.layers.layers.{bid}.mlp.up_proj",  # plamo
            "model.layers.{bid}.feed_forward.w3",  # internlm2
            "encoder.layers.{bid}.mlp.fc11",  # nomic-bert
            "model.layers.{bid}.mlp.c_fc",  # starcoder2
            "encoder.layer.{bid}.mlp.gated_layers_v",  # jina-bert-v2
            "model.layers.{bid}.residual_mlp.w3",  # arctic
            "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
            "transformer.h.{bid}.mlp.c_fc_1",  # exaone
        ),

        MODEL_TENSOR.FFN_UP_EXP: (
            "layers.{bid}.feed_forward.experts.w3",  # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_v",  # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.v1",  # dbrx
            "model.layers.{bid}.mlp.experts.up_proj",  # qwen2moe (merged)
        ),

        MODEL_TENSOR.FFN_UP_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert.up_proj",  # qwen2moe
            "model.layers.{bid}.mlp.shared_experts.up_proj",  # deepseek2
        ),

        # AWQ-activation gate
        MODEL_TENSOR.FFN_ACT: (
            "transformer.blocks.{bid}.ffn.act",  # mpt
        ),

        # Feed-forward gate
        MODEL_TENSOR.FFN_GATE: (
            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact
            "layers.{bid}.feed_forward.w1",  # llama-pth
            "transformer.h.{bid}.mlp.w2",  # qwen
            "transformer.h.{bid}.mlp.c_fc2",  # jais
            "model.layers.layers.{bid}.mlp.gate_proj",  # plamo
            "model.layers.{bid}.feed_forward.w1",  # internlm2
            "encoder.layers.{bid}.mlp.fc12",  # nomic-bert
            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
            "transformer.h.{bid}.mlp.linear_1",  # refact
            "model.layers.{bid}.residual_mlp.w1",  # arctic
            "transformer.h.{bid}.mlp.c_fc_0",  # exaone
        ),

        MODEL_TENSOR.FFN_GATE_EXP: (
            "layers.{bid}.feed_forward.experts.w1",  # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear",  # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w1",  # dbrx
            "model.layers.{bid}.mlp.experts.gate_proj",  # qwen2moe (merged)
        ),

        MODEL_TENSOR.FFN_GATE_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert.gate_proj",  # qwen2moe
            "model.layers.{bid}.mlp.shared_experts.gate_proj",  # deepseek2
        ),

        # Feed-forward down
        MODEL_TENSOR.FFN_DOWN: (
            "gpt_neox.layers.{bid}.mlp.dense_4h_to_h",  # gptneox
            "transformer.h.{bid}.mlp.c_proj",  # gpt2 refact qwen jais
            "transformer.blocks.{bid}.ffn.down_proj",  # mpt
            "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
            "h.{bid}.mlp.dense_4h_to_h",  # bloom
            "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron
            "layers.{bid}.feed_forward.w2",  # llama-pth
            "encoder.layer.{bid}.output.dense",  # bert
            "transformer.h.{bid}.mlp.fc_out",  # gpt-j
            "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
            "model.layers.{bid}.mlp.dense_4h_to_h",  # persimmon
            "h.{bid}.mlp.c_proj",  # gpt2
            "transformer.h.{bid}.mlp.fc2",  # phi2
            "model.layers.{bid}.mlp.fc2",  # phi2
            "model.layers.layers.{bid}.mlp.down_proj",  # plamo
            "model.layers.{bid}.feed_forward.w2",  # internlm2
            "encoder.layers.{bid}.mlp.fc2",  # nomic-bert
            "model.layers.{bid}.mlp.c_proj",  # starcoder2
            "encoder.layer.{bid}.mlp.wo",  # jina-bert-v2
            "transformer.layers.{bid}.ffn.proj_2",  # openelm
            "model.layers.{bid}.residual_mlp.w2",  # arctic
            "encoder.layer.{bid}.mlp.down_layer",  # jina-bert-v2
            "encoder.layers.{bid}.mlp.dense_4h_to_h",  # chatglm
            "model.layers.h.{bid}.mlp.c_proj",  # exaone
        ),

        MODEL_TENSOR.FFN_DOWN_EXP: (
            "layers.{bid}.feed_forward.experts.w2",  # mixtral (merged)
            "transformer.decoder_layer.{bid}.moe.linear_1",  # Grok (merged)
            "transformer.blocks.{bid}.ffn.experts.mlp.w2",  # dbrx
            "model.layers.{bid}.mlp.experts.down_proj",  # qwen2moe (merged)
        ),

        MODEL_TENSOR.FFN_DOWN_SHEXP: (
            "model.layers.{bid}.mlp.shared_expert.down_proj",  # qwen2moe
            "model.layers.{bid}.mlp.shared_experts.down_proj",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_Q_NORM: (
            "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
            "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
            "model.layers.{bid}.self_attn.q_norm",  # cohere
            "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
            "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
            "transformer.layers.{bid}.attn.q_norm",  # openelm
        ),

        MODEL_TENSOR.ATTN_K_NORM: (
            "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
            "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
            "model.layers.{bid}.self_attn.k_norm",  # cohere
            "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
            "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
            "transformer.layers.{bid}.attn.k_norm",  # openelm
        ),

        MODEL_TENSOR.ROPE_FREQS: (
            "language_model.encoder.layers.{bid}.self_attention.rotary_emb.inv_freq",  # persimmon
        ),

        MODEL_TENSOR.LAYER_OUT_NORM: (
            "encoder.layer.{bid}.output.LayerNorm",  # bert
            "encoder.layers.{bid}.norm2",  # nomic-bert
            "transformer.decoder_layer.{bid}.rms_norm_3",  # Grok
            "encoder.layer.{bid}.mlp.layernorm",  # jina-bert-v2
            "encoder.layer.{bid}.layer_norm_2",  # jina-v2-code
        ),

        MODEL_TENSOR.SSM_IN: (
            "model.layers.{bid}.in_proj",
            "backbone.layers.{bid}.mixer.in_proj",
        ),

        MODEL_TENSOR.SSM_CONV1D: (
            "model.layers.{bid}.conv1d",
            "backbone.layers.{bid}.mixer.conv1d",
        ),

        MODEL_TENSOR.SSM_X: (
            "model.layers.{bid}.x_proj",
            "backbone.layers.{bid}.mixer.x_proj",
        ),

        MODEL_TENSOR.SSM_DT: (
            "model.layers.{bid}.dt_proj",
            "backbone.layers.{bid}.mixer.dt_proj",
        ),

        MODEL_TENSOR.SSM_A: (
            "model.layers.{bid}.A_log",
            "backbone.layers.{bid}.mixer.A_log",
        ),

        MODEL_TENSOR.SSM_D: (
            "model.layers.{bid}.D",
            "backbone.layers.{bid}.mixer.D",
        ),

        MODEL_TENSOR.SSM_OUT: (
            "model.layers.{bid}.out_proj",
            "backbone.layers.{bid}.mixer.out_proj",
        ),

        MODEL_TENSOR.ATTN_Q_A: (
            "model.layers.{bid}.self_attn.q_a_proj",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_Q_B: (
            "model.layers.{bid}.self_attn.q_b_proj",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_KV_A_MQA: (
            "model.layers.{bid}.self_attn.kv_a_proj_with_mqa",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_KV_B: (
            "model.layers.{bid}.self_attn.kv_b_proj",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_Q_A_NORM: (
            "model.layers.{bid}.self_attn.q_a_layernorm",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_KV_A_NORM: (
            "model.layers.{bid}.self_attn.kv_a_layernorm",  # deepseek2
        ),

        MODEL_TENSOR.ATTN_SUB_NORM: (
            "model.layers.{bid}.self_attn.inner_attn_ln",  # bitnet
        ),

        MODEL_TENSOR.FFN_SUB_NORM: (
            "model.layers.{bid}.mlp.ffn_layernorm",  # bitnet
        ),

        MODEL_TENSOR.DEC_ATTN_NORM: (
            "decoder.block.{bid}.layer.0.layer_norm",  # t5
        ),

        MODEL_TENSOR.DEC_ATTN_Q: (
            "decoder.block.{bid}.layer.0.SelfAttention.q",  # t5
        ),

        MODEL_TENSOR.DEC_ATTN_K: (
            "decoder.block.{bid}.layer.0.SelfAttention.k",  # t5
        ),

        MODEL_TENSOR.DEC_ATTN_V: (
            "decoder.block.{bid}.layer.0.SelfAttention.v",  # t5
        ),

        MODEL_TENSOR.DEC_ATTN_OUT: (
            "decoder.block.{bid}.layer.0.SelfAttention.o",  # t5
        ),

        MODEL_TENSOR.DEC_ATTN_REL_B: (
            "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
            "decoder.block.{bid}.layer.1.layer_norm",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
            "decoder.block.{bid}.layer.1.EncDecAttention.q",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_K: (
            "decoder.block.{bid}.layer.1.EncDecAttention.k",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_V: (
            "decoder.block.{bid}.layer.1.EncDecAttention.v",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
            "decoder.block.{bid}.layer.1.EncDecAttention.o",  # t5
        ),

        MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
            "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias",  # t5
        ),

        MODEL_TENSOR.DEC_FFN_NORM: (
            "decoder.block.{bid}.layer.2.layer_norm",  # t5
        ),

        MODEL_TENSOR.DEC_FFN_GATE: (
            "decoder.block.{bid}.layer.2.DenseReluDense.wi_0",  # flan-t5
        ),

        MODEL_TENSOR.DEC_FFN_UP: (
            "decoder.block.{bid}.layer.2.DenseReluDense.wi",  # t5
            "decoder.block.{bid}.layer.2.DenseReluDense.wi_1",  # flan-t5
        ),

        MODEL_TENSOR.DEC_FFN_DOWN: (
            "decoder.block.{bid}.layer.2.DenseReluDense.wo",  # t5
        ),

        MODEL_TENSOR.DEC_OUTPUT_NORM: (
            "decoder.final_layer_norm",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_NORM: (
            "encoder.block.{bid}.layer.0.layer_norm",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_Q: (
            "encoder.block.{bid}.layer.0.SelfAttention.q",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_K: (
            "encoder.block.{bid}.layer.0.SelfAttention.k",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_V: (
            "encoder.block.{bid}.layer.0.SelfAttention.v",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_OUT: (
            "encoder.block.{bid}.layer.0.SelfAttention.o",  # t5
        ),

        MODEL_TENSOR.ENC_ATTN_REL_B: (
            "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias",  # t5
        ),

        MODEL_TENSOR.ENC_FFN_NORM: (
            "encoder.block.{bid}.layer.1.layer_norm",  # t5
        ),

        MODEL_TENSOR.ENC_FFN_GATE: (
            "encoder.block.{bid}.layer.1.DenseReluDense.wi_0",  # flan-t5
        ),

        MODEL_TENSOR.ENC_FFN_UP: (
            "encoder.block.{bid}.layer.1.DenseReluDense.wi",  # t5
            "encoder.block.{bid}.layer.1.DenseReluDense.wi_1",  # flan-t5
        ),

        MODEL_TENSOR.ENC_FFN_DOWN: (
            "encoder.block.{bid}.layer.1.DenseReluDense.wo",  # t5
        ),

        MODEL_TENSOR.ENC_OUTPUT_NORM: (
            "encoder.final_layer_norm",  # t5
        ),
    }

    # architecture-specific block mappings
    arch_block_mappings_cfg: dict[MODEL_ARCH, dict[MODEL_TENSOR, tuple[str, ...]]] = {
        MODEL_ARCH.ARCTIC: {
            MODEL_TENSOR.FFN_NORM: (
                "model.layers.{bid}.residual_layernorm",
            ),
            MODEL_TENSOR.FFN_NORM_EXP: (
                "model.layers.{bid}.post_attention_layernorm",
            ),
        },
    }

    # Flattened lookup: source or GGUF name -> (tensor type, GGUF name).
    mapping: dict[str, tuple[MODEL_TENSOR, str]]

    def __init__(self, arch: MODEL_ARCH, n_blocks: int):
        """Build the name lookup for ``arch`` with ``n_blocks`` blocks.

        Only tensors listed in ``MODEL_TENSORS[arch]`` are registered.
        """
        self.mapping = {}
        arch_tensors = MODEL_TENSORS[arch]

        for tensor, keys in self.mappings_cfg.items():
            if tensor not in arch_tensors:
                continue
            tensor_name = TENSOR_NAMES[tensor]
            self.mapping[tensor_name] = (tensor, tensor_name)
            for key in keys:
                self.mapping[key] = (tensor, tensor_name)

        # Apply architecture overrides to a per-instance copy. Updating the
        # class-level table in place would leak one architecture's overrides
        # into every TensorNameMap created afterwards.
        block_mappings = dict(self.block_mappings_cfg)
        block_mappings.update(self.arch_block_mappings_cfg.get(arch, {}))

        for bid in range(n_blocks):
            for tensor, keys in block_mappings.items():
                if tensor not in arch_tensors:
                    continue

                tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
                self.mapping[tensor_name] = (tensor, tensor_name)
                for key in keys:
                    key = key.format(bid = bid)
                    self.mapping[key] = (tensor, tensor_name)

    def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
        """Return ``(tensor type, GGUF name)`` for ``key``, or ``None``.

        If ``key`` itself is unknown, each suffix in ``try_suffixes`` is tried
        in order: when ``key`` ends with it, the remainder is looked up and
        the suffix is appended to the mapped GGUF name.
        """
        result = self.mapping.get(key)
        if result is not None:
            return result
        for suffix in try_suffixes:
            if key.endswith(suffix):
                result = self.mapping.get(key[:-len(suffix)])
                if result is not None:
                    return result[0], result[1] + suffix
        return None

    def get_name(self, key: str, try_suffixes: Sequence[str] = ()) -> str | None:
        """Return only the GGUF name for ``key``, or ``None`` if unmapped."""
        result = self.get_type_and_name(key, try_suffixes = try_suffixes)
        if result is None:
            return None
        return result[1]

    def get_type(self, key: str, try_suffixes: Sequence[str] = ()) -> MODEL_TENSOR | None:
        """Return only the tensor type for ``key``, or ``None`` if unmapped."""
        result = self.get_type_and_name(key, try_suffixes = try_suffixes)
        if result is None:
            return None
        return result[0]

    def __getitem__(self, key: str) -> str:
        """Return the GGUF name for ``key``; raise ``KeyError`` if unmapped."""
        try:
            return self.mapping[key][1]
        except KeyError:
            raise KeyError(key)

    def __contains__(self, key: str) -> bool:
        return key in self.mapping

    def __repr__(self) -> str:
        return repr(self.mapping)
654
+
655
+
656
def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> TensorNameMap:
    """Create the ``TensorNameMap`` for ``arch`` with ``n_blocks`` blocks."""
    return TensorNameMap(arch, n_blocks)
.venv/lib/python3.11/site-packages/gguf/utility.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+
5
+
6
def fill_templated_filename(filename: str, output_type: str | None) -> str:
    """Fill type placeholders in a file name, e.g. 'some-model-name.{ftype}.gguf'.

    ``{}``, ``{outtype}`` and ``{ftype}`` are replaced with the lower-cased
    ``output_type``; ``{OUTTYPE}`` and ``{FTYPE}`` with the upper-cased one.
    When ``output_type`` is ``None`` every placeholder becomes an empty string.
    """
    ftype_lowercase: str = output_type.lower() if output_type is not None else ""
    ftype_uppercase: str = output_type.upper() if output_type is not None else ""
    return filename.format(
        ftype_lowercase,
        outtype=ftype_lowercase, ftype=ftype_lowercase,
        OUTTYPE=ftype_uppercase, FTYPE=ftype_uppercase,
    )
13
+
14
+
15
def model_weight_count_rounded_notation(model_params_count: int, min_digits: int = 2) -> str:
    """Format a parameter count as a short label such as ``7.0B`` or ``125M``.

    The count is scaled to trillions (T), billions (B), millions (M) or
    thousands (K). Decimals are added only as needed so that the label shows
    at least ``min_digits`` digits.
    """
    # Thresholds are inclusive so an exact power of a thousand is promoted
    # to the larger unit (1_000_000_000 -> "1.0B" rather than "1000M").
    if model_params_count >= 1e12:
        # Trillions Of Parameters
        scaled_model_params = model_params_count * 1e-12
        scale_suffix = "T"
    elif model_params_count >= 1e9:
        # Billions Of Parameters
        scaled_model_params = model_params_count * 1e-9
        scale_suffix = "B"
    elif model_params_count >= 1e6:
        # Millions Of Parameters
        scaled_model_params = model_params_count * 1e-6
        scale_suffix = "M"
    else:
        # Thousands Of Parameters
        scaled_model_params = model_params_count * 1e-3
        scale_suffix = "K"

    # Number of decimals: whatever the integer part leaves of min_digits.
    # A leading "0" integer part does not count as a digit.
    fix = max(min_digits - len(str(round(scaled_model_params)).lstrip('0')), 0)

    return f"{scaled_model_params:.{fix}f}{scale_suffix}"
36
+
37
+
38
def size_label(total_params: int, shared_params: int, expert_params: int, expert_count: int) -> str:
    """Return the size part of a model name.

    With experts (``expert_count > 0``) the label is
    ``"<expert_count>x<size>"``, where the size is computed from
    ``|shared_params| + |expert_params|``. Otherwise it is the rounded
    notation of ``|total_params|``.
    """
    if expert_count > 0:
        pretty_size = model_weight_count_rounded_notation(abs(shared_params) + abs(expert_params), min_digits=2)
        size_class = f"{expert_count}x{pretty_size}"
    else:
        size_class = model_weight_count_rounded_notation(abs(total_params), min_digits=2)

    return size_class
47
+
48
+
49
def naming_convention(model_name: str | None, base_name: str | None, finetune_string: str | None, version_string: str | None, size_label: str | None, output_type: str | None, model_type: Literal['vocab', 'LoRA'] | None = None) -> str:
    """Assemble a GGUF file-name stem from its optional components.

    Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention

    The result is ``name[-size][-finetune][-version][-ENCODING][-kind]``.
    ``base_name`` takes precedence over ``model_name``; if both are ``None``
    the name is ``"ggml-model"``. Components that are ``None`` are omitted.
    Spaces become dashes, and ``/`` in the name also becomes a dash.
    """
    if base_name is not None:
        name = base_name.strip().replace(' ', '-').replace('/', '-')
    elif model_name is not None:
        name = model_name.strip().replace(' ', '-').replace('/', '-')
    else:
        name = "ggml-model"

    parameters = f"-{size_label}" if size_label is not None else ""

    finetune = f"-{finetune_string.strip().replace(' ', '-')}" if finetune_string is not None else ""

    version = f"-{version_string.strip().replace(' ', '-')}" if version_string is not None else ""

    # The encoding is always upper-cased, e.g. "f16" -> "-F16".
    encoding = f"-{output_type.strip().replace(' ', '-').upper()}" if output_type is not None else ""

    kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""

    return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
.venv/lib/python3.11/site-packages/gguf/vocab.py ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ import logging
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+ from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable
9
+
10
+ from sentencepiece import SentencePieceProcessor
11
+
12
+ import gguf
13
+
14
+ from .gguf_writer import GGUFWriter
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class SpecialVocab:
    """Special-token metadata collected from a model directory.

    On construction the directory is scanned for ``tokenizer.json``,
    ``tokenizer_config.json``, ``config.json`` and (optionally) ``merges.txt``.
    The result is a set of special token IDs, ``add_<type>_token`` flags, BPE
    merges and a chat template, which ``add_to_gguf`` forwards to a writer.
    """

    merges: list[str]
    add_special_token: dict[str, bool]
    special_token_ids: dict[str, int]
    chat_template: str | Sequence[Mapping[str, str]] | None

    def __init__(
        self, path: str | os.PathLike[str], load_merges: bool = False,
        special_token_types: Iterable[str] | None = None,
        n_vocab: int | None = None,
    ):
        """Load special-token metadata from the directory ``path``.

        Args:
            path: Directory holding the tokenizer/config JSON files.
            load_merges: Also collect BPE merges (``tokenizer.json`` first,
                ``merges.txt`` as fallback).
            special_token_types: Token type names to look up; defaults to
                bos/eos/unk/sep/pad/cls/mask.
            n_vocab: When given, token IDs at or above this value are skipped.
        """
        self.special_token_ids = {}
        self.add_special_token = {}
        self.n_vocab = n_vocab
        self.load_merges = load_merges
        self.merges = []
        self.chat_template = None
        if special_token_types is not None:
            # The types are iterated once per metadata file, so a one-shot
            # iterator must be materialised or the second pass sees nothing.
            self.special_token_types = tuple(special_token_types)
        else:
            self.special_token_types = ('bos', 'eos', 'unk', 'sep', 'pad', 'cls', 'mask')
        self._load(Path(path))

    def __repr__(self) -> str:
        return '<SpecialVocab with {} merges, special tokens {}, add special tokens {}>'.format(
            len(self.merges), self.special_token_ids or "unset", self.add_special_token or "unset",
        )

    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
        """Forward everything that was loaded to the writer ``gw``.

        Handlers are looked up on ``gw`` by name (``add_<type>_token_id`` and
        ``add_add_<type>_token``); a missing handler is logged and skipped.
        ``quiet`` suppresses the informational log lines, not the warnings.
        """
        if self.merges:
            if not quiet:
                logger.info(f'Adding {len(self.merges)} merge(s).')
            gw.add_token_merges(self.merges)
        elif self.load_merges:
            logger.warning('Adding merges requested but no merges found, output may be non-functional.')
        for typ, tokid in self.special_token_ids.items():
            id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
            if id_handler is None:
                logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
                continue
            if not quiet:
                logger.info(f'Setting special token type {typ} to {tokid}')
            id_handler(tokid)
        for typ, value in self.add_special_token.items():
            add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
            if add_handler is None:
                logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
                continue
            if not quiet:
                logger.info(f'Setting add_{typ}_token to {value}')
            add_handler(value)
        if self.chat_template is not None:
            if not quiet:
                logger.info(f'Setting chat_template to {self.chat_template}')
            gw.add_chat_template(self.chat_template)

    def _load(self, path: Path) -> None:
        """Run the loaders in priority order.

        ``_set_special_token`` keeps the first ID seen for a type, so values
        found via the tokenizer files win over those in ``config.json``.
        """
        self._try_load_from_tokenizer_json(path)
        self._try_load_from_config_json(path)
        if self.load_merges and not self.merges:
            self._try_load_merges_txt(path)

    def _try_load_merges_txt(self, path: Path) -> bool:
        """Read merges from ``merges.txt``; return False if the file is absent.

        A leading ``#`` header line is skipped, blank lines are ignored, and
        lines that do not split into exactly two fields are logged and dropped.
        """
        merges_file = path / 'merges.txt'
        if not merges_file.is_file():
            return False
        with open(merges_file, 'r', encoding='utf-8') as fp:
            first_line = next(fp, '').strip()
            if not first_line.startswith('#'):
                # Not a header: rewind so the first line is parsed as a merge.
                fp.seek(0)
                line_num = 0
            else:
                line_num = 1
            merges = []
            for line in fp:
                line_num += 1
                line = line.strip()
                if not line:
                    continue
                parts = line.split(None, 3)
                if len(parts) != 2:
                    logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
                    continue
                merges.append(f'{parts[0]} {parts[1]}')
        self.merges = merges
        return True

    def _set_special_token(self, typ: str, tid: Any) -> None:
        """Record ``tid`` for ``typ`` unless one is already set.

        Non-integer values are ignored, negative IDs raise ``ValueError``, and
        IDs at or above ``n_vocab`` (when known) are logged and skipped.
        """
        if not isinstance(tid, int):
            return
        if tid < 0:
            raise ValueError(f'invalid value for special token type {typ}: {tid}')
        if self.n_vocab is None or tid < self.n_vocab:
            if typ in self.special_token_ids:
                return
            self.special_token_ids[typ] = tid
            return
        logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')

    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        """Load merges, chat template, add-flags and token IDs from the tokenizer files.

        ``tokenizer.json`` supplies merges and the added-token table;
        ``tokenizer_config.json`` supplies the chat template, the
        ``add_<type>_token`` flags and the special token texts, which are
        resolved to IDs through the added-token table.
        """
        tokenizer_file = path / 'tokenizer.json'
        if tokenizer_file.is_file():
            with open(tokenizer_file, encoding='utf-8') as f:
                tokenizer = json.load(f)
            if self.load_merges:
                merges = tokenizer.get('model', {}).get('merges')
                if isinstance(merges, list) and merges:
                    if isinstance(merges[0], str):
                        # Classic format: each merge is "left right".
                        self.merges = merges
                    elif (
                        isinstance(merges[0], list) and len(merges[0]) == 2
                        and isinstance(merges[0][0], str)
                    ):
                        # Pair format: each merge is [left, right] so that a
                        # part may contain spaces. Convert to the classic form,
                        # encoding embedded spaces so the separator stays
                        # unambiguous.
                        encoded_space = chr(ord(' ') + 256)
                        if any(' ' in part for pair in merges for part in pair):
                            logger.warning(f'Spaces in merges detected, encoding as {encoded_space!r}')
                        self.merges = [
                            ' '.join(part.replace(' ', encoded_space) for part in pair)
                            for pair in merges
                        ]
            added_tokens = tokenizer.get('added_tokens', {})
        else:
            added_tokens = {}
        tokenizer_config_file = path / 'tokenizer_config.json'
        if not tokenizer_config_file.is_file():
            # NOTE(review): True is returned even when neither file exists;
            # the visible caller ignores the result - confirm before changing.
            return True
        with open(tokenizer_config_file, encoding='utf-8') as f:
            tokenizer_config = json.load(f)
        chat_template = tokenizer_config.get('chat_template')
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
            logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f'add_{typ}_token')
            if isinstance(add_entry, bool):
                self.add_special_token[typ] = add_entry
            entry = tokenizer_config.get(f'{typ}_token')
            if isinstance(entry, str):
                tc_content = entry
            elif isinstance(entry, dict):
                entry_content = entry.get('content')
                if not isinstance(entry_content, str):
                    continue
                tc_content = entry_content
            else:
                continue
            # We only need the first match here.
            maybe_token_id = next(
                (atok.get('id') for atok in added_tokens if atok.get('content') == tc_content),
                None,
            )
            self._set_special_token(typ, maybe_token_id)
        return True

    def _try_load_from_config_json(self, path: Path) -> bool:
        """Read ``<type>_token_id`` entries from ``config.json``; False if absent."""
        config_file = path / 'config.json'
        if not config_file.is_file():
            return False
        with open(config_file, encoding='utf-8') as f:
            config = json.load(f)
        for typ in self.special_token_types:
            self._set_special_token(typ, config.get(f'{typ}_token_id'))
        return True
171
+
172
+
173
@runtime_checkable
class BaseVocab(Protocol):
    """Structural type shared by every vocabulary class in this module.

    Implementations expose two class-level strings: ``tokenizer_model`` and
    ``name``.
    """

    tokenizer_model: ClassVar[str]
    name: ClassVar[str]
177
+
178
+
179
@runtime_checkable
class Vocab(BaseVocab, Protocol):
    """Structural type for vocabularies that carry an actual token table.

    Implementations are constructed from a base directory and enumerate their
    tokens as ``(text, score, token type)`` triples via ``all_tokens``.
    """

    vocab_size: int
    added_tokens_dict: dict[str, int]
    added_tokens_list: list[str]
    fname_tokenizer: Path

    def __init__(self, base_path: Path): ...

    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]: ...
188
+
189
+
190
class NoVocab(BaseVocab):
    """Placeholder vocabulary for a model that ships without one."""

    tokenizer_model = "no_vocab"
    name = "no_vocab"

    def __repr__(self) -> str:
        description = "<NoVocab for a model without integrated vocabulary>"
        return description
196
+
197
+
198
class BpeVocab(Vocab):
    """GPT-2 style byte-level BPE vocabulary.

    Loaded from ``vocab.json`` (plus optional ``added_tokens.json``) when
    present, otherwise from ``tokenizer.json``.
    """

    tokenizer_model = "gpt2"
    name = "bpe"

    def __init__(self, base_path: Path):
        """Load the vocabulary found under ``base_path``.

        Raises:
            FileNotFoundError: no tokenizer file exists, or ``tokenizer.json``
                does not describe a byte-level BPE tokenizer.
            ValueError: the added token IDs do not directly follow the base
                vocabulary as one contiguous range.
        """
        added_tokens: dict[str, int] = {}

        if (fname_tokenizer := base_path / 'vocab.json').exists():
            # "slow" tokenizer
            with open(fname_tokenizer, encoding="utf-8") as f:
                self.vocab = json.load(f)

            try:
                # FIXME: Verify that added tokens here _cannot_ overlap with the main vocab.
                with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
                    added_tokens = json.load(f)
            except FileNotFoundError:
                pass
        else:
            # "fast" tokenizer
            fname_tokenizer = base_path / 'tokenizer.json'

            # if this fails, FileNotFoundError propagates to caller
            with open(fname_tokenizer, encoding="utf-8") as f:
                tokenizer_json = json.load(f)

            tokenizer_model: dict[str, Any] = tokenizer_json['model']
            # A null or missing decoder must read as "not this kind of
            # tokenizer" (FileNotFoundError) instead of crashing on subscript.
            decoder = tokenizer_json.get('decoder')
            decoder_type = decoder.get('type') if isinstance(decoder, dict) else None
            if (
                tokenizer_model['type'] != 'BPE' or tokenizer_model.get('byte_fallback', False)
                or decoder_type != 'ByteLevel'
            ):
                raise FileNotFoundError('Cannot find GPT-2 BPE tokenizer')

            self.vocab = tokenizer_model["vocab"]

            if (added := tokenizer_json.get('added_tokens')) is not None:
                # Added tokens here can be duplicates of the main vocabulary.
                added_tokens = {item['content']: item['id']
                                for item in added
                                if item['content'] not in self.vocab}

        vocab_size = len(self.vocab)
        expected_ids = list(range(vocab_size, vocab_size + len(added_tokens)))
        actual_ids = sorted(added_tokens.values())
        if expected_ids != actual_ids:
            expected_end_id = vocab_size + len(actual_ids) - 1
            raise ValueError(f"Expected the {len(actual_ids)} added token ID(s) to be sequential in the range "
                             f"{vocab_size} - {expected_end_id}; got {actual_ids}")

        items = sorted(added_tokens.items(), key=lambda text_idx: text_idx[1])
        self.added_tokens_dict = added_tokens
        self.added_tokens_list = [text for (text, idx) in items]
        self.vocab_size_base = vocab_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
        self.fname_tokenizer = fname_tokenizer

    def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield the base vocabulary in ID order, all scored 0.0 and typed NORMAL."""
        # NOTE(review): the text yielded here is the JSON key (a str), not
        # bytes as the annotation says - confirm what consumers expect.
        reverse_vocab = {token_id: encoded_tok for encoded_tok, token_id in self.vocab.items()}

        for i in range(len(self.vocab)):
            yield reverse_vocab[i], 0.0, gguf.TokenType.NORMAL

    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield the added tokens in ID order as CONTROL tokens scored -1000.0."""
        for text in self.added_tokens_list:
            score = -1000.0
            yield text.encode("utf-8"), score, gguf.TokenType.CONTROL

    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield base tokens followed by added tokens."""
        yield from self.bpe_tokens()
        yield from self.added_tokens()

    def __repr__(self) -> str:
        return f"<BpeVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
271
+
272
+
273
class SentencePieceVocab(Vocab):
    """Vocabulary backed by a SentencePiece ``tokenizer.model`` file."""

    tokenizer_model = "llama"
    name = "spm"

    def __init__(self, base_path: Path):
        """Load ``tokenizer.model`` from ``base_path`` or, failing that, its parent.

        ``added_tokens.json`` is only consulted when the model file sits in
        ``base_path`` itself. Raises ``FileNotFoundError`` when no model file
        is found and ``ValueError`` when the new token IDs are not contiguous.
        """
        extra_tokens: dict[str, int] = {}
        model_path = base_path / 'tokenizer.model'
        if model_path.exists():
            # normal location
            try:
                with open(base_path / 'added_tokens.json', encoding="utf-8") as f:
                    extra_tokens = json.load(f)
            except FileNotFoundError:
                pass
        else:
            model_path = base_path.parent / 'tokenizer.model'
            if not model_path.exists():
                # not found in alternate location either
                raise FileNotFoundError('Cannot find tokenizer.model')

        self.sentencepiece_tokenizer = SentencePieceProcessor()
        self.sentencepiece_tokenizer.LoadFromFile(str(model_path))
        base_size = self.sentencepiece_tokenizer.vocab_size()

        # Only entries beyond the model's own vocabulary count as new tokens.
        pieces_by_id = {
            token_id: piece
            for piece, token_id in extra_tokens.items()
            if token_id >= base_size
        }
        wanted_ids = list(range(base_size, base_size + len(pieces_by_id)))
        found_ids = sorted(pieces_by_id)

        if wanted_ids != found_ids:
            raise ValueError(f"Expected new token IDs {wanted_ids} to be sequential; got {found_ids}")

        # Token pieces that were added to the base vocabulary.
        self.added_tokens_dict = extra_tokens
        self.added_tokens_list = [pieces_by_id[token_id] for token_id in found_ids]
        self.vocab_size_base = base_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)
        self.fname_tokenizer = model_path

    def sentencepiece_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield every piece of the SentencePiece model with its score and type.

        The type checks run in a fixed order and later matches override
        earlier ones: unknown, control, unused, byte.
        """
        sp = self.sentencepiece_tokenizer
        for token_id in range(sp.vocab_size()):
            raw_text = sp.IdToPiece(token_id).encode("utf-8")
            score: float = sp.GetScore(token_id)

            kind = gguf.TokenType.NORMAL
            if sp.IsUnknown(token_id):
                kind = gguf.TokenType.UNKNOWN
            if sp.IsControl(token_id):
                kind = gguf.TokenType.CONTROL

            # NOTE: I think added_tokens are user defined.
            # ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
            # if tokenizer.is_user_defined(i): toktype = gguf.TokenType.USER_DEFINED

            if sp.IsUnused(token_id):
                kind = gguf.TokenType.UNUSED
            if sp.IsByte(token_id):
                kind = gguf.TokenType.BYTE

            yield raw_text, score, kind

    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield the added tokens in ID order as USER_DEFINED tokens scored -1000.0."""
        for piece in self.added_tokens_list:
            yield piece.encode("utf-8"), -1000.0, gguf.TokenType.USER_DEFINED

    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield model pieces followed by added tokens."""
        yield from self.sentencepiece_tokens()
        yield from self.added_tokens()

    def __repr__(self) -> str:
        return f"<SentencePieceVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
343
+
344
+
345
class LlamaHfVocab(Vocab):
    """Llama-style BPE vocabulary loaded through ``transformers`` from ``tokenizer.json``."""

    tokenizer_model = "llama"
    name = "hfft"

    def __init__(self, base_path: Path):
        """Load the fast tokenizer stored under ``base_path``.

        Raises:
            FileNotFoundError: ``tokenizer.json`` is missing or does not
                describe a byte-fallback BPE tokenizer with a ``Sequence``
                decoder.
            TypeError: the file looks like a Llama 3 tokenizer, which must be
                converted with ``BpeVocab``.
            ImportError: the ``transformers`` package is not installed.
        """
        fname_tokenizer = base_path / 'tokenizer.json'
        # if this fails, FileNotFoundError propagates to caller
        with open(fname_tokenizer, encoding='utf-8') as f:
            tokenizer_json = json.load(f)

        # pre-check so we know if we need transformers
        tokenizer_model: dict[str, Any] = tokenizer_json['model']
        is_llama3 = (
            tokenizer_model['type'] == 'BPE' and tokenizer_model.get('ignore_merges', False)
            and not tokenizer_model.get('byte_fallback', True)
        )
        if is_llama3:
            raise TypeError('Llama 3 must be converted with BpeVocab')

        # A null or missing decoder must read as "not this kind of tokenizer"
        # (FileNotFoundError) instead of crashing on subscript.
        decoder = tokenizer_json.get('decoder')
        decoder_type = decoder.get('type') if isinstance(decoder, dict) else None
        if (
            tokenizer_model['type'] != 'BPE' or not tokenizer_model.get('byte_fallback', False)
            or decoder_type != 'Sequence'
        ):
            raise FileNotFoundError('Cannot find Llama BPE tokenizer')

        try:
            from transformers import AutoTokenizer
        except ImportError as e:
            raise ImportError(
                "To use LlamaHfVocab, please install the `transformers` package. "
                "You can install it with `pip install transformers`."
            ) from e

        # Allow the tokenizer to default to slow or fast versions.
        # Explicitly set tokenizer to use local paths.
        self.tokenizer = AutoTokenizer.from_pretrained(
            base_path,
            cache_dir=base_path,
            local_files_only=True,
        )
        assert self.tokenizer.is_fast  # assume tokenizer.json is used

        # Initialize lists and dictionaries for added tokens
        self.added_tokens_list = []
        self.added_tokens_dict = dict()
        self.added_tokens_ids = set()

        # Process added tokens
        for tok, tokidx in sorted(
            self.tokenizer.get_added_vocab().items(), key=lambda x: x[1]
        ):
            # Only consider added tokens that are not in the base vocabulary
            if tokidx >= self.tokenizer.vocab_size:
                self.added_tokens_list.append(tok)
                self.added_tokens_dict[tok] = tokidx
                self.added_tokens_ids.add(tokidx)

        # Store special tokens and their IDs. The vocabulary mapping is
        # fetched once rather than once per special token.
        full_vocab = self.tokenizer.get_vocab()
        self.specials = {
            tok: full_vocab[tok]
            for tok in self.tokenizer.all_special_tokens
        }
        self.special_ids = set(self.tokenizer.all_special_ids)

        # Set vocabulary sizes
        self.vocab_size_base = self.tokenizer.vocab_size
        self.vocab_size = self.vocab_size_base + len(self.added_tokens_list)

        self.fname_tokenizer = fname_tokenizer

    def hf_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield the base vocabulary in ID order, skipping IDs recorded as added tokens."""
        reverse_vocab = {
            token_id: encoded_tok for encoded_tok, token_id in self.tokenizer.get_vocab().items()
        }

        for token_id in range(self.vocab_size_base):
            # Skip processing added tokens here
            if token_id in self.added_tokens_ids:
                continue

            # Convert token text to bytes
            token_text = reverse_vocab[token_id].encode("utf-8")

            # Yield token text, score, and type
            yield token_text, self.get_token_score(token_id), self.get_token_type(
                token_id, token_text, self.special_ids  # Reuse already stored special IDs
            )

    def get_token_type(self, token_id: int, token_text: bytes, special_ids: set[int]) -> gguf.TokenType:
        """Classify a token as BYTE (``<0xNN>`` text), CONTROL (special ID) or NORMAL."""
        # Special case for byte tokens
        if re.fullmatch(br"<0x[0-9A-Fa-f]{2}>", token_text):
            return gguf.TokenType.BYTE

        # Determine token type based on whether it's a special token
        return gguf.TokenType.CONTROL if token_id in special_ids else gguf.TokenType.NORMAL

    def get_token_score(self, token_id: int) -> float:
        """Return the score for ``token_id``; currently a fixed placeholder."""
        # Placeholder for actual logic to determine the token's score
        # This needs to be implemented based on specific requirements
        return -1000.0  # Default score

    def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield added tokens in ID order; non-special ones are USER_DEFINED."""
        for text in self.added_tokens_list:
            if text in self.specials:
                # Empty text is passed so the byte-token pattern cannot match.
                toktype = self.get_token_type(self.specials[text], b'', self.special_ids)
                score = self.get_token_score(self.specials[text])
            else:
                toktype = gguf.TokenType.USER_DEFINED
                score = -1000.0

            yield text.encode("utf-8"), score, toktype

    def has_newline_token(self):
        """Report whether the vocabulary contains ``<0x0A>`` or a literal newline token."""
        return "<0x0A>" in self.tokenizer.vocab or "\n" in self.tokenizer.vocab

    def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
        """Yield base tokens followed by added tokens."""
        yield from self.hf_tokens()
        yield from self.added_tokens()

    def __repr__(self) -> str:
        return f"<LlamaHfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
.venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
.venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/License.txt ADDED
@@ -0,0 +1,1568 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ End User License Agreement
2
+ --------------------------
3
+
4
+
5
+ Preface
6
+ -------
7
+
8
+ The Software License Agreement in Chapter 1 and the Supplement
9
+ in Chapter 2 contain license terms and conditions that govern
10
+ the use of NVIDIA software. By accepting this agreement, you
11
+ agree to comply with all the terms and conditions applicable
12
+ to the product(s) included herein.
13
+
14
+
15
+ NVIDIA Driver
16
+
17
+
18
+ Description
19
+
20
+ This package contains the operating system driver and
21
+ fundamental system software components for NVIDIA GPUs.
22
+
23
+
24
+ NVIDIA CUDA Toolkit
25
+
26
+
27
+ Description
28
+
29
+ The NVIDIA CUDA Toolkit provides command-line and graphical
30
+ tools for building, debugging and optimizing the performance
31
+ of applications accelerated by NVIDIA GPUs, runtime and math
32
+ libraries, and documentation including programming guides,
33
+ user manuals, and API references.
34
+
35
+
36
+ Default Install Location of CUDA Toolkit
37
+
38
+ Windows platform:
39
+
40
+ %ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
41
+
42
+ Linux platform:
43
+
44
+ /usr/local/cuda-#.#
45
+
46
+ Mac platform:
47
+
48
+ /Developer/NVIDIA/CUDA-#.#
49
+
50
+
51
+ NVIDIA CUDA Samples
52
+
53
+
54
+ Description
55
+
56
+ This package includes over 100+ CUDA examples that demonstrate
57
+ various CUDA programming principles, and efficient CUDA
58
+ implementation of algorithms in specific application domains.
59
+
60
+
61
+ Default Install Location of CUDA Samples
62
+
63
+ Windows platform:
64
+
65
+ %ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
66
+
67
+ Linux platform:
68
+
69
+ /usr/local/cuda-#.#/samples
70
+
71
+ and
72
+
73
+ $HOME/NVIDIA_CUDA-#.#_Samples
74
+
75
+ Mac platform:
76
+
77
+ /Developer/NVIDIA/CUDA-#.#/samples
78
+
79
+
80
+ NVIDIA Nsight Visual Studio Edition (Windows only)
81
+
82
+
83
+ Description
84
+
85
+ NVIDIA Nsight Development Platform, Visual Studio Edition is a
86
+ development environment integrated into Microsoft Visual
87
+ Studio that provides tools for debugging, profiling, analyzing
88
+ and optimizing your GPU computing and graphics applications.
89
+
90
+
91
+ Default Install Location of Nsight Visual Studio Edition
92
+
93
+ Windows platform:
94
+
95
+ %ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
96
+
97
+
98
+ 1. License Agreement for NVIDIA Software Development Kits
99
+ ---------------------------------------------------------
100
+
101
+
102
+ Release Date: July 26, 2018
103
+ ---------------------------
104
+
105
+
106
+ Important Notice — Read before downloading, installing,
107
+ copying or using the licensed software:
108
+ -------------------------------------------------------
109
+
110
+ This license agreement, including exhibits attached
111
+ (“Agreement”) is a legal agreement between you and NVIDIA
112
+ Corporation ("NVIDIA") and governs your use of a NVIDIA
113
+ software development kit (“SDK”).
114
+
115
+ Each SDK has its own set of software and materials, but here
116
+ is a description of the types of items that may be included in
117
+ a SDK: source code, header files, APIs, data sets and assets
118
+ (examples include images, textures, models, scenes, videos,
119
+ native API input/output files), binary software, sample code,
120
+ libraries, utility programs, programming code and
121
+ documentation.
122
+
123
+ This Agreement can be accepted only by an adult of legal age
124
+ of majority in the country in which the SDK is used.
125
+
126
+ If you are entering into this Agreement on behalf of a company
127
+ or other legal entity, you represent that you have the legal
128
+ authority to bind the entity to this Agreement, in which case
129
+ “you” will mean the entity you represent.
130
+
131
+ If you don’t have the required age or authority to accept
132
+ this Agreement, or if you don’t accept all the terms and
133
+ conditions of this Agreement, do not download, install or use
134
+ the SDK.
135
+
136
+ You agree to use the SDK only for purposes that are permitted
137
+ by (a) this Agreement, and (b) any applicable law, regulation
138
+ or generally accepted practices or guidelines in the relevant
139
+ jurisdictions.
140
+
141
+
142
+ 1.1. License
143
+
144
+
145
+ 1.1.1. License Grant
146
+
147
+ Subject to the terms of this Agreement, NVIDIA hereby grants
148
+ you a non-exclusive, non-transferable license, without the
149
+ right to sublicense (except as expressly provided in this
150
+ Agreement) to:
151
+
152
+ 1. Install and use the SDK,
153
+
154
+ 2. Modify and create derivative works of sample source code
155
+ delivered in the SDK, and
156
+
157
+ 3. Distribute those portions of the SDK that are identified
158
+ in this Agreement as distributable, as incorporated in
159
+ object code format into a software application that meets
160
+ the distribution requirements indicated in this Agreement.
161
+
162
+
163
+ 1.1.2. Distribution Requirements
164
+
165
+ These are the distribution requirements for you to exercise
166
+ the distribution grant:
167
+
168
+ 1. Your application must have material additional
169
+ functionality, beyond the included portions of the SDK.
170
+
171
+ 2. The distributable portions of the SDK shall only be
172
+ accessed by your application.
173
+
174
+ 3. The following notice shall be included in modifications
175
+ and derivative works of sample source code distributed:
176
+ “This software contains source code provided by NVIDIA
177
+ Corporation.”
178
+
179
+ 4. Unless a developer tool is identified in this Agreement
180
+ as distributable, it is delivered for your internal use
181
+ only.
182
+
183
+ 5. The terms under which you distribute your application
184
+ must be consistent with the terms of this Agreement,
185
+ including (without limitation) terms relating to the
186
+ license grant and license restrictions and protection of
187
+ NVIDIA’s intellectual property rights. Additionally, you
188
+ agree that you will protect the privacy, security and
189
+ legal rights of your application users.
190
+
191
+ 6. You agree to notify NVIDIA in writing of any known or
192
+ suspected distribution or use of the SDK not in compliance
193
+ with the requirements of this Agreement, and to enforce
194
+ the terms of your agreements with respect to distributed
195
+ SDK.
196
+
197
+
198
+ 1.1.3. Authorized Users
199
+
200
+ You may allow employees and contractors of your entity or of
201
+ your subsidiary(ies) to access and use the SDK from your
202
+ secure network to perform work on your behalf.
203
+
204
+ If you are an academic institution you may allow users
205
+ enrolled or employed by the academic institution to access and
206
+ use the SDK from your secure network.
207
+
208
+ You are responsible for the compliance with the terms of this
209
+ Agreement by your authorized users. If you become aware that
210
+ your authorized users didn’t follow the terms of this
211
+ Agreement, you agree to take reasonable steps to resolve the
212
+ non-compliance and prevent new occurrences.
213
+
214
+
215
+ 1.1.4. Pre-Release SDK
216
+
217
+ The SDK versions identified as alpha, beta, preview or
218
+ otherwise as pre-release, may not be fully functional, may
219
+ contain errors or design flaws, and may have reduced or
220
+ different security, privacy, accessibility, availability, and
221
+ reliability standards relative to commercial versions of
222
+ NVIDIA software and materials. Use of a pre-release SDK may
223
+ result in unexpected results, loss of data, project delays or
224
+ other unpredictable damage or loss.
225
+
226
+ You may use a pre-release SDK at your own risk, understanding
227
+ that pre-release SDKs are not intended for use in production
228
+ or business-critical systems.
229
+
230
+ NVIDIA may choose not to make available a commercial version
231
+ of any pre-release SDK. NVIDIA may also choose to abandon
232
+ development and terminate the availability of a pre-release
233
+ SDK at any time without liability.
234
+
235
+
236
+ 1.1.5. Updates
237
+
238
+ NVIDIA may, at its option, make available patches, workarounds
239
+ or other updates to this SDK. Unless the updates are provided
240
+ with their separate governing terms, they are deemed part of
241
+ the SDK licensed to you as provided in this Agreement. You
242
+ agree that the form and content of the SDK that NVIDIA
243
+ provides may change without prior notice to you. While NVIDIA
244
+ generally maintains compatibility between versions, NVIDIA may
245
+ in some cases make changes that introduce incompatibilities in
246
+ future versions of the SDK.
247
+
248
+
249
+ 1.1.6. Third Party Licenses
250
+
251
+ The SDK may come bundled with, or otherwise include or be
252
+ distributed with, third party software licensed by a NVIDIA
253
+ supplier and/or open source software provided under an open
254
+ source license. Use of third party software is subject to the
255
+ third-party license terms, or in the absence of third party
256
+ terms, the terms of this Agreement. Copyright to third party
257
+ software is held by the copyright holders indicated in the
258
+ third-party software or license.
259
+
260
+
261
+ 1.1.7. Reservation of Rights
262
+
263
+ NVIDIA reserves all rights, title, and interest in and to the
264
+ SDK, not expressly granted to you under this Agreement.
265
+
266
+
267
+ 1.2. Limitations
268
+
269
+ The following license limitations apply to your use of the
270
+ SDK:
271
+
272
+ 1. You may not reverse engineer, decompile or disassemble,
273
+ or remove copyright or other proprietary notices from any
274
+ portion of the SDK or copies of the SDK.
275
+
276
+ 2. Except as expressly provided in this Agreement, you may
277
+ not copy, sell, rent, sublicense, transfer, distribute,
278
+ modify, or create derivative works of any portion of the
279
+ SDK. For clarity, you may not distribute or sublicense the
280
+ SDK as a stand-alone product.
281
+
282
+ 3. Unless you have an agreement with NVIDIA for this
283
+ purpose, you may not indicate that an application created
284
+ with the SDK is sponsored or endorsed by NVIDIA.
285
+
286
+ 4. You may not bypass, disable, or circumvent any
287
+ encryption, security, digital rights management or
288
+ authentication mechanism in the SDK.
289
+
290
+ 5. You may not use the SDK in any manner that would cause it
291
+ to become subject to an open source software license. As
292
+ examples, licenses that require as a condition of use,
293
+ modification, and/or distribution that the SDK be:
294
+
295
+ a. Disclosed or distributed in source code form;
296
+
297
+ b. Licensed for the purpose of making derivative works;
298
+ or
299
+
300
+ c. Redistributable at no charge.
301
+
302
+ 6. Unless you have an agreement with NVIDIA for this
303
+ purpose, you may not use the SDK with any system or
304
+ application where the use or failure of the system or
305
+ application can reasonably be expected to threaten or
306
+ result in personal injury, death, or catastrophic loss.
307
+ Examples include use in avionics, navigation, military,
308
+ medical, life support or other life critical applications.
309
+ NVIDIA does not design, test or manufacture the SDK for
310
+ these critical uses and NVIDIA shall not be liable to you
311
+ or any third party, in whole or in part, for any claims or
312
+ damages arising from such uses.
313
+
314
+ 7. You agree to defend, indemnify and hold harmless NVIDIA
315
+ and its affiliates, and their respective employees,
316
+ contractors, agents, officers and directors, from and
317
+ against any and all claims, damages, obligations, losses,
318
+ liabilities, costs or debt, fines, restitutions and
319
+ expenses (including but not limited to attorney’s fees
320
+ and costs incident to establishing the right of
321
+ indemnification) arising out of or related to your use of
322
+ the SDK outside of the scope of this Agreement, or not in
323
+ compliance with its terms.
324
+
325
+
326
+ 1.3. Ownership
327
+
328
+ 1. NVIDIA or its licensors hold all rights, title and
329
+ interest in and to the SDK and its modifications and
330
+ derivative works, including their respective intellectual
331
+ property rights, subject to your rights described in this
332
+ section. This SDK may include software and materials from
333
+ NVIDIA’s licensors, and these licensors are intended
334
+ third party beneficiaries that may enforce this Agreement
335
+ with respect to their intellectual property rights.
336
+
337
+ 2. You hold all rights, title and interest in and to your
338
+ applications and your derivative works of the sample
339
+ source code delivered in the SDK, including their
340
+ respective intellectual property rights, subject to
341
+ NVIDIA’s rights described in this section.
342
+
343
+ 3. You may, but don’t have to, provide to NVIDIA
344
+ suggestions, feature requests or other feedback regarding
345
+ the SDK, including possible enhancements or modifications
346
+ to the SDK. For any feedback that you voluntarily provide,
347
+ you hereby grant NVIDIA and its affiliates a perpetual,
348
+ non-exclusive, worldwide, irrevocable license to use,
349
+ reproduce, modify, license, sublicense (through multiple
350
+ tiers of sublicensees), and distribute (through multiple
351
+ tiers of distributors) it without the payment of any
352
+ royalties or fees to you. NVIDIA will use feedback at its
353
+ choice. NVIDIA is constantly looking for ways to improve
354
+ its products, so you may send feedback to NVIDIA through
355
+ the developer portal at https://developer.nvidia.com.
356
+
357
+
358
+ 1.4. No Warranties
359
+
360
+ THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
361
+ FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
362
+ ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
363
+ OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
364
+ BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
365
+ FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
366
+ ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
367
+ WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
368
+ DEALING OR COURSE OF TRADE.
369
+
370
+
371
+ 1.5. Limitation of Liability
372
+
373
+ TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
374
+ AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
375
+ PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
376
+ OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
377
+ PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
378
+ WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
379
+ WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
380
+ OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
381
+ PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
382
+ LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES’
383
+ TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
384
+ AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
385
+ NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
386
+ LIMIT.
387
+
388
+ These exclusions and limitations of liability shall apply
389
+ regardless if NVIDIA or its affiliates have been advised of
390
+ the possibility of such damages, and regardless of whether a
391
+ remedy fails its essential purpose. These exclusions and
392
+ limitations of liability form an essential basis of the
393
+ bargain between the parties, and, absent any of these
394
+ exclusions or limitations of liability, the provisions of this
395
+ Agreement, including, without limitation, the economic terms,
396
+ would be substantially different.
397
+
398
+
399
+ 1.6. Termination
400
+
401
+ 1. This Agreement will continue to apply until terminated by
402
+ either you or NVIDIA as described below.
403
+
404
+ 2. If you want to terminate this Agreement, you may do so by
405
+ ceasing to use the SDK.
406
+
407
+ 3. NVIDIA may, at any time, terminate this Agreement if:
408
+
409
+ a. (i) you fail to comply with any term of this
410
+ Agreement and the non-compliance is not fixed within
411
+ thirty (30) days following notice from NVIDIA (or
412
+ immediately if you violate NVIDIA’s intellectual
413
+ property rights);
414
+
415
+ b. (ii) you commence or participate in any legal
416
+ proceeding against NVIDIA with respect to the SDK; or
417
+
418
+ c. (iii) NVIDIA decides to no longer provide the SDK in
419
+ a country or, in NVIDIA’s sole discretion, the
420
+ continued use of it is no longer commercially viable.
421
+
422
+ 4. Upon any termination of this Agreement, you agree to
423
+ promptly discontinue use of the SDK and destroy all copies
424
+ in your possession or control. Your prior distributions in
425
+ accordance with this Agreement are not affected by the
426
+ termination of this Agreement. Upon written request, you
427
+ will certify in writing that you have complied with your
428
+ commitments under this section. Upon any termination of
429
+ this Agreement all provisions survive except for the
430
+ license grant provisions.
431
+
432
+
433
+ 1.7. General
434
+
435
+ If you wish to assign this Agreement or your rights and
436
+ obligations, including by merger, consolidation, dissolution
437
+ or operation of law, contact NVIDIA to ask for permission. Any
438
+ attempted assignment not approved by NVIDIA in writing shall
439
+ be void and of no effect. NVIDIA may assign, delegate or
440
+ transfer this Agreement and its rights and obligations, and if
441
+ to a non-affiliate you will be notified.
442
+
443
+ You agree to cooperate with NVIDIA and provide reasonably
444
+ requested information to verify your compliance with this
445
+ Agreement.
446
+
447
+ This Agreement will be governed in all respects by the laws of
448
+ the United States and of the State of Delaware as those laws
449
+ are applied to contracts entered into and performed entirely
450
+ within Delaware by Delaware residents, without regard to the
451
+ conflicts of laws principles. The United Nations Convention on
452
+ Contracts for the International Sale of Goods is specifically
453
+ disclaimed. You agree to all terms of this Agreement in the
454
+ English language.
455
+
456
+ The state or federal courts residing in Santa Clara County,
457
+ California shall have exclusive jurisdiction over any dispute
458
+ or claim arising out of this Agreement. Notwithstanding this,
459
+ you agree that NVIDIA shall still be allowed to apply for
460
+ injunctive remedies or an equivalent type of urgent legal
461
+ relief in any jurisdiction.
462
+
463
+ If any court of competent jurisdiction determines that any
464
+ provision of this Agreement is illegal, invalid or
465
+ unenforceable, such provision will be construed as limited to
466
+ the extent necessary to be consistent with and fully
467
+ enforceable under the law and the remaining provisions will
468
+ remain in full force and effect. Unless otherwise specified,
469
+ remedies are cumulative.
470
+
471
+ Each party acknowledges and agrees that the other is an
472
+ independent contractor in the performance of this Agreement.
473
+
474
+ The SDK has been developed entirely at private expense and is
475
+ “commercial items” consisting of “commercial computer
476
+ software” and “commercial computer software
477
+ documentation” provided with RESTRICTED RIGHTS. Use,
478
+ duplication or disclosure by the U.S. Government or a U.S.
479
+ Government subcontractor is subject to the restrictions in
480
+ this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
481
+ in subparagraphs (c)(1) and (2) of the Commercial Computer
482
+ Software - Restricted Rights clause at FAR 52.227-19, as
483
+ applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
484
+ Expressway, Santa Clara, CA 95051.
485
+
486
+ The SDK is subject to United States export laws and
487
+ regulations. You agree that you will not ship, transfer or
488
+ export the SDK into any country, or use the SDK in any manner,
489
+ prohibited by the United States Bureau of Industry and
490
+ Security or economic sanctions regulations administered by the
491
+ U.S. Department of Treasury’s Office of Foreign Assets
492
+ Control (OFAC), or any applicable export laws, restrictions or
493
+ regulations. These laws include restrictions on destinations,
494
+ end users and end use. By accepting this Agreement, you
495
+ confirm that you are not a resident or citizen of any country
496
+ currently embargoed by the U.S. and that you are not otherwise
497
+ prohibited from receiving the SDK.
498
+
499
+ Any notice delivered by NVIDIA to you under this Agreement
500
+ will be delivered via mail, email or fax. You agree that any
501
+ notices that NVIDIA sends you electronically will satisfy any
502
+ legal communication requirements. Please direct your legal
503
+ notices or other correspondence to NVIDIA Corporation, 2788
504
+ San Tomas Expressway, Santa Clara, California 95051, United
505
+ States of America, Attention: Legal Department.
506
+
507
+ This Agreement and any exhibits incorporated into this
508
+ Agreement constitute the entire agreement of the parties with
509
+ respect to the subject matter of this Agreement and supersede
510
+ all prior negotiations or documentation exchanged between the
511
+ parties relating to this SDK license. Any additional and/or
512
+ conflicting terms on documents issued by you are null, void,
513
+ and invalid. Any amendment or waiver under this Agreement
514
+ shall be in writing and signed by representatives of both
515
+ parties.
516
+
517
+
518
+ 2. CUDA Toolkit Supplement to Software License Agreement for
519
+ NVIDIA Software Development Kits
520
+ ------------------------------------------------------------
521
+
522
+
523
+ Release date: August 16, 2018
524
+ -----------------------------
525
+
526
+ The terms in this supplement govern your use of the NVIDIA
527
+ CUDA Toolkit SDK under the terms of your license agreement
528
+ (“Agreement”) as modified by this supplement. Capitalized
529
+ terms used but not defined below have the meaning assigned to
530
+ them in the Agreement.
531
+
532
+ This supplement is an exhibit to the Agreement and is
533
+ incorporated as an integral part of the Agreement. In the
534
+ event of conflict between the terms in this supplement and the
535
+ terms in the Agreement, the terms in this supplement govern.
536
+
537
+
538
+ 2.1. License Scope
539
+
540
+ The SDK is licensed for you to develop applications only for
541
+ use in systems with NVIDIA GPUs.
542
+
543
+
544
+ 2.2. Distribution
545
+
546
+ The portions of the SDK that are distributable under the
547
+ Agreement are listed in Attachment A.
548
+
549
+
550
+ 2.3. Operating Systems
551
+
552
+ Those portions of the SDK designed exclusively for use on the
553
+ Linux or FreeBSD operating systems, or other operating systems
554
+ derived from the source code to these operating systems, may
555
+ be copied and redistributed for use in accordance with this
556
+ Agreement, provided that the object code files are not
557
+ modified in any way (except for unzipping of compressed
558
+ files).
559
+
560
+
561
+ 2.4. Audio and Video Encoders and Decoders
562
+
563
+ You acknowledge and agree that it is your sole responsibility
564
+ to obtain any additional third-party licenses required to
565
+ make, have made, use, have used, sell, import, and offer for
566
+ sale your products or services that include or incorporate any
567
+ third-party software and content relating to audio and/or
568
+ video encoders and decoders from, including but not limited
569
+ to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
570
+ MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
571
+ under this Agreement any necessary patent or other rights with
572
+ respect to any audio and/or video encoders and decoders.
573
+
574
+
575
+ 2.5. Licensing
576
+
577
+ If the distribution terms in this Agreement are not suitable
578
+ for your organization, or for any questions regarding this
579
+ Agreement, please contact NVIDIA at
580
+ nvidia-compute-license-questions@nvidia.com.
581
+
582
+
583
+ 2.6. Attachment A
584
+
585
+ The following portions of the SDK are distributable under the
586
+ Agreement:
587
+
588
+ Component
589
+
590
+ CUDA Runtime
591
+
592
+ Windows
593
+
594
+ cudart.dll, cudart_static.lib, cudadevrt.lib
595
+
596
+ Mac OSX
597
+
598
+ libcudart.dylib, libcudart_static.a, libcudadevrt.a
599
+
600
+ Linux
601
+
602
+ libcudart.so, libcudart_static.a, libcudadevrt.a
603
+
604
+ Android
605
+
606
+ libcudart.so, libcudart_static.a, libcudadevrt.a
607
+
608
+ Component
609
+
610
+ CUDA FFT Library
611
+
612
+ Windows
613
+
614
+ cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
615
+
616
+ Mac OSX
617
+
618
+ libcufft.dylib, libcufft_static.a, libcufftw.dylib,
619
+ libcufftw_static.a
620
+
621
+ Linux
622
+
623
+ libcufft.so, libcufft_static.a, libcufftw.so,
624
+ libcufftw_static.a
625
+
626
+ Android
627
+
628
+ libcufft.so, libcufft_static.a, libcufftw.so,
629
+ libcufftw_static.a
630
+
631
+ Component
632
+
633
+ CUDA BLAS Library
634
+
635
+ Windows
636
+
637
+ cublas.dll, cublasLt.dll
638
+
639
+ Mac OSX
640
+
641
+ libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
642
+ libcublasLt_static.a
643
+
644
+ Linux
645
+
646
+ libcublas.so, libcublasLt.so, libcublas_static.a,
647
+ libcublasLt_static.a
648
+
649
+ Android
650
+
651
+ libcublas.so, libcublasLt.so, libcublas_static.a,
652
+ libcublasLt_static.a
653
+
654
+ Component
655
+
656
+ NVIDIA "Drop-in" BLAS Library
657
+
658
+ Windows
659
+
660
+ nvblas.dll
661
+
662
+ Mac OSX
663
+
664
+ libnvblas.dylib
665
+
666
+ Linux
667
+
668
+ libnvblas.so
669
+
670
+ Component
671
+
672
+ CUDA Sparse Matrix Library
673
+
674
+ Windows
675
+
676
+ cusparse.dll, cusparse.lib
677
+
678
+ Mac OSX
679
+
680
+ libcusparse.dylib, libcusparse_static.a
681
+
682
+ Linux
683
+
684
+ libcusparse.so, libcusparse_static.a
685
+
686
+ Android
687
+
688
+ libcusparse.so, libcusparse_static.a
689
+
690
+ Component
691
+
692
+ CUDA Linear Solver Library
693
+
694
+ Windows
695
+
696
+ cusolver.dll, cusolver.lib
697
+
698
+ Mac OSX
699
+
700
+ libcusolver.dylib, libcusolver_static.a
701
+
702
+ Linux
703
+
704
+ libcusolver.so, libcusolver_static.a
705
+
706
+ Android
707
+
708
+ libcusolver.so, libcusolver_static.a
709
+
710
+ Component
711
+
712
+ CUDA Random Number Generation Library
713
+
714
+ Windows
715
+
716
+ curand.dll, curand.lib
717
+
718
+ Mac OSX
719
+
720
+ libcurand.dylib, libcurand_static.a
721
+
722
+ Linux
723
+
724
+ libcurand.so, libcurand_static.a
725
+
726
+ Android
727
+
728
+ libcurand.so, libcurand_static.a
729
+
730
+ Component
731
+
732
+ CUDA Accelerated Graph Library
733
+
734
+ Component
735
+
736
+ NVIDIA Performance Primitives Library
737
+
738
+ Windows
739
+
740
+ nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
741
+ nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
742
+ nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
743
+ nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
744
+ nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
745
+
746
+ Mac OSX
747
+
748
+ libnppc.dylib, libnppc_static.a, libnppial.dylib,
749
+ libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
750
+ libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
751
+ libnppidei_static.a, libnppif.dylib, libnppif_static.a,
752
+ libnppig.dylib, libnppig_static.a, libnppim.dylib,
753
+ libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
754
+ libnpps.dylib, libnpps_static.a
755
+
756
+ Linux
757
+
758
+ libnppc.so, libnppc_static.a, libnppial.so,
759
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
760
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
761
+ libnppidei_static.a, libnppif.so, libnppif_static.a,
762
+ libnppig.so, libnppig_static.a, libnppim.so,
763
+ libnppim_static.a, libnppist.so, libnppist_static.a,
764
+ libnppisu.so, libnppisu_static.a, libnppitc.so,
765
+ libnppitc_static.a, libnpps.so, libnpps_static.a
766
+
767
+ Android
768
+
769
+ libnppc.so, libnppc_static.a, libnppial.so,
770
+ libnppial_static.a, libnppicc.so, libnppicc_static.a,
771
+ libnppicom.so, libnppicom_static.a, libnppidei.so,
772
+ libnppidei_static.a, libnppif.so, libnppif_static.a,
773
+ libnppig.so, libnppig_static.a, libnppim.so,
774
+ libnppim_static.a, libnppist.so, libnppist_static.a,
775
+ libnppisu.so, libnppisu_static.a, libnppitc.so,
776
+ libnppitc_static.a, libnpps.so, libnpps_static.a
777
+
778
+ Component
779
+
780
+ NVIDIA JPEG Library
781
+
782
+ Linux
783
+
784
+ libnvjpeg.so, libnvjpeg_static.a
785
+
786
+ Component
787
+
788
+ Internal common library required for statically linking to
789
+ cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
790
+
791
+ Mac OSX
792
+
793
+ libculibos.a
794
+
795
+ Linux
796
+
797
+ libculibos.a
798
+
799
+ Component
800
+
801
+ NVIDIA Runtime Compilation Library and Header
802
+
803
+ All
804
+
805
+ nvrtc.h
806
+
807
+ Windows
808
+
809
+ nvrtc.dll, nvrtc-builtins.dll
810
+
811
+ Mac OSX
812
+
813
+ libnvrtc.dylib, libnvrtc-builtins.dylib
814
+
815
+ Linux
816
+
817
+ libnvrtc.so, libnvrtc-builtins.so
818
+
819
+ Component
820
+
821
+ NVIDIA Optimizing Compiler Library
822
+
823
+ Windows
824
+
825
+ nvvm.dll
826
+
827
+ Mac OSX
828
+
829
+ libnvvm.dylib
830
+
831
+ Linux
832
+
833
+ libnvvm.so
834
+
835
+ Component
836
+
837
+ NVIDIA Common Device Math Functions Library
838
+
839
+ Windows
840
+
841
+ libdevice.10.bc
842
+
843
+ Mac OSX
844
+
845
+ libdevice.10.bc
846
+
847
+ Linux
848
+
849
+ libdevice.10.bc
850
+
851
+ Component
852
+
853
+ CUDA Occupancy Calculation Header Library
854
+
855
+ All
856
+
857
+ cuda_occupancy.h
858
+
859
+ Component
860
+
861
+ CUDA Half Precision Headers
862
+
863
+ All
864
+
865
+ cuda_fp16.h, cuda_fp16.hpp
866
+
867
+ Component
868
+
869
+ CUDA Profiling Tools Interface (CUPTI) Library
870
+
871
+ Windows
872
+
873
+ cupti.dll
874
+
875
+ Mac OSX
876
+
877
+ libcupti.dylib
878
+
879
+ Linux
880
+
881
+ libcupti.so
882
+
883
+ Component
884
+
885
+ NVIDIA Tools Extension Library
886
+
887
+ Windows
888
+
889
+ nvToolsExt.dll, nvToolsExt.lib
890
+
891
+ Mac OSX
892
+
893
+ libnvToolsExt.dylib
894
+
895
+ Linux
896
+
897
+ libnvToolsExt.so
898
+
899
+ Component
900
+
901
+ NVIDIA CUDA Driver Libraries
902
+
903
+ Linux
904
+
905
+ libcuda.so, libnvidia-fatbinaryloader.so,
906
+ libnvidia-ptxjitcompiler.so
907
+
908
+ The NVIDIA CUDA Driver Libraries are only distributable in
909
+ applications that meet these criteria:
910
+
911
+ 1. The application was developed starting from a NVIDIA CUDA
912
+ container obtained from Docker Hub or the NVIDIA GPU
913
+ Cloud, and
914
+
915
+ 2. The resulting application is packaged as a Docker
916
+ container and distributed to users on Docker Hub or the
917
+ NVIDIA GPU Cloud only.
918
+
919
+
920
+ 2.7. Attachment B
921
+
922
+
923
+ Additional Licensing Obligations
924
+
925
+ The following third party components included in the SOFTWARE
926
+ are licensed to Licensee pursuant to the following terms and
927
+ conditions:
928
+
929
+ 1. Licensee's use of the GDB third party component is
930
+ subject to the terms and conditions of GNU GPL v3:
931
+
932
+ This product includes copyrighted third-party software licensed
933
+ under the terms of the GNU General Public License v3 ("GPL v3").
934
+ All third-party software packages are copyright by their respective
935
+ authors. GPL v3 terms and conditions are hereby incorporated into
936
+ the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt
937
+
938
+ Consistent with these licensing requirements, the software
939
+ listed below is provided under the terms of the specified
940
+ open source software licenses. To obtain source code for
941
+ software provided under licenses that require
942
+ redistribution of source code, including the GNU General
943
+ Public License (GPL) and GNU Lesser General Public License
944
+ (LGPL), contact oss-requests@nvidia.com. This offer is
945
+ valid for a period of three (3) years from the date of the
946
+ distribution of this product by NVIDIA CORPORATION.
947
+
948
+ Component License
949
+ CUDA-GDB GPL v3
950
+
951
+ 2. Licensee represents and warrants that any and all third
952
+ party licensing and/or royalty payment obligations in
953
+ connection with Licensee's use of the H.264 video codecs
954
+ are solely the responsibility of Licensee.
955
+
956
+ 3. Licensee's use of the Thrust library is subject to the
957
+ terms and conditions of the Apache License Version 2.0.
958
+ All third-party software packages are copyright by their
959
+ respective authors. Apache License Version 2.0 terms and
960
+ conditions are hereby incorporated into the Agreement by
961
+ this reference.
962
+ http://www.apache.org/licenses/LICENSE-2.0.html
963
+
964
+ In addition, Licensee acknowledges the following notice:
965
+ Thrust includes source code from the Boost Iterator,
966
+ Tuple, System, and Random Number libraries.
967
+
968
+ Boost Software License - Version 1.0 - August 17th, 2003
969
+ . . . .
970
+
971
+ Permission is hereby granted, free of charge, to any person or
972
+ organization obtaining a copy of the software and accompanying
973
+ documentation covered by this license (the "Software") to use,
974
+ reproduce, display, distribute, execute, and transmit the Software,
975
+ and to prepare derivative works of the Software, and to permit
976
+ third-parties to whom the Software is furnished to do so, all
977
+ subject to the following:
978
+
979
+ The copyright notices in the Software and this entire statement,
980
+ including the above license grant, this restriction and the following
981
+ disclaimer, must be included in all copies of the Software, in whole
982
+ or in part, and all derivative works of the Software, unless such
983
+ copies or derivative works are solely in the form of machine-executable
984
+ object code generated by a source language processor.
985
+
986
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
987
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
988
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
989
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
990
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
991
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
992
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
993
+ OTHER DEALINGS IN THE SOFTWARE.
994
+
995
+ 4. Licensee's use of the LLVM third party component is
996
+ subject to the following terms and conditions:
997
+
998
+ ======================================================
999
+ LLVM Release License
1000
+ ======================================================
1001
+ University of Illinois/NCSA
1002
+ Open Source License
1003
+
1004
+ Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
1005
+ All rights reserved.
1006
+
1007
+ Developed by:
1008
+
1009
+ LLVM Team
1010
+
1011
+ University of Illinois at Urbana-Champaign
1012
+
1013
+ http://llvm.org
1014
+
1015
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1016
+ of this software and associated documentation files (the "Software"), to
1017
+ deal with the Software without restriction, including without limitation the
1018
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
1019
+ sell copies of the Software, and to permit persons to whom the Software is
1020
+ furnished to do so, subject to the following conditions:
1021
+
1022
+ * Redistributions of source code must retain the above copyright notice,
1023
+ this list of conditions and the following disclaimers.
1024
+
1025
+ * Redistributions in binary form must reproduce the above copyright
1026
+ notice, this list of conditions and the following disclaimers in the
1027
+ documentation and/or other materials provided with the distribution.
1028
+
1029
+ * Neither the names of the LLVM Team, University of Illinois at Urbana-
1030
+ Champaign, nor the names of its contributors may be used to endorse or
1031
+ promote products derived from this Software without specific prior
1032
+ written permission.
1033
+
1034
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1035
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1036
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1037
+ THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
1038
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1039
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1040
+ DEALINGS WITH THE SOFTWARE.
1041
+
1042
+ 5. Licensee's use (e.g. nvprof) of the PCRE third party
1043
+ component is subject to the following terms and
1044
+ conditions:
1045
+
1046
+ ------------
1047
+ PCRE LICENCE
1048
+ ------------
1049
+ PCRE is a library of functions to support regular expressions whose syntax
1050
+ and semantics are as close as possible to those of the Perl 5 language.
1051
+ Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
1052
+ specified below. The documentation for PCRE, supplied in the "doc"
1053
+ directory, is distributed under the same terms as the software itself. The
1054
+ basic library functions are written in C and are freestanding. Also
1055
+ included in the distribution is a set of C++ wrapper functions, and a just-
1056
+ in-time compiler that can be used to optimize pattern matching. These are
1057
+ both optional features that can be omitted when the library is built.
1058
+
1059
+ THE BASIC LIBRARY FUNCTIONS
1060
+ ---------------------------
1061
+ Written by: Philip Hazel
1062
+ Email local part: ph10
1063
+ Email domain: cam.ac.uk
1064
+ University of Cambridge Computing Service,
1065
+ Cambridge, England.
1066
+ Copyright (c) 1997-2012 University of Cambridge
1067
+ All rights reserved.
1068
+
1069
+ PCRE JUST-IN-TIME COMPILATION SUPPORT
1070
+ -------------------------------------
1071
+ Written by: Zoltan Herczeg
1072
+ Email local part: hzmester
1073
+ Email domain: freemail.hu
1074
+ Copyright(c) 2010-2012 Zoltan Herczeg
1075
+ All rights reserved.
1076
+
1077
+ STACK-LESS JUST-IN-TIME COMPILER
1078
+ --------------------------------
1079
+ Written by: Zoltan Herczeg
1080
+ Email local part: hzmester
1081
+ Email domain: freemail.hu
1082
+ Copyright(c) 2009-2012 Zoltan Herczeg
1083
+ All rights reserved.
1084
+
1085
+ THE C++ WRAPPER FUNCTIONS
1086
+ -------------------------
1087
+ Contributed by: Google Inc.
1088
+ Copyright (c) 2007-2012, Google Inc.
1089
+ All rights reserved.
1090
+
1091
+ THE "BSD" LICENCE
1092
+ -----------------
1093
+ Redistribution and use in source and binary forms, with or without
1094
+ modification, are permitted provided that the following conditions are met:
1095
+
1096
+ * Redistributions of source code must retain the above copyright notice,
1097
+ this list of conditions and the following disclaimer.
1098
+
1099
+ * Redistributions in binary form must reproduce the above copyright
1100
+ notice, this list of conditions and the following disclaimer in the
1101
+ documentation and/or other materials provided with the distribution.
1102
+
1103
+ * Neither the name of the University of Cambridge nor the name of Google
1104
+ Inc. nor the names of their contributors may be used to endorse or
1105
+ promote products derived from this software without specific prior
1106
+ written permission.
1107
+
1108
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
1109
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1110
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1111
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
1112
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1113
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1114
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1115
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1116
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1117
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1118
+ POSSIBILITY OF SUCH DAMAGE.
1119
+
1120
+ 6. Some of the cuBLAS library routines were written by or
1121
+ derived from code written by Vasily Volkov and are subject
1122
+ to the Modified Berkeley Software Distribution License as
1123
+ follows:
1124
+
1125
+ Copyright (c) 2007-2009, Regents of the University of California
1126
+
1127
+ All rights reserved.
1128
+
1129
+ Redistribution and use in source and binary forms, with or without
1130
+ modification, are permitted provided that the following conditions are
1131
+ met:
1132
+ * Redistributions of source code must retain the above copyright
1133
+ notice, this list of conditions and the following disclaimer.
1134
+ * Redistributions in binary form must reproduce the above
1135
+ copyright notice, this list of conditions and the following
1136
+ disclaimer in the documentation and/or other materials provided
1137
+ with the distribution.
1138
+ * Neither the name of the University of California, Berkeley nor
1139
+ the names of its contributors may be used to endorse or promote
1140
+ products derived from this software without specific prior
1141
+ written permission.
1142
+
1143
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1144
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1145
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1146
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1147
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1148
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1149
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1150
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1151
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1152
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1153
+ POSSIBILITY OF SUCH DAMAGE.
1154
+
1155
+ 7. Some of the cuBLAS library routines were written by or
1156
+ derived from code written by Davide Barbieri and are
1157
+ subject to the Modified Berkeley Software Distribution
1158
+ License as follows:
1159
+
1160
+ Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
1161
+
1162
+ All rights reserved.
1163
+
1164
+ Redistribution and use in source and binary forms, with or without
1165
+ modification, are permitted provided that the following conditions are
1166
+ met:
1167
+ * Redistributions of source code must retain the above copyright
1168
+ notice, this list of conditions and the following disclaimer.
1169
+ * Redistributions in binary form must reproduce the above
1170
+ copyright notice, this list of conditions and the following
1171
+ disclaimer in the documentation and/or other materials provided
1172
+ with the distribution.
1173
+ * The name of the author may not be used to endorse or promote
1174
+ products derived from this software without specific prior
1175
+ written permission.
1176
+
1177
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
1178
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1179
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1180
+ DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
1181
+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1182
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
1183
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
1184
+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
1185
+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
1186
+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1187
+ POSSIBILITY OF SUCH DAMAGE.
1188
+
1189
+ 8. Some of the cuBLAS library routines were derived from
1190
+ code developed by the University of Tennessee and are
1191
+ subject to the Modified Berkeley Software Distribution
1192
+ License as follows:
1193
+
1194
+ Copyright (c) 2010 The University of Tennessee.
1195
+
1196
+ All rights reserved.
1197
+
1198
+ Redistribution and use in source and binary forms, with or without
1199
+ modification, are permitted provided that the following conditions are
1200
+ met:
1201
+ * Redistributions of source code must retain the above copyright
1202
+ notice, this list of conditions and the following disclaimer.
1203
+ * Redistributions in binary form must reproduce the above
1204
+ copyright notice, this list of conditions and the following
1205
+ disclaimer listed in this license in the documentation and/or
1206
+ other materials provided with the distribution.
1207
+ * Neither the name of the copyright holders nor the names of its
1208
+ contributors may be used to endorse or promote products derived
1209
+ from this software without specific prior written permission.
1210
+
1211
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1212
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1213
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1214
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1215
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1216
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1217
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1218
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1219
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1220
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1221
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1222
+
1223
+ 9. Some of the cuBLAS library routines were written by or
1224
+ derived from code written by Jonathan Hogg and are subject
1225
+ to the Modified Berkeley Software Distribution License as
1226
+ follows:
1227
+
1228
+ Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
1229
+
1230
+ All rights reserved.
1231
+
1232
+ Redistribution and use in source and binary forms, with or without
1233
+ modification, are permitted provided that the following conditions are
1234
+ met:
1235
+ * Redistributions of source code must retain the above copyright
1236
+ notice, this list of conditions and the following disclaimer.
1237
+ * Redistributions in binary form must reproduce the above
1238
+ copyright notice, this list of conditions and the following
1239
+ disclaimer in the documentation and/or other materials provided
1240
+ with the distribution.
1241
+ * Neither the name of the STFC nor the names of its contributors
1242
+ may be used to endorse or promote products derived from this
1243
+ software without specific prior written permission.
1244
+
1245
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1246
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1247
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1248
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
1249
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1250
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1251
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
1252
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
1253
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
1254
+ OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
1255
+ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1256
+
1257
+ 10. Some of the cuBLAS library routines were written by or
1258
+ derived from code written by Ahmad M. Abdelfattah, David
1259
+ Keyes, and Hatem Ltaief, and are subject to the Apache
1260
+ License, Version 2.0, as follows:
1261
+
1262
+ -- (C) Copyright 2013 King Abdullah University of Science and Technology
1263
+ Authors:
1264
+ Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
1265
+ David Keyes (david.keyes@kaust.edu.sa)
1266
+ Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
1267
+
1268
+ Redistribution and use in source and binary forms, with or without
1269
+ modification, are permitted provided that the following conditions
1270
+ are met:
1271
+
1272
+ * Redistributions of source code must retain the above copyright
1273
+ notice, this list of conditions and the following disclaimer.
1274
+ * Redistributions in binary form must reproduce the above copyright
1275
+ notice, this list of conditions and the following disclaimer in the
1276
+ documentation and/or other materials provided with the distribution.
1277
+ * Neither the name of the King Abdullah University of Science and
1278
+ Technology nor the names of its contributors may be used to endorse
1279
+ or promote products derived from this software without specific prior
1280
+ written permission.
1281
+
1282
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1283
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1284
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1285
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1286
+ HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1287
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1288
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1289
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1290
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1291
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1292
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1293
+
1294
+ 11. Some of the cuSPARSE library routines were written by or
1295
+ derived from code written by Li-Wen Chang and are subject
1296
+ to the NCSA Open Source License as follows:
1297
+
1298
+ Copyright (c) 2012, University of Illinois.
1299
+
1300
+ All rights reserved.
1301
+
1302
+ Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
1303
+
1304
+ Permission is hereby granted, free of charge, to any person obtaining
1305
+ a copy of this software and associated documentation files (the
1306
+ "Software"), to deal with the Software without restriction, including
1307
+ without limitation the rights to use, copy, modify, merge, publish,
1308
+ distribute, sublicense, and/or sell copies of the Software, and to
1309
+ permit persons to whom the Software is furnished to do so, subject to
1310
+ the following conditions:
1311
+ * Redistributions of source code must retain the above copyright
1312
+ notice, this list of conditions and the following disclaimer.
1313
+ * Redistributions in binary form must reproduce the above
1314
+ copyright notice, this list of conditions and the following
1315
+ disclaimers in the documentation and/or other materials provided
1316
+ with the distribution.
1317
+ * Neither the names of IMPACT Group, University of Illinois, nor
1318
+ the names of its contributors may be used to endorse or promote
1319
+ products derived from this Software without specific prior
1320
+ written permission.
1321
+
1322
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1323
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1324
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
1325
+ NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
1326
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
1327
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
1328
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
1329
+ SOFTWARE.
1330
+
1331
+ 12. Some of the cuRAND library routines were written by or
1332
+ derived from code written by Mutsuo Saito and Makoto
1333
+ Matsumoto and are subject to the following license:
1334
+
1335
+ Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
1336
+ University. All rights reserved.
1337
+
1338
+ Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
1339
+ University and University of Tokyo. All rights reserved.
1340
+
1341
+ Redistribution and use in source and binary forms, with or without
1342
+ modification, are permitted provided that the following conditions are
1343
+ met:
1344
+ * Redistributions of source code must retain the above copyright
1345
+ notice, this list of conditions and the following disclaimer.
1346
+ * Redistributions in binary form must reproduce the above
1347
+ copyright notice, this list of conditions and the following
1348
+ disclaimer in the documentation and/or other materials provided
1349
+ with the distribution.
1350
+ * Neither the name of the Hiroshima University nor the names of
1351
+ its contributors may be used to endorse or promote products
1352
+ derived from this software without specific prior written
1353
+ permission.
1354
+
1355
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1356
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1357
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1358
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1359
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1360
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1361
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1362
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1363
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1364
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1365
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1366
+
1367
+ 13. Some of the cuRAND library routines were derived from
1368
+ code developed by D. E. Shaw Research and are subject to
1369
+ the following license:
1370
+
1371
+ Copyright 2010-2011, D. E. Shaw Research.
1372
+
1373
+ All rights reserved.
1374
+
1375
+ Redistribution and use in source and binary forms, with or without
1376
+ modification, are permitted provided that the following conditions are
1377
+ met:
1378
+ * Redistributions of source code must retain the above copyright
1379
+ notice, this list of conditions, and the following disclaimer.
1380
+ * Redistributions in binary form must reproduce the above
1381
+ copyright notice, this list of conditions, and the following
1382
+ disclaimer in the documentation and/or other materials provided
1383
+ with the distribution.
1384
+ * Neither the name of D. E. Shaw Research nor the names of its
1385
+ contributors may be used to endorse or promote products derived
1386
+ from this software without specific prior written permission.
1387
+
1388
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1389
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1390
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1391
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1392
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1393
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1394
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1395
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1396
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1397
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1398
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1399
+
1400
+ 14. Some of the Math library routines were written by or
1401
+ derived from code developed by Norbert Juffa and are
1402
+ subject to the following license:
1403
+
1404
+ Copyright (c) 2015-2017, Norbert Juffa
1405
+ All rights reserved.
1406
+
1407
+ Redistribution and use in source and binary forms, with or without
1408
+ modification, are permitted provided that the following conditions
1409
+ are met:
1410
+
1411
+ 1. Redistributions of source code must retain the above copyright
1412
+ notice, this list of conditions and the following disclaimer.
1413
+
1414
+ 2. Redistributions in binary form must reproduce the above copyright
1415
+ notice, this list of conditions and the following disclaimer in the
1416
+ documentation and/or other materials provided with the distribution.
1417
+
1418
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1419
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1420
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1421
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1422
+ HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1423
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1424
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1425
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1426
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1427
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1428
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1429
+
1430
+ 15. Licensee's use of the lz4 third party component is
1431
+ subject to the following terms and conditions:
1432
+
1433
+ Copyright (C) 2011-2013, Yann Collet.
1434
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
1435
+
1436
+ Redistribution and use in source and binary forms, with or without
1437
+ modification, are permitted provided that the following conditions are
1438
+ met:
1439
+
1440
+ * Redistributions of source code must retain the above copyright
1441
+ notice, this list of conditions and the following disclaimer.
1442
+ * Redistributions in binary form must reproduce the above
1443
+ copyright notice, this list of conditions and the following disclaimer
1444
+ in the documentation and/or other materials provided with the
1445
+ distribution.
1446
+
1447
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1448
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1449
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1450
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
1451
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
1452
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
1453
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
1454
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
1455
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
1456
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
1457
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1458
+
1459
+ 16. The NPP library uses code from the Boost Math Toolkit,
1460
+ and is subject to the following license:
1461
+
1462
+ Boost Software License - Version 1.0 - August 17th, 2003
1463
+ . . . .
1464
+
1465
+ Permission is hereby granted, free of charge, to any person or
1466
+ organization obtaining a copy of the software and accompanying
1467
+ documentation covered by this license (the "Software") to use,
1468
+ reproduce, display, distribute, execute, and transmit the Software,
1469
+ and to prepare derivative works of the Software, and to permit
1470
+ third-parties to whom the Software is furnished to do so, all
1471
+ subject to the following:
1472
+
1473
+ The copyright notices in the Software and this entire statement,
1474
+ including the above license grant, this restriction and the following
1475
+ disclaimer, must be included in all copies of the Software, in whole
1476
+ or in part, and all derivative works of the Software, unless such
1477
+ copies or derivative works are solely in the form of machine-executable
1478
+ object code generated by a source language processor.
1479
+
1480
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
1481
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
1482
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
1483
+ NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
1484
+ ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
1485
+ OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
1486
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
1487
+ OTHER DEALINGS IN THE SOFTWARE.
1488
+
1489
+ 17. Portions of the Nsight Eclipse Edition are subject to the
1490
+ following license:
1491
+
1492
+ The Eclipse Foundation makes available all content in this plug-in
1493
+ ("Content"). Unless otherwise indicated below, the Content is provided
1494
+ to you under the terms and conditions of the Eclipse Public License
1495
+ Version 1.0 ("EPL"). A copy of the EPL is available at
1496
+ http://www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
1497
+ will mean the Content.
1498
+
1499
+ If you did not receive this Content directly from the Eclipse
1500
+ Foundation, the Content is being redistributed by another party
1501
+ ("Redistributor") and different terms and conditions may apply to your
1502
+ use of any object code in the Content. Check the Redistributor's
1503
+ license that was provided with the Content. If no such license exists,
1504
+ contact the Redistributor. Unless otherwise indicated below, the terms
1505
+ and conditions of the EPL still apply to any source code in the
1506
+ Content and such source code may be obtained at http://www.eclipse.org.
1507
+
1508
+ 18. Some of the cuBLAS library routines use code from
1509
+ OpenAI, which is subject to the following license:
1510
+
1511
+ License URL
1512
+ https://github.com/openai/openai-gemm/blob/master/LICENSE
1513
+
1514
+ License Text
1515
+ The MIT License
1516
+
1517
+ Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
1518
+
1519
+ Permission is hereby granted, free of charge, to any person obtaining a copy
1520
+ of this software and associated documentation files (the "Software"), to deal
1521
+ in the Software without restriction, including without limitation the rights
1522
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1523
+ copies of the Software, and to permit persons to whom the Software is
1524
+ furnished to do so, subject to the following conditions:
1525
+
1526
+ The above copyright notice and this permission notice shall be included in
1527
+ all copies or substantial portions of the Software.
1528
+
1529
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1530
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1531
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1532
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1533
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1534
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
1535
+ THE SOFTWARE.
1536
+
1537
+ 19. Licensee's use of the Visual Studio Setup Configuration
1538
+ Samples is subject to the following license:
1539
+
1540
+ The MIT License (MIT)
1541
+ Copyright (C) Microsoft Corporation. All rights reserved.
1542
+
1543
+ Permission is hereby granted, free of charge, to any person
1544
+ obtaining a copy of this software and associated documentation
1545
+ files (the "Software"), to deal in the Software without restriction,
1546
+ including without limitation the rights to use, copy, modify, merge,
1547
+ publish, distribute, sublicense, and/or sell copies of the Software,
1548
+ and to permit persons to whom the Software is furnished to do so,
1549
+ subject to the following conditions:
1550
+
1551
+ The above copyright notice and this permission notice shall be included
1552
+ in all copies or substantial portions of the Software.
1553
+
1554
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
1555
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1556
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1557
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1558
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1559
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1560
+
1561
+ 20. Licensee's use of linmath.h header for CPU functions for
1562
+ GL vector/matrix operations from lunarG is subject to the
1563
+ Apache License Version 2.0.
1564
+
1565
+ 21. The DX12-CUDA sample uses the d3dx12.h header, which is
1566
+ subject to the MIT license.
1567
+
1568
+ -----------------
.venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/RECORD ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ nvidia/__pycache__/__init__.cpython-311.pyc,,
3
+ nvidia/cufft/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ nvidia/cufft/__pycache__/__init__.cpython-311.pyc,,
5
+ nvidia/cufft/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ nvidia/cufft/include/__pycache__/__init__.cpython-311.pyc,,
7
+ nvidia/cufft/include/cudalibxt.h,sha256=9GDuRiOzJuO61zRDhIpWpF7XHp8FXSOIlHJNoIMwOZQ,4105
8
+ nvidia/cufft/include/cufft.h,sha256=OPTrbN3YvHR2HZTy4Kr_azbFUz8ZGXAkmT_1ero1y3I,13109
9
+ nvidia/cufft/include/cufftXt.h,sha256=bTMo9ixYPn-FnrCw2VYZ2XVwDYT7N8WrRdXp4CmBilY,11148
10
+ nvidia/cufft/include/cufftw.h,sha256=Uzfj1IVMlLQU_G50u84hXYX1K95HLXIwOcjQoAg5pGE,20051
11
+ nvidia/cufft/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ nvidia/cufft/lib/__pycache__/__init__.cpython-311.pyc,,
13
+ nvidia/cufft/lib/libcufft.so.11,sha256=85IcQTOSUkJFnr_b95AdtOv65rvP_53FzlOx_xP7Qv8,292889192
14
+ nvidia/cufft/lib/libcufftw.so.11,sha256=IwelrPzMm0D5iThAOCGM_q1WTNQ2M3AdMMiTBH50T0Q,974888
15
+ nvidia_cufft_cu12-11.2.1.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
16
+ nvidia_cufft_cu12-11.2.1.3.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262
17
+ nvidia_cufft_cu12-11.2.1.3.dist-info/METADATA,sha256=e3c8JR1hTBAIlY96lfSibInmFGkkwNcYO6CExGuXQ6w,1502
18
+ nvidia_cufft_cu12-11.2.1.3.dist-info/RECORD,,
19
+ nvidia_cufft_cu12-11.2.1.3.dist-info/WHEEL,sha256=XDTs3wIbcE-BcRO08VJlZpA6z9OaC1mOKPCGGGwuM2g,109
20
+ nvidia_cufft_cu12-11.2.1.3.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7
.venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.42.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-manylinux2014_x86_64
5
+
.venv/lib/python3.11/site-packages/nvidia_cufft_cu12-11.2.1.3.dist-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ nvidia
.venv/lib/python3.11/site-packages/openai-1.61.1.dist-info/METADATA ADDED
@@ -0,0 +1,851 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: openai
3
+ Version: 1.61.1
4
+ Summary: The official Python library for the openai API
5
+ Project-URL: Homepage, https://github.com/openai/openai-python
6
+ Project-URL: Repository, https://github.com/openai/openai-python
7
+ Author-email: OpenAI <support@openai.com>
8
+ License-Expression: Apache-2.0
9
+ License-File: LICENSE
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: Apache Software License
12
+ Classifier: Operating System :: MacOS
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Operating System :: POSIX
16
+ Classifier: Operating System :: POSIX :: Linux
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
+ Classifier: Typing :: Typed
24
+ Requires-Python: >=3.8
25
+ Requires-Dist: anyio<5,>=3.5.0
26
+ Requires-Dist: distro<2,>=1.7.0
27
+ Requires-Dist: httpx<1,>=0.23.0
28
+ Requires-Dist: jiter<1,>=0.4.0
29
+ Requires-Dist: pydantic<3,>=1.9.0
30
+ Requires-Dist: sniffio
31
+ Requires-Dist: tqdm>4
32
+ Requires-Dist: typing-extensions<5,>=4.11
33
+ Provides-Extra: datalib
34
+ Requires-Dist: numpy>=1; extra == 'datalib'
35
+ Requires-Dist: pandas-stubs>=1.1.0.11; extra == 'datalib'
36
+ Requires-Dist: pandas>=1.2.3; extra == 'datalib'
37
+ Provides-Extra: realtime
38
+ Requires-Dist: websockets<15,>=13; extra == 'realtime'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # OpenAI Python API library
42
+
43
+ [![PyPI version](https://img.shields.io/pypi/v/openai.svg)](https://pypi.org/project/openai/)
44
+
45
+ The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.8+
46
+ application. The library includes type definitions for all request params and response fields,
47
+ and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx).
48
+
49
+ It is generated from our [OpenAPI specification](https://github.com/openai/openai-openapi) with [Stainless](https://stainlessapi.com/).
50
+
51
+ ## Documentation
52
+
53
+ The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](https://github.com/openai/openai-python/tree/main/api.md).
54
+
55
+ ## Installation
56
+
57
+ > [!IMPORTANT]
58
+ > The SDK was rewritten in v1, which was released November 6th 2023. See the [v1 migration guide](https://github.com/openai/openai-python/discussions/742), which includes scripts to automatically update your code.
59
+
60
+ ```sh
61
+ # install from PyPI
62
+ pip install openai
63
+ ```
64
+
65
+ ## Usage
66
+
67
+ The full API of this library can be found in [api.md](https://github.com/openai/openai-python/tree/main/api.md).
68
+
69
+ ```python
70
+ import os
71
+ from openai import OpenAI
72
+
73
+ client = OpenAI(
74
+ api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted
75
+ )
76
+
77
+ chat_completion = client.chat.completions.create(
78
+ messages=[
79
+ {
80
+ "role": "user",
81
+ "content": "Say this is a test",
82
+ }
83
+ ],
84
+ model="gpt-4o",
85
+ )
86
+ ```
87
+
88
+ While you can provide an `api_key` keyword argument,
89
+ we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/)
90
+ to add `OPENAI_API_KEY="My API Key"` to your `.env` file
91
+ so that your API Key is not stored in source control.
92
+
93
+ ### Vision
94
+
95
+ With a hosted image:
96
+
97
+ ```python
98
+ response = client.chat.completions.create(
99
+ model="gpt-4o-mini",
100
+ messages=[
101
+ {
102
+ "role": "user",
103
+ "content": [
104
+ {"type": "text", "text": prompt},
105
+ {
106
+ "type": "image_url",
107
+ "image_url": {"url": f"{img_url}"},
108
+ },
109
+ ],
110
+ }
111
+ ],
112
+ )
113
+ ```
114
+
115
+ With the image as a base64 encoded string:
116
+
117
+ ```python
118
+ response = client.chat.completions.create(
119
+ model="gpt-4o-mini",
120
+ messages=[
121
+ {
122
+ "role": "user",
123
+ "content": [
124
+ {"type": "text", "text": prompt},
125
+ {
126
+ "type": "image_url",
127
+ "image_url": {"url": f"data:{img_type};base64,{img_b64_str}"},
128
+ },
129
+ ],
130
+ }
131
+ ],
132
+ )
133
+ ```
134
+
135
+ ### Polling Helpers
136
+
137
+ When interacting with the API, some actions, such as starting a Run and adding files to vector stores, are asynchronous and take time to complete. The SDK includes
138
+ helper functions which will poll the status until it reaches a terminal state and then return the resulting object.
139
+ If an API method results in an action that could benefit from polling there will be a corresponding version of the
140
+ method ending in '\_and_poll'.
141
+
142
+ For instance to create a Run and poll until it reaches a terminal state you can run:
143
+
144
+ ```python
145
+ run = client.beta.threads.runs.create_and_poll(
146
+ thread_id=thread.id,
147
+ assistant_id=assistant.id,
148
+ )
149
+ ```
150
+
151
+ More information on the lifecycle of a Run can be found in the [Run Lifecycle Documentation](https://platform.openai.com/docs/assistants/how-it-works/run-lifecycle).
152
+
153
+ ### Bulk Upload Helpers
154
+
155
+ When creating and interacting with vector stores, you can use polling helpers to monitor the status of operations.
156
+ For convenience, we also provide a bulk upload helper that lets you upload several files at once.
157
+
158
+ ```python
159
+ sample_files = [Path("sample-paper.pdf"), ...]
160
+
161
+ batch = await client.vector_stores.file_batches.upload_and_poll(
162
+ store.id,
163
+ files=sample_files,
164
+ )
165
+ ```
166
+
167
+ ### Streaming Helpers
168
+
169
+ The SDK also includes helpers to process streams and handle incoming events.
170
+
171
+ ```python
172
+ with client.beta.threads.runs.stream(
173
+ thread_id=thread.id,
174
+ assistant_id=assistant.id,
175
+ instructions="Please address the user as Jane Doe. The user has a premium account.",
176
+ ) as stream:
177
+ for event in stream:
178
+ # Print the text from text delta events
179
+ if event.type == "thread.message.delta" and event.data.delta.content:
180
+ print(event.data.delta.content[0].text)
181
+ ```
182
+
183
+ More information on streaming helpers can be found in the dedicated documentation: [helpers.md](https://github.com/openai/openai-python/tree/main/helpers.md)
184
+
185
+ ## Async usage
186
+
187
+ Simply import `AsyncOpenAI` instead of `OpenAI` and use `await` with each API call:
188
+
189
+ ```python
190
+ import os
191
+ import asyncio
192
+ from openai import AsyncOpenAI
193
+
194
+ client = AsyncOpenAI(
195
+ api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted
196
+ )
197
+
198
+
199
+ async def main() -> None:
200
+ chat_completion = await client.chat.completions.create(
201
+ messages=[
202
+ {
203
+ "role": "user",
204
+ "content": "Say this is a test",
205
+ }
206
+ ],
207
+ model="gpt-4o",
208
+ )
209
+
210
+
211
+ asyncio.run(main())
212
+ ```
213
+
214
+ Functionality between the synchronous and asynchronous clients is otherwise identical.
215
+
216
+ ## Streaming responses
217
+
218
+ We provide support for streaming responses using Server-Sent Events (SSE).
219
+
220
+ ```python
221
+ from openai import OpenAI
222
+
223
+ client = OpenAI()
224
+
225
+ stream = client.chat.completions.create(
226
+ messages=[
227
+ {
228
+ "role": "user",
229
+ "content": "Say this is a test",
230
+ }
231
+ ],
232
+ model="gpt-4o",
233
+ stream=True,
234
+ )
235
+ for chunk in stream:
236
+ print(chunk.choices[0].delta.content or "", end="")
237
+ ```
238
+
239
+ The async client uses the exact same interface.
240
+
241
+ ```python
242
+ import asyncio
243
+ from openai import AsyncOpenAI
244
+
245
+ client = AsyncOpenAI()
246
+
247
+
248
+ async def main():
249
+ stream = await client.chat.completions.create(
250
+ model="gpt-4",
251
+ messages=[{"role": "user", "content": "Say this is a test"}],
252
+ stream=True,
253
+ )
254
+ async for chunk in stream:
255
+ print(chunk.choices[0].delta.content or "", end="")
256
+
257
+
258
+ asyncio.run(main())
259
+ ```
260
+
261
+ ## Module-level client
262
+
263
+ > [!IMPORTANT]
264
+ > We highly recommend instantiating client instances instead of relying on the global client.
265
+
266
+ We also expose a global client instance that is accessible in a similar fashion to versions prior to v1.
267
+
268
+ ```py
269
+ import openai
270
+
271
+ # optional; defaults to `os.environ['OPENAI_API_KEY']`
272
+ openai.api_key = '...'
273
+
274
+ # all client options can be configured just like the `OpenAI` instantiation counterpart
275
+ openai.base_url = "https://..."
276
+ openai.default_headers = {"x-foo": "true"}
277
+
278
+ completion = openai.chat.completions.create(
279
+ model="gpt-4o",
280
+ messages=[
281
+ {
282
+ "role": "user",
283
+ "content": "How do I output all files in a directory using Python?",
284
+ },
285
+ ],
286
+ )
287
+ print(completion.choices[0].message.content)
288
+ ```
289
+
290
+ The API is the exact same as the standard client instance-based API.
291
+
292
+ This is intended to be used within REPLs or notebooks for faster iteration, **not** in application code.
293
+
294
+ We recommend that you always instantiate a client (e.g., with `client = OpenAI()`) in application code because:
295
+
296
+ - It can be difficult to reason about where client options are configured
297
+ - It's not possible to change certain client options without potentially causing race conditions
298
+ - It's harder to mock for testing purposes
299
+ - It's not possible to control cleanup of network connections
300
+
301
+ ## Realtime API beta
302
+
303
+ The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as [function calling](https://platform.openai.com/docs/guides/function-calling) through a WebSocket connection.
304
+
305
+ Under the hood the SDK uses the [`websockets`](https://websockets.readthedocs.io/en/stable/) library to manage connections.
306
+
307
+ The Realtime API works through a combination of client-sent events and server-sent events. Clients can send events to do things like update session configuration or send text and audio inputs. Server events confirm when audio responses have completed, or when a text response from the model has been received. A full event reference can be found [here](https://platform.openai.com/docs/api-reference/realtime-client-events) and a guide can be found [here](https://platform.openai.com/docs/guides/realtime).
308
+
309
+ Basic text based example:
310
+
311
+ ```py
312
+ import asyncio
313
+ from openai import AsyncOpenAI
314
+
315
+ async def main():
316
+ client = AsyncOpenAI()
317
+
318
+ async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
319
+ await connection.session.update(session={'modalities': ['text']})
320
+
321
+ await connection.conversation.item.create(
322
+ item={
323
+ "type": "message",
324
+ "role": "user",
325
+ "content": [{"type": "input_text", "text": "Say hello!"}],
326
+ }
327
+ )
328
+ await connection.response.create()
329
+
330
+ async for event in connection:
331
+ if event.type == 'response.text.delta':
332
+ print(event.delta, flush=True, end="")
333
+
334
+ elif event.type == 'response.text.done':
335
+ print()
336
+
337
+ elif event.type == "response.done":
338
+ break
339
+
340
+ asyncio.run(main())
341
+ ```
342
+
343
+ However, the real magic of the Realtime API is handling audio inputs / outputs; see this example [TUI script](https://github.com/openai/openai-python/blob/main/examples/realtime/push_to_talk_app.py) for a fully fledged example.
344
+
345
+ ### Realtime error handling
346
+
347
+ Whenever an error occurs, the Realtime API will send an [`error` event](https://platform.openai.com/docs/guides/realtime-model-capabilities#error-handling) and the connection will stay open and remain usable. This means you need to handle it yourself, as *no errors are raised directly* by the SDK when an `error` event comes in.
348
+
349
+ ```py
350
+ client = AsyncOpenAI()
351
+
352
+ async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
353
+ ...
354
+ async for event in connection:
355
+ if event.type == 'error':
356
+ print(event.error.type)
357
+ print(event.error.code)
358
+ print(event.error.event_id)
359
+ print(event.error.message)
360
+ ```
361
+
362
+ ## Using types
363
+
364
+ Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like:
365
+
366
+ - Serializing back into JSON, `model.to_json()`
367
+ - Converting to a dictionary, `model.to_dict()`
368
+
369
+ Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`.
370
+
371
+ ## Pagination
372
+
373
+ List methods in the OpenAI API are paginated.
374
+
375
+ This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually:
376
+
377
+ ```python
378
+ from openai import OpenAI
379
+
380
+ client = OpenAI()
381
+
382
+ all_jobs = []
383
+ # Automatically fetches more pages as needed.
384
+ for job in client.fine_tuning.jobs.list(
385
+ limit=20,
386
+ ):
387
+ # Do something with job here
388
+ all_jobs.append(job)
389
+ print(all_jobs)
390
+ ```
391
+
392
+ Or, asynchronously:
393
+
394
+ ```python
395
+ import asyncio
396
+ from openai import AsyncOpenAI
397
+
398
+ client = AsyncOpenAI()
399
+
400
+
401
+ async def main() -> None:
402
+ all_jobs = []
403
+ # Iterate through items across all pages, issuing requests as needed.
404
+ async for job in client.fine_tuning.jobs.list(
405
+ limit=20,
406
+ ):
407
+ all_jobs.append(job)
408
+ print(all_jobs)
409
+
410
+
411
+ asyncio.run(main())
412
+ ```
413
+
414
+ Alternatively, you can use the `.has_next_page()`, `.next_page_info()`, or `.get_next_page()` methods for more granular control working with pages:
415
+
416
+ ```python
417
+ first_page = await client.fine_tuning.jobs.list(
418
+ limit=20,
419
+ )
420
+ if first_page.has_next_page():
421
+ print(f"will fetch next page using these details: {first_page.next_page_info()}")
422
+ next_page = await first_page.get_next_page()
423
+ print(f"number of items we just fetched: {len(next_page.data)}")
424
+
425
+ # Remove `await` for non-async usage.
426
+ ```
427
+
428
+ Or just work directly with the returned data:
429
+
430
+ ```python
431
+ first_page = await client.fine_tuning.jobs.list(
432
+ limit=20,
433
+ )
434
+
435
+ print(f"next page cursor: {first_page.after}") # => "next page cursor: ..."
436
+ for job in first_page.data:
437
+ print(job.id)
438
+
439
+ # Remove `await` for non-async usage.
440
+ ```
441
+
442
+ ## Nested params
443
+
444
+ Nested parameters are dictionaries, typed using `TypedDict`, for example:
445
+
446
+ ```python
447
+ from openai import OpenAI
448
+
449
+ client = OpenAI()
450
+
451
+ completion = client.chat.completions.create(
452
+ messages=[
453
+ {
454
+ "role": "user",
455
+ "content": "Can you generate an example json object describing a fruit?",
456
+ }
457
+ ],
458
+ model="gpt-4o",
459
+ response_format={"type": "json_object"},
460
+ )
461
+ ```
462
+
463
+ ## File uploads
464
+
465
+ Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`.
466
+
467
+ ```python
468
+ from pathlib import Path
469
+ from openai import OpenAI
470
+
471
+ client = OpenAI()
472
+
473
+ client.files.create(
474
+ file=Path("input.jsonl"),
475
+ purpose="fine-tune",
476
+ )
477
+ ```
478
+
479
+ The async client uses the exact same interface. If you pass a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, the file contents will be read asynchronously automatically.
480
+
481
+ ## Handling errors
482
+
483
+ When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `openai.APIConnectionError` is raised.
484
+
485
+ When the API returns a non-success status code (that is, 4xx or 5xx
486
+ response), a subclass of `openai.APIStatusError` is raised, containing `status_code` and `response` properties.
487
+
488
+ All errors inherit from `openai.APIError`.
489
+
490
+ ```python
491
+ import openai
492
+ from openai import OpenAI
493
+
494
+ client = OpenAI()
495
+
496
+ try:
497
+ client.fine_tuning.jobs.create(
498
+ model="gpt-4o",
499
+ training_file="file-abc123",
500
+ )
501
+ except openai.APIConnectionError as e:
502
+ print("The server could not be reached")
503
+ print(e.__cause__) # an underlying Exception, likely raised within httpx.
504
+ except openai.RateLimitError as e:
505
+ print("A 429 status code was received; we should back off a bit.")
506
+ except openai.APIStatusError as e:
507
+ print("Another non-200-range status code was received")
508
+ print(e.status_code)
509
+ print(e.response)
510
+ ```
511
+
512
+ Error codes are as follows:
513
+
514
+ | Status Code | Error Type |
515
+ | ----------- | -------------------------- |
516
+ | 400 | `BadRequestError` |
517
+ | 401 | `AuthenticationError` |
518
+ | 403 | `PermissionDeniedError` |
519
+ | 404 | `NotFoundError` |
520
+ | 422 | `UnprocessableEntityError` |
521
+ | 429 | `RateLimitError` |
522
+ | >=500 | `InternalServerError` |
523
+ | N/A | `APIConnectionError` |
524
+
525
+ ## Request IDs
526
+
527
+ > For more information on debugging requests, see [these docs](https://platform.openai.com/docs/api-reference/debugging-requests)
528
+
529
+ All object responses in the SDK provide a `_request_id` property which is added from the `x-request-id` response header so that you can quickly log failing requests and report them back to OpenAI.
530
+
531
+ ```python
532
+ completion = await client.chat.completions.create(
533
+ messages=[{"role": "user", "content": "Say this is a test"}], model="gpt-4"
534
+ )
535
+ print(completion._request_id) # req_123
536
+ ```
537
+
538
+ Note that unlike other properties that use an `_` prefix, the `_request_id` property
539
+ *is* public. Unless documented otherwise, *all* other `_` prefix properties,
540
+ methods and modules are *private*.
541
+
542
+ > [!IMPORTANT]
543
+ > If you need to access request IDs for failed requests you must catch the `APIStatusError` exception
544
+
545
+ ```python
546
+ import openai
547
+
548
+ try:
549
+ completion = await client.chat.completions.create(
550
+ messages=[{"role": "user", "content": "Say this is a test"}], model="gpt-4"
551
+ )
552
+ except openai.APIStatusError as exc:
553
+ print(exc.request_id) # req_123
554
+ raise exc
555
+ ```
556
+
557
+
558
+ ### Retries
559
+
560
+ Certain errors are automatically retried 2 times by default, with a short exponential backoff.
561
+ Connection errors (for example, due to a network connectivity problem), 408 Request Timeout, 409 Conflict,
562
+ 429 Rate Limit, and >=500 Internal errors are all retried by default.
563
+
564
+ You can use the `max_retries` option to configure or disable retry settings:
565
+
566
+ ```python
567
+ from openai import OpenAI
568
+
569
+ # Configure the default for all requests:
570
+ client = OpenAI(
571
+ # default is 2
572
+ max_retries=0,
573
+ )
574
+
575
+ # Or, configure per-request:
576
+ client.with_options(max_retries=5).chat.completions.create(
577
+ messages=[
578
+ {
579
+ "role": "user",
580
+ "content": "How can I get the name of the current day in JavaScript?",
581
+ }
582
+ ],
583
+ model="gpt-4o",
584
+ )
585
+ ```
586
+
587
+ ### Timeouts
588
+
589
+ By default requests time out after 10 minutes. You can configure this with a `timeout` option,
590
+ which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object:
591
+
592
+ ```python
593
+ from openai import OpenAI
594
+
595
+ # Configure the default for all requests:
596
+ client = OpenAI(
597
+ # 20 seconds (default is 10 minutes)
598
+ timeout=20.0,
599
+ )
600
+
601
+ # More granular control:
602
+ client = OpenAI(
603
+ timeout=httpx.Timeout(60.0, read=5.0, write=10.0, connect=2.0),
604
+ )
605
+
606
+ # Override per-request:
607
+ client.with_options(timeout=5.0).chat.completions.create(
608
+ messages=[
609
+ {
610
+ "role": "user",
611
+ "content": "How can I list all files in a directory using Python?",
612
+ }
613
+ ],
614
+ model="gpt-4o",
615
+ )
616
+ ```
617
+
618
+ On timeout, an `APITimeoutError` is raised.
619
+
620
+ Note that requests that time out are [retried twice by default](https://github.com/openai/openai-python/tree/main/#retries).
621
+
622
+ ## Advanced
623
+
624
+ ### Logging
625
+
626
+ We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module.
627
+
628
+ You can enable logging by setting the environment variable `OPENAI_LOG` to `info`.
629
+
630
+ ```shell
631
+ $ export OPENAI_LOG=info
632
+ ```
633
+
634
+ Or to `debug` for more verbose logging.
635
+
636
+ ### How to tell whether `None` means `null` or missing
637
+
638
+ In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. You can differentiate the two cases with `.model_fields_set`:
639
+
640
+ ```py
641
+ if response.my_field is None:
642
+ if 'my_field' not in response.model_fields_set:
643
+ print('Got json like {}, without a "my_field" key present at all.')
644
+ else:
645
+ print('Got json like {"my_field": null}.')
646
+ ```
647
+
648
+ ### Accessing raw response data (e.g. headers)
649
+
650
+ The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call, e.g.,
651
+
652
+ ```py
653
+ from openai import OpenAI
654
+
655
+ client = OpenAI()
656
+ response = client.chat.completions.with_raw_response.create(
657
+ messages=[{
658
+ "role": "user",
659
+ "content": "Say this is a test",
660
+ }],
661
+ model="gpt-4o",
662
+ )
663
+ print(response.headers.get('X-My-Header'))
664
+
665
+ completion = response.parse() # get the object that `chat.completions.create()` would have returned
666
+ print(completion)
667
+ ```
668
+
669
+ These methods return a [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version.
670
+
671
+ For the sync client this will mostly be the same, with the exception
672
+ that `content` & `text` will be methods instead of properties. In the
673
+ async client, all methods will be async.
674
+
675
+ A migration script will be provided & the migration in general should
676
+ be smooth.
677
+
678
+ #### `.with_streaming_response`
679
+
680
+ The above interface eagerly reads the full response body when you make the request, which may not always be what you want.
681
+
682
+ To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods.
683
+
684
+ As such, `.with_streaming_response` methods return a different [`APIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object, and the async client returns an [`AsyncAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_response.py) object.
685
+
686
+ ```python
687
+ with client.chat.completions.with_streaming_response.create(
688
+ messages=[
689
+ {
690
+ "role": "user",
691
+ "content": "Say this is a test",
692
+ }
693
+ ],
694
+ model="gpt-4o",
695
+ ) as response:
696
+ print(response.headers.get("X-My-Header"))
697
+
698
+ for line in response.iter_lines():
699
+ print(line)
700
+ ```
701
+
702
+ The context manager is required so that the response will reliably be closed.
703
+
704
+ ### Making custom/undocumented requests
705
+
706
+ This library is typed for convenient access to the documented API.
707
+
708
+ If you need to access undocumented endpoints, params, or response properties, the library can still be used.
709
+
710
+ #### Undocumented endpoints
711
+
712
+ To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other
713
+ http verbs. Options on the client will be respected (such as retries) when making this request.
714
+
715
+ ```py
716
+ import httpx
717
+
718
+ response = client.post(
719
+ "/foo",
720
+ cast_to=httpx.Response,
721
+ body={"my_param": True},
722
+ )
723
+
724
+ print(response.headers.get("x-foo"))
725
+ ```
726
+
727
+ #### Undocumented request params
728
+
729
+ If you want to explicitly send an extra param, you can do so with the `extra_query`, `extra_body`, and `extra_headers` request
730
+ options.
731
+
732
+ #### Undocumented response properties
733
+
734
+ To access undocumented response properties, you can access the extra fields like `response.unknown_prop`. You
735
+ can also get all the extra fields on the Pydantic model as a dict with
736
+ [`response.model_extra`](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_extra).
737
+
738
+ ### Configuring the HTTP client
739
+
740
+ You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including:
741
+
742
+ - Support for [proxies](https://www.python-httpx.org/advanced/proxies/)
743
+ - Custom [transports](https://www.python-httpx.org/advanced/transports/)
744
+ - Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality
745
+
746
+ ```python
747
+ import httpx
748
+ from openai import OpenAI, DefaultHttpxClient
749
+
750
+ client = OpenAI(
751
+ # Or use the `OPENAI_BASE_URL` env var
752
+ base_url="http://my.test.server.example.com:8083/v1",
753
+ http_client=DefaultHttpxClient(
754
+ proxy="http://my.test.proxy.example.com",
755
+ transport=httpx.HTTPTransport(local_address="0.0.0.0"),
756
+ ),
757
+ )
758
+ ```
759
+
760
+ You can also customize the client on a per-request basis by using `with_options()`:
761
+
762
+ ```python
763
+ client.with_options(http_client=DefaultHttpxClient(...))
764
+ ```
765
+
766
+ ### Managing HTTP resources
767
+
768
+ By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting.
769
+
770
+ ```py
771
+ from openai import OpenAI
772
+
773
+ with OpenAI() as client:
774
+ # make requests here
775
+ ...
776
+
777
+ # HTTP client is now closed
778
+ ```
779
+
780
+ ## Microsoft Azure OpenAI
781
+
782
+ To use this library with [Azure OpenAI](https://learn.microsoft.com/azure/ai-services/openai/overview), use the `AzureOpenAI`
783
+ class instead of the `OpenAI` class.
784
+
785
+ > [!IMPORTANT]
786
+ > The Azure API shape differs from the core API shape which means that the static types for responses / params
787
+ > won't always be correct.
788
+
789
+ ```py
790
+ from openai import AzureOpenAI
791
+
792
+ # gets the API Key from environment variable AZURE_OPENAI_API_KEY
793
+ client = AzureOpenAI(
794
+ # https://learn.microsoft.com/azure/ai-services/openai/reference#rest-api-versioning
795
+ api_version="2023-07-01-preview",
796
+ # https://learn.microsoft.com/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource
797
+ azure_endpoint="https://example-endpoint.openai.azure.com",
798
+ )
799
+
800
+ completion = client.chat.completions.create(
801
+ model="deployment-name", # e.g. gpt-35-instant
802
+ messages=[
803
+ {
804
+ "role": "user",
805
+ "content": "How do I output all files in a directory using Python?",
806
+ },
807
+ ],
808
+ )
809
+ print(completion.to_json())
810
+ ```
811
+
812
+ In addition to the options provided in the base `OpenAI` client, the following options are provided:
813
+
814
+ - `azure_endpoint` (or the `AZURE_OPENAI_ENDPOINT` environment variable)
815
+ - `azure_deployment`
816
+ - `api_version` (or the `OPENAI_API_VERSION` environment variable)
817
+ - `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` environment variable)
818
+ - `azure_ad_token_provider`
819
+
820
+ An example of using the client with Microsoft Entra ID (formerly known as Azure Active Directory) can be found [here](https://github.com/openai/openai-python/blob/main/examples/azure_ad.py).
821
+
822
+ ## Versioning
823
+
824
+ This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions:
825
+
826
+ 1. Changes that only affect static types, without breaking runtime behavior.
827
+ 2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_
828
+ 3. Changes that we do not expect to impact the vast majority of users in practice.
829
+
830
+ We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience.
831
+
832
+ We are keen for your feedback; please open an [issue](https://www.github.com/openai/openai-python/issues) with questions, bugs, or suggestions.
833
+
834
+ ### Determining the installed version
835
+
836
+ If you've upgraded to the latest version but aren't seeing any new features you were expecting, then your Python environment is likely still using an older version.
837
+
838
+ You can determine the version that is being used at runtime with:
839
+
840
+ ```py
841
+ import openai
842
+ print(openai.__version__)
843
+ ```
844
+
845
+ ## Requirements
846
+
847
+ Python 3.8 or higher.
848
+
849
+ ## Contributing
850
+
851
+ See [the contributing documentation](https://github.com/openai/openai-python/tree/main/./CONTRIBUTING.md).
.venv/lib/python3.11/site-packages/openai-1.61.1.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright 2025 OpenAI
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
.venv/lib/python3.11/site-packages/smart_open/__init__.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2019 Radim Rehurek <me@radimrehurek.com>
4
+ #
5
+ # This code is distributed under the terms and conditions
6
+ # from the MIT License (MIT).
7
+ #
8
+
9
+ """
10
+ Utilities for streaming to/from several file-like data storages: S3 / HDFS / local
11
+ filesystem / compressed files, and many more, using a simple, Pythonic API.
12
+
13
+ The streaming makes heavy use of generators and pipes, to avoid loading
14
+ full file contents into memory, allowing work with arbitrarily large files.
15
+
16
+ The main functions are:
17
+
18
+ * `open()`, which opens the given file for reading/writing
19
+ * `parse_uri()`
20
+ * `s3_iter_bucket()`, which goes over all keys in an S3 bucket in parallel
21
+ * `register_compressor()`, which registers callbacks for transparent compressor handling
22
+
23
+ """
24
+
25
+ import logging
26
+
27
+ #
28
+ # Prevent regression of #474 and #475
29
+ #
30
+ logger = logging.getLogger(__name__)
31
+ logger.addHandler(logging.NullHandler())
32
+
33
+ from smart_open import version # noqa: E402
34
+ from .smart_open_lib import open, parse_uri, smart_open, register_compressor # noqa: E402
35
+
36
+ _WARNING = """smart_open.s3_iter_bucket is deprecated and will stop functioning
37
+ in a future version. Please import iter_bucket from the smart_open.s3 module instead:
38
+
39
+ from smart_open.s3 import iter_bucket as s3_iter_bucket
40
+
41
+ """
42
+ _WARNED = False
43
+
44
+
45
+ def s3_iter_bucket(
46
+ bucket_name,
47
+ prefix='',
48
+ accept_key=None,
49
+ key_limit=None,
50
+ workers=16,
51
+ retries=3,
52
+ **session_kwargs
53
+ ):
54
+ """Deprecated. Use smart_open.s3.iter_bucket instead."""
55
+ global _WARNED
56
+ from .s3 import iter_bucket
57
+ if not _WARNED:
58
+ logger.warning(_WARNING)
59
+ _WARNED = True
60
+ return iter_bucket(
61
+ bucket_name=bucket_name,
62
+ prefix=prefix,
63
+ accept_key=accept_key,
64
+ key_limit=key_limit,
65
+ workers=workers,
66
+ retries=retries,
67
+ session_kwargs=session_kwargs
68
+ )
69
+
70
+
71
+ __all__ = [
72
+ 'open',
73
+ 'parse_uri',
74
+ 'register_compressor',
75
+ 's3_iter_bucket',
76
+ 'smart_open',
77
+ ]
78
+
79
+ __version__ = version.__version__
.venv/lib/python3.11/site-packages/smart_open/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.2 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/azure.cpython-311.pyc ADDED
Binary file (23.6 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/bytebuffer.cpython-311.pyc ADDED
Binary file (7.51 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/compression.cpython-311.pyc ADDED
Binary file (6.5 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/concurrency.cpython-311.pyc ADDED
Binary file (4.3 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/constants.cpython-311.pyc ADDED
Binary file (498 Bytes). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/doctools.cpython-311.pyc ADDED
Binary file (11 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/ftp.cpython-311.pyc ADDED
Binary file (7.1 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/gcs.cpython-311.pyc ADDED
Binary file (5.93 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/hdfs.cpython-311.pyc ADDED
Binary file (7.75 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/http.cpython-311.pyc ADDED
Binary file (14.4 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/local_file.cpython-311.pyc ADDED
Binary file (1.5 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/s3.cpython-311.pyc ADDED
Binary file (52.9 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/smart_open_lib.cpython-311.pyc ADDED
Binary file (16 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/ssh.cpython-311.pyc ADDED
Binary file (10 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/transport.cpython-311.pyc ADDED
Binary file (4.11 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/utils.cpython-311.pyc ADDED
Binary file (10.2 kB). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/version.cpython-311.pyc ADDED
Binary file (306 Bytes). View file
 
.venv/lib/python3.11/site-packages/smart_open/__pycache__/webhdfs.cpython-311.pyc ADDED
Binary file (14.1 kB). View file