Brunobkr committed on
Commit
b594be6
·
verified ·
1 Parent(s): 227920e

Upload 5 files

Browse files

ENJOY QUANTIS GGUF LLM HELICOIDAL **ΩFFΣLLIα_Quantis** LLAMA LLM # QUANTIZAÇÃO GEOMÉTRICA PARA MODELOS LLaMA (LLM)

https://zenodo.org/records/18529943

Files changed (5) hide show
  1. __init__.py +9 -0
  2. gguf.py +15 -0
  3. gguf_reader.py +371 -0
  4. gguf_writer.py +1276 -0
  5. llama-webui-clone.zip +3 -0
__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Package namespace: re-export the public names of every submodule.
from .constants import *
from .lazy import *
from .gguf_reader import *
from .gguf_writer import *
from .tensor_mapping import *
from .vocab import *
from .utility import *
from .metadata import *
# Required import! Written as a relative import, like the lines above, so the
# package resolves its own `quants` module regardless of the name it is
# installed or vendored under.
from .quants import HelicoidalZetaCore
gguf.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Compatibility shim only. To use the GGUF API from Python, import the `gguf`
# package rather than this gguf/gguf.py file. Usage examples live in the
# examples/ directory for gguf-py.

import importlib
import sys
from pathlib import Path

# Put the directory that contains the package first on the import path so that
# `import gguf` below can find the package.
_package_parent = Path(__file__).parent.parent
sys.path.insert(0, str(_package_parent))

# Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
importlib.invalidate_caches()
import gguf  # noqa: E402

importlib.reload(gguf)
gguf_reader.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #BRUNO BECKER / OFFELLIA 2026
2
+ #brunoconta1980@gmail.com
3
+ #brunoconta1980@hotmail.com
4
+ # X @Brunoxuser
5
+
6
+ #
7
+ # GGUF file reading/modification support. For API usage information,
8
+ # please see the files scripts/ for some fairly simple examples.
9
+ #
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import os
14
+ import sys
15
+ from collections import OrderedDict
16
+ from typing import Any, Literal, NamedTuple, TypeVar, Union
17
+
18
+ import numpy as np
19
+ import numpy.typing as npt
20
+
21
+ from .quants import quant_shape_to_byte_shape
22
+
23
+ if __name__ == "__main__":
24
+ from pathlib import Path
25
+
26
+ # Allow running file in package as a script.
27
+ sys.path.insert(0, str(Path(__file__).parent.parent))
28
+
29
+ from gguf.constants import (
30
+ GGML_QUANT_SIZES,
31
+ GGUF_DEFAULT_ALIGNMENT,
32
+ GGUF_MAGIC,
33
+ GGUF_VERSION,
34
+ GGMLQuantizationType,
35
+ GGUFValueType,
36
+ GGUFEndian,
37
+ )
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+ READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
42
+
43
+
44
class ReaderField(NamedTuple):
    """One parsed metadata (or tensor-info) record of a GGUF file."""

    # Offset to start of this field.
    offset: int

    # Name of the field (not necessarily from file data).
    name: str

    # Data parts. Some types have multiple components, such as strings
    # that consist of a length followed by the string data.
    # NOTE(review): this default list is shared by every instance created
    # without an explicit `parts` argument; do not mutate it in place.
    parts: list[npt.NDArray[Any]] = []

    # Indexes into parts that we can call the actual data. For example
    # an array of strings will be populated with indexes to the actual
    # string data.
    data: list[int] = [-1]

    # Value types of the field; for arrays the element type(s) follow the
    # leading ARRAY entry.
    types: list[GGUFValueType] = []

    def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
        """Return the field's value converted to plain Python objects.

        Args:
            index_or_slice: For array fields, the element index or slice to
                extract. Ignored for scalar and string fields.

        Returns:
            ``None`` when the field carries no type information; a ``str``
            for string fields; a Python scalar for scalar fields; for arrays
            either a single element (int index) or a list (slice).
        """
        if not self.types:
            return None

        def decode(part):
            # String payloads are stored as uint8 arrays holding UTF-8 bytes.
            return str(part.tobytes(), encoding='utf-8')

        head_type = self.types[0]

        if head_type == GGUFValueType.ARRAY:
            element_type = self.types[-1]
            wants_single = isinstance(index_or_slice, int)

            if element_type == GGUFValueType.STRING:
                selected = self.data[index_or_slice]
                if wants_single:
                    return decode(self.parts[selected])  # type: ignore
                return [decode(self.parts[part_idx]) for part_idx in selected]  # type: ignore

            # FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
            # (A slice optimisation for the case where every data part holds
            # exactly one value was sketched here but is not enabled.)
            if wants_single:
                return self.parts[self.data[index_or_slice]].tolist()[0]
            flattened = []
            for part_idx in self.data[index_or_slice]:
                flattened.extend(self.parts[part_idx].tolist())
            return flattened

        # Non-array field: the value is always the last part.
        last_part = self.parts[-1]
        if head_type == GGUFValueType.STRING:
            return decode(last_part)
        return last_part.tolist()[0]
103
+
104
+
105
class ReaderTensor(NamedTuple):
    """Immutable record describing one tensor found by the reader."""

    # Tensor name decoded from the file.
    name: str
    # Quantization / storage type of the tensor.
    tensor_type: GGMLQuantizationType
    # Dimension array as read from the tensor-info record.
    shape: npt.NDArray[np.uint32]
    # Product of all dimensions.
    n_elements: int
    # Size of the tensor payload in bytes.
    n_bytes: int
    # Absolute offset of the payload inside the file.
    data_offset: int
    # Array holding the tensor payload.
    data: npt.NDArray[Any]
    # The tensor-info record this tensor was built from.
    field: ReaderField
114
+
115
+
116
class GGUFReader:
    """Memory-mapped parser for a GGUF file.

    The constructor maps the file with ``np.memmap`` and walks it in order:
    magic, version, tensor/KV counts, key/value metadata, tensor-info records
    and finally the aligned tensor data. Parsed metadata is stored in
    ``self.fields`` and tensors in ``self.tensors``.
    """

    # I - same as host, S - swapped
    byte_order: Literal['I', 'S'] = 'I'
    alignment: int = GGUF_DEFAULT_ALIGNMENT
    data_offset: int

    # Note: Internal helper, API may change.
    gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
        GGUFValueType.UINT8:   np.uint8,
        GGUFValueType.INT8:    np.int8,
        GGUFValueType.UINT16:  np.uint16,
        GGUFValueType.INT16:   np.int16,
        GGUFValueType.UINT32:  np.uint32,
        GGUFValueType.INT32:   np.int32,
        GGUFValueType.FLOAT32: np.float32,
        GGUFValueType.UINT64:  np.uint64,
        GGUFValueType.INT64:   np.int64,
        GGUFValueType.FLOAT64: np.float64,
        GGUFValueType.BOOL:    np.bool_,
    }

    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r', 'r+', 'c'] = 'r'):
        """Map `path` and parse its header, metadata and tensor table.

        Args:
            path: Location of the GGUF file.
            mode: Mode passed straight through to ``np.memmap``.

        Raises:
            ValueError: If the magic is wrong, the version is unsupported,
                ``general.alignment`` has a bad type or is zero, or a tensor
                name is duplicated.
        """
        self.data = np.memmap(path, mode = mode)
        offs = 0

        # Check for GGUF magic
        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
            raise ValueError('GGUF magic invalid')
        offs += 4

        # Check GGUF version
        temp_version = self._get(offs, np.uint32)
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = 'S'
            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
        if sys.byteorder == "little":
            # Host is little endian
            host_endian = GGUFEndian.LITTLE
            swapped_endian = GGUFEndian.BIG
        else:
            # Sorry PDP or other weird systems that don't use BE or LE.
            host_endian = GGUFEndian.BIG
            swapped_endian = GGUFEndian.LITTLE
        self.endianess = swapped_endian if self.byte_order == "S" else host_endian
        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))

        # Check tensor count and kv count
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
        # Convert to Python ints so no NumPy scalar leaks into loop bounds.
        tensor_count = int(temp_counts[0])
        kv_count = int(temp_counts[1])
        offs = self._build_fields(offs, kv_count)

        # Build Tensor Info Fields
        offs, tensors_fields = self._build_tensor_info(offs, tensor_count)
        new_align = self.fields.get('general.alignment')
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError('Bad type for general.alignment field')
            # Keep the alignment a Python int: mixing a NumPy uint32 into the
            # offset arithmetic below would turn `offs` into a fixed-width
            # NumPy scalar.
            self.alignment = int(new_align.parts[-1][0])
            if self.alignment == 0:
                raise ValueError('Bad value for general.alignment field: 0')
        padding = offs % self.alignment
        if padding != 0:
            offs += self.alignment - padding
        self.data_offset = offs
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar('_DT', bound = npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
        """Return the field stored under `key`, or ``None`` if absent."""
        return self.fields.get(key, None)

    # Fetch a tensor from the list by index.
    def get_tensor(self, idx: int) -> ReaderTensor:
        """Return the tensor at position `idx` of ``self.tensors``."""
        return self.tensors[idx]

    def _get(
        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I', 'S', '<'] = None,
    ) -> npt.NDArray[Any]:
        """Return `count` items of `dtype` starting at byte `offset`.

        The result is a reinterpretation of a slice of the memory map. Its
        byte order is ``self.byte_order`` unless `override_order` is given.
        """
        count = int(count)
        itemsize = int(np.empty([], dtype = dtype).itemsize)
        end_offs = offset + itemsize * count
        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
        return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))

    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        """Register `field` in ``self.fields`` and return its byte size.

        A duplicate name is logged and stored under ``<name>_<offset>``
        instead of overwriting the first occurrence. Returns 0 when
        `skip_sum` is true, otherwise the summed ``nbytes`` of all parts.
        """
        if field.name in self.fields:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')

            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
            self.fields[field.name + '_{}'.format(field.offset)] = field
        else:
            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)

    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        """Read a string at `offset`: a uint64 length, then that many bytes.

        Returns the (length array, byte array) pair.
        """
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self, orig_offs: int, raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        """Parse one value of type `raw_type` located at `orig_offs`.

        Returns:
            ``(size_in_bytes, parts, data_indexes, types)`` where
            `data_indexes` point at the entries of `parts` that hold the
            actual payload.

        Raises:
            ValueError: If `raw_type` is not a known or handled value type.
        """
        offs = orig_offs
        types: list[GGUFValueType] = []
        gtype = GGUFValueType(raw_type)
        types.append(gtype)
        # Handle strings.
        if gtype == GGUFValueType.STRING:
            sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
            size = sum(int(part.nbytes) for part in sparts)
            return size, sparts, [1], types
        # Check if it's a simple scalar type.
        nptype = self.gguf_scalar_to_np.get(gtype)
        if nptype is not None:
            val = self._get(offs, nptype)
            return int(val.nbytes), [val], [0], types
        # Handle arrays.
        if gtype == GGUFValueType.ARRAY:
            # The element type is read as a uint32.
            raw_itype = self._get(offs, np.uint32)
            offs += int(raw_itype.nbytes)
            # The array length is read as a uint64 (per the original note,
            # GGUF v3 uses uint64 for array sizes).
            alen = self._get(offs, np.uint64)
            offs += int(alen.nbytes)
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            # FIXME: Handle multi-dimensional arrays properly instead of flattening
            for idx in range(int(alen[0])):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                if idx == 0:
                    types += curr_types
                idxs_offs = len(aparts)
                aparts += curr_parts
                data_idxs += [i + idxs_offs for i in curr_idxs]
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types
        # We can't deal with this one.
        raise ValueError(f'Unknown/unhandled field type {gtype}')

    def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
        """Parse the tensor-info record that starts at `orig_offs`.

        The returned field's parts are, in order: name length, name bytes,
        dimension count, dimensions, raw dtype and data offset.
        """
        offs = orig_offs

        # Get Tensor Name
        name_len, name_data = self._get_str(offs)
        offs += int(name_len.nbytes + name_data.nbytes)

        # Get Tensor Dimensions Count
        n_dims = self._get(offs, np.uint32)
        offs += int(n_dims.nbytes)

        # Get Tensor Dimension Array
        dims = self._get(offs, np.uint64, n_dims[0])
        offs += int(dims.nbytes)

        # Get Tensor Encoding Scheme Type
        raw_dtype = self._get(offs, np.uint32)
        offs += int(raw_dtype.nbytes)

        # Get Tensor Offset
        offset_tensor = self._get(offs, np.uint64)
        offs += int(offset_tensor.nbytes)

        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding = 'utf-8'),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )

    def _build_fields(self, offs: int, count: int) -> int:
        """Parse `count` key/value records starting at `offs`.

        Each record is registered through ``_push_field``. Returns the
        offset just past the last record.
        """
        for _ in range(count):
            orig_offs = offs
            kv_klen, kv_kdata = self._get_str(offs)
            offs += int(kv_klen.nbytes + kv_kdata.nbytes)
            raw_kv_type = self._get(offs, np.uint32)
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
            parts += field_parts
            self._push_field(ReaderField(
                orig_offs,
                str(bytes(kv_kdata), encoding = 'utf-8'),
                parts,
                [idx + idxs_offs for idx in field_idxs],
                field_types,
            ), skip_sum = True)
            offs += field_size
        return offs

    def _build_tensor_info(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
        """Parse `count` tensor-info records starting at `offs`.

        Returns the offset past the last record and the parsed records.
        """
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor_info_field(offs)
            offs += sum(int(part.nbytes) for part in field.parts)
            tensor_fields.append(field)
        return offs, tensor_fields

    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        """Create a ``ReaderTensor`` for each tensor-info record in `fields`.

        Args:
            start_offs: Offset of the tensor data section; per-tensor offsets
                are added to it.
            fields: Records produced by ``_build_tensor_info``.

        Raises:
            ValueError: If two records carry the same tensor name.
        """
        tensors = []
        tensor_names = set() # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
            tensor_name = str(bytes(name_data), encoding = 'utf-8')
            if tensor_name in tensor_names:
                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
            np_dims = tuple(reversed(dims.tolist()))
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
            n_bytes = n_elems * type_size // block_size
            # Add as Python ints: adding a uint64 scalar to an int or int64
            # can promote the sum to float64.
            data_offs = int(start_offs) + int(offset_tensor[0])
            item_type: npt.DTypeLike
            if ggml_type == GGMLQuantizationType.F16:
                item_count = n_elems
                item_type = np.float16
            elif ggml_type == GGMLQuantizationType.F32:
                item_count = n_elems
                item_type = np.float32
            elif ggml_type == GGMLQuantizationType.F64:
                item_count = n_elems
                item_type = np.float64
            elif ggml_type == GGMLQuantizationType.I8:
                item_count = n_elems
                item_type = np.int8
            elif ggml_type == GGMLQuantizationType.I16:
                item_count = n_elems
                item_type = np.int16
            elif ggml_type == GGMLQuantizationType.I32:
                item_count = n_elems
                item_type = np.int32
            elif ggml_type == GGMLQuantizationType.I64:
                item_count = n_elems
                item_type = np.int64
            else:
                # Any other type is exposed as raw bytes with a byte shape.
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)
            tensors.append(ReaderTensor(
                name = tensor_name,
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,
                n_bytes = n_bytes,
                data_offset = data_offs,
                data = self._get(data_offs, item_type, item_count).reshape(np_dims),
                field = field,
            ))
        self.tensors = tensors
gguf_writer.py ADDED
@@ -0,0 +1,1276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #BRUNO BECKER / OFFELLIA 2026
2
+ #brunoconta1980@gmail.com
3
+ #brunoconta1980@hotmail.com
4
+ # X @Brunoxuser
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ import os
10
+ import shutil
11
+ import struct
12
+ import sys
13
+ import tempfile
14
+ from dataclasses import dataclass
15
+ from enum import Enum, auto
16
+ from math import prod
17
+ from pathlib import Path
18
+ from io import BufferedWriter
19
+ from typing import IO, Any, Sequence, Mapping
20
+ from string import ascii_letters, digits
21
+
22
+ import numpy as np
23
+
24
+ from .constants import (
25
+ GGUF_DEFAULT_ALIGNMENT,
26
+ GGUF_MAGIC,
27
+ GGUF_VERSION,
28
+ GGMLQuantizationType,
29
+ GGUFEndian,
30
+ GGUFValueType,
31
+ Keys,
32
+ RopeScalingType,
33
+ PoolingType,
34
+ TokenType,
35
+ ExpertGatingFuncType,
36
+ )
37
+
38
+ from .quants import quant_shape_from_byte_shape
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+
43
+ SHARD_NAME_FORMAT = "{:s}-{:05d}-of-{:05d}.gguf"
44
+
45
+
46
@dataclass
class TensorInfo:
    """Bookkeeping record the writer keeps for each registered tensor."""

    # Tensor dimensions.
    shape: Sequence[int]
    # GGML storage / quantization type.
    dtype: GGMLQuantizationType
    # Payload size in bytes.
    nbytes: int
    # The tensor data itself, when it is held in memory; otherwise None.
    tensor: np.ndarray[Any, Any] | None = None
52
+
53
+
54
@dataclass
class GGUFValue:
    """A metadata value paired with the GGUF type it is written as."""

    # The Python value to serialise.
    value: Any
    # GGUF value type used when packing `value`.
    type: GGUFValueType
    # Optional explicit element type; None when not specified.
    sub_type: GGUFValueType | None = None
59
+
60
+
61
class WriterState(Enum):
    """Stages the writer passes through, in the order they are declared."""

    # No output file has been opened yet.
    NO_FILE = auto()
    # Output file(s) opened, nothing written.
    EMPTY = auto()
    # Header written.
    HEADER = auto()
    # Key/value metadata written.
    KV_DATA = auto()
    # Tensor-info records written.
    TI_DATA = auto()
    # Tensor weights being / already written.
    WEIGHTS = auto()
68
+
69
+
70
+ class GGUFWriter:
71
+ fout: list[BufferedWriter] | None
72
+ path: Path | None
73
+ temp_file: tempfile.SpooledTemporaryFile[bytes] | None
74
+ tensors: list[dict[str, TensorInfo]]
75
+ kv_data: list[dict[str, GGUFValue]]
76
+ state: WriterState
77
+ _simple_value_packing = {
78
+ GGUFValueType.UINT8: "B",
79
+ GGUFValueType.INT8: "b",
80
+ GGUFValueType.UINT16: "H",
81
+ GGUFValueType.INT16: "h",
82
+ GGUFValueType.UINT32: "I",
83
+ GGUFValueType.INT32: "i",
84
+ GGUFValueType.FLOAT32: "f",
85
+ GGUFValueType.UINT64: "Q",
86
+ GGUFValueType.INT64: "q",
87
+ GGUFValueType.FLOAT64: "d",
88
+ GGUFValueType.BOOL: "?",
89
+ }
90
+
91
def __init__(
    self, path: os.PathLike[str] | str | None, arch: str, use_temp_file: bool = False, endianess: GGUFEndian = GGUFEndian.LITTLE,
    split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False
):
    """Set up an in-memory writer; no file is opened here.

    Args:
        path: Output path, or a falsy value to leave it unset for now.
        arch: Architecture name, stored on the instance.
        use_temp_file: Stored; consulted when tensors are added.
        endianess: Byte order the output is written in.
        split_max_tensors: Per-shard tensor limit (0 disables it).
        split_max_size: Per-shard byte limit (0 disables it).
        dry_run: Stored; consulted when the output plan is printed.
        small_first_shard: When true, a second empty shard is created up
            front so tensors are appended to it instead of the first one.
    """
    # Plain configuration copied from the arguments.
    self.arch = arch
    self.endianess = endianess
    self.use_temp_file = use_temp_file
    self.split_max_tensors = split_max_tensors
    self.split_max_size = split_max_size
    self.dry_run = dry_run
    self.small_first_shard = small_first_shard

    # Output-related state; nothing is open yet.
    self.fout = None
    self.temp_file = None
    self.path = None if not path else Path(path)
    self.data_alignment = GGUF_DEFAULT_ALIGNMENT

    # One dict per shard, starting with a single shard.
    self.tensors = [{}]
    self.kv_data = [{}]

    endian_label = "Big" if self.endianess == GGUFEndian.BIG else "Little"
    logger.info("gguf: This GGUF file is for {0} Endian only".format(endian_label))
    self.state = WriterState.NO_FILE

    if self.small_first_shard:
        self.tensors.append({})

    self.add_architecture()
117
+
118
def get_total_parameter_count(self) -> tuple[int, int, int, int]:
    """Count parameters over all registered tensors.

    Returns:
        ``(total, shared, expert, expert_count)``. ``total`` is negated
        when any ``.lora_a`` tensor was seen, to signal it is likely not
        exact. All four are 0 when a ``.lora_b`` tensor does not directly
        follow its matching ``.lora_a``.
    """
    total = 0
    shared = 0
    per_expert = 0

    expert_count_sum = 0
    expert_tensor_count = 0

    pending_lora_a: tuple[str, TensorInfo] | None = None

    for shard in self.tensors:
        for tensor_name, tensor_info in shard.items():
            dims = tensor_info.shape

            if tensor_name.endswith(".lora_a"):
                # Remember it; it is accounted for with its ".lora_b" half.
                pending_lora_a = (tensor_name, tensor_info)
                continue

            if tensor_name.endswith(".lora_b"):
                expected_a = tensor_name[:-1] + "a"
                if pending_lora_a is None or pending_lora_a[0] != expected_a:
                    # Bail when the LoRA pair can't be found trivially
                    logger.warning("can't measure LoRA size correctly, tensor order is unusual")
                    return 0, 0, 0, 0
                dims = (*dims[:-1], pending_lora_a[1].shape[-1])

            n_params = prod(dims)

            if "_exps." not in tensor_name:
                shared += n_params
            else:
                # The expert axis is second-to-last for biases, third-to-last otherwise.
                experts_here = dims[-2 if ".bias" in tensor_name else -3]
                per_expert += (n_params // experts_here)
                expert_count_sum += experts_here
                expert_tensor_count += 1

            total += n_params

    # Hopefully this should work even for variable-expert-count models
    if expert_tensor_count > 0:
        expert_count = expert_count_sum // expert_tensor_count
    else:
        expert_count = 0

    # Negate the total to signal it's likely not exact
    if pending_lora_a is not None:
        total = -total

    # NOTE: keep the output in the same order as accepted by 'size_label' in gguf-py/gguf/utility.py
    return total, shared, per_expert, expert_count
165
+
166
def format_shard_names(self, path: Path) -> list[Path]:
    """Return one output path per shard.

    With a single shard `path` is returned unchanged; otherwise each name
    is built from SHARD_NAME_FORMAT using the stem of `path`, the 1-based
    shard number and the shard total.
    """
    shard_total = len(self.tensors)
    if shard_total == 1:
        return [path]

    shard_paths = []
    for shard_no in range(1, shard_total + 1):
        shard_name = SHARD_NAME_FORMAT.format(path.stem, shard_no, shard_total)
        shard_paths.append(path.with_name(shard_name))
    return shard_paths
170
+
171
def open_output_file(self, path: Path | None = None) -> None:
    """Open the output file(s) and move the writer to the EMPTY state.

    Args:
        path: Optional output path; when given it replaces ``self.path``.

    Raises:
        ValueError: If a file was already opened, unless this is a repeat
            call in the EMPTY state with the same (or no) path.
    """
    already_open = self.state is WriterState.EMPTY and self.fout is not None
    if already_open and (path is None or path == self.path):
        # allow calling this multiple times as long as the path is the same
        return

    if self.state is not WriterState.NO_FILE:
        raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

    if path is not None:
        self.path = path

    # Without any path there is nothing to open and the state is unchanged.
    if self.path is None:
        return

    shard_paths = self.print_plan()
    self.fout = [open(shard_path, "wb") for shard_path in shard_paths]
    self.state = WriterState.EMPTY
186
+
187
def print_plan(self) -> list[Path]:
    """Log the files that will be written and return their paths.

    For every shard the tensor count and total size are logged. In dry-run
    mode the file names are printed to stdout and the process exits.

    Returns:
        The shard file names, one per entry of ``self.tensors``.

    Raises:
        SystemExit: When ``self.dry_run`` is true.
    """
    logger.info("Writing the following files:")
    assert self.path is not None
    filenames = self.format_shard_names(self.path)
    assert len(filenames) == len(self.tensors)
    for name, tensors in zip(filenames, self.tensors):
        logger.info(f"{name}: n_tensors = {len(tensors)}, total_size = {GGUFWriter.format_n_bytes_to_str(sum(ti.nbytes for ti in tensors.values()))}")

    if self.dry_run:
        logger.info("Dry run, not writing files")
        for name in filenames:
            print(name)  # noqa: NP100
        # sys.exit() rather than the bare exit() helper: the latter is
        # injected by the `site` module and is not guaranteed to exist.
        sys.exit()

    return filenames
202
+
203
def add_shard_kv_data(self) -> None:
    """Add the split-bookkeeping keys to every shard's metadata.

    Does nothing when there is only one shard. Otherwise ``self.kv_data``
    is grown to one dict per output file and each dict receives its shard
    index, the shard total and the overall tensor count.
    """
    if len(self.tensors) == 1:
        return

    tensor_total = sum(len(shard) for shard in self.tensors)
    assert self.fout is not None
    shard_total = len(self.fout)

    # Make sure every shard has its own metadata dict.
    while len(self.kv_data) < shard_total:
        self.kv_data.append({})

    for shard_idx, shard_kv in enumerate(self.kv_data):
        shard_kv[Keys.Split.LLM_KV_SPLIT_NO] = GGUFValue(shard_idx, GGUFValueType.UINT16)
        shard_kv[Keys.Split.LLM_KV_SPLIT_COUNT] = GGUFValue(shard_total, GGUFValueType.UINT16)
        shard_kv[Keys.Split.LLM_KV_SPLIT_TENSORS_COUNT] = GGUFValue(tensor_total, GGUFValueType.INT32)
215
+
216
def write_header_to_file(self, path: Path | None = None) -> None:
    """Open the output (if needed) and write the header of each shard.

    Per shard the magic, the version, the tensor count and the KV count
    are written, in that order.

    Args:
        path: Optional output path forwarded to ``open_output_file``.

    Raises:
        ValueError: If the writer is not in the EMPTY state after opening.
    """
    split_requested = self.split_max_tensors != 0 or self.split_max_size != 0
    if len(self.tensors) == 1 and split_requested:
        logger.warning("Model fails split requirements, not splitting")

    self.open_output_file(path)

    if self.state is not WriterState.EMPTY:
        raise ValueError(f'Expected output file to be empty, got {self.state}')

    assert self.fout is not None
    assert len(self.fout) == len(self.tensors)
    assert len(self.kv_data) == 1

    self.add_shard_kv_data()

    for shard_file, shard_tensors, shard_kv in zip(self.fout, self.tensors, self.kv_data):
        # The magic is packed with an explicit "<" and no pack prefix.
        shard_file.write(self._pack("<I", GGUF_MAGIC, skip_pack_prefix = True))
        for fmt, value in (("I", GGUF_VERSION), ("Q", len(shard_tensors)), ("Q", len(shard_kv))):
            shard_file.write(self._pack(fmt, value))
        shard_file.flush()
    self.state = WriterState.HEADER
238
+
239
def write_kv_data_to_file(self) -> None:
    """Serialise each shard's key/value metadata to its file.

    Raises:
        ValueError: If the header has not been written yet.
    """
    if self.state is not WriterState.HEADER:
        raise ValueError(f'Expected output file to contain the header, got {self.state}')
    assert self.fout is not None

    for shard_file, shard_kv in zip(self.fout, self.kv_data):
        blob = bytearray()

        for key, entry in shard_kv.items():
            # Key: a string without a type tag. Value: packed with its type tag.
            blob.extend(self._pack_val(key, GGUFValueType.STRING, add_vtype=False))
            blob.extend(self._pack_val(entry.value, entry.type, add_vtype=True, sub_type=entry.sub_type))

        shard_file.write(blob)

    self.flush()
    self.state = WriterState.KV_DATA
255
+
256
def write_ti_data_to_file(self) -> None:
    """Write the tensor-info records of each shard to its file.

    Every record holds the tensor name, the dimension count, the
    dimensions in reverse order, the dtype and the tensor's data offset.
    Offsets start at 0 per shard and advance by each tensor's padded size.

    Raises:
        ValueError: If the KV data has not been written yet.
    """
    if self.state is not WriterState.KV_DATA:
        raise ValueError(f'Expected output file to contain KV data, got {self.state}')
    assert self.fout is not None

    for shard_file, shard_tensors in zip(self.fout, self.tensors):
        blob = bytearray()
        next_offset = 0

        for tensor_name, info in shard_tensors.items():
            blob.extend(self._pack_val(tensor_name, GGUFValueType.STRING, add_vtype=False))
            rank = len(info.shape)
            blob.extend(self._pack("I", rank))
            # Dimensions are stored last-to-first.
            for axis in range(rank - 1, -1, -1):
                blob.extend(self._pack("Q", info.shape[axis]))
            blob.extend(self._pack("I", info.dtype))
            blob.extend(self._pack("Q", next_offset))
            next_offset += GGUFWriter.ggml_pad(info.nbytes, self.data_alignment)

        shard_file.write(blob)
        shard_file.flush()
    self.state = WriterState.TI_DATA
278
+
279
def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
    """Store a metadata entry in the first shard's KV dict.

    A key that already exists in any shard triggers a warning; the new
    value is then written to the first shard regardless.
    """
    for shard_kv in self.kv_data:
        if key in shard_kv:
            logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
            break

    entry = GGUFValue(value=val, type=vtype, sub_type=sub_type)
    self.kv_data[0][key] = entry
284
+
285
def add_uint8(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type UINT8."""
    vtype = GGUFValueType.UINT8
    self.add_key_value(key, val, vtype)
287
+
288
def add_int8(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type INT8."""
    vtype = GGUFValueType.INT8
    self.add_key_value(key, val, vtype)
290
+
291
def add_uint16(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type UINT16."""
    vtype = GGUFValueType.UINT16
    self.add_key_value(key, val, vtype)
293
+
294
def add_int16(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type INT16."""
    vtype = GGUFValueType.INT16
    self.add_key_value(key, val, vtype)
296
+
297
def add_uint32(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type UINT32."""
    vtype = GGUFValueType.UINT32
    self.add_key_value(key, val, vtype)
299
+
300
def add_int32(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type INT32."""
    vtype = GGUFValueType.INT32
    self.add_key_value(key, val, vtype)
302
+
303
def add_float32(self, key: str, val: float) -> None:
    """Store `val` under `key` with value type FLOAT32."""
    vtype = GGUFValueType.FLOAT32
    self.add_key_value(key, val, vtype)
305
+
306
def add_uint64(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type UINT64."""
    vtype = GGUFValueType.UINT64
    self.add_key_value(key, val, vtype)
308
+
309
def add_int64(self, key: str, val: int) -> None:
    """Store `val` under `key` with value type INT64."""
    vtype = GGUFValueType.INT64
    self.add_key_value(key, val, vtype)
311
+
312
def add_float64(self, key: str, val: float) -> None:
    """Store `val` under `key` with value type FLOAT64."""
    vtype = GGUFValueType.FLOAT64
    self.add_key_value(key, val, vtype)
314
+
315
def add_bool(self, key: str, val: bool) -> None:
    """Store `val` under `key` with value type BOOL."""
    vtype = GGUFValueType.BOOL
    self.add_key_value(key, val, vtype)
317
+
318
def add_string(self, key: str, val: str) -> None:
    """Store `val` under `key` with value type STRING.

    A falsy `val` (for example the empty string) is silently skipped.
    """
    if val:
        self.add_key_value(key, val, GGUFValueType.STRING)
322
+
323
def add_array(self, key: str, val: Sequence[Any]) -> None:
    """Store the sequence `val` under `key` with value type ARRAY.

    An empty sequence is silently skipped.
    """
    # len() rather than truthiness: `val` may be a sequence type whose
    # truth value is not defined by its length.
    if len(val) != 0:
        self.add_key_value(key, val, GGUFValueType.ARRAY)
327
+
328
+ @staticmethod
329
+ def ggml_pad(x: int, n: int) -> int:
330
+ return ((x + n - 1) // n) * n
331
+
332
def add_tensor_info(
    self, name: str, tensor_shape: Sequence[int], tensor_dtype: np.dtype,
    tensor_nbytes: int, raw_dtype: GGMLQuantizationType | None = None,
) -> None:
    """Register a tensor's metadata, opening a new shard when a limit is hit.

    Args:
        name: Tensor name; must be unique across all shards.
        tensor_shape: Tensor shape. When `raw_dtype` is given and
            `tensor_dtype` is uint8, it is first converted with
            ``quant_shape_from_byte_shape``.
        tensor_dtype: NumPy dtype of the data.
        tensor_nbytes: Payload size in bytes.
        raw_dtype: Explicit GGML type; when omitted it is derived from
            `tensor_dtype`.

    Raises:
        ValueError: If an output file was already opened, the name is a
            duplicate, or `tensor_dtype` has no GGML equivalent.
    """
    if self.state is not WriterState.NO_FILE:
        raise ValueError(f'Expected output file to be not yet opened, got {self.state}')

    for shard in self.tensors:
        if name in shard:
            raise ValueError(f'Duplicated tensor name {name!r}')

    if raw_dtype is not None:
        dtype = raw_dtype
        if tensor_dtype == np.uint8:
            tensor_shape = quant_shape_from_byte_shape(tensor_shape, raw_dtype)
    else:
        # NumPy types that map directly onto a GGML type.
        numpy_to_ggml = (
            (np.float16, GGMLQuantizationType.F16),
            (np.float32, GGMLQuantizationType.F32),
            (np.float64, GGMLQuantizationType.F64),
            (np.int8, GGMLQuantizationType.I8),
            (np.int16, GGMLQuantizationType.I16),
            (np.int32, GGMLQuantizationType.I32),
            (np.int64, GGMLQuantizationType.I64),
        )
        for np_type, ggml_type in numpy_to_ggml:
            if tensor_dtype == np_type:
                dtype = ggml_type
                break
        else:
            raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")

    # make sure there is at least one tensor before splitting
    current_shard = self.tensors[-1]
    if len(current_shard) > 0:
        # split when over tensor limit
        needs_split = self.split_max_tensors != 0 and len(current_shard) >= self.split_max_tensors
        # split when over size limit
        if not needs_split and self.split_max_size != 0:
            shard_bytes = sum(ti.nbytes for ti in current_shard.values())
            needs_split = shard_bytes + tensor_nbytes > self.split_max_size
        if needs_split:
            self.tensors.append({})

    self.tensors[-1][name] = TensorInfo(shape=tensor_shape, dtype=dtype, nbytes=tensor_nbytes)
376
+
377
def add_tensor(
    self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
    raw_dtype: GGMLQuantizationType | None = None, tensor_endianess: GGUFEndian | None = None
) -> None:
    """Register a tensor and keep its data for later writing.

    The data is either attached to the tensor's info record, or, when a
    temp file is in use, written to that file followed by padding.

    Args:
        name: Tensor name.
        tensor: Tensor data.
        raw_shape: Shape to record instead of ``tensor.shape``.
        raw_dtype: Explicit GGML type forwarded to ``add_tensor_info``.
        tensor_endianess: Byte order of `tensor`; the host's when omitted.
    """
    # if tensor endianness is not passed, assume it's native to system
    if tensor_endianess is None:
        if sys.byteorder == 'big':
            tensor_endianess = GGUFEndian.BIG
        else:
            tensor_endianess = GGUFEndian.LITTLE

    if tensor_endianess != self.endianess:
        # Don't byteswap inplace since lazy copies cannot handle it
        tensor = tensor.byteswap(inplace=False)

    # Create the spill file lazily, on the first tensor that needs it.
    if self.use_temp_file and self.temp_file is None:
        spill = tempfile.SpooledTemporaryFile(mode="w+b", max_size=256 * 1024 * 1024)
        spill.seek(0)
        self.temp_file = spill

    recorded_shape: Sequence[int] = tensor.shape if raw_shape is None else raw_shape
    self.add_tensor_info(name, recorded_shape, tensor.dtype, tensor.nbytes, raw_dtype=raw_dtype)

    if self.temp_file is not None:
        tensor.tofile(self.temp_file)
        self.write_padding(self.temp_file, tensor.nbytes)
    else:
        self.tensors[-1][name].tensor = tensor
402
+
403
def write_padding(self, fp: IO[bytes], n: int, align: int | None = None) -> None:
    """Write the zero bytes needed to round ``n`` up to a multiple of the alignment.

    ``align`` is used when given; otherwise ``self.data_alignment`` applies.
    Nothing is written when ``n`` is already aligned.
    """
    alignment = self.data_alignment if align is None else align
    missing = GGUFWriter.ggml_pad(n, alignment) - n
    if missing == 0:
        return
    fp.write(b"\x00" * missing)
407
+
408
def write_tensor_data(self, tensor: np.ndarray[Any, Any], tensor_endianess: GGUFEndian | None = None) -> None:
    """Write one tensor's data to the shard holding the oldest pending tensor info.

    The first tensor info of the first shard that still has pending infos is
    consumed; ``tensor`` must have exactly the byte size recorded there.  The
    output position is padded to the data alignment before the data, and the
    data itself is padded afterwards.  Sets the state to ``WriterState.WEIGHTS``.

    Raises:
        ValueError: the writer state is neither ``TI_DATA`` nor ``WEIGHTS``.
        IndexError: no pending tensor info is left in any shard.
    """
    if self.state is not WriterState.TI_DATA and self.state is not WriterState.WEIGHTS:
        raise ValueError(f'Expected output file to contain tensor info or weights, got {self.state}')
    assert self.fout is not None

    # if tensor endianness is not passed, assume it's native to system
    if tensor_endianess is None:
        tensor_endianess = GGUFEndian.BIG if sys.byteorder == 'big' else GGUFEndian.LITTLE

    if tensor_endianess != self.endianess:
        # Don't byteswap inplace since lazy copies cannot handle it
        tensor = tensor.byteswap(inplace=False)

    # index of the first shard that still has tensor infos waiting for data
    file_id = next((i for i, tensors in enumerate(self.tensors) if len(tensors) > 0), None)
    if file_id is None:
        # previously surfaced as an opaque "list index out of range"
        raise IndexError('No pending tensor info left to write tensor data for')

    fout = self.fout[file_id]

    # pop the first tensor info (dicts preserve insertion order)
    pending = self.tensors[file_id]
    first_tensor_name = next(iter(pending))
    ti = pending.pop(first_tensor_name)
    assert ti.nbytes == tensor.nbytes

    self.write_padding(fout, fout.tell())
    tensor.tofile(fout)
    self.write_padding(fout, tensor.nbytes)

    self.state = WriterState.WEIGHTS
440
+
441
def write_tensors_to_file(self, *, progress: bool = False) -> None:
    """Write the tensor-info section and then all tensor data to the output file(s).

    ``write_ti_data_to_file`` is called first and every output file is padded
    to the data alignment.  Tensor data then comes from the in-memory arrays
    kept on the tensor infos, or from the temp file when one was used.  With
    ``progress=True`` byte progress is shown with ``tqdm`` (in-memory path
    only).  Leaves the writer in ``WriterState.WEIGHTS``.
    """
    self.write_ti_data_to_file()

    assert self.fout is not None

    for out in self.fout:
        self.write_padding(out, out.tell())

    if self.temp_file is not None:
        # data was spooled while tensors were added: copy it over in one go
        self.temp_file.seek(0)

        target = self.fout[1 if self.small_first_shard else 0]
        shutil.copyfileobj(self.temp_file, target)
        self.flush()
        self.temp_file.close()

        self.state = WriterState.WEIGHTS
        return

    shard_count = len(self.fout)
    shard_bar = None
    bar = None

    if progress:
        from tqdm import tqdm

        total_bytes = sum(ti.nbytes for shard in self.tensors for ti in shard.values())

        if shard_count > 1:
            shard_bar = tqdm(desc=f"Shard (0/{shard_count})", total=None, unit="byte", unit_scale=True)
        bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)

    for index, (out, shard) in enumerate(zip(self.fout, self.tensors)):
        if shard_bar is not None:
            shard_bar.set_description(f"Shard ({index + 1}/{shard_count})")
            shard_bytes = sum(ti.nbytes for ti in shard.values())
            shard_bar.reset(total=(shard_bytes if shard_bytes > 0 else None))

        # relying on the fact that Python dicts preserve insertion order (since 3.7)
        for ti in shard.values():
            assert ti.tensor is not None  # can only iterate once over the tensors
            assert ti.tensor.nbytes == ti.nbytes
            ti.tensor.tofile(out)
            if shard_bar is not None:
                shard_bar.update(ti.nbytes)
            if bar is not None:
                bar.update(ti.nbytes)
            self.write_padding(out, ti.nbytes)
            ti.tensor = None  # release the reference once written

    self.state = WriterState.WEIGHTS
487
+
488
def flush(self) -> None:
    """Flush every output file; the files must already be open."""
    assert self.fout is not None
    for handle in self.fout:
        handle.flush()
492
+
493
def close(self) -> None:
    """Close all output files and drop them; does nothing when none are open."""
    if self.fout is None:
        return
    for handle in self.fout:
        handle.close()
    self.fout = None
498
+
499
def add_type(self, type_name: str) -> None:
    """Record the general type name."""
    key = Keys.General.TYPE
    self.add_string(key, type_name)
501
+
502
def add_architecture(self) -> None:
    """Record the writer's own ``arch`` as the architecture string."""
    key = Keys.General.ARCHITECTURE
    self.add_string(key, self.arch)
504
+
505
def add_quantization_version(self, quantization_version: int) -> None:
    """Record the quantization version."""
    key = Keys.General.QUANTIZATION_VERSION
    self.add_uint32(key, quantization_version)
507
+
508
def add_custom_alignment(self, alignment: int) -> None:
    """Use ``alignment`` for subsequent padding and record it as well."""
    self.data_alignment = alignment
    key = Keys.General.ALIGNMENT
    self.add_uint32(key, alignment)
511
+
512
def add_file_type(self, ftype: int) -> None:
    """Record the file type."""
    key = Keys.General.FILE_TYPE
    self.add_uint32(key, ftype)
514
+
515
def add_sampling_sequence(self, sequence: str) -> None:
    """Record the sampling sequence string."""
    key = Keys.General.SAMPLING_SEQUENCE
    self.add_string(key, sequence)
517
+
518
def add_sampling_top_k(self, top_k: int) -> None:
    """Record the sampling top-k value."""
    key = Keys.General.SAMPLING_TOP_K
    self.add_int41(key, top_k)
520
+
521
def add_sampling_top_p(self, top_p: float) -> None:
    """Record the sampling top-p value."""
    key = Keys.General.SAMPLING_TOP_P
    self.add_float41(key, top_p)
523
+
524
def add_sampling_min_p(self, min_p: float) -> None:
    """Record the sampling min-p value."""
    key = Keys.General.SAMPLING_MIN_P
    self.add_float41(key, min_p)
526
+
527
def add_sampling_xtc_probability(self, xtc_probability: float) -> None:
    """Record the sampling XTC probability."""
    key = Keys.General.SAMPLING_XTC_PROBABILITY
    self.add_float41(key, xtc_probability)
529
+
530
def add_sampling_xtc_threshold(self, xtc_threshold: float) -> None:
    """Record the sampling XTC threshold."""
    key = Keys.General.SAMPLING_XTC_THRESHOLD
    self.add_float41(key, xtc_threshold)
532
+
533
def add_sampling_temp(self, temp: float) -> None:
    """Record the sampling temperature."""
    key = Keys.General.SAMPLING_TEMP
    self.add_float41(key, temp)
535
+
536
def add_sampling_penalty_last_n(self, penalty_last_n: int) -> None:
    """Record the sampling penalty last-n value."""
    key = Keys.General.SAMPLING_PENALTY_LAST_N
    self.add_int41(key, penalty_last_n)
538
+
539
def add_sampling_penalty_repeat(self, penalty_repeat: float) -> None:
    """Record the sampling repeat penalty."""
    key = Keys.General.SAMPLING_PENALTY_REPEAT
    self.add_float41(key, penalty_repeat)
541
+
542
def add_sampling_mirostat(self, mirostat: int) -> None:
    """Record the sampling mirostat value."""
    key = Keys.General.SAMPLING_MIROSTAT
    self.add_int41(key, mirostat)
544
+
545
def add_sampling_mirostat_tau(self, mirostat_tau: float) -> None:
    """Record the sampling mirostat tau."""
    key = Keys.General.SAMPLING_MIROSTAT_TAU
    self.add_float41(key, mirostat_tau)
547
+
548
def add_sampling_mirostat_eta(self, mirostat_eta: float) -> None:
    """Record the sampling mirostat eta."""
    key = Keys.General.SAMPLING_MIROSTAT_ETA
    self.add_float41(key, mirostat_eta)
550
+
551
def add_name(self, name: str) -> None:
    """Record the general name."""
    key = Keys.General.NAME
    self.add_string(key, name)
553
+
554
def add_author(self, author: str) -> None:
    """Record the author."""
    key = Keys.General.AUTHOR
    self.add_string(key, author)
556
+
557
def add_version(self, version: str) -> None:
    """Record the version string."""
    key = Keys.General.VERSION
    self.add_string(key, version)
559
+
560
def add_organization(self, organization: str) -> None:
    """Record the organization."""
    key = Keys.General.ORGANIZATION
    self.add_string(key, organization)
562
+
563
def add_finetune(self, finetune: str) -> None:
    """Record the finetune string."""
    key = Keys.General.FINETUNE
    self.add_string(key, finetune)
565
+
566
def add_basename(self, basename: str) -> None:
    """Record the base name."""
    key = Keys.General.BASENAME
    self.add_string(key, basename)
568
+
569
def add_description(self, description: str) -> None:
    """Record the description."""
    key = Keys.General.DESCRIPTION
    self.add_string(key, description)
571
+
572
def add_quantized_by(self, quantized: str) -> None:
    """Record who quantized the model."""
    key = Keys.General.QUANTIZED_BY
    self.add_string(key, quantized)
574
+
575
def add_size_label(self, size_label: str) -> None:
    """Record the size label."""
    key = Keys.General.SIZE_LABEL
    self.add_string(key, size_label)
577
+
578
def add_license(self, license: str) -> None:
    """Record the license string."""
    key = Keys.General.LICENSE
    self.add_string(key, license)
580
+
581
def add_license_name(self, license: str) -> None:
    """Record the license name."""
    key = Keys.General.LICENSE_NAME
    self.add_string(key, license)
583
+
584
def add_license_link(self, license: str) -> None:
    """Record the license link."""
    key = Keys.General.LICENSE_LINK
    self.add_string(key, license)
586
+
587
def add_url(self, url: str) -> None:
    """Record the URL."""
    key = Keys.General.URL
    self.add_string(key, url)
589
+
590
def add_doi(self, doi: str) -> None:
    """Record the DOI."""
    key = Keys.General.DOI
    self.add_string(key, doi)
592
+
593
def add_uuid(self, uuid: str) -> None:
    """Record the UUID."""
    key = Keys.General.UUID
    self.add_string(key, uuid)
595
+
596
def add_repo_url(self, repo_url: str) -> None:
    """Record the repository URL."""
    key = Keys.General.REPO_URL
    self.add_string(key, repo_url)
598
+
599
def add_source_url(self, url: str) -> None:
    """Record the source URL."""
    key = Keys.General.SOURCE_URL
    self.add_string(key, url)
601
+
602
def add_source_doi(self, doi: str) -> None:
    """Record the source DOI."""
    key = Keys.General.SOURCE_DOI
    self.add_string(key, doi)
604
+
605
def add_source_uuid(self, uuid: str) -> None:
    """Record the source UUID."""
    key = Keys.General.SOURCE_UUID
    self.add_string(key, uuid)
607
+
608
def add_source_repo_url(self, repo_url: str) -> None:
    """Record the source repository URL."""
    key = Keys.General.SOURCE_REPO_URL
    self.add_string(key, repo_url)
610
+
611
def add_base_model_count(self, source_count: int) -> None:
    """Record the number of base models."""
    key = Keys.General.BASE_MODEL_COUNT
    self.add_uint32(key, source_count)
613
+
614
def add_base_model_name(self, source_id: int, name: str) -> None:
    """Record the name of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_NAME.format(id=source_id)
    self.add_string(key, name)
616
+
617
def add_base_model_author(self, source_id: int, author: str) -> None:
    """Record the author of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_AUTHOR.format(id=source_id)
    self.add_string(key, author)
619
+
620
def add_base_model_version(self, source_id: int, version: str) -> None:
    """Record the version of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_VERSION.format(id=source_id)
    self.add_string(key, version)
622
+
623
def add_base_model_organization(self, source_id: int, organization: str) -> None:
    """Record the organization of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id)
    self.add_string(key, organization)
625
+
626
def add_base_model_description(self, source_id: int, description: str) -> None:
    """Record the description of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id)
    self.add_string(key, description)
628
+
629
def add_base_model_url(self, source_id: int, url: str) -> None:
    """Record the URL of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_URL.format(id=source_id)
    self.add_string(key, url)
631
+
632
def add_base_model_doi(self, source_id: int, doi: str) -> None:
    """Record the DOI of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_DOI.format(id=source_id)
    self.add_string(key, doi)
634
+
635
def add_base_model_uuid(self, source_id: int, uuid: str) -> None:
    """Record the UUID of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_UUID.format(id=source_id)
    self.add_string(key, uuid)
637
+
638
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
    """Record the repository URL of base model ``source_id``."""
    key = Keys.General.BASE_MODEL_REPO_URL.format(id=source_id)
    self.add_string(key, repo_url)
640
+
641
def add_dataset_count(self, source_count: int) -> None:
    """Record the number of datasets."""
    key = Keys.General.DATASET_COUNT
    self.add_uint32(key, source_count)
643
+
644
def add_dataset_name(self, source_id: int, name: str) -> None:
    """Record the name of dataset ``source_id``."""
    key = Keys.General.DATASET_NAME.format(id=source_id)
    self.add_string(key, name)
646
+
647
def add_dataset_author(self, source_id: int, author: str) -> None:
    """Record the author of dataset ``source_id``."""
    key = Keys.General.DATASET_AUTHOR.format(id=source_id)
    self.add_string(key, author)
649
+
650
def add_dataset_version(self, source_id: int, version: str) -> None:
    """Record the version of dataset ``source_id``."""
    key = Keys.General.DATASET_VERSION.format(id=source_id)
    self.add_string(key, version)
652
+
653
def add_dataset_organization(self, source_id: int, organization: str) -> None:
    """Record the organization of dataset ``source_id``."""
    key = Keys.General.DATASET_ORGANIZATION.format(id=source_id)
    self.add_string(key, organization)
655
+
656
def add_dataset_description(self, source_id: int, description: str) -> None:
    """Record the description of dataset ``source_id``."""
    key = Keys.General.DATASET_DESCRIPTION.format(id=source_id)
    self.add_string(key, description)
658
+
659
def add_dataset_url(self, source_id: int, url: str) -> None:
    """Record the URL of dataset ``source_id``."""
    key = Keys.General.DATASET_URL.format(id=source_id)
    self.add_string(key, url)
661
+
662
def add_dataset_doi(self, source_id: int, doi: str) -> None:
    """Record the DOI of dataset ``source_id``."""
    key = Keys.General.DATASET_DOI.format(id=source_id)
    self.add_string(key, doi)
664
+
665
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
    """Record the UUID of dataset ``source_id``."""
    key = Keys.General.DATASET_UUID.format(id=source_id)
    self.add_string(key, uuid)
667
+
668
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
    """Record the repository URL of dataset ``source_id``."""
    key = Keys.General.DATASET_REPO_URL.format(id=source_id)
    self.add_string(key, repo_url)
670
+
671
def add_tags(self, tags: Sequence[str]) -> None:
    """Record the tags as an array."""
    key = Keys.General.TAGS
    self.add_array(key, tags)
673
+
674
def add_languages(self, languages: Sequence[str]) -> None:
    """Record the languages as an array."""
    key = Keys.General.LANGUAGES
    self.add_array(key, languages)
676
+
677
def add_tensor_data_layout(self, layout: str) -> None:
    """Record the tensor data layout for the writer's architecture."""
    key = Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch)
    self.add_string(key, layout)
679
+
680
def add_vocab_size(self, size: int) -> None:
    """Record the vocabulary size for the writer's architecture."""
    key = Keys.LLM.VOCAB_SIZE.format(arch=self.arch)
    self.add_uint32(key, size)
682
+
683
def add_context_length(self, length: int) -> None:
    """Record the context length for the writer's architecture."""
    key = Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
685
+
686
def add_embedding_length(self, length: int) -> None:
    """Record the embedding length for the writer's architecture."""
    key = Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
688
+
689
def add_embedding_length_out(self, length: int) -> None:
    """Record the output embedding length for the writer's architecture."""
    key = Keys.LLM.EMBEDDING_LENGTH_OUT.format(arch=self.arch)
    self.add_uint32(key, length)
691
+
692
def add_features_length(self, length: int) -> None:
    """Record the features length for the writer's architecture."""
    key = Keys.LLM.FEATURES_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
694
+
695
def add_posnet_embedding_length(self, length: int) -> None:
    """Record the PosNet embedding length."""
    key = Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
697
+
698
def add_posnet_block_count(self, length: int) -> None:
    """Record the PosNet block count."""
    key = Keys.PosNet.BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, length)
700
+
701
def add_convnext_embedding_length(self, length: int) -> None:
    """Record the ConvNext embedding length."""
    key = Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
703
+
704
def add_convnext_block_count(self, length: int) -> None:
    """Record the ConvNext block count."""
    key = Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, length)
706
+
707
def add_shortconv_l_cache(self, length: int) -> None:
    """Record the ShortConv L-cache value."""
    key = Keys.ShortConv.L_CACHE.format(arch=self.arch)
    self.add_uint32(key, length)
709
+
710
def add_block_count(self, length: int) -> None:
    """Record the block count for the writer's architecture."""
    key = Keys.LLM.BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, length)
712
+
713
def add_leading_dense_block_count(self, length: int) -> None:
    """Record the leading dense block count."""
    key = Keys.LLM.LEADING_DENSE_BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, length)
715
+
716
def add_feed_forward_length(self, length: int | Sequence[int]) -> None:
    """Record the feed-forward length: a single int as uint32, anything else as an array."""
    key = Keys.LLM.FEED_FORWARD_LENGTH.format(arch=self.arch)
    if isinstance(length, int):
        self.add_uint32(key, length)
        return
    self.add_array(key, length)
721
+
722
def add_expert_feed_forward_length(self, length: int) -> None:
    """Record the expert feed-forward length."""
    key = Keys.LLM.EXPERT_FEED_FORWARD_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
724
+
725
def add_expert_shared_feed_forward_length(self, length: int) -> None:
    """Record the shared-expert feed-forward length."""
    key = Keys.LLM.EXPERT_SHARED_FEED_FORWARD_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
727
+
728
def add_expert_chunk_feed_forward_length(self, length: int) -> None:
    """Record the expert chunk feed-forward length."""
    key = Keys.LLM.EXPERT_CHUNK_FEED_FORWARD_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
730
+
731
def add_parallel_residual(self, use: bool) -> None:
    """Record the use-parallel-residual flag."""
    key = Keys.LLM.USE_PARALLEL_RESIDUAL.format(arch=self.arch)
    self.add_bool(key, use)
733
+
734
def add_decoder_start_token_id(self, id: int) -> None:
    """Record the decoder start token id."""
    key = Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch)
    self.add_uint32(key, id)
736
+
737
def add_decoder_block_count(self, value: int) -> None:
    """Record the decoder block count."""
    key = Keys.LLM.DECODER_BLOCK_COUNT.format(arch=self.arch)
    self.add_uint32(key, value)
739
+
740
def add_embedding_length_per_layer_input(self, value: int) -> None:
    """Record the per-layer-input embedding length."""
    key = Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch)
    self.add_uint32(key, value)
742
+
743
def add_altup_active_idx(self, val: int) -> None:
    """Record the AltUp active index."""
    key = Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch)
    self.add_uint32(key, val)
745
+
746
def add_altup_num_inputs(self, val: int) -> None:
    """Record the AltUp number of inputs."""
    key = Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch)
    self.add_uint32(key, val)
748
+
749
def add_activation_sparsity_scale(self, values: Sequence[float]) -> None:
    """Record the activation sparsity scale values as an array."""
    key = Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch)
    self.add_array(key, values)
751
+
752
def add_head_count(self, count: int | Sequence[int]) -> None:
    """Record the attention head count: a single int as uint32, anything else as an array."""
    key = Keys.Attention.HEAD_COUNT.format(arch=self.arch)
    if isinstance(count, int):
        self.add_uint32(key, count)
        return
    self.add_array(key, count)
757
+
758
def add_head_count_kv(self, count: int | Sequence[int]) -> None:
    """Record the KV head count: a single int as uint32, anything else as an array."""
    key = Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch)
    if isinstance(count, int):
        self.add_uint32(key, count)
        return
    self.add_array(key, count)
763
+
764
def add_key_length(self, length: int) -> None:
    """Record the attention key length."""
    key = Keys.Attention.KEY_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
766
+
767
def add_value_length(self, length: int) -> None:
    """Record the attention value length."""
    key = Keys.Attention.VALUE_LENGTH.format(arch=self.arch)
    self.add_uint32(key, length)
769
+
770
def add_key_length_mla(self, length: int) -> None:
    """Record the MLA attention key length."""
    key = Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch)
    self.add_uint32(key, length)
772
+
773
def add_value_length_mla(self, length: int) -> None:
    """Record the MLA attention value length."""
    key = Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch)
    self.add_uint32(key, length)
775
+
776
def add_max_alibi_bias(self, bias: float) -> None:
    """Record the maximum ALiBi bias."""
    key = Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch)
    self.add_float41(key, bias)
778
+
779
def add_clamp_kqv(self, value: float) -> None:
    """Record the KQV clamp value."""
    key = Keys.Attention.CLAMP_KQV.format(arch=self.arch)
    self.add_float41(key, value)
781
+
782
def add_shared_kv_layers(self, value: int) -> None:
    """Record the shared KV layers value."""
    key = Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch)
    self.add_uint32(key, value)
784
+
785
def add_sliding_window_pattern(self, value: int | Sequence[bool]) -> None:
    """Record the sliding-window pattern: a single int as uint32, anything else as an array."""
    pattern_key = Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch)
    if not isinstance(value, int):
        self.add_array(pattern_key, value)
        return
    self.add_uint32(pattern_key, value)
791
+
792
def add_dense_features_dims(self, dense: str, in_f: int, out_f: int) -> None:
    """Record the input and output feature sizes of the dense layer named ``dense``."""
    in_key = Keys.LLM.DENSE_FEAT_IN_SIZE.format(arch=self.arch, dense=dense)
    self.add_uint32(in_key, in_f)
    out_key = Keys.LLM.DENSE_FEAT_OUT_SIZE.format(arch=self.arch, dense=dense)
    self.add_uint32(out_key, out_f)
795
+
796
def add_logit_scale(self, value: float) -> None:
    """Record the logit scale."""
    key = Keys.LLM.LOGIT_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
798
+
799
def add_attn_logit_softcapping(self, value: float) -> None:
    """Record the attention logit soft-capping value."""
    key = Keys.LLM.ATTN_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float41(key, value)
801
+
802
def add_router_logit_softcapping(self, value: float) -> None:
    """Record the router logit soft-capping value."""
    key = Keys.LLM.ROUTER_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float41(key, value)
804
+
805
def add_final_logit_softcapping(self, value: float) -> None:
    """Record the final logit soft-capping value."""
    key = Keys.LLM.FINAL_LOGIT_SOFTCAPPING.format(arch=self.arch)
    self.add_float41(key, value)
807
+
808
def add_expert_count(self, count: int) -> None:
    """Record the expert count."""
    key = Keys.LLM.EXPERT_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
810
+
811
def add_expert_used_count(self, count: int) -> None:
    """Record the used-expert count."""
    key = Keys.LLM.EXPERT_USED_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
813
+
814
def add_expert_shared_count(self, count: int) -> None:
    """Record the shared-expert count."""
    key = Keys.LLM.EXPERT_SHARED_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
816
+
817
def add_expert_group_count(self, count: int) -> None:
    """Record the expert group count."""
    key = Keys.LLM.EXPERT_GROUP_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
819
+
820
def add_expert_group_used_count(self, count: int) -> None:
    """Record the used expert group count."""
    key = Keys.LLM.EXPERT_GROUP_USED_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
822
+
823
def add_expert_weights_scale(self, value: float) -> None:
    """Record the expert weights scale."""
    key = Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
825
+
826
def add_expert_weights_norm(self, value: bool) -> None:
    """Record the expert weights norm flag."""
    key = Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch)
    self.add_bool(key, value)
828
+
829
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
    """Record the expert gating function, stored as the enum member's ``.value``."""
    key = Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch)
    self.add_uint32(key, value.value)
831
+
832
def add_expert_group_scale(self, value: float) -> None:
    """Record the expert group scale."""
    key = Keys.LLM.EXPERT_GROUP_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
834
+
835
def add_experts_per_group(self, count: int) -> None:
    """Record the number of experts per group."""
    key = Keys.LLM.EXPERTS_PER_GROUP.format(arch=self.arch)
    self.add_uint32(key, count)
837
+
838
def add_moe_every_n_layers(self, value: int) -> None:
    """Record the MoE-every-n-layers value."""
    key = Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch)
    self.add_uint32(key, value)
840
+
841
def add_nextn_predict_layers(self, count: int) -> None:
    """Record the NextN predict layers count."""
    key = Keys.LLM.NEXTN_PREDICT_LAYERS.format(arch=self.arch)
    self.add_uint32(key, count)
843
+
844
def add_swin_norm(self, value: bool) -> None:
    """Record the swin-norm flag."""
    key = Keys.LLM.SWIN_NORM.format(arch=self.arch)
    self.add_bool(key, value)
846
+
847
def add_rescale_every_n_layers(self, count: int) -> None:
    """Record the rescale-every-n-layers value."""
    key = Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch)
    self.add_uint32(key, count)
849
+
850
def add_time_mix_extra_dim(self, dim: int) -> None:
    """Record the time-mix extra dimension."""
    key = Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch)
    self.add_uint32(key, dim)
852
+
853
def add_time_decay_extra_dim(self, dim: int) -> None:
    """Record the time-decay extra dimension."""
    key = Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch)
    self.add_uint32(key, dim)
855
+
856
def add_residual_scale(self, value: float) -> None:
    """Record the residual scale."""
    key = Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
858
+
859
def add_embedding_scale(self, value: float) -> None:
    """Record the embedding scale."""
    key = Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
861
+
862
def add_wkv_head_size(self, size: int) -> None:
    """Record the WKV head size."""
    key = Keys.WKV.HEAD_SIZE.format(arch=self.arch)
    self.add_uint32(key, size)
864
+
865
def add_token_shift_count(self, count: int) -> None:
    """Record the token shift count."""
    key = Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
867
+
868
def add_interleave_moe_layer_step(self, value: int) -> None:
    """Record the interleave MoE layer step."""
    key = Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch)
    self.add_uint32(key, value)
870
+
871
def add_layer_norm_eps(self, value: float) -> None:
    """Record the layer-norm epsilon."""
    key = Keys.Attention.LAYERNORM_EPS.format(arch=self.arch)
    self.add_float41(key, value)
873
+
874
def add_layer_norm_rms_eps(self, value: float) -> None:
    """Record the RMS layer-norm epsilon."""
    key = Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch)
    self.add_float41(key, value)
876
+
877
def add_group_norm_eps(self, value: float) -> None:
    """Record the group-norm epsilon."""
    key = Keys.Attention.GROUPNORM_EPS.format(arch=self.arch)
    self.add_float41(key, value)
879
+
880
def add_group_norm_groups(self, value: int) -> None:
    """Record the group-norm group count."""
    key = Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch)
    self.add_uint32(key, value)
882
+
883
def add_causal_attention(self, value: bool) -> None:
    """Record the causal-attention flag."""
    key = Keys.Attention.CAUSAL.format(arch=self.arch)
    self.add_bool(key, value)
885
+
886
def add_q_lora_rank(self, length: int) -> None:
    """Record the Q LoRA rank."""
    key = Keys.Attention.Q_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
888
+
889
def add_kv_lora_rank(self, length: int) -> None:
    """Record the KV LoRA rank."""
    key = Keys.Attention.KV_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
891
+
892
def add_decay_lora_rank(self, length: int) -> None:
    """Record the decay LoRA rank."""
    key = Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
894
+
895
def add_iclr_lora_rank(self, length: int) -> None:
    """Record the ICLR LoRA rank."""
    key = Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
897
+
898
def add_value_residual_mix_lora_rank(self, length: int) -> None:
    """Record the value-residual-mix LoRA rank."""
    key = Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
900
+
901
def add_rope_freq_base_swa(self, value: float) -> None:
    """Record the RoPE frequency base for SWA."""
    key = Keys.Rope.FREQ_BASE_SWA.format(arch=self.arch)
    self.add_float41(key, value)
903
+
904
def add_gate_lora_rank(self, length: int) -> None:
    """Record the gate LoRA rank."""
    key = Keys.Attention.GATE_LORA_RANK.format(arch=self.arch)
    self.add_uint32(key, length)
906
+
907
def add_relative_attn_buckets_count(self, value: int) -> None:
    """Record the relative attention buckets count."""
    key = Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch)
    self.add_uint32(key, value)
909
+
910
def add_sliding_window(self, value: int) -> None:
    """Record the sliding window value."""
    key = Keys.Attention.SLIDING_WINDOW.format(arch=self.arch)
    self.add_uint32(key, value)
912
+
913
def add_attention_scale(self, value: float) -> None:
    """Record the attention scale."""
    key = Keys.Attention.SCALE.format(arch=self.arch)
    self.add_float41(key, value)
915
+
916
def add_attn_output_scale(self, value: float) -> None:
    """Record the attention output scale."""
    key = Keys.Attention.OUTPUT_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
918
+
919
def add_attn_temperature_length(self, value: int) -> None:
    """Record the attention temperature length."""
    key = Keys.Attention.TEMPERATURE_LENGTH.format(arch=self.arch)
    self.add_uint32(key, value)
921
+
922
def add_attn_temperature_scale(self, value: float) -> None:
    """Record the attention temperature scale."""
    key = Keys.Attention.TEMPERATURE_SCALE.format(arch=self.arch)
    self.add_float41(key, value)
924
+
925
def add_pooling_type(self, value: PoolingType) -> None:
    """Record the pooling type, stored as the enum member's ``.value``."""
    key = Keys.LLM.POOLING_TYPE.format(arch=self.arch)
    self.add_uint32(key, value.value)
927
+
928
def add_num_deepstack_layers(self, count: int) -> None:
    """Record the number of deepstack layers."""
    key = Keys.LLM.NUM_DEEPSTACK_LAYERS.format(arch=self.arch)
    self.add_uint32(key, count)
930
+
931
def add_rope_dimension_count(self, count: int) -> None:
    """Record the RoPE dimension count."""
    key = Keys.Rope.DIMENSION_COUNT.format(arch=self.arch)
    self.add_uint32(key, count)
933
+
934
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
    """Record the RoPE dimension sections as an array."""
    key = Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch)
    self.add_array(key, dims)
936
+
937
def add_rope_freq_base(self, value: float) -> None:
    """Record the RoPE frequency base."""
    key = Keys.Rope.FREQ_BASE.format(arch=self.arch)
    self.add_float41(key, value)
939
+
940
def add_rope_scaling_type(self, value: RopeScalingType) -> None:
    """Record the RoPE scaling type, stored as the enum member's ``.value`` string."""
    key = Keys.Rope.SCALING_TYPE.format(arch=self.arch)
    self.add_string(key, value.value)
942
+
943
def add_rope_scaling_factor(self, value: float) -> None:
    """Record the RoPE scaling factor."""
    key = Keys.Rope.SCALING_FACTOR.format(arch=self.arch)
    self.add_float41(key, value)
945
+
946
def add_rope_scaling_attn_factors(self, value: float) -> None:
    """Record the RoPE scaling attention factor."""
    key = Keys.Rope.SCALING_ATTN_FACTOR.format(arch=self.arch)
    self.add_float41(key, value)
948
+
949
def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
    """Record the RoPE scaling original context length."""
    key = Keys.Rope.SCALING_ORIG_CTX_LEN.format(arch=self.arch)
    self.add_uint32(key, value)
951
+
952
def add_rope_scaling_finetuned(self, value: bool) -> None:
    """Record the RoPE scaling finetuned flag."""
    key = Keys.Rope.SCALING_FINETUNED.format(arch=self.arch)
    self.add_bool(key, value)
954
+
955
def add_rope_scaling_yarn_log_mul(self, value: float) -> None:
    """Record the RoPE YaRN log multiplier."""
    key = Keys.Rope.SCALING_YARN_LOG_MUL.format(arch=self.arch)
    self.add_float41(key, value)
957
+
958
def add_rope_scaling_yarn_ext_factor(self, value: float) -> None:
    """Record the RoPE YaRN ext factor."""
    key = Keys.Rope.SCALING_YARN_EXT_FACTOR.format(arch=self.arch)
    self.add_float41(key, value)
960
+
961
def add_rope_scaling_yarn_attn_factor(self, value: float) -> None:
    """Record the RoPE YaRN attention factor."""
    key = Keys.Rope.SCALING_YARN_ATTN_FACTOR.format(arch=self.arch)
    self.add_float41(key, value)
963
+
964
def add_rope_scaling_yarn_beta_fast(self, value: float) -> None:
    """Record the RoPE YaRN beta-fast value."""
    key = Keys.Rope.SCALING_YARN_BETA_FAST.format(arch=self.arch)
    self.add_float41(key, value)
966
+
967
def add_rope_scaling_yarn_beta_slow(self, value: float) -> None:
    """Record the RoPE YaRN beta-slow value."""
    key = Keys.Rope.SCALING_YARN_BETA_SLOW.format(arch=self.arch)
    self.add_float41(key, value)
969
+
970
def add_ssm_conv_kernel(self, value: int) -> None:
    """Record the SSM conv kernel value."""
    key = Keys.SSM.CONV_KERNEL.format(arch=self.arch)
    self.add_uint32(key, value)
972
+
973
def add_ssm_inner_size(self, value: int) -> None:
    """Record the SSM inner size."""
    key = Keys.SSM.INNER_SIZE.format(arch=self.arch)
    self.add_uint32(key, value)
975
+
976
def add_ssm_state_size(self, value: int) -> None:
    """Record the SSM state size."""
    key = Keys.SSM.STATE_SIZE.format(arch=self.arch)
    self.add_uint32(key, value)
978
+
979
def add_ssm_time_step_rank(self, value: int) -> None:
    """Record the SSM time step rank."""
    key = Keys.SSM.TIME_STEP_RANK.format(arch=self.arch)
    self.add_uint32(key, value)
981
+
982
def add_ssm_group_count(self, value: int) -> None:
    """Record the SSM group count."""
    key = Keys.SSM.GROUP_COUNT.format(arch=self.arch)
    self.add_uint32(key, value)
984
+
985
def add_ssm_dt_b_c_rms(self, value: bool) -> None:
    """Record the SSM dt/B/C RMS flag."""
    key = Keys.SSM.DT_B_C_RMS.format(arch=self.arch)
    self.add_bool(key, value)
987
+
988
def add_tokenizer_model(self, model: str) -> None:
    """Record the tokenizer model string."""
    key = Keys.Tokenizer.MODEL
    self.add_string(key, model)
990
+
991
def add_tokenizer_pre(self, pre: str) -> None:
    """Record the tokenizer ``pre`` string."""
    key = Keys.Tokenizer.PRE
    self.add_string(key, pre)
993
+
994
def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
    """Record the token list as an array."""
    key = Keys.Tokenizer.LIST
    self.add_array(key, tokens)
996
+
997
def add_token_merges(self, merges: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
    """Record the token merges as an array."""
    key = Keys.Tokenizer.MERGES
    self.add_array(key, merges)
999
+
1000
def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
    """Record the token types as an array."""
    key = Keys.Tokenizer.TOKEN_TYPE
    self.add_array(key, types)
1002
+
1003
def add_token_type_count(self, value: int) -> None:
    """Record the token type count."""
    key = Keys.Tokenizer.TOKEN_TYPE_COUNT
    self.add_uint32(key, value)
1005
+
1006
def add_token_scores(self, scores: Sequence[float]) -> None:
    """Record the token scores as an array."""
    key = Keys.Tokenizer.SCORES
    self.add_array(key, scores)
1008
+
1009
def add_bos_token_id(self, id: int) -> None:
    """Record the BOS token id."""
    key = Keys.Tokenizer.BOS_ID
    self.add_uint32(key, id)
1011
+
1012
def add_eos_token_id(self, id: int) -> None:
    """Record the EOS token id."""
    key = Keys.Tokenizer.EOS_ID
    self.add_uint32(key, id)
1014
+
1015
def add_unk_token_id(self, id: int) -> None:
    """Record the UNK token id."""
    key = Keys.Tokenizer.UNK_ID
    self.add_uint32(key, id)
1017
+
1018
def add_sep_token_id(self, id: int) -> None:
    """Record the SEP token id."""
    key = Keys.Tokenizer.SEP_ID
    self.add_uint32(key, id)
1020
+
1021
def add_pad_token_id(self, id: int) -> None:
    """Record the PAD token id."""
    key = Keys.Tokenizer.PAD_ID
    self.add_uint32(key, id)
1023
+
1024
def add_mask_token_id(self, id: int) -> None:
    """Record the MASK token id."""
    key = Keys.Tokenizer.MASK_ID
    self.add_uint32(key, id)
1026
+
1027
def add_add_bos_token(self, value: bool) -> None:
    """Record the add-BOS flag."""
    key = Keys.Tokenizer.ADD_BOS
    self.add_bool(key, value)
1029
+
1030
def add_add_eos_token(self, value: bool) -> None:
    """Record the add-EOS flag."""
    key = Keys.Tokenizer.ADD_EOS
    self.add_bool(key, value)
1032
+
1033
def add_add_sep_token(self, value: bool) -> None:
    """Record the add-SEP flag."""
    key = Keys.Tokenizer.ADD_SEP
    self.add_bool(key, value)
1035
+
1036
def add_add_space_prefix(self, value: bool) -> None:
    """Record the add-prefix flag."""
    key = Keys.Tokenizer.ADD_PREFIX
    self.add_bool(key, value)
1038
+
1039
def add_remove_extra_whitespaces(self, value: bool) -> None:
    """Record the remove-extra-whitespace flag."""
    key = Keys.Tokenizer.REMOVE_EXTRA_WS
    self.add_bool(key, value)
1041
+
1042
def add_precompiled_charsmap(self, charsmap: bytes) -> None:
    """Record the precompiled charsmap bytes via ``add_array``."""
    key = Keys.Tokenizer.PRECOMPILED_CHARSMAP
    self.add_array(key, charsmap)
1044
+
1045
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
    """Record one chat template, or a set of named chat templates.

    A plain string is stored directly as the chat template.  Otherwise each
    mapping in ``value`` is read for its ``'name'`` and ``'template'`` entries:
    characters in the name other than ASCII letters and digits become ``'_'``,
    the entry named ``default`` becomes the main chat template, and every
    other named entry is stored under its own key.  The names of those other
    entries are also stored as an array, in sorted order.  Entries with an
    empty name or no template are skipped.  Nothing is stored as the main
    template when the sequence has no ``default`` entry.
    """
    if not isinstance(value, str):
        template_default = None
        template_names = set()
        # hoisted: the allowed characters do not change between iterations
        allowed_chars = ascii_letters + digits

        for choice in value:
            name = choice.get('name', '')
            template = choice.get('template')

            # Allowing non-alphanumerical characters in template name is probably not a good idea, so filter it
            name = ''.join(c if c in allowed_chars else '_' for c in name)

            if name and template is not None:
                if name == 'default':
                    template_default = template
                else:
                    template_names.add(name)
                    self.add_string(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=name), template)

        if template_names:
            # sorted: set iteration order varies between runs, which made the
            # written array (and so the output file) non-reproducible
            self.add_array(Keys.Tokenizer.CHAT_TEMPLATES, sorted(template_names))

        if template_default is None:
            return

        value = template_default

    self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
1073
+
1074
def add_eot_token_id(self, id: int) -> None:
    """Record the EOT token id."""
    key = Keys.Tokenizer.EOT_ID
    self.add_uint32(key, id)
1076
+
1077
def add_eom_token_id(self, id: int) -> None:
    """Record the EOM token id."""
    key = Keys.Tokenizer.EOM_ID
    self.add_uint32(key, id)
1079
+
1080
def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
    """Record the classifier output labels as an array."""
    key = Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch)
    self.add_array(key, labels)
1082
+
1083
+ # for vision models
1084
+
1085
def add_clip_has_vision_encoder(self, value: bool) -> None:
    """Record the has-vision-encoder flag."""
    key = Keys.Clip.HAS_VISION_ENCODER
    self.add_bool(key, value)
1087
+
1088
def add_clip_has_audio_encoder(self, value: bool) -> None:
    """Record the has-audio-encoder flag."""
    key = Keys.Clip.HAS_AUDIO_ENCODER
    self.add_bool(key, value)
1090
+
1091
def add_clip_projector_type(self, value: str) -> None:
    """Write the projector type as a string under `Keys.Clip.PROJECTOR_TYPE`."""
    self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
1093
+
1094
def add_clip_vision_projector_type(self, value: str) -> None:
    """Write the vision projector type as a string under `Keys.ClipVision.PROJECTOR_TYPE`."""
    self.add_string(Keys.ClipVision.PROJECTOR_TYPE, value)
1096
+
1097
def add_vision_projection_dim(self, value: int) -> None:
    """Write the vision projection dimension as a uint32 under `Keys.ClipVision.PROJECTION_DIM`."""
    self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
1099
+
1100
def add_vision_patch_size(self, value: int) -> None:
    """Write the vision patch size as a uint32 under `Keys.ClipVision.PATCH_SIZE`."""
    self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
1102
+
1103
def add_vision_embedding_length(self, value: int) -> None:
    """Write the vision embedding length as a uint32 under `Keys.ClipVision.EMBEDDING_LENGTH`."""
    self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
1105
+
1106
def add_vision_feed_forward_length(self, value: int) -> None:
    """Write the vision feed-forward length as a uint32 under `Keys.ClipVision.FEED_FORWARD_LENGTH`."""
    self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
1108
+
1109
def add_vision_block_count(self, value: int) -> None:
    """Write the vision block count as a uint32 under `Keys.ClipVision.BLOCK_COUNT`."""
    self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
1111
+
1112
def add_vision_head_count(self, value: int) -> None:
    """Write the vision attention head count as a uint32 under `Keys.ClipVision.Attention.HEAD_COUNT`."""
    self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
1114
+
1115
def add_vision_attention_layernorm_eps(self, value: float) -> None:
    """Write the vision attention layer-norm epsilon as a 32-bit float.

    The value is stored under `Keys.ClipVision.Attention.LAYERNORM_EPS`.
    """
    # The previous call targeted `add_float41`; GGUF defines no such value
    # type (floats are 32- or 64-bit), so that was presumably a typo that
    # would fail with AttributeError when called.
    self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
1117
+
1118
def add_vision_image_size(self, value: int) -> None:
    """Write the vision image size as a uint32 under `Keys.ClipVision.IMAGE_SIZE`."""
    self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
1120
+
1121
def add_vision_preproc_image_size(self, value: int) -> None:
    """Write the preprocessing image size as a uint32 under `Keys.ClipVision.PREPROC_IMAGE_SIZE`."""
    self.add_uint32(Keys.ClipVision.PREPROC_IMAGE_SIZE, value)
1123
+
1124
def add_vision_image_mean(self, values: Sequence[float]) -> None:
    """Write the image mean values as an array under `Keys.ClipVision.IMAGE_MEAN`."""
    self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
1126
+
1127
def add_vision_image_std(self, values: Sequence[float]) -> None:
    """Write the image std values as an array under `Keys.ClipVision.IMAGE_STD`."""
    self.add_array(Keys.ClipVision.IMAGE_STD, values)
1129
+
1130
def add_vision_spatial_merge_size(self, value: int) -> None:
    """Write the spatial merge size as a uint32 under `Keys.ClipVision.SPATIAL_MERGE_SIZE`."""
    self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
1132
+
1133
def add_vision_use_gelu(self, value: bool) -> None:
    """Write the use-GELU flag as a bool under `Keys.ClipVision.USE_GELU`."""
    self.add_bool(Keys.ClipVision.USE_GELU, value)
1135
+
1136
def add_vision_use_silu(self, value: bool) -> None:
    """Write the use-SiLU flag as a bool under `Keys.ClipVision.USE_SILU`."""
    self.add_bool(Keys.ClipVision.USE_SILU, value)
1138
+
1139
def add_vision_projector_scale_factor(self, value: int) -> None:
    """Write the projector scale factor as a uint32 under `Keys.ClipVision.Projector.SCALE_FACTOR`."""
    self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
1141
+
1142
def add_vision_n_wa_pattern(self, value: int) -> None:
    """Set the window-attention pattern interval for vision models.

    The interval decides which layers use full attention instead of window
    attention. With n_wa_pattern=4, for instance, layers 3, 7, 11, ... use
    full attention and all remaining layers use window attention.

    Intended for models such as Qwen2.5-VL, whose full-attention layers
    follow a regular pattern. Stored as a uint32 under
    `Keys.ClipVision.N_WA_PATTERN`.
    """
    self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
1152
+
1153
def add_vision_wa_layer_indexes(self, layers: Sequence[int]) -> None:
    """Set the explicit list of full-attention layers for vision models.

    Every listed 0-based layer index uses full attention; all layers that
    are not listed use window attention.

    Args:
        layers: Layer indices that use full attention (e.g., [3, 7, 11, 15])

    Intended for models such as YoutuVL, where the full-attention layers
    are named one by one instead of following a regular pattern.

    How this differs from add_vision_n_wa_pattern:
    - n_wa_pattern: a regular interval (every Nth layer uses full attention)
    - wa_layer_indexes: an explicit, possibly irregular, list of layers

    Stored as an array under `Keys.ClipVision.WA_LAYER_INDEXES`.
    """
    self.add_array(Keys.ClipVision.WA_LAYER_INDEXES, layers)
1170
+
1171
def add_vision_is_deepstack_layers(self, layers: Sequence[bool]) -> None:
    """Write the per-layer deepstack flags as an array under `Keys.ClipVision.IS_DEEPSTACK_LAYERS`."""
    self.add_array(Keys.ClipVision.IS_DEEPSTACK_LAYERS, layers)
1173
+
1174
def add_vision_window_size(self, value: int) -> None:
    """Write the vision window size as a uint32 under `Keys.ClipVision.WINDOW_SIZE`."""
    self.add_uint32(Keys.ClipVision.WINDOW_SIZE, value)
1176
+
1177
+ # audio models
1178
+
1179
def add_clip_audio_projector_type(self, value: str) -> None:
    """Write the audio projector type as a string under `Keys.ClipAudio.PROJECTOR_TYPE`."""
    self.add_string(Keys.ClipAudio.PROJECTOR_TYPE, value)
1181
+
1182
def add_audio_projection_dim(self, value: int) -> None:
    """Write the audio projection dimension as a uint32 under `Keys.ClipAudio.PROJECTION_DIM`."""
    self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
1184
+
1185
def add_audio_embedding_length(self, value: int) -> None:
    """Write the audio embedding length as a uint32 under `Keys.ClipAudio.EMBEDDING_LENGTH`."""
    self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
1187
+
1188
def add_audio_feed_forward_length(self, value: int) -> None:
    """Write the audio feed-forward length as a uint32 under `Keys.ClipAudio.FEED_FORWARD_LENGTH`."""
    self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
1190
+
1191
def add_audio_block_count(self, value: int) -> None:
    """Write the audio block count as a uint32 under `Keys.ClipAudio.BLOCK_COUNT`."""
    self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
1193
+
1194
def add_audio_head_count(self, value: int) -> None:
    """Write the audio attention head count as a uint32 under `Keys.ClipAudio.Attention.HEAD_COUNT`."""
    self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
1196
+
1197
def add_audio_attention_layernorm_eps(self, value: float) -> None:
    """Write the audio attention layer-norm epsilon as a 32-bit float.

    The value is stored under `Keys.ClipAudio.Attention.LAYERNORM_EPS`.
    """
    # The previous call targeted `add_float41`; GGUF defines no such value
    # type (floats are 32- or 64-bit), so that was presumably a typo that
    # would fail with AttributeError when called.
    self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
1199
+
1200
def add_audio_num_mel_bins(self, value: int) -> None:
    """Write the number of mel bins as a uint32 under `Keys.ClipAudio.NUM_MEL_BINS`."""
    self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
1202
+
1203
def add_audio_stack_factor(self, value: int) -> None:
    """Write the audio projector stack factor as a uint32 under `Keys.ClipAudio.Projector.STACK_FACTOR`."""
    self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
1205
+
1206
def add_xielu_alpha_p(self, values: Sequence[float]):
    """Write the xIELU alpha_p values as an array under `Keys.xIELU.ALPHA_P`."""
    self.add_array(Keys.xIELU.ALPHA_P, values)
1208
+
1209
def add_xielu_alpha_n(self, values: Sequence[float]):
    """Write the xIELU alpha_n values as an array under `Keys.xIELU.ALPHA_N`."""
    self.add_array(Keys.xIELU.ALPHA_N, values)
1211
+
1212
def add_xielu_beta(self, values: Sequence[float]):
    """Write the xIELU beta values as an array under `Keys.xIELU.BETA`."""
    self.add_array(Keys.xIELU.BETA, values)
1214
+
1215
def add_xielu_eps(self, values: Sequence[float]):
    """Write the xIELU eps values as an array under `Keys.xIELU.EPS`."""
    self.add_array(Keys.xIELU.EPS, values)
1217
+
1218
+ # diffusion models
1219
+
1220
def add_diffusion_shift_logits(self, value: bool) -> None:
    """Write the shift-logits flag as a bool under `Keys.Diffusion.SHIFT_LOGITS`."""
    self.add_bool(Keys.Diffusion.SHIFT_LOGITS, value)
1222
+
1223
+ def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
1224
+ pack_prefix = ''
1225
+ if not skip_pack_prefix:
1226
+ pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
1227
+ return struct.pack(f'{pack_prefix}{fmt}', value)
1228
+
1229
def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
    """Serialize one metadata value into its binary form.

    Args:
        val: The value to serialize.
        vtype: The value type that selects the encoding.
        add_vtype: When true, the packed `vtype` ("I") is emitted first.
        sub_type: Element type to use for arrays; when None it is taken as
            UINT8 for `bytes`, otherwise derived from the first element.

    Returns:
        The serialized bytes: types found in `self._simple_value_packing`
        are packed with their format; strings are a "Q" length followed by
        the UTF-8 bytes; arrays are the element type ("I"), the element
        count ("Q"), then each element without its own type tag.

    Raises:
        ValueError: If the array is not a sequence, is empty, or mixes
            element types, or if `vtype` is not handled.
    """
    buf = bytearray()

    if add_vtype:
        buf += self._pack("I", vtype)

    simple_fmt = self._simple_value_packing.get(vtype)
    if simple_fmt is not None:
        # Bool values are packed without a byte-order prefix.
        buf += self._pack(simple_fmt, val, skip_pack_prefix=(vtype == GGUFValueType.BOOL))
        return buf

    if vtype == GGUFValueType.STRING:
        # Already-encoded values are written unchanged.
        payload = val.encode("utf-8") if isinstance(val, str) else val
        buf += self._pack("Q", len(payload))
        buf += payload
        return buf

    if vtype != GGUFValueType.ARRAY:
        raise ValueError("Invalid GGUF metadata value type or value")

    if not isinstance(val, Sequence):
        raise ValueError("Invalid GGUF metadata array, expecting sequence")

    if len(val) == 0:
        raise ValueError("Invalid GGUF metadata array. Empty array")

    if sub_type is not None:
        elem_type = sub_type
    elif isinstance(val, bytes):
        elem_type = GGUFValueType.UINT8
    else:
        # Infer the element type from the first item and require the rest to match.
        elem_type = GGUFValueType.get_type(val[0])
        for other in val[1:]:
            if GGUFValueType.get_type(other) is not elem_type:
                raise ValueError("All items in a GGUF array should be of the same type")

    buf += self._pack("I", elem_type)
    buf += self._pack("Q", len(val))
    for element in val:
        buf += self._pack_val(element, elem_type, add_vtype=False)

    return buf
1266
+
1267
@staticmethod
def format_n_bytes_to_str(num: int) -> str:
    """Render a byte count as a short decimal-scaled string.

    Zero yields "negligible - metadata only". Otherwise the value is divided
    by 1000 until its magnitude is below 1000 and printed with one decimal
    and the matching suffix ("", "K", "M" or "G"). Values still at or above
    1000 after the "G" step are reported in "T" with a split recommendation.
    """
    if num == 0:
        return "negligible - metadata only"

    size = float(num)
    for suffix in ("", "K", "M", "G"):
        if abs(size) < 1000.0:
            break
        size /= 1000.0
    else:
        # No suffix fitted: the value has been divided four times by now.
        return f"{size:.1f}T - over 1TB, split recommended"

    return f"{size:3.1f}{suffix}"
llama-webui-clone.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8246b593545af3c942beff5c830a5c5a6e26fcc7b1e2cd57825e501f5edd9529
3
+ size 118468909