Spaces:
Running
Running
File size: 4,373 Bytes
/**
 * SafeTensorsParser - Bộ Phân Tích SafeTensors (SafeTensors parser)
 * Parses SafeTensors binary files by reading the 8-byte little-endian header size,
 * decoding the JSON header, and extracting tensor info and metadata.
 * Requirements: 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
 */
/**
 * Parser for the header of a SafeTensors file held in an ArrayBuffer.
 *
 * Layout read by this class: bytes 0..7 hold the header length N as a
 * little-endian unsigned 64-bit integer, bytes 8..8+N hold a UTF-8 JSON
 * object. Every key of that object except `__metadata__` is treated as a
 * tensor entry with `dtype`, `shape` and `data_offsets` fields.
 */
class SafeTensorsParser {
  constructor() {
    /**
     * Bytes per element for each SafeTensors dtype.
     * @type {Record<string, number>}
     */
    this.BYTES_PER_ELEMENT = {
      'BOOL': 1,
      'U8': 1,
      'I8': 1,
      'U16': 2,
      'I16': 2,
      'F16': 2,
      'BF16': 2,
      'I32': 4,
      'U32': 4,
      'F32': 4,
      'F64': 8,
      'I64': 8,
      'U64': 8
    };
  }

  /**
   * Parse an ArrayBuffer containing a .safetensors file.
   *
   * Never throws: every failure is reported as `{ success: false, error }`.
   *
   * @param {ArrayBuffer} buffer - The raw file content
   * @returns {{ success: boolean, data?: { tensors: Array, metadata: Object|null, headerSize: number }, error?: string }}
   */
  parse(buffer) {
    try {
      // Req 37.4: buffer must be at least 8 bytes
      if (!buffer || buffer.byteLength < 8) {
        return {
          success: false,
          error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size'
        };
      }

      // Req 37.1: Read first 8 bytes as little-endian uint64 (header size).
      // The value is assembled from two 32-bit words; ignoring the high word
      // would make a corrupt/huge size look like a small valid one. Precision
      // above 2^53 is irrelevant because such a size always exceeds the buffer.
      const view = new DataView(buffer);
      const headerSizeLow = view.getUint32(0, true);
      const headerSizeHigh = view.getUint32(4, true);
      const headerSize = headerSizeHigh * 4294967296 + headerSizeLow;

      // Req 37.5: header size must not exceed remaining data
      if (headerSize > buffer.byteLength - 8) {
        return {
          success: false,
          error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn'
        };
      }

      // Req 37.2: Read N bytes from offset 8, decode as UTF-8, parse as JSON
      const headerBytes = new Uint8Array(buffer, 8, headerSize);
      const headerString = new TextDecoder('utf-8').decode(headerBytes);
      let headerObj;
      try {
        headerObj = JSON.parse(headerString);
      } catch (_e) {
        // Req 37.6: invalid JSON
        return {
          success: false,
          error: 'Tệp không hợp lệ: header không phải JSON hợp lệ'
        };
      }

      // Valid JSON that is not an object (null, array, string, number...)
      // cannot describe tensors; reject it instead of iterating its entries.
      if (headerObj === null || typeof headerObj !== 'object' || Array.isArray(headerObj)) {
        return {
          success: false,
          error: 'Tệp không hợp lệ: header phải là một JSON object'
        };
      }

      // Req 37.7: Separate __metadata__ from tensor entries
      const metadata = headerObj.__metadata__ || null;
      const tensors = [];
      for (const [name, info] of Object.entries(headerObj)) {
        if (name === '__metadata__') continue;
        const dtype = info.dtype || '';
        const shape = info.shape || [];
        const dataOffsets = info.data_offsets || [0, 0];
        const elementCount = this._computeElementCount(shape);
        // Own-property lookup only: a dtype such as "constructor" must not
        // resolve to an inherited Object.prototype member. Unknown dtypes
        // fall back to 1 byte per element.
        const isKnownDtype = Object.prototype.hasOwnProperty.call(this.BYTES_PER_ELEMENT, dtype);
        const bytesPerEl = isKnownDtype ? this.BYTES_PER_ELEMENT[dtype] : 1;
        const byteSize = elementCount * bytesPerEl;
        tensors.push({
          name,
          dtype,
          shape,
          data_offsets: dataOffsets,
          elementCount,
          byteSize
        });
      }

      // Req 37.3: Return parsed data
      return {
        success: true,
        data: {
          tensors,
          metadata,
          headerSize
        }
      };
    } catch (err) {
      // Anything unexpected (e.g. a malformed tensor entry) is reported as
      // an invalid file rather than propagated to the caller.
      return {
        success: false,
        error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định')
      };
    }
  }

  /**
   * Compute total element count as the product of all shape dimensions.
   * Returns 1 for a missing or empty shape (scalar tensors have shape []).
   * @param {number[]} shape
   * @returns {number}
   */
  _computeElementCount(shape) {
    if (!shape || shape.length === 0) return 1;
    return shape.reduce((acc, dim) => acc * dim, 1);
  }
}
// Attach the class to the browser's global `window` object under the name
// `SafeTensorsParser`.
window.SafeTensorsParser = SafeTensorsParser;
|