// Source path: model-explorer/js/core/safeTensorsParser.js
// Uploaded by: mr4
// Commit message: "Upload 71 files"
// Commit: 9bd422a (verified)
/**
* SafeTensorsParser - parser for the SafeTensors file format
* Parses SafeTensors binary files by reading the 8-byte little-endian header size,
* decoding the JSON header, and extracting tensor info and metadata.
* Requirements: 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
*/
class SafeTensorsParser {
    /**
     * Create a parser and set up the dtype -> element-width lookup table.
     */
    constructor() {
        /**
         * Bytes per element for each SafeTensors dtype.
         * @type {Record<string, number>}
         */
        this.BYTES_PER_ELEMENT = {
            'BOOL': 1,
            'U8': 1,
            'I8': 1,
            'U16': 2,
            'I16': 2,
            'F16': 2,
            'BF16': 2,
            'I32': 4,
            'U32': 4,
            'F32': 4,
            'F64': 8,
            'I64': 8,
            'U64': 8
        };
    }

    /**
     * Parse an ArrayBuffer containing a .safetensors file.
     *
     * Layout read by this method: bytes 0..7 hold the header size N as a
     * little-endian unsigned 64-bit integer, followed by N bytes of UTF-8
     * JSON. The JSON object maps tensor names to `{ dtype, shape,
     * data_offsets }`; the optional `__metadata__` key is returned separately.
     *
     * This method never throws: every failure (including unexpected runtime
     * errors) is reported as `{ success: false, error }`.
     *
     * @param {ArrayBuffer} buffer - The raw file content
     * @returns {{ success: boolean, data?: { tensors: Array, metadata: Object|null, headerSize: number }, error?: string }}
     */
    parse(buffer) {
        try {
            // Req 37.4: buffer must be at least 8 bytes
            if (!buffer || buffer.byteLength < 8) {
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size'
                };
            }

            // Req 37.1: the header size is a little-endian uint64. Read both
            // 32-bit halves and combine them so that a non-zero high word is
            // not silently dropped. The combined value may lose precision
            // above 2^53, but any such value is far larger than the buffer
            // and is rejected by the bounds check below.
            const view = new DataView(buffer);
            const sizeLow = view.getUint32(0, true);
            const sizeHigh = view.getUint32(4, true);
            const headerSize = sizeHigh * 0x100000000 + sizeLow;

            // Req 37.5: header size must not exceed remaining data
            if (headerSize > buffer.byteLength - 8) {
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn'
                };
            }

            // Req 37.2: Read N bytes from offset 8, decode as UTF-8, parse as JSON
            const headerBytes = new Uint8Array(buffer, 8, headerSize);
            const headerString = new TextDecoder('utf-8').decode(headerBytes);
            let headerObj;
            try {
                headerObj = JSON.parse(headerString);
            } catch (_e) {
                // Req 37.6: invalid JSON
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: header không phải JSON hợp lệ'
                };
            }

            // The header must be a JSON object keyed by tensor name. Primitives,
            // null and arrays parse as JSON but are not a usable header.
            if (headerObj === null || typeof headerObj !== 'object' || Array.isArray(headerObj)) {
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: header không phải JSON hợp lệ'
                };
            }

            // Req 37.7: Separate __metadata__ from tensor entries
            const metadata = headerObj.__metadata__ || null;
            const tensors = [];
            for (const [name, info] of Object.entries(headerObj)) {
                if (name === '__metadata__') continue;

                const dtype = info.dtype || '';
                const shape = info.shape || [];
                const dataOffsets = info.data_offsets || [0, 0];
                const elementCount = this._computeElementCount(shape);

                // Own-property lookup only: a dtype such as "constructor" must
                // not resolve to an inherited Object.prototype member.
                // Unknown dtypes fall back to 1 byte per element.
                const isKnownDtype = Object.prototype.hasOwnProperty.call(this.BYTES_PER_ELEMENT, dtype);
                const bytesPerEl = isKnownDtype ? this.BYTES_PER_ELEMENT[dtype] : 1;
                const byteSize = elementCount * bytesPerEl;

                tensors.push({
                    name,
                    dtype,
                    shape,
                    data_offsets: dataOffsets,
                    elementCount,
                    byteSize
                });
            }

            // Req 37.3: Return parsed data
            return {
                success: true,
                data: {
                    tensors,
                    metadata,
                    headerSize
                }
            };
        } catch (err) {
            // Anything unexpected (e.g. a non-ArrayBuffer argument or a
            // null tensor entry) is reported as an invalid file.
            return {
                success: false,
                error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định')
            };
        }
    }

    /**
     * Compute total element count as the product of all shape dimensions.
     * Returns 1 for a missing or empty shape: a scalar tensor has shape []
     * and holds exactly one element (the empty product).
     * @param {number[]} shape
     * @returns {number}
     */
    _computeElementCount(shape) {
        if (!shape || shape.length === 0) return 1;
        return shape.reduce((acc, dim) => acc * dim, 1);
    }
}
// Expose the class on the browser global object. The guard keeps this file
// loadable in environments that have no `window` (e.g. a worker or a
// Node-based test runner), where an unguarded reference would throw.
if (typeof window !== 'undefined') {
    window.SafeTensorsParser = SafeTensorsParser;
}