/**
 * SafeTensorsParser - parser for the SafeTensors container format.
 *
 * Parses SafeTensors binary files by reading the 8-byte little-endian header
 * size, decoding the JSON header, and extracting tensor info and metadata.
 * Only the header is inspected; tensor payload bytes are never read.
 *
 * Requirements: 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
 */
class SafeTensorsParser {
  constructor() {
    /**
     * Bytes per element for each SafeTensors dtype.
     * Dtypes that are not listed here fall back to 1 byte per element.
     * @type {Record<string, number>}
     */
    this.BYTES_PER_ELEMENT = {
      'BOOL': 1,
      'U8': 1,
      'I8': 1,
      'U16': 2,
      'I16': 2,
      'F16': 2,
      'BF16': 2,
      'I32': 4,
      'U32': 4,
      'F32': 4,
      'F64': 8,
      'I64': 8,
      'U64': 8
    };
  }

  /**
   * Parse an ArrayBuffer containing a .safetensors file.
   *
   * Never throws: every failure is reported through the returned result
   * object (`success: false` plus a user-facing `error` message).
   *
   * @param {ArrayBuffer} buffer - The raw file content
   * @returns {{ success: boolean, data?: { tensors: Array, metadata: Object|null, headerSize: number }, error?: string }}
   */
  parse(buffer) {
    try {
      // Req 37.4: buffer must be at least 8 bytes
      if (!buffer || buffer.byteLength < 8) {
        return { success: false, error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size' };
      }

      // Req 37.1: Read first 8 bytes as little-endian uint64 (header size).
      // Both 32-bit halves are combined so that a non-zero high word is not
      // silently dropped. Values above 2^53 lose precision as a Number, but
      // any such value is far larger than any buffer and is still rejected
      // by the bounds check below.
      const view = new DataView(buffer);
      const headerSizeLow = view.getUint32(0, true);
      const headerSizeHigh = view.getUint32(4, true);
      const headerSize = headerSizeHigh * 0x100000000 + headerSizeLow;

      // Req 37.5: header size must not exceed remaining data
      if (headerSize > buffer.byteLength - 8) {
        return { success: false, error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn' };
      }

      // Req 37.2: Read N bytes from offset 8, decode as UTF-8, parse as JSON
      const headerBytes = new Uint8Array(buffer, 8, headerSize);
      const headerString = new TextDecoder('utf-8').decode(headerBytes);

      let headerObj;
      try {
        headerObj = JSON.parse(headerString);
      } catch (_e) {
        // Req 37.6: invalid JSON
        return { success: false, error: 'Tệp không hợp lệ: header không phải JSON hợp lệ' };
      }

      // The header must be a JSON object mapping names to tensor entries.
      // `null`, arrays and primitives are valid JSON but not a valid header.
      if (headerObj === null || typeof headerObj !== 'object' || Array.isArray(headerObj)) {
        return { success: false, error: 'Tệp không hợp lệ: header không phải JSON hợp lệ' };
      }

      // Req 37.7: Separate __metadata__ from tensor entries
      const metadata = headerObj.__metadata__ || null;
      const tensors = [];

      for (const [name, info] of Object.entries(headerObj)) {
        if (name === '__metadata__') continue;

        // Each tensor entry must itself be an object; reject anything else
        // instead of emitting a meaningless tensor record.
        if (info === null || typeof info !== 'object') {
          return { success: false, error: 'Tệp không hợp lệ: mục tensor không hợp lệ' };
        }

        const dtype = info.dtype || '';
        const shape = info.shape || [];
        const dataOffsets = info.data_offsets || [0, 0];
        const elementCount = this._computeElementCount(shape);

        // Own-property lookup only, so a dtype such as "constructor" cannot
        // resolve to something inherited from Object.prototype.
        const isKnownDtype = Object.prototype.hasOwnProperty.call(this.BYTES_PER_ELEMENT, dtype);
        const bytesPerEl = (isKnownDtype && this.BYTES_PER_ELEMENT[dtype]) || 1;
        const byteSize = elementCount * bytesPerEl;

        tensors.push({
          name,
          dtype,
          shape,
          data_offsets: dataOffsets,
          elementCount,
          byteSize
        });
      }

      // Req 37.3: Return parsed data
      return { success: true, data: { tensors, metadata, headerSize } };
    } catch (err) {
      // Anything unexpected (e.g. a non-ArrayBuffer argument or a malformed
      // shape) is reported as an invalid file rather than thrown.
      return { success: false, error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định') };
    }
  }

  /**
   * Compute total element count as the product of all shape dimensions.
   * Returns 1 for a missing or empty shape (a scalar tensor has shape []
   * and holds exactly one element).
   * @param {number[]} shape
   * @returns {number}
   */
  _computeElementCount(shape) {
    if (!shape || shape.length === 0) return 1;
    return shape.reduce((acc, dim) => acc * dim, 1);
  }
}

// Expose the class as a browser global. The guard keeps the file loadable in
// environments without `window` (e.g. Node-based tests).
if (typeof window !== 'undefined') {
  window.SafeTensorsParser = SafeTensorsParser;
}