Spaces:
Running
Running
/**
 * SafeTensorsParser - parser for SafeTensors files.
 *
 * Reads the 8-byte little-endian header size at the start of the file,
 * decodes the JSON header that follows it, and extracts the per-tensor
 * descriptions together with the optional `__metadata__` entry.
 * Requirements: 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
 */
class SafeTensorsParser {
  constructor() {
    /**
     * Bytes per element for each SafeTensors dtype.
     * @type {Record<string, number>}
     */
    this.BYTES_PER_ELEMENT = {
      'BOOL': 1,
      'U8': 1,
      'I8': 1,
      'U16': 2,
      'I16': 2,
      'F16': 2,
      'BF16': 2,
      'I32': 4,
      'U32': 4,
      'F32': 4,
      'F64': 8,
      'I64': 8,
      'U64': 8
    };
  }

  /**
   * Parse an ArrayBuffer containing a .safetensors file.
   *
   * Never throws: every failure is reported through the returned object
   * as `{ success: false, error }`.
   *
   * @param {ArrayBuffer} buffer - The raw file content
   * @returns {{ success: boolean, data?: { tensors: Array, metadata: Object|null, headerSize: number }, error?: string }}
   */
  parse(buffer) {
    try {
      // Req 37.4: buffer must be at least 8 bytes
      if (!buffer || buffer.byteLength < 8) {
        return {
          success: false,
          error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size'
        };
      }

      // Req 37.1: the first 8 bytes are a little-endian uint64 header size.
      // Both 32-bit halves are combined so that a non-zero high word is
      // not silently discarded; such a value then fails the bounds check
      // below instead of being mistaken for a small header.
      const view = new DataView(buffer);
      const headerSizeLow = view.getUint32(0, true);
      const headerSizeHigh = view.getUint32(4, true);
      const headerSize = headerSizeHigh * 0x100000000 + headerSizeLow;

      // Req 37.5: header size must not exceed remaining data
      if (headerSize > buffer.byteLength - 8) {
        return {
          success: false,
          error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn'
        };
      }

      // Req 37.2: Read N bytes from offset 8, decode as UTF-8, parse as JSON
      const headerBytes = new Uint8Array(buffer, 8, headerSize);
      const headerString = new TextDecoder('utf-8').decode(headerBytes);

      let headerObj;
      try {
        headerObj = JSON.parse(headerString);
      } catch (_e) {
        // Req 37.6: invalid JSON
        return {
          success: false,
          error: 'Tệp không hợp lệ: header không phải JSON hợp lệ'
        };
      }

      // The header is iterated as a name -> tensor-info mapping, so JSON
      // values that are not plain objects (null, arrays, numbers, strings)
      // are rejected here rather than producing meaningless entries.
      if (typeof headerObj !== 'object' || headerObj === null || Array.isArray(headerObj)) {
        return {
          success: false,
          error: 'Tệp không hợp lệ: header phải là một đối tượng JSON'
        };
      }

      // Req 37.7: Separate __metadata__ from tensor entries
      const metadata = headerObj.__metadata__ || null;
      const tensors = [];

      for (const [name, info] of Object.entries(headerObj)) {
        if (name === '__metadata__') continue;

        const dtype = info.dtype || '';
        const shape = info.shape || [];
        const dataOffsets = info.data_offsets || [0, 0];

        const elementCount = this._computeElementCount(shape);

        // Own-property lookup only: a dtype such as "constructor" must not
        // resolve to something inherited from Object.prototype. Unknown
        // dtypes fall back to 1 byte per element.
        const isKnownDtype = Object.prototype.hasOwnProperty.call(this.BYTES_PER_ELEMENT, dtype);
        const bytesPerEl = (isKnownDtype && this.BYTES_PER_ELEMENT[dtype]) || 1;
        const byteSize = elementCount * bytesPerEl;

        tensors.push({
          name,
          dtype,
          shape,
          data_offsets: dataOffsets,
          elementCount,
          byteSize
        });
      }

      // Req 37.3: Return parsed data
      return {
        success: true,
        data: {
          tensors,
          metadata,
          headerSize
        }
      };
    } catch (err) {
      // Anything unexpected (e.g. a tensor entry that is null) is reported
      // as a failed parse instead of propagating to the caller.
      return {
        success: false,
        error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định')
      };
    }
  }

  /**
   * Compute total element count as the product of all shape dimensions.
   * Returns 1 for a missing or empty shape (the empty product), so a
   * scalar tensor with shape [] counts as a single element.
   * @param {number[]} shape
   * @returns {number}
   */
  _computeElementCount(shape) {
    if (!shape || shape.length === 0) return 1;
    return shape.reduce((acc, dim) => acc * dim, 1);
  }
}
// Expose the parser as a browser global. The assignment is guarded so that
// evaluating this file where no `window` global exists does not throw a
// ReferenceError.
if (typeof window !== 'undefined') {
  window.SafeTensorsParser = SafeTensorsParser;
}