/**
 * Unit tests for SafeTensorsParser
 * Validates: Requirements 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
 */
import { describe, it, expect, beforeEach } from 'vitest';

// ─── Re-implement the pure logic from SafeTensorsParser for testability ──
// NOTE(review): this file tests a local copy of the parsing logic, not the
// production module. Confirm SafeTensorsParser.parse() applies the same
// header-size and dtype checks as the copy below.

/** Bytes occupied by a single element of each dtype known to the parser. */
const BYTES_PER_ELEMENT = {
  'BOOL': 1,
  'U8': 1,
  'I8': 1,
  'U16': 2,
  'I16': 2,
  'F16': 2,
  'BF16': 2,
  'I32': 4,
  'U32': 4,
  'F32': 4,
  'F64': 8,
  'I64': 8,
  'U64': 8
};

/** 2^32: weight of the high 32-bit word of the 64-bit header size. */
const UINT32_RANGE = 0x100000000;

/**
 * Compute the number of elements described by a tensor shape.
 *
 * @param {number[]} shape - Dimension sizes.
 * @returns {number} Product of all dimensions; 1 for a missing or empty
 *   shape (scalar).
 */
function computeElementCount(shape) {
  if (!shape || shape.length === 0) return 1;
  return shape.reduce((acc, dim) => acc * dim, 1);
}

/**
 * Look up the element width of a dtype.
 *
 * Only own keys of BYTES_PER_ELEMENT count as known dtypes; a plain
 * `BYTES_PER_ELEMENT[dtype]` would also resolve inherited names such as
 * "toString" and yield a function instead of a number.
 *
 * @param {string} dtype - dtype name taken from the header.
 * @returns {number} Bytes per element, or 1 when the dtype is unknown.
 */
function bytesPerElement(dtype) {
  return Object.prototype.hasOwnProperty.call(BYTES_PER_ELEMENT, dtype)
    ? BYTES_PER_ELEMENT[dtype]
    : 1;
}

/**
 * Build a fake .safetensors ArrayBuffer from a header object.
 *
 * The buffer holds only the 8-byte header size followed by the JSON header;
 * no tensor data is appended.
 *
 * @param {object} headerObj - Object serialized as the JSON header.
 * @returns {ArrayBuffer} Buffer of 8 + header-length bytes.
 */
function buildSafeTensorsBuffer(headerObj) {
  const headerStr = JSON.stringify(headerObj);
  const encoder = new TextEncoder();
  const headerBytes = encoder.encode(headerStr);
  const headerSize = headerBytes.byteLength;

  // 8 bytes for header size (little-endian uint64) + header bytes
  const totalSize = 8 + headerSize;
  const buffer = new ArrayBuffer(totalSize);
  const view = new DataView(buffer);

  // Write header size as little-endian uint32 (low 32 bits)
  view.setUint32(0, headerSize, true);
  // High 32 bits = 0
  view.setUint32(4, 0, true);

  const dest = new Uint8Array(buffer, 8, headerSize);
  dest.set(headerBytes);
  return buffer;
}

/**
 * Minimal parse function mirroring SafeTensorsParser.parse()
 *
 * Never throws: every failure is reported through the returned object.
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @returns {{success: true, data: {tensors: object[], metadata: (object|null), headerSize: number}}
 *   | {success: false, error: string}} Parsed header on success, otherwise an
 *   error message.
 */
function parse(buffer) {
  try {
    if (!buffer || buffer.byteLength < 8) {
      return { success: false, error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size' };
    }

    const view = new DataView(buffer);
    // The header size is a little-endian uint64. Combine both 32-bit words so
    // a non-zero high word is not silently dropped. The sum loses precision
    // above 2^53, but any such value is still far larger than the buffer and
    // is rejected by the bounds check below.
    const lowWord = view.getUint32(0, true);
    const highWord = view.getUint32(4, true);
    const headerSize = highWord * UINT32_RANGE + lowWord;

    if (headerSize > buffer.byteLength - 8) {
      return { success: false, error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn' };
    }

    const headerBytes = new Uint8Array(buffer, 8, headerSize);
    const headerString = new TextDecoder('utf-8').decode(headerBytes);

    let headerObj;
    try {
      headerObj = JSON.parse(headerString);
    } catch (_e) {
      return { success: false, error: 'Tệp không hợp lệ: header không phải JSON hợp lệ' };
    }

    const metadata = headerObj.__metadata__ || null;
    const tensors = [];

    for (const [name, info] of Object.entries(headerObj)) {
      // __metadata__ is reported separately and is not a tensor entry.
      if (name === '__metadata__') continue;

      const dtype = info.dtype || '';
      const shape = info.shape || [];
      const dataOffsets = info.data_offsets || [0, 0];
      const elementCount = computeElementCount(shape);
      const byteSize = elementCount * bytesPerElement(dtype);

      tensors.push({ name, dtype, shape, data_offsets: dataOffsets, elementCount, byteSize });
    }

    return { success: true, data: { tensors, metadata, headerSize } };
  } catch (err) {
    // Anything unexpected (e.g. a malformed tensor entry) becomes an error result.
    return { success: false, error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định') };
  }
}

// ─── Tests ──────────────────────────────────────────────────────────────

describe('SafeTensorsParser - parse', () => {
  describe('Error handling', () => {
    it('should return error for null buffer (Req 37.4)', () => {
      const result = parse(null);
      expect(result.success).toBe(false);
      expect(result.error).toContain('không đủ dữ liệu để đọc header size');
    });

    it('should return error for buffer smaller than 8 bytes (Req 37.4)', () => {
      const buffer = new ArrayBuffer(4);
      const result = parse(buffer);
      expect(result.success).toBe(false);
      expect(result.error).toContain('không đủ dữ liệu để đọc header size');
    });

    it('should return error when header size exceeds remaining data (Req 37.5)', () => {
      // Create buffer with 8 bytes header size pointing to 1000 bytes, but only 16 bytes total
      const buffer = new ArrayBuffer(16);
      const view = new DataView(buffer);
      view.setUint32(0, 1000, true); // header size = 1000
      view.setUint32(4, 0, true);
      const result = parse(buffer);
      expect(result.success).toBe(false);
      expect(result.error).toContain('header size lớn hơn dữ liệu có sẵn');
    });

    it('should return error when the high 32 bits of the header size are set', () => {
      // Low word alone (2) would fit the 2-byte payload; the full 64-bit
      // value (2^32 + 2) does not.
      const payload = new TextEncoder().encode('{}');
      const buffer = new ArrayBuffer(8 + payload.byteLength);
      const view = new DataView(buffer);
      view.setUint32(0, payload.byteLength, true);
      view.setUint32(4, 1, true);
      new Uint8Array(buffer, 8).set(payload);
      const result = parse(buffer);
      expect(result.success).toBe(false);
      expect(result.error).toContain('header size lớn hơn dữ liệu có sẵn');
    });

    it('should return error for invalid JSON header (Req 37.6)', () => {
      // Build buffer with non-JSON content
      const invalidJson = 'this is not json{{{';
      const encoder = new TextEncoder();
      const headerBytes = encoder.encode(invalidJson);
      const buffer = new ArrayBuffer(8 + headerBytes.byteLength);
      const view = new DataView(buffer);
      view.setUint32(0, headerBytes.byteLength, true);
      view.setUint32(4, 0, true);
      new Uint8Array(buffer, 8).set(headerBytes);
      const result = parse(buffer);
      expect(result.success).toBe(false);
      expect(result.error).toContain('header không phải JSON hợp lệ');
    });
  });

  describe('Successful parsing', () => {
    it('should parse a valid safetensors buffer with tensors (Req 37.1, 37.2, 37.3)', () => {
      const header = {
        'weight': { dtype: 'F32', shape: [768, 768], data_offsets: [0, 2359296] },
        'bias': { dtype: 'F32', shape: [768], data_offsets: [2359296, 2362368] }
      };
      const buffer = buildSafeTensorsBuffer(header);
      const result = parse(buffer);

      expect(result.success).toBe(true);
      expect(result.data.tensors).toHaveLength(2);
      expect(result.data.metadata).toBeNull();

      const weight = result.data.tensors.find(t => t.name === 'weight');
      expect(weight.dtype).toBe('F32');
      expect(weight.shape).toEqual([768, 768]);
      expect(weight.elementCount).toBe(768 * 768);
      expect(weight.byteSize).toBe(768 * 768 * 4);
    });

    it('should separate __metadata__ from tensors (Req 37.7)', () => {
      const header = {
        '__metadata__': { format: 'pt', framework: 'pytorch' },
        'layer.weight': { dtype: 'F16', shape: [512, 256], data_offsets: [0, 262144] }
      };
      const buffer = buildSafeTensorsBuffer(header);
      const result = parse(buffer);

      expect(result.success).toBe(true);
      expect(result.data.tensors).toHaveLength(1);
      expect(result.data.tensors[0].name).toBe('layer.weight');
      expect(result.data.metadata).toEqual({ format: 'pt', framework: 'pytorch' });
    });

    it('should handle empty header (no tensors, no metadata)', () => {
      const buffer = buildSafeTensorsBuffer({});
      const result = parse(buffer);

      expect(result.success).toBe(true);
      expect(result.data.tensors).toHaveLength(0);
      expect(result.data.metadata).toBeNull();
    });

    it('should return correct headerSize', () => {
      const header = { 'x': { dtype: 'I8', shape: [10], data_offsets: [0, 10] } };
      const buffer = buildSafeTensorsBuffer(header);
      const result = parse(buffer);
      const expectedHeaderSize = new TextEncoder().encode(JSON.stringify(header)).byteLength;
      expect(result.data.headerSize).toBe(expectedHeaderSize);
    });
  });

  describe('Element count and byte size calculation', () => {
    it('should compute elementCount as product of shape', () => {
      expect(computeElementCount([3, 4, 5])).toBe(60);
      expect(computeElementCount([1])).toBe(1);
      expect(computeElementCount([])).toBe(1); // scalar
    });

    it('should compute correct byteSize for each dtype', () => {
      const dtypes = {
        'F32': 4,
        'F16': 2,
        'BF16': 2,
        'I8': 1,
        'I64': 8,
        'BOOL': 1,
        'U32': 4,
        'F64': 8
      };
      const shape = [10, 20]; // 200 elements

      for (const [dtype, bpe] of Object.entries(dtypes)) {
        const header = { 't': { dtype, shape, data_offsets: [0, 200 * bpe] } };
        const buffer = buildSafeTensorsBuffer(header);
        const result = parse(buffer);

        expect(result.success).toBe(true);
        expect(result.data.tensors[0].elementCount).toBe(200);
        expect(result.data.tensors[0].byteSize).toBe(200 * bpe);
      }
    });

    it('should fall back to 1 byte per element for unknown dtypes', () => {
      // 'toString' and 'constructor' exist on Object.prototype and must not
      // be mistaken for known dtypes.
      for (const dtype of ['NOT_A_DTYPE', 'toString', 'constructor']) {
        const header = { 't': { dtype, shape: [2, 3], data_offsets: [0, 6] } };
        const result = parse(buildSafeTensorsBuffer(header));

        expect(result.success).toBe(true);
        expect(result.data.tensors[0].elementCount).toBe(6);
        expect(result.data.tensors[0].byteSize).toBe(6);
      }
    });
  });
});