Spaces:
Running
Running
| /** | |
| * Unit tests for SafeTensorsParser | |
| * Validates: Requirements 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7 | |
| */ | |
| import { describe, it, expect, beforeEach } from 'vitest'; | |
| // ─── Re-implement the pure logic from SafeTensorsParser for testability ── | |
// Width in bytes of a single element, keyed by the dtype string found in a
// tensor's header entry. parse() falls back to 1 for any dtype not listed here.
const BYTES_PER_ELEMENT = {
  // 1 byte
  BOOL: 1,
  U8: 1,
  I8: 1,
  // 2 bytes
  U16: 2,
  I16: 2,
  F16: 2,
  BF16: 2,
  // 4 bytes
  I32: 4,
  U32: 4,
  F32: 4,
  // 8 bytes
  F64: 8,
  I64: 8,
  U64: 8
};
/**
 * Number of elements described by a tensor shape.
 *
 * @param {number[]} shape - Dimension sizes. A falsy or empty shape is treated
 *   as a scalar.
 * @returns {number} Product of all dimensions, or 1 when there are none.
 */
function computeElementCount(shape) {
  const hasDimensions = Boolean(shape) && shape.length !== 0;
  if (!hasDimensions) {
    return 1;
  }
  return shape.reduce((product, extent) => product * extent, 1);
}
/**
 * Build a fake .safetensors ArrayBuffer from a header object.
 *
 * The result holds only the 8-byte little-endian length prefix followed by the
 * UTF-8 encoded JSON header; no tensor data is appended.
 *
 * @param {object} headerObj - Value serialized with JSON.stringify as the header.
 * @returns {ArrayBuffer} Buffer of size 8 + encoded header length.
 */
function buildSafeTensorsBuffer(headerObj) {
  const PREFIX_BYTES = 8;
  const payload = new TextEncoder().encode(JSON.stringify(headerObj));

  // Lay out prefix + payload in one typed array, then expose its backing buffer.
  const file = new Uint8Array(PREFIX_BYTES + payload.byteLength);
  file.set(payload, PREFIX_BYTES);

  // The 64-bit length is written as two little-endian 32-bit words:
  // the low word carries the size, the high word is always zero.
  const prefix = new DataView(file.buffer);
  prefix.setUint32(0, payload.byteLength, true);
  prefix.setUint32(4, 0, true);

  return file.buffer;
}
/**
 * Minimal parse function mirroring SafeTensorsParser.parse().
 *
 * Reads an 8-byte little-endian header length from the start of `buffer`,
 * decodes that many following bytes as UTF-8 JSON, and turns every entry other
 * than `__metadata__` into a tensor descriptor.
 *
 * The full 64-bit length is honoured: a prefix whose upper 32 bits are non-zero
 * is rejected as larger than the available data instead of being silently
 * truncated to its lower 32 bits.
 * NOTE(review): confirm the production SafeTensorsParser.parse() treats the
 * upper 32 bits the same way, so this mirror stays faithful.
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @returns {{success: true, data: {tensors: Array<{name: string, dtype: string,
 *   shape: number[], data_offsets: number[], elementCount: number,
 *   byteSize: number}>, metadata: (object|null), headerSize: number}}
 *   | {success: false, error: string}} Result object; this function never throws.
 */
function parse(buffer) {
  try {
    if (!buffer || buffer.byteLength < 8) {
      return { success: false, error: 'Tα»p khΓ΄ng hợp lα»: khΓ΄ng Δα»§ dα»― liα»u Δα» Δα»c header size' };
    }
    const view = new DataView(buffer);
    // The length prefix is one 64-bit little-endian integer. Combine both
    // 32-bit halves so a non-zero high word cannot masquerade as a small size.
    // Any value that passes the bounds check below is at most byteLength - 8,
    // so it is always exactly representable as a Number.
    const lowWord = view.getUint32(0, true);
    const highWord = view.getUint32(4, true);
    const headerSize = highWord * 0x100000000 + lowWord;
    if (headerSize > buffer.byteLength - 8) {
      return { success: false, error: 'Tα»p khΓ΄ng hợp lα»: header size lα»n hΖ‘n dα»― liα»u cΓ³ sαΊ΅n' };
    }
    const headerBytes = new Uint8Array(buffer, 8, headerSize);
    const headerString = new TextDecoder('utf-8').decode(headerBytes);
    let headerObj;
    try {
      headerObj = JSON.parse(headerString);
    } catch (_e) {
      // Malformed JSON gets its own message rather than the generic one below.
      return { success: false, error: 'Tα»p khΓ΄ng hợp lα»: header khΓ΄ng phαΊ£i JSON hợp lα»' };
    }
    const metadata = headerObj.__metadata__ || null;
    const tensors = [];
    for (const [name, info] of Object.entries(headerObj)) {
      // __metadata__ is reported separately and is not a tensor entry.
      if (name === '__metadata__') continue;
      const dtype = info.dtype || '';
      const shape = info.shape || [];
      const dataOffsets = info.data_offsets || [0, 0];
      const elementCount = computeElementCount(shape);
      // Unknown dtypes are sized as one byte per element.
      const bytesPerEl = BYTES_PER_ELEMENT[dtype] || 1;
      const byteSize = elementCount * bytesPerEl;
      tensors.push({ name, dtype, shape, data_offsets: dataOffsets, elementCount, byteSize });
    }
    return { success: true, data: { tensors, metadata, headerSize } };
  } catch (err) {
    // Anything unexpected (e.g. a non-object header or entry) becomes an error result.
    return { success: false, error: 'Tα»p khΓ΄ng hợp lα»: ' + (err.message || 'lα»i khΓ΄ng xΓ‘c Δα»nh') };
  }
}
| // ─── Tests ────────────────────────────────────────────────────────────── | |
| describe('SafeTensorsParser - parse', () => { | |
// Inputs that parse() must reject, one case per failure branch.
describe('Error handling', () => {
  it('should return error for null buffer (Req 37.4)', () => {
    const { success, error } = parse(null);
    expect(success).toBe(false);
    expect(error).toContain('khΓ΄ng Δα»§ dα»― liα»u Δα» Δα»c header size');
  });

  it('should return error for buffer smaller than 8 bytes (Req 37.4)', () => {
    // Four bytes cannot hold the 8-byte length prefix.
    const { success, error } = parse(new ArrayBuffer(4));
    expect(success).toBe(false);
    expect(error).toContain('khΓ΄ng Δα»§ dα»― liα»u Δα» Δα»c header size');
  });

  it('should return error when header size exceeds remaining data (Req 37.5)', () => {
    // The prefix announces a 1000-byte header, but only 8 bytes follow it.
    const truncatedFile = new ArrayBuffer(16);
    const prefix = new DataView(truncatedFile);
    prefix.setUint32(0, 1000, true);
    prefix.setUint32(4, 0, true);

    const { success, error } = parse(truncatedFile);
    expect(success).toBe(false);
    expect(error).toContain('header size lα»n hΖ‘n dα»― liα»u cΓ³ sαΊ΅n');
  });

  it('should return error for invalid JSON header (Req 37.6)', () => {
    // A correctly sized header whose bytes are not JSON.
    const garbage = new TextEncoder().encode('this is not json{{{');
    const fileBytes = new Uint8Array(8 + garbage.byteLength);
    fileBytes.set(garbage, 8);
    const prefix = new DataView(fileBytes.buffer);
    prefix.setUint32(0, garbage.byteLength, true);
    prefix.setUint32(4, 0, true);

    const { success, error } = parse(fileBytes.buffer);
    expect(success).toBe(false);
    expect(error).toContain('header khΓ΄ng phαΊ£i JSON hợp lα»');
  });
});
// Happy-path cases: well-formed buffers produced by buildSafeTensorsBuffer().
describe('Successful parsing', () => {
  it('should parse a valid safetensors buffer with tensors (Req 37.1, 37.2, 37.3)', () => {
    const header = {
      'weight': { dtype: 'F32', shape: [768, 768], data_offsets: [0, 2359296] },
      'bias': { dtype: 'F32', shape: [768], data_offsets: [2359296, 2362368] }
    };
    const buffer = buildSafeTensorsBuffer(header);
    const result = parse(buffer);
    expect(result.success).toBe(true);
    expect(result.data.tensors).toHaveLength(2);
    expect(result.data.metadata).toBeNull();

    const weight = result.data.tensors.find(t => t.name === 'weight');
    expect(weight).toBeDefined();
    expect(weight.dtype).toBe('F32');
    expect(weight.shape).toEqual([768, 768]);
    expect(weight.data_offsets).toEqual([0, 2359296]);
    expect(weight.elementCount).toBe(768 * 768);
    expect(weight.byteSize).toBe(768 * 768 * 4);

    // Inspect every tensor, not only the first, so a per-entry bug cannot hide
    // behind a correct tensor count.
    const bias = result.data.tensors.find(t => t.name === 'bias');
    expect(bias).toBeDefined();
    expect(bias.dtype).toBe('F32');
    expect(bias.shape).toEqual([768]);
    expect(bias.data_offsets).toEqual([2359296, 2362368]);
    expect(bias.elementCount).toBe(768);
    expect(bias.byteSize).toBe(768 * 4);
  });

  it('should separate __metadata__ from tensors (Req 37.7)', () => {
    const header = {
      '__metadata__': { format: 'pt', framework: 'pytorch' },
      'layer.weight': { dtype: 'F16', shape: [512, 256], data_offsets: [0, 262144] }
    };
    const buffer = buildSafeTensorsBuffer(header);
    const result = parse(buffer);
    expect(result.success).toBe(true);
    expect(result.data.tensors).toHaveLength(1);
    expect(result.data.tensors[0].name).toBe('layer.weight');
    expect(result.data.metadata).toEqual({ format: 'pt', framework: 'pytorch' });
  });

  it('should handle empty header (no tensors, no metadata)', () => {
    const buffer = buildSafeTensorsBuffer({});
    const result = parse(buffer);
    expect(result.success).toBe(true);
    expect(result.data.tensors).toHaveLength(0);
    expect(result.data.metadata).toBeNull();
  });

  it('should return correct headerSize', () => {
    const header = { 'x': { dtype: 'I8', shape: [10], data_offsets: [0, 10] } };
    const buffer = buildSafeTensorsBuffer(header);
    const result = parse(buffer);
    // Assert success first so a rejected parse fails here with a clear message
    // instead of a TypeError when result.data is dereferenced.
    expect(result.success).toBe(true);
    const expectedHeaderSize = new TextEncoder().encode(JSON.stringify(header)).byteLength;
    expect(result.data.headerSize).toBe(expectedHeaderSize);
  });
});
// Size arithmetic: element counts from shapes and byte sizes per dtype.
describe('Element count and byte size calculation', () => {
  it('should compute elementCount as product of shape', () => {
    expect(computeElementCount([3, 4, 5])).toBe(60);
    expect(computeElementCount([1])).toBe(1);
    expect(computeElementCount([])).toBe(1); // scalar
    // A missing shape takes the same scalar branch as an empty one.
    expect(computeElementCount(null)).toBe(1);
    expect(computeElementCount(undefined)).toBe(1);
  });

  it('should compute correct byteSize for each dtype', () => {
    // Expected widths are spelled out here rather than read from
    // BYTES_PER_ELEMENT, so a wrong table entry is caught instead of being
    // compared against itself. Every dtype in the table is listed.
    const dtypes = {
      'BOOL': 1, 'U8': 1, 'I8': 1,
      'U16': 2, 'I16': 2, 'F16': 2, 'BF16': 2,
      'I32': 4, 'U32': 4, 'F32': 4,
      'F64': 8, 'I64': 8, 'U64': 8
    };
    const shape = [10, 20]; // 200 elements
    for (const [dtype, bpe] of Object.entries(dtypes)) {
      const header = { 't': { dtype, shape, data_offsets: [0, 200 * bpe] } };
      const buffer = buildSafeTensorsBuffer(header);
      const result = parse(buffer);
      expect(result.success).toBe(true);
      expect(result.data.tensors[0].elementCount).toBe(200);
      expect(result.data.tensors[0].byteSize).toBe(200 * bpe);
    }
  });
});
| }); | |