model-explorer / js /tests /safeTensorsParser.test.js
mr4's picture
Upload 71 files
9bd422a verified
/**
* Unit tests for SafeTensorsParser
* Validates: Requirements 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
*/
import { describe, it, expect, beforeEach } from 'vitest';
// ─── Re-implement the pure logic from SafeTensorsParser for testability ──
/**
 * Storage width, in bytes, of one element for each dtype tag the parser
 * in this file looks up.
 */
const BYTES_PER_ELEMENT = {
  // 1-byte types
  BOOL: 1,
  U8: 1,
  I8: 1,
  // 2-byte types
  U16: 2,
  I16: 2,
  F16: 2,
  BF16: 2,
  // 4-byte types
  I32: 4,
  U32: 4,
  F32: 4,
  // 8-byte types
  F64: 8,
  I64: 8,
  U64: 8,
};
/**
 * Number of elements described by a tensor shape.
 *
 * @param {number[]} [shape] - Dimension sizes.
 * @returns {number} Product of all dimensions; 1 when `shape` is missing
 *   or has no dimensions (treated as a scalar).
 */
function computeElementCount(shape) {
  const hasDimensions = Boolean(shape) && shape.length !== 0;
  if (!hasDimensions) {
    return 1;
  }

  const multiply = (product, dim) => product * dim;
  return shape.reduce(multiply, 1);
}
/**
 * Build a fake .safetensors ArrayBuffer from a header object.
 *
 * The result holds only an 8-byte size prefix followed by the UTF-8 JSON
 * of `headerObj`; no tensor payload is appended.
 *
 * @param {object} headerObj - Value serialised with JSON.stringify.
 * @returns {ArrayBuffer} Buffer of length 8 + encoded header length.
 */
function buildSafeTensorsBuffer(headerObj) {
  const PREFIX_LENGTH = 8;
  const payload = new TextEncoder().encode(JSON.stringify(headerObj));

  // One backing store for both the prefix and the JSON bytes.
  const file = new Uint8Array(PREFIX_LENGTH + payload.byteLength);

  // Size prefix as two little-endian 32-bit words: low word carries the
  // header byte length, high word is zero.
  const prefix = new DataView(file.buffer, 0, PREFIX_LENGTH);
  prefix.setUint32(0, payload.byteLength, true);
  prefix.setUint32(4, 0, true);

  file.set(payload, PREFIX_LENGTH);
  return file.buffer;
}
/**
 * Minimal parse function mirroring SafeTensorsParser.parse()
 *
 * Reads an 8-byte little-endian header size from the start of `buffer`,
 * decodes that many following bytes as UTF-8 JSON, and turns every entry
 * except `__metadata__` into a tensor descriptor.
 *
 * NOTE(review): the real SafeTensorsParser is not visible from this file;
 * confirm it also honours the full 64-bit size field as done here.
 * NOTE(review): the Vietnamese error strings look mis-encoded in this copy
 * of the file; they are kept byte-for-byte because the assertions in this
 * file match on them.
 *
 * @param {ArrayBuffer} buffer - Raw file contents.
 * @returns {{success: true, data: {tensors: Array<object>, metadata: (object|null), headerSize: number}}
 *   | {success: false, error: string}} Never throws; every failure is
 *   reported through `success: false`.
 */
function parse(buffer) {
  try {
    if (!buffer || buffer.byteLength < 8) {
      return { success: false, error: 'Tệp khΓ΄ng hợp lệ: khΓ΄ng Δ‘α»§ dα»― liệu để đọc header size' };
    }

    const view = new DataView(buffer);
    // The size field is 64 bits wide. Combine both 32-bit words so a
    // non-zero high word cannot be silently dropped and the file parsed
    // with a truncated size. Values beyond 2^53 lose precision but still
    // compare greater than any possible byteLength.
    const sizeLow = view.getUint32(0, true);
    const sizeHigh = view.getUint32(4, true);
    const headerSize = sizeHigh * 0x100000000 + sizeLow;

    if (headerSize > buffer.byteLength - 8) {
      return { success: false, error: 'Tệp khΓ΄ng hợp lệ: header size lα»›n hΖ‘n dα»― liệu cΓ³ sαΊ΅n' };
    }

    const headerBytes = new Uint8Array(buffer, 8, headerSize);
    const headerString = new TextDecoder('utf-8').decode(headerBytes);

    let headerObj;
    try {
      headerObj = JSON.parse(headerString);
    } catch (_e) {
      return { success: false, error: 'Tệp khΓ΄ng hợp lệ: header khΓ΄ng phαΊ£i JSON hợp lệ' };
    }

    const metadata = headerObj.__metadata__ || null;
    const tensors = [];
    for (const [name, info] of Object.entries(headerObj)) {
      if (name === '__metadata__') continue;

      const dtype = info.dtype || '';
      const shape = info.shape || [];
      const dataOffsets = info.data_offsets || [0, 0];
      const elementCount = computeElementCount(shape);
      // Own-property lookup: a dtype such as "toString" or "__proto__" must
      // fall back to 1 instead of resolving to an inherited member, which
      // would turn byteSize into NaN.
      const bytesPerEl = Object.prototype.hasOwnProperty.call(BYTES_PER_ELEMENT, dtype)
        ? BYTES_PER_ELEMENT[dtype]
        : 1;
      const byteSize = elementCount * bytesPerEl;

      tensors.push({ name, dtype, shape, data_offsets: dataOffsets, elementCount, byteSize });
    }

    return { success: true, data: { tensors, metadata, headerSize } };
  } catch (err) {
    // Anything unexpected (e.g. a null tensor entry) is reported, not thrown.
    return { success: false, error: 'Tệp khΓ΄ng hợp lệ: ' + (err.message || 'lα»—i khΓ΄ng xΓ‘c Δ‘α»‹nh') };
  }
}
// ─── Tests ──────────────────────────────────────────────────────────────
describe('SafeTensorsParser - parse', () => {
  // Shared expectation: the parser rejected the input and its message
  // includes `fragment`.
  const expectFailure = (outcome, fragment) => {
    expect(outcome.success).toBe(false);
    expect(outcome.error).toContain(fragment);
  };

  // Serialise a header object into a buffer and run it through parse().
  const parseHeader = (header) => parse(buildSafeTensorsBuffer(header));

  describe('Error handling', () => {
    const TOO_SHORT_FRAGMENT = 'khΓ΄ng Δ‘α»§ dα»― liệu để đọc header size';

    it('should return error for null buffer (Req 37.4)', () => {
      expectFailure(parse(null), TOO_SHORT_FRAGMENT);
    });

    it('should return error for buffer smaller than 8 bytes (Req 37.4)', () => {
      expectFailure(parse(new ArrayBuffer(4)), TOO_SHORT_FRAGMENT);
    });

    it('should return error when header size exceeds remaining data (Req 37.5)', () => {
      // 16-byte file whose size prefix claims a 1000-byte header.
      const truncated = new ArrayBuffer(16);
      const prefix = new DataView(truncated);
      prefix.setUint32(0, 1000, true);
      prefix.setUint32(4, 0, true);

      expectFailure(parse(truncated), 'header size lα»›n hΖ‘n dα»― liệu cΓ³ sαΊ΅n');
    });

    it('should return error for invalid JSON header (Req 37.6)', () => {
      // Correct size prefix followed by text that is not JSON.
      const garbage = new TextEncoder().encode('this is not json{{{');
      const file = new Uint8Array(8 + garbage.byteLength);
      const prefix = new DataView(file.buffer);
      prefix.setUint32(0, garbage.byteLength, true);
      prefix.setUint32(4, 0, true);
      file.set(garbage, 8);

      expectFailure(parse(file.buffer), 'header khΓ΄ng phαΊ£i JSON hợp lệ');
    });
  });

  describe('Successful parsing', () => {
    it('should parse a valid safetensors buffer with tensors (Req 37.1, 37.2, 37.3)', () => {
      const { success, data } = parseHeader({
        weight: { dtype: 'F32', shape: [768, 768], data_offsets: [0, 2359296] },
        bias: { dtype: 'F32', shape: [768], data_offsets: [2359296, 2362368] },
      });

      expect(success).toBe(true);
      expect(data.tensors).toHaveLength(2);
      expect(data.metadata).toBeNull();

      const weight = data.tensors.find(({ name }) => name === 'weight');
      expect(weight.dtype).toBe('F32');
      expect(weight.shape).toEqual([768, 768]);
      expect(weight.elementCount).toBe(768 * 768);
      expect(weight.byteSize).toBe(768 * 768 * 4);
    });

    it('should separate __metadata__ from tensors (Req 37.7)', () => {
      const { success, data } = parseHeader({
        __metadata__: { format: 'pt', framework: 'pytorch' },
        'layer.weight': { dtype: 'F16', shape: [512, 256], data_offsets: [0, 262144] },
      });

      expect(success).toBe(true);
      expect(data.tensors).toHaveLength(1);
      expect(data.tensors[0].name).toBe('layer.weight');
      expect(data.metadata).toEqual({ format: 'pt', framework: 'pytorch' });
    });

    it('should handle empty header (no tensors, no metadata)', () => {
      const { success, data } = parseHeader({});

      expect(success).toBe(true);
      expect(data.tensors).toHaveLength(0);
      expect(data.metadata).toBeNull();
    });

    it('should return correct headerSize', () => {
      const header = { x: { dtype: 'I8', shape: [10], data_offsets: [0, 10] } };
      const outcome = parseHeader(header);
      // Expected size is the UTF-8 byte length of the serialised header.
      const serialisedLength = new TextEncoder().encode(JSON.stringify(header)).byteLength;

      expect(outcome.data.headerSize).toBe(serialisedLength);
    });
  });

  describe('Element count and byte size calculation', () => {
    it('should compute elementCount as product of shape', () => {
      expect(computeElementCount([3, 4, 5])).toBe(60);
      expect(computeElementCount([1])).toBe(1);
      expect(computeElementCount([])).toBe(1); // scalar
    });

    it('should compute correct byteSize for each dtype', () => {
      const ELEMENTS = 200;
      const shape = [10, 20]; // 10 * 20 = ELEMENTS
      const widthByDtype = {
        F32: 4,
        F16: 2,
        BF16: 2,
        I8: 1,
        I64: 8,
        BOOL: 1,
        U32: 4,
        F64: 8,
      };

      Object.entries(widthByDtype).forEach(([dtype, width]) => {
        const { success, data } = parseHeader({
          t: { dtype, shape, data_offsets: [0, ELEMENTS * width] },
        });

        expect(success).toBe(true);
        expect(data.tensors[0].elementCount).toBe(ELEMENTS);
        expect(data.tensors[0].byteSize).toBe(ELEMENTS * width);
      });
    });
  });
});