File size: 8,811 Bytes
9bd422a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/**
 * Unit tests for SafeTensorsParser
 * Validates: Requirements 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
 */

import { describe, it, expect, beforeEach } from 'vitest';

// ─── Re-implement the pure logic from SafeTensorsParser for testability ──

// Width in bytes of one element, keyed by the dtype tag found in a tensor's
// header entry. Grouped by width; key order is kept stable.
const BYTES_PER_ELEMENT = {
    // 1-byte element types
    BOOL: 1,
    U8: 1,
    I8: 1,
    // 2-byte element types
    U16: 2,
    I16: 2,
    F16: 2,
    BF16: 2,
    // 4-byte element types
    I32: 4,
    U32: 4,
    F32: 4,
    // 8-byte element types
    F64: 8,
    I64: 8,
    U64: 8
};

/**
 * Number of elements described by a tensor shape.
 *
 * @param {number[]|null|undefined} shape - Dimension sizes; a missing or
 *   empty shape is treated as a scalar.
 * @returns {number} Product of all dimensions, or 1 for a scalar.
 */
function computeElementCount(shape) {
    const isScalar = !shape || shape.length === 0;
    if (isScalar) {
        return 1;
    }

    let product = 1;
    shape.forEach((dimension) => {
        product = product * dimension;
    });
    return product;
}

/**
 * Assemble an in-memory .safetensors-style buffer that contains only the
 * size prefix and the JSON header (no tensor payload).
 *
 * Layout: 8 bytes holding the header length (little-endian, high word zero)
 * followed by the UTF-8 encoded JSON of `headerObj`.
 *
 * @param {object} headerObj - Header content to serialize with JSON.stringify.
 * @returns {ArrayBuffer} Buffer of length 8 + encoded header length.
 */
function buildSafeTensorsBuffer(headerObj) {
    const PREFIX_LENGTH = 8;
    const jsonBytes = new TextEncoder().encode(JSON.stringify(headerObj));
    const jsonLength = jsonBytes.byteLength;

    const bytes = new Uint8Array(PREFIX_LENGTH + jsonLength);

    // The length prefix is written as two 32-bit halves: low word, then a zero high word.
    const prefix = new DataView(bytes.buffer, 0, PREFIX_LENGTH);
    prefix.setUint32(0, jsonLength, true);
    prefix.setUint32(4, 0, true);

    // Header JSON starts immediately after the prefix.
    bytes.set(jsonBytes, PREFIX_LENGTH);

    return bytes.buffer;
}

/**
 * Minimal parse function mirroring SafeTensorsParser.parse().
 *
 * Reads the 8-byte little-endian header-size prefix, decodes the JSON header
 * that follows it, and produces one summary record per tensor entry. The
 * `__metadata__` key is returned separately and is not treated as a tensor.
 *
 * Failures are reported through the returned object rather than thrown.
 *
 * @param {ArrayBuffer} buffer - Bytes containing the size prefix and header.
 * @returns {{success: boolean, data?: {tensors: Array<object>, metadata: (object|null), headerSize: number}, error?: string}}
 *   On success, `data.tensors` holds `{ name, dtype, shape, data_offsets,
 *   elementCount, byteSize }` records; on failure, `error` holds the message.
 */
function parse(buffer) {
    try {
        if (!buffer || buffer.byteLength < 8) {
            return { success: false, error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size' };
        }
        const view = new DataView(buffer);
        // The prefix is a 64-bit value. Both 32-bit halves are combined so a
        // non-zero high word is rejected by the bounds check below instead of
        // being silently dropped. Values past 2^53 lose precision here but are
        // still far larger than any buffer, so they fail the same check.
        const sizeLow = view.getUint32(0, true);
        const sizeHigh = view.getUint32(4, true);
        const headerSize = sizeHigh * 0x100000000 + sizeLow;
        if (headerSize > buffer.byteLength - 8) {
            return { success: false, error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn' };
        }
        const headerBytes = new Uint8Array(buffer, 8, headerSize);
        const headerString = new TextDecoder('utf-8').decode(headerBytes);
        let headerObj;
        try {
            headerObj = JSON.parse(headerString);
        } catch (_e) {
            return { success: false, error: 'Tệp không hợp lệ: header không phải JSON hợp lệ' };
        }
        const metadata = headerObj.__metadata__ || null;
        const tensors = [];
        for (const [name, info] of Object.entries(headerObj)) {
            if (name === '__metadata__') continue;
            const dtype = info.dtype || '';
            const shape = info.shape || [];
            const dataOffsets = info.data_offsets || [0, 0];
            const elementCount = computeElementCount(shape);
            // Own-property lookup only: a dtype such as "toString" must not
            // resolve to an inherited Object.prototype member. Unknown dtypes
            // fall back to 1 byte per element.
            const bytesPerEl = Object.prototype.hasOwnProperty.call(BYTES_PER_ELEMENT, dtype)
                ? BYTES_PER_ELEMENT[dtype]
                : 1;
            const byteSize = elementCount * bytesPerEl;
            tensors.push({ name, dtype, shape, data_offsets: dataOffsets, elementCount, byteSize });
        }
        return { success: true, data: { tensors, metadata, headerSize } };
    } catch (err) {
        // Anything unexpected (e.g. a null header entry) becomes an error result.
        return { success: false, error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định') };
    }
}

// ─── Tests ──────────────────────────────────────────────────────────────

// Test suite for the local parse() mirror: error paths, successful header
// parsing, and the element-count / byte-size arithmetic.
describe('SafeTensorsParser - parse', () => {
    describe('Error handling', () => {
        it('should return error for null buffer (Req 37.4)', () => {
            const result = parse(null);
            expect(result.success).toBe(false);
            expect(result.error).toContain('không đủ dữ liệu để đọc header size');
        });

        it('should return error for buffer smaller than 8 bytes (Req 37.4)', () => {
            // 4 bytes cannot hold the 8-byte size prefix.
            const buffer = new ArrayBuffer(4);
            const result = parse(buffer);
            expect(result.success).toBe(false);
            expect(result.error).toContain('không đủ dữ liệu để đọc header size');
        });

        it('should return error when header size exceeds remaining data (Req 37.5)', () => {
            // Prefix declares a 1000-byte header, but only 8 bytes follow it.
            const buffer = new ArrayBuffer(16);
            const view = new DataView(buffer);
            view.setUint32(0, 1000, true); // header size = 1000
            view.setUint32(4, 0, true);
            const result = parse(buffer);
            expect(result.success).toBe(false);
            expect(result.error).toContain('header size lớn hơn dữ liệu có sẵn');
        });

        it('should return error for invalid JSON header (Req 37.6)', () => {
            // Correctly sized prefix, but the header bytes are not JSON.
            const invalidJson = 'this is not json{{{';
            const encoder = new TextEncoder();
            const headerBytes = encoder.encode(invalidJson);
            const buffer = new ArrayBuffer(8 + headerBytes.byteLength);
            const view = new DataView(buffer);
            view.setUint32(0, headerBytes.byteLength, true);
            view.setUint32(4, 0, true);
            new Uint8Array(buffer, 8).set(headerBytes);

            const result = parse(buffer);
            expect(result.success).toBe(false);
            expect(result.error).toContain('header không phải JSON hợp lệ');
        });
    });

    describe('Successful parsing', () => {
        it('should parse a valid safetensors buffer with tensors (Req 37.1, 37.2, 37.3)', () => {
            const header = {
                'weight': { dtype: 'F32', shape: [768, 768], data_offsets: [0, 2359296] },
                'bias': { dtype: 'F32', shape: [768], data_offsets: [2359296, 2362368] }
            };
            const buffer = buildSafeTensorsBuffer(header);
            const result = parse(buffer);

            expect(result.success).toBe(true);
            expect(result.data.tensors).toHaveLength(2);
            expect(result.data.metadata).toBeNull();

            const weight = result.data.tensors.find(t => t.name === 'weight');
            expect(weight.dtype).toBe('F32');
            expect(weight.shape).toEqual([768, 768]);
            expect(weight.elementCount).toBe(768 * 768);
            expect(weight.byteSize).toBe(768 * 768 * 4);

            // The second tensor is checked too, so both entries are verified.
            const bias = result.data.tensors.find(t => t.name === 'bias');
            expect(bias.dtype).toBe('F32');
            expect(bias.shape).toEqual([768]);
            expect(bias.elementCount).toBe(768);
            expect(bias.byteSize).toBe(768 * 4);
        });

        it('should separate __metadata__ from tensors (Req 37.7)', () => {
            const header = {
                '__metadata__': { format: 'pt', framework: 'pytorch' },
                'layer.weight': { dtype: 'F16', shape: [512, 256], data_offsets: [0, 262144] }
            };
            const buffer = buildSafeTensorsBuffer(header);
            const result = parse(buffer);

            expect(result.success).toBe(true);
            expect(result.data.tensors).toHaveLength(1);
            expect(result.data.tensors[0].name).toBe('layer.weight');
            expect(result.data.metadata).toEqual({ format: 'pt', framework: 'pytorch' });
        });

        it('should handle empty header (no tensors, no metadata)', () => {
            const buffer = buildSafeTensorsBuffer({});
            const result = parse(buffer);

            expect(result.success).toBe(true);
            expect(result.data.tensors).toHaveLength(0);
            expect(result.data.metadata).toBeNull();
        });

        it('should return correct headerSize', () => {
            const header = { 'x': { dtype: 'I8', shape: [10], data_offsets: [0, 10] } };
            const buffer = buildSafeTensorsBuffer(header);
            const result = parse(buffer);

            // Assert success first so a failed parse reports clearly instead of
            // surfacing as a TypeError on result.data.
            expect(result.success).toBe(true);

            const expectedHeaderSize = new TextEncoder().encode(JSON.stringify(header)).byteLength;
            expect(result.data.headerSize).toBe(expectedHeaderSize);
        });
    });

    describe('Element count and byte size calculation', () => {
        it('should compute elementCount as product of shape', () => {
            expect(computeElementCount([3, 4, 5])).toBe(60);
            expect(computeElementCount([1])).toBe(1);
            expect(computeElementCount([])).toBe(1); // scalar
        });

        it('should compute correct byteSize for each dtype', () => {
            // Expected widths are spelled out independently of BYTES_PER_ELEMENT
            // so a wrong table entry is caught; every declared dtype is covered.
            const dtypes = {
                'BOOL': 1, 'U8': 1, 'I8': 1,
                'U16': 2, 'I16': 2, 'F16': 2, 'BF16': 2,
                'I32': 4, 'U32': 4, 'F32': 4,
                'F64': 8, 'I64': 8, 'U64': 8
            };
            const shape = [10, 20]; // 200 elements

            for (const [dtype, bpe] of Object.entries(dtypes)) {
                const header = { 't': { dtype, shape, data_offsets: [0, 200 * bpe] } };
                const buffer = buildSafeTensorsBuffer(header);
                const result = parse(buffer);

                expect(result.success).toBe(true);
                expect(result.data.tensors[0].elementCount).toBe(200);
                expect(result.data.tensors[0].byteSize).toBe(200 * bpe);
            }
        });
    });
});