File size: 4,373 Bytes
9bd422a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
/**
 * SafeTensorsParser - Bộ Phân Tích SafeTensors
 * Parses SafeTensors binary files by reading the 8-byte little-endian header size,
 * decoding the JSON header, and extracting tensor info and metadata.
 * Requirements: 37.1, 37.2, 37.3, 37.4, 37.5, 37.6, 37.7
 */

class SafeTensorsParser {
    /**
     * Creates a parser and initialises the dtype -> element-size lookup table.
     */
    constructor() {
        /**
         * Bytes per element for each SafeTensors dtype.
         * @type {Record<string, number>}
         */
        this.BYTES_PER_ELEMENT = {
            'BOOL': 1,
            'U8':   1,
            'I8':   1,
            'U16':  2,
            'I16':  2,
            'F16':  2,
            'BF16': 2,
            'I32':  4,
            'U32':  4,
            'F32':  4,
            'F64':  8,
            'I64':  8,
            'U64':  8
        };
    }

    /**
     * Parse an ArrayBuffer containing a .safetensors file.
     *
     * Only the header is inspected: the 8-byte little-endian size prefix and the
     * JSON document that follows it. Tensor payload bytes are never read, and
     * `data_offsets` are reported as found without being validated.
     *
     * This method does not throw; every failure is reported through the
     * `{ success: false, error }` result shape.
     *
     * @param {ArrayBuffer} buffer - The raw file content
     * @returns {{ success: boolean, data?: { tensors: Array, metadata: Object|null, headerSize: number }, error?: string }}
     */
    parse(buffer) {
        try {
            // Req 37.4: buffer must be at least 8 bytes
            if (!buffer || buffer.byteLength < 8) {
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: không đủ dữ liệu để đọc header size'
                };
            }

            // Req 37.1: Read first 8 bytes as little-endian uint64 (header size).
            // Both 32-bit halves are combined so that a non-zero high word is not
            // silently dropped. The result may exceed Number.MAX_SAFE_INTEGER, but
            // it is only compared against the buffer length, where the precision
            // loss cannot turn an oversized header into an accepted one.
            const view = new DataView(buffer);
            const headerSizeLow = view.getUint32(0, true);
            const headerSizeHigh = view.getUint32(4, true);
            const headerSize = headerSizeHigh * 0x100000000 + headerSizeLow;

            // Req 37.5: header size must not exceed remaining data
            if (headerSize > buffer.byteLength - 8) {
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: header size lớn hơn dữ liệu có sẵn'
                };
            }

            // Req 37.2: Read N bytes from offset 8, decode as UTF-8, parse as JSON
            const headerBytes = new Uint8Array(buffer, 8, headerSize);
            const headerString = new TextDecoder('utf-8').decode(headerBytes);

            let headerObj;
            try {
                headerObj = JSON.parse(headerString);
            } catch (_e) {
                // Req 37.6: invalid JSON
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: header không phải JSON hợp lệ'
                };
            }

            // The header must be a JSON object mapping tensor names to their info;
            // arrays, primitives and null are valid JSON but not a valid header.
            if (headerObj === null || typeof headerObj !== 'object' || Array.isArray(headerObj)) {
                return {
                    success: false,
                    error: 'Tệp không hợp lệ: header phải là một đối tượng JSON'
                };
            }

            // Req 37.7: Separate __metadata__ from tensor entries
            const metadata = headerObj.__metadata__ || null;
            const tensors = [];

            for (const [name, info] of Object.entries(headerObj)) {
                if (name === '__metadata__') continue;

                const dtype = info.dtype || '';
                const shape = info.shape || [];
                const dataOffsets = info.data_offsets || [0, 0];

                const elementCount = this._computeElementCount(shape);

                // Own-property lookup only: a dtype such as "constructor" must not
                // resolve to something inherited from Object.prototype.
                const knownSize = Object.prototype.hasOwnProperty.call(this.BYTES_PER_ELEMENT, dtype)
                    ? this.BYTES_PER_ELEMENT[dtype]
                    : undefined;
                // Unknown dtypes fall back to 1 byte per element.
                const bytesPerEl = knownSize || 1;
                const byteSize = elementCount * bytesPerEl;

                tensors.push({
                    name,
                    dtype,
                    shape,
                    data_offsets: dataOffsets,
                    elementCount,
                    byteSize
                });
            }

            // Req 37.3: Return parsed data
            return {
                success: true,
                data: {
                    tensors,
                    metadata,
                    headerSize
                }
            };
        } catch (err) {
            // Anything unexpected (e.g. a malformed tensor entry) is reported as an
            // invalid file rather than propagated to the caller.
            return {
                success: false,
                error: 'Tệp không hợp lệ: ' + (err.message || 'lỗi không xác định')
            };
        }
    }

    /**
     * Compute total element count as the product of all shape dimensions.
     * Returns 1 for a missing or empty shape (scalar tensors have shape []).
     * @param {number[]} shape
     * @returns {number}
     */
    _computeElementCount(shape) {
        if (!shape || shape.length === 0) return 1;
        return shape.reduce((acc, dim) => acc * dim, 1);
    }
}

// Expose the parser as a browser global. The guard keeps the file loadable in
// environments that have no `window` (e.g. Node-based test runners), where an
// unconditional assignment would throw a ReferenceError.
if (typeof window !== 'undefined') {
    window.SafeTensorsParser = SafeTensorsParser;
}