Spaces:

mr4
/

model-explorer

Running

File size: 21,570 Bytes

9bd422a

/**
 * TFLiteParser - TFLite model parser.
 * Parses TFLite (.tflite) FlatBuffer binary files using DataView,
 * extracting model metadata, operators, tensors, and subgraph info.
 * Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6
 */

class TFLiteParser {
    /**
     * Builds the enum lookup tables used while decoding a model.
     * The tables are instance properties so callers can inspect them.
     */
    constructor() {
        /**
         * TFLite BuiltinOperator enum lookup table.
         * Maps numeric code → operator name string.
         * Source: TFLite schema.fbs BuiltinOperator enum
         * @type {Record<number, string>}
         */
        this.BUILTIN_OPERATORS = {
            0: 'ADD',
            1: 'AVERAGE_POOL_2D',
            2: 'CONCATENATION',
            3: 'CONV_2D',
            4: 'DEPTHWISE_CONV_2D',
            5: 'DEPTH_TO_SPACE',
            6: 'DEQUANTIZE',
            7: 'EMBEDDING_LOOKUP',
            8: 'FLOOR',
            9: 'FULLY_CONNECTED',
            10: 'HASHTABLE_LOOKUP',
            11: 'L2_NORMALIZATION',
            12: 'L2_POOL_2D',
            13: 'LOCAL_RESPONSE_NORMALIZATION',
            14: 'LOGISTIC',
            15: 'LSH_PROJECTION',
            16: 'LSTM',
            17: 'MAX_POOL_2D',
            18: 'MUL',
            19: 'RELU',
            20: 'RELU_N1_TO_1',
            21: 'RELU6',
            22: 'RESHAPE',
            23: 'RESIZE_BILINEAR',
            24: 'RNN',
            25: 'SOFTMAX',
            26: 'SPACE_TO_DEPTH',
            27: 'SVDF',
            28: 'TANH',
            29: 'CONCAT_EMBEDDINGS',
            30: 'SKIP_GRAM',
            31: 'CALL',
            32: 'CUSTOM',
            33: 'EMBEDDING_LOOKUP_SPARSE',
            34: 'PAD',
            35: 'UNIDIRECTIONAL_SEQUENCE_RNN',
            36: 'GATHER',
            37: 'BATCH_TO_SPACE_ND',
            38: 'SPACE_TO_BATCH_ND',
            39: 'TRANSPOSE',
            40: 'MEAN',
            41: 'SUB',
            42: 'DIV',
            43: 'SQUEEZE',
            44: 'UNIDIRECTIONAL_SEQUENCE_LSTM',
            45: 'STRIDED_SLICE',
            46: 'BIDIRECTIONAL_SEQUENCE_RNN',
            47: 'EXP',
            48: 'TOPK_V2',
            49: 'SPLIT',
            50: 'LOG_SOFTMAX',
            51: 'DELEGATE',
            52: 'BIDIRECTIONAL_SEQUENCE_LSTM',
            53: 'CAST',
            54: 'PRELU',
            55: 'MAXIMUM',
            56: 'ARG_MAX',
            57: 'MINIMUM',
            58: 'LESS',
            59: 'NEG',
            60: 'PADV2',
            61: 'GREATER',
            62: 'GREATER_EQUAL',
            63: 'LESS_EQUAL',
            64: 'SELECT',
            65: 'SLICE',
            66: 'SIN',
            67: 'TRANSPOSE_CONV',
            68: 'SPARSE_TO_DENSE',
            69: 'TILE',
            70: 'EXPAND_DIMS',
            71: 'EQUAL',
            72: 'NOT_EQUAL',
            73: 'LOG',
            74: 'SUM',
            75: 'SQRT',
            76: 'RSQRT',
            77: 'SHAPE',
            78: 'POW',
            79: 'ARG_MIN',
            80: 'FAKE_QUANT',
            81: 'REDUCE_PROD',
            82: 'REDUCE_MAX',
            83: 'PACK',
            84: 'LOGICAL_OR',
            85: 'ONE_HOT',
            86: 'LOGICAL_AND',
            87: 'LOGICAL_NOT',
            88: 'UNPACK',
            89: 'REDUCE_MIN',
            90: 'FLOOR_DIV',
            91: 'REDUCE_ANY',
            92: 'SQUARE',
            93: 'ZEROS_LIKE',
            94: 'FILL',
            95: 'FLOOR_MOD',
            96: 'RANGE',
            97: 'RESIZE_NEAREST_NEIGHBOR',
            98: 'LEAKY_RELU',
            99: 'SQUARED_DIFFERENCE',
            100: 'MIRROR_PAD',
            101: 'ABS',
            102: 'SPLIT_V',
            103: 'UNIQUE',
            104: 'CEIL',
            105: 'REVERSE_V2',
            106: 'ADD_N',
            107: 'GATHER_ND',
            108: 'COS',
            109: 'WHERE',
            110: 'RANK',
            111: 'ELU',
            112: 'REVERSE_SEQUENCE',
            113: 'MATRIX_DIAG',
            114: 'QUANTIZE',
            115: 'MATRIX_SET_DIAG',
            116: 'ROUND',
            117: 'HARD_SWISH',
            118: 'IF',
            119: 'WHILE',
            120: 'NON_MAX_SUPPRESSION_V4',
            121: 'NON_MAX_SUPPRESSION_V5',
            122: 'SCATTER_ND',
            123: 'SELECT_V2',
            124: 'DENSIFY',
            125: 'SEGMENT_SUM',
            126: 'BATCH_MATMUL',
            127: 'PLACEHOLDER_FOR_GREATER_OP_CODES',
            128: 'CUMSUM',
            129: 'CALL_ONCE',
            130: 'BROADCAST_TO',
            131: 'RFFT2D',
            132: 'CONV_3D',
            133: 'IMAG',
            134: 'REAL',
            135: 'COMPLEX_ABS',
            136: 'HASHTABLE',
            137: 'HASHTABLE_FIND',
            138: 'HASHTABLE_IMPORT',
            139: 'HASHTABLE_SIZE',
            140: 'REDUCE_ALL',
            141: 'CONV_3D_TRANSPOSE',
            142: 'VAR_HANDLE',
            143: 'READ_VARIABLE',
            144: 'ASSIGN_VARIABLE',
            145: 'BROADCAST_ARGS',
            146: 'RANDOM_STANDARD_NORMAL',
            147: 'BUCKETIZE',
            148: 'RANDOM_UNIFORM',
            149: 'MULTINOMIAL',
            150: 'GELU',
            151: 'DYNAMIC_UPDATE_SLICE',
            152: 'RELU_0_TO_1',
            153: 'UNSORTED_SEGMENT_PROD',
            154: 'UNSORTED_SEGMENT_MAX',
            155: 'UNSORTED_SEGMENT_SUM',
            156: 'ATAN2',
            157: 'UNSORTED_SEGMENT_MIN',
            158: 'SIGN',
            159: 'BITCAST',
            160: 'BITWISE_XOR',
            161: 'RIGHT_SHIFT',
            162: 'STABLEHLO_LOGISTIC',
            163: 'STABLEHLO_ADD',
            164: 'STABLEHLO_DIVIDE',
            165: 'STABLEHLO_MULTIPLY',
            166: 'STABLEHLO_MAXIMUM',
            167: 'STABLEHLO_RESHAPE',
            168: 'STABLEHLO_CLAMP',
            169: 'STABLEHLO_CONCATENATE',
            170: 'STABLEHLO_BROADCAST_IN_DIM',
            171: 'STABLEHLO_CONVOLUTION',
            172: 'STABLEHLO_SLICE',
            173: 'STABLEHLO_CUSTOM_CALL',
            174: 'STABLEHLO_REDUCE',
            175: 'STABLEHLO_ABS',
            176: 'STABLEHLO_AND',
            177: 'STABLEHLO_COSINE',
            178: 'STABLEHLO_EXPONENTIAL',
            179: 'STABLEHLO_FLOOR',
            180: 'STABLEHLO_LOG',
            181: 'STABLEHLO_MINIMUM',
            182: 'STABLEHLO_NEGATE',
            183: 'STABLEHLO_OR',
            184: 'STABLEHLO_POWER',
            185: 'STABLEHLO_REMAINDER',
            186: 'STABLEHLO_RSQRT',
            187: 'STABLEHLO_SELECT',
            188: 'STABLEHLO_SUBTRACT',
            189: 'STABLEHLO_TANH',
            190: 'STABLEHLO_SCATTER',
            191: 'STABLEHLO_COMPARE',
            192: 'STABLEHLO_CONVERT',
            193: 'STABLEHLO_DYNAMIC_SLICE',
            194: 'STABLEHLO_DYNAMIC_UPDATE_SLICE',
            195: 'STABLEHLO_PAD',
            196: 'STABLEHLO_IOTA',
            197: 'STABLEHLO_DOT_GENERAL',
            198: 'STABLEHLO_REDUCE_WINDOW',
            199: 'STABLEHLO_SORT',
            200: 'STABLEHLO_WHILE',
            201: 'STABLEHLO_GATHER',
            202: 'STABLEHLO_TRANSPOSE',
            203: 'DILATE',
            204: 'STABLEHLO_RNG_BIT_GENERATOR',
            205: 'REDUCE_WINDOW',
            206: 'STABLEHLO_COMPOSITE',
            207: 'STABLEHLO_SHIFT_LEFT',
            208: 'STABLEHLO_CBRT'
        };

        /**
         * TFLite TensorType enum lookup table.
         * Maps numeric code → type name string.
         * Source: TFLite schema.fbs TensorType enum. Codes 13 and 14 are the
         * RESOURCE and VARIANT handle types; the numeric types added later
         * (UINT32, UINT16, INT4, BFLOAT16) follow them at 15-18.
         * @type {Record<number, string>}
         */
        this.TENSOR_TYPES = {
            0: 'FLOAT32',
            1: 'FLOAT16',
            2: 'INT32',
            3: 'UINT8',
            4: 'INT64',
            5: 'STRING',
            6: 'BOOL',
            7: 'INT16',
            8: 'COMPLEX64',
            9: 'INT8',
            10: 'FLOAT64',
            11: 'COMPLEX128',
            12: 'UINT64',
            13: 'RESOURCE',
            14: 'VARIANT',
            15: 'UINT32',
            16: 'UINT16',
            17: 'INT4',
            18: 'BFLOAT16'
        };

        /**
         * Bytes per element for each TFLite tensor type.
         * Used for estimating memory footprint. Types without an entry
         * (e.g. RESOURCE, VARIANT, unknown codes) are counted as 1 byte
         * per element by _readTensors.
         * @type {Record<string, number>}
         */
        this.BYTES_PER_ELEMENT = {
            'FLOAT32': 4,
            'FLOAT16': 2,
            'INT32': 4,
            'UINT8': 1,
            'INT64': 8,
            'STRING': 1,
            'BOOL': 1,
            'INT16': 2,
            'COMPLEX64': 8,
            'INT8': 1,
            'FLOAT64': 8,
            'COMPLEX128': 16,
            'UINT64': 8,
            'UINT32': 4,
            'UINT16': 2,
            'INT4': 0.5,
            'BFLOAT16': 2
        };

        // One decoder is reused for every string read instead of allocating per call.
        this._utf8Decoder = new TextDecoder('utf-8');
    }

    // ─── Public API ───────────────────────────────────────────────────────────

    /**
     * Parse an ArrayBuffer containing a .tflite file.
     * Never throws: every failure is reported through the returned object.
     * @param {ArrayBuffer} buffer - The raw file content
     * @returns {{ success: boolean, data?: TFLiteModelData, error?: string }}
     */
    parse(buffer) {
        try {
            // Validate buffer
            if (!buffer || !(buffer instanceof ArrayBuffer) || buffer.byteLength === 0) {
                return { success: false, error: 'File không hợp lệ: buffer rỗng' };
            }

            // Need at least the 4-byte root offset plus a 4-byte table header.
            if (buffer.byteLength < 8) {
                return { success: false, error: 'File không hợp lệ: không đủ dữ liệu' };
            }

            const view = new DataView(buffer);

            // Read root table offset (first 4 bytes, little-endian uint32)
            const rootTableOffset = view.getUint32(0, true);

            // Validate root table offset
            if (rootTableOffset >= buffer.byteLength || rootTableOffset < 4) {
                return { success: false, error: 'File không hợp lệ: cấu trúc FlatBuffer lỗi' };
            }

            // Read Model table
            const model = this._readModelTable(view, rootTableOffset, buffer.byteLength);

            return { success: true, data: model };
        } catch (err) {
            // A RangeError from DataView / typed-array access means an offset or
            // length stored in the file points outside the buffer, i.e. the
            // FlatBuffer is corrupt. Report it with the parser's own message
            // rather than the engine's internal error text.
            if (err instanceof RangeError) {
                return { success: false, error: 'File không hợp lệ: cấu trúc FlatBuffer lỗi' };
            }
            return {
                success: false,
                error: err.message || 'Lỗi không xác định khi parse file TFLite'
            };
        }
    }

    // ─── FlatBuffer Helpers ───────────────────────────────────────────────────

    /**
     * Read a vtable field offset for a given table position and field index.
     * Returns 0 if the field is not present.
     * @param {DataView} view
     * @param {number} tablePos - Absolute position of the table in the buffer
     * @param {number} fieldIndex - 0-based field index
     * @returns {number} Absolute offset to the field data, or 0 if not present
     */
    _getFieldOffset(view, tablePos, fieldIndex) {
        // Table starts with soffset_t (int32) pointing back to vtable
        const vtableRelOffset = view.getInt32(tablePos, true);
        const vtablePos = tablePos - vtableRelOffset;

        // vtable: [uint16 vtableSize] [uint16 objectSize] [uint16 field0] [uint16 field1] ...
        const vtableSize = view.getUint16(vtablePos, true);

        // Each field is at vtable offset 4 + fieldIndex * 2
        const fieldVtableOffset = 4 + fieldIndex * 2;

        if (fieldVtableOffset >= vtableSize) {
            return 0; // Field not present in this version
        }

        const fieldRelOffset = view.getUint16(vtablePos + fieldVtableOffset, true);
        if (fieldRelOffset === 0) {
            return 0; // Field not set
        }

        return tablePos + fieldRelOffset;
    }

    /**
     * Read a scalar uint32 field from a table.
     * @param {DataView} view
     * @param {number} tablePos
     * @param {number} fieldIndex
     * @param {number} defaultValue - Returned when the field is absent
     * @returns {number}
     */
    _readUint32Field(view, tablePos, fieldIndex, defaultValue = 0) {
        const offset = this._getFieldOffset(view, tablePos, fieldIndex);
        if (offset === 0) return defaultValue;
        return view.getUint32(offset, true);
    }

    /**
     * Read a scalar int32 field from a table.
     * @param {DataView} view
     * @param {number} tablePos
     * @param {number} fieldIndex
     * @param {number} defaultValue - Returned when the field is absent
     * @returns {number}
     */
    _readInt32Field(view, tablePos, fieldIndex, defaultValue = 0) {
        const offset = this._getFieldOffset(view, tablePos, fieldIndex);
        if (offset === 0) return defaultValue;
        return view.getInt32(offset, true);
    }

    /**
     * Read a scalar uint8 field from a table.
     * @param {DataView} view
     * @param {number} tablePos
     * @param {number} fieldIndex
     * @param {number} defaultValue - Returned when the field is absent
     * @returns {number}
     */
    _readUint8Field(view, tablePos, fieldIndex, defaultValue = 0) {
        const offset = this._getFieldOffset(view, tablePos, fieldIndex);
        if (offset === 0) return defaultValue;
        return view.getUint8(offset);
    }

    /**
     * Read a string field from a table.
     * FlatBuffer strings: offset → [uint32 length] [bytes...] [null]
     * @param {DataView} view
     * @param {number} tablePos
     * @param {number} fieldIndex
     * @returns {string} The decoded UTF-8 string, or '' when the field is absent
     */
    _readStringField(view, tablePos, fieldIndex) {
        const offset = this._getFieldOffset(view, tablePos, fieldIndex);
        if (offset === 0) return '';

        // The field contains a uoffset_t to the string
        const stringRelOffset = view.getUint32(offset, true);
        const stringPos = offset + stringRelOffset;

        const strLen = view.getUint32(stringPos, true);
        // Positions are relative to the view, so add view.byteOffset when
        // addressing the underlying ArrayBuffer directly.
        const bytes = new Uint8Array(view.buffer, view.byteOffset + stringPos + 4, strLen);
        return this._utf8Decoder.decode(bytes);
    }

    /**
     * Read a vector field from a table and locate its elements.
     * @param {DataView} view
     * @param {number} tablePos
     * @param {number} fieldIndex
     * @returns {{ pos: number, length: number } | null} Position of the first
     *   element and the element count, or null when the field is absent
     */
    _readVectorField(view, tablePos, fieldIndex) {
        const offset = this._getFieldOffset(view, tablePos, fieldIndex);
        if (offset === 0) return null;

        // Field contains uoffset_t to the vector
        const vectorRelOffset = view.getUint32(offset, true);
        const vectorPos = offset + vectorRelOffset;

        // Vector layout: [uint32 length] [elements...]
        const length = view.getUint32(vectorPos, true);
        return { pos: vectorPos + 4, length };
    }

    /**
     * Read a vector of int32 values.
     * @param {DataView} view
     * @param {number} tablePos
     * @param {number} fieldIndex
     * @returns {number[]} The values, or [] when the field is absent
     */
    _readInt32Vector(view, tablePos, fieldIndex) {
        const vec = this._readVectorField(view, tablePos, fieldIndex);
        if (!vec) return [];

        const result = [];
        for (let i = 0; i < vec.length; i++) {
            result.push(view.getInt32(vec.pos + i * 4, true));
        }
        return result;
    }

    /**
     * Dereference a table offset within a vector.
     * Vector of tables: each element is a uoffset_t pointing to the table.
     * @param {DataView} view
     * @param {number} elementPos - Position of the uoffset_t element
     * @returns {number} Absolute position of the referenced table
     */
    _derefTable(view, elementPos) {
        const relOffset = view.getUint32(elementPos, true);
        return elementPos + relOffset;
    }

    // ─── Model Parsing ────────────────────────────────────────────────────────

    /**
     * Read the root Model table and extract all data.
     * Only the first subgraph is decoded; `subgraphs` reports the total count.
     * Model fields (by vtable index):
     *   0: version (uint32)
     *   1: operator_codes (vector of OperatorCode tables)
     *   2: subgraphs (vector of SubGraph tables)
     *   3: description (string)
     * @param {DataView} view
     * @param {number} rootOffset
     * @param {number} bufferSize
     * @returns {TFLiteModelData}
     * @throws {Error} When the table header does not fit in the buffer
     */
    _readModelTable(view, rootOffset, bufferSize) {
        const modelPos = rootOffset;

        // Validate we can read the vtable soffset
        if (modelPos + 4 > bufferSize) {
            throw new Error('File không hợp lệ: cấu trúc FlatBuffer lỗi');
        }

        const version = this._readUint32Field(view, modelPos, 0, 0);
        const description = this._readStringField(view, modelPos, 3);

        // Read operator_codes
        const operatorCodes = this._readOperatorCodes(view, modelPos);

        // Read subgraphs
        const subgraphsVec = this._readVectorField(view, modelPos, 2);
        const subgraphCount = subgraphsVec ? subgraphsVec.length : 0;

        // Parse first subgraph for tensors, operators, inputs, outputs
        let tensors = [];
        let operators = [];
        let inputIndices = [];
        let outputIndices = [];

        if (subgraphCount > 0) {
            const firstSubgraphPos = this._derefTable(view, subgraphsVec.pos);
            const subgraphData = this._readSubGraph(view, firstSubgraphPos, operatorCodes);
            tensors = subgraphData.tensors;
            operators = subgraphData.operators;
            inputIndices = subgraphData.inputIndices;
            outputIndices = subgraphData.outputIndices;
        }

        return {
            version,
            description,
            operators,
            operatorCodes,
            tensors,
            subgraphs: subgraphCount,
            inputIndices,
            outputIndices
        };
    }

    /**
     * Read operator_codes vector from Model table.
     * OperatorCode fields:
     *   0: deprecated_builtin_code (int8)
     *   1: custom_code (string)
     *   2: version (int32)
     *   3: builtin_code (int32)
     * Field 0 cannot hold codes above 127, so newer files store the real code
     * in field 3 and put 127 (PLACEHOLDER_FOR_GREATER_OP_CODES) in field 0.
     * Older files only have field 0. Taking the larger of the two yields the
     * right code for both layouts.
     * @param {DataView} view
     * @param {number} modelPos
     * @returns {Array<{ builtinCode: number, customCode: string|null, opcodeName: string }>}
     */
    _readOperatorCodes(view, modelPos) {
        const vec = this._readVectorField(view, modelPos, 1);
        if (!vec) return [];

        const codes = [];
        for (let i = 0; i < vec.length; i++) {
            const opcodeTablePos = this._derefTable(view, vec.pos + i * 4);

            const deprecatedCode = this._readUint8Field(view, opcodeTablePos, 0, 0);
            const extendedCode = this._readInt32Field(view, opcodeTablePos, 3, 0);
            const builtinCode = Math.max(deprecatedCode, extendedCode);
            const customCode = this._readStringField(view, opcodeTablePos, 1) || null;

            let opcodeName;
            if (builtinCode === 32 && customCode) {
                // CUSTOM operator — use custom_code as name
                opcodeName = customCode;
            } else {
                opcodeName = this.BUILTIN_OPERATORS[builtinCode] || `UNKNOWN_OP_${builtinCode}`;
            }

            codes.push({ builtinCode, customCode, opcodeName });
        }
        return codes;
    }

    /**
     * Read a SubGraph table.
     * SubGraph fields:
     *   0: tensors (vector of Tensor tables)
     *   1: inputs (vector of int32)
     *   2: outputs (vector of int32)
     *   3: operators (vector of Operator tables)
     *   4: name (string)
     * @param {DataView} view
     * @param {number} subgraphPos
     * @param {Array} operatorCodes - Result of _readOperatorCodes, used to name operators
     * @returns {{ tensors: Array, operators: Array, inputIndices: number[], outputIndices: number[] }}
     */
    _readSubGraph(view, subgraphPos, operatorCodes) {
        const tensors = this._readTensors(view, subgraphPos);
        const inputIndices = this._readInt32Vector(view, subgraphPos, 1);
        const outputIndices = this._readInt32Vector(view, subgraphPos, 2);
        const operators = this._readOperators(view, subgraphPos, operatorCodes);

        return { tensors, operators, inputIndices, outputIndices };
    }

    /**
     * Read tensors vector from a SubGraph table.
     * Tensor fields:
     *   0: shape (vector of int32)
     *   1: type (TensorType byte)
     *   2: buffer (uint32)
     *   3: name (string)
     * @param {DataView} view
     * @param {number} subgraphPos
     * @returns {Array<{ name: string, shape: number[], dtype: string, byteSize: number }>}
     */
    _readTensors(view, subgraphPos) {
        const vec = this._readVectorField(view, subgraphPos, 0);
        if (!vec) return [];

        const tensors = [];
        for (let i = 0; i < vec.length; i++) {
            const tensorPos = this._derefTable(view, vec.pos + i * 4);

            const shape = this._readInt32Vector(view, tensorPos, 0);
            const typeCode = this._readUint8Field(view, tensorPos, 1, 0);
            const name = this._readStringField(view, tensorPos, 3);

            const dtype = this.TENSOR_TYPES[typeCode] || `UNKNOWN_TYPE_${typeCode}`;
            // Types without a known element size are estimated at 1 byte per element.
            const bytesPerEl = this.BYTES_PER_ELEMENT[dtype] || 1;
            // Negative dimensions contribute their magnitude to the product.
            // NOTE(review): an empty shape is counted as 0 elements here, while a
            // TFLite scalar holds one element — confirm this is intended.
            const elementCount = shape.length > 0
                ? shape.reduce((acc, dim) => acc * Math.abs(dim), 1)
                : 0;
            // Round up so sub-byte types (INT4 = 0.5 byte) yield whole bytes.
            const byteSize = Math.ceil(elementCount * bytesPerEl);

            tensors.push({ name, shape, dtype, byteSize });
        }
        return tensors;
    }

    /**
     * Read operators vector from a SubGraph table.
     * Operator fields:
     *   0: opcode_index (uint32)
     * @param {DataView} view
     * @param {number} subgraphPos
     * @param {Array} operatorCodes - Entries indexed by opcode_index
     * @returns {Array<{ opcodeName: string, opcodeIndex: number }>}
     */
    _readOperators(view, subgraphPos, operatorCodes) {
        const vec = this._readVectorField(view, subgraphPos, 3);
        if (!vec) return [];

        const operators = [];
        for (let i = 0; i < vec.length; i++) {
            const opPos = this._derefTable(view, vec.pos + i * 4);

            const opcodeIndex = this._readUint32Field(view, opPos, 0, 0);
            // An index past the end of operator_codes gets a placeholder name.
            const opcodeName = (opcodeIndex < operatorCodes.length)
                ? operatorCodes[opcodeIndex].opcodeName
                : `UNKNOWN_OP_${opcodeIndex}`;

            operators.push({ opcodeName, opcodeIndex });
        }
        return operators;
    }
}

// Expose the parser as a browser global. The guard keeps the script loadable
// in environments that have no `window` object, where the bare assignment
// would throw a ReferenceError.
if (typeof window !== 'undefined') {
    window.TFLiteParser = TFLiteParser;
}