{
  "format": "onnx",
  "model_type": "vlm",
  "architecture": "fastvlm-qwen2",
  "encoder": {
    "file": "onnx/vision_encoder.onnx",
    "type": "vision_encoder",
    "inputs": {
      "pixel_values": {
        "dtype": "float32",
        "shape": ["batch", 3, "height", "width"]
      }
    },
    "outputs": {
      "image_features": {
        "dtype": "float32",
        "shape": ["batch", "tokens", "hidden_dim"]
      }
    }
  },
  "decoder": {
    "file": "onnx/decoder_model_merged.onnx",
    "type": "causal_lm",
    "inputs": {
      "input_ids": {
        "dtype": "int64",
        "shape": ["batch", "sequence"]
      },
      "attention_mask": {
        "dtype": "int64",
        "shape": ["batch", "sequence"]
      }
    },
    "outputs": {
      "logits": {
        "dtype": "float32",
        "shape": ["batch", "sequence", "vocab_size"]
      }
    }
  },
  "embed_tokens": {
    "file": "onnx/embed_tokens.onnx",
    "type": "embedding",
    "inputs": {
      "input_ids": {
        "dtype": "int64",
        "shape": ["batch", "sequence"]
      }
    },
    "outputs": {
      "embeddings": {
        "dtype": "float32",
        "shape": ["batch", "sequence", "hidden_dim"]
      }
    }
  },
  "tokenizer": {
    "file": "tokenizer.json",
    "vocab_file": "vocab.json",
    "merges_file": "merges.txt",
    "special_tokens_map": "special_tokens_map.json",
    "tokenizer_config": "tokenizer_config.json"
  },
  "generation_config": "generation_config.json",
  "preprocessor": {
    "config": "preprocessor_config.json",
    "processor_config": "processor_config.json"
  }
}