{ "source_model": "nvidia/LocateAnything-3B", "runtime": "onnxruntime-web/webgpu", "language_input": "inputs_embeds (+ input_ids for block mask, visual_features for splice)", "vision_model": "vision_mlp.onnx", "float_language_model": "language_tail_fp.onnx", "quantization_target": "language_tail_int4.onnx", "embedding_int4": { "packed": "embed_tokens_int4_packed.bin", "scales": "embed_tokens_int4_scales.bin", "meta": "embed_tokens_int4_meta.json" }, "tail_tokens": 6, "image_token_index": 151665, "default_generation_mode": "slow", "notes": "Browser: tokenize -> INT4 embedding gather/dequant (JS) -> inputs_embeds -> language graph splices visual_features at image_token_index and applies the SDLM block mask from input_ids -> INT4 logits. No fp32 embedding Gather in the graph." }