{
  "source_model": "nvidia/LocateAnything-3B",
  "runtime": "onnxruntime-web/webgpu",
  "language_input": "inputs_embeds (+ input_ids for block mask, visual_features for splice)",
  "vision_model": "vision_mlp.onnx",
  "float_language_model": "language_tail_fp.onnx",
  "quantization_target": "language_tail_int4.onnx",
  "embedding_int4": {
    "packed": "embed_tokens_int4_packed.bin",
    "scales": "embed_tokens_int4_scales.bin",
    "meta": "embed_tokens_int4_meta.json"
  },
  "tail_tokens": 6,
  "image_token_index": 151665,
  "default_generation_mode": "slow",
  "notes": "Browser: tokenize -> INT4 embedding gather/dequant (JS) -> inputs_embeds -> language graph splices visual_features at image_token_index and applies the SDLM block mask from input_ids -> INT4 logits. No fp32 embedding Gather in the graph."
}