{ "architectures": [ "HfMoondream" ], "auto_map": { "AutoConfig": "hf_moondream.HfConfig", "AutoModelForCausalLM": "hf_moondream.HfMoondream" }, "config": { "skills": [ "query", "caption", "detect", "point" ] }, "model_type": "moondream3", "torch_dtype": "bfloat16", "transformers_version": "4.51.1", "text": { "dim": 2048, "ff_dim": 8192, "n_layers": 24, "vocab_size": 51200, "max_context": 4096, "n_heads": 32, "n_kv_heads": 32, "prefix_attn": 730, "group_size": null, "moe": { "num_experts": 64, "start_layer": 4, "experts_per_token": 8, "expert_inner_dim": 1024 } }, "vision": { "enc_dim": 1152, "enc_patch_size": 14, "enc_n_layers": 27, "enc_ff_dim": 4304, "enc_n_heads": 16, "proj_out_dim": 2048, "crop_size": 378, "in_channels": 3, "max_crops": 12, "overlap_margin": 4, "proj_inner_dim": 8192 }, "region": { "dim": 2048, "coord_feat_dim": 256, "coord_out_dim": 1024, "size_feat_dim": 512, "size_out_dim": 2048, "group_size": null }, "dtype": "bfloat16" }