{
  "architectures": [
    "HfMoondream"
  ],
  "auto_map": {
    "AutoConfig": "hf_moondream.HfConfig",
    "AutoModelForCausalLM": "hf_moondream.HfMoondream"
  },
  "config": {
    "skills": [
      "query",
      "caption",
      "detect",
      "point"
    ]
  },
  "model_type": "moondream3",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.1",
  "text": {
    "dim": 2048,
    "ff_dim": 8192,
    "n_layers": 24,
    "vocab_size": 51200,
    "max_context": 4096,
    "n_heads": 32,
    "n_kv_heads": 32,
    "prefix_attn": 730,
    "group_size": null,
    "moe": {
      "num_experts": 64,
      "start_layer": 4,
      "experts_per_token": 8,
      "expert_inner_dim": 1024
    }
  },
  "vision": {
    "enc_dim": 1152,
    "enc_patch_size": 14,
    "enc_n_layers": 27,
    "enc_ff_dim": 4304,
    "enc_n_heads": 16,
    "proj_out_dim": 2048,
    "crop_size": 378,
    "in_channels": 3,
    "max_crops": 12,
    "overlap_margin": 4,
    "proj_inner_dim": 8192
  },
  "region": {
    "dim": 2048,
    "coord_feat_dim": 256,
    "coord_out_dim": 1024,
    "size_feat_dim": 512,
    "size_out_dim": 2048,
    "group_size": null
  },
  "dtype": "bfloat16"
}