{
  "architectures": [
    "VLM"
  ],
  "hidden_size": 4096,
  "model_config_dict": {
    "connector": {
      "name": "mlp_2_gelu",
      "type": "mlp"
    },
    "llm": {
      "assistant_token": "<s>",
      "hf_name": "lmsys/vicuna-7b-v1.5",
      "hidden_size": null,
      "ignore_index": -100,
      "image_end_token": "<im_end>",
      "image_patch_token": "<im_patch>",
      "image_start_token": "<im_start>",
      "image_token": "<image>",
      "image_token_index": -200,
      "max_seq_length": 2048,
      "name": "vicuna-7b-v1.5",
      "system_token": null,
      "type": "hf_llm",
      "use_image_patch_token": false,
      "use_start_end_tokens": false,
      "user_token": null,
      "vocab_size": null
    },
    "name": "llava-7b",
    "visual_encoder": {
      "hf_name": "openai/clip-vit-large-patch14-336",
      "hidden_size": null,
      "img_size": null,
      "name": "clip-vit-large-patch14-336",
      "output_layer": -2,
      "patch_size": null,
      "type": "hf_visual_encoder",
      "use_cls_token": false
    }
  },
  "model_dtype": "bfloat16",
  "model_type": "vlm",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.3"
}