{
  "_name_or_path": "",
  "architectures": [
    "IMAGELLMForCausalMLM"
  ],
  "attention_bias": false,
  "bos_token_id": 0,
  "eos_token_id": 1,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "log_attentions": false,
  "log_hidden_states": false,
  "loss_scale_schedule": "none",
  "loss_weight_lm": 1.0,
  "loss_weight_vm": 10.0,
  "max_position_embeddings": 2048,
  "max_sequence_length": 2048,
  "model_type": "IMAGEllm",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": -1,
  "plugins_init_kwargs": {
    "clip_vision_embedding": {
      "_target_": "modeling.modeling_plugins.CLIPVisionEmbedding",
      "clip_vision_model_name_or_path": "",
      "embed_hidden_size": 4096,
      "freeze_clip_vision_model": true,
      "freeze_embedding_layers": true,
      "freeze_projector": true,
      "local_files_only": false,
      "pretrained_model_name_or_path": "",
      "projector_depth": 1,
      "projector_type": "linear",
      "select_layer": -2,
      "use_additional_post_layernorm": false
    },
    "IMAGE_embedding": {
      "_target_": "modeling.modeling_plugins.IMAGEEmbedding",
      "embed_hidden_size": 4096,
      "freeze_IMAGE_queries": true,
      "num_IMAGE_queries": 64,
      "pretrained_model_name_or_path": ""
    },
    "stable_diffusion_head": {
      "_target_": "modeling.modeling_plugins.StableDiffusionHead",
      "diffusion_name_or_path": "",
      "embed_hidden_size": 4096,
      "freeze_projector": false,
      "freeze_unet": true,
      "freeze_vae": true,
      "local_files_only": false,
      "pretrained_model_name_or_path": "",
      "projector_depth": 1,
      "projector_type": "linear"
    }
  },
  "plugins_type": {
    "clip_vision_embedding": "embedding",
    "IMAGE_embedding": "embedding",
    "stable_diffusion_head": "head"
  },
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "special_tokens2ids_dict": {
    "</s>": 2,
    "<s>": 1,
    "<unk>": 0,
    "[PAD]": 32000,
    "additional_special_tokens": {
      "<dream>": 32007,
      "<dream_end>": 32002,
      "<dream_start>": 32003,
      "<im_end>": 32005,
      "<im_patch>": 32004,
      "<im_start>": 32001,
      "<image>": 32006
    }
  },
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.45.1",
  "use_cache": true,
  "vocab_size": 32008
}