{
  "model_class": "FineTuneTrainCogAgentModel",
  "tokenizer_type": "vicuna-7b-v1.5",
  "num_layers": 32,
  "hidden_size": 4096,
  "num_attention_heads": 32,
  "vocab_size": 32000,
  "layernorm_order": "pre",
  "model_parallel_size": 1,
  "max_sequence_length": 4096,
  "is_decoder": [
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true,
    true
  ],
  "cross_attn_hidden_size": 1024,
  "use_bias": false,
  "inner_hidden_size": 11008,
  "cross_hidden_size_per_attention_head": 32,
  "pre_seq_len": 8,
  "lora_rank": 50,
  "use_ptuning": false,
  "use_lora": false,
  "use_qlora": false,
  "layer_range": null,
  "image_length": 256,
  "cross_image_pix": 1120,
  "eva_args": {
    "model_class": "EVA2CLIPModel",
    "num_layers": 63,
    "hidden_size": 1792,
    "num_attention_heads": 16,
    "vocab_size": 1,
    "layernorm_order": "post",
    "model_parallel_size": 1,
    "max_sequence_length": 257,
    "inner_hidden_size": 15360,
    "use_final_layernorm": false,
    "layernorm_epsilon": 1e-06,
    "row_parallel_linear_final_bias": false,
    "image_size": [
      224,
      224
    ],
    "pre_len": 1,
    "post_len": 0,
    "in_channels": 3,
    "patch_size": 14
  },
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0
}