VIGiA-8B / config.json
{
"_name_or_path": "/home/dmgcsilva/project/PROJECT/dmgcsilva/followup/experiments/vista_stage46/llama8binst_siglip400m_mlp_ln_patches_stage46/checkpoint_3252",
"architectures": [
"ReVista"
],
"cap_loss_scale": 1.0,
"connector_type": "mlp",
"end_ret_token_id": 128258,
"eos_token_id": 128009,
"freeze_cap": false,
"freeze_emb": false,
"freeze_lm": false,
"freeze_ret": false,
"freeze_vm": true,
"image_embed_dropout_prob": 0.0,
"img_token_id": 128256,
"lm_lora_alpha": 128,
"lm_lora_dropout": 0.03,
"lm_lora_rank": 32,
"model_type": "revistamodel",
"n_visual_tokens": 1,
"negative_count": 512,
"pad_token_id": 128009,
"projector_type": "linear",
"ret_loss_scale": 1.0,
"ret_token_id": 128257,
"shared_emb_dim": 512,
"start_ret_token_id": 128257,
"task": "captioning",
"text_decoder": "meta-llama/Llama-3.1-8B-Instruct",
"text_embed_dropout_prob": 0.1,
"torch_dtype": "float32",
"transformers_version": "4.49.0",
"use_cls_token": false,
"use_lora_on_lm": false,
"use_negatives": false,
"use_pos_emb": true,
"visual_encoder": "google/siglip-so400m-patch14-224",
"vocab_size": 128259
}
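
The config describes ReVista, a captioning model that pairs a frozen SigLIP-so400m vision encoder ("freeze_vm": true) with Llama-3.1-8B-Instruct behind an MLP connector. Because "model_type": "revistamodel" is a custom architecture rather than a built-in transformers one, AutoConfig cannot resolve it on its own. The sketch below is a minimal way to inspect the raw JSON instead; it assumes the file has been downloaded locally as config.json, and the human-readable token names in the comments ([IMG], [RET], [END_RET]) are inferred from the *_token_id field names, not confirmed by the repository.

# Minimal inspection sketch, not the repo's official loader.
# Assumes config.json (the file above) is in the working directory.
import json

with open("config.json") as f:
    cfg = json.load(f)

# Llama 3.1's tokenizer has 128,256 entries; this config reports 128,259,
# i.e. three special tokens appended for the vision/retrieval interface:
#   img_token_id                      -> 128256  (inferred name: [IMG])
#   ret_token_id / start_ret_token_id -> 128257  (the two fields share one id)
#   end_ret_token_id                  -> 128258  (inferred name: [END_RET])
LLAMA31_BASE_VOCAB = 128256
added = cfg["vocab_size"] - LLAMA31_BASE_VOCAB
print(f"{added} special tokens beyond the base Llama vocabulary")  # -> 3

print(cfg["visual_encoder"])  # google/siglip-so400m-patch14-224 (frozen)
print(cfg["text_decoder"])    # meta-llama/Llama-3.1-8B-Instruct
print(cfg["connector_type"], cfg["n_visual_tokens"])  # mlp connector, 1 visual token

Loading the full model with AutoConfig/AutoModel would require trust_remote_code=True and only works if the repository actually ships the custom ReVista modeling code; the raw-JSON route above is the safe way to read these settings either way.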