{
"architectures": [
"Pix2StructForConditionalGeneration"
],
"decoder_start_token_id": 0,
"dtype": "float32",
"eos_token_id": 1,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"is_encoder_decoder": true,
"is_vqa": true,
"model_type": "pix2struct",
"pad_token_id": 0,
"text_config": {
"add_cross_attention": false,
"bos_token_id": null,
"cross_attention_hidden_size": null,
"d_ff": 2048,
"d_kv": 64,
"decoder_start_token_id": 0,
"dense_act_fn": "gelu_new",
"dropout_rate": 0.1,
"dtype": "float32",
"encoder_hidden_size": 768,
"eos_token_id": 1,
"finetuning_task": null,
"hidden_size": 768,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"is_decoder": true,
"is_encoder_decoder": true,
"layer_norm_epsilon": 1e-06,
"model_type": "pix2struct_text_model",
"num_heads": 12,
"num_layers": 12,
"pad_token_id": 0,
"prefix": null,
"pruned_heads": {},
"relative_attention_max_distance": 128,
"relative_attention_num_buckets": 32,
"sep_token_id": null,
"task_specific_params": null,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": false,
"tokenizer_class": null,
"torchscript": false,
"use_bfloat16": false,
"use_cache": false,
"vocab_size": 50432
},
"tie_word_embeddings": false,
"transformers_version": "5.0.0",
"use_cache": false,
"vision_config": {
"add_cross_attention": false,
"attention_dropout": 0.0,
"bos_token_id": null,
"cross_attention_hidden_size": null,
"d_ff": 2048,
"d_kv": 64,
"decoder_start_token_id": null,
"dense_act_fn": "gelu_new",
"dropout_rate": 0.0,
"dtype": "float32",
"eos_token_id": null,
"finetuning_task": null,
"hidden_size": 768,
"initializer_factor": 1.0,
"initializer_range": 0.02,
"is_decoder": false,
"layer_norm_bias": false,
"layer_norm_eps": 1e-06,
"model_type": "pix2struct_vision_model",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"pad_token_id": null,
"patch_embed_hidden_size": 768,
"patch_size": 16,
"prefix": null,
"projection_dim": 768,
"pruned_heads": {},
"relative_attention_max_distance": 128,
"relative_attention_num_buckets": 32,
"sep_token_id": null,
"seq_len": 4096,
"task_specific_params": null,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"torchscript": false,
"use_bfloat16": false
}
}