{
  "architectures": [
    "Pix2StructForConditionalGeneration"
  ],
  "decoder_start_token_id": 0,
  "dtype": "float32",
  "eos_token_id": 1,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "is_encoder_decoder": true,
  "is_vqa": true,
  "model_type": "pix2struct",
  "pad_token_id": 0,
  "text_config": {
    "add_cross_attention": false,
    "bos_token_id": null,
    "cross_attention_hidden_size": null,
    "d_ff": 2048,
    "d_kv": 64,
    "decoder_start_token_id": 0,
    "dense_act_fn": "gelu_new",
    "dropout_rate": 0.1,
    "dtype": "float32",
    "encoder_hidden_size": 768,
    "eos_token_id": 1,
    "finetuning_task": null,
    "hidden_size": 768,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "is_decoder": true,
    "is_encoder_decoder": true,
    "layer_norm_epsilon": 1e-06,
    "model_type": "pix2struct_text_model",
    "num_heads": 12,
    "num_layers": 12,
    "pad_token_id": 0,
    "prefix": null,
    "pruned_heads": {},
    "relative_attention_max_distance": 128,
    "relative_attention_num_buckets": 32,
    "sep_token_id": null,
    "task_specific_params": null,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": false,
    "tokenizer_class": null,
    "torchscript": false,
    "use_bfloat16": false,
    "use_cache": false,
    "vocab_size": 50432
  },
  "tie_word_embeddings": false,
  "transformers_version": "5.0.0",
  "use_cache": false,
  "vision_config": {
    "add_cross_attention": false,
    "attention_dropout": 0.0,
    "bos_token_id": null,
    "cross_attention_hidden_size": null,
    "d_ff": 2048,
    "d_kv": 64,
    "decoder_start_token_id": null,
    "dense_act_fn": "gelu_new",
    "dropout_rate": 0.0,
    "dtype": "float32",
    "eos_token_id": null,
    "finetuning_task": null,
    "hidden_size": 768,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "is_decoder": false,
    "layer_norm_bias": false,
    "layer_norm_eps": 1e-06,
    "model_type": "pix2struct_vision_model",
    "num_attention_heads": 12,
    "num_channels": 3,
    "num_hidden_layers": 12,
    "pad_token_id": null,
    "patch_embed_hidden_size": 768,
    "patch_size": 16,
    "prefix": null,
    "projection_dim": 768,
    "pruned_heads": {},
    "relative_attention_max_distance": 128,
    "relative_attention_num_buckets": 32,
    "sep_token_id": null,
    "seq_len": 4096,
    "task_specific_params": null,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "torchscript": false,
    "use_bfloat16": false
  }
}