VISOR-GPT / train /models /sbert /large_config.json
szukevin's picture
upload
7900c16
{
  "stream_0": {
    "vocab_size": 21128,
    "emb_size": 1024,
    "feedforward_size": 4096,
    "hidden_size": 1024,
    "hidden_act": "gelu",
    "heads_num": 16,
    "layers_num": 24,
    "max_seq_length": 512,
    "dropout": 0.1,
    "embedding": ["word", "pos", "seg"],
    "encoder": "transformer",
    "mask": "fully_visible"
  },
  "stream_1": {
    "vocab_size": 21128,
    "emb_size": 1024,
    "feedforward_size": 4096,
    "hidden_size": 1024,
    "hidden_act": "gelu",
    "heads_num": 16,
    "layers_num": 24,
    "max_seq_length": 512,
    "dropout": 0.1,
    "embedding": ["word", "pos", "seg"],
    "encoder": "transformer",
    "mask": "fully_visible"
  },
  "embedding": "dual",
  "encoder": "dual",
  "pooling": "first",
  "tie_weights": true
}