model:
  class_path: model.lina.Lina
  init_args:
    n_warmup_steps: 500
    learning_rate: 5e-4
    n_codebook: 1024
    n_special_token_in: 3
    n_special_token_out: 3
    n_txt_vocab: 256
    d_context: 512
    d_model: 512
    quant_layer: [0, 1, 2, 3]
    txt_encoder:
      class_path: model.encoder.TextEncoder
      init_args:
        dim: 512
        heads: 8
        n_layers: 9
        dropout: 0.1
    attentive_rnn:
      class_path: model.gla.AttentiveGLA
      init_args:
        d_model: 512
        d_context: 512
        heads: 4
        dropout_att: 0.2
        dropout: 0.
        n_layer: 6
        blind: True
        d_blind: 128