EncDec-base / config.json
ilya16's picture
ACM MM paper model release
49b1de7 verified
{
  "_name_": "Seq2SeqMusicTransformer",
  "_model_version_": "1.0.0",
  "_symupe_version_": "0.1.0",
  "dim": 512,
  "tie_token_emb": true,
  "encoder": {
    "token_embeddings": {
      "_target_": "simple",
      "emb_dims": 64,
      "mode": "cat",
      "emb_norm": true,
      "discrete": false,
      "sinusoidal": true,
      "sinusoidal_learned": true,
      "project_bias": false,
      "embedding_kwargs": {
        "depth": 1,
        "with_positions": false,
        "log_inv_freq": true
      },
      "tie_keys": {
        "TimeDurationSustain": "TimeDuration"
      },
      "special_tokens": {
        "PAD_None": 0,
        "MASK_None": 1,
        "BOS_None": 2,
        "EOS_None": 3,
        "IGNORE_None": 4,
        "EOD_None": 5,
        "Bar_Line": 6,
        "Pedal_On": 7,
        "Pedal_Off": 8
      }
    },
    "emb_norm": false,
    "emb_dropout": 0.0,
    "use_abs_pos_emb": false,
    "project_bias": false,
    "transformer": {
      "_target_": "encoder",
      "depth": 4,
      "memory_tokens": 4,
      "final_norm_bias": false,
      "attention": {
        "_target_": "global",
        "heads": 8,
        "head_dim": 64,
        "one_kv_head": true,
        "dropout": 0.1,
        "rotary_pos_emb": true,
        "rotary_emb_base": 1024,
        "alibi_pos_bias": false,
        "alibi_learned": true,
        "alibi_heads": 8
      },
      "feed_forward": {
        "mult": 3,
        "glu": true,
        "swish": true,
        "dropout": 0.1
      }
    }
  },
  "decoder": {
    "token_embeddings": {
      "_target_": "simple",
      "emb_dims": 64,
      "mode": "cat",
      "emb_norm": true,
      "discrete": false,
      "sinusoidal": true,
      "sinusoidal_learned": true,
      "project_bias": false,
      "embedding_kwargs": {
        "depth": 1,
        "with_positions": false,
        "log_inv_freq": true
      },
      "tie_keys": {
        "TimeDurationSustain": "TimeDuration"
      },
      "special_tokens": {
        "PAD_None": 0,
        "MASK_None": 1,
        "BOS_None": 2,
        "EOS_None": 3,
        "IGNORE_None": 4,
        "EOD_None": 5,
        "Bar_Line": 6,
        "Pedal_On": 7,
        "Pedal_Off": 8
      }
    },
    "emb_norm": false,
    "emb_dropout": 0.0,
    "use_abs_pos_emb": false,
    "project_bias": false,
    "context_embedding": "cat",
    "transformer": {
      "_target_": "decoder",
      "depth": 4,
      "memory_tokens": 4,
      "final_norm_bias": false,
      "attention": {
        "_target_": "global",
        "heads": 8,
        "head_dim": 64,
        "one_kv_head": true,
        "dropout": 0.1,
        "rotary_pos_emb": true,
        "rotary_emb_base": 1024,
        "alibi_pos_bias": false,
        "alibi_learned": true,
        "alibi_heads": 8
      },
      "feed_forward": {
        "mult": 3,
        "glu": true,
        "swish": true,
        "dropout": 0.1
      }
    },
    "lm_head": {
      "_target_": "lm",
      "bias": true
    },
    "type_embedding": "sum",
    "score_token_embeddings": {
      "_target_": "simple",
      "emb_dims": 64,
      "mode": "cat",
      "emb_norm": true,
      "discrete": false,
      "sinusoidal": true,
      "sinusoidal_learned": true,
      "project_bias": false,
      "embedding_kwargs": {
        "depth": 1,
        "with_positions": false,
        "log_inv_freq": true
      },
      "tie_keys": {
        "TimeDurationSustain": "TimeDuration"
      }
    },
    "score_tokens_dropout": 0.2
  },
  "token_keys": [
    "Velocity",
    "TimeShift",
    "TimeDuration",
    "TimeDurationSustain"
  ],
  "num_tokens": {
    "Pitch": 97,
    "Position": 202,
    "PositionShift": 142,
    "Duration": 142,
    "Velocity": 137,
    "TimeShift": 371,
    "TimeDuration": 319,
    "TimeDurationSustain": 319
  },
  "context_num_tokens": null,
  "score_num_tokens": {
    "Velocity": 137,
    "Tempo": 170
  }
}