| { |
| "_name_": "Seq2SeqMusicTransformer", |
| "_model_version_": "1.0.0", |
| "_symupe_version_": "0.1.0", |
| "dim": 512, |
| "tie_token_emb": true, |
| "encoder": { |
| "token_embeddings": { |
| "_target_": "simple", |
| "emb_dims": 64, |
| "mode": "cat", |
| "emb_norm": true, |
| "discrete": false, |
| "sinusoidal": true, |
| "sinusoidal_learned": true, |
| "project_bias": false, |
| "embedding_kwargs": { |
| "depth": 1, |
| "with_positions": false, |
| "log_inv_freq": true |
| }, |
| "tie_keys": { |
| "TimeDurationSustain": "TimeDuration" |
| }, |
| "special_tokens": { |
| "PAD_None": 0, |
| "MASK_None": 1, |
| "BOS_None": 2, |
| "EOS_None": 3, |
| "IGNORE_None": 4, |
| "EOD_None": 5, |
| "Bar_Line": 6, |
| "Pedal_On": 7, |
| "Pedal_Off": 8 |
| } |
| }, |
| "emb_norm": false, |
| "emb_dropout": 0.0, |
| "use_abs_pos_emb": false, |
| "project_bias": false, |
| "transformer": { |
| "_target_": "encoder", |
| "depth": 4, |
| "memory_tokens": 4, |
| "final_norm_bias": false, |
| "attention": { |
| "_target_": "global", |
| "heads": 8, |
| "head_dim": 64, |
| "one_kv_head": true, |
| "dropout": 0.1, |
| "rotary_pos_emb": true, |
| "rotary_emb_base": 1024, |
| "alibi_pos_bias": false, |
| "alibi_learned": true, |
| "alibi_heads": 8 |
| }, |
| "feed_forward": { |
| "mult": 3, |
| "glu": true, |
| "swish": true, |
| "dropout": 0.1 |
| } |
| } |
| }, |
| "decoder": { |
| "token_embeddings": { |
| "_target_": "simple", |
| "emb_dims": 64, |
| "mode": "cat", |
| "emb_norm": true, |
| "discrete": false, |
| "sinusoidal": true, |
| "sinusoidal_learned": true, |
| "project_bias": false, |
| "embedding_kwargs": { |
| "depth": 1, |
| "with_positions": false, |
| "log_inv_freq": true |
| }, |
| "tie_keys": { |
| "TimeDurationSustain": "TimeDuration" |
| }, |
| "special_tokens": { |
| "PAD_None": 0, |
| "MASK_None": 1, |
| "BOS_None": 2, |
| "EOS_None": 3, |
| "IGNORE_None": 4, |
| "EOD_None": 5, |
| "Bar_Line": 6, |
| "Pedal_On": 7, |
| "Pedal_Off": 8 |
| } |
| }, |
| "emb_norm": false, |
| "emb_dropout": 0.0, |
| "use_abs_pos_emb": false, |
| "project_bias": false, |
| "context_embedding": "cat", |
| "transformer": { |
| "_target_": "decoder", |
| "depth": 4, |
| "memory_tokens": 4, |
| "final_norm_bias": false, |
| "attention": { |
| "_target_": "global", |
| "heads": 8, |
| "head_dim": 64, |
| "one_kv_head": true, |
| "dropout": 0.1, |
| "rotary_pos_emb": true, |
| "rotary_emb_base": 1024, |
| "alibi_pos_bias": false, |
| "alibi_learned": true, |
| "alibi_heads": 8 |
| }, |
| "feed_forward": { |
| "mult": 3, |
| "glu": true, |
| "swish": true, |
| "dropout": 0.1 |
| } |
| }, |
| "lm_head": { |
| "_target_": "lm", |
| "bias": true |
| }, |
| "type_embedding": "sum", |
| "score_token_embeddings": { |
| "_target_": "simple", |
| "emb_dims": 64, |
| "mode": "cat", |
| "emb_norm": true, |
| "discrete": false, |
| "sinusoidal": true, |
| "sinusoidal_learned": true, |
| "project_bias": false, |
| "embedding_kwargs": { |
| "depth": 1, |
| "with_positions": false, |
| "log_inv_freq": true |
| }, |
| "tie_keys": { |
| "TimeDurationSustain": "TimeDuration" |
| } |
| }, |
| "score_tokens_dropout": 0.2 |
| }, |
| "token_keys": [ |
| "Velocity", |
| "TimeShift", |
| "TimeDuration", |
| "TimeDurationSustain" |
| ], |
| "num_tokens": { |
| "Pitch": 97, |
| "Position": 202, |
| "PositionShift": 142, |
| "Duration": 142, |
| "Velocity": 137, |
| "TimeShift": 371, |
| "TimeDuration": 319, |
| "TimeDurationSustain": 319 |
| }, |
| "context_num_tokens": null, |
| "score_num_tokens": { |
| "Velocity": 137, |
| "Tempo": 170 |
| } |
| } |