Aurora-1B / config.json
alvanalrakib's picture
Upload checkpoint-10500
c15983a verified
{
"data": {
"data_dir": "data",
"sequence_length": 256,
"max_sequence_length": 512,
"min_sequence_length": 32,
"validation_split": 0.1,
"batch_size": 12,
"num_workers": 8,
"pin_memory": true,
"shuffle": true,
"use_cache": true,
"cache_dir": "data_cache",
"force_reprocess": false,
"parallel_workers": 8,
"cache_format": "pickle",
"dataset_version": "v1.0"
},
"model": {
"vocab_size": 275,
"hidden_size": 1536,
"num_attention_heads": 24,
"num_hidden_layers": 36,
"intermediate_size": 6144,
"max_position_embeddings": 1024,
"dropout": 0.1,
"attention_dropout": 0.1,
"layer_norm_eps": 1e-12,
"use_genre_conditioning": true,
"use_key_conditioning": true,
"genre_embed_size": 64,
"key_embed_size": 64,
"rotary_embedding": true,
"flash_attention": true,
"num_genres": 13,
"num_keys": 12
},
"training": {
"output_dir": "checkpoints",
"logging_dir": "logs",
"num_train_epochs": 20,
"learning_rate": 5e-05,
"weight_decay": 0.01,
"warmup_steps": 1000,
"max_grad_norm": 1.0,
"lr_scheduler_type": "cosine",
"lr_scheduler_warmup_ratio": 0.1,
"optimizer": "adamw",
"adam_beta1": 0.9,
"adam_beta2": 0.999,
"adam_epsilon": 1e-08,
"save_steps": 500,
"eval_steps": 500,
"logging_steps": 10,
"save_total_limit": 3,
"evaluation_strategy": "steps",
"load_best_model_at_end": true,
"metric_for_best_model": "eval_loss",
"greater_is_better": false,
"fp16": true,
"dataloader_pin_memory": true,
"gradient_checkpointing": true,
"resume_from_checkpoint": "checkpoints/checkpoint-5000"
},
"generation": {
"max_length": 512,
"temperature": 0.8,
"top_k": 50,
"top_p": 0.9,
"do_sample": true,
"num_return_sequences": 1,
"repetition_penalty": 1.1,
"length_penalty": 1.0,
"default_genre": "melodic_techno",
"default_key": "eb_maj_c_min"
},
"tokenizer": {
"time_resolution": 16,
"max_pitch": 127,
"min_pitch": 0,
"max_velocity": 127,
"min_velocity": 1
},
"hardware": {
"device": "cuda",
"mixed_precision": true,
"compile_model": false
},
"monitoring": {
"use_rich_ui": true,
"update_frequency": 0.5,
"show_memory_usage": true,
"show_gpu_usage": true,
"log_gradients": false
},
"wandb": {
"enabled": true,
"project": "aurora-1b",
"entity": null,
"tags": [
"midi",
"melody",
"transformer"
]
}
}