# Hydra configuration for generating a beatmap from an input audio file.
# NOTE(review): this file's indentation was reconstructed from a flattened
# copy. Nesting under `model.spectrogram` (sample_rate, hop_length) is fixed by
# the interpolations in `data`; other placements are marked where uncertain.

# Sequence model settings.
model:
  name: 'google/t5-v1_1-small'
  spectrogram:
    sample_rate: 16000
    hop_length: 128
    n_fft: 1024
    n_mels: 256
  # NOTE(review): assumed to be model-level options rather than spectrogram
  # options -- confirm against the code that reads them.
  do_style_embed: false
  input_features: false

# Inference inputs and outputs.
model_path: './checkpoint'
audio_path: ''  # Path to input audio
total_duration_ms: 0  # Total duration of audio in milliseconds, 0 for full audio
output_path: ''  # Path to output directory
bpm: 120  # Beats per minute of input audio
offset: 0  # Start of beat, in milliseconds, from the beginning of input audio
resnap_objects: false  # Resnap objects to beat timing ticks, requires accurate BPM and offset
slider_multiplier: 1.7  # Multiplier for slider velocity
title: ''  # Song title
artist: ''  # Song artist
beatmap_path: ''  # Path to .osu file which will be remapped
other_beatmap_path: ''  # Path to .osu file of other beatmap in the mapset to use as reference
beatmap_id: -1  # Beatmap ID to use as style
difficulty: -1  # Difficulty star rating to map
creator: ''  # Beatmap creator
version: ''  # Beatmap version
full_set: false  # Generate full mapset
set_difficulties: 5  # Number of difficulties to generate.

# Diffusion settings
generate_positions: true  # Use diffusion to generate object positions
diff_ckpt: './osudiffusion/DiT-B-0700000.pt'  # Path to checkpoint for diffusion model
diff_refine_ckpt: ''  # Path to checkpoint for refining diffusion model

# `model` below is nested here; at top level it would duplicate the `model`
# section above.
diffusion:
  style_id: 1451282  # Style ID to use for diffusion
  num_sampling_steps: 100  # Number of sampling steps
  cfg_scale: 1  # Scale of classifier-free guidance
  num_classes: 52670  # Number of classes stored in the model
  beatmap_idx: 'osudiffusion/beatmap_idx.pickle'  # Path to beatmap index
  use_amp: true  # Use automatic mixed precision
  refine_iters: 10  # Number of refinement iterations
  seq_len: 128  # Sequence length
  model: 'DiT-B'  # Model architecture

# NOTE(review): all keys through add_gd_context are assumed to belong to
# `data` -- confirm against the consumer.
data:  # Data settings
  src_seq_len: 800
  tgt_seq_len: 600
  # Kept in sync with the spectrogram settings via interpolation.
  sample_rate: ${model.spectrogram.sample_rate}
  hop_length: ${model.spectrogram.hop_length}
  sequence_stride: 1  # Fraction of audio sequence length to shift inference window
  center_pad_decoder: false  # Center pad decoder input
  add_pre_tokens: true
  special_token_len: 2
  diff_token_index: 0
  style_token_index: -1
  max_pre_token_len: 4
  add_gd_context: false  # Prefix the decoder with tokens of another beatmap in the mapset

# Hydra runtime settings.
hydra:
  job:
    chdir: false
  run:
    # Timestamped output directory built with Hydra's `now` resolver.
    dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}