# osu_mapper2 / configs / inference.yaml
# Tiger14n's picture
# Update configs/inference.yaml
# 19ce837 verified
# Model definition.
# NOTE(review): the nesting below is reconstructed; the flattened original
# carried no indentation. `sample_rate` and `hop_length` must live under
# `model.spectrogram` because the data settings in this file interpolate
# `${model.spectrogram.sample_rate}` and `${model.spectrogram.hop_length}`.
# Grouping `n_fft`/`n_mels` with them, and placing `do_style_embed` and
# `input_features` directly under `model`, is an assumption: confirm against
# the code that reads this config.
model:
  name: 'google/t5-v1_1-small'  # Looks like a Hugging Face model ID; confirm
  spectrogram:
    sample_rate: 16000
    hop_length: 128
    n_fft: 1024
    n_mels: 256
  do_style_embed: false
  input_features: false
# --- Inference inputs and outputs ---

# Path to the model checkpoint (presumably the trained model to load; confirm).
model_path: './checkpoint'
# Path to the input audio.
audio_path: ''
# Total duration of the audio in milliseconds; 0 means the full audio.
total_duration_ms: 0
# Path to the output directory.
output_path: ''

# --- Timing ---

# Beats per minute of the input audio.
bpm: 120
# Start of the beat, in milliseconds, from the beginning of the input audio.
offset: 0
# Resnap objects to beat timing ticks; requires an accurate BPM and offset.
resnap_objects: false
# Multiplier for slider velocity.
slider_multiplier: 1.7

# --- Song metadata ---

# Song title.
title: ''
# Song artist.
artist: ''

# --- Reference beatmaps and style ---

# Path to the .osu file which will be remapped.
beatmap_path: ''
# Path to the .osu file of another beatmap in the mapset to use as reference.
other_beatmap_path: ''
# Beatmap ID to use as style.
beatmap_id: -1
# Difficulty star rating to map.
difficulty: -1

# --- Generated beatmap metadata ---

# Beatmap creator.
creator: ''
# Beatmap version.
version: ''

# --- Mapset generation ---

# Generate a full mapset.
full_set: false
# Number of difficulties to generate.
set_difficulties: 5
# --- Diffusion settings ---

# Use diffusion to generate object positions.
generate_positions: true
# Path to the checkpoint for the diffusion model.
diff_ckpt: './osudiffusion/DiT-B-0700000.pt'
# Path to the checkpoint for the refining diffusion model.
diff_refine_ckpt: ''
# Parameters of the diffusion model.
# NOTE(review): nesting reconstructed from a flattened original. The keys
# `style_id` through `model` are assumed to belong to `diffusion`; left at
# the top level, this `model` key would duplicate the top-level `model`
# mapping defined earlier in the file.
diffusion:
  style_id: 1451282  # Style ID to use for diffusion
  num_sampling_steps: 100  # Number of sampling steps
  cfg_scale: 1  # Scale of classifier-free guidance
  num_classes: 52670  # Number of classes stored in the model
  beatmap_idx: 'osudiffusion/beatmap_idx.pickle'  # Path to beatmap index
  use_amp: true  # Use automatic mixed precision
  refine_iters: 10  # Number of refinement iterations
  seq_len: 128  # Sequence length
  model: 'DiT-B'  # Model architecture
# Data settings.
# NOTE(review): nesting reconstructed from a flattened original. Left at the
# top level, `sample_rate` and `hop_length` here would duplicate the
# spectrogram keys of the same name; all keys through `add_gd_context` are
# assumed to belong to `data`.
data:
  src_seq_len: 800
  tgt_seq_len: 600
  # Both values are interpolated from the model's spectrogram settings.
  sample_rate: ${model.spectrogram.sample_rate}
  hop_length: ${model.spectrogram.hop_length}
  # Fraction of audio sequence length to shift inference window
  sequence_stride: 1
  center_pad_decoder: false  # Center pad decoder input
  add_pre_tokens: true
  special_token_len: 2
  diff_token_index: 0
  style_token_index: -1
  max_pre_token_len: 4
  # Prefix the decoder with tokens of another beatmap in the mapset
  add_gd_context: false
# Hydra runtime settings.
# NOTE(review): nesting reconstructed from a flattened original, following
# Hydra's `hydra.job.chdir` and `hydra.run.dir` configuration keys.
hydra:
  job:
    # Canonical lowercase boolean; same value as the original `False`.
    chdir: false
  run:
    # Run directory built from the `now` resolver: date folder, then time.
    dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}