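# SingingSDS / config/options.yaml
# The lines below are file-viewer page metadata captured with this file;
# they are kept as comments because they are not part of the configuration.
#   jhansss's picture
#   refactor init
#   91394e0
#   raw
#   history blame
#   2.29 kB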
# Speech-recognition model choices.
# Each entry pairs a model identifier (`id`) with a human-readable label
# (`name`).
# NOTE(review): ids look like Hugging Face Hub repo ids — confirm against the
# code that loads them.
asr_models:
  - id: openai/whisper-large-v3-turbo
    name: Whisper large-v3-turbo

  - id: openai/whisper-large-v3
    name: Whisper large-v3

  - id: openai/whisper-medium
    name: Whisper medium

  - id: sanchit-gandhi/whisper-small-dv
    name: Whisper small-dv

  - id: facebook/wav2vec2-base-960h
    name: Wav2Vec2-Base-960h
# Language-model choices, in the same `id` / `name` shape as the other
# model lists in this file.
# NOTE(review): ids look like Hugging Face Hub repo ids — confirm against the
# code that loads them.
llm_models:
  - id: google/gemma-2-2b
    name: Gemma 2 2B

  - id: MiniMaxAI/MiniMax-M1-80k
    name: MiniMax M1 80k
# Singing-voice-synthesis model choices.
# Fields per entry:
#   id         - "<lang>-<model_path>" for every entry listed here
#   name       - human-readable label
#   model_path - model identifier without the language prefix
#   lang       - language the entry is configured for
#   embeddings - map from a timbre label to either a .npy file path or an
#                integer
# NOTE(review): the integer values are presumably speaker ids inside the
# multi-singer model — confirm against the consumer.
svs_models:
  # Bilingual checkpoint, Mandarin configuration.
  - id: mandarin-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained
    name: Visinger2 (Bilingual)-zh
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained
    lang: mandarin
    embeddings:
      timbre1: resource/singer/singer_embedding_ace-2.npy
      timbre2: resource/singer/singer_embedding_ace-8.npy
      timbre3: resource/singer/singer_embedding_itako.npy
      timbre4: resource/singer/singer_embedding_kising_orange.npy
      timbre5: resource/singer/singer_embedding_m4singer_Alto-4.npy

  # Same model_path and embedding files as above, Japanese configuration.
  - id: japanese-espnet/mixdata_svs_visinger2_spkemb_lang_pretrained
    name: Visinger2 (Bilingual)-jp
    model_path: espnet/mixdata_svs_visinger2_spkemb_lang_pretrained
    lang: japanese
    embeddings:
      timbre1: resource/singer/singer_embedding_ace-2.npy
      timbre2: resource/singer/singer_embedding_ace-8.npy
      timbre3: resource/singer/singer_embedding_itako.npy
      timbre4: resource/singer/singer_embedding_kising_orange.npy
      timbre5: resource/singer/singer_embedding_m4singer_Alto-4.npy

  # Mandarin-only checkpoint; timbres are integers rather than file paths.
  - id: mandarin-espnet/aceopencpop_svs_visinger2_40singer_pretrain
    name: Visinger2 (Chinese)
    model_path: espnet/aceopencpop_svs_visinger2_40singer_pretrain
    lang: mandarin
    embeddings:
      timbre1: 5
      timbre2: 8
      timbre3: 12
      timbre4: 15
      timbre5: 29
# Melody source choices.
# Each entry has an identifier (`id`), a human-readable label (`name`), and a
# one-sentence description (`desc`).
# The dataset name is spelled "KiSing" in every label and description.
melody_sources:
  - id: gen-random-none
    name: Random Generation
    desc: "Melody is generated without any structure or reference."

  - id: sample-note-kising
    name: Sampled Melody (KiSing)
    desc: "Melody is retrieved from KiSing dataset."

  - id: sample-note-touhou
    name: Sampled Melody (Touhou)
    desc: "Melody is retrieved from Touhou dataset."

  - id: sample-lyric-kising
    name: Sampled Melody with Lyrics (KiSing)
    desc: "Melody with aligned lyrics is sampled from KiSing dataset."