irodori_tts_cpp_artifacts / manifest.json
yoshou's picture
Add files using upload-large-folder tool
292aa23 verified
{
"format_version": 1,
"checkpoint": "/home/ubuntu/.cache/huggingface/hub/models--Aratako--Irodori-TTS-500M-v2-VoiceDesign/snapshots/456e55708e7183f5c7faa1448209d54aa8991451/model.safetensors",
"opset_version": 18,
"model": {
"latent_dim": 32,
"latent_patch_size": 1,
"patched_latent_dim": 32,
"use_speaker_condition": false,
"use_caption_condition": true
},
"tokenizers": {
"text_repo": "llm-jp/llm-jp-3-150m",
"text_add_bos": true,
"caption_repo": "llm-jp/llm-jp-3-150m",
"caption_add_bos": true
},
"rfdit": {
"context_path": "rfdit_context_fp32_30s.onnx",
"step_path": "rfdit_step_fp32_30s.onnx",
"text_len": 256,
"caption_len": 512,
"ref_steps": 64,
"ref_dim": 32,
"max_patched_steps": 750,
"has_speaker": false,
"has_caption": true,
"step_buckets": {
"1": {
"step_path": "rfdit_step_fp32_1s.onnx",
"seconds": 1.0,
"latent_steps": 25,
"max_patched_steps": 25,
"step_migraphx_path": "migraphx/rfdit_step_fp32_1s.mxr"
},
"2": {
"step_path": "rfdit_step_fp32_2s.onnx",
"seconds": 2.0,
"latent_steps": 50,
"max_patched_steps": 50,
"step_migraphx_path": "migraphx/rfdit_step_fp32_2s.mxr"
},
"4": {
"step_path": "rfdit_step_fp32_4s.onnx",
"seconds": 4.0,
"latent_steps": 100,
"max_patched_steps": 100,
"step_migraphx_path": "migraphx/rfdit_step_fp32_4s.mxr"
},
"8": {
"step_path": "rfdit_step_fp32_8s.onnx",
"seconds": 8.0,
"latent_steps": 200,
"max_patched_steps": 200,
"step_migraphx_path": "migraphx/rfdit_step_fp32_8s.mxr"
},
"12": {
"step_path": "rfdit_step_fp32_12s.onnx",
"seconds": 12.0,
"latent_steps": 300,
"max_patched_steps": 300,
"step_migraphx_path": "migraphx/rfdit_step_fp32_12s.mxr"
},
"16": {
"step_path": "rfdit_step_fp32_16s.onnx",
"seconds": 16.0,
"latent_steps": 400,
"max_patched_steps": 400,
"step_migraphx_path": "migraphx/rfdit_step_fp32_16s.mxr"
},
"24": {
"step_path": "rfdit_step_fp32_24s.onnx",
"seconds": 24.0,
"latent_steps": 600,
"max_patched_steps": 600,
"step_migraphx_path": "migraphx/rfdit_step_fp32_24s.mxr"
},
"30": {
"step_path": "rfdit_step_fp32_30s.onnx",
"seconds": 30.0,
"latent_steps": 750,
"max_patched_steps": 750,
"step_migraphx_path": "migraphx/rfdit_step_fp32_30s.mxr"
}
},
"context_migraphx_path": "migraphx/rfdit_context_fp32_30s.mxr"
},
"dacvae": {
"sample_rate": 48000,
"hop_length": 1920,
"buckets": {
"1": {
"path": "dacvae_decode_1s.onnx",
"seconds": 1.0,
"latent_steps": 25,
"patched_steps": 25,
"samples": 48000,
"migraphx_path": "migraphx/dacvae_decode_1s.mxr"
},
"2": {
"path": "dacvae_decode_2s.onnx",
"seconds": 2.0,
"latent_steps": 50,
"patched_steps": 50,
"samples": 96000,
"migraphx_path": "migraphx/dacvae_decode_2s.mxr"
},
"4": {
"path": "dacvae_decode_4s.onnx",
"seconds": 4.0,
"latent_steps": 100,
"patched_steps": 100,
"samples": 192000,
"migraphx_path": "migraphx/dacvae_decode_4s.mxr"
},
"8": {
"path": "dacvae_decode_8s.onnx",
"seconds": 8.0,
"latent_steps": 200,
"patched_steps": 200,
"samples": 384000,
"migraphx_path": "migraphx/dacvae_decode_8s.mxr"
},
"12": {
"path": "dacvae_decode_12s.onnx",
"seconds": 12.0,
"latent_steps": 300,
"patched_steps": 300,
"samples": 576000,
"migraphx_path": "migraphx/dacvae_decode_12s.mxr"
},
"16": {
"path": "dacvae_decode_16s.onnx",
"seconds": 16.0,
"latent_steps": 400,
"patched_steps": 400,
"samples": 768000,
"migraphx_path": "migraphx/dacvae_decode_16s.mxr"
},
"24": {
"path": "dacvae_decode_24s.onnx",
"seconds": 24.0,
"latent_steps": 600,
"patched_steps": 600,
"samples": 1152000,
"migraphx_path": "migraphx/dacvae_decode_24s.mxr"
},
"30": {
"path": "dacvae_decode_30s.onnx",
"seconds": 30.0,
"latent_steps": 750,
"patched_steps": 750,
"samples": 1440000,
"migraphx_path": "migraphx/dacvae_decode_30s.mxr"
}
}
}
}