{
  "format_version": 1,
  "checkpoint": "/home/ubuntu/.cache/huggingface/hub/models--Aratako--Irodori-TTS-500M-v2-VoiceDesign/snapshots/456e55708e7183f5c7faa1448209d54aa8991451/model.safetensors",
  "opset_version": 18,
  "model": {
    "latent_dim": 32,
    "latent_patch_size": 1,
    "patched_latent_dim": 32,
    "use_speaker_condition": false,
    "use_caption_condition": true
  },
  "tokenizers": {
    "text_repo": "llm-jp/llm-jp-3-150m",
    "text_add_bos": true,
    "caption_repo": "llm-jp/llm-jp-3-150m",
    "caption_add_bos": true
  },
  "rfdit": {
    "context_path": "rfdit_context_fp32_30s.onnx",
    "step_path": "rfdit_step_fp32_30s.onnx",
    "text_len": 256,
    "caption_len": 512,
    "ref_steps": 64,
    "ref_dim": 32,
    "max_patched_steps": 750,
    "has_speaker": false,
    "has_caption": true,
    "step_buckets": {
      "1": {
        "step_path": "rfdit_step_fp32_1s.onnx",
        "seconds": 1.0,
        "latent_steps": 25,
        "max_patched_steps": 25,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_1s.mxr"
      },
      "2": {
        "step_path": "rfdit_step_fp32_2s.onnx",
        "seconds": 2.0,
        "latent_steps": 50,
        "max_patched_steps": 50,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_2s.mxr"
      },
      "4": {
        "step_path": "rfdit_step_fp32_4s.onnx",
        "seconds": 4.0,
        "latent_steps": 100,
        "max_patched_steps": 100,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_4s.mxr"
      },
      "8": {
        "step_path": "rfdit_step_fp32_8s.onnx",
        "seconds": 8.0,
        "latent_steps": 200,
        "max_patched_steps": 200,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_8s.mxr"
      },
      "12": {
        "step_path": "rfdit_step_fp32_12s.onnx",
        "seconds": 12.0,
        "latent_steps": 300,
        "max_patched_steps": 300,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_12s.mxr"
      },
      "16": {
        "step_path": "rfdit_step_fp32_16s.onnx",
        "seconds": 16.0,
        "latent_steps": 400,
        "max_patched_steps": 400,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_16s.mxr"
      },
      "24": {
        "step_path": "rfdit_step_fp32_24s.onnx",
        "seconds": 24.0,
        "latent_steps": 600,
        "max_patched_steps": 600,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_24s.mxr"
      },
      "30": {
        "step_path": "rfdit_step_fp32_30s.onnx",
        "seconds": 30.0,
        "latent_steps": 750,
        "max_patched_steps": 750,
        "step_migraphx_path": "migraphx/rfdit_step_fp32_30s.mxr"
      }
    },
    "context_migraphx_path": "migraphx/rfdit_context_fp32_30s.mxr"
  },
  "dacvae": {
    "sample_rate": 48000,
    "hop_length": 1920,
    "buckets": {
      "1": {
        "path": "dacvae_decode_1s.onnx",
        "seconds": 1.0,
        "latent_steps": 25,
        "patched_steps": 25,
        "samples": 48000,
        "migraphx_path": "migraphx/dacvae_decode_1s.mxr"
      },
      "2": {
        "path": "dacvae_decode_2s.onnx",
        "seconds": 2.0,
        "latent_steps": 50,
        "patched_steps": 50,
        "samples": 96000,
        "migraphx_path": "migraphx/dacvae_decode_2s.mxr"
      },
      "4": {
        "path": "dacvae_decode_4s.onnx",
        "seconds": 4.0,
        "latent_steps": 100,
        "patched_steps": 100,
        "samples": 192000,
        "migraphx_path": "migraphx/dacvae_decode_4s.mxr"
      },
      "8": {
        "path": "dacvae_decode_8s.onnx",
        "seconds": 8.0,
        "latent_steps": 200,
        "patched_steps": 200,
        "samples": 384000,
        "migraphx_path": "migraphx/dacvae_decode_8s.mxr"
      },
      "12": {
        "path": "dacvae_decode_12s.onnx",
        "seconds": 12.0,
        "latent_steps": 300,
        "patched_steps": 300,
        "samples": 576000,
        "migraphx_path": "migraphx/dacvae_decode_12s.mxr"
      },
      "16": {
        "path": "dacvae_decode_16s.onnx",
        "seconds": 16.0,
        "latent_steps": 400,
        "patched_steps": 400,
        "samples": 768000,
        "migraphx_path": "migraphx/dacvae_decode_16s.mxr"
      },
      "24": {
        "path": "dacvae_decode_24s.onnx",
        "seconds": 24.0,
        "latent_steps": 600,
        "patched_steps": 600,
        "samples": 1152000,
        "migraphx_path": "migraphx/dacvae_decode_24s.mxr"
      },
      "30": {
        "path": "dacvae_decode_30s.onnx",
        "seconds": 30.0,
        "latent_steps": 750,
        "patched_steps": 750,
        "samples": 1440000,
        "migraphx_path": "migraphx/dacvae_decode_30s.mxr"
      }
    }
  }
}